diff --git a/.github/workflows/java.yml b/.github/workflows/java-glue.yml similarity index 67% rename from .github/workflows/java.yml rename to .github/workflows/java-glue.yml index b157435..89ef3ea 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java-glue.yml @@ -10,12 +10,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: Java +name: Java Glue on: push: branches: - main + paths: + - java/lance-namespace-glue/** + - java/pom.xml + - .github/workflows/java-glue.yml pull_request: types: - opened @@ -23,20 +27,21 @@ on: - ready_for_review - reopened paths: - - java/** - - .github/workflows/java.yml + - java/lance-namespace-glue/** + - java/pom.xml + - .github/workflows/java-glue.yml concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: - linux-build: + test: runs-on: ubuntu-24.04 - timeout-minutes: 60 + timeout-minutes: 30 strategy: matrix: - java-version: [ 11, 17 ] + java-version: [11, 17] steps: - name: Checkout repo uses: actions/checkout@v4 @@ -46,18 +51,12 @@ jobs: distribution: temurin java-version: ${{ matrix.java-version }} cache: "maven" - - name: Check code style - working-directory: java - run: ./mvnw spotless:check - - name: Build with Java ${{ matrix.java-version }} + - name: Lint working-directory: java - run: ./mvnw install -DskipTests - - name: Run unit tests + run: make lint-glue + - name: Build working-directory: java - run: | - ./mvnw test -pl lance-namespace-glue - ./mvnw test -pl lance-namespace-polaris - - name: Build all modules with make (Java 17 only) - if: matrix.java-version == 17 + run: make build-glue + - name: Test working-directory: java - run: make build + run: make test-glue diff --git a/.github/workflows/java-hive2.yml b/.github/workflows/java-hive2.yml new file mode 100644 index 0000000..3b32107 --- /dev/null +++ b/.github/workflows/java-hive2.yml @@ -0,0 +1,62 @@ +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java Hive2 + +on: + push: + branches: + - main + paths: + - java/lance-namespace-hive2/** + - java/pom.xml + - .github/workflows/java-hive2.yml + pull_request: + types: + - opened + - synchronize + - ready_for_review + - reopened + paths: + - java/lance-namespace-hive2/** + - java/pom.xml + - .github/workflows/java-hive2.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + test: + runs-on: ubuntu-24.04 + timeout-minutes: 30 + strategy: + matrix: + java-version: [11, 17] + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Java + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: ${{ matrix.java-version }} + cache: "maven" + - name: Lint + working-directory: java + run: make lint-hive2 + - name: Build + working-directory: java + run: make build-hive2 + - name: Test + working-directory: java + run: make test-hive2 diff --git a/.github/workflows/java-hive3.yml b/.github/workflows/java-hive3.yml new file mode 100644 index 0000000..cf8d9c3 --- /dev/null +++ b/.github/workflows/java-hive3.yml @@ -0,0 +1,62 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java Hive3 + +on: + push: + branches: + - main + paths: + - java/lance-namespace-hive3/** + - java/pom.xml + - .github/workflows/java-hive3.yml + pull_request: + types: + - opened + - synchronize + - ready_for_review + - reopened + paths: + - java/lance-namespace-hive3/** + - java/pom.xml + - .github/workflows/java-hive3.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + test: + runs-on: ubuntu-24.04 + timeout-minutes: 30 + strategy: + matrix: + java-version: [11, 17] + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Java + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: ${{ matrix.java-version }} + cache: "maven" + - name: Lint + working-directory: java + run: make lint-hive3 + - name: Build + working-directory: java + run: make build-hive3 + - name: Test + working-directory: java + run: make test-hive3 diff --git a/.github/workflows/java-iceberg.yml b/.github/workflows/java-iceberg.yml new file mode 100644 index 0000000..b121174 --- /dev/null +++ b/.github/workflows/java-iceberg.yml @@ -0,0 +1,62 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java Iceberg + +on: + push: + branches: + - main + paths: + - java/lance-namespace-iceberg/** + - java/pom.xml + - .github/workflows/java-iceberg.yml + pull_request: + types: + - opened + - synchronize + - ready_for_review + - reopened + paths: + - java/lance-namespace-iceberg/** + - java/pom.xml + - .github/workflows/java-iceberg.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + test: + runs-on: ubuntu-24.04 + timeout-minutes: 30 + strategy: + matrix: + java-version: [11, 17] + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Java + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: ${{ matrix.java-version }} + cache: "maven" + - name: Lint + working-directory: java + run: make lint-iceberg + - name: Build + working-directory: java + run: make build-iceberg + - name: Test + working-directory: java + run: make test-iceberg diff --git a/.github/workflows/java-integ-glue.yml b/.github/workflows/java-integ-glue.yml new file mode 100644 index 0000000..9d7364a --- /dev/null +++ b/.github/workflows/java-integ-glue.yml @@ -0,0 +1,60 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java Glue Integration + +on: + push: + branches: + - main + paths: + - java/lance-namespace-glue/** + - .github/workflows/java-integ-glue.yml + pull_request_target: + types: + - opened + - synchronize + - ready_for_review + - reopened + paths: + - java/lance-namespace-glue/** + - .github/workflows/java-integ-glue.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + integration-test: + runs-on: ubuntu-24.04 + timeout-minutes: 30 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha || github.sha }} + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: "17" + distribution: "temurin" + cache: "maven" + - name: Build + run: make build-java + - name: Run integration tests + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.AWS_REGION }} + AWS_S3_BUCKET_NAME: ${{ secrets.AWS_S3_BUCKET_NAME }} + run: make java-integ-test-glue diff --git a/.github/workflows/integration-hive2.yml b/.github/workflows/java-integ-hive2.yml similarity index 57% rename from .github/workflows/integration-hive2.yml rename to .github/workflows/java-integ-hive2.yml index 82bf0d4..d288006 100644 --- a/.github/workflows/integration-hive2.yml +++ b/.github/workflows/java-integ-hive2.yml @@ -1,20 +1,35 @@ -name: Hive2 Integration Tests +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except 
in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java Hive2 Integration on: push: - branches: [main] + branches: + - main paths: - - 'java/lance-namespace-hive2/**' - - 'docker/hive2/**' - - 'docker/docker-compose.yml' - - '.github/workflows/integration-hive2.yml' + - java/lance-namespace-hive2/** + - docker/hive2/** + - .github/workflows/java-integ-hive2.yml pull_request: - branches: [main] + types: + - opened + - synchronize + - ready_for_review + - reopened paths: - - 'java/lance-namespace-hive2/**' - - 'docker/hive2/**' - - 'docker/docker-compose.yml' - - '.github/workflows/integration-hive2.yml' + - java/lance-namespace-hive2/** + - docker/hive2/** + - .github/workflows/java-integ-hive2.yml workflow_dispatch: concurrency: @@ -23,31 +38,21 @@ concurrency: jobs: integration-test: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 timeout-minutes: 30 - steps: - name: Checkout uses: actions/checkout@v4 - - name: Set up JDK 17 uses: actions/setup-java@v4 with: - java-version: '17' - distribution: 'temurin' - cache: 'maven' - + java-version: "17" + distribution: "temurin" + cache: "maven" - name: Download PostgreSQL JDBC driver - run: | - mkdir -p docker/hive2 - curl -sSL -o docker/hive2/postgresql-42.7.3.jar \ - https://jdbc.postgresql.org/download/postgresql-42.7.3.jar - + run: make docker-setup - name: Start Hive2 Metastore - run: | - docker compose -f docker/hive2/docker-compose.yml up -d - echo "Waiting for services to start..." - + run: make docker-up-hive2 - name: Wait for Hive2 Metastore to be ready run: | echo "Waiting for Hive2 Metastore to be ready..." 
@@ -62,27 +67,17 @@ jobs: docker compose -f docker/hive2/docker-compose.yml logs exit 1 } - echo "Hive2 Metastore is ready" - - - name: Verify Hive2 connectivity - run: | - # Wait a bit more for thrift to be fully ready sleep 10 nc -z localhost 9083 || { echo "Cannot connect to Hive2 Metastore on port 9083" docker compose -f docker/hive2/docker-compose.yml logs exit 1 } - echo "Hive2 Metastore is accepting connections on port 9083" - - - name: Run Hive2 integration tests - working-directory: java - run: | - ./mvnw test -pl lance-namespace-hive2 \ - -Dtest="*IntegrationTest" \ - -DfailIfNoTests=false \ - -Dmaven.test.failure.ignore=false - + echo "Hive2 Metastore is ready" + - name: Build + run: make build-java + - name: Run integration tests + run: make java-integ-test-hive2 - name: Collect logs on failure if: failure() run: | @@ -94,8 +89,6 @@ jobs: echo "" echo "=== PostgreSQL Logs ===" docker compose -f docker/hive2/docker-compose.yml logs postgres-hive2 - - name: Cleanup if: always() - run: | - docker compose -f docker/hive2/docker-compose.yml down -v + run: make docker-down-hive2 diff --git a/.github/workflows/integration-hive3.yml b/.github/workflows/java-integ-hive3.yml similarity index 57% rename from .github/workflows/integration-hive3.yml rename to .github/workflows/java-integ-hive3.yml index b2bdcdd..5d893ff 100644 --- a/.github/workflows/integration-hive3.yml +++ b/.github/workflows/java-integ-hive3.yml @@ -1,20 +1,35 @@ -name: Hive3 Integration Tests +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java Hive3 Integration on: push: - branches: [main] + branches: + - main paths: - - 'java/lance-namespace-hive3/**' - - 'docker/hive3/**' - - 'docker/docker-compose.yml' - - '.github/workflows/integration-hive3.yml' + - java/lance-namespace-hive3/** + - docker/hive3/** + - .github/workflows/java-integ-hive3.yml pull_request: - branches: [main] + types: + - opened + - synchronize + - ready_for_review + - reopened paths: - - 'java/lance-namespace-hive3/**' - - 'docker/hive3/**' - - 'docker/docker-compose.yml' - - '.github/workflows/integration-hive3.yml' + - java/lance-namespace-hive3/** + - docker/hive3/** + - .github/workflows/java-integ-hive3.yml workflow_dispatch: concurrency: @@ -23,31 +38,21 @@ concurrency: jobs: integration-test: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 timeout-minutes: 30 - steps: - name: Checkout uses: actions/checkout@v4 - - name: Set up JDK 17 uses: actions/setup-java@v4 with: - java-version: '17' - distribution: 'temurin' - cache: 'maven' - + java-version: "17" + distribution: "temurin" + cache: "maven" - name: Download PostgreSQL JDBC driver - run: | - mkdir -p docker/hive3 - curl -sSL -o docker/hive3/postgresql-42.7.3.jar \ - https://jdbc.postgresql.org/download/postgresql-42.7.3.jar - + run: make docker-setup - name: Start Hive3 Metastore - run: | - docker compose -f docker/hive3/docker-compose.yml up -d - echo "Waiting for services to start..." - + run: make docker-up-hive3 - name: Wait for Hive3 Metastore to be ready run: | echo "Waiting for Hive3 Metastore to be ready..." 
@@ -62,27 +67,17 @@ jobs: docker compose -f docker/hive3/docker-compose.yml logs exit 1 } - echo "Hive3 Metastore is ready" - - - name: Verify Hive3 connectivity - run: | - # Wait a bit more for thrift to be fully ready sleep 10 nc -z localhost 9084 || { echo "Cannot connect to Hive3 Metastore on port 9084" docker compose -f docker/hive3/docker-compose.yml logs exit 1 } - echo "Hive3 Metastore is accepting connections on port 9084" - - - name: Run Hive3 integration tests - working-directory: java - run: | - ./mvnw test -pl lance-namespace-hive3 \ - -Dtest="*IntegrationTest" \ - -DfailIfNoTests=false \ - -Dmaven.test.failure.ignore=false - + echo "Hive3 Metastore is ready" + - name: Build + run: make build-java + - name: Run integration tests + run: make java-integ-test-hive3 - name: Collect logs on failure if: failure() run: | @@ -94,8 +89,6 @@ jobs: echo "" echo "=== PostgreSQL Logs ===" docker compose -f docker/hive3/docker-compose.yml logs postgres-hive3 - - name: Cleanup if: always() - run: | - docker compose -f docker/hive3/docker-compose.yml down -v + run: make docker-down-hive3 diff --git a/.github/workflows/java-integ-iceberg.yml b/.github/workflows/java-integ-iceberg.yml new file mode 100644 index 0000000..7a75d38 --- /dev/null +++ b/.github/workflows/java-integ-iceberg.yml @@ -0,0 +1,105 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Java Iceberg Integration + +on: + push: + branches: + - main + paths: + - java/lance-namespace-iceberg/** + - docker/iceberg/** + - .github/workflows/java-integ-iceberg.yml + pull_request: + types: + - opened + - synchronize + - ready_for_review + - reopened + paths: + - java/lance-namespace-iceberg/** + - docker/iceberg/** + - .github/workflows/java-integ-iceberg.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + integration-test: + runs-on: ubuntu-24.04 + timeout-minutes: 30 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: "17" + distribution: "temurin" + cache: "maven" + - name: Start Iceberg REST Catalog (Lakekeeper) + run: make docker-up-iceberg + - name: Wait for Iceberg REST Catalog + run: | + echo "Waiting for Iceberg REST Catalog to be ready..." + timeout 180 bash -c ' + until curl -sf http://localhost:8282/health > /dev/null 2>&1; do + echo "Waiting for health endpoint..." + sleep 5 + done + ' || { + echo "Timeout waiting for Iceberg REST Catalog" + docker compose -f docker/iceberg/docker-compose.yml logs + exit 1 + } + echo "Iceberg REST Catalog is ready" + + # Wait for setup to complete (creates test_warehouse) + echo "Waiting for warehouse setup to complete..." + timeout 60 bash -c ' + while docker ps -q -f name=lakekeeper-setup 2>/dev/null | grep -q .; do + echo "Waiting for setup container to finish..." + sleep 2 + done + ' || echo "Setup may have already completed" + + # Verify warehouse exists + echo "Verifying test_warehouse exists..." 
+ WAREHOUSES=$(curl -s http://localhost:8282/management/v1/warehouse || echo "") + + if echo "$WAREHOUSES" | grep -q "test_warehouse"; then + echo "test_warehouse verified" + else + echo "Warning: test_warehouse not found in warehouses: $WAREHOUSES" + docker compose -f docker/iceberg/docker-compose.yml logs lakekeeper-setup + fi + - name: Build + run: make build-java + - name: Run integration tests + run: make java-integ-test-iceberg + - name: Collect logs on failure + if: failure() + run: | + echo "=== Docker Compose Status ===" + docker compose -f docker/iceberg/docker-compose.yml ps + echo "" + echo "=== Iceberg REST Catalog Logs ===" + docker compose -f docker/iceberg/docker-compose.yml logs lakekeeper + echo "" + echo "=== PostgreSQL Logs ===" + docker compose -f docker/iceberg/docker-compose.yml logs postgres-lakekeeper + - name: Cleanup + if: always() + run: make docker-down-iceberg diff --git a/.github/workflows/integration-polaris.yml b/.github/workflows/java-integ-polaris.yml similarity index 67% rename from .github/workflows/integration-polaris.yml rename to .github/workflows/java-integ-polaris.yml index e812a76..f36906b 100644 --- a/.github/workflows/integration-polaris.yml +++ b/.github/workflows/java-integ-polaris.yml @@ -1,20 +1,35 @@ -name: Polaris Integration Tests +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Java Polaris Integration on: push: - branches: [main] + branches: + - main paths: - - 'java/lance-namespace-polaris/**' - - 'docker/polaris/**' - - 'docker/docker-compose.yml' - - '.github/workflows/integration-polaris.yml' + - java/lance-namespace-polaris/** + - docker/polaris/** + - .github/workflows/java-integ-polaris.yml pull_request: - branches: [main] + types: + - opened + - synchronize + - ready_for_review + - reopened paths: - - 'java/lance-namespace-polaris/**' - - 'docker/polaris/**' - - 'docker/docker-compose.yml' - - '.github/workflows/integration-polaris.yml' + - java/lance-namespace-polaris/** + - docker/polaris/** + - .github/workflows/java-integ-polaris.yml workflow_dispatch: concurrency: @@ -23,25 +38,19 @@ concurrency: jobs: integration-test: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 timeout-minutes: 30 - steps: - name: Checkout uses: actions/checkout@v4 - - name: Set up JDK 17 uses: actions/setup-java@v4 with: - java-version: '17' - distribution: 'temurin' - cache: 'maven' - + java-version: "17" + distribution: "temurin" + cache: "maven" - name: Start Polaris - run: | - docker compose -f docker/polaris/docker-compose.yml up -d - echo "Waiting for services to start..." - + run: make docker-up-polaris - name: Wait for Polaris to be ready run: | echo "Waiting for Polaris to be ready..." 
@@ -56,21 +65,16 @@ jobs: exit 1 } echo "Polaris is ready" - - name: Create test catalog run: | - # Get OAuth token TOKEN=$(curl -s -X POST http://localhost:8181/api/catalog/v1/oauth/tokens \ -H 'Content-Type: application/x-www-form-urlencoded' \ -d 'grant_type=client_credentials&client_id=root&client_secret=s3cr3t&scope=PRINCIPAL_ROLE:ALL' | \ python3 -c "import sys, json; print(json.load(sys.stdin).get('access_token', ''))") - if [ -z "$TOKEN" ]; then echo "Failed to get OAuth token" exit 1 fi - - # Create test catalog curl -s -X POST http://localhost:8181/api/catalog/v1/catalogs \ -H "Authorization: Bearer $TOKEN" \ -H 'Content-Type: application/json' \ @@ -84,17 +88,11 @@ jobs: "storageType": "FILE" } }' || echo "Catalog may already exist" - echo "Test catalog created/verified" - - - name: Run Polaris integration tests - working-directory: java - run: | - ./mvnw test -pl lance-namespace-polaris \ - -Dtest="*IntegrationTest" \ - -DfailIfNoTests=false \ - -Dmaven.test.failure.ignore=false - + - name: Build + run: make build-java + - name: Run integration tests + run: make java-integ-test-polaris - name: Collect logs on failure if: failure() run: | @@ -106,8 +104,6 @@ jobs: echo "" echo "=== PostgreSQL Logs ===" docker compose -f docker/polaris/docker-compose.yml logs postgres-polaris - - name: Cleanup if: always() - run: | - docker compose -f docker/polaris/docker-compose.yml down -v + run: make docker-down-polaris diff --git a/.github/workflows/integration-unity.yml b/.github/workflows/java-integ-unity.yml similarity index 60% rename from .github/workflows/integration-unity.yml rename to .github/workflows/java-integ-unity.yml index 1c89570..ed4d054 100644 --- a/.github/workflows/integration-unity.yml +++ b/.github/workflows/java-integ-unity.yml @@ -1,20 +1,35 @@ -name: Unity Catalog Integration Tests +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Java Unity Integration on: push: - branches: [main] + branches: + - main paths: - - 'java/lance-namespace-unity/**' - - 'docker/unity/**' - - 'docker/docker-compose.yml' - - '.github/workflows/integration-unity.yml' + - java/lance-namespace-unity/** + - docker/unity/** + - .github/workflows/java-integ-unity.yml pull_request: - branches: [main] + types: + - opened + - synchronize + - ready_for_review + - reopened paths: - - 'java/lance-namespace-unity/**' - - 'docker/unity/**' - - 'docker/docker-compose.yml' - - '.github/workflows/integration-unity.yml' + - java/lance-namespace-unity/** + - docker/unity/** + - .github/workflows/java-integ-unity.yml workflow_dispatch: concurrency: @@ -23,25 +38,19 @@ concurrency: jobs: integration-test: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 timeout-minutes: 30 - steps: - name: Checkout uses: actions/checkout@v4 - - name: Set up JDK 17 uses: actions/setup-java@v4 with: - java-version: '17' - distribution: 'temurin' - cache: 'maven' - + java-version: "17" + distribution: "temurin" + cache: "maven" - name: Start Unity Catalog - run: | - docker compose -f docker/unity/docker-compose.yml up -d - echo "Waiting for services to start..." - + run: make docker-up-unity - name: Wait for Unity Catalog to be ready run: | echo "Waiting for Unity Catalog to be ready..." 
@@ -56,31 +65,21 @@ jobs: exit 1 } echo "Unity Catalog is ready" - - name: Create test catalog run: | - # Create test catalog curl -s -X POST http://localhost:8080/api/2.1/unity-catalog/catalogs \ -H 'Content-Type: application/json' \ -d '{"name": "lance_test", "comment": "Test catalog for Lance integration tests"}' \ || echo "Catalog may already exist" - - # Verify catalog exists curl -sf http://localhost:8080/api/2.1/unity-catalog/catalogs/lance_test || { echo "Failed to create/verify test catalog" exit 1 } - echo "Test catalog created/verified" - - - name: Run Unity Catalog integration tests - working-directory: java - run: | - ./mvnw test -pl lance-namespace-unity \ - -Dtest="*IntegrationTest" \ - -DfailIfNoTests=false \ - -Dmaven.test.failure.ignore=false - + - name: Build + run: make build-java + - name: Run integration tests + run: make java-integ-test-unity - name: Collect logs on failure if: failure() run: | @@ -89,8 +88,6 @@ jobs: echo "" echo "=== Unity Catalog Logs ===" docker compose -f docker/unity/docker-compose.yml logs unity-catalog - - name: Cleanup if: always() - run: | - docker compose -f docker/unity/docker-compose.yml down -v + run: make docker-down-unity diff --git a/.github/workflows/java-polaris.yml b/.github/workflows/java-polaris.yml new file mode 100644 index 0000000..51e45d9 --- /dev/null +++ b/.github/workflows/java-polaris.yml @@ -0,0 +1,62 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Java Polaris + +on: + push: + branches: + - main + paths: + - java/lance-namespace-polaris/** + - java/pom.xml + - .github/workflows/java-polaris.yml + pull_request: + types: + - opened + - synchronize + - ready_for_review + - reopened + paths: + - java/lance-namespace-polaris/** + - java/pom.xml + - .github/workflows/java-polaris.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + test: + runs-on: ubuntu-24.04 + timeout-minutes: 30 + strategy: + matrix: + java-version: [11, 17] + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Java + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: ${{ matrix.java-version }} + cache: "maven" + - name: Lint + working-directory: java + run: make lint-polaris + - name: Build + working-directory: java + run: make build-polaris + - name: Test + working-directory: java + run: make test-polaris diff --git a/.github/workflows/java-unity.yml b/.github/workflows/java-unity.yml index f0e1e49..964e465 100644 --- a/.github/workflows/java-unity.yml +++ b/.github/workflows/java-unity.yml @@ -10,12 +10,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-name: Java Unity Catalog Tests +name: Java Unity on: push: branches: - main + paths: + - java/lance-namespace-unity/** + - java/pom.xml + - .github/workflows/java-unity.yml pull_request: types: - opened @@ -24,6 +28,7 @@ on: - reopened paths: - java/lance-namespace-unity/** + - java/pom.xml - .github/workflows/java-unity.yml concurrency: @@ -31,12 +36,12 @@ concurrency: cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: - unity-catalog-tests: + test: runs-on: ubuntu-24.04 timeout-minutes: 30 strategy: matrix: - java-version: [ 17 ] + java-version: [11, 17] steps: - name: Checkout repo uses: actions/checkout@v4 @@ -46,41 +51,12 @@ jobs: distribution: temurin java-version: ${{ matrix.java-version }} cache: "maven" - - # Start Unity Catalog server using Docker Compose - - name: Start Unity Catalog server - run: | - git clone https://github.com/unitycatalog/unitycatalog.git /tmp/unitycatalog - cd /tmp/unitycatalog - docker compose up -d - # Wait for Unity Catalog to be ready - echo "Waiting for Unity Catalog server to start..." - for i in {1..30}; do - if curl -s http://localhost:8080/api/2.1/unity-catalog/catalogs > /dev/null; then - echo "Unity Catalog server is ready" - break - fi - echo "Waiting for Unity Catalog server... 
($i/30)" - sleep 2 - done - # Verify server is responding - curl -s http://localhost:8080/api/2.1/unity-catalog/catalogs || exit 1 - - # Run Unity namespace tests - - name: Run Unity Catalog tests with Java ${{ matrix.java-version }} + - name: Lint working-directory: java - run: ./mvnw test -pl lance-namespace-unity -am - - # Clean up Docker containers - - name: Stop Unity Catalog server - if: always() - run: | - cd /tmp/unitycatalog - docker compose down - - # Show Docker logs if tests failed - - name: Show Unity Catalog logs on failure - if: failure() - run: | - cd /tmp/unitycatalog - docker compose logs + run: make lint-unity + - name: Build + working-directory: java + run: make build-unity + - name: Test + working-directory: java + run: make test-unity diff --git a/.github/workflows/python-glue.yml b/.github/workflows/python-glue.yml new file mode 100644 index 0000000..2084e56 --- /dev/null +++ b/.github/workflows/python-glue.yml @@ -0,0 +1,66 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Python Glue + +on: + push: + branches: + - main + paths: + - python/src/lance_namespace_impls/glue.py + - python/tests/test_glue.py + - python/tests/test_glue_integration.py + - python/pyproject.toml + - .github/workflows/python-glue.yml + pull_request: + types: + - opened + - synchronize + - ready_for_review + - reopened + paths: + - python/src/lance_namespace_impls/glue.py + - python/tests/test_glue.py + - python/tests/test_glue_integration.py + - python/pyproject.toml + - .github/workflows/python-glue.yml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + test: + runs-on: ubuntu-24.04 + timeout-minutes: 30 + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: astral-sh/setup-uv@v4 + - name: Install dependencies + working-directory: python + run: make install-glue + - name: Lint + working-directory: python + run: make lint-glue + - name: Test + working-directory: python + run: make test-glue diff --git a/.github/workflows/python.yml b/.github/workflows/python-hive.yml similarity index 67% rename from .github/workflows/python.yml rename to .github/workflows/python-hive.yml index dc67340..abb027f 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python-hive.yml @@ -10,12 +10,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-name: Python
+name: Python Hive
 
 on:
   push:
     branches:
       - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/hive.py
+      - python/tests/test_hive.py
+      - python/pyproject.toml
+      - .github/workflows/python-hive.yml
   pull_request:
     types:
       - opened
@@ -23,20 +28,22 @@ on:
       - ready_for_review
       - reopened
     paths:
-      - python/**
-      - .github/workflows/python.yml
+      - python/src/lance_namespace_impls/hive.py
+      - python/tests/test_hive.py
+      - python/pyproject.toml
+      - .github/workflows/python-hive.yml
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: ${{ github.event_name == 'pull_request' }}
 
 jobs:
-  linux-build:
+  test:
     runs-on: ubuntu-24.04
     timeout-minutes: 30
     strategy:
       matrix:
-        python-version: [ "3.10", "3.11", "3.12" ]
+        python-version: ["3.10", "3.11", "3.12"]  # quoted so "3.10" stays a string
     steps:
       - name: Checkout repo
         uses: actions/checkout@v4
@@ -46,13 +53,12 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install uv
         uses: astral-sh/setup-uv@v4
-      - name: Install dependencies and run tests
+      - name: Install dependencies
+        working-directory: python  # the make targets in this job are run from python/
+        run: make install-hive
+      - name: Lint
         working-directory: python
-        run: |
-          uv sync
-          uv run pytest
-      - name: Check code style
+        run: make lint-hive
+      - name: Test
         working-directory: python
-        run: |
-          uv run ruff check .
-          uv run ruff format --check .
+        run: make test-hive
diff --git a/.github/workflows/python-iceberg.yml b/.github/workflows/python-iceberg.yml
new file mode 100644
index 0000000..e43ee9d
--- /dev/null
+++ b/.github/workflows/python-iceberg.yml
@@ -0,0 +1,64 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Python Iceberg
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/iceberg.py
+      - python/tests/test_iceberg.py
+      - python/pyproject.toml
+      - .github/workflows/python-iceberg.yml
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+      - reopened
+    paths:  # same file list as the push trigger above
+      - python/src/lance_namespace_impls/iceberg.py
+      - python/tests/test_iceberg.py
+      - python/pyproject.toml
+      - .github/workflows/python-iceberg.yml
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # true only for pull_request events
+
+jobs:
+  test:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]  # quoted so "3.10" stays a string
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+      - name: Install dependencies
+        working-directory: python  # the make targets in this job are run from python/
+        run: make install-iceberg
+      - name: Lint
+        working-directory: python
+        run: make lint-iceberg
+      - name: Test
+        working-directory: python
+        run: make test-iceberg
diff --git a/.github/workflows/python-integ-glue.yml b/.github/workflows/python-integ-glue.yml
new file mode 100644
index 0000000..c83f233
--- /dev/null
+++ b/.github/workflows/python-integ-glue.yml
@@ -0,0 +1,65 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Python Glue Integration
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/glue.py
+      - python/tests/test_glue.py
+      - python/tests/test_glue_integration.py
+      - .github/workflows/python-integ-glue.yml
+  pull_request:  # not pull_request_target: PR-controlled code must never run with repository secrets
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+      - reopened
+    paths:  # same file list as the push trigger above
+      - python/src/lance_namespace_impls/glue.py
+      - python/tests/test_glue.py
+      - python/tests/test_glue_integration.py
+      - .github/workflows/python-integ-glue.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # never cancel runs triggered by pushes to main
+
+jobs:
+  integration-test:
+    # Fork PRs receive no secrets under pull_request; skip them instead of failing on missing AWS credentials.
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+      - name: Install dependencies
+        working-directory: python  # install-glue is run from python/
+        run: make install-glue
+      - name: Run integration tests
+        env:  # AWS credentials and bucket name are taken from repository secrets
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          AWS_REGION: ${{ secrets.AWS_REGION }}
+          AWS_S3_BUCKET_NAME: ${{ secrets.AWS_S3_BUCKET_NAME }}
+        run: make python-integ-test-glue
diff --git a/.github/workflows/python-integ-hive.yml b/.github/workflows/python-integ-hive.yml
new file mode 100644
index 0000000..c36ef01
--- /dev/null
+++ b/.github/workflows/python-integ-hive.yml
@@ -0,0 +1,101 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Python Hive Integration
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/hive2.py
+      - python/src/lance_namespace_impls/rest_client.py
+      - python/tests/test_hive2.py
+      - python/tests/test_hive2_integration.py
+      - docker/hive2/**
+      - .github/workflows/python-integ-hive.yml
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+      - reopened
+    paths:  # same file list as the push trigger above
+      - python/src/lance_namespace_impls/hive2.py
+      - python/src/lance_namespace_impls/rest_client.py
+      - python/tests/test_hive2.py
+      - python/tests/test_hive2_integration.py
+      - docker/hive2/**
+      - .github/workflows/python-integ-hive.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # never cancel runs triggered by pushes to main
+
+jobs:
+  integration-test:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+      - name: Download PostgreSQL JDBC driver
+        run: make docker-setup
+      - name: Start Hive2 Metastore
+        run: make docker-up-hive2
+      - name: Wait for Hive2 Metastore to be ready
+        run: |
+          echo "Waiting for Hive2 Metastore to be ready..."
+          timeout 180 bash -c '
+            until docker compose -f docker/hive2/docker-compose.yml ps hive2-metastore | grep -q "(healthy)"; do  # metastore only; excludes "(unhealthy)"
+              echo "Waiting for healthy status..."
+              docker compose -f docker/hive2/docker-compose.yml ps
+              sleep 10
+            done
+          ' || {
+            echo "Timeout waiting for Hive2 Metastore"
+            docker compose -f docker/hive2/docker-compose.yml logs
+            exit 1
+          }
+          sleep 10
+          nc -z localhost 9083 || {
+            echo "Cannot connect to Hive2 Metastore on port 9083"
+            docker compose -f docker/hive2/docker-compose.yml logs
+            exit 1
+          }
+          echo "Hive2 Metastore is ready"
+      - name: Install dependencies
+        working-directory: python  # install-hive is run from python/
+        run: make install-hive
+      - name: Run integration tests
+        run: make python-integ-test-hive
+      - name: Collect logs on failure
+        if: failure()
+        run: |
+          echo "=== Docker Compose Status ==="
+          docker compose -f docker/hive2/docker-compose.yml ps
+          echo ""
+          echo "=== Hive2 Metastore Logs ==="
+          docker compose -f docker/hive2/docker-compose.yml logs hive2-metastore
+          echo ""
+          echo "=== PostgreSQL Logs ==="
+          docker compose -f docker/hive2/docker-compose.yml logs postgres-hive2
+      - name: Cleanup
+        if: always()  # tear the containers down whether or not earlier steps failed
+        run: make docker-down-hive2
diff --git a/.github/workflows/python-integ-iceberg.yml b/.github/workflows/python-integ-iceberg.yml
new file mode 100644
index 0000000..e551fdd
--- /dev/null
+++ b/.github/workflows/python-integ-iceberg.yml
@@ -0,0 +1,112 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Python Iceberg Integration
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/iceberg.py
+      - python/src/lance_namespace_impls/rest_client.py
+      - python/tests/test_iceberg.py
+      - python/tests/test_iceberg_integration.py
+      - docker/iceberg/**
+      - .github/workflows/python-integ-iceberg.yml
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+      - reopened
+    paths:  # same file list as the push trigger above
+      - python/src/lance_namespace_impls/iceberg.py
+      - python/src/lance_namespace_impls/rest_client.py
+      - python/tests/test_iceberg.py
+      - python/tests/test_iceberg_integration.py
+      - docker/iceberg/**
+      - .github/workflows/python-integ-iceberg.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # never cancel runs triggered by pushes to main
+
+jobs:
+  integration-test:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+      - name: Start Iceberg REST Catalog (Lakekeeper)
+        run: make docker-up-iceberg
+      - name: Wait for Iceberg REST Catalog
+        run: |
+          echo "Waiting for Iceberg REST Catalog to be ready..."
+          timeout 180 bash -c '
+            until curl -sf http://localhost:8282/health > /dev/null 2>&1; do
+              echo "Waiting for health endpoint..."
+              sleep 5
+            done
+          ' || {
+            echo "Timeout waiting for Iceberg REST Catalog"
+            docker compose -f docker/iceberg/docker-compose.yml logs
+            exit 1
+          }
+          echo "Iceberg REST Catalog is ready"
+
+          # Wait for setup to complete (creates test_warehouse)
+          echo "Waiting for warehouse setup to complete..."
+          timeout 60 bash -c '
+            while docker ps -q -f name=lakekeeper-setup 2>/dev/null | grep -q .; do
+              echo "Waiting for setup container to finish..."
+              sleep 2
+            done
+          ' || echo "Timed out waiting for lakekeeper-setup to exit; continuing"
+
+          # Verify warehouse exists
+          echo "Verifying test_warehouse exists..."
+          WAREHOUSES=$(curl -s http://localhost:8282/management/v1/warehouse || echo "")
+
+          if echo "$WAREHOUSES" | grep -q "test_warehouse"; then
+            echo "test_warehouse verified"
+          else
+            echo "Warning: test_warehouse not found in warehouses: $WAREHOUSES"
+            docker compose -f docker/iceberg/docker-compose.yml logs lakekeeper-setup
+          fi
+      - name: Install dependencies
+        working-directory: python  # install-iceberg is run from python/
+        run: make install-iceberg
+      - name: Run integration tests
+        run: make python-integ-test-iceberg
+      - name: Collect logs on failure
+        if: failure()
+        run: |
+          echo "=== Docker Compose Status ==="
+          docker compose -f docker/iceberg/docker-compose.yml ps
+          echo ""
+          echo "=== Iceberg REST Catalog Logs ==="
+          docker compose -f docker/iceberg/docker-compose.yml logs lakekeeper
+          echo ""
+          echo "=== PostgreSQL Logs ==="
+          docker compose -f docker/iceberg/docker-compose.yml logs postgres-lakekeeper
+      - name: Cleanup
+        if: always()  # tear the containers down whether or not earlier steps failed
+        run: make docker-down-iceberg
diff --git a/.github/workflows/python-integ-polaris.yml b/.github/workflows/python-integ-polaris.yml
new file mode 100644
index 0000000..de80e89
--- /dev/null
+++ b/.github/workflows/python-integ-polaris.yml
@@ -0,0 +1,118 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Python Polaris Integration
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/polaris.py
+      - python/src/lance_namespace_impls/rest_client.py
+      - python/tests/test_polaris.py
+      - python/tests/test_polaris_integration.py
+      - docker/polaris/**
+      - .github/workflows/python-integ-polaris.yml
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+      - reopened
+    paths:  # same file list as the push trigger above
+      - python/src/lance_namespace_impls/polaris.py
+      - python/src/lance_namespace_impls/rest_client.py
+      - python/tests/test_polaris.py
+      - python/tests/test_polaris_integration.py
+      - docker/polaris/**
+      - .github/workflows/python-integ-polaris.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # never cancel runs triggered by pushes to main
+
+jobs:
+  integration-test:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+      - name: Start Polaris
+        run: make docker-up-polaris
+      - name: Wait for Polaris and catalog setup
+        run: |
+          echo "Waiting for Polaris to be ready..."
+          timeout 180 bash -c '
+            until curl -sf http://localhost:8182/q/health > /dev/null 2>&1; do
+              echo "Waiting for Polaris health endpoint..."
+              sleep 5
+            done
+          ' || {
+            echo "Timeout waiting for Polaris"
+            docker compose -f docker/polaris/docker-compose.yml logs
+            exit 1
+          }
+          echo "Polaris is ready"
+
+          # Wait for polaris-setup to complete (creates test_catalog)
+          echo "Waiting for polaris-setup to complete..."
+          timeout 60 bash -c '
+            while docker ps -q -f name=polaris-setup 2>/dev/null | grep -q .; do
+              echo "Waiting for polaris-setup container to finish..."
+              sleep 2
+            done
+          ' || echo "Timed out waiting for polaris-setup to exit; continuing"
+
+          # Verify catalog exists
+          echo "Verifying test_catalog exists..."
+          TOKEN=$(curl -s -X POST http://localhost:8181/api/catalog/v1/oauth/tokens \
+            -H 'Content-Type: application/x-www-form-urlencoded' \
+            -d 'grant_type=client_credentials&client_id=root&client_secret=s3cr3t&scope=PRINCIPAL_ROLE:ALL' | \
+            python3 -c "import sys, json; print(json.load(sys.stdin).get('access_token', ''))")
+
+          CATALOGS=$(curl -s http://localhost:8181/api/management/v1/catalogs \
+            -H "Authorization: Bearer $TOKEN")
+
+          if echo "$CATALOGS" | grep -q "test_catalog"; then
+            echo "test_catalog verified"
+          else
+            echo "Warning: test_catalog not found in catalogs: $CATALOGS"
+            docker compose -f docker/polaris/docker-compose.yml logs polaris-setup
+          fi
+      - name: Install dependencies
+        working-directory: python  # install-polaris is run from python/
+        run: make install-polaris
+      - name: Run integration tests
+        run: make python-integ-test-polaris
+      - name: Collect logs on failure
+        if: failure()
+        run: |
+          echo "=== Docker Compose Status ==="
+          docker compose -f docker/polaris/docker-compose.yml ps
+          echo ""
+          echo "=== Polaris Logs ==="
+          docker compose -f docker/polaris/docker-compose.yml logs polaris
+          echo ""
+          echo "=== PostgreSQL Logs ==="
+          docker compose -f docker/polaris/docker-compose.yml logs postgres-polaris
+      - name: Cleanup
+        if: always()  # tear the containers down whether or not earlier steps failed
+        run: make docker-down-polaris
diff --git a/.github/workflows/python-integ-unity.yml b/.github/workflows/python-integ-unity.yml
new file mode 100644
index 0000000..6b59c96
--- /dev/null
+++ b/.github/workflows/python-integ-unity.yml
@@ -0,0 +1,100 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Python Unity Integration
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/unity.py
+      - python/src/lance_namespace_impls/rest_client.py
+      - python/tests/test_unity.py
+      - python/tests/test_unity_integration.py
+      - docker/unity/**
+      - .github/workflows/python-integ-unity.yml
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+      - reopened
+    paths:  # same file list as the push trigger above
+      - python/src/lance_namespace_impls/unity.py
+      - python/src/lance_namespace_impls/rest_client.py
+      - python/tests/test_unity.py
+      - python/tests/test_unity_integration.py
+      - docker/unity/**
+      - .github/workflows/python-integ-unity.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # never cancel runs triggered by pushes to main
+
+jobs:
+  integration-test:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+      - name: Start Unity Catalog
+        run: make docker-up-unity
+      - name: Wait for Unity Catalog to be ready
+        run: |
+          echo "Waiting for Unity Catalog to be ready..."
+          timeout 120 bash -c '
+            until curl -sf http://localhost:8080/api/2.1/unity-catalog/catalogs > /dev/null 2>&1; do
+              echo "Waiting for Unity Catalog API..."
+              sleep 5
+            done
+          ' || {
+            echo "Timeout waiting for Unity Catalog"
+            docker compose -f docker/unity/docker-compose.yml logs
+            exit 1
+          }
+          echo "Unity Catalog is ready"
+      - name: Create test catalog
+        run: |
+          curl -s -X POST http://localhost:8080/api/2.1/unity-catalog/catalogs \
+            -H 'Content-Type: application/json' \
+            -d '{"name": "lance_test", "comment": "Test catalog for Lance integration tests"}' \
+            || echo "Catalog may already exist"
+          curl -sf http://localhost:8080/api/2.1/unity-catalog/catalogs/lance_test || {
+            echo "Failed to create/verify test catalog"
+            exit 1
+          }
+          echo "Test catalog created/verified"
+      - name: Install dependencies
+        working-directory: python  # install-unity is run from python/
+        run: make install-unity
+      - name: Run integration tests
+        run: make python-integ-test-unity
+      - name: Collect logs on failure
+        if: failure()
+        run: |
+          echo "=== Docker Compose Status ==="
+          docker compose -f docker/unity/docker-compose.yml ps
+          echo ""
+          echo "=== Unity Catalog Logs ==="
+          docker compose -f docker/unity/docker-compose.yml logs unity-catalog
+      - name: Cleanup
+        if: always()  # tear the containers down whether or not earlier steps failed
+        run: make docker-down-unity
diff --git a/.github/workflows/python-polaris.yml b/.github/workflows/python-polaris.yml
new file mode 100644
index 0000000..50130e3
--- /dev/null
+++ b/.github/workflows/python-polaris.yml
@@ -0,0 +1,64 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Python Polaris
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/polaris.py
+      - python/tests/test_polaris.py
+      - python/pyproject.toml
+      - .github/workflows/python-polaris.yml
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+      - reopened
+    paths:  # same file list as the push trigger above
+      - python/src/lance_namespace_impls/polaris.py
+      - python/tests/test_polaris.py
+      - python/pyproject.toml
+      - .github/workflows/python-polaris.yml
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # true only for pull_request events
+
+jobs:
+  test:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]  # quoted so "3.10" stays a string
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+      - name: Install dependencies
+        working-directory: python  # the make targets in this job are run from python/
+        run: make install-polaris
+      - name: Lint
+        working-directory: python
+        run: make lint-polaris
+      - name: Test
+        working-directory: python
+        run: make test-polaris
diff --git a/.github/workflows/python-unity.yml b/.github/workflows/python-unity.yml
new file mode 100644
index 0000000..d57ea21
--- /dev/null
+++ b/.github/workflows/python-unity.yml
@@ -0,0 +1,64 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Python Unity
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # run only when one of the files listed below changes
+      - python/src/lance_namespace_impls/unity.py
+      - python/tests/test_unity.py
+      - python/pyproject.toml
+      - .github/workflows/python-unity.yml
+  pull_request:
+    types:
+      - opened
+      - synchronize
+      - ready_for_review
+      - reopened
+    paths:  # same file list as the push trigger above
+      - python/src/lance_namespace_impls/unity.py
+      - python/tests/test_unity.py
+      - python/pyproject.toml
+      - .github/workflows/python-unity.yml
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # true only for pull_request events
+
+jobs:
+  test:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]  # quoted so "3.10" stays a string
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+      - name: Install dependencies
+        working-directory: python  # the make targets in this job are run from python/
+        run: make install-unity
+      - name: Lint
+        working-directory: python
+        run: make lint-unity
+      - name: Test
+        working-directory: python
+        run: make test-unity
diff --git a/.gitignore b/.gitignore
index f01b926..929f143 100644
--- a/.gitignore
+++ b/.gitignore
@@ -117,4 +117,7 @@ env.bak/
 venv.bak/
 
 # Docs
-docs/site
\ No newline at end of file
+docs/site
+
+# Hive metastore
+metastore_db/
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 97a7572..8081a95 100644
--- a/Makefile
+++ b/Makefile
@@ -10,37 +10,53 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# ============================================================================ +# Python +# ============================================================================ + .PHONY: clean-python clean-python: - cd python; make clean + cd python && make clean .PHONY: build-python build-python: - cd python; make build + cd python && make build .PHONY: test-python test-python: - cd python; make test + cd python && make test + +# ============================================================================ +# Java +# ============================================================================ .PHONY: clean-java clean-java: - cd java; make clean + cd java && make clean .PHONY: build-java build-java: - cd java; make build + cd java && make build .PHONY: test-java test-java: - cd java; make test + cd java && make test + +# ============================================================================ +# Docs +# ============================================================================ .PHONY: build-docs build-docs: - cd docs; make build + cd docs && make build .PHONY: serve-docs serve-docs: - cd docs; make serve + cd docs && make serve + +# ============================================================================ +# All +# ============================================================================ .PHONY: clean clean: clean-python clean-java @@ -84,7 +100,7 @@ docker-logs: cd docker && make logs # Individual catalog docker targets -.PHONY: docker-up-hive2 docker-up-hive3 docker-up-polaris docker-up-unity +.PHONY: docker-up-hive2 docker-up-hive3 docker-up-polaris docker-up-unity docker-up-iceberg docker-up-hive2: cd docker && make up-hive2 docker-up-hive3: @@ -93,8 +109,10 @@ docker-up-polaris: cd docker && make up-polaris docker-up-unity: cd docker && make up-unity +docker-up-iceberg: + cd docker && make up-iceberg -.PHONY: docker-down-hive2 docker-down-hive3 docker-down-polaris docker-down-unity +.PHONY: docker-down-hive2 docker-down-hive3 docker-down-polaris 
docker-down-unity docker-down-iceberg docker-down-hive2: cd docker && make down-hive2 docker-down-hive3: @@ -103,30 +121,73 @@ docker-down-polaris: cd docker && make down-polaris docker-down-unity: cd docker && make down-unity +docker-down-iceberg: + cd docker && make down-iceberg # ============================================================================ -# Integration test targets +# Java Integration test targets # ============================================================================ -.PHONY: integration-test-java -integration-test-java: - cd java && ./mvnw test -Dtest="*IntegrationTest" -DfailIfNoTests=false +.PHONY: java-integ-test +java-integ-test: + cd java && make integ-test + +.PHONY: java-integ-test-hive2 +java-integ-test-hive2: + cd java && make integ-test-hive2 + +.PHONY: java-integ-test-hive3 +java-integ-test-hive3: + cd java && make integ-test-hive3 + +.PHONY: java-integ-test-polaris +java-integ-test-polaris: + cd java && make integ-test-polaris + +.PHONY: java-integ-test-iceberg +java-integ-test-iceberg: + cd java && make integ-test-iceberg + +.PHONY: java-integ-test-unity +java-integ-test-unity: + cd java && make integ-test-unity + +.PHONY: java-integ-test-glue +java-integ-test-glue: + cd java && make integ-test-glue + +# ============================================================================ +# Python Integration test targets +# ============================================================================ + +.PHONY: python-integ-test +python-integ-test: + cd python && make integ-test + +.PHONY: python-integ-test-hive +python-integ-test-hive: + cd python && make integ-test-hive + +.PHONY: python-integ-test-hive2 +python-integ-test-hive2: + cd python && make integ-test-hive2 -.PHONY: integration-test-hive2 -integration-test-hive2: - cd java && ./mvnw test -pl lance-namespace-hive2 -Dtest="*IntegrationTest" -DfailIfNoTests=false +.PHONY: python-integ-test-hive3 +python-integ-test-hive3: + cd python && make integ-test-hive3 -.PHONY: 
integration-test-hive3 -integration-test-hive3: - cd java && ./mvnw test -pl lance-namespace-hive3 -Dtest="*IntegrationTest" -DfailIfNoTests=false +.PHONY: python-integ-test-polaris +python-integ-test-polaris: + cd python && make integ-test-polaris -.PHONY: integration-test-polaris -integration-test-polaris: - cd java && ./mvnw test -pl lance-namespace-polaris -Dtest="*IntegrationTest" -DfailIfNoTests=false +.PHONY: python-integ-test-unity +python-integ-test-unity: + cd python && make integ-test-unity -.PHONY: integration-test-unity -integration-test-unity: - cd java && ./mvnw test -pl lance-namespace-unity -Dtest="*IntegrationTest" -DfailIfNoTests=false +.PHONY: python-integ-test-iceberg +python-integ-test-iceberg: + cd python && make integ-test-iceberg -.PHONY: integration-test -integration-test: integration-test-java +.PHONY: python-integ-test-glue +python-integ-test-glue: + cd python && make integ-test-glue diff --git a/docker/Makefile b/docker/Makefile index 449cb16..e3d4e1c 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -18,7 +18,7 @@ POSTGRES_DRIVER_VERSION := 42.7.3 POSTGRES_DRIVER_URL := https://jdbc.postgresql.org/download/postgresql-$(POSTGRES_DRIVER_VERSION).jar .PHONY: setup up down down-clean logs status health -.PHONY: up-hive2 down-hive2 up-hive3 down-hive3 up-polaris down-polaris up-unity down-unity +.PHONY: up-hive2 down-hive2 up-hive3 down-hive3 up-polaris down-polaris up-unity down-unity up-iceberg down-iceberg # Download PostgreSQL JDBC driver for Hive setup: @@ -60,6 +60,9 @@ health: @echo "" @echo "=== Unity Catalog (port 8080) ===" @curl -sf http://localhost:8080/api/2.1/unity-catalog/catalogs > /dev/null && echo "OK" || echo "NOT READY" + @echo "" + @echo "=== Iceberg REST Catalog (port 8282) ===" + @curl -sf http://localhost:8282/health > /dev/null && echo "OK" || echo "NOT READY" # ============================================================================ # Individual service management @@ -117,6 +120,19 @@ down-unity-clean: 
logs-unity: docker compose -f unity/docker-compose.yml logs -f +# Iceberg REST Catalog (Lakekeeper) +up-iceberg: + docker compose -f iceberg/docker-compose.yml up -d + +down-iceberg: + docker compose -f iceberg/docker-compose.yml down + +down-iceberg-clean: + docker compose -f iceberg/docker-compose.yml down -v + +logs-iceberg: + docker compose -f iceberg/docker-compose.yml logs -f + # ============================================================================ # Test utilities # ============================================================================ diff --git a/docker/hive2/docker-compose.yml b/docker/hive2/docker-compose.yml index 909d8dc..8877ba6 100644 --- a/docker/hive2/docker-compose.yml +++ b/docker/hive2/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: postgres-hive2: image: postgres:15 @@ -21,23 +19,27 @@ services: - hive2-network hive2-metastore: - image: apache/hive:2.3.9 + image: bde2020/hive:2.3.2-postgresql-metastore container_name: hive2-metastore depends_on: postgres-hive2: condition: service_healthy environment: - SERVICE_NAME: metastore - DB_DRIVER: postgres - SERVICE_OPTS: >- - -Djavax.jdo.option.ConnectionDriverName=org.postgresql.Driver - -Djavax.jdo.option.ConnectionURL=jdbc:postgresql://postgres-hive2:5432/metastore_db - -Djavax.jdo.option.ConnectionUserName=hive - -Djavax.jdo.option.ConnectionPassword=hive + HIVE_SITE_CONF_javax_jdo_option_ConnectionURL: jdbc:postgresql://postgres-hive2:5432/metastore_db + HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName: org.postgresql.Driver + HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName: hive + HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword: hive + HIVE_SITE_CONF_datanucleus_autoCreateSchema: "true" + HIVE_SITE_CONF_datanucleus_schema_autoCreateAll: "true" + HIVE_SITE_CONF_hive_metastore_schema_verification: "false" + HIVE_SITE_CONF_hive_metastore_uris: thrift://hive2-metastore:9083 + HIVE_SITE_CONF_hive_metastore_warehouse_dir: file:///user/hive/warehouse + 
CORE_CONF_fs_defaultFS: file:/// + SERVICE_PRECONDITION: "postgres-hive2:5432" ports: - "9083:9083" volumes: - - hive2-warehouse:/opt/hive/data/warehouse + - hive2-warehouse:/user/hive/warehouse - ./postgresql-42.7.3.jar:/opt/hive/lib/postgres.jar networks: - hive2-network @@ -46,7 +48,8 @@ services: interval: 10s timeout: 10s retries: 10 - start_period: 30s + start_period: 60s + command: /opt/hive/bin/hive --service metastore volumes: hive2-postgres-data: diff --git a/docker/hive2/postgresql-42.7.3.jar b/docker/hive2/postgresql-42.7.3.jar new file mode 100644 index 0000000..fa42b1d Binary files /dev/null and b/docker/hive2/postgresql-42.7.3.jar differ diff --git a/docker/hive3/docker-compose.yml b/docker/hive3/docker-compose.yml index 7e11999..75f4fe1 100644 --- a/docker/hive3/docker-compose.yml +++ b/docker/hive3/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: postgres-hive3: image: postgres:15 @@ -21,7 +19,7 @@ services: - hive3-network hive3-metastore: - image: apache/hive:4.0.0 + image: apache/hive:3.1.3 container_name: hive3-metastore depends_on: postgres-hive3: @@ -34,9 +32,11 @@ services: -Djavax.jdo.option.ConnectionURL=jdbc:postgresql://postgres-hive3:5432/metastore_db -Djavax.jdo.option.ConnectionUserName=hive -Djavax.jdo.option.ConnectionPassword=hive + -Dhive.metastore.warehouse.dir=file:///opt/hive/data/warehouse + -Dfs.defaultFS=file:/// ports: - "9084:9083" - - "9001:9001" + - "9002:9001" volumes: - hive3-warehouse:/opt/hive/data/warehouse - ./postgresql-42.7.3.jar:/opt/hive/lib/postgres.jar diff --git a/docker/hive3/postgresql-42.7.3.jar b/docker/hive3/postgresql-42.7.3.jar new file mode 100644 index 0000000..fa42b1d Binary files /dev/null and b/docker/hive3/postgresql-42.7.3.jar differ diff --git a/docker/iceberg/docker-compose.yml b/docker/iceberg/docker-compose.yml new file mode 100644 index 0000000..36dd394 --- /dev/null +++ b/docker/iceberg/docker-compose.yml @@ -0,0 +1,162 @@ +services: + postgres-lakekeeper: + image: 
postgres:17 + container_name: postgres-lakekeeper + environment: + POSTGRES_USER: lakekeeper + POSTGRES_PASSWORD: lakekeeper + POSTGRES_DB: lakekeeper + ports: + - "5435:5432" + volumes: + - lakekeeper-postgres-data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U lakekeeper -d lakekeeper"] + interval: 2s + timeout: 10s + retries: 5 + networks: + - lakekeeper-network + + minio: + image: minio/minio:latest + container_name: minio-lakekeeper + environment: + MINIO_ROOT_USER: minio-root-user + MINIO_ROOT_PASSWORD: minio-root-password + MINIO_DOMAIN: minio + command: server /data --console-address ":9001" + ports: + - "19000:9000" + - "19001:9001" + volumes: + - lakekeeper-minio-data:/data + healthcheck: + test: ["CMD", "mc", "ready", "local"] + interval: 2s + timeout: 10s + retries: 5 + networks: + - lakekeeper-network + + minio-setup: + image: minio/mc:latest + container_name: minio-setup + depends_on: + minio: + condition: service_healthy + entrypoint: > + /bin/sh -c " + mc alias set myminio http://minio:9000 minio-root-user minio-root-password && + mc mb myminio/warehouse --ignore-existing && + echo 'MinIO bucket created' + " + networks: + - lakekeeper-network + + lakekeeper-migrate: + image: quay.io/lakekeeper/catalog:latest-main + container_name: lakekeeper-migrate + depends_on: + postgres-lakekeeper: + condition: service_healthy + environment: + - LAKEKEEPER__PG_ENCRYPTION_KEY=test-encryption-key-not-secure + - LAKEKEEPER__PG_DATABASE_URL_READ=postgresql://lakekeeper:lakekeeper@postgres-lakekeeper:5432/lakekeeper + - LAKEKEEPER__PG_DATABASE_URL_WRITE=postgresql://lakekeeper:lakekeeper@postgres-lakekeeper:5432/lakekeeper + restart: "no" + command: ["migrate"] + networks: + - lakekeeper-network + + lakekeeper: + image: quay.io/lakekeeper/catalog:latest-main + container_name: lakekeeper + depends_on: + lakekeeper-migrate: + condition: service_completed_successfully + minio-setup: + condition: service_completed_successfully + environment: 
+ - LAKEKEEPER__PG_ENCRYPTION_KEY=test-encryption-key-not-secure + - LAKEKEEPER__PG_DATABASE_URL_READ=postgresql://lakekeeper:lakekeeper@postgres-lakekeeper:5432/lakekeeper + - LAKEKEEPER__PG_DATABASE_URL_WRITE=postgresql://lakekeeper:lakekeeper@postgres-lakekeeper:5432/lakekeeper + - LAKEKEEPER__AUTHZ_BACKEND=allowall + - RUST_LOG=info + command: ["serve"] + healthcheck: + test: ["CMD", "/home/nonroot/lakekeeper", "healthcheck"] + interval: 2s + timeout: 10s + retries: 5 + start_period: 5s + ports: + - "8282:8181" + networks: + - lakekeeper-network + + lakekeeper-bootstrap: + image: curlimages/curl + container_name: lakekeeper-bootstrap + depends_on: + lakekeeper: + condition: service_healthy + restart: "no" + command: + - -w + - "%{http_code}" + - "-X" + - "POST" + - "-v" + - "http://lakekeeper:8181/management/v1/bootstrap" + - "-H" + - "Content-Type: application/json" + - "--data" + - '{"accept-terms-of-use": true}' + - "-o" + - "/dev/null" + networks: + - lakekeeper-network + + lakekeeper-setup: + image: curlimages/curl + container_name: lakekeeper-setup + depends_on: + lakekeeper-bootstrap: + condition: service_completed_successfully + restart: "no" + entrypoint: ["/bin/sh", "-c"] + command: + - | + echo "Creating test_warehouse..." 
+ curl -sf -X POST "http://lakekeeper:8181/management/v1/warehouse" \ + -H "Content-Type: application/json" \ + -d '{ + "warehouse-name": "test_warehouse", + "project-id": "00000000-0000-0000-0000-000000000000", + "storage-profile": { + "type": "s3", + "bucket": "warehouse", + "endpoint": "http://minio:9000", + "region": "us-east-1", + "path-style-access": true, + "flavor": "minio", + "sts-enabled": false + }, + "storage-credential": { + "type": "s3", + "credential-type": "access-key", + "aws-access-key-id": "minio-root-user", + "aws-secret-access-key": "minio-root-password" + } + }' && echo "Warehouse created successfully" || echo "Failed to create warehouse" + networks: + - lakekeeper-network + +volumes: + lakekeeper-postgres-data: + lakekeeper-minio-data: + +networks: + lakekeeper-network: + driver: bridge diff --git a/docker/polaris/docker-compose.yml b/docker/polaris/docker-compose.yml index 9f1bec0..796c2c6 100644 --- a/docker/polaris/docker-compose.yml +++ b/docker/polaris/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: postgres-polaris: image: postgres:16 @@ -20,24 +18,37 @@ services: networks: - polaris-network - polaris: - image: apache/polaris:latest - container_name: polaris + polaris-bootstrap: + image: apache/polaris-admin-tool:1.2.0-incubating + container_name: polaris-bootstrap depends_on: postgres-polaris: condition: service_healthy environment: - # Bootstrap credentials: realm,client_id,client_secret + POLARIS_PERSISTENCE_TYPE: relational-jdbc + QUARKUS_DATASOURCE_JDBC_URL: jdbc:postgresql://postgres-polaris:5432/polaris + QUARKUS_DATASOURCE_USERNAME: polaris + QUARKUS_DATASOURCE_PASSWORD: polaris + command: ["bootstrap", "-r", "POLARIS", "-c", "POLARIS,root,s3cr3t"] + networks: + - polaris-network + + polaris: + image: apache/polaris:1.2.0-incubating + container_name: polaris + depends_on: + polaris-bootstrap: + condition: service_completed_successfully + environment: POLARIS_BOOTSTRAP_CREDENTIALS: "POLARIS,root,s3cr3t" - # 
Persistence configuration POLARIS_PERSISTENCE_TYPE: relational-jdbc QUARKUS_DATASOURCE_JDBC_URL: jdbc:postgresql://postgres-polaris:5432/polaris QUARKUS_DATASOURCE_USERNAME: polaris QUARKUS_DATASOURCE_PASSWORD: polaris - # Allow file:// storage for local testing - POLARIS_FEATURES_DEFAULTS_OVERRIDE_ALLOW_INSECURE_STORAGE_TYPES: "true" - POLARIS_FEATURES_DEFAULTS_OVERRIDE_SUPPORTED_CATALOG_STORAGE_TYPES: '["FILE","S3","GCS","AZURE"]' - # Disable OpenTelemetry for testing + polaris.features."ALLOW_INSECURE_STORAGE_TYPES": "true" + polaris.features."SUPPORTED_CATALOG_STORAGE_TYPES": '["FILE","S3","GCS","AZURE"]' + polaris.features."GENERIC_TABLE_ENABLED": "true" + polaris.readiness.ignore-severe-issues: "true" QUARKUS_OTEL_SDK_DISABLED: "true" ports: - "8181:8181" @@ -53,6 +64,18 @@ services: retries: 10 start_period: 30s + polaris-setup: + image: alpine:3.20 + container_name: polaris-setup + depends_on: + polaris: + condition: service_healthy + volumes: + - ./init-catalog.sh:/init-catalog.sh:ro + command: ["/bin/sh", "/init-catalog.sh"] + networks: + - polaris-network + volumes: polaris-postgres-data: polaris-warehouse: diff --git a/docker/polaris/init-catalog.sh b/docker/polaris/init-catalog.sh new file mode 100644 index 0000000..f1fe22c --- /dev/null +++ b/docker/polaris/init-catalog.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# Wait for Polaris to be healthy +echo "Waiting for Polaris to be ready..." +until wget -q -O /dev/null http://polaris:8182/q/health 2>/dev/null; do + echo "Polaris not ready, waiting..." + sleep 2 +done +echo "Polaris is healthy!" + +# Get OAuth token +echo "Getting OAuth token..." 
+TOKEN_RESPONSE=$(wget -q -O - --post-data='grant_type=client_credentials&client_id=root&client_secret=s3cr3t&scope=PRINCIPAL_ROLE:ALL' \ + --header='Content-Type: application/x-www-form-urlencoded' \ + http://polaris:8181/api/catalog/v1/oauth/tokens) + +TOKEN=$(echo "$TOKEN_RESPONSE" | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p') + +if [ -z "$TOKEN" ]; then + echo "Failed to get OAuth token" + echo "Response: $TOKEN_RESPONSE" + exit 1 +fi +echo "Token obtained successfully" + +# Check if catalog already exists +echo "Checking for existing catalog..." +CATALOGS=$(wget -q -O - --header="Authorization: Bearer $TOKEN" \ + http://polaris:8181/api/management/v1/catalogs) + +if echo "$CATALOGS" | grep -q '"name":"test_catalog"'; then + echo "Catalog 'test_catalog' already exists" + exit 0 +fi + +# Create test catalog +echo "Creating test catalog..." +RESULT=$(wget -q -O - --post-data='{ + "catalog": { + "name": "test_catalog", + "type": "INTERNAL", + "properties": { + "default-base-location": "file:///data/warehouse/test_catalog" + }, + "storageConfigInfo": { + "storageType": "FILE", + "allowedLocations": ["file:///data/warehouse"] + } + } +}' \ + --header="Authorization: Bearer $TOKEN" \ + --header='Content-Type: application/json' \ + http://polaris:8181/api/management/v1/catalogs 2>&1) + +echo "Result: $RESULT" + +# Verify catalog was created +VERIFY=$(wget -q -O - --header="Authorization: Bearer $TOKEN" \ + http://polaris:8181/api/management/v1/catalogs) + +if echo "$VERIFY" | grep -q '"name":"test_catalog"'; then + echo "Catalog 'test_catalog' created successfully!" + exit 0 +else + echo "Failed to create catalog" + echo "Catalogs: $VERIFY" + exit 1 +fi diff --git a/docs/src/biglake.md b/docs/src/biglake.md index 093ea88..62b3eed 100644 --- a/docs/src/biglake.md +++ b/docs/src/biglake.md @@ -1,16 +1,9 @@ -# Lance BigLake Namespace +# Google BigLake Metastore -**Google BigLake Metastore** is a fully managed, unified metastore service for data lakes on Google Cloud. 
+**[Google BigLake Metastore](https://docs.cloud.google.com/biglake/docs/about-blms)** +is a fully managed, unified metastore service for data lakes on Google Cloud. To use Google BigLake Metastore with Lance, you can leverage BigLake's [Iceberg REST Catalog](https://docs.cloud.google.com/biglake/docs/blms-rest-catalog), which exposes an Apache Iceberg REST Catalog-compatible interface. -## Configuration - -Configure your Lance Iceberg namespace to connect to the BigLake Metastore endpoint: - -- **endpoint**: `https://biglake.googleapis.com/iceberg/v1/restcatalog` -- **warehouse**: Your BigLake catalog name in the format `projects/{project}/locations/{location}/catalogs/{catalog}` -- **auth_token**: A valid Google Cloud OAuth2 access token - -All the features and configurations of the [Lance Iceberg REST Catalog Namespace](iceberg.md) apply when using BigLake Metastore. +See [Lance Namespace integration with Iceberg REST Catalog](iceberg.md) for more details. diff --git a/docs/src/dataproc.md b/docs/src/dataproc.md index c7abde4..85860ce 100644 --- a/docs/src/dataproc.md +++ b/docs/src/dataproc.md @@ -1,10 +1,9 @@ -# Lance Dataproc Namespace +# Google Dataproc Metastore -**Google Dataproc Metastore** is a fully managed, +**[Google Dataproc Metastore](https://docs.cloud.google.com/dataproc-metastore/docs/overview)** is a fully managed, highly available, autohealing, serverless metastore that runs on Google Cloud. To use Google Dataproc Metastore with Lance, you can leverage Dataproc's [Hive metastore](https://cloud.google.com/dataproc-metastore/docs/hive-metastore), -which exposes a Hive MetaStore-compatible interface. +which exposes an Apache Hive MetaStore-compatible interface. -Simply configure your Lance Hive namespace to connect to Dataproc's Hive MetaStore endpoint. -All the features and configurations of the Lance Hive Namespace ([V2](hive2.md) or [V3](hive3.md)) apply when using Dataproc Metastore.
+See Lance Namespace integration with Hive metastore ([V2](hive2.md) or [V3](hive3.md)) for more details. diff --git a/docs/src/glue.md b/docs/src/glue.md index a6f59cf..f7c0c92 100644 --- a/docs/src/glue.md +++ b/docs/src/glue.md @@ -1,10 +1,13 @@ -# Lance Glue Namespace Implementation Spec +# AWS Glue Data Catalog Lance Namespace Implementation Spec -This document describes how the AWS Glue Data Catalog implements the Lance Namespace client spec. +This document describes how the AWS Glue Data Catalog +implements the Lance Namespace client spec. ## Background -AWS Glue Data Catalog is a fully managed metadata repository that stores structural and operational metadata for data assets. It is compatible with the Apache Hive Metastore API and can be used as a central metadata repository for data lakes. For details on AWS Glue, see the [AWS Glue Data Catalog Documentation](https://docs.aws.amazon.com/glue/). +AWS Glue Data Catalog is a fully managed metadata repository that stores structural and operational metadata for data assets. +It is compatible with the Apache Hive Metastore API and can be used as a central metadata repository for data lakes. +For details on AWS Glue, see the [AWS Glue Data Catalog Documentation](https://docs.aws.amazon.com/glue/latest/dg/manage-catalog.html). ## Namespace Implementation Configuration Properties @@ -22,9 +25,15 @@ The **secret_access_key** property is optional and specifies the AWS secret acce The **session_token** property is optional and specifies the AWS session token for temporary credentials. -The **root** property is optional and specifies the storage root location of the lakehouse on Glue catalog. Default value is the current working directory. +The **assume_role_arn** property is optional and specifies the ARN of the IAM role to assume for Glue operations. -The **storage.*** prefix properties are optional and specify additional storage configurations to access tables (e.g., `storage.region=us-west-2`). 
+The **assume_role_region** property is optional and specifies the AWS region for the STS client when assuming a role. + +The **assume_role_external_id** property is optional and specifies the external ID for cross-account role assumption. For more details, see [AWS external ID documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html). + +The **assume_role_session_name** property is optional and specifies the session name for the assumed role session. For more details, see [AWS role session name documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_iam-condition-keys.html#ck_rolesessionname). + +The **assume_role_timeout_sec** property is optional and specifies the duration in seconds for which the assumed role session is valid (default: 3600). At the end of the timeout, a new set of role session credentials will be fetched through the STS client. ### Authentication @@ -33,24 +42,34 @@ The Glue namespace supports multiple authentication methods: 1. **Default AWS credential provider chain**: When no explicit credentials are provided, the client uses the default AWS credential provider chain 2. **Static credentials**: Set `access_key_id` and `secret_access_key` for basic AWS credentials 3. **Session credentials**: Additionally provide `session_token` for temporary AWS credentials +4. **Assume role credentials**: Set `assume_role_arn` to assume an IAM role. Optionally configure `assume_role_region`, `assume_role_external_id`, `assume_role_session_name`, and `assume_role_timeout_sec` to customize the role assumption behavior ## Object Mapping ### Namespace -The **root namespace** is represented by the AWS Glue Data Catalog itself. +AWS Glue Data Catalog supports a recursive catalog structure through the [GetCatalog](https://docs.aws.amazon.com/glue/latest/webapi/API_GetCatalog.html) and [GetCatalogs](https://docs.aws.amazon.com/glue/latest/webapi/API_GetCatalogs.html) APIs. 
+This allows for multi-level namespace hierarchies. + +The **root namespace** is represented by the default AWS Glue Data Catalog, which has a catalog ID that is either `None` or equal to the caller's AWS account ID. + +A **child catalog** within the root catalog forms a child namespace. The [GetCatalogs](https://docs.aws.amazon.com/glue/latest/webapi/API_GetCatalogs.html) API supports the `ParentCatalogId` parameter to traverse the catalog hierarchy. -A **child namespace** is a database in Glue, forming a 2-level namespace hierarchy. +A **database** within a catalog represents the leaf namespace level. Databases are created within a specific catalog using the `CatalogId` parameter in the [CreateDatabase](https://docs.aws.amazon.com/glue/latest/webapi/API_CreateDatabase.html) API. -The **namespace identifier** is the database name. +The **namespace identifier** follows a hierarchical pattern: +- For catalogs: the catalog name (e.g., `my_catalog`) +- For databases: the catalog chain joined with the database name using the `$` delimiter (e.g., `catalog$database` or `parent_catalog$child_catalog$database`) -**Namespace properties** are stored in the Glue Database object's parameters map. +**Namespace properties** are stored in: +- Catalog's `Parameters` map for catalog-level namespaces +- Database's `Parameters` map for database-level namespaces ### Table A **table** is represented as a [Table](https://docs.aws.amazon.com/glue/latest/webapi/API_Table.html) object in AWS Glue with `TableType` set to `EXTERNAL_TABLE`. -The **table identifier** is constructed by joining database and table name with the `$` delimiter (e.g., `database$table`). +The **table identifier** is constructed by joining the full namespace path and table name with the `$` delimiter (e.g., `catalog$database$table`).
The **table location** is stored in the [`StorageDescriptor.Location`](https://docs.aws.amazon.com/glue/latest/webapi/API_StorageDescriptor.html#Glue-Type-StorageDescriptor-Location) field, pointing to the root location of the Lance table. @@ -60,6 +79,189 @@ The **table location** is stored in the [`StorageDescriptor.Location`](https://d A table in AWS Glue is identified as a Lance table when it meets the following criteria: the `TableType` is `EXTERNAL_TABLE`, and the `Parameters` map contains a key `table_type` with value `lance` (case insensitive). The `StorageDescriptor.Location` must point to a valid Lance table root directory. -## Optimistic Concurrency Control +## Basic Operations + +### CreateNamespace + +Creates a new catalog or database in AWS Glue. + +The implementation: + +1. Parse the namespace identifier to determine if it is a catalog or database level +2. For catalog-level namespace: + - Construct a [CreateCatalog](https://docs.aws.amazon.com/glue/latest/webapi/API_CreateCatalog.html) request with name and properties + - Set the `Parameters` map with the provided namespace properties +3. For database-level namespace: + - Verify the parent catalog exists + - Construct a [CreateDatabase](https://docs.aws.amazon.com/glue/latest/webapi/API_CreateDatabase.html) request with database name and `CatalogId` + - Set the `Parameters` map with the provided namespace properties +4. Handle creation mode (CREATE, EXIST_OK, OVERWRITE) appropriately + +**Error Handling:** + +If the namespace already exists and mode is CREATE, return error code `2` (NamespaceAlreadyExists). + +If the parent catalog does not exist, return error code `1` (NamespaceNotFound). + +If access is denied, return error code `16` (Forbidden). + +If the Glue service is unavailable, return error code `17` (ServiceUnavailable). + +### ListNamespaces + +Lists catalogs or databases in AWS Glue. + +The implementation: + +1. Parse the parent namespace identifier +2. 
For root namespace (no parent): + - Use [GetCatalogs](https://docs.aws.amazon.com/glue/latest/webapi/API_GetCatalogs.html) with `IncludeRoot=true` to list all catalogs + - Use `ParentCatalogId` set to account ID and `Recursive=false` for direct children +3. For catalog-level namespace: + - Use [GetDatabases](https://docs.aws.amazon.com/glue/latest/webapi/API_GetDatabases.html) with the catalog's `CatalogId` + - Additionally use [GetCatalogs](https://docs.aws.amazon.com/glue/latest/webapi/API_GetCatalogs.html) with `ParentCatalogId` to list child catalogs +4. Sort the results and apply pagination using `NextToken` + +**Error Handling:** + +If the parent namespace does not exist, return error code `1` (NamespaceNotFound). + +If access is denied, return error code `16` (Forbidden). + +If the Glue service is unavailable, return error code `17` (ServiceUnavailable). + +### DescribeNamespace + +Retrieves properties and metadata for a catalog or database. + +The implementation: + +1. Parse the namespace identifier to determine the level +2. For catalog-level namespace: + - Use [GetCatalog](https://docs.aws.amazon.com/glue/latest/webapi/API_GetCatalog.html) with the catalog ID + - Extract properties from the `Parameters` map +3. For database-level namespace: + - Use [GetDatabase](https://docs.aws.amazon.com/glue/latest/webapi/API_GetDatabase.html) with the database name and `CatalogId` + - Extract properties from the Database's `Parameters` map + +**Error Handling:** + +If the namespace does not exist, return error code `1` (NamespaceNotFound). + +If access is denied, return error code `16` (Forbidden). + +If the Glue service is unavailable, return error code `17` (ServiceUnavailable). + +### DropNamespace + +Removes a catalog or database from AWS Glue. Only RESTRICT mode is supported; CASCADE mode is not implemented. + +The implementation: + +1. Parse the namespace identifier to determine the level +2. Check if the namespace exists (handle SKIP mode if not) +3. 
For catalog-level namespace: + - Verify the catalog has no child catalogs or databases + - Use [DeleteCatalog](https://docs.aws.amazon.com/glue/latest/webapi/API_DeleteCatalog.html) with the catalog ID +4. For database-level namespace: + - Verify the database is empty (no tables) + - Use [DeleteDatabase](https://docs.aws.amazon.com/glue/latest/webapi/API_DeleteDatabase.html) with the database name and `CatalogId` + +**Error Handling:** + +If the namespace does not exist and mode is FAIL, return error code `1` (NamespaceNotFound). + +If the namespace is not empty, return error code `3` (NamespaceNotEmpty). + +If access is denied, return error code `16` (Forbidden). + +If the Glue service is unavailable, return error code `17` (ServiceUnavailable). + +### DeclareTable + +Declares a new Lance table in AWS Glue without creating the underlying data. + +The implementation: + +1. Parse the table identifier to extract catalog, database, and table name +2. Verify the parent namespace (database) exists using [GetDatabase](https://docs.aws.amazon.com/glue/latest/webapi/API_GetDatabase.html) +3. Construct a [CreateTable](https://docs.aws.amazon.com/glue/latest/webapi/API_CreateTable.html) request with: + - `CatalogId`: the catalog ID from the namespace + - `DatabaseName`: the database name + - `TableInput.Name`: the table name + - `TableInput.TableType`: `EXTERNAL_TABLE` + - `TableInput.Parameters`: include `table_type=lance` and other properties + - `TableInput.StorageDescriptor.Location`: the specified table location +4. POST the CreateTable request to Glue + +**Error Handling:** + +If the parent namespace does not exist, return error code `1` (NamespaceNotFound). + +If the table already exists, return error code `5` (TableAlreadyExists). + +If access is denied, return error code `16` (Forbidden). + +If the Glue service is unavailable, return error code `17` (ServiceUnavailable). + +### ListTables + +Lists all Lance tables in a database. + +The implementation: + +1. 
Parse the namespace identifier to extract catalog and database +2. Verify the namespace exists using [GetDatabase](https://docs.aws.amazon.com/glue/latest/webapi/API_GetDatabase.html) +3. Use [GetTables](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTables.html) with `CatalogId` and `DatabaseName` +4. Filter tables where `Parameters.table_type=lance` (case insensitive) +5. Sort the results and apply pagination using `NextToken` + +**Error Handling:** + +If the namespace does not exist, return error code `1` (NamespaceNotFound). + +If access is denied, return error code `16` (Forbidden). + +If the Glue service is unavailable, return error code `17` (ServiceUnavailable). + +### DescribeTable + +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. + +The implementation: + +1. Parse the table identifier to extract catalog, database, and table name +2. Use [GetTable](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTable.html) with `CatalogId`, `DatabaseName`, and `Name` +3. Validate that the table is a Lance table (check `Parameters.table_type=lance`) +4. Return the table location from `StorageDescriptor.Location` and storage_options from `Parameters` + +**Error Handling:** + +If the table does not exist, return error code `4` (TableNotFound). + +If the table is not a Lance table, return error code `13` (InvalidInput). + +If access is denied, return error code `16` (Forbidden). + +If the Glue service is unavailable, return error code `17` (ServiceUnavailable). + +### DeregisterTable + +Removes a Lance table registration from AWS Glue without deleting the underlying data. + +The implementation: + +1. Parse the table identifier to extract catalog, database, and table name +2. 
Use [GetTable](https://docs.aws.amazon.com/glue/latest/webapi/API_GetTable.html) to retrieve and validate the table is a Lance table +3. Use [DeleteTable](https://docs.aws.amazon.com/glue/latest/webapi/API_DeleteTable.html) with `CatalogId`, `DatabaseName`, and `Name` +4. The underlying Lance table data at `StorageDescriptor.Location` is not deleted + +**Error Handling:** + +If the table does not exist, return error code `4` (TableNotFound). + +If the table is not a Lance table, return error code `13` (InvalidInput). + +If access is denied, return error code `16` (Forbidden). -Updates to Lance tables in AWS Glue should use the `VersionId` for conditional updates through the [UpdateTable](https://docs.aws.amazon.com/glue/latest/webapi/API_UpdateTable.html) API. If the `VersionId` does not match the expected version, the update fails to prevent concurrent modification conflicts. +If the Glue service is unavailable, return error code `17` (ServiceUnavailable). diff --git a/docs/src/hive2.md b/docs/src/hive2.md index bb4e325..cced4c5 100644 --- a/docs/src/hive2.md +++ b/docs/src/hive2.md @@ -87,19 +87,22 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DropNamespace -Removes a database from HMS. +Removes a database from HMS. Only RESTRICT mode is supported; CASCADE mode is not implemented. The implementation: 1. Parse the namespace identifier (database name) 2. Check if the namespace exists (handle SKIP mode if not) -3. For RESTRICT behavior: verify the namespace is empty (no tables) -4. For CASCADE behavior: drop all tables first -5. Drop the database from HMS +3. Verify the namespace is empty (no tables) +4. Drop the database from HMS **Error Handling:** -If the namespace does not exist and mode is FAIL, return error code `1` (NamespaceNotFound). If the namespace is not empty and behavior is RESTRICT, return error code `3` (NamespaceNotEmpty). If the HMS connection fails, return error code `17` (ServiceUnavailable). 
+If the namespace does not exist and mode is FAIL, return error code `1` (NamespaceNotFound). + +If the namespace is not empty, return error code `3` (NamespaceNotEmpty). + +If the HMS connection fails, return error code `17` (ServiceUnavailable). ### DeclareTable @@ -136,18 +139,22 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DescribeTable -Retrieves metadata for a Lance table. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. The implementation: 1. Parse the table identifier 2. Retrieve the Table object from HMS 3. Validate that it is a Lance table (check `table_type=lance`) -4. Return the table location and properties +4. Return the table location from `storageDescriptor.location` and storage_options from `parameters` **Error Handling:** -If the table does not exist, return error code `4` (TableNotFound). If the table is not a Lance table, return error code `13` (InvalidInput). If the HMS connection fails, return error code `17` (ServiceUnavailable). +If the table does not exist, return error code `4` (TableNotFound). + +If the table is not a Lance table, return error code `13` (InvalidInput). + +If the HMS connection fails, return error code `17` (ServiceUnavailable). ### DeregisterTable diff --git a/docs/src/hive3.md b/docs/src/hive3.md index 63a9b79..99f5746 100644 --- a/docs/src/hive3.md +++ b/docs/src/hive3.md @@ -90,19 +90,22 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DropNamespace -Removes a namespace from HMS. +Removes a namespace from HMS. Only RESTRICT mode is supported; CASCADE mode is not implemented. The implementation: 1. Parse the namespace identifier 2. Check if the namespace exists (handle SKIP mode if not) -3. 
For RESTRICT behavior: verify the namespace is empty (no child namespaces or tables) -4. For CASCADE behavior: recursively drop all child objects first -5. Drop the catalog or database from HMS +3. Verify the namespace is empty (no child namespaces or tables) +4. Drop the catalog or database from HMS **Error Handling:** -If the namespace does not exist and mode is FAIL, return error code `1` (NamespaceNotFound). If the namespace is not empty and behavior is RESTRICT, return error code `3` (NamespaceNotEmpty). If the HMS connection fails, return error code `17` (ServiceUnavailable). +If the namespace does not exist and mode is FAIL, return error code `1` (NamespaceNotFound). + +If the namespace is not empty, return error code `3` (NamespaceNotEmpty). + +If the HMS connection fails, return error code `17` (ServiceUnavailable). ### DeclareTable @@ -139,18 +142,22 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DescribeTable -Retrieves metadata for a Lance table. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. The implementation: 1. Parse the table identifier 2. Retrieve the Table object from HMS 3. Validate that it is a Lance table (check `table_type=lance`) -4. Return the table location and properties +4. Return the table location from `storageDescriptor.location` and storage_options from `parameters` **Error Handling:** -If the table does not exist, return error code `4` (TableNotFound). If the table is not a Lance table, return error code `13` (InvalidInput). If the HMS connection fails, return error code `17` (ServiceUnavailable). +If the table does not exist, return error code `4` (TableNotFound). + +If the table is not a Lance table, return error code `13` (InvalidInput). 
+ +If the HMS connection fails, return error code `17` (ServiceUnavailable). ### DeregisterTable diff --git a/docs/src/iceberg.md b/docs/src/iceberg.md index fa15609..9550ee5 100644 --- a/docs/src/iceberg.md +++ b/docs/src/iceberg.md @@ -10,23 +10,16 @@ Apache Iceberg REST Catalog is a standardized REST API for interacting with Iceb The Lance Iceberg REST Catalog namespace implementation accepts the following configuration properties: -The **endpoint** property is required and specifies the Iceberg REST Catalog server endpoint URL (e.g., `http://localhost:8181`). Must start with `http://` or `https://`. - -The **warehouse** property is optional and specifies the warehouse identifier to use. Some Iceberg REST implementations require this. - -The **prefix** property is optional and specifies the API path prefix (e.g., `v1`). Default value is empty. - -The **auth_token** property is optional and specifies the bearer token for authentication. - -The **credential** property is optional and specifies the OAuth2 client credential in the format `client_id:client_secret` for client credentials authentication flow. - -The **connect_timeout** property is optional and specifies the connection timeout in milliseconds. Default value is `10000` (10 seconds). - -The **read_timeout** property is optional and specifies the read timeout in milliseconds. Default value is `30000` (30 seconds). - -The **max_retries** property is optional and specifies the maximum number of retries for failed requests. Default value is `3`. - -The **root** property is optional and specifies the default storage root location for tables. Default value is the current working directory. +| Property | Required | Default | Description | +|----------|----------|---------|-------------| +| `endpoint` | Yes | - | Iceberg REST Catalog server endpoint URL (e.g., `http://localhost:8181`). Must start with `http://` or `https://`. | +| `warehouse` | No | - | Warehouse identifier. 
Some Iceberg REST implementations require this. The warehouse name is resolved to an API prefix via the `/v1/config` endpoint. | +| `auth_token` | No | - | Bearer token for authentication. | +| `credential` | No | - | OAuth2 client credential in `client_id:client_secret` format for client credentials authentication flow. | +| `connect_timeout` | No | `10000` | Connection timeout in milliseconds. | +| `read_timeout` | No | `30000` | Read timeout in milliseconds. | +| `max_retries` | No | `3` | Maximum number of retries for failed requests. | +| `root` | No | current working directory | Default storage root location for tables. | ## Object Mapping @@ -101,7 +94,7 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DropNamespace -Removes a namespace from the Iceberg catalog. +Removes a namespace from the Iceberg catalog. Only RESTRICT mode is supported; CASCADE mode is not implemented. The implementation: @@ -110,7 +103,11 @@ The implementation: **Error Handling:** -If the namespace does not exist, return error code `1` (NamespaceNotFound). If the namespace is not empty, return error code `3` (NamespaceNotEmpty). If the server returns an error, return error code `18` (Internal). +If the namespace does not exist, the operation succeeds (idempotent behavior). + +If the namespace is not empty, return error code `3` (NamespaceNotEmpty). + +If the server returns an error, return error code `18` (Internal). ### DeclareTable @@ -121,11 +118,11 @@ The implementation: 1. Parse the table identifier to extract namespace and table name 2. Construct a CreateTableRequest with: - `name`: the table name - - `location`: the specified or default location + - `location`: the specified or default location (defaults to `{root}/{prefix}/{namespace}/{table_name}`) - `schema`: a dummy Iceberg schema with a single nullable string column `dummy` - `properties`: table properties including `table_type=lance` 3. 
POST to `/v1/{prefix}/namespaces/{namespace}/tables` -4. Return the created table location and properties +4. Return the declared table location **Error Handling:** @@ -148,18 +145,22 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DescribeTable -Retrieves metadata for a Lance table. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. The implementation: 1. Parse the table identifier to extract namespace and table name 2. GET `/v1/{prefix}/namespaces/{namespace}/tables/{table}` 3. Verify the table has `table_type=lance` property -4. Return the table location and properties +4. Return the table location and storage_options from `properties` **Error Handling:** -If the table does not exist, return error code `4` (TableNotFound). If the table is not a Lance table, return error code `13` (InvalidInput). If the server returns an error, return error code `18` (Internal). +If the table does not exist, return error code `4` (TableNotFound). + +If the table is not a Lance table, return error code `13` (InvalidInput). + +If the server returns an error, return error code `18` (Internal). ### DeregisterTable diff --git a/docs/src/polaris.md b/docs/src/polaris.md index de52d30..77cd883 100644 --- a/docs/src/polaris.md +++ b/docs/src/polaris.md @@ -6,6 +6,8 @@ This document describes how the Polaris Catalog implements the Lance Namespace c Apache Polaris is an open-source catalog implementation for Apache Iceberg that provides a REST API for managing tables and namespaces. Polaris supports the Generic Table API which allows registering non-Iceberg table formats. For details on Polaris Catalog, see the [Polaris Catalog Documentation](https://polaris.apache.org). 
+**Note:** The Generic Table API is available in Polaris 1.2.0-incubating and later versions. Ensure your Polaris deployment is running a compatible version. + ## Namespace Implementation Configuration Properties The Lance Polaris namespace implementation accepts the following configuration properties: @@ -24,11 +26,14 @@ The **max_retries** property is optional and specifies the maximum number of ret ### Namespace -The **root namespace** is represented by the Polaris catalog root, accessed via the `/namespaces` endpoint. +The **namespace identifier** follows a hierarchical structure where the first level represents the Polaris catalog (warehouse), and subsequent levels represent namespaces within that catalog. For example, `my_catalog.my_schema` refers to namespace `my_schema` in catalog `my_catalog`. -A **child namespace** is a nested namespace in Polaris. Polaris supports arbitrary nesting depth, allowing flexible namespace organization. First-level namespaces typically represent catalogs, with subsequent levels representing schemas or other organizational units. +A **child namespace** is a nested namespace in Polaris. Polaris supports arbitrary nesting depth, allowing flexible namespace organization within a catalog. -The **namespace identifier** is constructed by joining namespace levels with the `.` delimiter (e.g., `catalog.schema`). When making API calls, the namespace path is URL-encoded. +The **namespace identifier** is constructed by joining the catalog and namespace levels with the `.` delimiter (e.g., `catalog.schema.subschema`). When making API calls: +- The catalog is extracted as the first level +- Remaining levels form the namespace path within that catalog +- The namespace path is URL-encoded using `.` as the separator **Namespace properties** are stored in the namespace's properties map, returned by the Polaris namespace API. @@ -54,10 +59,11 @@ Creates a new namespace in Polaris. The implementation: -1. 
Parse the namespace identifier to get the namespace path -2. Construct a CreateNamespaceRequest with the namespace array and properties -3. POST to `/namespaces` endpoint -4. Return the created namespace properties +1. Parse the namespace identifier to extract the catalog (first level) and namespace levels +2. Validate that at least 2 levels are provided (catalog + namespace) +3. Construct a CreateNamespaceRequest with the namespace array and properties +4. POST to `/api/catalog/v1/{catalog}/namespaces` endpoint +5. Return the created namespace properties **Error Handling:** @@ -69,10 +75,11 @@ Lists child namespaces under a given parent namespace. The implementation: -1. Parse the parent namespace identifier -2. For root namespace: GET `/namespaces` -3. For nested namespace: GET `/namespaces/{parent}/namespaces` -4. Convert the response namespace arrays to dot-separated strings +1. Parse the parent namespace identifier to extract the catalog (first level) +2. Validate that at least 1 level (catalog) is provided +3. For catalog-level listing: GET `/api/catalog/v1/{catalog}/namespaces` +4. For nested namespace listing: GET `/api/catalog/v1/{catalog}/namespaces/{parent}/namespaces` +5. Convert the response namespace arrays to dot-separated strings, prefixing with the catalog name **Error Handling:** @@ -84,9 +91,10 @@ Retrieves properties and metadata for a namespace. The implementation: -1. Parse the namespace identifier -2. GET `/namespaces/{namespace}` with URL-encoded namespace path -3. Return the namespace properties +1. Parse the namespace identifier to extract the catalog (first level) and namespace path +2. Validate that at least 2 levels are provided (catalog + namespace) +3. GET `/api/catalog/v1/{catalog}/namespaces/{namespace}` with URL-encoded namespace path +4. Return the namespace properties **Error Handling:** @@ -94,16 +102,21 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). 
If t ### DropNamespace -Removes a namespace from Polaris. +Removes a namespace from Polaris. Only RESTRICT mode is supported; CASCADE mode is not implemented. The implementation: -1. Parse the namespace identifier -2. DELETE `/namespaces/{namespace}` with URL-encoded namespace path +1. Parse the namespace identifier to extract the catalog (first level) and namespace path +2. Validate that at least 2 levels are provided (catalog + namespace) +3. DELETE `/api/catalog/v1/{catalog}/namespaces/{namespace}` with URL-encoded namespace path **Error Handling:** -If the namespace does not exist, return error code `1` (NamespaceNotFound). If the namespace is not empty, return error code `3` (NamespaceNotEmpty). If the server returns an error, return error code `18` (Internal). +If the namespace does not exist, return error code `1` (NamespaceNotFound). + +If the namespace is not empty, return error code `3` (NamespaceNotEmpty). + +If the server returns an error, return error code `18` (Internal). ### DeclareTable @@ -111,15 +124,16 @@ Declares a new Lance table in Polaris without creating the underlying data. The implementation: -1. Parse the table identifier to extract namespace and table name -2. Construct a CreateGenericTableRequest with: +1. Parse the table identifier to extract catalog (first level), namespace (middle levels), and table name (last level) +2. Validate that at least 3 levels are provided (catalog + namespace + table) +3. Construct a CreateGenericTableRequest with: - `name`: the table name - `format`: `lance` - `base-location`: the specified location - `doc`: optional description from properties - `properties`: table properties including `table_type=lance` -3. POST to `/namespaces/{namespace}/generic-tables` -4. Return the created table location and properties +4. POST to `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables` +5. 
Return the created table location and properties **Error Handling:** @@ -131,9 +145,10 @@ Lists all Lance tables in a namespace. The implementation: -1. Parse the namespace identifier -2. GET `/namespaces/{namespace}/generic-tables` -3. Extract table names from the response identifiers +1. Parse the namespace identifier to extract the catalog (first level) and namespace path +2. Validate that at least 2 levels are provided (catalog + namespace) +3. GET `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables` +4. Extract table names from the response identifiers **Error Handling:** @@ -141,18 +156,23 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DescribeTable -Retrieves metadata for a Lance table. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. The implementation: -1. Parse the table identifier to extract namespace and table name -2. GET `/namespaces/{namespace}/generic-tables/{table}` -3. Verify the table format is `lance` -4. Return the table location, properties, and optional doc as comment +1. Parse the table identifier to extract catalog (first level), namespace (middle levels), and table name (last level) +2. Validate that at least 3 levels are provided (catalog + namespace + table) +3. GET `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}` +4. Verify the table format is `lance` +5. Return the table location from `base-location` and storage_options from `properties` **Error Handling:** -If the table does not exist, return error code `4` (TableNotFound). If the table format is not `lance`, return error code `13` (InvalidInput). If the server returns an error, return error code `18` (Internal). +If the table does not exist, return error code `4` (TableNotFound). 
+ +If the table format is not `lance`, return error code `13` (InvalidInput). + +If the server returns an error, return error code `18` (Internal). ### DeregisterTable @@ -160,8 +180,9 @@ Removes a Lance table registration from Polaris without deleting the underlying The implementation: -1. Parse the table identifier to extract namespace and table name -2. DELETE `/namespaces/{namespace}/generic-tables/{table}` +1. Parse the table identifier to extract catalog (first level), namespace (middle levels), and table name (last level) +2. Validate that at least 3 levels are provided (catalog + namespace + table) +3. DELETE `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}` **Error Handling:** diff --git a/docs/src/unity.md b/docs/src/unity.md index ed27665..161038d 100644 --- a/docs/src/unity.md +++ b/docs/src/unity.md @@ -12,31 +12,25 @@ The Lance Unity namespace implementation accepts the following configuration pro The **endpoint** property is required and specifies the Unity Catalog REST API endpoint (e.g., `http://localhost:8080`). -The **catalog** property is required and specifies the Unity Catalog name to use. - -The **api_path** property is optional and specifies the API path prefix. Default value is `/api/2.1/unity-catalog`. - The **auth_token** property is optional and specifies the bearer token for authentication. -The **connect_timeout** property is optional and specifies the HTTP connection timeout in seconds. Default value is `10`. +The **connect_timeout** property is optional and specifies the HTTP connection timeout in milliseconds. Default value is `10000` (10 seconds). -The **read_timeout** property is optional and specifies the HTTP read timeout in seconds. Default value is `60`. +The **read_timeout** property is optional and specifies the HTTP read timeout in milliseconds. Default value is `300000` (5 minutes). The **max_retries** property is optional and specifies the maximum number of retries for failed requests. 
Default value is `3`. -The **root** property is optional and specifies the storage root location of the lakehouse. Default value is the current working directory. - -The **storage.*** prefix properties are optional and specify additional storage configurations to access tables (e.g., `storage.region=us-west-2`). +The **root** property is optional and specifies the storage root location for new tables. Default value is `/tmp/lance`. ## Object Mapping ### Namespace -The **root namespace** is represented by the configured Unity Catalog. The catalog name is fixed at initialization time. +The **namespace identifier** follows a hierarchical structure where the first level represents the Unity Catalog, and the second level represents a schema within that catalog. For example, `my_catalog.my_schema` refers to schema `my_schema` in catalog `my_catalog`. -A **child namespace** is a schema within the Unity Catalog. Unity supports a fixed 3-level hierarchy: catalog.schema.table. +The **root namespace** (empty identifier) lists all available catalogs in the Unity Catalog server. -The **namespace identifier** is constructed by joining the catalog and schema names with the `$` delimiter (e.g., `catalog$schema`). The first level is always the configured catalog. +A **child namespace** is a schema within a Unity Catalog. Unity supports a fixed 3-level hierarchy: catalog.schema.table. **Namespace properties** are stored in the Unity schema's properties map. @@ -44,7 +38,7 @@ The **namespace identifier** is constructed by joining the catalog and schema na A **table** is represented as a [Table](https://github.com/unitycatalog/unitycatalog/blob/main/api/all.yaml) object in Unity Catalog with `table_type` set to `EXTERNAL`. -The **table identifier** is constructed by joining catalog, schema, and table name with the `$` delimiter (e.g., `catalog$schema$table`). +The **table identifier** is constructed by joining catalog, schema, and table name (e.g., `catalog.schema.table`). 
The **table location** is stored in the `storage_location` field of the Unity Table, pointing to the root location of the Lance table. @@ -65,29 +59,28 @@ Creates a new schema in Unity Catalog. The implementation: 1. Parse the namespace identifier (must be 2-level: catalog.schema) -2. Verify the catalog matches the configured catalog -3. Construct a CreateSchema request with name, catalog name, and properties -4. POST to `/schemas` endpoint -5. Return the created schema properties +2. Construct a CreateSchema request with name, catalog name, and properties +3. POST to `/schemas` endpoint +4. Return the created schema properties **Error Handling:** -If the catalog does not match the configured catalog, return error code `13` (InvalidInput). If the schema already exists, return error code `2` (NamespaceAlreadyExists). If the server returns an error, return error code `18` (Internal). +If the schema already exists, return error code `2` (NamespaceAlreadyExists). If the server returns an error, return error code `18` (Internal). ### ListNamespaces -Lists schemas in the Unity Catalog. +Lists catalogs or schemas in the Unity Catalog. The implementation: 1. Parse the parent namespace identifier -2. For root namespace (level 0): return the configured catalog name -3. For catalog namespace (level 1): GET `/schemas` with catalog_name parameter +2. For root namespace (level 0): GET `/catalogs` to list all available catalogs +3. For catalog namespace (level 1): GET `/schemas` with catalog_name parameter to list schemas 4. Sort the results **Error Handling:** -If the catalog does not match the configured catalog, return error code `1` (NamespaceNotFound). If the server returns an error, return error code `18` (Internal). +If the catalog does not exist, return error code `1` (NamespaceNotFound). If the server returns an error, return error code `18` (Internal). ### DescribeNamespace @@ -96,9 +89,8 @@ Retrieves properties and metadata for a schema. The implementation: 1. 
Parse the namespace identifier (must be 2-level: catalog.schema) -2. Verify the catalog matches the configured catalog -3. GET `/schemas/{catalog}.{schema}` -4. Return the schema properties +2. GET `/schemas/{catalog}.{schema}` +3. Return the schema properties **Error Handling:** @@ -106,18 +98,20 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DropNamespace -Removes a schema from Unity Catalog. +Removes a schema from Unity Catalog. Only RESTRICT mode is supported; CASCADE mode is not implemented. The implementation: 1. Parse the namespace identifier (must be 2-level: catalog.schema) -2. Verify the catalog matches the configured catalog -3. For CASCADE behavior: add `force=true` parameter -4. DELETE `/schemas/{catalog}.{schema}` +2. DELETE `/schemas/{catalog}.{schema}` **Error Handling:** -If the namespace does not exist, return error code `1` (NamespaceNotFound). If the namespace is not empty and behavior is RESTRICT, return error code `3` (NamespaceNotEmpty). If the server returns an error, return error code `18` (Internal). +If the namespace does not exist, return error code `1` (NamespaceNotFound). + +If the namespace is not empty, return error code `3` (NamespaceNotEmpty). + +If the server returns an error, return error code `18` (Internal). ### DeclareTable @@ -126,8 +120,7 @@ Declares a new Lance table in Unity Catalog without creating the underlying data The implementation: 1. Parse the table identifier (must be 3-level: catalog.schema.table) -2. Verify the catalog matches the configured catalog -3. Construct a CreateTable request with: +2. Construct a CreateTable request with: - `name`: the table name - `catalog_name`: the catalog - `schema_name`: the schema @@ -135,8 +128,8 @@ The implementation: - `data_source_format`: `TEXT` - `storage_location`: the specified or default location - `properties`: including `table_type=lance` -4. POST to `/tables` endpoint -5. Return the created table location and properties +3. 
POST to `/tables` endpoint +4. Return the created table location and properties **Error Handling:** @@ -149,10 +142,9 @@ Lists all Lance tables in a schema. The implementation: 1. Parse the namespace identifier (must be 2-level: catalog.schema) -2. Verify the catalog matches the configured catalog -3. GET `/tables` with catalog_name and schema_name parameters -4. Filter tables where `properties.table_type=lance` -5. Sort the results +2. GET `/tables` with catalog_name and schema_name parameters +3. Filter tables where `properties.table_type=lance` +4. Sort the results **Error Handling:** @@ -160,19 +152,22 @@ If the namespace does not exist, return error code `1` (NamespaceNotFound). If t ### DescribeTable -Retrieves metadata for a Lance table. +Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is supported. When `load_detailed_metadata=false`, only the table location and storage_options are returned; other fields (version, table_uri, schema, stats) are null. The implementation: 1. Parse the table identifier (must be 3-level: catalog.schema.table) -2. Verify the catalog matches the configured catalog -3. GET `/tables/{catalog}.{schema}.{table}` -4. Verify the table is a Lance table (check `properties.table_type=lance`) -5. Return the table location, properties, and schema +2. GET `/tables/{catalog}.{schema}.{table}` +3. Verify the table is a Lance table (check `properties.table_type=lance`) +4. Return the table location from `storage_location` and storage_options from `properties` **Error Handling:** -If the table does not exist, return error code `4` (TableNotFound). If the table is not a Lance table, return error code `13` (InvalidInput). If the server returns an error, return error code `18` (Internal). +If the table does not exist, return error code `4` (TableNotFound). + +If the table is not a Lance table, return error code `13` (InvalidInput). + +If the server returns an error, return error code `18` (Internal). 
### DeregisterTable @@ -181,9 +176,8 @@ Removes a Lance table registration from Unity Catalog without deleting the under The implementation: 1. Parse the table identifier (must be 3-level: catalog.schema.table) -2. Verify the catalog matches the configured catalog -3. GET the table and verify it is a Lance table -4. DELETE `/tables/{catalog}.{schema}.{table}` +2. GET the table and verify it is a Lance table +3. DELETE `/tables/{catalog}.{schema}.{table}` **Error Handling:** diff --git a/java/Makefile b/java/Makefile index 781b067..e1eea6e 100644 --- a/java/Makefile +++ b/java/Makefile @@ -10,30 +10,115 @@ # See the License for the specific language governing permissions and # limitations under the License. +# ============================================================================ +# Glue +# ============================================================================ + +.PHONY: lint-glue +lint-glue: + ./mvnw spotless:check -pl lance-namespace-glue + .PHONY: build-glue build-glue: ./mvnw spotless:apply -pl lance-namespace-glue -am - ./mvnw install -pl lance-namespace-glue -am + ./mvnw install -pl lance-namespace-glue -am -DskipTests + +.PHONY: test-glue +test-glue: + ./mvnw test -pl lance-namespace-glue -Dtest="!*Integration" + +# ============================================================================ +# Hive2 +# ============================================================================ + +.PHONY: lint-hive2 +lint-hive2: + ./mvnw spotless:check -pl lance-namespace-hive2 .PHONY: build-hive2 build-hive2: ./mvnw spotless:apply -pl lance-namespace-hive2 -am - ./mvnw install -pl lance-namespace-hive2 -am + ./mvnw install -pl lance-namespace-hive2 -am -DskipTests + +.PHONY: test-hive2 +test-hive2: + ./mvnw test -pl lance-namespace-hive2 -Dtest="!*IntegrationTest" + +# ============================================================================ +# Hive3 +# ============================================================================ + +.PHONY: lint-hive3 
+lint-hive3: + ./mvnw spotless:check -pl lance-namespace-hive3 .PHONY: build-hive3 build-hive3: ./mvnw spotless:apply -pl lance-namespace-hive3 -am - ./mvnw install -pl lance-namespace-hive3 -am + ./mvnw install -pl lance-namespace-hive3 -am -DskipTests -.PHONY: build-unity -build-unity: - ./mvnw spotless:apply -pl lance-namespace-unity -am - ./mvnw install -pl lance-namespace-unity -am +.PHONY: test-hive3 +test-hive3: + ./mvnw test -pl lance-namespace-hive3 -Dtest="!*IntegrationTest" + +# ============================================================================ +# Iceberg +# ============================================================================ + +.PHONY: lint-iceberg +lint-iceberg: + ./mvnw spotless:check -pl lance-namespace-iceberg + +.PHONY: build-iceberg +build-iceberg: + ./mvnw spotless:apply -pl lance-namespace-iceberg -am + ./mvnw install -pl lance-namespace-iceberg -am -DskipTests + +.PHONY: test-iceberg +test-iceberg: + ./mvnw test -pl lance-namespace-iceberg -Dtest="!*IntegrationTest" + +# ============================================================================ +# Polaris +# ============================================================================ + +.PHONY: lint-polaris +lint-polaris: + ./mvnw spotless:check -pl lance-namespace-polaris .PHONY: build-polaris build-polaris: ./mvnw spotless:apply -pl lance-namespace-polaris -am - ./mvnw install -pl lance-namespace-polaris -am + ./mvnw install -pl lance-namespace-polaris -am -DskipTests + +.PHONY: test-polaris +test-polaris: + ./mvnw test -pl lance-namespace-polaris -Dtest="!*IntegrationTest" + +# ============================================================================ +# Unity +# ============================================================================ + +.PHONY: lint-unity +lint-unity: + ./mvnw spotless:check -pl lance-namespace-unity + +.PHONY: build-unity +build-unity: + ./mvnw spotless:apply -pl lance-namespace-unity -am + ./mvnw install -pl lance-namespace-unity -am -DskipTests 
+ +.PHONY: test-unity +test-unity: + ./mvnw test -pl lance-namespace-unity -Dtest="!*IntegrationTest" + +# ============================================================================ +# All +# ============================================================================ + +.PHONY: lint +lint: + ./mvnw spotless:check .PHONY: clean clean: @@ -42,8 +127,40 @@ clean: .PHONY: build build: ./mvnw spotless:apply - ./mvnw install + ./mvnw install -DskipTests .PHONY: test test: ./mvnw test + +# ============================================================================ +# Integration tests +# ============================================================================ + +.PHONY: integ-test +integ-test: + ./mvnw test -Dtest="*IntegrationTest" -DfailIfNoTests=false + +.PHONY: integ-test-hive2 +integ-test-hive2: + ./mvnw test -pl lance-namespace-hive2 -Dtest="*NamespaceIntegration" -DfailIfNoTests=false + +.PHONY: integ-test-hive3 +integ-test-hive3: + ./mvnw test -pl lance-namespace-hive3 -Dtest="*NamespaceIntegration" -DfailIfNoTests=false + +.PHONY: integ-test-polaris +integ-test-polaris: + ./mvnw test -pl lance-namespace-polaris -Dtest="*Integration" -DfailIfNoTests=false + +.PHONY: integ-test-iceberg +integ-test-iceberg: + ./mvnw test -pl lance-namespace-iceberg -Dtest="*Integration" -DfailIfNoTests=false + +.PHONY: integ-test-unity +integ-test-unity: + ./mvnw test -pl lance-namespace-unity -Dtest="*Integration" -DfailIfNoTests=false + +.PHONY: integ-test-glue +integ-test-glue: + ./mvnw test -pl lance-namespace-glue -Dtest="*Integration" -DfailIfNoTests=false diff --git a/java/lance-namespace-glue/pom.xml b/java/lance-namespace-glue/pom.xml index 142154f..cb8576c 100644 --- a/java/lance-namespace-glue/pom.xml +++ b/java/lance-namespace-glue/pom.xml @@ -26,6 +26,10 @@ org.lance lance-core + + org.lance + lance-namespace-core + org.lance lance-namespace-apache-client @@ -71,6 +75,20 @@ 5.18.0 test + + + org.assertj + assertj-core + 3.26.3 + test + + + + 
software.amazon.awssdk + sts + ${aws.sdk.version} + test + diff --git a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java index 7f2b42e..847c691 100644 --- a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java +++ b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueNamespace.java @@ -13,16 +13,16 @@ */ package org.lance.namespace.glue; -import com.lancedb.lance.Dataset; -import com.lancedb.lance.WriteParams; +import org.lance.Dataset; import org.lance.namespace.LanceNamespace; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.InternalException; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.NamespaceNotFoundException; +import org.lance.namespace.errors.TableNotFoundException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; import org.lance.namespace.model.DeregisterTableRequest; import org.lance.namespace.model.DeregisterTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; @@ -31,28 +31,17 @@ import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.JsonArrowSchema; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; import 
org.lance.namespace.model.ListTablesResponse; -import org.lance.namespace.model.NamespaceExistsRequest; -import org.lance.namespace.model.RegisterTableRequest; -import org.lance.namespace.model.RegisterTableResponse; -import org.lance.namespace.model.TableExistsRequest; -import org.lance.namespace.util.ArrowIpcUtil; -import org.lance.namespace.util.JsonArrowSchemaConverter; -import org.lance.namespace.util.OpenDalUtil; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.opendal.Operator; import software.amazon.awssdk.services.glue.GlueClient; import software.amazon.awssdk.services.glue.model.AlreadyExistsException; import software.amazon.awssdk.services.glue.model.CreateDatabaseRequest; @@ -74,7 +63,6 @@ import software.amazon.awssdk.services.glue.model.TableInput; import java.io.Closeable; -import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; @@ -158,23 +146,19 @@ public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest requ public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { String namespaceName = namespaceFromId(request.getId()); - CreateNamespaceRequest.ModeEnum mode = - request.getMode() != null ? request.getMode() : CreateNamespaceRequest.ModeEnum.CREATE; + String mode = request.getMode() != null ? request.getMode().toLowerCase() : "create"; Map params = request.getProperties() != null ? 
request.getProperties() : ImmutableMap.of(); boolean namespaceExists = databaseExists(namespaceName); - switch (mode) { - case EXIST_OK: - if (namespaceExists) { - return describeNamespaceAsCreateResponse(namespaceName); - } - break; - case OVERWRITE: - if (namespaceExists) { - deleteDatabase(namespaceName); - } - break; + if ("exist_ok".equals(mode) || "existok".equals(mode)) { + if (namespaceExists) { + return describeNamespaceAsCreateResponse(namespaceName); + } + } else if ("overwrite".equals(mode)) { + if (namespaceExists) { + deleteDatabase(namespaceName); + } } try { @@ -185,7 +169,7 @@ public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { .build()); return new CreateNamespaceResponse().properties(params); } catch (AlreadyExistsException e) { - if (mode == CreateNamespaceRequest.ModeEnum.EXIST_OK) { + if ("exist_ok".equals(mode) || "existok".equals(mode)) { return describeNamespaceAsCreateResponse(namespaceName); } throw GlueToLanceErrorConverter.conflict(e, "Namespace already exists: %s", namespaceName); @@ -197,46 +181,28 @@ public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { @Override public DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { + if ("Cascade".equalsIgnoreCase(request.getBehavior())) { + throw new InvalidInputException("Cascade behavior is not supported for this implementation"); + } + String namespaceName = namespaceFromId(request.getId()); - DropNamespaceRequest.ModeEnum mode = - request.getMode() != null ? request.getMode() : DropNamespaceRequest.ModeEnum.FAIL; - DropNamespaceRequest.BehaviorEnum behavior = - request.getBehavior() != null - ? request.getBehavior() - : DropNamespaceRequest.BehaviorEnum.RESTRICT; + String mode = request.getMode() != null ? 
request.getMode().toLowerCase() : "fail"; if (!databaseExists(namespaceName)) { - if (mode == DropNamespaceRequest.ModeEnum.SKIP) { + if ("skip".equals(mode)) { return new DropNamespaceResponse(); } - throw LanceNamespaceException.badRequest( - "Namespace not found: " + namespaceName, - "NAMESPACE_NOT_FOUND", - namespaceName, - "The requested namespace does not exist"); + throw new NamespaceNotFoundException( + "Namespace not found: " + namespaceName, "NAMESPACE_NOT_FOUND", namespaceName); } - switch (behavior) { - case CASCADE: - deleteAllTables(namespaceName); - break; - case RESTRICT: - ensureNamespaceEmpty(namespaceName); - break; - } + ensureNamespaceEmpty(namespaceName); deleteDatabase(namespaceName); return new DropNamespaceResponse(); } - @Override - public void namespaceExists(NamespaceExistsRequest request) { - String namespaceName = namespaceFromId(request.getId()); - // Throws if database doesn't exist - getDatabase(namespaceName); - } - @Override public ListTablesResponse listTables(ListTablesRequest request) { String namespaceName = namespaceFromId(request.getId()); @@ -270,6 +236,11 @@ public ListTablesResponse listTables(ListTablesRequest request) { @Override public DescribeTableResponse describeTable(DescribeTableRequest request) { + if (Boolean.TRUE.equals(request.getLoadDetailedMetadata())) { + throw new InvalidInputException( + "load_detailed_metadata=true is not supported for this implementation"); + } + validateTableId(request.getId()); String namespaceName = request.getId().get(0); String tableName = request.getId().get(1); @@ -285,62 +256,6 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { return response; } - @Override - public RegisterTableResponse registerTable(RegisterTableRequest request) { - validateTableId(request.getId()); - String namespaceName = request.getId().get(0); - String tableName = request.getId().get(1); - - if (request.getLocation().isEmpty()) { - throw LanceNamespaceException.badRequest("Table 
location is required", "BAD_REQUEST", "", ""); - } - String location = OpenDalUtil.stripTrailingSlash(request.getLocation()); - RegisterTableRequest.ModeEnum mode = - request.getMode() != null ? request.getMode() : RegisterTableRequest.ModeEnum.CREATE; - - if (mode == RegisterTableRequest.ModeEnum.OVERWRITE) { - // If table exists, delete its Glue Table (ignore if missing) - deleteGlueTable(namespaceName, tableName, false); - } - - try { - // TODO: register table mode - Map params = Maps.newHashMap(); - if (request.getProperties() != null) { - params.putAll(request.getProperties()); - } - params.put(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE); - params.put(MANAGED_BY_PROP, STORAGE_VALUE); // Always storage for existing tables - - TableInput tableInput = - TableInput.builder() - .name(tableName) - .storageDescriptor( - StorageDescriptor.builder().location(location).parameters(params).build()) - .build(); - - glueClient.createTable( - software.amazon.awssdk.services.glue.model.CreateTableRequest.builder() - .catalogId(config.catalogId()) - .databaseName(namespaceName) - .tableInput(tableInput) - .build()); - - RegisterTableResponse response = new RegisterTableResponse(); - response.setLocation(location); - response.setProperties(request.getProperties()); - return response; - } catch (AlreadyExistsException e) { - throw GlueToLanceErrorConverter.conflict( - e, "Table already exists: %s.%s", namespaceName, tableName); - } catch (EntityNotFoundException e) { - throw GlueToLanceErrorConverter.notFound(e, "Namespace not found: %s", namespaceName); - } catch (GlueException e) { - throw GlueToLanceErrorConverter.serverError( - e, "Failed to register table: %s.%s", namespaceName, tableName); - } - } - @Override public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { validateTableId(request.getId()); @@ -371,98 +286,7 @@ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { } } - @Override - public CreateTableResponse 
createTable(CreateTableRequest request, byte[] requestData) { - // Validate that requestData is a valid Arrow IPC stream - if (requestData == null || requestData.length == 0) { - throw LanceNamespaceException.badRequest( - "Request data (Arrow IPC stream) is required for createTable", - "INVALID_REQUEST", - String.join(".", request.getId()), - "Arrow IPC stream data is required"); - } - - validateTableId(request.getId()); - String namespaceName = request.getId().get(0); - String tableName = request.getId().get(1); - - String location = request.getLocation(); - if (location == null || location.isEmpty()) { - location = getDefaultTableLocation(namespaceName, tableName); - } - - try { - Map params = Maps.newHashMap(); - if (request.getProperties() != null) { - params.putAll(request.getProperties()); - } - params.put(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE); - params.put(MANAGED_BY_PROP, params.getOrDefault(MANAGED_BY_PROP, STORAGE_VALUE)); - params.put(VERSION_PROP, "1"); - - // Extract schema from Arrow IPC stream - JsonArrowSchema jsonSchema; - try { - jsonSchema = ArrowIpcUtil.extractSchemaFromIpc(requestData); - } catch (IOException e) { - throw LanceNamespaceException.badRequest( - "Invalid Arrow IPC stream: " + e.getMessage(), - "INVALID_ARROW_IPC", - namespaceName + "." 
+ tableName, - "Failed to extract schema from Arrow IPC stream"); - } - Schema schema = JsonArrowSchemaConverter.convertToArrowSchema(jsonSchema); - - WriteParams writeParams = - new WriteParams.Builder() - .withMode(WriteParams.WriteMode.CREATE) - .withStorageOptions(config.getStorageOptions()) - .build(); - - try { - Dataset.create(allocator, location, schema, writeParams); - } catch (Exception e) { - throw LanceNamespaceException.serverError( - "Failed to create Lance dataset at location: " + location, - "DATASET_CREATE_ERROR", - location, - "An error occurred while creating the Lance dataset: " + e.getMessage()); - } - - TableInput tableInput = - TableInput.builder() - .name(tableName) - .storageDescriptor( - StorageDescriptor.builder().location(location).parameters(params).build()) - .parameters(params) - .build(); - - glueClient.createTable( - software.amazon.awssdk.services.glue.model.CreateTableRequest.builder() - .catalogId(config.catalogId()) - .databaseName(namespaceName) - .tableInput(tableInput) - .build()); - - CreateTableResponse response = new CreateTableResponse(); - response.setLocation(location); - response.setVersion(1L); - response.setProperties(request.getProperties()); - response.setStorageOptions(config.getStorageOptions()); - return response; - } catch (GlueException e) { - safeDropDataset(location); - if (e instanceof AlreadyExistsException) { - throw GlueToLanceErrorConverter.conflict( - e, "Table already exists: %s.%s", namespaceName, tableName); - } else if (e instanceof EntityNotFoundException) { - throw GlueToLanceErrorConverter.notFound(e, "Namespace not found: %s", namespaceName); - } else { - throw GlueToLanceErrorConverter.serverError( - e, "Failed to create table: %s.%s", namespaceName, tableName); - } - } - } + // Removed: createTable(CreateTableRequest, byte[]) - using default implementation from interface @Override public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { @@ -477,11 +301,8 @@ public 
CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request try { Map params = Maps.newHashMap(); - if (request.getProperties() != null) { - params.putAll(request.getProperties()); - } params.put(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE); - params.put(MANAGED_BY_PROP, params.getOrDefault(MANAGED_BY_PROP, STORAGE_VALUE)); + params.put(MANAGED_BY_PROP, STORAGE_VALUE); TableInput tableInput = TableInput.builder() @@ -500,105 +321,56 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request CreateEmptyTableResponse response = new CreateEmptyTableResponse(); response.setLocation(location); - response.setProperties(request.getProperties()); response.setStorageOptions(config.getStorageOptions()); return response; } catch (AlreadyExistsException e) { - throw GlueToLanceErrorConverter.conflict( + throw GlueToLanceErrorConverter.tableConflict( e, "Table already exists: %s.%s", namespaceName, tableName); } catch (EntityNotFoundException e) { - throw GlueToLanceErrorConverter.notFound(e, "Namespace not found: %s", namespaceName); + throw GlueToLanceErrorConverter.namespaceNotFound( + e, "Namespace not found: %s", namespaceName); } catch (GlueException e) { throw GlueToLanceErrorConverter.serverError( e, "Failed to create empty table: %s.%s", namespaceName, tableName); } } - @Override - public DropTableResponse dropTable(DropTableRequest request) { - validateTableId(request.getId()); - String namespaceName = request.getId().get(0); - String tableName = request.getId().get(1); - - Table table = getGlueTable(namespaceName, tableName); - ensureLanceTable(table); - String tableLocation = null; - if (table.storageDescriptor() != null && table.storageDescriptor().location() != null) { - tableLocation = OpenDalUtil.stripTrailingSlash(table.storageDescriptor().location()); - } - - try { - Dataset.drop(tableLocation, config.getStorageOptions()); - } catch (Exception e) { - // Log warning but continue with Glue metadata deletion - throw 
LanceNamespaceException.serverError( - "Failed to drop Lance dataset at location: " + tableLocation, - "DATASET_DROP_ERROR", - tableLocation, - e.getMessage()); - } - - deleteGlueTable(namespaceName, tableName, false); - DropTableResponse response = new DropTableResponse(); - response.setId(request.getId()); - response.setLocation(tableLocation); - if (table.parameters() != null && !table.parameters().isEmpty()) { - response.setProperties(table.parameters()); - } - return response; - } - - @Override - public void tableExists(TableExistsRequest request) { - validateTableId(request.getId()); - String namespaceName = request.getId().get(0); - String tableName = request.getId().get(1); - - Table table = getGlueTableAtVersion(namespaceName, tableName, request.getVersion()); - ensureLanceTable(table); - } - private void validateParent(List id) { if (id != null && id.size() > 1) { String instance = String.join("/", id); - throw LanceNamespaceException.badRequest( + throw new InvalidInputException( "Glue does not support nested namespaces. 
Found nested path: " + String.join("/", id), "BAD_REQUEST", - instance, - "Nested namespaces must have only one parent"); + instance); } } private String namespaceFromId(List id) { if (id == null || id.isEmpty()) { - throw LanceNamespaceException.badRequest( - "Namespace identifier cannot be null or empty", "BAD_REQUEST", "", ""); + throw new InvalidInputException("Namespace identifier cannot be null or empty"); } validateParent(id); String namespace = id.get(0); if (namespace == null || namespace.isEmpty()) { - throw LanceNamespaceException.badRequest( - "Namespace name cannot be empty", "BAD_REQUEST", "", ""); + throw new InvalidInputException("Namespace name cannot be empty"); } return namespace; } private void validateTableId(List id) { if (id == null || id.size() != 2) { - throw LanceNamespaceException.badRequest( + throw new InvalidInputException( "Table identifier must contain exactly 2 elements, but got " + id, "BAD_REQUEST", - id != null ? String.join("/", id) : "", - "Expected format: [namespace, table]"); + id != null ? 
String.join("/", id) : ""); } if (id.get(0) == null || id.get(0).isEmpty()) { - throw LanceNamespaceException.badRequest( - "Namespace name cannot be empty", "BAD_REQUEST", "", ""); + throw new InvalidInputException("Namespace name cannot be empty"); } if (id.get(1) == null || id.get(1).isEmpty()) { - throw LanceNamespaceException.badRequest("Table name cannot be empty", "BAD_REQUEST", "", ""); + throw new InvalidInputException("Table name cannot be empty"); } } @@ -661,7 +433,11 @@ private DatabaseInput buildDatabaseInput(String namespaceName, Map properties) { this.accessKeyId = properties.get(ACCESS_KEY_ID); this.secretAccessKey = properties.get(SECRET_ACCESS_KEY); this.sessionToken = properties.get(SESSION_TOKEN); - this.storageOptions = PropertyUtil.propertiesWithPrefix(properties, STORAGE_OPTIONS_PREFIX); + + // Inline PropertyUtil.propertiesWithPrefix + Map filteredStorageOptions = new HashMap<>(); + for (Map.Entry entry : properties.entrySet()) { + if (entry.getKey().startsWith(STORAGE_OPTIONS_PREFIX)) { + filteredStorageOptions.put( + entry.getKey().substring(STORAGE_OPTIONS_PREFIX.length()), entry.getValue()); + } + } + this.storageOptions = filteredStorageOptions; + + // Inline PropertyUtil.propertyAsString and OpenDalUtil.stripTrailingSlash + String rootValue = properties.getOrDefault(ROOT, ROOT_DEFAULT); this.root = - OpenDalUtil.stripTrailingSlash( - PropertyUtil.propertyAsString(properties, ROOT, ROOT_DEFAULT)); + rootValue != null && rootValue.endsWith("/") + ? 
rootValue.substring(0, rootValue.length() - 1) + : rootValue; } public String catalogId() { diff --git a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueToLanceErrorConverter.java b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueToLanceErrorConverter.java index 2a0a7c8..8c15914 100644 --- a/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueToLanceErrorConverter.java +++ b/java/lance-namespace-glue/src/main/java/org/lance/namespace/glue/GlueToLanceErrorConverter.java @@ -13,7 +13,12 @@ */ package org.lance.namespace.glue; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.InternalException; +import org.lance.namespace.errors.LanceNamespaceException; +import org.lance.namespace.errors.NamespaceAlreadyExistsException; +import org.lance.namespace.errors.NamespaceNotFoundException; +import org.lance.namespace.errors.TableAlreadyExistsException; +import org.lance.namespace.errors.TableNotFoundException; import software.amazon.awssdk.services.glue.model.GlueException; @@ -22,27 +27,30 @@ public class GlueToLanceErrorConverter { private GlueToLanceErrorConverter() {} public static LanceNamespaceException notFound(GlueException e, String message, Object... args) { - return LanceNamespaceException.notFound( - String.format(message, args), - e.getMessage().getClass().getSimpleName(), - e.requestId(), - e.getMessage()); + return new TableNotFoundException( + String.format(message, args), e.getClass().getSimpleName(), e.requestId()); + } + + public static LanceNamespaceException namespaceNotFound( + GlueException e, String message, Object... args) { + return new NamespaceNotFoundException( + String.format(message, args), e.getClass().getSimpleName(), e.requestId()); } public static LanceNamespaceException conflict(GlueException e, String message, Object... 
args) { - return LanceNamespaceException.notFound( - String.format(message, args), - e.getMessage().getClass().getSimpleName(), - e.requestId(), - e.getMessage()); + return new NamespaceAlreadyExistsException( + String.format(message, args), e.getClass().getSimpleName(), e.requestId()); + } + + public static LanceNamespaceException tableConflict( + GlueException e, String message, Object... args) { + return new TableAlreadyExistsException( + String.format(message, args), e.getClass().getSimpleName(), e.requestId()); } public static LanceNamespaceException serverError( GlueException e, String message, Object... args) { - return LanceNamespaceException.serverError( - String.format(message, args), - e.getMessage().getClass().getSimpleName(), - e.requestId(), - e.getMessage()); + return new InternalException( + String.format(message, args), e.getClass().getSimpleName(), e.requestId()); } } diff --git a/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java b/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java index 1979280..0735518 100644 --- a/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java +++ b/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespace.java @@ -13,12 +13,10 @@ */ package org.lance.namespace.glue; -import com.lancedb.lance.Dataset; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.LanceNamespaceException; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; import org.lance.namespace.model.DeregisterTableRequest; import org.lance.namespace.model.DeregisterTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; @@ -26,19 +24,10 @@ import 
org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; -import org.lance.namespace.model.JsonArrowDataType; -import org.lance.namespace.model.JsonArrowField; -import org.lance.namespace.model.JsonArrowSchema; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; import org.lance.namespace.model.ListTablesResponse; -import org.lance.namespace.model.NamespaceExistsRequest; -import org.lance.namespace.model.RegisterTableRequest; -import org.lance.namespace.model.RegisterTableResponse; -import org.lance.namespace.model.TableExistsRequest; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -50,7 +39,6 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.io.TempDir; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import software.amazon.awssdk.services.glue.GlueClient; @@ -69,33 +57,22 @@ import software.amazon.awssdk.services.glue.model.GetDatabasesResponse; import software.amazon.awssdk.services.glue.model.GetTableRequest; import software.amazon.awssdk.services.glue.model.GetTableResponse; -import software.amazon.awssdk.services.glue.model.GetTableVersionRequest; -import software.amazon.awssdk.services.glue.model.GetTableVersionResponse; import software.amazon.awssdk.services.glue.model.GetTablesRequest; import software.amazon.awssdk.services.glue.model.GetTablesResponse; import software.amazon.awssdk.services.glue.model.StorageDescriptor; import software.amazon.awssdk.services.glue.model.Table; -import software.amazon.awssdk.services.glue.model.TableVersion; -import 
java.io.File; -import java.net.URI; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; import java.util.List; import java.util.Map; -import static org.lance.namespace.glue.GlueNamespace.LANCE_TABLE_TYPE_VALUE; -import static org.lance.namespace.glue.GlueNamespace.TABLE_TYPE_PROP; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.lance.namespace.glue.GlueNamespace.LANCE_TABLE_TYPE_VALUE; +import static org.lance.namespace.glue.GlueNamespace.TABLE_TYPE_PROP; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -106,7 +83,6 @@ public class TestGlueNamespace { private GlueNamespace glueNamespace; private static BufferAllocator allocator; - @TempDir Path tempDir; @BeforeAll public static void setUpAll() { @@ -212,7 +188,7 @@ public void testCreateNamespaceWithCreateMode() { CreateNamespaceRequest request = new CreateNamespaceRequest() .id(ImmutableList.of("test")) - .mode(CreateNamespaceRequest.ModeEnum.CREATE) + .mode("Create") .properties(ImmutableMap.of("location", "s3://bucket/test")); // Mock namespace doesn't exist @@ -231,9 +207,7 @@ public void testCreateNamespaceWithCreateMode() { public void testCreateNamespaceWithCreateModeAlreadyExists() { String namespaceName = "existing"; CreateNamespaceRequest request = - new CreateNamespaceRequest() - .id(ImmutableList.of(namespaceName)) - .mode(CreateNamespaceRequest.ModeEnum.CREATE); + new CreateNamespaceRequest().id(ImmutableList.of(namespaceName)).mode("Create"); // Mock namespace exists Database 
existingDatabase = Database.builder().name(namespaceName).build(); @@ -252,9 +226,7 @@ public void testCreateNamespaceWithCreateModeAlreadyExists() { public void testCreateNamespaceWithExistOkModeNamespaceExists() { String namespaceName = "existing"; CreateNamespaceRequest request = - new CreateNamespaceRequest() - .id(ImmutableList.of(namespaceName)) - .mode(CreateNamespaceRequest.ModeEnum.EXIST_OK); + new CreateNamespaceRequest().id(ImmutableList.of(namespaceName)).mode("ExistOk"); Database existingDatabase = Database.builder() @@ -281,7 +253,7 @@ public void testCreateNamespaceWithExistOkModeNamespaceDoesNotExist() { CreateNamespaceRequest request = new CreateNamespaceRequest() .id(ImmutableList.of(namespaceName)) - .mode(CreateNamespaceRequest.ModeEnum.EXIST_OK) + .mode("ExistOk") .properties(ImmutableMap.of("key", "val")); when(glue.getDatabase(any(GetDatabaseRequest.class))) @@ -299,9 +271,7 @@ public void testCreateNamespaceWithExistOkModeNamespaceDoesNotExist() { public void testCreateNamespaceWithOverwriteMode() { String namespaceName = "overwrite"; CreateNamespaceRequest request = - new CreateNamespaceRequest() - .id(ImmutableList.of(namespaceName)) - .mode(CreateNamespaceRequest.ModeEnum.OVERWRITE); + new CreateNamespaceRequest().id(ImmutableList.of(namespaceName)).mode("Overwrite"); // Mock namespace exists Database existingDatabase = Database.builder().name(namespaceName).build(); @@ -325,9 +295,7 @@ public void testCreateNamespaceWithOverwriteMode() { public void testCreateNamespaceWithOverwriteModeNamespaceDoesNotExist() { String namespaceName = "overwrite"; CreateNamespaceRequest request = - new CreateNamespaceRequest() - .id(ImmutableList.of(namespaceName)) - .mode(CreateNamespaceRequest.ModeEnum.OVERWRITE); + new CreateNamespaceRequest().id(ImmutableList.of(namespaceName)).mode("Overwrite"); // Mock namespace doesn't exist when(glue.getDatabase(any(GetDatabaseRequest.class))) @@ -347,7 +315,7 @@ public void 
testBasicCreateNamespaceWithLocationAndDescription() { CreateNamespaceRequest request = new CreateNamespaceRequest() .id(ImmutableList.of(namespaceName)) - .mode(CreateNamespaceRequest.ModeEnum.CREATE) + .mode("Create") .properties( ImmutableMap.of( "location", @@ -374,8 +342,7 @@ public void testBasicCreateNamespaceWithLocationAndDescription() { @Test public void testCreateNamespaceWithNullName() { - CreateNamespaceRequest request = - new CreateNamespaceRequest().mode(CreateNamespaceRequest.ModeEnum.CREATE); + CreateNamespaceRequest request = new CreateNamespaceRequest().mode("Create"); assertThrows(LanceNamespaceException.class, () -> glueNamespace.createNamespace(request)); } @@ -383,9 +350,7 @@ public void testCreateNamespaceWithNullName() { @Test public void testCreateNamespaceWithEmptyName() { CreateNamespaceRequest request = - new CreateNamespaceRequest() - .id(ImmutableList.of("")) - .mode(CreateNamespaceRequest.ModeEnum.CREATE); + new CreateNamespaceRequest().id(ImmutableList.of("")).mode("Create"); assertThrows(LanceNamespaceException.class, () -> glueNamespace.createNamespace(request)); } @@ -393,9 +358,7 @@ public void testCreateNamespaceWithEmptyName() { @Test public void testCreateNamespaceWithNestedParent() { CreateNamespaceRequest request = - new CreateNamespaceRequest() - .id(ImmutableList.of("parent", "ns1")) - .mode(CreateNamespaceRequest.ModeEnum.CREATE); + new CreateNamespaceRequest().id(ImmutableList.of("parent", "ns1")).mode("Create"); assertThrows(LanceNamespaceException.class, () -> glueNamespace.createNamespace(request)); } @@ -404,9 +367,7 @@ public void testCreateNamespaceWithNestedParent() { public void testDropNamespaceWithFailModeExists() { String namespaceName = "ns1"; DropNamespaceRequest request = - new DropNamespaceRequest() - .id(ImmutableList.of(namespaceName)) - .mode(DropNamespaceRequest.ModeEnum.FAIL); + new DropNamespaceRequest().id(ImmutableList.of(namespaceName)).mode("Fail"); // Mock database exists Database database = 
Database.builder().name(namespaceName).build(); @@ -431,9 +392,7 @@ public void testDropNamespaceWithFailModeExists() { public void testDropNamespaceWithFailModeDoesNotExist() { String namespaceName = "nonexistent"; DropNamespaceRequest request = - new DropNamespaceRequest() - .id(ImmutableList.of(namespaceName)) - .mode(DropNamespaceRequest.ModeEnum.FAIL); + new DropNamespaceRequest().id(ImmutableList.of(namespaceName)).mode("Fail"); when(glue.getDatabase(any(GetDatabaseRequest.class))) .thenThrow(EntityNotFoundException.builder().build()); @@ -445,9 +404,7 @@ public void testDropNamespaceWithFailModeDoesNotExist() { public void testDropNamespaceWithSkipModeDoesNotExist() { String namespaceName = "nonexistent"; DropNamespaceRequest request = - new DropNamespaceRequest() - .id(ImmutableList.of(namespaceName)) - .mode(DropNamespaceRequest.ModeEnum.SKIP); + new DropNamespaceRequest().id(ImmutableList.of(namespaceName)).mode("Skip"); when(glue.getDatabase(any(GetDatabaseRequest.class))) .thenThrow(EntityNotFoundException.builder().build()); @@ -463,8 +420,8 @@ public void testDropNamespaceWithRestrictBehaviorHasTables() { DropNamespaceRequest request = new DropNamespaceRequest() .id(ImmutableList.of(namespaceName)) - .mode(DropNamespaceRequest.ModeEnum.FAIL) - .behavior(DropNamespaceRequest.BehaviorEnum.RESTRICT); + .mode("Fail") + .behavior("Restrict"); Database database = Database.builder().name(namespaceName).build(); Table table = Table.builder().name("table").build(); @@ -479,111 +436,27 @@ public void testDropNamespaceWithRestrictBehaviorHasTables() { } @Test - public void testDropNamespaceWithCascadeBehaviorHasTables() { + public void testDropNamespaceWithCascadeBehaviorRejected() { String namespaceName = "ns1"; DropNamespaceRequest request = new DropNamespaceRequest() .id(ImmutableList.of(namespaceName)) - .mode(DropNamespaceRequest.ModeEnum.FAIL) - .behavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - - Database database = 
Database.builder().name(namespaceName).build(); - Table table1 = Table.builder().name("table1").build(); - Table table2 = Table.builder().name("table2").build(); - - // Mock database call - when(glue.getDatabase(any(GetDatabaseRequest.class))) - .thenReturn(GetDatabaseResponse.builder().database(database).build()); - - // Mock get tables for cascade - when(glue.getTables(any(GetTablesRequest.class))) - .thenReturn(GetTablesResponse.builder().tableList(table1, table2).build()); - - when(glue.deleteDatabase(any(DeleteDatabaseRequest.class))) - .thenReturn(DeleteDatabaseResponse.builder().build()); - - glueNamespace.dropNamespace(request); - - verify(glue).getTables(any(GetTablesRequest.class)); - verify(glue, times(2)).deleteTable(any(DeleteTableRequest.class)); - verify(glue).deleteDatabase(any(DeleteDatabaseRequest.class)); - } - - @Test - public void testDeleteAllTablesDropsLanceAndNonLance() throws Exception { - String namespace = "ns1"; - Path nsDir = tempDir.resolve(namespace); - Path lanceTable = nsDir.resolve("tbl1"); - - // First create a lance table - org.lance.namespace.model.CreateTableRequest createReq = - new org.lance.namespace.model.CreateTableRequest() - .id(ImmutableList.of(namespace, "tbl1")) - .location(lanceTable.toString()); - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenReturn( - software.amazon.awssdk.services.glue.model.CreateTableResponse.builder().build()); - glueNamespace.createTable(createReq, createTestArrowData()); - - // Create a mocked directory of another table with data - Path nonLanceTable = nsDir.resolve("tbl2"); - Files.createDirectories(nonLanceTable); - Files.write(nonLanceTable.resolve("foo.metadata"), "bar".getBytes()); - assertTrue(Files.exists(nonLanceTable.resolve("foo.metadata"))); - when(glue.getDatabase(any(GetDatabaseRequest.class))) - .thenReturn( - GetDatabaseResponse.builder() - .database(Database.builder().name(namespace).build()) - .build()); - - // Mock 
delete glue calls - Table t1 = - Table.builder() - .databaseName(namespace) - .name("tbl1") - .storageDescriptor(StorageDescriptor.builder().location(lanceTable.toString()).build()) - .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE)) - .build(); - Table t2 = - Table.builder() - .databaseName(namespace) - .name("tbl2") - .storageDescriptor( - StorageDescriptor.builder().location(nonLanceTable.toString()).build()) - .build(); - - when(glue.getTables(any(GetTablesRequest.class))) - .thenReturn(GetTablesResponse.builder().tableList(t1, t2).build()); - when(glue.deleteTable(any(DeleteTableRequest.class))) - .thenReturn(DeleteTableResponse.builder().build()); - when(glue.deleteDatabase(any(DeleteDatabaseRequest.class))) - .thenReturn(DeleteDatabaseResponse.builder().build()); - - // Drop with cascade - DropNamespaceRequest drop = - new DropNamespaceRequest() - .id(ImmutableList.of(namespace)) - .mode(DropNamespaceRequest.ModeEnum.FAIL) - .behavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - glueNamespace.dropNamespace(drop); + .mode("Fail") + .behavior("Cascade"); - assertFalse(Files.exists(lanceTable), "Lance dataset directory should have been deleted"); - assertFalse(Files.exists(nonLanceTable), "Non-Lance directory should have been deleted"); + // CASCADE behavior should be rejected + assertThrows(InvalidInputException.class, () -> glueNamespace.dropNamespace(request)); } @Test public void testDropNamespaceWithNullName() { - DropNamespaceRequest request = - new DropNamespaceRequest().mode(DropNamespaceRequest.ModeEnum.FAIL); + DropNamespaceRequest request = new DropNamespaceRequest().mode("Fail"); assertThrows(LanceNamespaceException.class, () -> glueNamespace.dropNamespace(request)); } @Test public void testDropNamespaceWithEmptyName() { - DropNamespaceRequest request = - new DropNamespaceRequest() - .id(ImmutableList.of("")) - .mode(DropNamespaceRequest.ModeEnum.FAIL); + DropNamespaceRequest request = new 
DropNamespaceRequest().id(ImmutableList.of("")).mode("Fail"); assertThrows(LanceNamespaceException.class, () -> glueNamespace.dropNamespace(request)); } @@ -591,64 +464,11 @@ public void testDropNamespaceWithEmptyName() { @Test public void testDropNamespaceWithNestedParent() { DropNamespaceRequest request = - new DropNamespaceRequest() - .id(ImmutableList.of("parent", "ns1")) - .mode(DropNamespaceRequest.ModeEnum.FAIL); + new DropNamespaceRequest().id(ImmutableList.of("parent", "ns1")).mode("Fail"); assertThrows(LanceNamespaceException.class, () -> glueNamespace.dropNamespace(request)); } - @Test - public void testNamespaceExistsTrue() { - String namespaceName = "existing"; - NamespaceExistsRequest request = - new NamespaceExistsRequest().id(ImmutableList.of(namespaceName)); - - Database database = Database.builder().name(namespaceName).build(); - when(glue.getDatabase(any(GetDatabaseRequest.class))) - .thenReturn(GetDatabaseResponse.builder().database(database).build()); - - // Should not throw any exception for existing namespace - glueNamespace.namespaceExists(request); - - verify(glue).getDatabase(any(GetDatabaseRequest.class)); - } - - @Test - public void testNamespaceExistsFalse() { - String namespaceName = "nonexistent"; - NamespaceExistsRequest request = - new NamespaceExistsRequest().id(ImmutableList.of(namespaceName)); - - when(glue.getDatabase(any(GetDatabaseRequest.class))) - .thenThrow(EntityNotFoundException.builder().message("Entity Not Found").build()); - - assertThrows(LanceNamespaceException.class, () -> glueNamespace.namespaceExists(request)); - verify(glue).getDatabase(any(GetDatabaseRequest.class)); - } - - @Test - public void testNamespaceExistsWithNullName() { - NamespaceExistsRequest request = new NamespaceExistsRequest(); - - assertThrows(LanceNamespaceException.class, () -> glueNamespace.namespaceExists(request)); - } - - @Test - public void testNamespaceExistsWithEmptyName() { - NamespaceExistsRequest request = new 
NamespaceExistsRequest().id(ImmutableList.of("")); - - assertThrows(LanceNamespaceException.class, () -> glueNamespace.namespaceExists(request)); - } - - @Test - public void testNamespaceExistsWithNestedParent() { - NamespaceExistsRequest request = - new NamespaceExistsRequest().id(ImmutableList.of("parent", "test")); - - assertThrows(LanceNamespaceException.class, () -> glueNamespace.namespaceExists(request)); - } - @Test public void testBasicListTables() { Map parameters = ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE); @@ -782,67 +602,6 @@ public void testDescribeTableWithInvalidId() { () -> glueNamespace.describeTable(new DescribeTableRequest().id(ImmutableList.of("ns1")))); } - @Test - public void testBasicRegisterTable() { - RegisterTableRequest req = - new RegisterTableRequest() - .id(ImmutableList.of("ns1", "tbl")) - .location("s3://bucket/tbl") - .properties(ImmutableMap.of("key", "val")); - - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenReturn( - software.amazon.awssdk.services.glue.model.CreateTableResponse.builder().build()); - - RegisterTableResponse resp = glueNamespace.registerTable(req); - assertEquals("s3://bucket/tbl", resp.getLocation()); - assertEquals(ImmutableMap.of("key", "val"), resp.getProperties()); - } - - @Test - public void testRegisterTableAlreadyExists() { - RegisterTableRequest req = - new RegisterTableRequest().id(ImmutableList.of("ns1", "tbl")).location("s3://bucket/tbl"); - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenThrow(AlreadyExistsException.builder().message("Table Already Exists").build()); - - assertThrows(LanceNamespaceException.class, () -> glueNamespace.registerTable(req)); - } - - @Test - public void testRegisterTableWithOverwrite() { - // First create a table - RegisterTableRequest req = - new RegisterTableRequest().id(ImmutableList.of("ns", "tbl")).location("s3://bucket/tbl"); - - 
when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenReturn( - software.amazon.awssdk.services.glue.model.CreateTableResponse.builder().build()); - - glueNamespace.registerTable(req); - - // Now overwrite - req.setMode(RegisterTableRequest.ModeEnum.OVERWRITE); - glueNamespace.registerTable(req); - } - - @Test - public void testRegisterTableNamespaceNotFound() { - RegisterTableRequest req = - new RegisterTableRequest().id(ImmutableList.of("ns1", "tbl")).location("s3://bucket/tbl"); - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenThrow(EntityNotFoundException.builder().message("Database Not Found").build()); - - assertThrows(LanceNamespaceException.class, () -> glueNamespace.registerTable(req)); - } - - @Test - public void testRegisterTableMissingLocation() { - RegisterTableRequest req = - new RegisterTableRequest().id(ImmutableList.of("ns1", "tbl")).location(""); - assertThrows(LanceNamespaceException.class, () -> glueNamespace.registerTable(req)); - } - @Test public void testBasicDeregisterTable() { List id = ImmutableList.of("ns1", "tbl"); @@ -894,281 +653,4 @@ public void testDeregisterTableNotFound() { glueNamespace.deregisterTable( new DeregisterTableRequest().id(ImmutableList.of("ns1", "tbl")))); } - - @Test - public void testTableExistsNoVersion() { - ImmutableMap parameters = - ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE); - - TableExistsRequest req = new TableExistsRequest().id(ImmutableList.of("ns1", "tbl")); - - when(glue.getTable(any(GetTableRequest.class))) - .thenReturn( - GetTableResponse.builder() - .table(Table.builder().name("tbl").parameters(parameters).build()) - .build()); - - glueNamespace.tableExists(req); - } - - @Test - public void testTableExistsWithVersion() { - TableExistsRequest req = - new TableExistsRequest().id(ImmutableList.of("ns1", "tbl")).version(42L); - - TableVersion tableVersion = - TableVersion.builder() - 
.table( - Table.builder() - .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE)) - .build()) - .build(); - - when(glue.getTableVersion(any(GetTableVersionRequest.class))) - .thenReturn(GetTableVersionResponse.builder().tableVersion(tableVersion).build()); - - glueNamespace.tableExists(req); - } - - @Test - public void testTableExistsNotFound() { - TableExistsRequest req = new TableExistsRequest().id(ImmutableList.of("ns1", "tbl")); - when(glue.getTable(any(GetTableRequest.class))) - .thenThrow(EntityNotFoundException.builder().message("Entity Not Found").build()); - - assertThrows(LanceNamespaceException.class, () -> glueNamespace.tableExists(req)); - } - - @Test - public void testTableExistsInvalidId() { - TableExistsRequest req = new TableExistsRequest(); - - req.addIdItem("ns1"); - req.addIdItem(null); - - assertThrows(LanceNamespaceException.class, () -> glueNamespace.tableExists(req)); - } - - @Test - public void testBasicCreateTable() { - String location = tempDir.resolve("ns1/tbl").toString(); - CreateTableRequest request = - new CreateTableRequest().id(ImmutableList.of("ns1", "tbl")).location(location); - - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenReturn( - software.amazon.awssdk.services.glue.model.CreateTableResponse.builder().build()); - - CreateTableResponse response = glueNamespace.createTable(request, createTestArrowData()); - assertNotNull(response); - assertNotNull(response.getLocation()); - assertTrue(response.getLocation().contains("tbl")); - assertEquals(Long.valueOf(1), response.getVersion()); - - // Verify Lance dataset was created (check for _versions directory) - File tableDir = new File(location); - assertTrue(tableDir.exists()); - assertTrue(tableDir.isDirectory()); - - File versionsDir = new File(location, "_versions"); - assertTrue(versionsDir.exists()); - assertTrue(versionsDir.isDirectory()); - - // Verify dataset can be loaded and has expected schema - try 
(Dataset dataset = Dataset.open(response.getLocation(), allocator)) { - assertNotNull(dataset); - assertNotNull(dataset.getSchema()); - assertEquals(2, dataset.getSchema().getFields().size()); - assertEquals("id", dataset.getSchema().getFields().get(0).getName()); - assertEquals("name", dataset.getSchema().getFields().get(1).getName()); - } catch (Exception e) { - throw new RuntimeException("Failed to verify created dataset", e); - } - } - - @Test - public void testCreateTableDerivesLocationFromNamespaceUri() throws Exception { - org.lance.namespace.model.CreateTableRequest req = - new org.lance.namespace.model.CreateTableRequest() - .id(ImmutableList.of("ns1", "tbl")); - - Database db = - Database.builder() - .name("ns1") - .locationUri(tempDir.resolve("ns1").toUri().toString()) - .build(); - - when(glue.getDatabase(any(GetDatabaseRequest.class))) - .thenReturn(GetDatabaseResponse.builder().database(db).build()); - - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenReturn( - software.amazon.awssdk.services.glue.model.CreateTableResponse.builder().build()); - - org.lance.namespace.model.CreateTableResponse resp = - glueNamespace.createTable(req, createTestArrowData()); - - String expectedPrefix = tempDir.resolve("ns1").resolve("tbl").toString(); - assertTrue(resp.getLocation().contains(expectedPrefix)); - - File dir = new File(new URI(resp.getLocation())); - assertTrue(dir.exists(), "Derived table directory must exist"); - assertTrue(new File(dir, "_versions").isDirectory()); - } - - @Test - public void testCreateTableDerivesLocationFromDefaultRootWhenNoNamespaceUri() { - // Initialize with a custom root directory using tempDir - GlueNamespaceConfig config = - new GlueNamespaceConfig(ImmutableMap.of("root", tempDir.toString())); - glueNamespace.initialize(config, glue, allocator); - - org.lance.namespace.model.CreateTableRequest req = - new org.lance.namespace.model.CreateTableRequest() - 
.id(ImmutableList.of("ns1", "tbl")); - - Database db = Database.builder().name("ns1").build(); - - when(glue.getDatabase(any(GetDatabaseRequest.class))) - .thenReturn(GetDatabaseResponse.builder().database(db).build()); - - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenReturn( - software.amazon.awssdk.services.glue.model.CreateTableResponse.builder().build()); - - // Now the implementation uses the configured root when namespace URI is missing - org.lance.namespace.model.CreateTableResponse resp = - glueNamespace.createTable(req, createTestArrowData()); - - // The location should be derived from the configured root - assertNotNull(resp.getLocation()); - String expectedLocationPattern = String.format("%s/ns1/tbl.lance", tempDir.toString()); - assertEquals(expectedLocationPattern, resp.getLocation()); - - // Verify the dataset was created - Path tableDir = tempDir.resolve("ns1").resolve("tbl.lance"); - assertTrue(Files.exists(tableDir)); - assertTrue(Files.exists(tableDir.resolve("_versions"))); - } - - @Test - public void testCreateTableConflictCleansUpDataset() { - String namespace = "ns"; - String tbl = "tbl"; - Path loc = tempDir.resolve(namespace).resolve(tbl); - - org.lance.namespace.model.CreateTableRequest req = - new org.lance.namespace.model.CreateTableRequest() - .id(ImmutableList.of(namespace, tbl)) - .location(loc.toString()); - - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenThrow(AlreadyExistsException.builder().message("Table already exists").build()); - - assertThrows( - LanceNamespaceException.class, () -> glueNamespace.createTable(req, createTestArrowData())); - assertFalse(Files.exists(loc), "Dataset should be removed on exception"); - } - - @Test - public void testCreateTableMissingSchema() { - org.lance.namespace.model.CreateTableRequest req = - new org.lance.namespace.model.CreateTableRequest() - .id(ImmutableList.of("ns", "tbl")) 
- .location(tempDir.toString()); - - LanceNamespaceException ex = - assertThrows( - LanceNamespaceException.class, () -> glueNamespace.createTable(req, new byte[0])); - assertTrue(ex.getMessage().contains("Arrow IPC")); - } - - @Test - public void testDropTableExplicitLocationOnDisk() { - // First create table - Path location = tempDir.resolve("ns1/tbl"); - List tableId = ImmutableList.of("ns1", "tbl"); - org.lance.namespace.model.CreateTableRequest request = - new org.lance.namespace.model.CreateTableRequest() - .id(tableId) - .location(location.toString()); - - when(glue.createTable(any(software.amazon.awssdk.services.glue.model.CreateTableRequest.class))) - .thenReturn( - software.amazon.awssdk.services.glue.model.CreateTableResponse.builder().build()); - - glueNamespace.createTable(request, createTestArrowData()); - - // Verify it exists - File tableDir = new File(location.toString()); - assertTrue(tableDir.exists()); - File versionsDir = new File(tableDir, "_versions"); - assertTrue(versionsDir.exists()); - - // Drop the table - Table tbl = - Table.builder() - .databaseName(tableId.get(0)) - .name(tableId.get(1)) - .storageDescriptor(StorageDescriptor.builder().location(location.toString()).build()) - .parameters(ImmutableMap.of(TABLE_TYPE_PROP, LANCE_TABLE_TYPE_VALUE)) - .build(); - when(glue.getTable(any(GetTableRequest.class))) - .thenReturn(GetTableResponse.builder().table(tbl).build()); - - DropTableRequest dropRequest = new DropTableRequest(); - dropRequest.setId(tableId); - DropTableResponse response = glueNamespace.dropTable(dropRequest); - - assertNotNull(response); - assertFalse(tableDir.exists()); - } - - @Test - public void testDropTableTableNotFound() { - when(glue.getTable(any(GetTableRequest.class))) - .thenThrow(EntityNotFoundException.builder().message("Entity Not found").build()); - DropTableRequest req = new DropTableRequest().id(ImmutableList.of("ns1", "tbl")); - LanceNamespaceException e = - assertThrows(LanceNamespaceException.class, () 
-> glueNamespace.dropTable(req)); - - assertTrue(e.getMessage().contains("Glue table not found: ns1.tbl")); - } - - private JsonArrowSchema createTestSchema() { - // Create a simple schema with id (int32) and name (string) fields - JsonArrowDataType intType = new JsonArrowDataType(); - intType.setType("int32"); - - JsonArrowDataType stringType = new JsonArrowDataType(); - stringType.setType("utf8"); - - JsonArrowField idField = new JsonArrowField(); - idField.setName("id"); - idField.setType(intType); - idField.setNullable(false); - - JsonArrowField nameField = new JsonArrowField(); - nameField.setName("name"); - nameField.setType(stringType); - nameField.setNullable(true); - - List fields = new ArrayList<>(); - fields.add(idField); - fields.add(nameField); - - JsonArrowSchema schema = new JsonArrowSchema(); - schema.setFields(fields); - return schema; - } - - private byte[] createTestArrowData() { - // Create a proper Arrow IPC stream with test schema - try { - return org.lance.namespace.util.ArrowIpcUtil.createEmptyArrowIpcStream( - createTestSchema()); - } catch (Exception e) { - throw new RuntimeException("Failed to create test Arrow data", e); - } - } } diff --git a/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespaceIntegration.java b/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespaceIntegration.java new file mode 100644 index 0000000..e37d6bf --- /dev/null +++ b/java/lance-namespace-glue/src/test/java/org/lance/namespace/glue/TestGlueNamespaceIntegration.java @@ -0,0 +1,322 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.glue; + +import org.lance.namespace.errors.LanceNamespaceException; +import org.lance.namespace.model.CreateEmptyTableRequest; +import org.lance.namespace.model.CreateEmptyTableResponse; +import org.lance.namespace.model.CreateNamespaceRequest; +import org.lance.namespace.model.CreateNamespaceResponse; +import org.lance.namespace.model.DeregisterTableRequest; +import org.lance.namespace.model.DescribeNamespaceRequest; +import org.lance.namespace.model.DescribeNamespaceResponse; +import org.lance.namespace.model.DescribeTableRequest; +import org.lance.namespace.model.DescribeTableResponse; +import org.lance.namespace.model.DropNamespaceRequest; +import org.lance.namespace.model.ListNamespacesRequest; +import org.lance.namespace.model.ListNamespacesResponse; +import org.lance.namespace.model.ListTablesRequest; +import org.lance.namespace.model.ListTablesResponse; +import org.lance.namespace.model.NamespaceExistsRequest; +import org.lance.namespace.model.TableExistsRequest; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; 
+import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Integration tests for GlueNamespace against a real AWS Glue catalog. + * + *

<p>To run these tests locally: + * + * <ol> + *   <li>Configure AWS credentials (via environment variables, ~/.aws/credentials, or IAM role) + *   <li>Set AWS_S3_BUCKET_NAME environment variable + *   <li>Run: make integ-test-glue + * </ol> + * + * <p>
Tests are automatically skipped if AWS credentials are not available. + */ +public class TestGlueNamespaceIntegration { + + private static final String AWS_REGION = + System.getenv("AWS_REGION") != null ? System.getenv("AWS_REGION") : "us-east-1"; + private static final String AWS_S3_BUCKET_NAME = System.getenv("AWS_S3_BUCKET_NAME"); + private static boolean awsCredentialsAvailable = false; + private static String s3Root; + + private GlueNamespace namespace; + private BufferAllocator allocator; + private String testDatabase; + private List createdDatabases; + + @BeforeAll + public static void checkAwsCredentialsAvailable() { + // Check if S3 bucket is configured + if (AWS_S3_BUCKET_NAME == null || AWS_S3_BUCKET_NAME.isEmpty()) { + System.out.println("AWS_S3_BUCKET_NAME not set - skipping integration tests"); + awsCredentialsAvailable = false; + return; + } + + // Check if AWS credentials are available via environment variables + String accessKeyId = System.getenv("AWS_ACCESS_KEY_ID"); + String secretAccessKey = System.getenv("AWS_SECRET_ACCESS_KEY"); + + if (accessKeyId != null + && !accessKeyId.isEmpty() + && secretAccessKey != null + && !secretAccessKey.isEmpty()) { + awsCredentialsAvailable = true; + System.out.println("AWS credentials found in environment variables"); + } else { + // Try to use default credentials chain by making a simple API call + try { + software.amazon.awssdk.services.sts.StsClient stsClient = + software.amazon.awssdk.services.sts.StsClient.builder() + .region(software.amazon.awssdk.regions.Region.of(AWS_REGION)) + .build(); + stsClient.getCallerIdentity(); + stsClient.close(); + awsCredentialsAvailable = true; + System.out.println("AWS credentials found via default credentials chain"); + } catch (Exception e) { + awsCredentialsAvailable = false; + System.out.println( + "AWS credentials not available (" + e.getMessage() + ") - skipping integration tests"); + } + } + + if (awsCredentialsAvailable) { + String uniqueId = 
UUID.randomUUID().toString().substring(0, 8); + s3Root = "s3://" + AWS_S3_BUCKET_NAME + "/lance_glue_test_" + uniqueId; + System.out.println("Using S3 root: " + s3Root); + } + } + + @BeforeEach + public void setUp() { + Assumptions.assumeTrue(awsCredentialsAvailable, "AWS credentials are not available"); + + allocator = new RootAllocator(); + namespace = new GlueNamespace(); + createdDatabases = new ArrayList<>(); + + String uniqueId = UUID.randomUUID().toString().substring(0, 8); + testDatabase = "lance_test_db_" + uniqueId; + + Map config = new HashMap<>(); + config.put("region", AWS_REGION); + config.put("root", s3Root); + + namespace.initialize(config, allocator); + } + + @AfterEach + public void tearDown() { + // Clean up test resources + for (String dbName : createdDatabases) { + try { + cleanupDatabase(dbName); + } catch (Exception e) { + // Ignore cleanup errors + } + } + + if (namespace != null) { + namespace.close(); + } + + if (allocator != null) { + allocator.close(); + } + } + + private void cleanupDatabase(String databaseName) { + try { + // First, delete all tables in the database + ListTablesRequest listRequest = new ListTablesRequest(); + listRequest.setId(Collections.singletonList(databaseName)); + ListTablesResponse listResponse = namespace.listTables(listRequest); + + for (String tableName : listResponse.getTables()) { + try { + DeregisterTableRequest deregRequest = new DeregisterTableRequest(); + deregRequest.setId(Arrays.asList(databaseName, tableName)); + namespace.deregisterTable(deregRequest); + } catch (Exception e) { + // Ignore + } + } + + // Then drop the database + DropNamespaceRequest dropRequest = new DropNamespaceRequest(); + dropRequest.setId(Collections.singletonList(databaseName)); + namespace.dropNamespace(dropRequest); + } catch (Exception e) { + // Ignore cleanup errors + } + } + + private String createTestDatabase(String suffix) { + String dbName = "lance_test_" + UUID.randomUUID().toString().substring(0, 8) + suffix; + 
createdDatabases.add(dbName); + + CreateNamespaceRequest createRequest = new CreateNamespaceRequest(); + createRequest.setId(Collections.singletonList(dbName)); + createRequest.setProperties( + Collections.singletonMap("description", "Lance integration test database")); + namespace.createNamespace(createRequest); + + return dbName; + } + + @Test + public void testNamespaceOperations() { + String dbName = "lance_test_" + UUID.randomUUID().toString().substring(0, 8); + createdDatabases.add(dbName); + + // Create namespace + CreateNamespaceRequest createRequest = new CreateNamespaceRequest(); + createRequest.setId(Collections.singletonList(dbName)); + createRequest.setProperties(Collections.singletonMap("description", "Test database for Lance")); + + CreateNamespaceResponse createResponse = namespace.createNamespace(createRequest); + assertThat(createResponse).isNotNull(); + + // Describe namespace + DescribeNamespaceRequest describeRequest = new DescribeNamespaceRequest(); + describeRequest.setId(Collections.singletonList(dbName)); + + DescribeNamespaceResponse describeResponse = namespace.describeNamespace(describeRequest); + assertThat(describeResponse).isNotNull(); + assertThat(describeResponse.getProperties()) + .containsEntry("description", "Test database for Lance"); + + // Check namespace exists + NamespaceExistsRequest existsRequest = new NamespaceExistsRequest(); + existsRequest.setId(Collections.singletonList(dbName)); + namespace.namespaceExists(existsRequest); // Should not throw + + // List namespaces + ListNamespacesRequest listRequest = new ListNamespacesRequest(); + listRequest.setId(Collections.emptyList()); + ListNamespacesResponse listResponse = namespace.listNamespaces(listRequest); + assertThat(listResponse.getNamespaces()).contains(dbName); + + // Drop namespace + DropNamespaceRequest dropRequest = new DropNamespaceRequest(); + dropRequest.setId(Collections.singletonList(dbName)); + namespace.dropNamespace(dropRequest); + 
createdDatabases.remove(dbName); + + // Verify namespace doesn't exist + assertThatThrownBy(() -> namespace.namespaceExists(existsRequest)) + .isInstanceOf(LanceNamespaceException.class); + } + + @Test + public void testTableOperations() { + String dbName = createTestDatabase(""); + String tableName = "test_table_" + UUID.randomUUID().toString().substring(0, 8); + String tableLocation = s3Root + "/" + dbName + "/" + tableName + ".lance"; + + // Create empty table + CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); + createRequest.setId(Arrays.asList(dbName, tableName)); + createRequest.setLocation(tableLocation); + + CreateEmptyTableResponse createResponse = namespace.createEmptyTable(createRequest); + assertThat(createResponse.getLocation()).isNotNull(); + assertThat(createResponse.getLocation()).isEqualTo(tableLocation); + + // Describe table + DescribeTableRequest describeRequest = new DescribeTableRequest(); + describeRequest.setId(Arrays.asList(dbName, tableName)); + + DescribeTableResponse describeResponse = namespace.describeTable(describeRequest); + assertThat(describeResponse.getLocation()).isNotNull(); + assertThat(describeResponse.getLocation()).isEqualTo(tableLocation); + + // Check table exists + TableExistsRequest existsRequest = new TableExistsRequest(); + existsRequest.setId(Arrays.asList(dbName, tableName)); + namespace.tableExists(existsRequest); // Should not throw + + // List tables + ListTablesRequest listRequest = new ListTablesRequest(); + listRequest.setId(Collections.singletonList(dbName)); + + ListTablesResponse listResponse = namespace.listTables(listRequest); + assertThat(listResponse.getTables()).contains(tableName); + + // Deregister table + DeregisterTableRequest deregisterRequest = new DeregisterTableRequest(); + deregisterRequest.setId(Arrays.asList(dbName, tableName)); + namespace.deregisterTable(deregisterRequest); + + // Verify table doesn't exist + assertThatThrownBy(() -> 
namespace.tableExists(existsRequest)) + .isInstanceOf(LanceNamespaceException.class); + } + + @Test + public void testMultipleTablesInNamespace() { + String dbName = createTestDatabase(""); + List tableNames = new ArrayList<>(); + + // Create multiple tables + for (int i = 0; i < 3; i++) { + String tableName = "table_" + i + "_" + UUID.randomUUID().toString().substring(0, 6); + tableNames.add(tableName); + + String tableLocation = s3Root + "/" + dbName + "/" + tableName + ".lance"; + CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); + createRequest.setId(Arrays.asList(dbName, tableName)); + createRequest.setLocation(tableLocation); + namespace.createEmptyTable(createRequest); + } + + // List tables and verify all are present + ListTablesRequest listRequest = new ListTablesRequest(); + listRequest.setId(Collections.singletonList(dbName)); + + ListTablesResponse listResponse = namespace.listTables(listRequest); + for (String tableName : tableNames) { + assertThat(listResponse.getTables()).contains(tableName); + } + + // Clean up tables + for (String tableName : tableNames) { + DeregisterTableRequest deregisterRequest = new DeregisterTableRequest(); + deregisterRequest.setId(Arrays.asList(dbName, tableName)); + namespace.deregisterTable(deregisterRequest); + } + } +} diff --git a/java/lance-namespace-hive2/pom.xml b/java/lance-namespace-hive2/pom.xml index f21cf78..52585f1 100644 --- a/java/lance-namespace-hive2/pom.xml +++ b/java/lance-namespace-hive2/pom.xml @@ -22,6 +22,10 @@ org.lance lance-core + + org.lance + lance-namespace-core + org.lance lance-namespace-apache-client @@ -159,6 +163,12 @@ 4.1.19 test + + org.lance + lance-namespace-impls-core + ${project.version} + test + org.junit.jupiter junit-jupiter diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ClientPoolImpl.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ClientPoolImpl.java new file mode 100644 index 0000000..e4fcd8c --- 
/dev/null +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ClientPoolImpl.java @@ -0,0 +1,116 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive2; + +import java.io.Closeable; +import java.util.ArrayDeque; +import java.util.Deque; + +/** + * A simple connection pool implementation for reusing clients. Adapted from Apache Iceberg. + * + * @param the client type + * @param the exception type thrown by client operations + */ +public abstract class ClientPoolImpl implements Closeable { + + private final int poolSize; + private final Deque clients; + private final Class reconnectExc; + private final boolean retryByDefault; + private volatile int currentSize; + private boolean closed; + + protected ClientPoolImpl(int poolSize, Class reconnectExc, boolean retryByDefault) { + this.poolSize = poolSize; + this.clients = new ArrayDeque<>(); + this.reconnectExc = reconnectExc; + this.retryByDefault = retryByDefault; + this.currentSize = 0; + this.closed = false; + } + + public interface Action { + R run(C client) throws E; + } + + public R run(Action action) throws E, InterruptedException { + return run(action, retryByDefault); + } + + public R run(Action action, boolean retry) throws E, InterruptedException { + C client = get(); + try { + return action.run(client); + } catch (Exception exc) { + if (retry && isConnectionException(exc)) { + try { + client = reconnect(client); + } catch (Exception 
reconnectExc) { + release(client); + throw (E) exc; + } + return action.run(client); + } + throw (E) exc; + } finally { + release(client); + } + } + + protected abstract C newClient(); + + protected abstract C reconnect(C client); + + protected abstract void close(C client); + + protected boolean isConnectionException(Exception exc) { + return reconnectExc.isInstance(exc); + } + + private synchronized C get() throws InterruptedException { + if (closed) { + throw new IllegalStateException("Cannot get a client from a closed pool"); + } + + while (clients.isEmpty() && currentSize >= poolSize) { + wait(); + } + + if (!clients.isEmpty()) { + return clients.removeFirst(); + } + + currentSize++; + return newClient(); + } + + private synchronized void release(C client) { + if (closed) { + close(client); + } else { + clients.addFirst(client); + notify(); + } + } + + @Override + public synchronized void close() { + this.closed = true; + while (!clients.isEmpty()) { + close(clients.removeFirst()); + } + notifyAll(); + } +} diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/CommonUtil.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/CommonUtil.java new file mode 100644 index 0000000..bbfb978 --- /dev/null +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/CommonUtil.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.lance.namespace.hive2; + +/** Common utility methods. */ +public class CommonUtil { + + private CommonUtil() {} + + public static String formatCurrentStackTrace() { + StackTraceElement[] stack = Thread.currentThread().getStackTrace(); + StringBuilder sb = new StringBuilder(); + for (int i = 2; i < Math.min(stack.length, 10); i++) { + sb.append(stack[i].toString()).append("\n"); + } + return sb.toString(); + } + + public static String makeQualified(String path) { + if (path == null) { + return null; + } + return path.endsWith("/") ? path.substring(0, path.length() - 1) : path; + } +} diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2ClientPool.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2ClientPool.java index 9ee9cb4..eef2bf7 100644 --- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2ClientPool.java +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2ClientPool.java @@ -13,8 +13,6 @@ */ package org.lance.namespace.hive2; -import org.lance.namespace.util.ClientPoolImpl; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java index 1812ac0..7c07ac2 100644 --- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Namespace.java @@ -13,38 +13,33 @@ */ package org.lance.namespace.hive2; -import com.lancedb.lance.Dataset; -import com.lancedb.lance.WriteParams; -import org.lance.namespace.Configurable; +import org.lance.Dataset; +import org.lance.WriteParams; import org.lance.namespace.LanceNamespace; -import 
org.lance.namespace.LanceNamespaceException; -import org.lance.namespace.ObjectIdentifier; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.NamespaceAlreadyExistsException; +import org.lance.namespace.errors.NamespaceNotFoundException; +import org.lance.namespace.errors.ServiceUnavailableException; +import org.lance.namespace.errors.TableAlreadyExistsException; +import org.lance.namespace.errors.TableNotFoundException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; +import org.lance.namespace.model.DeregisterTableRequest; +import org.lance.namespace.model.DeregisterTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; -import org.lance.namespace.model.JsonArrowSchema; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; import org.lance.namespace.model.ListTablesResponse; import org.lance.namespace.model.NamespaceExistsRequest; import org.lance.namespace.model.TableExistsRequest; -import org.lance.namespace.util.ArrowIpcUtil; -import org.lance.namespace.util.CommonUtil; -import org.lance.namespace.util.JsonArrowSchemaConverter; -import org.lance.namespace.util.PageUtil; -import org.lance.namespace.util.ValidationUtil; import 
com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -60,7 +55,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -73,7 +67,7 @@ import static org.lance.namespace.hive2.Hive2ErrorType.TableAlreadyExists; import static org.lance.namespace.hive2.Hive2ErrorType.TableNotFound; -public class Hive2Namespace implements LanceNamespace, Configurable { +public class Hive2Namespace implements LanceNamespace { private static final Logger LOG = LoggerFactory.getLogger(Hive2Namespace.class); private Hive2ClientPool clientPool; @@ -83,6 +77,16 @@ public class Hive2Namespace implements LanceNamespace, Configurable configProperties, BufferAllocator allocator) { this.allocator = allocator; @@ -125,7 +129,7 @@ public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { @Override public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { ObjectIdentifier id = ObjectIdentifier.of(request.getId()); - CreateNamespaceRequest.ModeEnum mode = request.getMode(); + String mode = request.getMode() != null ? 
request.getMode().toLowerCase() : "create"; Map properties = request.getProperties(); ValidationUtil.checkArgument( @@ -149,11 +153,10 @@ public void namespaceExists(NamespaceExistsRequest request) { Database database = Hive2Util.getDatabaseOrNull(clientPool, db); if (database == null) { - throw LanceNamespaceException.notFound( + throw new NamespaceNotFoundException( String.format("Namespace does not exist: %s", id.stringStyleId()), HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + id.stringStyleId()); } } @@ -168,11 +171,10 @@ public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest requ Database database = Hive2Util.getDatabaseOrNull(clientPool, db); if (database == null) { - throw LanceNamespaceException.notFound( + throw new NamespaceNotFoundException( String.format("Namespace does not exist: %s", id.stringStyleId()), HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + id.stringStyleId()); } DescribeNamespaceResponse response = new DescribeNamespaceResponse(); @@ -201,20 +203,17 @@ public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest requ @Override public DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { + if ("Cascade".equalsIgnoreCase(request.getBehavior())) { + throw new InvalidInputException("Cascade behavior is not supported for this implementation"); + } + ObjectIdentifier id = ObjectIdentifier.of(request.getId()); - DropNamespaceRequest.ModeEnum mode = request.getMode(); - DropNamespaceRequest.BehaviorEnum behavior = request.getBehavior(); + String mode = request.getMode() != null ? request.getMode().toLowerCase() : "fail"; + String behavior = request.getBehavior() != null ? 
request.getBehavior() : "Restrict"; ValidationUtil.checkArgument( !id.isRoot() && id.levels() <= 1, "Expect a 1-level namespace but get %s", id); - if (mode == null) { - mode = DropNamespaceRequest.ModeEnum.FAIL; - } - if (behavior == null) { - behavior = DropNamespaceRequest.BehaviorEnum.RESTRICT; - } - Map properties = doDropNamespace(id, mode, behavior); DropNamespaceResponse response = new DropNamespaceResponse(); @@ -256,11 +255,10 @@ public void tableExists(TableExistsRequest request) { Optional hmsTable = Hive2Util.getTable(clientPool, db, table); if (!hmsTable.isPresent()) { - throw LanceNamespaceException.notFound( + throw new TableNotFoundException( String.format("Table does not exist: %s", tableId.stringStyleId()), TableNotFound.getType(), - tableId.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + tableId.stringStyleId()); } Hive2Util.validateLanceTable(hmsTable.get()); @@ -268,6 +266,11 @@ public void tableExists(TableExistsRequest request) { @Override public DescribeTableResponse describeTable(DescribeTableRequest request) { + if (Boolean.TRUE.equals(request.getLoadDetailedMetadata())) { + throw new InvalidInputException( + "load_detailed_metadata=true is not supported for this implementation"); + } + ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( @@ -276,11 +279,10 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { Optional location = doDescribeTable(tableId); if (!location.isPresent()) { - throw LanceNamespaceException.notFound( + throw new TableNotFoundException( String.format("Table does not exist: %s", tableId.stringStyleId()), TableNotFound.getType(), - tableId.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + tableId.stringStyleId()); } DescribeTableResponse response = new DescribeTableResponse(); @@ -289,45 +291,7 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { return response; } - @Override - public CreateTableResponse 
createTable(CreateTableRequest request, byte[] requestData) { - // Validate that requestData is a valid Arrow IPC stream - ValidationUtil.checkNotNull( - requestData, "Request data (Arrow IPC stream) is required for createTable"); - ValidationUtil.checkArgument( - requestData.length > 0, "Request data (Arrow IPC stream) cannot be empty"); - - ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); - - // Extract schema from Arrow IPC stream - JsonArrowSchema jsonSchema; - try { - jsonSchema = ArrowIpcUtil.extractSchemaFromIpc(requestData); - } catch (IOException e) { - throw LanceNamespaceException.badRequest( - "Invalid Arrow IPC stream: " + e.getMessage(), - "INVALID_ARROW_IPC", - tableId.stringStyleId(), - "Failed to extract schema from Arrow IPC stream"); - } - Schema schema = JsonArrowSchemaConverter.convertToArrowSchema(jsonSchema); - - ValidationUtil.checkArgument( - tableId.levels() == 2, "Expect 2-level table identifier but get %s", tableId); - - String location = request.getLocation(); - if (location == null || location.isEmpty()) { - location = getDefaultTableLocation(tableId.levelAtListPos(0), tableId.levelAtListPos(1)); - } - - doCreateTable(tableId, schema, location, request.getProperties(), requestData); - - CreateTableResponse response = new CreateTableResponse(); - response.setLocation(location); - response.setVersion(1L); - response.setStorageOptions(config.getStorageOptions()); - return response; - } + // Removed: createTable(CreateTableRequest, byte[]) - using default implementation from interface @Override public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { @@ -342,7 +306,7 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request } // Create table in metastore without data (pass null for requestData) - doCreateTable(tableId, null, location, request.getProperties(), null); + doCreateTable(tableId, null, location, null, null); CreateEmptyTableResponse response = new 
CreateEmptyTableResponse(); response.setLocation(location); @@ -351,22 +315,20 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request } @Override - public DropTableResponse dropTable(DropTableRequest request) { + public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( tableId.levels() == 2, "Expect 2-level table identifier but get %s", tableId); String location = doDropTable(tableId); - // TODO: remove data - DropTableResponse response = new DropTableResponse(); - response.setLocation(location); + DeregisterTableResponse response = new DeregisterTableResponse(); response.setId(request.getId()); + response.setLocation(location); return response; } - @Override public void setConf(Configuration conf) { this.hadoopConf = conf; } @@ -383,16 +345,13 @@ protected List doListNamespaces(ObjectIdentifier parent) { Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to list namespaces: " + errorMessage, - HiveMetaStoreError.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException( + "Failed to list namespaces: " + errorMessage, HiveMetaStoreError.getType(), ""); } } protected void doCreateNamespace( - ObjectIdentifier id, CreateNamespaceRequest.ModeEnum mode, Map properties) { + ObjectIdentifier id, String mode, Map properties) { try { String db = id.levelAtListPos(0).toLowerCase(); @@ -402,34 +361,26 @@ protected void doCreateNamespace( Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? 
e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to create namespace: " + errorMessage, - HiveMetaStoreError.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException( + "Failed to create namespace: " + errorMessage, HiveMetaStoreError.getType(), ""); } } - private void createDatabase( - String dbName, CreateNamespaceRequest.ModeEnum mode, Map properties) + private void createDatabase(String dbName, String mode, Map properties) throws TException, InterruptedException { Database oldDb = Hive2Util.getDatabaseOrNull(clientPool, dbName); if (oldDb != null) { - switch (mode) { - case CREATE: - throw LanceNamespaceException.conflict( - String.format("Database %s already exist", dbName), - DatabaseAlreadyExist.getType(), - "", - CommonUtil.formatCurrentStackTrace()); - case EXIST_OK: - return; - case OVERWRITE: - clientPool.run( - client -> { - client.dropDatabase(dbName, false, true, false); - return null; - }); + if ("create".equals(mode)) { + throw new NamespaceAlreadyExistsException( + String.format("Database %s already exist", dbName), DatabaseAlreadyExist.getType(), ""); + } else if ("exist_ok".equals(mode) || "existok".equals(mode)) { + return; + } else if ("overwrite".equals(mode)) { + clientPool.run( + client -> { + client.dropDatabase(dbName, false, true, false); + return null; + }); } } @@ -491,11 +442,10 @@ protected void doCreateTable( try { Optional
existing = Hive2Util.getTable(clientPool, db, tableName); if (existing.isPresent()) { - throw LanceNamespaceException.conflict( + throw new TableAlreadyExistsException( String.format("Table %s.%s already exists", db, tableName), TableAlreadyExists.getType(), - String.format("%s.%s", db, tableName), - CommonUtil.formatCurrentStackTrace()); + String.format("%s.%s", db, tableName)); } Table table = new Table(); @@ -537,11 +487,8 @@ protected void doCreateTable( Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to create table: " + errorMessage, - HiveMetaStoreError.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException( + "Failed to create table: " + errorMessage, HiveMetaStoreError.getType(), ""); } } @@ -550,11 +497,8 @@ protected List doListTables(String db) { // First validate that database exists Database database = Hive2Util.getDatabaseOrNull(clientPool, db); if (database == null) { - throw LanceNamespaceException.notFound( - String.format("Database %s doesn't exist", db), - HiveMetaStoreError.getType(), - db, - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException( + String.format("Database %s doesn't exist", db), HiveMetaStoreError.getType(), db); } List allTables = clientPool.run(client -> client.getAllTables(db)); @@ -581,11 +525,8 @@ protected List doListTables(String db) { Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? 
e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to list tables: " + errorMessage, - HiveMetaStoreError.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException( + "Failed to list tables: " + errorMessage, HiveMetaStoreError.getType(), ""); } } @@ -596,11 +537,10 @@ protected String doDropTable(ObjectIdentifier id) { try { Optional
hmsTable = Hive2Util.getTable(clientPool, db, tableName); if (!hmsTable.isPresent()) { - throw LanceNamespaceException.notFound( + throw new TableNotFoundException( String.format("Table %s.%s does not exist", db, tableName), TableNotFound.getType(), - String.format("%s.%s", db, tableName), - CommonUtil.formatCurrentStackTrace()); + String.format("%s.%s", db, tableName)); } Hive2Util.validateLanceTable(hmsTable.get()); @@ -618,63 +558,35 @@ protected String doDropTable(ObjectIdentifier id) { Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to drop table: " + errorMessage, - HiveMetaStoreError.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException( + "Failed to drop table: " + errorMessage, HiveMetaStoreError.getType(), ""); } } - protected Map doDropNamespace( - ObjectIdentifier id, - DropNamespaceRequest.ModeEnum mode, - DropNamespaceRequest.BehaviorEnum behavior) { + protected Map doDropNamespace(ObjectIdentifier id, String mode, String behavior) { String db = id.levelAtListPos(0).toLowerCase(); try { Database database = Hive2Util.getDatabaseOrNull(clientPool, db); if (database == null) { - if (mode == DropNamespaceRequest.ModeEnum.SKIP) { - // Return empty properties for SKIP mode when namespace doesn't exist + if ("skip".equals(mode)) { return new HashMap<>(); } else { - throw LanceNamespaceException.notFound( - String.format("Database %s doesn't exist", db), - HiveMetaStoreError.getType(), - db, - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException( + String.format("Database %s doesn't exist", db), HiveMetaStoreError.getType(), db); } } - // Check if database contains tables - List tables = doListTables(db); - if (!tables.isEmpty()) { - if (behavior == DropNamespaceRequest.BehaviorEnum.RESTRICT) { - throw LanceNamespaceException.badRequest( + // Check 
if database contains tables (RESTRICT behavior only, not for Cascade) + boolean cascade = "Cascade".equalsIgnoreCase(behavior); + if (!cascade) { + List tables = doListTables(db); + if (!tables.isEmpty()) { + throw new InvalidInputException( String.format( "Database %s is not empty. Contains %d tables: %s", db, tables.size(), tables), HiveMetaStoreError.getType(), - db, - CommonUtil.formatCurrentStackTrace()); - } else if (behavior == DropNamespaceRequest.BehaviorEnum.CASCADE) { - // Drop all tables first - for (String tableName : tables) { - try { - ObjectIdentifier tableId = ObjectIdentifier.of(Lists.newArrayList(db, tableName)); - doDropTable(tableId); - LOG.info("Dropped table {}.{} during CASCADE operation", db, tableName); - } catch (Exception e) { - LOG.warn("Failed to drop table {}.{}: {}", db, tableName, e.getMessage()); - throw LanceNamespaceException.serviceUnavailable( - String.format( - "Failed to drop table %s.%s during CASCADE operation: %s", - db, tableName, e.getMessage()), - HiveMetaStoreError.getType(), - String.format("%s.%s", db, tableName), - CommonUtil.formatCurrentStackTrace()); - } - } + db); } } @@ -697,9 +609,10 @@ protected Map doDropNamespace( } // Drop the database + final boolean cascadeDrop = cascade; clientPool.run( client -> { - client.dropDatabase(db, false, true, false); + client.dropDatabase(db, false, true, cascadeDrop); return null; }); @@ -710,11 +623,8 @@ protected Map doDropNamespace( Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? 
e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to drop namespace: " + errorMessage, - HiveMetaStoreError.getType(), - db, - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException( + "Failed to drop namespace: " + errorMessage, HiveMetaStoreError.getType(), db); } } diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2NamespaceConfig.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2NamespaceConfig.java index 9237c92..28a5e41 100644 --- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2NamespaceConfig.java +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2NamespaceConfig.java @@ -13,9 +13,7 @@ */ package org.lance.namespace.hive2; -import org.lance.namespace.util.OpenDalUtil; -import org.lance.namespace.util.PropertyUtil; - +import java.util.HashMap; import java.util.Map; public class Hive2NamespaceConfig { @@ -41,12 +39,27 @@ public class Hive2NamespaceConfig { private final String root; public Hive2NamespaceConfig(Map properties) { + // Inline PropertyUtil.propertyAsInt + String clientPoolSizeStr = properties.get(CLIENT_POOL_SIZE); this.clientPoolSize = - PropertyUtil.propertyAsInt(properties, CLIENT_POOL_SIZE, CLIENT_POOL_SIZE_DEFAULT); - this.storageOptions = PropertyUtil.propertiesWithPrefix(properties, STORAGE_OPTIONS_PREFIX); + clientPoolSizeStr != null ? 
Integer.parseInt(clientPoolSizeStr) : CLIENT_POOL_SIZE_DEFAULT; + + // Inline PropertyUtil.propertiesWithPrefix + Map filteredStorageOptions = new HashMap<>(); + for (Map.Entry entry : properties.entrySet()) { + if (entry.getKey().startsWith(STORAGE_OPTIONS_PREFIX)) { + filteredStorageOptions.put( + entry.getKey().substring(STORAGE_OPTIONS_PREFIX.length()), entry.getValue()); + } + } + this.storageOptions = filteredStorageOptions; + + // Inline PropertyUtil.propertyAsString and OpenDalUtil.stripTrailingSlash + String rootValue = properties.getOrDefault(ROOT, ROOT_DEFAULT); this.root = - OpenDalUtil.stripTrailingSlash( - PropertyUtil.propertyAsString(properties, ROOT, ROOT_DEFAULT)); + rootValue != null && rootValue.endsWith("/") + ? rootValue.substring(0, rootValue.length() - 1) + : rootValue; } public int getClientPoolSize() { diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java index 4f26739..23c5374 100644 --- a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/Hive2Util.java @@ -13,8 +13,8 @@ */ package org.lance.namespace.hive2; -import org.lance.namespace.LanceNamespaceException; -import org.lance.namespace.util.CommonUtil; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.ServiceUnavailableException; import com.google.common.collect.Maps; import org.apache.hadoop.hive.metastore.api.Database; @@ -44,8 +44,7 @@ public static Database getDatabaseOrNull(Hive2ClientPool clientPool, String db) if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - throw LanceNamespaceException.serviceUnavailable( - e.getMessage(), HiveMetaStoreError.getType(), "", CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException(e.getMessage(), HiveMetaStoreError.getType(), 
""); } } @@ -103,20 +102,18 @@ public static Optional
getTable(Hive2ClientPool clientPool, String db, St if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - throw LanceNamespaceException.serviceUnavailable( - e.getMessage(), HiveMetaStoreError.getType(), "", CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException(e.getMessage(), HiveMetaStoreError.getType(), ""); } } public static void validateLanceTable(Table table) { Map params = table.getParameters(); if (params == null || !"lance".equalsIgnoreCase(params.get("table_type"))) { - throw LanceNamespaceException.badRequest( + throw new InvalidInputException( String.format( "Table %s.%s is not a Lance table", table.getDbName(), table.getTableName()), InvalidLanceTable.getType(), - String.format("%s.%s", table.getDbName(), table.getTableName()), - CommonUtil.formatCurrentStackTrace()); + String.format("%s.%s", table.getDbName(), table.getTableName())); } } diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ObjectIdentifier.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ObjectIdentifier.java new file mode 100644 index 0000000..af3f0de --- /dev/null +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ObjectIdentifier.java @@ -0,0 +1,55 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.lance.namespace.hive2; + +import java.util.Collections; +import java.util.List; + +/** Represents a hierarchical identifier for namespaces and tables. */ +public class ObjectIdentifier { + private final List levels; + + private ObjectIdentifier(List levels) { + this.levels = levels != null ? levels : Collections.emptyList(); + } + + public static ObjectIdentifier of(List levels) { + return new ObjectIdentifier(levels); + } + + public boolean isRoot() { + return levels.isEmpty(); + } + + public int levels() { + return levels.size(); + } + + public String levelAtListPos(int pos) { + if (pos < 0 || pos >= levels.size()) { + throw new IndexOutOfBoundsException( + "Position " + pos + " is out of bounds for size " + levels.size()); + } + return levels.get(pos); + } + + public String stringStyleId() { + return String.join(".", levels); + } + + @Override + public String toString() { + return stringStyleId(); + } +} diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/PageUtil.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/PageUtil.java new file mode 100644 index 0000000..3acf941 --- /dev/null +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/PageUtil.java @@ -0,0 +1,70 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive2; + +import java.util.List; + +/** Utility methods for pagination. 
*/ +public class PageUtil { + + private static final int DEFAULT_PAGE_SIZE = 100; + + private PageUtil() {} + + public static int normalizePageSize(Integer pageSize) { + if (pageSize == null || pageSize <= 0) { + return DEFAULT_PAGE_SIZE; + } + return pageSize; + } + + public static Page splitPage(List items, String pageToken, int pageSize) { + int startIndex = 0; + if (pageToken != null && !pageToken.isEmpty()) { + try { + startIndex = Integer.parseInt(pageToken); + } catch (NumberFormatException e) { + startIndex = 0; + } + } + + if (startIndex >= items.size()) { + return new Page(java.util.Collections.emptyList(), null); + } + + int endIndex = Math.min(startIndex + pageSize, items.size()); + List pageItems = items.subList(startIndex, endIndex); + + String nextPageToken = endIndex < items.size() ? String.valueOf(endIndex) : null; + return new Page(pageItems, nextPageToken); + } + + public static class Page { + private final List items; + private final String nextPageToken; + + public Page(List items, String nextPageToken) { + this.items = items; + this.nextPageToken = nextPageToken; + } + + public List items() { + return items; + } + + public String nextPageToken() { + return nextPageToken; + } + } +} diff --git a/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ValidationUtil.java b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ValidationUtil.java new file mode 100644 index 0000000..3652187 --- /dev/null +++ b/java/lance-namespace-hive2/src/main/java/org/lance/namespace/hive2/ValidationUtil.java @@ -0,0 +1,35 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive2; + +import org.lance.namespace.errors.InvalidInputException; + +/** Utility methods for validation. */ +public class ValidationUtil { + + private ValidationUtil() {} + + public static void checkArgument(boolean expression, String message, Object... args) { + if (!expression) { + throw new InvalidInputException(String.format(message, args)); + } + } + + public static String checkNotNullOrEmptyString(String value, String message) { + if (value == null || value.isEmpty()) { + throw new InvalidInputException(message); + } + return value; + } +} diff --git a/java/lance-namespace-hive2/src/test/java/org/lance/namespace/hive2/TestHive2Namespace.java b/java/lance-namespace-hive2/src/test/java/org/lance/namespace/hive2/TestHive2Namespace.java index c6073e8..27bbb82 100644 --- a/java/lance-namespace-hive2/src/test/java/org/lance/namespace/hive2/TestHive2Namespace.java +++ b/java/lance-namespace-hive2/src/test/java/org/lance/namespace/hive2/TestHive2Namespace.java @@ -14,20 +14,13 @@ package org.lance.namespace.hive2; import org.lance.namespace.LanceNamespace; -import org.lance.namespace.LanceNamespaceException; -import org.lance.namespace.LanceNamespaces; -import org.lance.namespace.TestHelper; +import org.lance.namespace.errors.LanceNamespaceException; import org.lance.namespace.model.CreateNamespaceRequest; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; import 
org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; -import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.ListTablesRequest; import org.lance.namespace.model.ListTablesResponse; import org.lance.namespace.model.NamespaceExistsRequest; @@ -41,7 +34,6 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.io.File; @@ -74,7 +66,10 @@ public static void setup() throws IOException { tmpDirBase = file.getAbsolutePath(); HiveConf hiveConf = metastore.hiveConf(); - namespace = LanceNamespaces.connect("hive2", Maps.newHashMap(), hiveConf, allocator); + Hive2Namespace hive2Namespace = new Hive2Namespace(); + hive2Namespace.setHadoopConf(hiveConf); + hive2Namespace.initialize(Maps.newHashMap(), allocator); + namespace = hive2Namespace; } @AfterAll @@ -97,123 +92,12 @@ public void cleanup() throws Exception { metastore.reset(); } - @Disabled("Need to figure out the proper interface") - @Test - public void testCreateTable() throws IOException { - // Setup: Create database - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - // Test: Create table with valid parameters - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_db", "test_table")); - request.setLocation(tmpDirBase + "/test_db/test_table.lance"); - - Map properties = Maps.newHashMap(); - properties.put("custom_prop", "custom_value"); - request.setProperties(properties); - - 
byte[] testData = TestHelper.createTestArrowData(allocator); - CreateTableResponse response = namespace.createTable(request, testData); - - assertEquals(request.getLocation(), response.getLocation()); - assertEquals(1L, response.getVersion()); - } - - @Test - public void testCreateTableAlreadyExists() throws IOException { - // Setup: Create database and table - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_db", "test_table")); - request.setLocation(tmpDirBase + "/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(request, testData); - - // Test: Create table that already exists - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.createTable(request, testData)); - assertTrue(error.getMessage().contains("Table test_db.test_table already exists")); - } - - @Test - public void testCreateTableManagedByImpl() throws IOException { - // Setup: Create database - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - // Test: Create table with managed_by=impl (not supported) - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_db", "impl_table")); - request.setLocation(tmpDirBase + "/test_db/impl_table.lance"); - - Map properties = Maps.newHashMap(); - properties.put("managed_by", "impl"); - request.setProperties(properties); - - byte[] testData = TestHelper.createTestArrowData(allocator); - Exception error = - assertThrows( - UnsupportedOperationException.class, () -> namespace.createTable(request, testData)); - 
assertTrue(error.getMessage().contains("managed_by=impl is not supported yet")); - } - - @Test - public void testCreateTableWithoutData() throws IOException { - // Setup: Create database - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - // Test: Create table without data - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_db", "no_data_table")); - request.setLocation(tmpDirBase + "/test_db/no_data_table.lance"); - - byte[] emptyData = TestHelper.createEmptyArrowData(allocator); - CreateTableResponse response = namespace.createTable(request, emptyData); - assertEquals(request.getLocation(), response.getLocation()); - } - - @Test - public void testDescribeTable() throws IOException { - // Setup: Create database and table - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_db", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Describe existing Lance table - DescribeTableRequest request = new DescribeTableRequest(); - request.setId(Lists.list("test_db", "test_table")); - - DescribeTableResponse response = namespace.describeTable(request); - assertEquals("file:" + tmpDirBase + "/test_db/test_table.lance", response.getLocation()); - } - @Test public void testDescribeNonExistentTable() { // Setup: Create database CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); nsRequest.setId(Lists.list("test_db")); - 
nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); namespace.createNamespace(nsRequest); // Test: Describe non-existent table @@ -224,133 +108,12 @@ public void testDescribeNonExistentTable() { assertTrue(error.getMessage().contains("Table does not exist")); } - @Test - public void testDropTable() throws IOException { - // Setup: Create database and table - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_db", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Drop existing table - DropTableRequest request = new DropTableRequest(); - request.setId(Lists.list("test_db", "test_table")); - - DropTableResponse response = namespace.dropTable(request); - assertEquals("file:" + tmpDirBase + "/test_db/test_table.lance", response.getLocation()); - assertEquals(request.getId(), response.getId()); - - // Verify table is dropped by trying to describe it - DescribeTableRequest descRequest = new DescribeTableRequest(); - descRequest.setId(request.getId()); - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.describeTable(descRequest)); - assertTrue(error.getMessage().contains("Table does not exist")); - } - - @Test - public void testDropNonExistentTable() { - // Setup: Create database - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - // Test: Drop non-existent table - DropTableRequest request = new DropTableRequest(); - 
request.setId(Lists.list("test_db", "non_existent")); - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.dropTable(request)); - assertTrue(error.getMessage().contains("Table test_db.non_existent does not exist")); - } - - @Test - public void testCreateTableWithDefaultLocationFromRoot() throws IOException { - // With our enhancement, databases created without explicit location - // will use the root config location instead of Hive warehouse - - // Setup: Create namespace with custom root configuration - Map properties = Maps.newHashMap(); - properties.put("root", tmpDirBase); - - HiveConf hiveConf = metastore.hiveConf(); - LanceNamespace customNamespace = - LanceNamespaces.connect("hive2", properties, hiveConf, allocator); - - // Setup: Create database (will use root location) - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - customNamespace.createNamespace(nsRequest); - - // Test: Create table without specifying location - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_db", "test_table")); - // Don't set location - it will be derived from database location - - // Create test Arrow IPC data - byte[] testData = TestHelper.createTestArrowData(allocator); - CreateTableResponse response = customNamespace.createTable(request, testData); - - // Verify: Location should be derived from root-based database location - // Hive adds file: prefix to locations - String expectedLocation = "file:" + tmpDirBase + "/test_db/test_table.lance"; - assertEquals(expectedLocation, response.getLocation()); - assertEquals(1L, response.getVersion()); - } - - @Test - public void testCreateTableWithDefaultLocationFromDatabaseLocation() throws IOException { - // Setup: Create namespace with custom root configuration - Map properties = Maps.newHashMap(); - properties.put("root", tmpDirBase); - - HiveConf 
hiveConf = metastore.hiveConf(); - LanceNamespace customNamespace = - LanceNamespaces.connect("hive2", properties, hiveConf, allocator); - - // Setup: Create database with specific location - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db_with_location")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - - // Set database location - this should take precedence over root config - String databaseLocation = tmpDirBase + "/custom_db_location"; - Map dbProperties = Maps.newHashMap(); - dbProperties.put("database.location-uri", databaseLocation); - nsRequest.setProperties(dbProperties); - - customNamespace.createNamespace(nsRequest); - - // Test: Create table without specifying location (should derive from database location) - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_db_with_location", "test_table")); - // Don't set location - it should be derived from database location - - // Create test Arrow IPC data - byte[] testData = TestHelper.createTestArrowData(allocator); - CreateTableResponse response = customNamespace.createTable(request, testData); - - // Verify: Location should be derived as {database_location}/{table}.lance - // Database locations in Hive typically have file: prefix - String expectedLocation = "file:" + databaseLocation + "/test_table.lance"; - assertEquals(expectedLocation, response.getLocation()); - assertEquals(1L, response.getVersion()); - } - @Test public void testDescribeNamespace() { // Setup: Create database CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); Map properties = Maps.newHashMap(); properties.put("database.description", "Test database description"); @@ -388,7 +151,7 @@ public void testNamespaceExists() { // Setup: Create database CreateNamespaceRequest nsRequest = new 
CreateNamespaceRequest(); nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); namespace.createNamespace(nsRequest); // Test: Check existing namespace @@ -410,35 +173,12 @@ public void testNamespaceExistsNonExistent() { assertTrue(error.getMessage().contains("Namespace does not exist")); } - @Test - public void testTableExists() throws IOException { - // Setup: Create database and table - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_db", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Check existing table - TableExistsRequest request = new TableExistsRequest(); - request.setId(Lists.list("test_db", "test_table")); - - // Should not throw exception for existing Lance table - namespace.tableExists(request); - } - @Test public void testTableExistsNonExistent() { // Setup: Create database CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); namespace.createNamespace(nsRequest); // Test: Check non-existent table @@ -450,46 +190,12 @@ public void testTableExistsNonExistent() { assertTrue(error.getMessage().contains("Table does not exist")); } - @Test - public void testListTables() throws IOException { - // Setup: Create database and multiple tables - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - 
namespace.createNamespace(nsRequest); - - // Create first table - CreateTableRequest createRequest1 = new CreateTableRequest(); - createRequest1.setId(Lists.list("test_db", "table1")); - createRequest1.setLocation(tmpDirBase + "/test_db/table1.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest1, testData); - - // Create second table - CreateTableRequest createRequest2 = new CreateTableRequest(); - createRequest2.setId(Lists.list("test_db", "table2")); - createRequest2.setLocation(tmpDirBase + "/test_db/table2.lance"); - - namespace.createTable(createRequest2, testData); - - // Test: List tables - ListTablesRequest request = new ListTablesRequest(); - request.setId(Lists.list("test_db")); - - ListTablesResponse response = namespace.listTables(request); - - assertEquals(2, response.getTables().size()); - assertTrue(response.getTables().contains("table1")); - assertTrue(response.getTables().contains("table2")); - } - @Test public void testListTablesEmpty() { // Setup: Create empty database CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); nsRequest.setId(Lists.list("empty_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); namespace.createNamespace(nsRequest); // Test: List tables in empty database @@ -501,47 +207,6 @@ public void testListTablesEmpty() { assertEquals(0, response.getTables().size()); } - @Test - public void testListTablesWithPagination() throws IOException { - // Setup: Create database and multiple tables - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - // Create multiple tables - for (int i = 1; i <= 5; i++) { - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_db", "table" + i)); - createRequest.setLocation(tmpDirBase 
+ "/test_db/table" + i + ".lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - } - - // Test: List tables with pagination (limit 3) - ListTablesRequest request = new ListTablesRequest(); - request.setId(Lists.list("test_db")); - request.setLimit(3); - - ListTablesResponse response = namespace.listTables(request); - - assertEquals(3, response.getTables().size()); - // Should have a page token for remaining results - assertTrue(response.getPageToken() != null && !response.getPageToken().isEmpty()); - - // Get remaining tables - ListTablesRequest nextRequest = new ListTablesRequest(); - nextRequest.setId(Lists.list("test_db")); - nextRequest.setPageToken(response.getPageToken()); - - ListTablesResponse nextResponse = namespace.listTables(nextRequest); - - assertEquals(2, nextResponse.getTables().size()); - // No more pages - assertTrue(nextResponse.getPageToken() == null || nextResponse.getPageToken().isEmpty()); - } - @Test public void testListTablesNonExistentDatabase() { // Test: List tables in non-existent database @@ -558,7 +223,7 @@ public void testDropNamespaceBasic() { // Setup: Create database CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); nsRequest.setId(Lists.list("test_db_basic")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); Map properties = Maps.newHashMap(); properties.put("database.description", "Test database for dropping"); @@ -592,7 +257,7 @@ public void testDropNamespaceSkipMode() { // Test: Drop non-existent namespace with SKIP mode DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Lists.list("non_existent_db")); - dropRequest.setMode(DropNamespaceRequest.ModeEnum.SKIP); + dropRequest.setMode("Skip"); DropNamespaceResponse response = namespace.dropNamespace(dropRequest); @@ -605,78 +270,10 @@ public void testDropNamespaceFailMode() { // Test: Drop non-existent namespace with FAIL 
mode (default) DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Lists.list("non_existent_db")); - dropRequest.setMode(DropNamespaceRequest.ModeEnum.FAIL); + dropRequest.setMode("Fail"); Exception error = assertThrows(LanceNamespaceException.class, () -> namespace.dropNamespace(dropRequest)); assertTrue(error.getMessage().contains("Database non_existent_db doesn't exist")); } - - @Test - public void testDropNamespaceRestrictWithTables() throws IOException { - // Setup: Create database and table - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db_restrict")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_db_restrict", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_db_restrict/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Try to drop namespace with RESTRICT behavior (should fail) - DropNamespaceRequest dropRequest = new DropNamespaceRequest(); - dropRequest.setId(Lists.list("test_db_restrict")); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.RESTRICT); - - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.dropNamespace(dropRequest)); - assertTrue(error.getMessage().contains("Database test_db_restrict is not empty")); - assertTrue(error.getMessage().contains("Contains 1 tables")); - } - - @Test - public void testDropNamespaceCascadeWithTables() throws IOException { - // Setup: Create database and multiple tables - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_db_cascade")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - // Create first table - 
CreateTableRequest createRequest1 = new CreateTableRequest(); - createRequest1.setId(Lists.list("test_db_cascade", "table1")); - createRequest1.setLocation(tmpDirBase + "/test_db_cascade/table1.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest1, testData); - - // Create second table - CreateTableRequest createRequest2 = new CreateTableRequest(); - createRequest2.setId(Lists.list("test_db_cascade", "table2")); - createRequest2.setLocation(tmpDirBase + "/test_db_cascade/table2.lance"); - - namespace.createTable(createRequest2, testData); - - // Test: Drop namespace with CASCADE behavior - DropNamespaceRequest dropRequest = new DropNamespaceRequest(); - dropRequest.setId(Lists.list("test_db_cascade")); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - - DropNamespaceResponse response = namespace.dropNamespace(dropRequest); - - // Verify namespace properties were returned - assertTrue(response.getProperties().containsKey("database.location-uri")); - - // Verify namespace was dropped - NamespaceExistsRequest existsRequest = new NamespaceExistsRequest(); - existsRequest.setId(Lists.list("test_db_cascade")); - - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.namespaceExists(existsRequest)); - assertTrue(error.getMessage().contains("Namespace does not exist")); - } } diff --git a/java/lance-namespace-hive2/src/test/java/org/lance/namespace/hive2/TestHive2NamespaceIntegration.java b/java/lance-namespace-hive2/src/test/java/org/lance/namespace/hive2/TestHive2NamespaceIntegration.java index ca95529..e6ba013 100644 --- a/java/lance-namespace-hive2/src/test/java/org/lance/namespace/hive2/TestHive2NamespaceIntegration.java +++ b/java/lance-namespace-hive2/src/test/java/org/lance/namespace/hive2/TestHive2NamespaceIntegration.java @@ -13,17 +13,18 @@ */ package org.lance.namespace.hive2; -import org.lance.namespace.LanceNamespaceException; +import 
org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.LanceNamespaceException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; +import org.lance.namespace.model.DeregisterTableRequest; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; -import org.lance.namespace.model.DropTableRequest; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; @@ -114,19 +115,13 @@ public void tearDown() { // Clean up test database DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Collections.singletonList(testDatabase)); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); + dropRequest.setBehavior("Restrict"); namespace.dropNamespace(dropRequest); } catch (Exception e) { // Ignore cleanup errors } - if (namespace != null) { - try { - namespace.close(); - } catch (Exception e) { - // Ignore - } - } + // Namespace cleanup handled by Hive internals if (allocator != null) { allocator.close(); @@ -163,8 +158,8 @@ public void testDatabaseOperations() { DescribeNamespaceResponse describeResponse = namespace.describeNamespace(describeRequest); assertThat(describeResponse).isNotNull(); - assertThat(describeResponse.getProperties()).containsEntry( - "database.description", "Integration test database"); + assertThat(describeResponse.getProperties()) + .containsEntry("database.description", "Integration test database"); // List databases ListNamespacesRequest listRequest = new 
ListNamespacesRequest(); @@ -190,7 +185,8 @@ public void testTableOperations() { nsRequest.setId(Collections.singletonList(testDatabase)); namespace.createNamespace(nsRequest); - String tableName = "test_table_" + UUID.randomUUID().toString().substring(0, 8).replace("-", ""); + String tableName = + "test_table_" + UUID.randomUUID().toString().substring(0, 8).replace("-", ""); // Create empty table (declare table without data) CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); @@ -206,7 +202,6 @@ public void testTableOperations() { DescribeTableResponse describeResponse = namespace.describeTable(describeRequest); assertThat(describeResponse.getLocation()).contains(tableName); - assertThat(describeResponse.getProperties()).containsEntry("table_type", "lance"); // List tables ListTablesRequest listRequest = new ListTablesRequest(); @@ -215,10 +210,10 @@ public void testTableOperations() { ListTablesResponse listResponse = namespace.listTables(listRequest); assertThat(listResponse.getTables()).contains(tableName); - // Drop table - DropTableRequest dropRequest = new DropTableRequest(); - dropRequest.setId(Arrays.asList(testDatabase, tableName)); - namespace.dropTable(dropRequest); + // Deregister table + DeregisterTableRequest deregisterRequest = new DeregisterTableRequest(); + deregisterRequest.setId(Arrays.asList(testDatabase, tableName)); + namespace.deregisterTable(deregisterRequest); // Verify table doesn't exist assertThatThrownBy(() -> namespace.describeTable(describeRequest)) @@ -226,29 +221,14 @@ public void testTableOperations() { } @Test - public void testCascadeDropDatabase() { - // Create database - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Collections.singletonList(testDatabase)); - namespace.createNamespace(nsRequest); - - // Create a table in the database - String tableName = "cascade_test_table"; - CreateEmptyTableRequest tableRequest = new CreateEmptyTableRequest(); - 
tableRequest.setId(Arrays.asList(testDatabase, tableName)); - tableRequest.setLocation("/tmp/lance-integration-test/" + testDatabase + "/" + tableName); - namespace.createEmptyTable(tableRequest); - - // Drop database with cascade + public void testCascadeDropDatabaseRejected() { + // Drop database with cascade - should be rejected DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Collections.singletonList(testDatabase)); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - namespace.dropNamespace(dropRequest); + dropRequest.setBehavior("Cascade"); - // Verify database doesn't exist - DescribeNamespaceRequest describeRequest = new DescribeNamespaceRequest(); - describeRequest.setId(Collections.singletonList(testDatabase)); - assertThatThrownBy(() -> namespace.describeNamespace(describeRequest)) - .isInstanceOf(LanceNamespaceException.class); + assertThatThrownBy(() -> namespace.dropNamespace(dropRequest)) + .isInstanceOf(InvalidInputException.class) + .hasMessageContaining("Cascade behavior is not supported"); } } diff --git a/java/lance-namespace-hive3/derby.log b/java/lance-namespace-hive3/derby.log new file mode 100644 index 0000000..9063ad7 --- /dev/null +++ b/java/lance-namespace-hive3/derby.log @@ -0,0 +1,13 @@ +---------------------------------------------------------------- +Tue Dec 30 21:25:14 PST 2025: +Booting Derby version The Apache Software Foundation - Apache Derby - 10.14.2.0 - (1828579): instance a816c00e-019b-72de-0fb6-00000746a618 +on database directory /Users/zhaoqinye/oss/lance-namespace-impls/java/lance-namespace-hive3/metastore_db with class loader jdk.internal.loader.ClassLoaders$AppClassLoader@5ffd2b27 +Loaded from file:/Users/zhaoqinye/.m2/repository/org/apache/derby/derby/10.14.2.0/derby-10.14.2.0.jar +java.vendor=Amazon.com Inc. 
+java.runtime.version=17.0.14+7-LTS +user.dir=/Users/zhaoqinye/oss/lance-namespace-impls/java/lance-namespace-hive3 +os.name=Mac OS X +os.arch=aarch64 +os.version=15.5 +derby.system.home=null +Database Class Loader started - derby.database.classpath='' diff --git a/java/lance-namespace-hive3/pom.xml b/java/lance-namespace-hive3/pom.xml index f16eaa9..54616be 100644 --- a/java/lance-namespace-hive3/pom.xml +++ b/java/lance-namespace-hive3/pom.xml @@ -22,6 +22,10 @@ org.lance lance-core + + org.lance + lance-namespace-core + org.lance lance-namespace-apache-client @@ -73,6 +77,12 @@ 10.14.2.0 test + + org.lance + lance-namespace-impls-core + ${project.version} + test + org.junit.jupiter junit-jupiter diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ClientPoolImpl.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ClientPoolImpl.java new file mode 100644 index 0000000..2a268df --- /dev/null +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ClientPoolImpl.java @@ -0,0 +1,116 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive3; + +import java.io.Closeable; +import java.util.ArrayDeque; +import java.util.Deque; + +/** + * A simple connection pool implementation for reusing clients. Adapted from Apache Iceberg. 
+ * + * @param the client type + * @param the exception type thrown by client operations + */ +public abstract class ClientPoolImpl implements Closeable { + + private final int poolSize; + private final Deque clients; + private final Class reconnectExc; + private final boolean retryByDefault; + private volatile int currentSize; + private boolean closed; + + protected ClientPoolImpl(int poolSize, Class reconnectExc, boolean retryByDefault) { + this.poolSize = poolSize; + this.clients = new ArrayDeque<>(); + this.reconnectExc = reconnectExc; + this.retryByDefault = retryByDefault; + this.currentSize = 0; + this.closed = false; + } + + public interface Action { + R run(C client) throws E; + } + + public R run(Action action) throws E, InterruptedException { + return run(action, retryByDefault); + } + + public R run(Action action, boolean retry) throws E, InterruptedException { + C client = get(); + try { + return action.run(client); + } catch (Exception exc) { + if (retry && isConnectionException(exc)) { + try { + client = reconnect(client); + } catch (Exception reconnectExc) { + release(client); + throw (E) exc; + } + return action.run(client); + } + throw (E) exc; + } finally { + release(client); + } + } + + protected abstract C newClient(); + + protected abstract C reconnect(C client); + + protected abstract void close(C client); + + protected boolean isConnectionException(Exception exc) { + return reconnectExc.isInstance(exc); + } + + private synchronized C get() throws InterruptedException { + if (closed) { + throw new IllegalStateException("Cannot get a client from a closed pool"); + } + + while (clients.isEmpty() && currentSize >= poolSize) { + wait(); + } + + if (!clients.isEmpty()) { + return clients.removeFirst(); + } + + currentSize++; + return newClient(); + } + + private synchronized void release(C client) { + if (closed) { + close(client); + } else { + clients.addFirst(client); + notify(); + } + } + + @Override + public synchronized void close() { + 
this.closed = true; + while (!clients.isEmpty()) { + close(clients.removeFirst()); + } + notifyAll(); + } +} diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/CommonUtil.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/CommonUtil.java new file mode 100644 index 0000000..9d4194f --- /dev/null +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/CommonUtil.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive3; + +/** Common utility methods. */ +public class CommonUtil { + + private CommonUtil() {} + + public static String formatCurrentStackTrace() { + StackTraceElement[] stack = Thread.currentThread().getStackTrace(); + StringBuilder sb = new StringBuilder(); + for (int i = 2; i < Math.min(stack.length, 10); i++) { + sb.append(stack[i].toString()).append("\n"); + } + return sb.toString(); + } + + public static String makeQualified(String path) { + if (path == null) { + return null; + } + return path.endsWith("/") ? 
path.substring(0, path.length() - 1) : path; + } +} diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/DynMethods.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/DynMethods.java new file mode 100644 index 0000000..7bf259a --- /dev/null +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/DynMethods.java @@ -0,0 +1,491 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive3; + +import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.Arrays; + +/** Copied from parquet-common */ +public class DynMethods { + + private DynMethods() {} + + /** + * Convenience wrapper class around {@link java.lang.reflect.Method}. + * + *

Allows callers to invoke the wrapped method with all Exceptions wrapped by RuntimeException, + * or with a single Exception catch block. + */ + public static class UnboundMethod { + + private final Method method; + private final String name; + private final int argLength; + + UnboundMethod(Method method, String name) { + this.method = method; + this.name = name; + this.argLength = + (method == null || method.isVarArgs()) ? -1 : method.getParameterTypes().length; + } + + @SuppressWarnings("unchecked") + R invokeChecked(Object target, Object... args) throws Exception { + try { + if (argLength < 0) { + return (R) method.invoke(target, args); + } else { + return (R) method.invoke(target, Arrays.copyOfRange(args, 0, argLength)); + } + + } catch (InvocationTargetException e) { + Throwables.propagateIfInstanceOf(e.getCause(), Exception.class); + Throwables.propagateIfInstanceOf(e.getCause(), RuntimeException.class); + throw Throwables.propagate(e.getCause()); + } + } + + public R invoke(Object target, Object... args) { + try { + return this.invokeChecked(target, args); + } catch (Exception e) { + Throwables.propagateIfInstanceOf(e, RuntimeException.class); + throw Throwables.propagate(e); + } + } + + /** + * Returns this method as a BoundMethod for the given receiver. + * + * @param receiver an Object to receive the method invocation + * @return a {@link BoundMethod} for this method and the receiver + * @throws IllegalStateException if the method is static + * @throws IllegalArgumentException if the receiver's class is incompatible + */ + public BoundMethod bind(Object receiver) { + Preconditions.checkState( + !isStatic(), "Cannot bind static method %s", method.toGenericString()); + Preconditions.checkArgument( + method.getDeclaringClass().isAssignableFrom(receiver.getClass()), + "Cannot bind %s to instance of %s", + method.toGenericString(), + receiver.getClass()); + + return new BoundMethod(this, receiver); + } + + /** Returns whether the method is a static method. 
*/ + public boolean isStatic() { + return Modifier.isStatic(method.getModifiers()); + } + + /** Returns whether the method is a noop. */ + public boolean isNoop() { + return this == NOOP; + } + + /** + * Returns this method as a StaticMethod. + * + * @return a {@link StaticMethod} for this method + * @throws IllegalStateException if the method is not static + */ + public StaticMethod asStatic() { + Preconditions.checkState(isStatic(), "Method is not static"); + return new StaticMethod(this); + } + + @Override + public String toString() { + return "DynMethods.UnboundMethod(name=" + name + " method=" + method.toGenericString() + ")"; + } + + /** Singleton {@link UnboundMethod}, performs no operation and returns null. */ + private static final UnboundMethod NOOP = + new UnboundMethod(null, "NOOP") { + @Override + R invokeChecked(Object target, Object... args) { + return null; + } + + @Override + public BoundMethod bind(Object receiver) { + return new BoundMethod(this, receiver); + } + + @Override + public StaticMethod asStatic() { + return new StaticMethod(this); + } + + @Override + public boolean isStatic() { + return true; + } + + @Override + public String toString() { + return "DynMethods.UnboundMethod(NOOP)"; + } + }; + } + + public static class BoundMethod { + private final UnboundMethod method; + private final Object receiver; + + private BoundMethod(UnboundMethod method, Object receiver) { + this.method = method; + this.receiver = receiver; + } + + public R invokeChecked(Object... args) throws Exception { + return method.invokeChecked(receiver, args); + } + + public R invoke(Object... args) { + return method.invoke(receiver, args); + } + } + + public static class StaticMethod { + private final UnboundMethod method; + + private StaticMethod(UnboundMethod method) { + this.method = method; + } + + public R invokeChecked(Object... args) throws Exception { + return method.invokeChecked(null, args); + } + + public R invoke(Object... 
args) { + return method.invoke(null, args); + } + } + + /** + * Constructs a new builder for calling methods dynamically. + * + * @param methodName name of the method the builder will locate + * @return a Builder for finding a method + */ + public static Builder builder(String methodName) { + return new Builder(methodName); + } + + public static class Builder { + private final String name; + private ClassLoader loader = Thread.currentThread().getContextClassLoader(); + private UnboundMethod method = null; + + public Builder(String methodName) { + this.name = methodName; + } + + /** + * Set the {@link ClassLoader} used to lookup classes by name. + * + *

If not set, the current thread's ClassLoader is used. + * + * @param newLoader a ClassLoader + * @return this Builder for method chaining + */ + public Builder loader(ClassLoader newLoader) { + this.loader = newLoader; + return this; + } + + /** + * If no implementation has been found, adds a NOOP method. + * + *

Note: calls to impl will not match after this method is called! + * + * @return this Builder for method chaining + */ + public Builder orNoop() { + if (method == null) { + this.method = UnboundMethod.NOOP; + } + return this; + } + + /** + * Checks for an implementation, first finding the given class by name. + * + * @param className name of a class + * @param methodName name of a method (different from constructor) + * @param argClasses argument classes for the method + * @return this Builder for method chaining + * @see java.lang.Class#forName(String) + * @see java.lang.Class#getMethod(String, Class[]) + */ + public Builder impl(String className, String methodName, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + Class targetClass = Class.forName(className, true, loader); + impl(targetClass, methodName, argClasses); + } catch (ClassNotFoundException e) { + // not the right implementation + } + return this; + } + + /** + * Checks for an implementation, first finding the given class by name. + * + *

The name passed to the constructor is the method name used. + * + * @param className name of a class + * @param argClasses argument classes for the method + * @return this Builder for method chaining + * @see java.lang.Class#forName(String) + * @see java.lang.Class#getMethod(String, Class[]) + */ + public Builder impl(String className, Class... argClasses) { + impl(className, name, argClasses); + return this; + } + + /** + * Checks for a method implementation. + * + * @param targetClass a class instance + * @param methodName name of a method (different from constructor) + * @param argClasses argument classes for the method + * @return this Builder for method chaining + * @see java.lang.Class#forName(String) + * @see java.lang.Class#getMethod(String, Class[]) + */ + public Builder impl(Class targetClass, String methodName, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + this.method = new UnboundMethod(targetClass.getMethod(methodName, argClasses), name); + } catch (NoSuchMethodException e) { + // not the right implementation + } + return this; + } + + /** + * Checks for a method implementation. + * + *

The name passed to the constructor is the method name used. + * + * @param targetClass a class instance + * @param argClasses argument classes for the method + * @return this Builder for method chaining + * @see java.lang.Class#forName(String) + * @see java.lang.Class#getMethod(String, Class[]) + */ + public Builder impl(Class targetClass, Class... argClasses) { + impl(targetClass, name, argClasses); + return this; + } + + /** + * Checks for an implementation, first finding the given class by name. + * + * @param className name of a class + * @param methodName name of a method (different from constructor) + * @param argClasses argument classes for the method + * @return this Builder for method chaining + * @see java.lang.Class#forName(String) + * @see java.lang.Class#getMethod(String, Class[]) + */ + public Builder hiddenImpl(String className, String methodName, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + Class targetClass = Class.forName(className, true, loader); + hiddenImpl(targetClass, methodName, argClasses); + } catch (ClassNotFoundException e) { + // not the right implementation + } + return this; + } + + /** + * Checks for an implementation, first finding the given class by name. + * + *

The name passed to the constructor is the method name used. + * + * @param className name of a class + * @param argClasses argument classes for the method + * @return this Builder for method chaining + * @see java.lang.Class#forName(String) + * @see java.lang.Class#getMethod(String, Class[]) + */ + public Builder hiddenImpl(String className, Class... argClasses) { + hiddenImpl(className, name, argClasses); + return this; + } + + /** + * Checks for a method implementation. + * + * @param targetClass a class instance + * @param methodName name of a method (different from constructor) + * @param argClasses argument classes for the method + * @return this Builder for method chaining + * @see java.lang.Class#forName(String) + * @see java.lang.Class#getMethod(String, Class[]) + */ + public Builder hiddenImpl(Class targetClass, String methodName, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + Method hidden = targetClass.getDeclaredMethod(methodName, argClasses); + AccessController.doPrivileged(new MakeAccessible(hidden)); + this.method = new UnboundMethod(hidden, name); + } catch (SecurityException | NoSuchMethodException e) { + // unusable or not the right implementation + } + return this; + } + + /** + * Checks for a method implementation. + * + *

The name passed to the constructor is the method name used. + * + * @param targetClass a class instance + * @param argClasses argument classes for the method + * @return this Builder for method chaining + * @see java.lang.Class#forName(String) + * @see java.lang.Class#getMethod(String, Class[]) + */ + public Builder hiddenImpl(Class targetClass, Class... argClasses) { + hiddenImpl(targetClass, name, argClasses); + return this; + } + + /** + * Returns the first valid implementation as a UnboundMethod or throws a RuntimeError if there + * is none. + * + * @return a {@link UnboundMethod} with a valid implementation + * @throws RuntimeException if no implementation was found + */ + public UnboundMethod build() { + if (method != null) { + return method; + } else { + throw new RuntimeException("Cannot find method: " + name); + } + } + + /** + * Returns the first valid implementation as a BoundMethod or throws a RuntimeError if there is + * none. + * + * @param receiver an Object to receive the method invocation + * @return a {@link BoundMethod} with a valid implementation and receiver + * @throws IllegalStateException if the method is static + * @throws IllegalArgumentException if the receiver's class is incompatible + * @throws RuntimeException if no implementation was found + */ + public BoundMethod build(Object receiver) { + return build().bind(receiver); + } + + /** + * Returns the first valid implementation as a UnboundMethod or throws a NoSuchMethodException + * if there is none. + * + * @return a {@link UnboundMethod} with a valid implementation + * @throws NoSuchMethodException if no implementation was found + */ + public UnboundMethod buildChecked() throws NoSuchMethodException { + if (method != null) { + return method; + } else { + throw new NoSuchMethodException("Cannot find method: " + name); + } + } + + /** + * Returns the first valid implementation as a BoundMethod or throws a NoSuchMethodException if + * there is none. 
+ * + * @param receiver an Object to receive the method invocation + * @return a {@link BoundMethod} with a valid implementation and receiver + * @throws IllegalStateException if the method is static + * @throws IllegalArgumentException if the receiver's class is incompatible + * @throws NoSuchMethodException if no implementation was found + */ + public BoundMethod buildChecked(Object receiver) throws NoSuchMethodException { + return buildChecked().bind(receiver); + } + + /** + * Returns the first valid implementation as a StaticMethod or throws a NoSuchMethodException if + * there is none. + * + * @return a {@link StaticMethod} with a valid implementation + * @throws IllegalStateException if the method is not static + * @throws NoSuchMethodException if no implementation was found + */ + public StaticMethod buildStaticChecked() throws NoSuchMethodException { + return buildChecked().asStatic(); + } + + /** + * Returns the first valid implementation as a StaticMethod or throws a RuntimeException if + * there is none. 
+ * + * @return a {@link StaticMethod} with a valid implementation + * @throws IllegalStateException if the method is not static + * @throws RuntimeException if no implementation was found + */ + public StaticMethod buildStatic() { + return build().asStatic(); + } + } + + private static class MakeAccessible implements PrivilegedAction { + private final Method hidden; + + MakeAccessible(Method hidden) { + this.hidden = hidden; + } + + @Override + public Void run() { + hidden.setAccessible(true); + return null; + } + } +} diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3ClientPool.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3ClientPool.java index 8e9f858..cf276ad 100644 --- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3ClientPool.java +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3ClientPool.java @@ -13,9 +13,6 @@ */ package org.lance.namespace.hive3; -import org.lance.namespace.util.ClientPoolImpl; -import org.lance.namespace.util.DynMethods; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaHookLoader; diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java index 3f46207..258332a 100644 --- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Namespace.java @@ -13,38 +13,34 @@ */ package org.lance.namespace.hive3; -import com.lancedb.lance.Dataset; -import com.lancedb.lance.WriteParams; -import org.lance.namespace.Configurable; +import org.lance.Dataset; +import org.lance.WriteParams; import org.lance.namespace.LanceNamespace; -import org.lance.namespace.LanceNamespaceException; -import 
org.lance.namespace.ObjectIdentifier; +import org.lance.namespace.errors.InternalException; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.NamespaceAlreadyExistsException; +import org.lance.namespace.errors.NamespaceNotFoundException; +import org.lance.namespace.errors.ServiceUnavailableException; +import org.lance.namespace.errors.TableAlreadyExistsException; +import org.lance.namespace.errors.TableNotFoundException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; +import org.lance.namespace.model.DeregisterTableRequest; +import org.lance.namespace.model.DeregisterTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; -import org.lance.namespace.model.JsonArrowSchema; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; import org.lance.namespace.model.ListTablesResponse; import org.lance.namespace.model.NamespaceExistsRequest; import org.lance.namespace.model.TableExistsRequest; -import org.lance.namespace.util.ArrowIpcUtil; -import org.lance.namespace.util.CommonUtil; -import org.lance.namespace.util.JsonArrowSchemaConverter; -import org.lance.namespace.util.PageUtil; -import org.lance.namespace.util.ValidationUtil; 
import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -61,19 +57,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import static org.lance.namespace.hive3.Hive3ErrorType.DatabaseAlreadyExist; -import static org.lance.namespace.hive3.Hive3ErrorType.HiveMetaStoreError; -import static org.lance.namespace.hive3.Hive3ErrorType.TableAlreadyExists; -import static org.lance.namespace.hive3.Hive3ErrorType.TableNotFound; - -public class Hive3Namespace implements LanceNamespace, Configurable { +public class Hive3Namespace implements LanceNamespace { private static final Logger LOG = LoggerFactory.getLogger(Hive3Namespace.class); private Hive3ClientPool clientPool; @@ -83,6 +73,11 @@ public class Hive3Namespace implements LanceNamespace, Configurable configProperties, BufferAllocator allocator) { this.allocator = allocator; @@ -126,7 +121,7 @@ public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { @Override public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { ObjectIdentifier id = ObjectIdentifier.of(request.getId()); - CreateNamespaceRequest.ModeEnum mode = request.getMode(); + String mode = request.getMode() != null ? 
request.getMode().toLowerCase() : "create"; Map properties = request.getProperties(); ValidationUtil.checkArgument( @@ -154,11 +149,8 @@ public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest requ Catalog catalogObj = Hive3Util.getCatalogOrNull(clientPool, catalog); if (catalogObj == null) { - throw LanceNamespaceException.notFound( - String.format("Namespace does not exist: %s", id.stringStyleId()), - HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException( + String.format("Namespace does not exist: %s", id.stringStyleId())); } if (catalogObj.getDescription() != null) { @@ -173,11 +165,8 @@ public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest requ Database database = Hive3Util.getDatabaseOrNull(clientPool, catalog, db); if (database == null) { - throw LanceNamespaceException.notFound( - String.format("Namespace does not exist: %s", id.stringStyleId()), - HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException( + String.format("Namespace does not exist: %s", id.stringStyleId())); } if (database.getDescription() != null) { @@ -214,11 +203,8 @@ public void namespaceExists(NamespaceExistsRequest request) { Catalog catalogObj = Hive3Util.getCatalogOrNull(clientPool, catalog); if (catalogObj == null) { - throw LanceNamespaceException.notFound( - String.format("Namespace does not exist: %s", id.stringStyleId()), - HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException( + String.format("Namespace does not exist: %s", id.stringStyleId())); } } else { String catalog = id.levelAtListPos(0).toLowerCase(); @@ -226,31 +212,25 @@ public void namespaceExists(NamespaceExistsRequest request) { Database database = Hive3Util.getDatabaseOrNull(clientPool, catalog, db); if (database == null) { - throw 
LanceNamespaceException.notFound( - String.format("Namespace does not exist: %s", id.stringStyleId()), - HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException( + String.format("Namespace does not exist: %s", id.stringStyleId())); } } } @Override public DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { + if ("Cascade".equalsIgnoreCase(request.getBehavior())) { + throw new InvalidInputException("Cascade behavior is not supported for this implementation"); + } + ObjectIdentifier id = ObjectIdentifier.of(request.getId()); - DropNamespaceRequest.ModeEnum mode = request.getMode(); - DropNamespaceRequest.BehaviorEnum behavior = request.getBehavior(); + String mode = request.getMode() != null ? request.getMode().toLowerCase() : "fail"; + String behavior = request.getBehavior() != null ? request.getBehavior() : "Restrict"; ValidationUtil.checkArgument( !id.isRoot() && id.levels() <= 2, "Expect a 2-level namespace but get %s", id); - if (mode == null) { - mode = DropNamespaceRequest.ModeEnum.FAIL; - } - if (behavior == null) { - behavior = DropNamespaceRequest.BehaviorEnum.RESTRICT; - } - Map properties = doDropNamespace(id, mode, behavior); DropNamespaceResponse response = new DropNamespaceResponse(); @@ -272,11 +252,8 @@ public void tableExists(TableExistsRequest request) { Optional

hmsTable = Hive3Util.getTable(clientPool, catalog, db, table); if (!hmsTable.isPresent()) { - throw LanceNamespaceException.notFound( - String.format("Table does not exist: %s", tableId.stringStyleId()), - TableNotFound.getType(), - tableId.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new TableNotFoundException( + String.format("Table does not exist: %s", tableId.stringStyleId())); } Hive3Util.validateLanceTable(hmsTable.get()); @@ -306,6 +283,11 @@ public ListTablesResponse listTables(ListTablesRequest request) { @Override public DescribeTableResponse describeTable(DescribeTableRequest request) { + if (Boolean.TRUE.equals(request.getLoadDetailedMetadata())) { + throw new InvalidInputException( + "load_detailed_metadata=true is not supported for this implementation"); + } + ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( @@ -314,11 +296,8 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { Optional location = doDescribeTable(tableId); if (!location.isPresent()) { - throw LanceNamespaceException.notFound( - String.format("Table does not exist: %s", tableId.stringStyleId()), - TableNotFound.getType(), - tableId.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new TableNotFoundException( + String.format("Table does not exist: %s", tableId.stringStyleId())); } DescribeTableResponse response = new DescribeTableResponse(); @@ -326,46 +305,7 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { return response; } - @Override - public CreateTableResponse createTable(CreateTableRequest request, byte[] requestData) { - // Validate that requestData is a valid Arrow IPC stream - ValidationUtil.checkNotNull( - requestData, "Request data (Arrow IPC stream) is required for createTable"); - ValidationUtil.checkArgument( - requestData.length > 0, "Request data (Arrow IPC stream) cannot be empty"); - - ObjectIdentifier tableId = 
ObjectIdentifier.of(request.getId()); - - // Extract schema from Arrow IPC stream - JsonArrowSchema jsonSchema; - try { - jsonSchema = ArrowIpcUtil.extractSchemaFromIpc(requestData); - } catch (IOException e) { - throw LanceNamespaceException.badRequest( - "Invalid Arrow IPC stream: " + e.getMessage(), - "INVALID_ARROW_IPC", - tableId.stringStyleId(), - "Failed to extract schema from Arrow IPC stream"); - } - Schema schema = JsonArrowSchemaConverter.convertToArrowSchema(jsonSchema); - - ValidationUtil.checkArgument( - tableId.levels() == 3, "Expect 3-level table identifier but get %s", tableId); - - String location = request.getLocation(); - if (location == null || location.isEmpty()) { - location = - getDefaultTableLocation( - tableId.levelAtListPos(0), tableId.levelAtListPos(1), tableId.levelAtListPos(2)); - } - - doCreateTable(tableId, schema, location, request.getProperties(), requestData); - - CreateTableResponse response = new CreateTableResponse(); - response.setLocation(location); - response.setVersion(1L); - return response; - } + // Removed: createTable(CreateTableRequest, byte[]) - using default implementation from interface @Override public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { @@ -381,8 +321,8 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request tableId.levelAtListPos(0), tableId.levelAtListPos(1), tableId.levelAtListPos(2)); } - // Create table in metastore without data (pass null for requestData) - doCreateTable(tableId, null, location, request.getProperties(), null); + // Create table in metastore without data (pass null for requestData and properties) + doCreateTable(tableId, null, location, null, null); CreateEmptyTableResponse response = new CreateEmptyTableResponse(); response.setLocation(location); @@ -390,22 +330,20 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request } @Override - public DropTableResponse dropTable(DropTableRequest request) { 
+ public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( tableId.levels() == 3, "Expect 3-level table identifier but get %s", tableId); String location = doDropTable(tableId); - // TODO: remove data - DropTableResponse response = new DropTableResponse(); - response.setLocation(location); + DeregisterTableResponse response = new DeregisterTableResponse(); response.setId(request.getId()); + response.setLocation(location); return response; } - @Override public void setConf(Configuration conf) { this.hadoopConf = conf; } @@ -424,16 +362,12 @@ protected List doListNamespaces(ObjectIdentifier parent) { Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed operation: " + errorMessage, - HiveMetaStoreError.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException("Failed operation: " + errorMessage); } } protected void doCreateNamespace( - ObjectIdentifier id, CreateNamespaceRequest.ModeEnum mode, Map properties) { + ObjectIdentifier id, String mode, Map properties) { try { if (id.levels() == 1) { @@ -449,35 +383,26 @@ protected void doCreateNamespace( Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? 
e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed operation: " + errorMessage, - HiveMetaStoreError.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException("Failed operation: " + errorMessage); } } - private void createCatalog( - String catalogName, CreateNamespaceRequest.ModeEnum mode, Map properties) + private void createCatalog(String catalogName, String mode, Map properties) throws TException, InterruptedException { Catalog existingCatalog = Hive3Util.getCatalogOrNull(clientPool, catalogName); if (existingCatalog != null) { - switch (mode) { - case CREATE: - throw LanceNamespaceException.conflict( - String.format("Catalog %s already exists", catalogName), - DatabaseAlreadyExist.getType(), - "", - CommonUtil.formatCurrentStackTrace()); - case EXIST_OK: - return; - case OVERWRITE: - clientPool.run( - client -> { - client.dropCatalog(catalogName); - return null; - }); + if ("create".equals(mode)) { + throw new NamespaceAlreadyExistsException( + String.format("Catalog %s already exists", catalogName)); + } else if ("exist_ok".equals(mode) || "existok".equals(mode)) { + return; + } else if ("overwrite".equals(mode)) { + clientPool.run( + client -> { + client.dropCatalog(catalogName); + return null; + }); } } @@ -506,30 +431,23 @@ private void createCatalog( } private void createDatabase( - String catalogName, - String dbName, - CreateNamespaceRequest.ModeEnum mode, - Map properties) + String catalogName, String dbName, String mode, Map properties) throws TException, InterruptedException { Catalog catalog = Hive3Util.getCatalogOrThrowNotFoundException(clientPool, catalogName); Database oldDb = Hive3Util.getDatabaseOrNull(clientPool, catalogName, dbName); if (oldDb != null) { - switch (mode) { - case CREATE: - throw LanceNamespaceException.conflict( - String.format("Database %s.%s already exist", catalogName, dbName), - DatabaseAlreadyExist.getType(), - "", - 
CommonUtil.formatCurrentStackTrace()); - case EXIST_OK: - return; - case OVERWRITE: - clientPool.run( - client -> { - client.dropDatabase(catalogName, dbName, false, true, false); - return null; - }); + if ("create".equals(mode)) { + throw new NamespaceAlreadyExistsException( + String.format("Database %s.%s already exist", catalogName, dbName)); + } else if ("exist_ok".equals(mode) || "existok".equals(mode)) { + return; + } else if ("overwrite".equals(mode)) { + clientPool.run( + client -> { + client.dropDatabase(catalogName, dbName, false, true, false); + return null; + }); } } @@ -580,11 +498,8 @@ protected void doCreateTable( try { Optional
existing = Hive3Util.getTable(clientPool, catalog, db, tableName); if (existing.isPresent()) { - throw LanceNamespaceException.conflict( - String.format("Table %s.%s.%s already exists", catalog, db, tableName), - TableAlreadyExists.getType(), - String.format("%s.%s.%s", catalog, db, tableName), - CommonUtil.formatCurrentStackTrace()); + throw new TableAlreadyExistsException( + String.format("Table %s.%s.%s already exists", catalog, db, tableName)); } Table table = new Table(); @@ -614,11 +529,7 @@ protected void doCreateTable( if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - throw LanceNamespaceException.serverError( - "Fail to create table: " + e.getMessage(), - HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new InternalException("Fail to create table: " + e.getMessage()); } if (data != null && data.length > 0) { @@ -636,20 +547,13 @@ protected List doListTables(String catalog, String db) { // First validate that catalog and database exist Catalog catalogObj = Hive3Util.getCatalogOrNull(clientPool, catalog); if (catalogObj == null) { - throw LanceNamespaceException.notFound( - String.format("Catalog %s doesn't exist", catalog), - HiveMetaStoreError.getType(), - catalog, - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException(String.format("Catalog %s doesn't exist", catalog)); } Database database = Hive3Util.getDatabaseOrNull(clientPool, catalog, db); if (database == null) { - throw LanceNamespaceException.notFound( - String.format("Database %s.%s doesn't exist", catalog, db), - HiveMetaStoreError.getType(), - String.format("%s.%s", catalog, db), - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException( + String.format("Database %s.%s doesn't exist", catalog, db)); } List allTables = clientPool.run(client -> client.getAllTables(catalog, db)); @@ -676,11 +580,7 @@ protected List doListTables(String catalog, String db) { 
Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to list tables: " + errorMessage, - HiveMetaStoreError.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException("Failed to list tables: " + errorMessage); } } @@ -692,11 +592,8 @@ protected String doDropTable(ObjectIdentifier id) { try { Optional
hmsTable = Hive3Util.getTable(clientPool, catalog, db, tableName); if (!hmsTable.isPresent()) { - throw LanceNamespaceException.notFound( - String.format("Table %s.%s.%s does not exist", catalog, db, tableName), - TableNotFound.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new TableNotFoundException( + String.format("Table %s.%s.%s does not exist", catalog, db, tableName)); } Hive3Util.validateLanceTable(hmsTable.get()); @@ -714,18 +611,11 @@ protected String doDropTable(ObjectIdentifier id) { Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to drop table: " + errorMessage, - HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException("Failed to drop table: " + errorMessage); } } - protected Map doDropNamespace( - ObjectIdentifier id, - DropNamespaceRequest.ModeEnum mode, - DropNamespaceRequest.BehaviorEnum behavior) { + protected Map doDropNamespace(ObjectIdentifier id, String mode, String behavior) { try { if (id.levels() == 1) { @@ -741,64 +631,30 @@ protected Map doDropNamespace( Thread.currentThread().interrupt(); } String errorMessage = e.getMessage() != null ? 
e.getMessage() : e.getClass().getSimpleName(); - throw LanceNamespaceException.serviceUnavailable( - "Failed to drop namespace: " + errorMessage, - HiveMetaStoreError.getType(), - id.stringStyleId(), - CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException("Failed to drop namespace: " + errorMessage); } } - private Map doDropCatalog( - String catalog, - DropNamespaceRequest.ModeEnum mode, - DropNamespaceRequest.BehaviorEnum behavior) + private Map doDropCatalog(String catalog, String mode, String behavior) throws TException, InterruptedException { Catalog catalogObj = Hive3Util.getCatalogOrNull(clientPool, catalog); if (catalogObj == null) { - if (mode == DropNamespaceRequest.ModeEnum.SKIP) { + if ("skip".equals(mode)) { return new HashMap<>(); } else { - throw LanceNamespaceException.notFound( - String.format("Catalog %s doesn't exist", catalog), - HiveMetaStoreError.getType(), - catalog, - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException(String.format("Catalog %s doesn't exist", catalog)); } } - // Check for child databases - List databases = clientPool.run(client -> client.getAllDatabases(catalog)); - if (!databases.isEmpty()) { - if (behavior == DropNamespaceRequest.BehaviorEnum.RESTRICT) { - throw LanceNamespaceException.badRequest( + // Check for child databases (RESTRICT behavior only, not for Cascade) + boolean cascade = "Cascade".equalsIgnoreCase(behavior); + if (!cascade) { + List databases = clientPool.run(client -> client.getAllDatabases(catalog)); + if (!databases.isEmpty()) { + throw new InvalidInputException( String.format( "Catalog %s is not empty. 
Contains %d databases: %s", - catalog, databases.size(), databases), - HiveMetaStoreError.getType(), - catalog, - CommonUtil.formatCurrentStackTrace()); - } else if (behavior == DropNamespaceRequest.BehaviorEnum.CASCADE) { - // Drop all databases first - for (String dbName : databases) { - try { - doDropDatabase( - catalog, - dbName, - DropNamespaceRequest.ModeEnum.FAIL, - DropNamespaceRequest.BehaviorEnum.CASCADE); - LOG.info("Dropped database {}.{} during CASCADE operation", catalog, dbName); - } catch (Exception e) { - LOG.warn("Failed to drop database {}.{}: {}", catalog, dbName, e.getMessage()); - throw LanceNamespaceException.serviceUnavailable( - String.format( - "Failed to drop database %s.%s during CASCADE operation: %s", - catalog, dbName, e.getMessage()), - HiveMetaStoreError.getType(), - String.format("%s.%s", catalog, dbName), - CommonUtil.formatCurrentStackTrace()); - } - } + catalog, databases.size(), databases)); } } @@ -823,54 +679,27 @@ private Map doDropCatalog( } private Map doDropDatabase( - String catalog, - String db, - DropNamespaceRequest.ModeEnum mode, - DropNamespaceRequest.BehaviorEnum behavior) + String catalog, String db, String mode, String behavior) throws TException, InterruptedException { Database database = Hive3Util.getDatabaseOrNull(clientPool, catalog, db); if (database == null) { - if (mode == DropNamespaceRequest.ModeEnum.SKIP) { + if ("skip".equals(mode)) { return new HashMap<>(); } else { - throw LanceNamespaceException.notFound( - String.format("Database %s.%s doesn't exist", catalog, db), - HiveMetaStoreError.getType(), - String.format("%s.%s", catalog, db), - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException( + String.format("Database %s.%s doesn't exist", catalog, db)); } } - // Check if database contains tables - List tables = doListTables(catalog, db); - if (!tables.isEmpty()) { - if (behavior == DropNamespaceRequest.BehaviorEnum.RESTRICT) { - throw LanceNamespaceException.badRequest( + // 
Check if database contains tables (RESTRICT behavior only, not for Cascade) + boolean cascade = "Cascade".equalsIgnoreCase(behavior); + if (!cascade) { + List tables = doListTables(catalog, db); + if (!tables.isEmpty()) { + throw new InvalidInputException( String.format( "Database %s.%s is not empty. Contains %d tables: %s", - catalog, db, tables.size(), tables), - HiveMetaStoreError.getType(), - String.format("%s.%s", catalog, db), - CommonUtil.formatCurrentStackTrace()); - } else if (behavior == DropNamespaceRequest.BehaviorEnum.CASCADE) { - // Drop all tables first - for (String tableName : tables) { - try { - ObjectIdentifier tableId = - ObjectIdentifier.of(Lists.newArrayList(catalog, db, tableName)); - doDropTable(tableId); - LOG.info("Dropped table {}.{}.{} during CASCADE operation", catalog, db, tableName); - } catch (Exception e) { - LOG.warn("Failed to drop table {}.{}.{}: {}", catalog, db, tableName, e.getMessage()); - throw LanceNamespaceException.serviceUnavailable( - String.format( - "Failed to drop table %s.%s.%s during CASCADE operation: %s", - catalog, db, tableName, e.getMessage()), - HiveMetaStoreError.getType(), - String.format("%s.%s.%s", catalog, db, tableName), - CommonUtil.formatCurrentStackTrace()); - } - } + catalog, db, tables.size(), tables)); } } @@ -893,9 +722,10 @@ private Map doDropDatabase( } // Drop the database + final boolean cascadeDrop = cascade; clientPool.run( client -> { - client.dropDatabase(catalog, db, false, true, false); + client.dropDatabase(catalog, db, false, true, cascadeDrop); return null; }); diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3NamespaceConfig.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3NamespaceConfig.java index 243cd8b..c5ba2a2 100644 --- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3NamespaceConfig.java +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3NamespaceConfig.java 
@@ -13,9 +13,7 @@ */ package org.lance.namespace.hive3; -import org.lance.namespace.util.OpenDalUtil; -import org.lance.namespace.util.PropertyUtil; - +import java.util.HashMap; import java.util.Map; public class Hive3NamespaceConfig { @@ -42,12 +40,27 @@ public class Hive3NamespaceConfig { private final String root; public Hive3NamespaceConfig(Map properties) { + // Inline PropertyUtil.propertyAsInt + String clientPoolSizeStr = properties.get(CLIENT_POOL_SIZE); this.clientPoolSize = - PropertyUtil.propertyAsInt(properties, CLIENT_POOL_SIZE, CLIENT_POOL_SIZE_DEFAULT); - this.storageOptions = PropertyUtil.propertiesWithPrefix(properties, STORAGE_OPTIONS_PREFIX); + clientPoolSizeStr != null ? Integer.parseInt(clientPoolSizeStr) : CLIENT_POOL_SIZE_DEFAULT; + + // Inline PropertyUtil.propertiesWithPrefix + Map filteredStorageOptions = new HashMap<>(); + for (Map.Entry entry : properties.entrySet()) { + if (entry.getKey().startsWith(STORAGE_OPTIONS_PREFIX)) { + filteredStorageOptions.put( + entry.getKey().substring(STORAGE_OPTIONS_PREFIX.length()), entry.getValue()); + } + } + this.storageOptions = filteredStorageOptions; + + // Inline PropertyUtil.propertyAsString and OpenDalUtil.stripTrailingSlash + String rootValue = properties.getOrDefault(ROOT, ROOT_DEFAULT); this.root = - OpenDalUtil.stripTrailingSlash( - PropertyUtil.propertyAsString(properties, ROOT, ROOT_DEFAULT)); + rootValue != null && rootValue.endsWith("/") + ? 
rootValue.substring(0, rootValue.length() - 1) + : rootValue; } public int getClientPoolSize() { diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java index 245ec9b..b77eb89 100644 --- a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/Hive3Util.java @@ -13,8 +13,9 @@ */ package org.lance.namespace.hive3; -import org.lance.namespace.LanceNamespaceException; -import org.lance.namespace.util.CommonUtil; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.NamespaceNotFoundException; +import org.lance.namespace.errors.ServiceUnavailableException; import com.google.common.collect.Maps; import org.apache.hadoop.hive.metastore.api.Catalog; @@ -31,10 +32,6 @@ import java.util.Optional; import java.util.function.Supplier; -import static org.lance.namespace.hive3.Hive3ErrorType.HiveMetaStoreError; -import static org.lance.namespace.hive3.Hive3ErrorType.InvalidLanceTable; -import static org.lance.namespace.hive3.Hive3ErrorType.UnknownCatalog; - public class Hive3Util { public static Catalog getCatalogOrNull(Hive3ClientPool clientPool, String catalog) { try { @@ -45,8 +42,7 @@ public static Catalog getCatalogOrNull(Hive3ClientPool clientPool, String catalo if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - throw LanceNamespaceException.serviceUnavailable( - e.getMessage(), HiveMetaStoreError.getType(), "", CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException(e.getMessage()); } } @@ -54,11 +50,7 @@ public static Catalog getCatalogOrThrowNotFoundException( Hive3ClientPool clientPool, String catalog) { Catalog catalogObj = getCatalogOrNull(clientPool, catalog); if (catalogObj == null) { - throw LanceNamespaceException.notFound( - String.format("Catalog %s 
doesn't exist", catalog), - UnknownCatalog.getType(), - "", - CommonUtil.formatCurrentStackTrace()); + throw new NamespaceNotFoundException(String.format("Catalog %s doesn't exist", catalog)); } return catalogObj; } @@ -72,8 +64,7 @@ public static Database getDatabaseOrNull(Hive3ClientPool clientPool, String cata if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - throw LanceNamespaceException.serviceUnavailable( - e.getMessage(), HiveMetaStoreError.getType(), "", CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException(e.getMessage()); } } @@ -86,8 +77,7 @@ public static Database getDatabaseOrNull(Hive3ClientPool clientPool, String db) if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - throw LanceNamespaceException.serviceUnavailable( - e.getMessage(), HiveMetaStoreError.getType(), "", CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException(e.getMessage()); } } @@ -151,8 +141,7 @@ public static Optional
getTable(Hive3ClientPool clientPool, String db, St if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - throw LanceNamespaceException.serviceUnavailable( - e.getMessage(), HiveMetaStoreError.getType(), "", CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException(e.getMessage()); } } @@ -166,20 +155,16 @@ public static Optional
getTable( if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - throw LanceNamespaceException.serviceUnavailable( - e.getMessage(), HiveMetaStoreError.getType(), "", CommonUtil.formatCurrentStackTrace()); + throw new ServiceUnavailableException(e.getMessage()); } } public static void validateLanceTable(Table table) { Map params = table.getParameters(); if (params == null || !"lance".equalsIgnoreCase(params.get("table_type"))) { - throw LanceNamespaceException.badRequest( + throw new InvalidInputException( String.format( - "Table %s.%s is not a Lance table", table.getDbName(), table.getTableName()), - InvalidLanceTable.getType(), - String.format("%s.%s", table.getDbName(), table.getTableName()), - CommonUtil.formatCurrentStackTrace()); + "Table %s.%s is not a Lance table", table.getDbName(), table.getTableName())); } } diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ObjectIdentifier.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ObjectIdentifier.java new file mode 100644 index 0000000..cfd5e36 --- /dev/null +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ObjectIdentifier.java @@ -0,0 +1,55 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive3; + +import java.util.Collections; +import java.util.List; + +/** Represents a hierarchical identifier for namespaces and tables. 
*/ +public class ObjectIdentifier { + private final List levels; + + private ObjectIdentifier(List levels) { + this.levels = levels != null ? levels : Collections.emptyList(); + } + + public static ObjectIdentifier of(List levels) { + return new ObjectIdentifier(levels); + } + + public boolean isRoot() { + return levels.isEmpty(); + } + + public int levels() { + return levels.size(); + } + + public String levelAtListPos(int pos) { + if (pos < 0 || pos >= levels.size()) { + throw new IndexOutOfBoundsException( + "Position " + pos + " is out of bounds for size " + levels.size()); + } + return levels.get(pos); + } + + public String stringStyleId() { + return String.join(".", levels); + } + + @Override + public String toString() { + return stringStyleId(); + } +} diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/PageUtil.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/PageUtil.java new file mode 100644 index 0000000..62475cc --- /dev/null +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/PageUtil.java @@ -0,0 +1,70 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive3; + +import java.util.List; + +/** Utility methods for pagination. 
*/ +public class PageUtil { + + private static final int DEFAULT_PAGE_SIZE = 100; + + private PageUtil() {} + + public static int normalizePageSize(Integer pageSize) { + if (pageSize == null || pageSize <= 0) { + return DEFAULT_PAGE_SIZE; + } + return pageSize; + } + + public static Page splitPage(List items, String pageToken, int pageSize) { + int startIndex = 0; + if (pageToken != null && !pageToken.isEmpty()) { + try { + startIndex = Integer.parseInt(pageToken); + } catch (NumberFormatException e) { + startIndex = 0; + } + } + + if (startIndex >= items.size()) { + return new Page(java.util.Collections.emptyList(), null); + } + + int endIndex = Math.min(startIndex + pageSize, items.size()); + List pageItems = items.subList(startIndex, endIndex); + + String nextPageToken = endIndex < items.size() ? String.valueOf(endIndex) : null; + return new Page(pageItems, nextPageToken); + } + + public static class Page { + private final List items; + private final String nextPageToken; + + public Page(List items, String nextPageToken) { + this.items = items; + this.nextPageToken = nextPageToken; + } + + public List items() { + return items; + } + + public String nextPageToken() { + return nextPageToken; + } + } +} diff --git a/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ValidationUtil.java b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ValidationUtil.java new file mode 100644 index 0000000..c2dbc3a --- /dev/null +++ b/java/lance-namespace-hive3/src/main/java/org/lance/namespace/hive3/ValidationUtil.java @@ -0,0 +1,35 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.hive3; + +import org.lance.namespace.errors.InvalidInputException; + +/** Utility methods for validation. */ +public class ValidationUtil { + + private ValidationUtil() {} + + public static void checkArgument(boolean expression, String message, Object... args) { + if (!expression) { + throw new InvalidInputException(String.format(message, args)); + } + } + + public static String checkNotNullOrEmptyString(String value, String message) { + if (value == null || value.isEmpty()) { + throw new InvalidInputException(message); + } + return value; + } +} diff --git a/java/lance-namespace-hive3/src/test/java/org/lance/namespace/hive3/TestHive3Namespace.java b/java/lance-namespace-hive3/src/test/java/org/lance/namespace/hive3/TestHive3Namespace.java index 9ae66c4..e98376a 100644 --- a/java/lance-namespace-hive3/src/test/java/org/lance/namespace/hive3/TestHive3Namespace.java +++ b/java/lance-namespace-hive3/src/test/java/org/lance/namespace/hive3/TestHive3Namespace.java @@ -14,20 +14,14 @@ package org.lance.namespace.hive3; import org.lance.namespace.LanceNamespace; -import org.lance.namespace.LanceNamespaceException; -import org.lance.namespace.LanceNamespaces; -import org.lance.namespace.TestHelper; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.LanceNamespaceException; import org.lance.namespace.model.CreateNamespaceRequest; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; import 
org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; -import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.ListTablesRequest; import org.lance.namespace.model.ListTablesResponse; import org.lance.namespace.model.NamespaceExistsRequest; @@ -41,7 +35,6 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.io.File; @@ -74,7 +67,10 @@ public static void setup() throws IOException { tmpDirBase = file.getAbsolutePath(); HiveConf hiveConf = metastore.hiveConf(); - namespace = LanceNamespaces.connect("hive3", Maps.newHashMap(), hiveConf, allocator); + Hive3Namespace hive3Namespace = new Hive3Namespace(); + hive3Namespace.setHadoopConf(hiveConf); + hive3Namespace.initialize(Maps.newHashMap(), allocator); + namespace = hive3Namespace; // Setup: Create catalog and database for tests CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); @@ -82,7 +78,7 @@ public static void setup() throws IOException { properties.put("catalog.location.uri", "file://" + tmpDirBase + "/test_catalog"); nsRequest.setProperties(properties); nsRequest.setId(Lists.list("test_catalog")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); namespace.createNamespace(nsRequest); nsRequest.setId(Lists.list("test_catalog", "test_db")); @@ -114,97 +110,13 @@ public void cleanup() throws Exception { properties.put("catalog.location.uri", "file://" + tmpDirBase + "/test_catalog"); nsRequest.setProperties(properties); 
nsRequest.setId(Lists.list("test_catalog")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); namespace.createNamespace(nsRequest); nsRequest.setId(Lists.list("test_catalog", "test_db")); namespace.createNamespace(nsRequest); } - @Disabled("Need to figure out the proper interface") - @Test - public void testCreateTable() throws IOException { - // Test: Create table with valid parameters - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_catalog", "test_db", "test_table")); - request.setLocation(tmpDirBase + "/test_catalog/test_db/test_table.lance"); - - Map properties = Maps.newHashMap(); - properties.put("custom_prop", "custom_value"); - request.setProperties(properties); - - byte[] testData = TestHelper.createTestArrowData(allocator); - CreateTableResponse response = namespace.createTable(request, testData); - - assertEquals(request.getLocation(), response.getLocation()); - assertEquals(1L, response.getVersion()); - } - - @Test - public void testCreateTableAlreadyExists() throws IOException { - // Setup: Create table - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_catalog", "test_db", "test_table")); - request.setLocation(tmpDirBase + "/test_catalog/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(request, testData); - - // Test: Create table that already exists - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.createTable(request, testData)); - assertTrue(error.getMessage().contains("Table test_catalog.test_db.test_table already exists")); - } - - @Test - public void testCreateTableManagedByImpl() throws IOException { - // Test: Create table with managed_by=impl (not supported) - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_catalog", "test_db", "impl_table")); - request.setLocation(tmpDirBase + 
"/test_catalog/test_db/impl_table.lance"); - - Map properties = Maps.newHashMap(); - properties.put("managed_by", "impl"); - request.setProperties(properties); - - byte[] testData = TestHelper.createTestArrowData(allocator); - Exception error = - assertThrows( - UnsupportedOperationException.class, () -> namespace.createTable(request, testData)); - assertTrue(error.getMessage().contains("managed_by=impl is not supported yet")); - } - - @Test - public void testCreateTableWithoutData() throws IOException { - // Test: Create table without data - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_catalog", "test_db", "no_data_table")); - request.setLocation(tmpDirBase + "/test_catalog/test_db/no_data_table.lance"); - - byte[] emptyData = TestHelper.createEmptyArrowData(allocator); - CreateTableResponse response = namespace.createTable(request, emptyData); - assertEquals(request.getLocation(), response.getLocation()); - } - - @Test - public void testDescribeTable() throws IOException { - // Setup: Create table - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_catalog", "test_db", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_catalog/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Describe existing Lance table - DescribeTableRequest request = new DescribeTableRequest(); - request.setId(Lists.list("test_catalog", "test_db", "test_table")); - - DescribeTableResponse response = namespace.describeTable(request); - assertEquals( - "file:" + tmpDirBase + "/test_catalog/test_db/test_table.lance", response.getLocation()); - } - @Test public void testDescribeNonExistentTable() { // Test: Describe non-existent table @@ -215,131 +127,6 @@ public void testDescribeNonExistentTable() { assertTrue(error.getMessage().contains("Table does not exist")); } - @Test - public void 
testDropTable() throws IOException { - // Setup: Create table - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_catalog", "test_db", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_catalog/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Drop existing table - DropTableRequest request = new DropTableRequest(); - request.setId(Lists.list("test_catalog", "test_db", "test_table")); - - DropTableResponse response = namespace.dropTable(request); - assertEquals( - "file:" + tmpDirBase + "/test_catalog/test_db/test_table.lance", response.getLocation()); - assertEquals(request.getId(), response.getId()); - - // Verify table is dropped by trying to describe it - DescribeTableRequest descRequest = new DescribeTableRequest(); - descRequest.setId(request.getId()); - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.describeTable(descRequest)); - assertTrue(error.getMessage().contains("Table does not exist")); - } - - @Test - public void testDropNonExistentTable() { - // Test: Drop non-existent table - DropTableRequest request = new DropTableRequest(); - request.setId(Lists.list("test_catalog", "test_db", "non_existent")); - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.dropTable(request)); - assertTrue( - error.getMessage().contains("Table test_catalog.test_db.non_existent does not exist")); - } - - @Test - public void testCreateTableWithDefaultLocationFromRoot() throws IOException { - // With our enhancement, databases created without explicit location - // will use the root config location instead of Hive warehouse - - // Setup: Create namespace with custom root configuration - Map properties = Maps.newHashMap(); - properties.put("root", tmpDirBase); - - HiveConf hiveConf = metastore.hiveConf(); - LanceNamespace customNamespace = - 
LanceNamespaces.connect("hive3", properties, hiveConf, allocator); - - // Setup: Create database (will use root location) - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_catalog", "test_db_root")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - customNamespace.createNamespace(nsRequest); - - // Test: Create table without specifying location - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_catalog", "test_db_root", "test_table")); - // Don't set location - it will be derived from database location - - // Create test Arrow IPC data - byte[] testData = TestHelper.createTestArrowData(allocator); - CreateTableResponse response = customNamespace.createTable(request, testData); - - // Verify: Location should be derived from root-based database location - // Note: The location may or may not have file: prefix depending on how Hive processes it - String expectedLocation = tmpDirBase + "/test_db_root/test_table.lance"; - assertTrue( - response.getLocation().equals(expectedLocation) - || response.getLocation().equals("file:" + expectedLocation), - "Expected location (with or without file: prefix): " - + expectedLocation - + " but got: " - + response.getLocation()); - assertEquals(1L, response.getVersion()); - } - - @Test - public void testCreateTableWithExplicitDatabaseLocation() throws IOException { - // Note: This test verifies that when a database location is explicitly set, - // it takes precedence over the root config. However, the current implementation - // may fall back to root config if database location retrieval fails. 
- - // Setup: Create namespace with custom root configuration - Map properties = Maps.newHashMap(); - properties.put("root", tmpDirBase); - - HiveConf hiveConf = metastore.hiveConf(); - LanceNamespace customNamespace = - LanceNamespaces.connect("hive3", properties, hiveConf, allocator); - - // Setup: Create database with specific location - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_catalog", "test_db_with_location")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - - // Set database location - this should take precedence over root config - String databaseLocation = tmpDirBase + "/custom_db_location"; - Map dbProperties = Maps.newHashMap(); - dbProperties.put("database.location-uri", databaseLocation); - nsRequest.setProperties(dbProperties); - - customNamespace.createNamespace(nsRequest); - - // Test: Create table without specifying location - CreateTableRequest request = new CreateTableRequest(); - request.setId(Lists.list("test_catalog", "test_db_with_location", "test_table")); - // Don't set location - should be derived from database location or root fallback - - // Create test Arrow IPC data - byte[] testData = TestHelper.createTestArrowData(allocator); - CreateTableResponse response = customNamespace.createTable(request, testData); - - // Verify: Location should be derived from either database location or root fallback - // For now, accept either pattern until database location retrieval is fixed - assertTrue( - response.getLocation().contains("custom_db_location/test_table.lance") - || response.getLocation().contains("test_db_with_location/test_table.lance"), - "Expected either custom database location or root fallback but got: " - + response.getLocation()); - assertEquals(1L, response.getVersion()); - } - @Test public void testDescribeNamespaceCatalog() { // Test: Describe catalog-level namespace @@ -370,7 +157,7 @@ public void testDescribeNamespaceDatabaseWithCustomProperties() { // 
Setup: Create database with custom properties CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); nsRequest.setId(Lists.list("test_catalog", "custom_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + nsRequest.setMode("Create"); Map properties = Maps.newHashMap(); properties.put("database.description", "Custom database description"); @@ -457,24 +244,6 @@ public void testNamespaceExistsNonExistentDatabase() { assertTrue(error.getMessage().contains("Namespace does not exist")); } - @Test - public void testTableExists() throws IOException { - // Setup: Create table - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_catalog", "test_db", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_catalog/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Check existing table - TableExistsRequest request = new TableExistsRequest(); - request.setId(Lists.list("test_catalog", "test_db", "test_table")); - - // Should not throw exception for existing Lance table - namespace.tableExists(request); - } - @Test public void testTableExistsNonExistent() { // Test: Check non-existent table @@ -486,34 +255,6 @@ public void testTableExistsNonExistent() { assertTrue(error.getMessage().contains("Table does not exist")); } - @Test - public void testListTables() throws IOException { - // Create first table - CreateTableRequest createRequest1 = new CreateTableRequest(); - createRequest1.setId(Lists.list("test_catalog", "test_db", "table1")); - createRequest1.setLocation(tmpDirBase + "/test_catalog/test_db/table1.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest1, testData); - - // Create second table - CreateTableRequest createRequest2 = new CreateTableRequest(); - createRequest2.setId(Lists.list("test_catalog", "test_db", "table2")); - 
createRequest2.setLocation(tmpDirBase + "/test_catalog/test_db/table2.lance"); - - namespace.createTable(createRequest2, testData); - - // Test: List tables - ListTablesRequest request = new ListTablesRequest(); - request.setId(Lists.list("test_catalog", "test_db")); - - ListTablesResponse response = namespace.listTables(request); - - assertEquals(2, response.getTables().size()); - assertTrue(response.getTables().contains("table1")); - assertTrue(response.getTables().contains("table2")); - } - @Test public void testListTablesEmpty() { // Test: List tables in empty database @@ -525,67 +266,6 @@ public void testListTablesEmpty() { assertEquals(0, response.getTables().size()); } - @Test - public void testListTablesWithPagination() throws IOException { - // Create multiple tables - for (int i = 1; i <= 5; i++) { - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_catalog", "test_db", "table" + i)); - createRequest.setLocation(tmpDirBase + "/test_catalog/test_db/table" + i + ".lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - } - - // Test: List tables with pagination (limit 3) - ListTablesRequest request = new ListTablesRequest(); - request.setId(Lists.list("test_catalog", "test_db")); - request.setLimit(3); - - ListTablesResponse response = namespace.listTables(request); - - assertEquals(3, response.getTables().size()); - // Should have a page token for remaining results - assertTrue(response.getPageToken() != null && !response.getPageToken().isEmpty()); - - // Get remaining tables - ListTablesRequest nextRequest = new ListTablesRequest(); - nextRequest.setId(Lists.list("test_catalog", "test_db")); - nextRequest.setPageToken(response.getPageToken()); - - ListTablesResponse nextResponse = namespace.listTables(nextRequest); - - assertEquals(2, nextResponse.getTables().size()); - // No more pages - assertTrue(nextResponse.getPageToken() == null || 
nextResponse.getPageToken().isEmpty()); - } - - @Test - public void testListTablesWithCustomDatabase() throws IOException { - // Setup: Create database with custom name - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Lists.list("test_catalog", "custom_db")); - nsRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(nsRequest); - - // Create table in custom database - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_catalog", "custom_db", "custom_table")); - createRequest.setLocation(tmpDirBase + "/test_catalog/custom_db/custom_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: List tables in custom database - ListTablesRequest request = new ListTablesRequest(); - request.setId(Lists.list("test_catalog", "custom_db")); - - ListTablesResponse response = namespace.listTables(request); - - assertEquals(1, response.getTables().size()); - assertTrue(response.getTables().contains("custom_table")); - } - @Test public void testListTablesNonExistentDatabase() { // Test: List tables in non-existent database @@ -613,12 +293,12 @@ public void testDropNamespaceBasicDatabase() throws IOException { // Setup: Create catalog and database CreateNamespaceRequest catalogRequest = new CreateNamespaceRequest(); catalogRequest.setId(Lists.list("test_catalog_basic_db")); - catalogRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + catalogRequest.setMode("Create"); namespace.createNamespace(catalogRequest); CreateNamespaceRequest dbRequest = new CreateNamespaceRequest(); dbRequest.setId(Lists.list("test_catalog_basic_db", "test_db")); - dbRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + dbRequest.setMode("Create"); Map properties = Maps.newHashMap(); properties.put("database.description", "Test database for dropping"); @@ -648,35 +328,15 @@ public void 
testDropNamespaceBasicDatabase() throws IOException { } @Test - public void testDropNamespaceBasicCatalog() { - // Setup: Create catalog - CreateNamespaceRequest catalogRequest = new CreateNamespaceRequest(); - catalogRequest.setId(Lists.list("test_catalog_basic")); - catalogRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - - Map properties = Maps.newHashMap(); - properties.put("description", "Test catalog for dropping"); - catalogRequest.setProperties(properties); - - namespace.createNamespace(catalogRequest); - - // Test: Drop the catalog with CASCADE (since Hive creates default database automatically) + public void testDropNamespaceCascadeRejected() { + // Test: Drop with CASCADE behavior - should be rejected DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Lists.list("test_catalog_basic")); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - - DropNamespaceResponse response = namespace.dropNamespace(dropRequest); - - // Verify properties were returned - assertEquals("Test catalog for dropping", response.getProperties().get("description")); - - // Verify catalog was dropped - NamespaceExistsRequest existsRequest = new NamespaceExistsRequest(); - existsRequest.setId(Lists.list("test_catalog_basic")); + dropRequest.setBehavior("Cascade"); Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.namespaceExists(existsRequest)); - assertTrue(error.getMessage().contains("Namespace does not exist")); + assertThrows(InvalidInputException.class, () -> namespace.dropNamespace(dropRequest)); + assertTrue(error.getMessage().contains("Cascade behavior is not supported")); } @Test @@ -684,7 +344,7 @@ public void testDropNamespaceSkipMode() { // Test: Drop non-existent database with SKIP mode DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Lists.list("non_existent_catalog", "non_existent_db")); - dropRequest.setMode(DropNamespaceRequest.ModeEnum.SKIP); + 
dropRequest.setMode("Skip"); DropNamespaceResponse response = namespace.dropNamespace(dropRequest); @@ -697,151 +357,34 @@ public void testDropNamespaceFailMode() { // Test: Drop non-existent database with FAIL mode (default) DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Lists.list("non_existent_catalog", "non_existent_db")); - dropRequest.setMode(DropNamespaceRequest.ModeEnum.FAIL); + dropRequest.setMode("Fail"); Exception error = assertThrows(LanceNamespaceException.class, () -> namespace.dropNamespace(dropRequest)); assertTrue(error.getMessage().contains("doesn't exist")); } - @Test - public void testDropDatabaseRestrictWithTables() throws IOException { - // Setup: Create catalog, database and table - CreateNamespaceRequest catalogRequest = new CreateNamespaceRequest(); - catalogRequest.setId(Lists.list("test_catalog_restrict")); - catalogRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(catalogRequest); - - CreateNamespaceRequest dbRequest = new CreateNamespaceRequest(); - dbRequest.setId(Lists.list("test_catalog_restrict", "test_db")); - dbRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(dbRequest); - - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_catalog_restrict", "test_db", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_catalog_restrict/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Try to drop database with RESTRICT behavior (should fail) - DropNamespaceRequest dropRequest = new DropNamespaceRequest(); - dropRequest.setId(Lists.list("test_catalog_restrict", "test_db")); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.RESTRICT); - - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.dropNamespace(dropRequest)); - 
assertTrue(error.getMessage().contains("Database test_catalog_restrict.test_db is not empty")); - assertTrue(error.getMessage().contains("Contains 1 tables")); - } - @Test public void testDropCatalogRestrictWithDatabases() { // Setup: Create catalog and database CreateNamespaceRequest catalogRequest = new CreateNamespaceRequest(); catalogRequest.setId(Lists.list("test_catalog_restrict_db")); - catalogRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + catalogRequest.setMode("Create"); namespace.createNamespace(catalogRequest); CreateNamespaceRequest dbRequest = new CreateNamespaceRequest(); dbRequest.setId(Lists.list("test_catalog_restrict_db", "test_db")); - dbRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); + dbRequest.setMode("Create"); namespace.createNamespace(dbRequest); // Test: Try to drop catalog with RESTRICT behavior (should fail) DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Lists.list("test_catalog_restrict_db")); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.RESTRICT); + dropRequest.setBehavior("Restrict"); Exception error = assertThrows(LanceNamespaceException.class, () -> namespace.dropNamespace(dropRequest)); assertTrue(error.getMessage().contains("is not empty")); assertTrue(error.getMessage().contains("databases")); } - - @Test - public void testDropDatabaseCascadeWithTables() throws IOException { - // Setup: Create catalog, database and multiple tables - CreateNamespaceRequest catalogRequest = new CreateNamespaceRequest(); - catalogRequest.setId(Lists.list("test_catalog_cascade_db")); - catalogRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(catalogRequest); - - CreateNamespaceRequest dbRequest = new CreateNamespaceRequest(); - dbRequest.setId(Lists.list("test_catalog_cascade_db", "test_db")); - dbRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(dbRequest); - - // Create first table - CreateTableRequest 
createRequest1 = new CreateTableRequest(); - createRequest1.setId(Lists.list("test_catalog_cascade_db", "test_db", "table1")); - createRequest1.setLocation(tmpDirBase + "/test_catalog_cascade_db/test_db/table1.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest1, testData); - - // Create second table - CreateTableRequest createRequest2 = new CreateTableRequest(); - createRequest2.setId(Lists.list("test_catalog_cascade_db", "test_db", "table2")); - createRequest2.setLocation(tmpDirBase + "/test_catalog_cascade_db/test_db/table2.lance"); - - namespace.createTable(createRequest2, testData); - - // Test: Drop database with CASCADE behavior - DropNamespaceRequest dropRequest = new DropNamespaceRequest(); - dropRequest.setId(Lists.list("test_catalog_cascade_db", "test_db")); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - - DropNamespaceResponse response = namespace.dropNamespace(dropRequest); - - // Verify database properties were returned - assertTrue(response.getProperties().containsKey("database.location-uri")); - - // Verify database was dropped - NamespaceExistsRequest existsRequest = new NamespaceExistsRequest(); - existsRequest.setId(Lists.list("test_catalog_cascade_db", "test_db")); - - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.namespaceExists(existsRequest)); - assertTrue(error.getMessage().contains("Namespace does not exist")); - } - - @Test - public void testDropCatalogCascadeWithDatabasesAndTables() throws IOException { - // Setup: Create catalog, database and table - CreateNamespaceRequest catalogRequest = new CreateNamespaceRequest(); - catalogRequest.setId(Lists.list("test_catalog_cascade")); - catalogRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(catalogRequest); - - CreateNamespaceRequest dbRequest = new CreateNamespaceRequest(); - dbRequest.setId(Lists.list("test_catalog_cascade", "test_db")); - 
dbRequest.setMode(CreateNamespaceRequest.ModeEnum.CREATE); - namespace.createNamespace(dbRequest); - - CreateTableRequest createRequest = new CreateTableRequest(); - createRequest.setId(Lists.list("test_catalog_cascade", "test_db", "test_table")); - createRequest.setLocation(tmpDirBase + "/test_catalog_cascade/test_db/test_table.lance"); - - byte[] testData = TestHelper.createTestArrowData(allocator); - namespace.createTable(createRequest, testData); - - // Test: Drop catalog with CASCADE behavior - DropNamespaceRequest dropRequest = new DropNamespaceRequest(); - dropRequest.setId(Lists.list("test_catalog_cascade")); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - - DropNamespaceResponse response = namespace.dropNamespace(dropRequest); - - // Verify catalog properties were returned - assertTrue(response.getProperties().containsKey("catalog.location.uri")); - - // Verify catalog was dropped - NamespaceExistsRequest existsRequest = new NamespaceExistsRequest(); - existsRequest.setId(Lists.list("test_catalog_cascade")); - - Exception error = - assertThrows(LanceNamespaceException.class, () -> namespace.namespaceExists(existsRequest)); - assertTrue(error.getMessage().contains("Namespace does not exist")); - } } diff --git a/java/lance-namespace-hive3/src/test/java/org/lance/namespace/hive3/TestHive3NamespaceIntegration.java b/java/lance-namespace-hive3/src/test/java/org/lance/namespace/hive3/TestHive3NamespaceIntegration.java index 457f466..8e26396 100644 --- a/java/lance-namespace-hive3/src/test/java/org/lance/namespace/hive3/TestHive3NamespaceIntegration.java +++ b/java/lance-namespace-hive3/src/test/java/org/lance/namespace/hive3/TestHive3NamespaceIntegration.java @@ -13,17 +13,18 @@ */ package org.lance.namespace.hive3; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.LanceNamespaceException; import 
org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; +import org.lance.namespace.model.DeregisterTableRequest; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; -import org.lance.namespace.model.DropTableRequest; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; @@ -31,6 +32,8 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeAll; @@ -96,8 +99,12 @@ public void setUp() throws Exception { testCatalog = "hive"; // Default catalog in Hive 3.x testDatabase = "test_db_" + uniqueId; + // Set up Hadoop configuration with metastore URI + Configuration hadoopConf = new Configuration(); + hadoopConf.set(HiveConf.ConfVars.METASTOREURIS.varname, METASTORE_URI); + namespace.setConf(hadoopConf); + Map config = new HashMap<>(); - config.put("hive.metastore.uris", METASTORE_URI); config.put("client.pool-size", "3"); config.put("root", "/tmp/lance-integration-test"); @@ -110,19 +117,13 @@ public void tearDown() { // Clean up test database DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Arrays.asList(testCatalog, testDatabase)); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); + dropRequest.setBehavior("Restrict"); namespace.dropNamespace(dropRequest); } catch 
(Exception e) { // Ignore cleanup errors } - if (namespace != null) { - try { - namespace.close(); - } catch (Exception e) { - // Ignore - } - } + // Namespace cleanup handled by Hive internals if (allocator != null) { allocator.close(); @@ -157,8 +158,8 @@ public void testDatabaseOperations() { DescribeNamespaceResponse describeResponse = namespace.describeNamespace(describeRequest); assertThat(describeResponse).isNotNull(); - assertThat(describeResponse.getProperties()).containsEntry( - "database.description", "Integration test database"); + assertThat(describeResponse.getProperties()) + .containsEntry("database.description", "Integration test database"); // List databases in catalog ListNamespacesRequest listRequest = new ListNamespacesRequest(); @@ -184,7 +185,8 @@ public void testTableOperations() { nsRequest.setId(Arrays.asList(testCatalog, testDatabase)); namespace.createNamespace(nsRequest); - String tableName = "test_table_" + UUID.randomUUID().toString().substring(0, 8).replace("-", ""); + String tableName = + "test_table_" + UUID.randomUUID().toString().substring(0, 8).replace("-", ""); // Create empty table (declare table without data) CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); @@ -200,7 +202,6 @@ public void testTableOperations() { DescribeTableResponse describeResponse = namespace.describeTable(describeRequest); assertThat(describeResponse.getLocation()).contains(tableName); - assertThat(describeResponse.getProperties()).containsEntry("table_type", "lance"); // List tables ListTablesRequest listRequest = new ListTablesRequest(); @@ -209,10 +210,10 @@ public void testTableOperations() { ListTablesResponse listResponse = namespace.listTables(listRequest); assertThat(listResponse.getTables()).contains(tableName); - // Drop table - DropTableRequest dropRequest = new DropTableRequest(); - dropRequest.setId(Arrays.asList(testCatalog, testDatabase, tableName)); - namespace.dropTable(dropRequest); + // Deregister table + 
DeregisterTableRequest deregisterRequest = new DeregisterTableRequest(); + deregisterRequest.setId(Arrays.asList(testCatalog, testDatabase, tableName)); + namespace.deregisterTable(deregisterRequest); // Verify table doesn't exist assertThatThrownBy(() -> namespace.describeTable(describeRequest)) @@ -220,29 +221,14 @@ public void testTableOperations() { } @Test - public void testCascadeDropDatabase() { - // Create database - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Arrays.asList(testCatalog, testDatabase)); - namespace.createNamespace(nsRequest); - - // Create a table in the database - String tableName = "cascade_test_table"; - CreateEmptyTableRequest tableRequest = new CreateEmptyTableRequest(); - tableRequest.setId(Arrays.asList(testCatalog, testDatabase, tableName)); - tableRequest.setLocation("/tmp/lance-integration-test/" + testDatabase + "/" + tableName); - namespace.createEmptyTable(tableRequest); - - // Drop database with cascade + public void testCascadeDropDatabaseRejected() { + // Drop database with cascade - should be rejected DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Arrays.asList(testCatalog, testDatabase)); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - namespace.dropNamespace(dropRequest); + dropRequest.setBehavior("Cascade"); - // Verify database doesn't exist - DescribeNamespaceRequest describeRequest = new DescribeNamespaceRequest(); - describeRequest.setId(Arrays.asList(testCatalog, testDatabase)); - assertThatThrownBy(() -> namespace.describeNamespace(describeRequest)) - .isInstanceOf(LanceNamespaceException.class); + assertThatThrownBy(() -> namespace.dropNamespace(dropRequest)) + .isInstanceOf(InvalidInputException.class) + .hasMessageContaining("Cascade behavior is not supported"); } } diff --git a/java/lance-namespace-iceberg/pom.xml b/java/lance-namespace-iceberg/pom.xml index b486c54..bfa5d9d 100644 --- 
a/java/lance-namespace-iceberg/pom.xml +++ b/java/lance-namespace-iceberg/pom.xml @@ -18,10 +18,18 @@ Iceberg REST Catalog namespace implementation for Lance + + org.lance + lance-namespace-impls-core + org.lance lance-core + + org.lance + lance-namespace-core + org.lance lance-namespace-apache-client @@ -48,20 +56,28 @@ - junit - junit + org.junit.jupiter + junit-jupiter test - org.mockito mockito-core test - - org.slf4j - slf4j-simple + org.mockito + mockito-junit-jupiter + test + + + org.assertj + assertj-core + test + + + ch.qos.logback + logback-classic test diff --git a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergModels.java b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergModels.java index 4db65a3..b177e58 100644 --- a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergModels.java +++ b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergModels.java @@ -418,6 +418,31 @@ public void setConfig(Map config) { } } + @JsonIgnoreProperties(ignoreUnknown = true) + public static class ConfigResponse { + @JsonProperty("defaults") + private Map defaults; + + @JsonProperty("overrides") + private Map overrides; + + public Map getDefaults() { + return defaults; + } + + public void setDefaults(Map defaults) { + this.defaults = defaults; + } + + public Map getOverrides() { + return overrides; + } + + public void setOverrides(Map overrides) { + this.overrides = overrides; + } + } + public static IcebergSchema createDummySchema() { IcebergSchema schema = new IcebergSchema(); schema.setType("struct"); @@ -429,7 +454,7 @@ public static IcebergSchema createDummySchema() { dummyField.setRequired(false); dummyField.setType("string"); - schema.setFields(List.of(dummyField)); + schema.setFields(java.util.Collections.singletonList(dummyField)); return schema; } } diff --git a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java 
b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java index f683f4a..9eb2050 100644 --- a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java +++ b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespace.java @@ -14,20 +14,26 @@ package org.lance.namespace.iceberg; import org.lance.namespace.LanceNamespace; -import org.lance.namespace.LanceNamespaceException; -import org.lance.namespace.ObjectIdentifier; +import org.lance.namespace.errors.InternalException; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.NamespaceAlreadyExistsException; +import org.lance.namespace.errors.NamespaceNotFoundException; +import org.lance.namespace.errors.TableAlreadyExistsException; +import org.lance.namespace.errors.TableNotFoundException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; +import org.lance.namespace.model.DeclareTableRequest; +import org.lance.namespace.model.DeclareTableResponse; +import org.lance.namespace.model.DeregisterTableRequest; +import org.lance.namespace.model.DeregisterTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; @@ -35,15 +41,17 @@ import 
org.lance.namespace.model.NamespaceExistsRequest; import org.lance.namespace.model.TableExistsRequest; import org.lance.namespace.rest.RestClient; +import org.lance.namespace.rest.RestClientException; +import org.lance.namespace.util.ObjectIdentifier; import org.lance.namespace.util.ValidationUtil; import org.apache.arrow.memory.BufferAllocator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.Closeable; import java.io.IOException; import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -51,10 +59,22 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -/** Iceberg REST Catalog namespace implementation for Lance. */ -public class IcebergNamespace implements LanceNamespace { +/** + * Iceberg REST Catalog namespace implementation for Lance. + * + *

The prefix (warehouse) is included in the namespace identifier: + * + *

    + *
  • Namespace ID format: [prefix, namespace1, namespace2, ...] + *
  • Table ID format: [prefix, namespace1, namespace2, ..., table_name] + *
+ * + *

This is consistent with how Polaris handles catalog names. + */ +public class IcebergNamespace implements LanceNamespace, Closeable { private static final Logger LOG = LoggerFactory.getLogger(IcebergNamespace.class); private static final String TABLE_TYPE_LANCE = "lance"; private static final String TABLE_TYPE_KEY = "table_type"; @@ -63,6 +83,7 @@ public class IcebergNamespace implements LanceNamespace { private IcebergNamespaceConfig config; private RestClient restClient; private BufferAllocator allocator; + private final Map prefixCache = new HashMap<>(); public IcebergNamespace() {} @@ -73,20 +94,16 @@ public void initialize(Map configProperties, BufferAllocator all RestClient.Builder clientBuilder = RestClient.builder() - .baseUrl(config.getFullApiUrl()) - .connectTimeout(config.getConnectTimeout()) - .readTimeout(config.getReadTimeout()) + .baseUrl(config.getBaseApiUrl()) + .connectTimeout(config.getConnectTimeout(), TimeUnit.MILLISECONDS) + .readTimeout(config.getReadTimeout(), TimeUnit.MILLISECONDS) .maxRetries(config.getMaxRetries()); - Map headers = new HashMap<>(); if (config.getAuthToken() != null) { - headers.put("Authorization", "Bearer " + config.getAuthToken()); + clientBuilder.authToken(config.getAuthToken()); } if (config.getWarehouse() != null) { - headers.put("X-Iceberg-Access-Delegation", "vended-credentials"); - } - if (!headers.isEmpty()) { - clientBuilder.defaultHeaders(headers); + clientBuilder.header("X-Iceberg-Access-Delegation", "vended-credentials"); } this.restClient = clientBuilder.build(); @@ -98,14 +115,58 @@ public String namespaceId() { return String.format("IcebergNamespace { endpoint: \"%s\" }", config.getEndpoint()); } + private String resolvePrefix(String warehouse) { + if (prefixCache.containsKey(warehouse)) { + return prefixCache.get(warehouse); + } + + try { + Map params = new HashMap<>(); + params.put("warehouse", warehouse); + IcebergModels.ConfigResponse response = + restClient.get("/v1/config", params, 
IcebergModels.ConfigResponse.class); + if (response != null + && response.getDefaults() != null + && response.getDefaults().get("prefix") != null) { + String prefix = response.getDefaults().get("prefix"); + prefixCache.put(warehouse, prefix); + LOG.debug("Resolved warehouse '{}' to prefix '{}'", warehouse, prefix); + return prefix; + } + } catch (Exception e) { + LOG.debug("Failed to resolve prefix for warehouse '{}': {}", warehouse, e.getMessage()); + } + + prefixCache.put(warehouse, warehouse); + return warehouse; + } + + private String getPrefixPath(String warehouse) { + String prefix = resolvePrefix(warehouse); + return "/v1/" + prefix; + } + @Override public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { - ObjectIdentifier nsId = ObjectIdentifier.of(request.getId()); + ObjectIdentifier nsId = + request.getId() != null + ? ObjectIdentifier.of(request.getId()) + : ObjectIdentifier.of(Collections.emptyList()); + + ValidationUtil.checkArgument( + nsId.levels() >= 1, "Must specify at least the prefix (warehouse)"); try { + String prefix = nsId.levelAtListPos(0); + List parentNs = + nsId.levels() > 1 + ? nsId.listStyleId().subList(1, nsId.levels()) + : Collections.emptyList(); + String prefixPath = getPrefixPath(prefix); + Map params = new HashMap<>(); - if (nsId.levels() > 0) { - String parent = encodeNamespace(nsId.getIdentifier()); + if (!parentNs.isEmpty()) { + String parent = encodeNamespace(parentNs); params.put("parent", parent); } if (request.getPageToken() != null) { @@ -113,13 +174,20 @@ public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { } IcebergModels.ListNamespacesResponse response = - restClient.get("/namespaces", params, IcebergModels.ListNamespacesResponse.class); + params.isEmpty() + ? 
restClient.get( + prefixPath + "/namespaces", IcebergModels.ListNamespacesResponse.class) + : restClient.get( + prefixPath + "/namespaces", params, IcebergModels.ListNamespacesResponse.class); List namespaces = new ArrayList<>(); if (response != null && response.getNamespaces() != null) { for (List ns : response.getNamespaces()) { if (!ns.isEmpty()) { - namespaces.add(ns.get(ns.size() - 1)); + List fullNs = new ArrayList<>(); + fullNs.add(prefix); + fullNs.addAll(ns); + namespaces.add(String.join(".", fullNs)); } } } @@ -130,69 +198,72 @@ public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { ListNamespacesResponse result = new ListNamespacesResponse(); result.setNamespaces(resultNamespaces); return result; - - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to list namespaces: " + e.getMessage()); + } catch (RestClientException e) { + throw new InternalException("Failed to list namespaces: " + e.getMessage()); } } @Override public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { ObjectIdentifier nsId = ObjectIdentifier.of(request.getId()); - ValidationUtil.checkArgument(nsId.levels() >= 1, "Namespace must have at least one level"); + ValidationUtil.checkArgument( + nsId.levels() >= 2, "Namespace must have at least prefix and namespace levels"); try { + String prefix = nsId.levelAtListPos(0); + List namespace = nsId.listStyleId().subList(1, nsId.levels()); + String prefixPath = getPrefixPath(prefix); + IcebergModels.CreateNamespaceRequest createRequest = new IcebergModels.CreateNamespaceRequest(); - createRequest.setNamespace(nsId.getIdentifier()); + createRequest.setNamespace(namespace); createRequest.setProperties(request.getProperties()); IcebergModels.CreateNamespaceResponse response = - restClient.post("/namespaces", createRequest, IcebergModels.CreateNamespaceResponse.class); + restClient.post( + prefixPath + "/namespaces", + createRequest, + 
IcebergModels.CreateNamespaceResponse.class); + + LOG.info("Created namespace: {}.{}", prefix, String.join(".", namespace)); CreateNamespaceResponse result = new CreateNamespaceResponse(); result.setProperties(response != null ? response.getProperties() : null); return result; - - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 409) { - throw LanceNamespaceException.conflict( - "Namespace already exists", - "NAMESPACE_EXISTS", - request.getId().toString(), - e.getResponseBody()); + } catch (RestClientException e) { + if (e.isConflict()) { + throw new NamespaceAlreadyExistsException( + "Namespace already exists: " + nsId.stringStyleId()); } - throw new LanceNamespaceException(500, "Failed to create namespace: " + e.getMessage()); - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to create namespace: " + e.getMessage()); + throw new InternalException("Failed to create namespace: " + e.getMessage()); } } @Override public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest request) { ObjectIdentifier nsId = ObjectIdentifier.of(request.getId()); - ValidationUtil.checkArgument(nsId.levels() >= 1, "Namespace must have at least one level"); + ValidationUtil.checkArgument( + nsId.levels() >= 2, "Namespace must have at least prefix and namespace levels"); try { - String namespacePath = encodeNamespace(nsId.getIdentifier()); + String prefix = nsId.levelAtListPos(0); + List namespace = nsId.listStyleId().subList(1, nsId.levels()); + String prefixPath = getPrefixPath(prefix); + String namespacePath = encodeNamespace(namespace); + IcebergModels.GetNamespaceResponse response = - restClient.get("/namespaces/" + namespacePath, IcebergModels.GetNamespaceResponse.class); + restClient.get( + prefixPath + "/namespaces/" + namespacePath, + IcebergModels.GetNamespaceResponse.class); DescribeNamespaceResponse result = new DescribeNamespaceResponse(); result.setProperties(response != null ? 
response.getProperties() : null); return result; - - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 404) { - throw LanceNamespaceException.notFound( - "Namespace not found", - "NAMESPACE_NOT_FOUND", - request.getId().toString(), - e.getResponseBody()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new NamespaceNotFoundException("Namespace not found: " + nsId.stringStyleId()); } - throw new LanceNamespaceException(500, "Failed to describe namespace: " + e.getMessage()); - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to describe namespace: " + e.getMessage()); + throw new InternalException("Failed to describe namespace: " + e.getMessage()); } } @@ -203,54 +274,61 @@ public void namespaceExists(NamespaceExistsRequest request) { @Override public DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { + if ("Cascade".equalsIgnoreCase(request.getBehavior())) { + throw new InvalidInputException("Cascade behavior is not supported for this implementation"); + } + ObjectIdentifier nsId = ObjectIdentifier.of(request.getId()); - ValidationUtil.checkArgument(nsId.levels() >= 1, "Namespace must have at least one level"); + ValidationUtil.checkArgument( + nsId.levels() >= 2, "Namespace must have at least prefix and namespace levels"); try { - String namespacePath = encodeNamespace(nsId.getIdentifier()); - restClient.delete("/namespaces/" + namespacePath); + String prefix = nsId.levelAtListPos(0); + List namespace = nsId.listStyleId().subList(1, nsId.levels()); + String prefixPath = getPrefixPath(prefix); + String namespacePath = encodeNamespace(namespace); + restClient.delete(prefixPath + "/namespaces/" + namespacePath); + LOG.info("Dropped namespace: {}.{}", prefix, String.join(".", namespace)); return new DropNamespaceResponse(); - - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 404) { + } catch (RestClientException e) { + if (e.isNotFound()) { return new 
DropNamespaceResponse(); } - if (e.getStatusCode() == 409) { - throw LanceNamespaceException.conflict( - "Namespace not empty", - "NAMESPACE_NOT_EMPTY", - request.getId().toString(), - e.getResponseBody()); - } - throw new LanceNamespaceException(500, "Failed to drop namespace: " + e.getMessage()); - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to drop namespace: " + e.getMessage()); + throw new InternalException("Failed to drop namespace: " + e.getMessage()); } } @Override public ListTablesResponse listTables(ListTablesRequest request) { ObjectIdentifier nsId = ObjectIdentifier.of(request.getId()); - ValidationUtil.checkArgument(nsId.levels() >= 1, "Namespace must have at least one level"); + ValidationUtil.checkArgument(nsId.levels() >= 2, "Must specify at least prefix and namespace"); try { - String namespacePath = encodeNamespace(nsId.getIdentifier()); + String prefix = nsId.levelAtListPos(0); + List namespace = nsId.listStyleId().subList(1, nsId.levels()); + String prefixPath = getPrefixPath(prefix); + String namespacePath = encodeNamespace(namespace); + Map params = new HashMap<>(); if (request.getPageToken() != null) { params.put("pageToken", request.getPageToken()); } IcebergModels.ListTablesResponse response = - restClient.get( - "/namespaces/" + namespacePath + "/tables", - params, - IcebergModels.ListTablesResponse.class); + params.isEmpty() + ? 
restClient.get( + prefixPath + "/namespaces/" + namespacePath + "/tables", + IcebergModels.ListTablesResponse.class) + : restClient.get( + prefixPath + "/namespaces/" + namespacePath + "/tables", + params, + IcebergModels.ListTablesResponse.class); List tables = new ArrayList<>(); if (response != null && response.getIdentifiers() != null) { for (IcebergModels.TableIdentifier tableId : response.getIdentifiers()) { - if (isLanceTable(nsId.getIdentifier(), tableId.getName())) { + if (isLanceTable(prefix, namespace, tableId.getName())) { tables.add(tableId.getName()); } } @@ -262,25 +340,31 @@ public ListTablesResponse listTables(ListTablesRequest request) { ListTablesResponse result = new ListTablesResponse(); result.setTables(resultTables); return result; - - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to list tables: " + e.getMessage()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new NamespaceNotFoundException("Namespace not found: " + nsId.stringStyleId()); + } + throw new InternalException("Failed to list tables: " + e.getMessage()); } } @Override - public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { + public DeclareTableResponse declareTable(DeclareTableRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - tableId.levels() >= 2, "Table identifier must have at least namespace and table name"); + tableId.levels() >= 3, "Table identifier must have prefix, namespace, and table name"); - List namespace = tableId.getIdentifier().subList(0, tableId.levels() - 1); + String prefix = tableId.levelAtListPos(0); + List namespace = tableId.listStyleId().subList(1, tableId.levels() - 1); String tableName = tableId.levelAtListPos(tableId.levels() - 1); try { + String prefixPath = getPrefixPath(prefix); + String tablePath = request.getLocation(); if (tablePath == null || tablePath.isEmpty()) { - tablePath = config.getRoot() + "/" + 
String.join("/", namespace) + "/" + tableName; + List pathParts = tableId.listStyleId().subList(0, tableId.levels() - 1); + tablePath = config.getRoot() + "/" + String.join("/", pathParts) + "/" + tableName; } IcebergModels.CreateTableRequest createRequest = new IcebergModels.CreateTableRequest(); @@ -290,91 +374,92 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request Map properties = new HashMap<>(); properties.put(TABLE_TYPE_KEY, TABLE_TYPE_LANCE); - if (request.getProperties() != null) { - properties.putAll(request.getProperties()); - } createRequest.setProperties(properties); String namespacePath = encodeNamespace(namespace); - IcebergModels.LoadTableResponse response = - restClient.post( - "/namespaces/" + namespacePath + "/tables", - createRequest, - IcebergModels.LoadTableResponse.class); + restClient.post( + prefixPath + "/namespaces/" + namespacePath + "/tables", + createRequest, + IcebergModels.LoadTableResponse.class); - CreateEmptyTableResponse result = new CreateEmptyTableResponse(); + LOG.info("Declared Lance table: {}", tableId.stringStyleId()); + + DeclareTableResponse result = new DeclareTableResponse(); result.setLocation(tablePath); - if (response != null && response.getMetadata() != null) { - result.setProperties(response.getMetadata().getProperties()); - } return result; - - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 409) { - throw LanceNamespaceException.conflict( - "Table already exists", - "TABLE_EXISTS", - request.getId().toString(), - e.getResponseBody()); + } catch (RestClientException e) { + if (e.isConflict()) { + throw new TableAlreadyExistsException("Table already exists: " + tableId.stringStyleId()); } - if (e.getStatusCode() == 404) { - throw LanceNamespaceException.notFound( - "Namespace not found", - "NAMESPACE_NOT_FOUND", - String.join(".", namespace), - e.getResponseBody()); + if (e.isNotFound()) { + throw new NamespaceNotFoundException( + "Namespace not found: " + 
prefix + "." + String.join(".", namespace)); } - throw new LanceNamespaceException(500, "Failed to create empty table: " + e.getMessage()); - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to create empty table: " + e.getMessage()); + throw new InternalException("Failed to declare table: " + e.getMessage()); } } + /** + * @deprecated Use {@link #declareTable(DeclareTableRequest)} instead. + */ + @Deprecated + @Override + public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { + DeclareTableRequest declareRequest = new DeclareTableRequest(); + declareRequest.setId(request.getId()); + declareRequest.setLocation(request.getLocation()); + DeclareTableResponse response = declareTable(declareRequest); + CreateEmptyTableResponse result = new CreateEmptyTableResponse(); + result.setLocation(response.getLocation()); + return result; + } + @Override public DescribeTableResponse describeTable(DescribeTableRequest request) { + if (Boolean.TRUE.equals(request.getLoadDetailedMetadata())) { + throw new InvalidInputException( + "load_detailed_metadata=true is not supported for this implementation"); + } + ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - tableId.levels() >= 2, "Table identifier must have at least namespace and table name"); + tableId.levels() >= 3, "Table identifier must have prefix, namespace, and table name"); - List namespace = tableId.getIdentifier().subList(0, tableId.levels() - 1); + String prefix = tableId.levelAtListPos(0); + List namespace = tableId.listStyleId().subList(1, tableId.levels() - 1); String tableName = tableId.levelAtListPos(tableId.levels() - 1); try { + String prefixPath = getPrefixPath(prefix); String namespacePath = encodeNamespace(namespace); - String encodedTableName = URLEncoder.encode(tableName, StandardCharsets.UTF_8); + String encodedTableName = urlEncode(tableName); IcebergModels.LoadTableResponse response = restClient.get( - 
"/namespaces/" + namespacePath + "/tables/" + encodedTableName, + prefixPath + "/namespaces/" + namespacePath + "/tables/" + encodedTableName, IcebergModels.LoadTableResponse.class); if (response == null || response.getMetadata() == null) { - throw LanceNamespaceException.notFound( - "Table not found", "TABLE_NOT_FOUND", request.getId().toString(), "No metadata"); + throw new TableNotFoundException("Table not found: " + tableId.stringStyleId()); } Map props = response.getMetadata().getProperties(); if (props == null || !TABLE_TYPE_LANCE.equalsIgnoreCase(props.get(TABLE_TYPE_KEY))) { - throw LanceNamespaceException.badRequest( - "Not a Lance table", - "INVALID_TABLE", - request.getId().toString(), - "Table is not managed by Lance"); + throw new InvalidInputException( + String.format( + "Table %s is not a Lance table (missing table_type property)", + tableId.stringStyleId())); } DescribeTableResponse result = new DescribeTableResponse(); result.setLocation(response.getMetadata().getLocation()); - result.setProperties(props); + result.setStorageOptions(props); return result; - - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 404) { - throw LanceNamespaceException.notFound( - "Table not found", "TABLE_NOT_FOUND", request.getId().toString(), e.getResponseBody()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new TableNotFoundException("Table not found: " + tableId.stringStyleId()); } - throw new LanceNamespaceException(500, "Failed to describe table: " + e.getMessage()); - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to describe table: " + e.getMessage()); + throw new InternalException("Failed to describe table: " + e.getMessage()); } } @@ -384,49 +469,46 @@ public void tableExists(TableExistsRequest request) { } @Override - public DropTableResponse dropTable(DropTableRequest request) { + public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ObjectIdentifier tableId = 
ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - tableId.levels() >= 2, "Table identifier must have at least namespace and table name"); + tableId.levels() >= 3, "Table identifier must have prefix, namespace, and table name"); - List namespace = tableId.getIdentifier().subList(0, tableId.levels() - 1); + String prefix = tableId.levelAtListPos(0); + List namespace = tableId.listStyleId().subList(1, tableId.levels() - 1); String tableName = tableId.levelAtListPos(tableId.levels() - 1); try { + String prefixPath = getPrefixPath(prefix); String namespacePath = encodeNamespace(namespace); - String encodedTableName = URLEncoder.encode(tableName, StandardCharsets.UTF_8); - - String tableLocation = null; - try { - IcebergModels.LoadTableResponse tableResponse = - restClient.get( - "/namespaces/" + namespacePath + "/tables/" + encodedTableName, - IcebergModels.LoadTableResponse.class); - if (tableResponse != null && tableResponse.getMetadata() != null) { - tableLocation = tableResponse.getMetadata().getLocation(); - } - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 404) { - DropTableResponse result = new DropTableResponse(); - result.setId(request.getId()); - return result; - } + String encodedTableName = urlEncode(tableName); + + IcebergModels.LoadTableResponse getResponse = + restClient.get( + prefixPath + "/namespaces/" + namespacePath + "/tables/" + encodedTableName, + IcebergModels.LoadTableResponse.class); + + String location = null; + if (getResponse != null && getResponse.getMetadata() != null) { + location = getResponse.getMetadata().getLocation(); } - Map params = new HashMap<>(); - params.put("purgeRequested", "false"); - restClient.delete("/namespaces/" + namespacePath + "/tables/" + encodedTableName, params); + restClient.delete( + prefixPath + "/namespaces/" + namespacePath + "/tables/" + encodedTableName); + LOG.info("Deregistered table: {}", tableId.stringStyleId()); - DropTableResponse result = new 
DropTableResponse(); - result.setId(request.getId()); - result.setLocation(tableLocation); + DeregisterTableResponse result = new DeregisterTableResponse(); + result.setLocation(location); return result; - - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to drop table: " + e.getMessage()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new TableNotFoundException("Table not found: " + tableId.stringStyleId()); + } + throw new InternalException("Failed to deregister table: " + e.getMessage()); } } + @Override public void close() throws IOException { if (restClient != null) { restClient.close(); @@ -436,19 +518,28 @@ public void close() throws IOException { private String encodeNamespace(List namespace) { String joined = namespace.stream() - .map(s -> URLEncoder.encode(s, StandardCharsets.UTF_8)) + .map(this::urlEncode) .collect(Collectors.joining(String.valueOf(NAMESPACE_SEPARATOR))); - return URLEncoder.encode(joined, StandardCharsets.UTF_8); + return urlEncode(joined); + } + + private String urlEncode(String s) { + try { + return URLEncoder.encode(s, "UTF-8"); + } catch (java.io.UnsupportedEncodingException e) { + throw new RuntimeException("UTF-8 encoding not supported", e); + } } - private boolean isLanceTable(List namespace, String tableName) { + private boolean isLanceTable(String prefix, List namespace, String tableName) { try { + String prefixPath = getPrefixPath(prefix); String namespacePath = encodeNamespace(namespace); - String encodedTableName = URLEncoder.encode(tableName, StandardCharsets.UTF_8); + String encodedTableName = urlEncode(tableName); IcebergModels.LoadTableResponse response = restClient.get( - "/namespaces/" + namespacePath + "/tables/" + encodedTableName, + prefixPath + "/namespaces/" + namespacePath + "/tables/" + encodedTableName, IcebergModels.LoadTableResponse.class); if (response != null && response.getMetadata() != null) { diff --git 
a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespaceConfig.java b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespaceConfig.java index f5bc7a5..ce1793c 100644 --- a/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespaceConfig.java +++ b/java/lance-namespace-iceberg/src/main/java/org/lance/namespace/iceberg/IcebergNamespaceConfig.java @@ -20,7 +20,6 @@ public class IcebergNamespaceConfig { public static final String ENDPOINT = "endpoint"; public static final String WAREHOUSE = "warehouse"; - public static final String PREFIX = "prefix"; public static final String AUTH_TOKEN = "auth_token"; public static final String CREDENTIAL = "credential"; public static final String CONNECT_TIMEOUT = "connect_timeout"; @@ -30,7 +29,6 @@ public class IcebergNamespaceConfig { private final String endpoint; private final String warehouse; - private final String prefix; private final String authToken; private final String credential; private final int connectTimeout; @@ -45,7 +43,6 @@ public IcebergNamespaceConfig(Map properties) { } this.warehouse = properties.get(WAREHOUSE); - this.prefix = properties.getOrDefault(PREFIX, ""); this.authToken = properties.get(AUTH_TOKEN); this.credential = properties.get(CREDENTIAL); this.connectTimeout = Integer.parseInt(properties.getOrDefault(CONNECT_TIMEOUT, "10000")); @@ -62,10 +59,6 @@ public String getWarehouse() { return warehouse; } - public String getPrefix() { - return prefix; - } - public String getAuthToken() { return authToken; } @@ -90,11 +83,7 @@ public String getRoot() { return root; } - public String getFullApiUrl() { - String base = endpoint.endsWith("/") ? endpoint.substring(0, endpoint.length() - 1) : endpoint; - if (prefix != null && !prefix.isEmpty()) { - return base + "/" + prefix; - } - return base; + public String getBaseApiUrl() { + return endpoint.endsWith("/") ? 
endpoint.substring(0, endpoint.length() - 1) : endpoint; } } diff --git a/java/lance-namespace-iceberg/src/test/java/org/lance/namespace/iceberg/TestIcebergNamespaceIntegration.java b/java/lance-namespace-iceberg/src/test/java/org/lance/namespace/iceberg/TestIcebergNamespaceIntegration.java new file mode 100644 index 0000000..ad7eb7c --- /dev/null +++ b/java/lance-namespace-iceberg/src/test/java/org/lance/namespace/iceberg/TestIcebergNamespaceIntegration.java @@ -0,0 +1,280 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.lance.namespace.iceberg; + +import org.lance.namespace.errors.LanceNamespaceException; +import org.lance.namespace.model.CreateEmptyTableRequest; +import org.lance.namespace.model.CreateEmptyTableResponse; +import org.lance.namespace.model.CreateNamespaceRequest; +import org.lance.namespace.model.CreateNamespaceResponse; +import org.lance.namespace.model.DeregisterTableRequest; +import org.lance.namespace.model.DescribeNamespaceRequest; +import org.lance.namespace.model.DescribeNamespaceResponse; +import org.lance.namespace.model.DescribeTableRequest; +import org.lance.namespace.model.DescribeTableResponse; +import org.lance.namespace.model.DropNamespaceRequest; +import org.lance.namespace.model.ListNamespacesRequest; +import org.lance.namespace.model.ListNamespacesResponse; +import org.lance.namespace.model.ListTablesRequest; +import org.lance.namespace.model.ListTablesResponse; +import org.lance.namespace.model.NamespaceExistsRequest; +import org.lance.namespace.model.TableExistsRequest; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Integration tests for IcebergNamespace against a running Iceberg REST Catalog. + * + *

<p>This test uses Lakekeeper as the Iceberg REST Catalog implementation. To run these tests, + * start the catalog with: + * + * <pre>
+ *   cd docker/iceberg && docker-compose up -d
+ * </pre>
+ * + *

Tests are automatically skipped if the catalog is not available. + */ +public class TestIcebergNamespaceIntegration { + + private static final String ICEBERG_ENDPOINT = "http://localhost:8282/catalog"; + private static final String TEST_WAREHOUSE = "test_warehouse"; + private static boolean icebergAvailable = false; + + private IcebergNamespace namespace; + private BufferAllocator allocator; + private String testNamespace; + + @BeforeAll + public static void checkIcebergAvailable() { + try { + URL url = new URL(ICEBERG_ENDPOINT + "/v1/config?warehouse=" + TEST_WAREHOUSE); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.setConnectTimeout(5000); + conn.setReadTimeout(5000); + + int responseCode = conn.getResponseCode(); + conn.disconnect(); + + icebergAvailable = responseCode == 200; + + if (!icebergAvailable) { + System.out.println( + "Iceberg REST Catalog is not available at " + + ICEBERG_ENDPOINT + + " - skipping integration tests"); + } else { + System.out.println( + "Iceberg REST Catalog detected at " + + ICEBERG_ENDPOINT + + " (response code: " + + responseCode + + ")"); + } + } catch (Exception e) { + icebergAvailable = false; + System.out.println( + "Iceberg REST Catalog is not available at " + + ICEBERG_ENDPOINT + + " (" + + e.getMessage() + + ") - skipping integration tests"); + } + } + + @BeforeEach + public void setUp() throws Exception { + Assumptions.assumeTrue( + icebergAvailable, "Iceberg REST Catalog is not available at " + ICEBERG_ENDPOINT); + + allocator = new RootAllocator(); + namespace = new IcebergNamespace(); + + String uniqueId = UUID.randomUUID().toString().substring(0, 8); + testNamespace = "test_ns_" + uniqueId; + + Map config = new HashMap<>(); + config.put("endpoint", ICEBERG_ENDPOINT); + config.put("root", "s3://warehouse"); + + namespace.initialize(config, allocator); + } + + @AfterEach + public void tearDown() { + try { + DropNamespaceRequest dropRequest = new 
DropNamespaceRequest(); + dropRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + namespace.dropNamespace(dropRequest); + } catch (Exception e) { + // Ignore cleanup errors + } + + if (allocator != null) { + allocator.close(); + } + } + + @Test + public void testNamespaceOperations() { + // Create namespace + CreateNamespaceRequest createRequest = new CreateNamespaceRequest(); + createRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + createRequest.setProperties(Collections.singletonMap("description", "Test namespace")); + + CreateNamespaceResponse createResponse = namespace.createNamespace(createRequest); + assertThat(createResponse).isNotNull(); + + // Describe namespace + DescribeNamespaceRequest describeRequest = new DescribeNamespaceRequest(); + describeRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + + DescribeNamespaceResponse describeResponse = namespace.describeNamespace(describeRequest); + assertThat(describeResponse).isNotNull(); + + // Check namespace exists + NamespaceExistsRequest existsRequest = new NamespaceExistsRequest(); + existsRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + namespace.namespaceExists(existsRequest); + + // List namespaces + ListNamespacesRequest listRequest = new ListNamespacesRequest(); + listRequest.setId(Collections.singletonList(TEST_WAREHOUSE)); + ListNamespacesResponse listResponse = namespace.listNamespaces(listRequest); + assertThat(listResponse.getNamespaces()).contains(TEST_WAREHOUSE + "." 
+ testNamespace); + + // Drop namespace + DropNamespaceRequest dropRequest = new DropNamespaceRequest(); + dropRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + namespace.dropNamespace(dropRequest); + + // Verify namespace doesn't exist + assertThatThrownBy(() -> namespace.namespaceExists(existsRequest)) + .isInstanceOf(LanceNamespaceException.class) + .hasMessageContaining("not found"); + } + + @Test + public void testTableOperations() { + // Create namespace first + CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); + nsRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + namespace.createNamespace(nsRequest); + + String tableName = "test_table_" + UUID.randomUUID().toString().substring(0, 8); + + // Create empty table + CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); + createRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace, tableName)); + createRequest.setLocation("s3://warehouse/" + testNamespace + "/" + tableName); + + CreateEmptyTableResponse createResponse = namespace.createEmptyTable(createRequest); + assertThat(createResponse.getLocation()).isNotNull(); + + // Describe table + DescribeTableRequest describeRequest = new DescribeTableRequest(); + describeRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace, tableName)); + + DescribeTableResponse describeResponse = namespace.describeTable(describeRequest); + assertThat(describeResponse.getLocation()).isNotNull(); + + // Check table exists + TableExistsRequest existsRequest = new TableExistsRequest(); + existsRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace, tableName)); + namespace.tableExists(existsRequest); + + // List tables + ListTablesRequest listRequest = new ListTablesRequest(); + listRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + + ListTablesResponse listResponse = namespace.listTables(listRequest); + assertThat(listResponse.getTables()).contains(tableName); + + // Deregister table + 
DeregisterTableRequest deregisterRequest = new DeregisterTableRequest(); + deregisterRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace, tableName)); + namespace.deregisterTable(deregisterRequest); + + // Verify table doesn't exist + assertThatThrownBy(() -> namespace.tableExists(existsRequest)) + .isInstanceOf(LanceNamespaceException.class) + .hasMessageContaining("not found"); + } + + @Test + public void testCreateEmptyTableWithLocation() { + // Create namespace first + CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); + nsRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + namespace.createNamespace(nsRequest); + + String tableName = "lance_table"; + CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); + createRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace, tableName)); + createRequest.setLocation("s3://warehouse/" + testNamespace + "/" + tableName); + + CreateEmptyTableResponse response = namespace.createEmptyTable(createRequest); + assertThat(response.getLocation()).isNotNull(); + + // Clean up table + DeregisterTableRequest deregisterRequest = new DeregisterTableRequest(); + deregisterRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace, tableName)); + namespace.deregisterTable(deregisterRequest); + } + + @Test + public void testNestedNamespace() { + String nestedNs = "nested_" + UUID.randomUUID().toString().substring(0, 8); + + // Create parent namespace + CreateNamespaceRequest parentRequest = new CreateNamespaceRequest(); + parentRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + namespace.createNamespace(parentRequest); + + // Create nested namespace + CreateNamespaceRequest nestedRequest = new CreateNamespaceRequest(); + nestedRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace, nestedNs)); + nestedRequest.setProperties(Collections.singletonMap("description", "Nested namespace")); + namespace.createNamespace(nestedRequest); + + // List nested namespaces + 
ListNamespacesRequest listRequest = new ListNamespacesRequest(); + listRequest.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace)); + ListNamespacesResponse listResponse = namespace.listNamespaces(listRequest); + assertThat(listResponse.getNamespaces()) + .contains(TEST_WAREHOUSE + "." + testNamespace + "." + nestedNs); + + // Drop nested namespace first + DropNamespaceRequest dropNested = new DropNamespaceRequest(); + dropNested.setId(Arrays.asList(TEST_WAREHOUSE, testNamespace, nestedNs)); + namespace.dropNamespace(dropNested); + } +} diff --git a/java/lance-namespace-impls-core/pom.xml b/java/lance-namespace-impls-core/pom.xml new file mode 100644 index 0000000..878d8c3 --- /dev/null +++ b/java/lance-namespace-impls-core/pom.xml @@ -0,0 +1,97 @@ + + + 4.0.0 + + + org.lance + lance-namespace-impls-root + 0.0.1 + + + lance-namespace-impls-core + ${project.artifactId} + Core utilities for Lance Namespace implementations including RestClient + + + + + org.lance + lance-namespace-core + + + + + org.apache.httpcomponents.client5 + httpclient5 + 5.2.1 + + + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + + + + + org.slf4j + slf4j-api + + + + + org.apache.arrow + arrow-vector + + + org.apache.arrow + arrow-memory-netty + + + + + org.junit.jupiter + junit-jupiter + test + + + org.mockito + mockito-core + test + + + org.mockito + mockito-junit-jupiter + test + + + org.assertj + assertj-core + test + + + org.slf4j + slf4j-simple + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + + + + diff --git a/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/rest/RestClient.java b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/rest/RestClient.java new file mode 100644 index 0000000..d92cbed --- /dev/null +++ b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/rest/RestClient.java @@ -0,0 +1,412 
@@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.namespace.rest; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import org.apache.hc.client5.http.classic.methods.HttpDelete; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.classic.methods.HttpPatch; +import org.apache.hc.client5.http.classic.methods.HttpPost; +import org.apache.hc.client5.http.classic.methods.HttpPut; +import org.apache.hc.client5.http.classic.methods.HttpUriRequestBase; +import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.HttpClients; +import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager; +import org.apache.hc.core5.http.ContentType; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.apache.hc.core5.http.io.entity.StringEntity; +import org.apache.hc.core5.util.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import 
java.util.concurrent.TimeUnit; + +/** + * A reusable REST client for making HTTP requests to REST APIs. + * + *

* <p>This client provides: + * + * <ul>
+ *   <li>Connection pooling for efficient HTTP connections
+ *   <li>Configurable timeouts for connect and read operations
+ *   <li>Retry logic with exponential backoff
+ *   <li>JSON serialization/deserialization via Jackson
+ *   <li>Support for common HTTP methods (GET, POST, PUT, PATCH, DELETE)
+ * </ul>
+ * +

* <p>Example usage: + * + * <pre>{@code
+ * RestClient client = RestClient.builder()
+ *     .baseUrl("http://localhost:8080/api")
+ *     .header("Authorization", "Bearer token")
+ *     .connectTimeout(10, TimeUnit.SECONDS)
+ *     .readTimeout(30, TimeUnit.SECONDS)
+ *     .maxRetries(3)
+ *     .build();
+ *
+ * MyResponse response = client.get("/resource", MyResponse.class);
+ * }</pre>
+ */ +public class RestClient implements Closeable { + private static final Logger LOG = LoggerFactory.getLogger(RestClient.class); + + private static final int DEFAULT_MAX_CONNECTIONS = 20; + private static final int DEFAULT_MAX_CONNECTIONS_PER_ROUTE = 10; + private static final int DEFAULT_CONNECT_TIMEOUT_MS = 10000; + private static final int DEFAULT_READ_TIMEOUT_MS = 30000; + private static final int DEFAULT_MAX_RETRIES = 3; + private static final long DEFAULT_RETRY_DELAY_MS = 1000; + + private final String baseUrl; + private final Map defaultHeaders; + private final CloseableHttpClient httpClient; + private final ObjectMapper objectMapper; + private final int maxRetries; + private final long retryDelayMs; + + private RestClient(Builder builder) { + this.baseUrl = + builder.baseUrl.endsWith("/") + ? builder.baseUrl.substring(0, builder.baseUrl.length() - 1) + : builder.baseUrl; + this.defaultHeaders = new HashMap<>(builder.defaultHeaders); + this.maxRetries = builder.maxRetries; + this.retryDelayMs = builder.retryDelayMs; + + this.objectMapper = + builder.objectMapper != null + ? 
builder.objectMapper + : new ObjectMapper() + .registerModule(new JavaTimeModule()) + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + .configure(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS, false); + + PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(); + connectionManager.setMaxTotal(builder.maxConnections); + connectionManager.setDefaultMaxPerRoute(builder.maxConnectionsPerRoute); + + RequestConfig requestConfig = + RequestConfig.custom() + .setConnectTimeout(Timeout.ofMilliseconds(builder.connectTimeoutMs)) + .setResponseTimeout(Timeout.ofMilliseconds(builder.readTimeoutMs)) + .build(); + + this.httpClient = + HttpClients.custom() + .setConnectionManager(connectionManager) + .setDefaultRequestConfig(requestConfig) + .build(); + } + + public static Builder builder() { + return new Builder(); + } + + public String getBaseUrl() { + return baseUrl; + } + + public T get(String path, Class responseType) throws RestClientException { + return execute(new HttpGet(buildUri(path)), null, responseType); + } + + public T get(String path, Map queryParams, Class responseType) + throws RestClientException { + HttpGet request = new HttpGet(buildUri(path, queryParams)); + return execute(request, null, responseType); + } + + public T getWithHeaders(String path, Map headers, Class responseType) + throws RestClientException { + HttpGet request = new HttpGet(buildUri(path)); + headers.forEach(request::addHeader); + return execute(request, null, responseType); + } + + public T get( + String path, + Map queryParams, + Map headers, + Class responseType) + throws RestClientException { + HttpGet request = new HttpGet(buildUri(path, queryParams)); + headers.forEach(request::addHeader); + return execute(request, null, responseType); + } + + public T post(String path, Object body, Class responseType) throws RestClientException { + HttpPost request = new HttpPost(buildUri(path)); + return execute(request, body, 
responseType); + } + + public T post(String path, Object body, Map headers, Class responseType) + throws RestClientException { + HttpPost request = new HttpPost(buildUri(path)); + headers.forEach(request::addHeader); + return execute(request, body, responseType); + } + + public T put(String path, Object body, Class responseType) throws RestClientException { + HttpPut request = new HttpPut(buildUri(path)); + return execute(request, body, responseType); + } + + public T put(String path, Object body, Map headers, Class responseType) + throws RestClientException { + HttpPut request = new HttpPut(buildUri(path)); + headers.forEach(request::addHeader); + return execute(request, body, responseType); + } + + public T patch(String path, Object body, Class responseType) throws RestClientException { + HttpPatch request = new HttpPatch(buildUri(path)); + return execute(request, body, responseType); + } + + public void delete(String path) throws RestClientException { + execute(new HttpDelete(buildUri(path)), null, Void.class); + } + + public void delete(String path, Map headers) throws RestClientException { + HttpDelete request = new HttpDelete(buildUri(path)); + headers.forEach(request::addHeader); + execute(request, null, Void.class); + } + + public T delete(String path, Class responseType) throws RestClientException { + return execute(new HttpDelete(buildUri(path)), null, responseType); + } + + private URI buildUri(String path) { + String fullPath = path.startsWith("/") ? baseUrl + path : baseUrl + "/" + path; + return URI.create(fullPath); + } + + private URI buildUri(String path, Map queryParams) { + String fullPath = path.startsWith("/") ? 
baseUrl + path : baseUrl + "/" + path; + if (queryParams != null && !queryParams.isEmpty()) { + StringBuilder sb = new StringBuilder(fullPath); + sb.append("?"); + boolean first = true; + for (Map.Entry entry : queryParams.entrySet()) { + if (!first) { + sb.append("&"); + } + try { + sb.append(java.net.URLEncoder.encode(entry.getKey(), "UTF-8")); + sb.append("="); + sb.append(java.net.URLEncoder.encode(entry.getValue(), "UTF-8")); + } catch (java.io.UnsupportedEncodingException e) { + throw new RuntimeException("UTF-8 encoding not supported", e); + } + first = false; + } + fullPath = sb.toString(); + } + return URI.create(fullPath); + } + + private T execute(HttpUriRequestBase request, Object body, Class responseType) + throws RestClientException { + defaultHeaders.forEach(request::addHeader); + + if (body != null) { + try { + String jsonBody = objectMapper.writeValueAsString(body); + request.setEntity(new StringEntity(jsonBody, ContentType.APPLICATION_JSON)); + } catch (JsonProcessingException e) { + throw new RestClientException(-1, "Failed to serialize request body", e); + } + } + + int attempt = 0; + RestClientException lastException = null; + + while (attempt <= maxRetries) { + try { + return httpClient.execute( + request, + response -> { + int statusCode = response.getCode(); + String responseBody = + response.getEntity() != null ? 
EntityUtils.toString(response.getEntity()) : null; + + if (statusCode >= 200 && statusCode < 300) { + if (responseType == Void.class || responseBody == null || responseBody.isEmpty()) { + return null; + } + try { + return objectMapper.readValue(responseBody, responseType); + } catch (JsonProcessingException e) { + throw new RestClientException( + statusCode, "Failed to deserialize response: " + responseBody, e); + } + } else { + throw new RestClientException(statusCode, responseBody); + } + }); + } catch (RestClientException e) { + lastException = e; + if (e.getStatusCode() >= 400 && e.getStatusCode() < 500) { + throw e; + } + attempt++; + if (attempt <= maxRetries) { + long delay = retryDelayMs * (1L << (attempt - 1)); + LOG.warn( + "Request failed with status {}, retrying in {}ms (attempt {}/{})", + e.getStatusCode(), + delay, + attempt, + maxRetries); + try { + Thread.sleep(delay); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new RestClientException(-1, "Interrupted during retry", ie); + } + } + } catch (IOException e) { + lastException = new RestClientException(-1, "IO error: " + e.getMessage(), e); + attempt++; + if (attempt <= maxRetries) { + long delay = retryDelayMs * (1L << (attempt - 1)); + LOG.warn( + "Request failed with IO error, retrying in {}ms (attempt {}/{})", + delay, + attempt, + maxRetries); + try { + Thread.sleep(delay); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new RestClientException(-1, "Interrupted during retry", ie); + } + } + } + } + + throw lastException != null + ? 
lastException
+        : new RestClientException(-1, "Unknown error after retries");
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (httpClient != null) {
+      httpClient.close();
+    }
+  }
+
+  /**
+   * Fluent builder for {@link RestClient}.
+   *
+   * <p>Only {@code baseUrl} is mandatory ({@link #build()} rejects a missing value); every numeric
+   * setting starts from the corresponding {@code DEFAULT_*} constant. Each setter returns {@code
+   * this} so calls can be chained.
+   */
+  public static class Builder {
+    private String baseUrl;
+    private final Map<String, String> defaultHeaders = new HashMap<>();
+    private int maxConnections = DEFAULT_MAX_CONNECTIONS;
+    private int maxConnectionsPerRoute = DEFAULT_MAX_CONNECTIONS_PER_ROUTE;
+    private int connectTimeoutMs = DEFAULT_CONNECT_TIMEOUT_MS;
+    private int readTimeoutMs = DEFAULT_READ_TIMEOUT_MS;
+    private int maxRetries = DEFAULT_MAX_RETRIES;
+    private long retryDelayMs = DEFAULT_RETRY_DELAY_MS;
+    private ObjectMapper objectMapper;
+
+    /**
+     * Sets the base URL.
+     *
+     * @throws NullPointerException if {@code baseUrl} is null
+     */
+    public Builder baseUrl(String baseUrl) {
+      this.baseUrl = Objects.requireNonNull(baseUrl, "baseUrl cannot be null");
+      return this;
+    }
+
+    /** Adds a single default header, replacing any previous value stored under {@code name}. */
+    public Builder header(String name, String value) {
+      this.defaultHeaders.put(name, value);
+      return this;
+    }
+
+    /**
+     * Adds all given headers to the defaults, replacing entries that share a name.
+     *
+     * @throws NullPointerException if {@code headers} is null
+     */
+    public Builder headers(Map<String, String> headers) {
+      // Fail with a descriptive message instead of the bare NPE raised inside putAll.
+      Objects.requireNonNull(headers, "headers cannot be null");
+      this.defaultHeaders.putAll(headers);
+      return this;
+    }
+
+    /** Stores a {@code Bearer} Authorization default header; a null or empty token is ignored. */
+    public Builder authToken(String token) {
+      if (token != null && !token.isEmpty()) {
+        this.defaultHeaders.put("Authorization", "Bearer " + token);
+      }
+      return this;
+    }
+
+    /** Sets the total connection limit. */
+    public Builder maxConnections(int maxConnections) {
+      this.maxConnections = maxConnections;
+      return this;
+    }
+
+    /** Sets the per-route connection limit. */
+    public Builder maxConnectionsPerRoute(int maxConnectionsPerRoute) {
+      this.maxConnectionsPerRoute = maxConnectionsPerRoute;
+      return this;
+    }
+
+    /**
+     * Sets the connect timeout from an arbitrary unit.
+     *
+     * <p>The value is stored as an {@code int} number of milliseconds; durations that do not fit
+     * are clamped to the {@code int} range instead of wrapping around.
+     */
+    public Builder connectTimeout(int timeout, TimeUnit unit) {
+      this.connectTimeoutMs = toIntMillis(timeout, unit);
+      return this;
+    }
+
+    /** Sets the connect timeout in milliseconds. */
+    public Builder connectTimeoutMs(int connectTimeoutMs) {
+      this.connectTimeoutMs = connectTimeoutMs;
+      return this;
+    }
+
+    /**
+     * Sets the read timeout from an arbitrary unit.
+     *
+     * <p>The value is stored as an {@code int} number of milliseconds; durations that do not fit
+     * are clamped to the {@code int} range instead of wrapping around.
+     */
+    public Builder readTimeout(int timeout, TimeUnit unit) {
+      this.readTimeoutMs = toIntMillis(timeout, unit);
+      return this;
+    }
+
+    /** Sets the read timeout in milliseconds. */
+    public Builder readTimeoutMs(int readTimeoutMs) {
+      this.readTimeoutMs = readTimeoutMs;
+      return this;
+    }
+
+    /** Sets the retry limit that the request loop compares its attempt counter against. */
+    public Builder maxRetries(int maxRetries) {
+      this.maxRetries = maxRetries;
+      return this;
+    }
+
+    /**
+     * Sets the base retry delay from an arbitrary unit.
+     *
+     * @throws IllegalArgumentException if the delay is negative
+     */
+    public Builder retryDelay(long delay, TimeUnit unit) {
+      return retryDelayMs(unit.toMillis(delay));
+    }
+
+    /**
+     * Sets the base retry delay in milliseconds.
+     *
+     * @throws IllegalArgumentException if {@code retryDelayMs} is negative
+     */
+    public Builder retryDelayMs(long retryDelayMs) {
+      // The retry loop hands a multiple of this value to Thread.sleep, which rejects negative
+      // arguments; failing here surfaces the misconfiguration before the first failed request.
+      if (retryDelayMs < 0) {
+        throw new IllegalArgumentException("retryDelayMs must not be negative: " + retryDelayMs);
+      }
+      this.retryDelayMs = retryDelayMs;
+      return this;
+    }
+
+    /** Sets the Jackson mapper; stays {@code null} unless this method is called. */
+    public Builder objectMapper(ObjectMapper objectMapper) {
+      this.objectMapper = objectMapper;
+      return this;
+    }
+
+    /**
+     * Creates the client from the collected settings.
+     *
+     * @throws NullPointerException if no base URL was configured
+     */
+    public RestClient build() {
+      Objects.requireNonNull(baseUrl, "baseUrl is required");
+      return new RestClient(this);
+    }
+
+    /** Converts a duration to milliseconds, clamping the result to the {@code int} range. */
+    private static int toIntMillis(long duration, TimeUnit unit) {
+      long millis = unit.toMillis(duration);
+      return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, millis));
+    }
+  }
+}
diff --git a/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/rest/RestClientException.java b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/rest/RestClientException.java
new file mode 100644
index 0000000..b976b1d
--- /dev/null
+++ b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/rest/RestClientException.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.lance.namespace.rest;
+
+/**
+ * Exception thrown when a REST API call fails.
+ *
+ *

<p>Contains the HTTP status code and response body for error diagnosis.
+ */
+public class RestClientException extends RuntimeException {
+  private final int statusCode;
+  private final String responseBody;
+
+  /**
+   * Creates an exception for a response that carried an error status.
+   *
+   * @param statusCode HTTP status code of the response
+   * @param responseBody raw response body, may be {@code null}
+   */
+  public RestClientException(int statusCode, String responseBody) {
+    super(describe(statusCode, responseBody));
+    this.statusCode = statusCode;
+    this.responseBody = responseBody;
+  }
+
+  /**
+   * Creates an exception that wraps an underlying failure.
+   *
+   * @param statusCode status code to report
+   * @param message description of the failure; also returned by {@link #getResponseBody()}
+   * @param cause the underlying exception
+   */
+  public RestClientException(int statusCode, String message, Throwable cause) {
+    super(describe(statusCode, message), cause);
+    this.statusCode = statusCode;
+    this.responseBody = message;
+  }
+
+  /**
+   * Builds the exception message.
+   *
+   * <p>Plain concatenation is used instead of {@code String.format("%d", ...)} so the status code
+   * is always written with ASCII digits, independent of the JVM's default locale.
+   */
+  private static String describe(int statusCode, String detail) {
+    return "HTTP " + statusCode + ": " + detail;
+  }
+
+  /** Returns the status code passed at construction time. */
+  public int getStatusCode() {
+    return statusCode;
+  }
+
+  /** Returns the response body, or the failure message when the exception wraps a cause. */
+  public String getResponseBody() {
+    return responseBody;
+  }
+
+  /** Returns true for status codes in the 4xx range. */
+  public boolean isClientError() {
+    return statusCode >= 400 && statusCode < 500;
+  }
+
+  /** Returns true for status codes of 500 and above. */
+  public boolean isServerError() {
+    return statusCode >= 500;
+  }
+
+  /** Returns true for status 404. */
+  public boolean isNotFound() {
+    return statusCode == 404;
+  }
+
+  /** Returns true for status 409. */
+  public boolean isConflict() {
+    return statusCode == 409;
+  }
+
+  /** Returns true for status 400. */
+  public boolean isBadRequest() {
+    return statusCode == 400;
+  }
+
+  /** Returns true for status 401. */
+  public boolean isUnauthorized() {
+    return statusCode == 401;
+  }
+
+  /** Returns true for status 403. */
+  public boolean isForbidden() {
+    return statusCode == 403;
+  }
+}
diff --git a/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/test/TestHelper.java b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/test/TestHelper.java
new file mode 100644
index 0000000..e4e7d75
--- /dev/null
+++ b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/test/TestHelper.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.lance.namespace.test;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.channels.Channels;
+import java.util.Arrays;
+
+/** Helpers that produce small Arrow IPC streams for use in tests. */
+public final class TestHelper {
+
+  private TestHelper() {}
+
+  /**
+   * Builds an Arrow IPC stream for a fixed two-column schema: a nullable signed 32-bit {@code id}
+   * and a nullable UTF-8 {@code name}.
+   *
+   * @param allocator Arrow buffer allocator
+   * @return Arrow IPC stream bytes
+   */
+  public static byte[] createTestArrowData(BufferAllocator allocator) {
+    Field idField = Field.nullable("id", new ArrowType.Int(32, true));
+    Field nameField = Field.nullable("name", ArrowType.Utf8.INSTANCE);
+    return createArrowIpcStream(allocator, new Schema(Arrays.asList(idField, nameField)));
+  }
+
+  /**
+   * Same output as {@link #createTestArrowData(BufferAllocator)}; this method simply delegates to
+   * it.
+   *
+   * @param allocator Arrow buffer allocator
+   * @return Arrow IPC stream bytes
+   */
+  public static byte[] createEmptyArrowData(BufferAllocator allocator) {
+    return createTestArrowData(allocator);
+  }
+
+  /**
+   * Serializes a stream for the given schema into a byte array. The writer is started and ended
+   * without writing a record batch in between.
+   *
+   * @param allocator Arrow buffer allocator
+   * @param schema Arrow schema
+   * @return Arrow IPC stream bytes
+   * @throws RuntimeException wrapping any exception raised while creating, writing or closing the
+   *     Arrow resources
+   */
+  public static byte[] createArrowIpcStream(BufferAllocator allocator, Schema schema) {
+    ByteArrayOutputStream sink = new ByteArrayOutputStream();
+    try (VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(schema, allocator);
+        ArrowStreamWriter streamWriter =
+            new ArrowStreamWriter(schemaRoot, null, Channels.newChannel(sink))) {
+      streamWriter.start();
+      streamWriter.end();
+    } catch (Exception e) {
+      throw new RuntimeException("Failed to create Arrow IPC stream", e);
+    }
+    // Read the buffer only after both resources are closed, as the original did.
+    return sink.toByteArray();
+  }
+}
diff --git a/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/ObjectIdentifier.java b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/ObjectIdentifier.java
new file mode 100644
index 0000000..6945fbd
--- /dev/null
+++ b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/ObjectIdentifier.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.lance.namespace.util;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * Represents a hierarchical object identifier (namespace or table).
+ *
+ *

<p>An identifier consists of one or more levels, where each level is a string. For example:
+ *
+ * <ul>
+ *   <li>A root identifier has 0 levels
+ *   <li>A catalog identifier has 1 level (e.g., ["my_catalog"])
+ *   <li>A database identifier has 2 levels (e.g., ["my_catalog", "my_database"])
+ *   <li>A table identifier has 3 levels (e.g., ["my_catalog", "my_database", "my_table"])
+ * </ul>
+ */
+public class ObjectIdentifier {
+  private static final ObjectIdentifier ROOT = new ObjectIdentifier(Collections.emptyList());
+  private final List<String> levels;
+
+  /** Stores an unmodifiable defensive copy so later changes to the argument cannot leak in. */
+  private ObjectIdentifier(List<String> levels) {
+    this.levels = Collections.unmodifiableList(new ArrayList<>(levels));
+  }
+
+  /** Returns the shared zero-level identifier. */
+  public static ObjectIdentifier root() {
+    return ROOT;
+  }
+
+  /** Creates an identifier from the given levels; null or empty input yields the root. */
+  public static ObjectIdentifier of(List<String> levels) {
+    if (levels == null || levels.isEmpty()) {
+      return ROOT;
+    }
+    return new ObjectIdentifier(levels);
+  }
+
+  /**
+   * Creates an identifier from a set; null or empty input yields the root.
+   *
+   * <p>Levels are taken in the set's iteration order.
+   */
+  public static ObjectIdentifier of(Set<String> levels) {
+    if (levels == null || levels.isEmpty()) {
+      return ROOT;
+    }
+    return new ObjectIdentifier(new ArrayList<>(levels));
+  }
+
+  /** Creates an identifier from varargs; a null or empty array yields the root. */
+  public static ObjectIdentifier of(String... levels) {
+    if (levels == null || levels.length == 0) {
+      return ROOT;
+    }
+    return new ObjectIdentifier(Arrays.asList(levels));
+  }
+
+  /** Returns true when this identifier has no levels. */
+  public boolean isRoot() {
+    return levels.isEmpty();
+  }
+
+  /** Returns the number of levels. */
+  public int levels() {
+    return levels.size();
+  }
+
+  /**
+   * Returns the level at the given zero-based position.
+   *
+   * @throws IndexOutOfBoundsException if {@code index} is negative or not less than {@link
+   *     #levels()}
+   */
+  public String levelAtListPos(int index) {
+    if (index < 0 || index >= levels.size()) {
+      throw new IndexOutOfBoundsException(
+          "Index " + index + " out of bounds for identifier with " + levels.size() + " levels");
+    }
+    return levels.get(index);
+  }
+
+  /** Returns the levels as an unmodifiable list. */
+  public List<String> getLevels() {
+    return levels;
+  }
+
+  /**
+   * Returns the identifier without its last level; the parent of a one-level identifier is the
+   * root.
+   *
+   * @throws IllegalStateException if this identifier is the root
+   */
+  public ObjectIdentifier parent() {
+    if (isRoot()) {
+      throw new IllegalStateException("Root identifier has no parent");
+    }
+    return of(levels.subList(0, levels.size() - 1));
+  }
+
+  /**
+   * Returns the last level.
+   *
+   * @throws IllegalStateException if this identifier is the root
+   */
+  public String name() {
+    if (isRoot()) {
+      throw new IllegalStateException("Root identifier has no name");
+    }
+    return levels.get(levels.size() - 1);
+  }
+
+  /** Returns a new identifier with {@code name} appended as an additional level. */
+  public ObjectIdentifier child(String name) {
+    List<String> newLevels = new ArrayList<>(levels);
+    newLevels.add(name);
+    return of(newLevels);
+  }
+
+  /** Returns the list rendering of the levels; an empty list already prints as {@code []}. */
+  @Override
+  public String toString() {
+    return levels.toString();
+  }
+
+  /** Joins the levels with the given delimiter. */
+  public String toDelimited(String delimiter) {
+    return String.join(delimiter, levels);
+  }
+
+  /** Returns the levels as an unmodifiable list (same value as {@link #getLevels()}). */
+  public List<String> listStyleId() {
+    return levels;
+  }
+
+  /** Joins the levels with {@code "."}. */
+  public String stringStyleId() {
+    return toDelimited(".");
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) return true;
+    if (obj == null || getClass() != obj.getClass()) return false;
+    ObjectIdentifier that = (ObjectIdentifier) obj;
+    return Objects.equals(levels, that.levels);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(levels);
+  }
+}
diff --git a/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/ValidationUtil.java b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/ValidationUtil.java
new file mode 100644
index 0000000..71cd20a
--- /dev/null
+++ b/java/lance-namespace-impls-core/src/main/java/org/lance/namespace/util/ValidationUtil.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.lance.namespace.util;
+
+import org.lance.namespace.errors.InvalidInputException;
+
+/**
+ * Utility methods for validation.
+ *
+ * <p>Messages use {@link String#format(String, Object...)} syntax. A message that cannot be
+ * formatted (for example one containing a stray {@code %}) is reported verbatim, so the intended
+ * validation exception is never replaced by a formatting error.
+ */
+public final class ValidationUtil {
+
+  private ValidationUtil() {}
+
+  /**
+   * Throws {@link InvalidInputException} when {@code condition} is false.
+   *
+   * @param condition the condition that must hold
+   * @param message format string for the exception message
+   * @param args format arguments
+   */
+  public static void checkArgument(boolean condition, String message, Object... args) {
+    if (!condition) {
+      throw new InvalidInputException(formatMessage(message, args));
+    }
+  }
+
+  /**
+   * Throws {@link InvalidInputException} when {@code reference} is null.
+   *
+   * @param reference the value to check
+   * @param message format string for the exception message
+   * @param args format arguments
+   */
+  public static void checkNotNull(Object reference, String message, Object... args) {
+    if (reference == null) {
+      throw new InvalidInputException(formatMessage(message, args));
+    }
+  }
+
+  /**
+   * Throws {@link InvalidInputException} when {@code value} is null or empty.
+   *
+   * @param value the string to check
+   * @param message format string for the exception message
+   * @param args format arguments
+   */
+  public static void checkNotEmpty(String value, String message, Object... args) {
+    if (value == null || value.isEmpty()) {
+      throw new InvalidInputException(formatMessage(message, args));
+    }
+  }
+
+  /**
+   * Throws {@link IllegalStateException} when {@code condition} is false.
+   *
+   * @param condition the condition that must hold
+   * @param message format string for the exception message
+   * @param args format arguments
+   */
+  public static void checkState(boolean condition, String message, Object... args) {
+    if (!condition) {
+      throw new IllegalStateException(formatMessage(message, args));
+    }
+  }
+
+  /** Formats the message, falling back to the raw text when the format string is malformed. */
+  private static String formatMessage(String message, Object... args) {
+    try {
+      return String.format(message, args);
+    } catch (java.util.IllegalFormatException e) {
+      return message;
+    }
+  }
+}
diff --git a/java/lance-namespace-polaris/pom.xml b/java/lance-namespace-polaris/pom.xml
index 2c0f5cf..cf71b17 100644
--- a/java/lance-namespace-polaris/pom.xml
+++ b/java/lance-namespace-polaris/pom.xml
@@ -31,10 +31,18 @@ Polaris Catalog implementation for Lance namespace management + + org.lance + lance-namespace-impls-core + org.lance lance-core + + org.lance + lance-namespace-core + org.lance lance-namespace-apache-client
diff --git a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisModels.java b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisModels.java
index 24fcbbe..595da3c 100644
--- a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisModels.java
+++ b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisModels.java
@@ -180,21 +180,21 @@ public void setTable(GenericTable table) { /** Table identifier. 
*/ @JsonIgnoreProperties(ignoreUnknown = true) public static class TableIdentifier { - private String namespace; + private List namespace; private String name; public TableIdentifier() {} - public TableIdentifier(String namespace, String name) { + public TableIdentifier(List namespace, String name) { this.namespace = namespace; this.name = name; } - public String getNamespace() { + public List getNamespace() { return namespace; } - public void setNamespace(String namespace) { + public void setNamespace(List namespace) { this.namespace = namespace; } @@ -339,11 +339,11 @@ public static class ListNamespacesResponse { @JsonProperty("next-page-token") private String nextPageToken; - private List namespaces; + private List> namespaces; public ListNamespacesResponse() {} - public ListNamespacesResponse(String nextPageToken, List namespaces) { + public ListNamespacesResponse(String nextPageToken, List> namespaces) { this.nextPageToken = nextPageToken; this.namespaces = namespaces; } @@ -356,32 +356,13 @@ public void setNextPageToken(String nextPageToken) { this.nextPageToken = nextPageToken; } - public List getNamespaces() { + public List> getNamespaces() { return namespaces; } - public void setNamespaces(List namespaces) { + public void setNamespaces(List> namespaces) { this.namespaces = namespaces; } - - @JsonIgnoreProperties(ignoreUnknown = true) - public static class Namespace { - private List namespace; - - public Namespace() {} - - public Namespace(List namespace) { - this.namespace = namespace; - } - - public List getNamespace() { - return namespace; - } - - public void setNamespace(List namespace) { - this.namespace = namespace; - } - } } /** Create namespace request. 
*/ diff --git a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java index e5ef4f3..b70461a 100644 --- a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java +++ b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespace.java @@ -13,23 +13,25 @@ */ package org.lance.namespace.polaris; -import com.lancedb.lance.Dataset; -import com.lancedb.lance.WriteParams; import org.lance.namespace.LanceNamespace; -import org.lance.namespace.LanceNamespaceException; -import org.lance.namespace.ObjectIdentifier; +import org.lance.namespace.errors.InternalException; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.NamespaceAlreadyExistsException; +import org.lance.namespace.errors.NamespaceNotFoundException; +import org.lance.namespace.errors.TableAlreadyExistsException; +import org.lance.namespace.errors.TableNotFoundException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; +import org.lance.namespace.model.DeregisterTableRequest; +import org.lance.namespace.model.DeregisterTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import 
org.lance.namespace.model.ListTablesRequest; @@ -37,13 +39,15 @@ import org.lance.namespace.model.NamespaceExistsRequest; import org.lance.namespace.model.TableExistsRequest; import org.lance.namespace.rest.RestClient; +import org.lance.namespace.rest.RestClientException; +import org.lance.namespace.util.ObjectIdentifier; import org.lance.namespace.util.ValidationUtil; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.Schema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.Closeable; import java.io.IOException; import java.util.Collections; import java.util.HashMap; @@ -51,9 +55,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; /** Polaris Catalog namespace implementation for Lance. */ -public class PolarisNamespace implements LanceNamespace { +public class PolarisNamespace implements LanceNamespace, Closeable { private static final Logger LOG = LoggerFactory.getLogger(PolarisNamespace.class); private static final String TABLE_FORMAT_LANCE = "lance"; private static final String TABLE_TYPE_KEY = "table_type"; @@ -69,19 +74,15 @@ public void initialize(Map configProperties, BufferAllocator all this.allocator = allocator; this.config = new PolarisNamespaceConfig(configProperties); - // Build REST client with authentication if provided RestClient.Builder clientBuilder = RestClient.builder() .baseUrl(config.getFullApiUrl()) - .connectTimeout(config.getConnectTimeout()) - .readTimeout(config.getReadTimeout()) + .connectTimeout(config.getConnectTimeout(), TimeUnit.MILLISECONDS) + .readTimeout(config.getReadTimeout(), TimeUnit.MILLISECONDS) .maxRetries(config.getMaxRetries()); - // Add auth token if provided if (config.getAuthToken() != null) { - Map headers = new HashMap<>(); - headers.put("Authorization", "Bearer " + config.getAuthToken()); - clientBuilder.defaultHeaders(headers); + clientBuilder.authToken(config.getAuthToken()); } 
this.restClient = clientBuilder.build(); @@ -98,27 +99,33 @@ public String namespaceId() { public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { ObjectIdentifier namespaceId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - namespaceId.levels() >= 1, "Namespace must have at least one level"); + namespaceId.levels() >= 2, "Namespace must have at least catalog and namespace levels"); try { - // Convert request to Polaris format - List namespace = namespaceId.listStyleId(); + List parts = namespaceId.listStyleId(); + String catalog = parts.get(0); + List namespace = parts.subList(1, parts.size()); PolarisModels.CreateNamespaceRequest polarisRequest = new PolarisModels.CreateNamespaceRequest(namespace, request.getProperties()); - // Create namespace using Iceberg REST API endpoint PolarisModels.NamespaceResponse response = - restClient.post("/namespaces", polarisRequest, PolarisModels.NamespaceResponse.class); + restClient.post( + "/v1/" + catalog + "/namespaces", + polarisRequest, + PolarisModels.NamespaceResponse.class); - LOG.info("Created namespace: {}", String.join(".", namespace)); + LOG.info("Created namespace: {}.{}", catalog, String.join(".", namespace)); CreateNamespaceResponse result = new CreateNamespaceResponse(); result.setProperties(response.getProperties()); return result; - } catch (IOException e) { - throw LanceNamespaceException.serverError( - "Failed to create namespace", "ServerError", namespaceId.stringStyleId(), e.getMessage()); + } catch (RestClientException e) { + if (e.isConflict()) { + throw new NamespaceAlreadyExistsException( + "Namespace already exists: " + namespaceId.stringStyleId()); + } + throw new InternalException("Failed to create namespace: " + e.getMessage()); } } @@ -126,31 +133,27 @@ public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest request) { ObjectIdentifier namespaceId = 
ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - namespaceId.levels() >= 1, "Namespace must have at least one level"); + namespaceId.levels() >= 2, "Namespace must have at least catalog and namespace levels"); try { - String namespacePath = namespaceId.stringStyleId(); + List parts = namespaceId.listStyleId(); + String catalog = parts.get(0); + List namespaceParts = parts.subList(1, parts.size()); + String namespacePath = String.join(".", namespaceParts); - // Get namespace properties using Iceberg REST API PolarisModels.NamespaceResponse response = - restClient.get("/namespaces/" + namespacePath, PolarisModels.NamespaceResponse.class); + restClient.get( + "/v1/" + catalog + "/namespaces/" + namespacePath, + PolarisModels.NamespaceResponse.class); DescribeNamespaceResponse result = new DescribeNamespaceResponse(); result.setProperties(response.getProperties()); return result; - } catch (IOException e) { - if (e.getMessage() != null && e.getMessage().contains("404")) { - throw LanceNamespaceException.notFound( - "Namespace not found", - "NoSuchNamespace", - namespaceId.stringStyleId(), - "Namespace not found: " + namespaceId.stringStyleId()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new NamespaceNotFoundException("Namespace not found: " + namespaceId.stringStyleId()); } - throw LanceNamespaceException.serverError( - "Failed to describe namespace", - "ServerError", - namespaceId.stringStyleId(), - e.getMessage()); + throw new InternalException("Failed to describe namespace: " + e.getMessage()); } } @@ -161,59 +164,61 @@ public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { ? 
ObjectIdentifier.of(request.getId()) : ObjectIdentifier.of(Collections.emptyList()); + ValidationUtil.checkArgument(parentId.levels() >= 1, "Must specify at least the catalog"); + try { - String path = "/namespaces"; - if (!parentId.isRoot()) { - path += "/" + parentId.stringStyleId() + "/namespaces"; + List parts = parentId.listStyleId(); + String catalog = parts.get(0); + String path = "/v1/" + catalog + "/namespaces"; + if (parts.size() > 1) { + List namespaceParts = parts.subList(1, parts.size()); + path = "/v1/" + catalog + "/namespaces/" + String.join(".", namespaceParts) + "/namespaces"; } - // List namespaces using Iceberg REST API PolarisModels.ListNamespacesResponse response = restClient.get(path, PolarisModels.ListNamespacesResponse.class); ListNamespacesResponse result = new ListNamespacesResponse(); - // Convert namespace identifiers to Set with full paths Set namespaceSet = new LinkedHashSet<>(); if (response.getNamespaces() != null) { - for (PolarisModels.ListNamespacesResponse.Namespace ns : response.getNamespaces()) { - namespaceSet.add(String.join(".", ns.getNamespace())); + for (List ns : response.getNamespaces()) { + List fullNs = new java.util.ArrayList<>(); + fullNs.add(catalog); + fullNs.addAll(ns); + namespaceSet.add(String.join(".", fullNs)); } } result.setNamespaces(namespaceSet); return result; - } catch (IOException e) { - throw LanceNamespaceException.serverError( - "Failed to list namespaces", "ServerError", "listNamespaces", e.getMessage()); + } catch (RestClientException e) { + throw new InternalException("Failed to list namespaces: " + e.getMessage()); } } @Override public DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { + if ("Cascade".equalsIgnoreCase(request.getBehavior())) { + throw new InvalidInputException("Cascade behavior is not supported for this implementation"); + } + ObjectIdentifier namespaceId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - namespaceId.levels() >= 1, 
"Namespace must have at least one level"); + namespaceId.levels() >= 2, "Namespace must have at least catalog and namespace levels"); try { - String namespacePath = namespaceId.stringStyleId(); - - // Drop namespace using Iceberg REST API - restClient.delete("/namespaces/" + namespacePath); - - LOG.info("Dropped namespace: {}", namespacePath); + List parts = namespaceId.listStyleId(); + String catalog = parts.get(0); + List namespaceParts = parts.subList(1, parts.size()); + String namespacePath = String.join(".", namespaceParts); - DropNamespaceResponse result = new DropNamespaceResponse(); - // DropNamespaceResponse has no fields to set - return result; - } catch (IOException e) { - if (e.getMessage() != null && e.getMessage().contains("404")) { - throw LanceNamespaceException.notFound( - "Namespace not found", - "NoSuchNamespace", - namespaceId.stringStyleId(), - "Namespace not found: " + namespaceId.stringStyleId()); + restClient.delete("/v1/" + catalog + "/namespaces/" + namespacePath); + LOG.info("Dropped namespace: {}.{}", catalog, namespacePath); + return new DropNamespaceResponse(); + } catch (RestClientException e) { + if (e.isNotFound()) { + return new DropNamespaceResponse(); } - throw LanceNamespaceException.serverError( - "Failed to drop namespace", "ServerError", namespaceId.stringStyleId(), e.getMessage()); + throw new InternalException("Failed to drop namespace: " + e.getMessage()); } } @@ -221,26 +226,21 @@ public DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { public void namespaceExists(NamespaceExistsRequest request) { ObjectIdentifier namespaceId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - namespaceId.levels() >= 1, "Namespace must have at least one level"); + namespaceId.levels() >= 2, "Namespace must have at least catalog and namespace levels"); try { - String namespacePath = namespaceId.stringStyleId(); - // Use GET request to check if namespace exists - restClient.get("/namespaces/" + 
namespacePath, PolarisModels.NamespaceResponse.class); - // If we get here, namespace exists - return normally - } catch (IOException e) { - if (e.getMessage() != null && e.getMessage().contains("404")) { - throw LanceNamespaceException.notFound( - "Namespace not found", - "NoSuchNamespace", - namespaceId.stringStyleId(), - "Namespace not found: " + namespaceId.stringStyleId()); + List parts = namespaceId.listStyleId(); + String catalog = parts.get(0); + List namespaceParts = parts.subList(1, parts.size()); + String namespacePath = String.join(".", namespaceParts); + + restClient.get( + "/v1/" + catalog + "/namespaces/" + namespacePath, PolarisModels.NamespaceResponse.class); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new NamespaceNotFoundException("Namespace not found: " + namespaceId.stringStyleId()); } - throw LanceNamespaceException.serverError( - "Failed to check namespace existence", - "ServerError", - namespaceId.stringStyleId(), - e.getMessage()); + throw new InternalException("Failed to check namespace existence: " + e.getMessage()); } } @@ -248,33 +248,28 @@ public void namespaceExists(NamespaceExistsRequest request) { public void tableExists(TableExistsRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - tableId.levels() >= 2, "Table identifier must have at least 2 levels"); + tableId.levels() >= 3, "Table identifier must have catalog, namespace, and table name"); try { - // Split into namespace and table name List parts = tableId.listStyleId(); + String catalog = parts.get(0); String tableName = parts.get(parts.size() - 1); - List namespaceParts = parts.subList(0, parts.size() - 1); + List namespaceParts = parts.subList(1, parts.size() - 1); String namespacePath = String.join(".", namespaceParts); - // Use GET request to check if table exists restClient.get( - "/namespaces/" + namespacePath + "/generic-tables/" + tableName, + "/polaris/v1/" + + catalog + + 
"/namespaces/" + + namespacePath + + "/generic-tables/" + + tableName, PolarisModels.LoadGenericTableResponse.class); - // If we get here, table exists - return normally - } catch (IOException e) { - if (e.getMessage() != null && e.getMessage().contains("404")) { - throw LanceNamespaceException.notFound( - "Table not found", - "NoSuchTable", - tableId.stringStyleId(), - "Table not found: " + tableId.stringStyleId()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new TableNotFoundException("Table not found: " + tableId.stringStyleId()); } - throw LanceNamespaceException.serverError( - "Failed to check table existence", - "ServerError", - tableId.stringStyleId(), - e.getMessage()); + throw new InternalException("Failed to check table existence: " + e.getMessage()); } } @@ -282,93 +277,74 @@ public void tableExists(TableExistsRequest request) { public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - tableId.levels() >= 2, "Table identifier must have at least 2 levels"); + tableId.levels() >= 3, "Table identifier must have catalog, namespace, and table name"); try { - // Split into namespace and table name List parts = tableId.listStyleId(); + String catalog = parts.get(0); String tableName = parts.get(parts.size() - 1); - List namespaceParts = parts.subList(0, parts.size() - 1); + List namespaceParts = parts.subList(1, parts.size() - 1); String namespacePath = String.join(".", namespaceParts); - // Prepare table properties Map properties = new HashMap<>(); - String comment = null; - if (request.getProperties() != null) { - properties.putAll(request.getProperties()); - // Extract comment to use as doc field - comment = properties.remove("comment"); - } - - // Add Lance-specific properties properties.put(TABLE_TYPE_KEY, TABLE_FORMAT_LANCE); + String comment = null; - // Create generic table request 
PolarisModels.CreateGenericTableRequest tableRequest = new PolarisModels.CreateGenericTableRequest( - tableName, - TABLE_FORMAT_LANCE, - request.getLocation(), // location from request - comment, // doc field from comment property - properties); + tableName, TABLE_FORMAT_LANCE, request.getLocation(), comment, properties); - // Create table using Generic Table API PolarisModels.LoadGenericTableResponse response = restClient.post( - "/namespaces/" + namespacePath + "/generic-tables", + "/polaris/v1/" + catalog + "/namespaces/" + namespacePath + "/generic-tables", tableRequest, PolarisModels.LoadGenericTableResponse.class); - LOG.info("Created Lance table: {}.{}", namespacePath, tableName); + LOG.info("Created Lance table: {}.{}.{}", catalog, namespacePath, tableName); CreateEmptyTableResponse result = new CreateEmptyTableResponse(); result.setLocation(response.getTable().getBaseLocation()); - Map resultProps = new HashMap<>(response.getTable().getProperties()); - if (response.getTable().getDoc() != null) { - resultProps.put("comment", response.getTable().getDoc()); - } - result.setProperties(resultProps); return result; - } catch (IOException e) { - if (e.getMessage() != null && e.getMessage().contains("409")) { - throw LanceNamespaceException.conflict( - "Table already exists", - "TableAlreadyExists", - tableId.stringStyleId(), - "Table already exists: " + tableId.stringStyleId()); + } catch (RestClientException e) { + if (e.isConflict()) { + throw new TableAlreadyExistsException("Table already exists: " + tableId.stringStyleId()); } - throw LanceNamespaceException.serverError( - "Failed to create table", "ServerError", tableId.stringStyleId(), e.getMessage()); + throw new InternalException("Failed to create table: " + e.getMessage()); } } @Override public DescribeTableResponse describeTable(DescribeTableRequest request) { + if (Boolean.TRUE.equals(request.getLoadDetailedMetadata())) { + throw new InvalidInputException( + "load_detailed_metadata=true is not 
supported for this implementation"); + } + ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - tableId.levels() >= 2, "Table identifier must have at least 2 levels"); + tableId.levels() >= 3, "Table identifier must have catalog, namespace, and table name"); try { - // Split into namespace and table name List parts = tableId.listStyleId(); + String catalog = parts.get(0); String tableName = parts.get(parts.size() - 1); - List namespaceParts = parts.subList(0, parts.size() - 1); + List namespaceParts = parts.subList(1, parts.size() - 1); String namespacePath = String.join(".", namespaceParts); - // Get table using Generic Table API PolarisModels.LoadGenericTableResponse response = restClient.get( - "/namespaces/" + namespacePath + "/generic-tables/" + tableName, + "/polaris/v1/" + + catalog + + "/namespaces/" + + namespacePath + + "/generic-tables/" + + tableName, PolarisModels.LoadGenericTableResponse.class); PolarisModels.GenericTable table = response.getTable(); - // Verify it's a Lance table if (!TABLE_FORMAT_LANCE.equals(table.getFormat())) { - throw LanceNamespaceException.badRequest( - "Invalid table format", - "InvalidTableFormat", - tableId.stringStyleId(), + throw new InvalidInputException( String.format( "Table %s is not a Lance table (format: %s)", tableId.stringStyleId(), table.getFormat())); @@ -376,25 +352,13 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { DescribeTableResponse result = new DescribeTableResponse(); result.setLocation(table.getBaseLocation()); - Map resultProps = new HashMap<>(); - if (table.getProperties() != null) { - resultProps.putAll(table.getProperties()); - } - if (table.getDoc() != null) { - resultProps.put("comment", table.getDoc()); - } - result.setProperties(resultProps); + result.setStorageOptions(table.getProperties()); return result; - } catch (IOException e) { - if (e.getMessage() != null && e.getMessage().contains("404")) { - throw 
LanceNamespaceException.notFound( - "Table not found", - "NoSuchTable", - tableId.stringStyleId(), - "Table not found: " + tableId.stringStyleId()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new TableNotFoundException("Table not found: " + tableId.stringStyleId()); } - throw LanceNamespaceException.serverError( - "Failed to describe table", "ServerError", tableId.stringStyleId(), e.getMessage()); + throw new InternalException("Failed to describe table: " + e.getMessage()); } } @@ -402,19 +366,20 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { public ListTablesResponse listTables(ListTablesRequest request) { ObjectIdentifier namespaceId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - namespaceId.levels() >= 1, "Namespace must have at least one level"); + namespaceId.levels() >= 2, "Must specify at least catalog and namespace"); try { - String namespacePath = namespaceId.stringStyleId(); + List parts = namespaceId.listStyleId(); + String catalog = parts.get(0); + List namespaceParts = parts.subList(1, parts.size()); + String namespacePath = String.join(".", namespaceParts); - // List tables using Generic Table API PolarisModels.ListGenericTablesResponse response = restClient.get( - "/namespaces/" + namespacePath + "/generic-tables", + "/polaris/v1/" + catalog + "/namespaces/" + namespacePath + "/generic-tables", PolarisModels.ListGenericTablesResponse.class); ListTablesResponse result = new ListTablesResponse(); - // Convert table identifiers to table names only Set tableNames = new LinkedHashSet<>(); if (response.getIdentifiers() != null) { for (PolarisModels.TableIdentifier id : response.getIdentifiers()) { @@ -423,86 +388,61 @@ public ListTablesResponse listTables(ListTablesRequest request) { } result.setTables(tableNames); return result; - } catch (IOException e) { - if (e.getMessage() != null && e.getMessage().contains("404")) { - throw LanceNamespaceException.notFound( - "Namespace 
not found", - "NoSuchNamespace", - namespaceId.stringStyleId(), - "Namespace not found: " + namespaceId.stringStyleId()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new NamespaceNotFoundException("Namespace not found: " + namespaceId.stringStyleId()); } - throw LanceNamespaceException.serverError( - "Failed to list tables", "ServerError", namespaceId.stringStyleId(), e.getMessage()); + throw new InternalException("Failed to list tables: " + e.getMessage()); } } @Override - public DropTableResponse dropTable(DropTableRequest request) { + public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( - tableId.levels() >= 2, "Table identifier must have at least 2 levels"); + tableId.levels() >= 3, "Table identifier must have catalog, namespace, and table name"); try { - // Split into namespace and table name List parts = tableId.listStyleId(); + String catalog = parts.get(0); String tableName = parts.get(parts.size() - 1); - List namespaceParts = parts.subList(0, parts.size() - 1); + List namespaceParts = parts.subList(1, parts.size() - 1); String namespacePath = String.join(".", namespaceParts); - // Drop table using Generic Table API - restClient.delete("/namespaces/" + namespacePath + "/generic-tables/" + tableName); - - LOG.info("Dropped table: {}.{}", namespacePath, tableName); + PolarisModels.LoadGenericTableResponse getResponse = + restClient.get( + "/polaris/v1/" + + catalog + + "/namespaces/" + + namespacePath + + "/generic-tables/" + + tableName, + PolarisModels.LoadGenericTableResponse.class); - DropTableResponse result = new DropTableResponse(); - // DropTableResponse has no fields to set based on the model + String location = getResponse.getTable().getBaseLocation(); + restClient.delete( + "/polaris/v1/" + + catalog + + "/namespaces/" + + namespacePath + + "/generic-tables/" + + tableName); + + DeregisterTableResponse 
result = new DeregisterTableResponse(); + result.setLocation(location); return result; - } catch (IOException e) { - if (e.getMessage() != null && e.getMessage().contains("404")) { - throw LanceNamespaceException.notFound( - "Table not found", - "NoSuchTable", - tableId.stringStyleId(), - "Table not found: " + tableId.stringStyleId()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new TableNotFoundException("Table not found: " + tableId.stringStyleId()); } - throw LanceNamespaceException.serverError( - "Failed to drop table", "ServerError", tableId.stringStyleId(), e.getMessage()); - } - } - - // These methods are not part of the LanceNamespace interface - // They were removed as they don't exist in the interface - private Dataset openTableInternal(String location, Schema schema) { - try { - return Dataset.open(location, allocator); - } catch (Exception e) { - throw LanceNamespaceException.serverError( - "Failed to open Lance table", - "DatasetError", - location, - "Failed to open Lance table at: " + location + ": " + e.getMessage()); + throw new InternalException("Failed to deregister table: " + e.getMessage()); } } - private Dataset createTableInternal(String location, Schema schema, WriteParams params) { - try { - return Dataset.create(allocator, location, schema, params); - } catch (Exception e) { - throw LanceNamespaceException.serverError( - "Failed to create Lance table", - "DatasetError", - location, - "Failed to create Lance table at: " + location + ": " + e.getMessage()); - } - } - - public void close() { + @Override + public void close() throws IOException { if (restClient != null) { - try { - restClient.close(); - } catch (IOException e) { - LOG.warn("Failed to close REST client", e); - } + restClient.close(); } } } diff --git a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespaceConfig.java b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespaceConfig.java index 
8329b39..26fa519 100644 --- a/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespaceConfig.java +++ b/java/lance-namespace-polaris/src/main/java/org/lance/namespace/polaris/PolarisNamespaceConfig.java @@ -13,7 +13,7 @@ */ package org.lance.namespace.polaris; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.InvalidInputException; import java.util.Map; @@ -55,10 +55,7 @@ public PolarisNamespaceConfig(Map properties) { private String getRequiredProperty(Map properties, String key) { String value = properties.get(key); if (value == null || value.trim().isEmpty()) { - throw LanceNamespaceException.badRequest( - "Missing required configuration", - "ConfigurationError", - key, + throw new InvalidInputException( String.format("Required configuration property '%s' is not set", key)); } return value.trim(); @@ -66,35 +63,20 @@ private String getRequiredProperty(Map properties, String key) { private void validateConfig() { if (!endpoint.startsWith("http://") && !endpoint.startsWith("https://")) { - throw LanceNamespaceException.badRequest( - "Invalid endpoint format", - "ConfigurationError", - POLARIS_ENDPOINT, + throw new InvalidInputException( "Polaris endpoint must start with http:// or https://: " + endpoint); } if (connectTimeout <= 0) { - throw LanceNamespaceException.badRequest( - "Invalid timeout value", - "ConfigurationError", - POLARIS_CONNECT_TIMEOUT, - "Connect timeout must be positive: " + connectTimeout); + throw new InvalidInputException("Connect timeout must be positive: " + connectTimeout); } if (readTimeout <= 0) { - throw LanceNamespaceException.badRequest( - "Invalid timeout value", - "ConfigurationError", - POLARIS_READ_TIMEOUT, - "Read timeout must be positive: " + readTimeout); + throw new InvalidInputException("Read timeout must be positive: " + readTimeout); } if (maxRetries < 0) { - throw LanceNamespaceException.badRequest( - "Invalid retry value", - "ConfigurationError", - 
POLARIS_MAX_RETRIES, - "Max retries cannot be negative: " + maxRetries); + throw new InvalidInputException("Max retries cannot be negative: " + maxRetries); } } @@ -118,10 +100,10 @@ public int getMaxRetries() { return maxRetries; } - /** Get the full API URL for Polaris catalog operations. Format: {endpoint}/api/catalog/v1 */ + /** Get the full API URL for Polaris catalog operations. Format: {endpoint}/api/catalog */ public String getFullApiUrl() { String baseUrl = endpoint.endsWith("/") ? endpoint.substring(0, endpoint.length() - 1) : endpoint; - return baseUrl + "/api/catalog/v1"; + return baseUrl + "/api/catalog"; } } diff --git a/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java b/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java index d45dc78..55d7622 100644 --- a/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java +++ b/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespace.java @@ -13,19 +13,19 @@ */ package org.lance.namespace.polaris; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.LanceNamespaceException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; +import org.lance.namespace.model.DeregisterTableRequest; +import org.lance.namespace.model.DeregisterTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import 
org.lance.namespace.model.DropTableResponse; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; @@ -47,6 +47,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import static org.assertj.core.api.Assertions.assertThat; @@ -116,7 +117,7 @@ public void testCreateNamespace() throws IOException { mockResponse.setProperties(Collections.singletonMap("key", "value")); when(restClient.post( - eq("/namespaces"), + eq("/v1/test_catalog/namespaces"), any(PolarisModels.CreateNamespaceRequest.class), eq(PolarisModels.NamespaceResponse.class))) .thenReturn(mockResponse); @@ -138,7 +139,7 @@ public void testDescribeNamespace() throws IOException { mockResponse.setProperties(Collections.singletonMap("description", "test schema")); when(restClient.get( - eq("/namespaces/test_catalog.schema1"), eq(PolarisModels.NamespaceResponse.class))) + eq("/v1/test_catalog/namespaces/schema1"), eq(PolarisModels.NamespaceResponse.class))) .thenReturn(mockResponse); DescribeNamespaceRequest request = new DescribeNamespaceRequest(); @@ -153,18 +154,16 @@ public void testDescribeNamespace() throws IOException { @Test public void testListNamespaces() throws IOException { PolarisModels.ListNamespacesResponse mockResponse = new PolarisModels.ListNamespacesResponse(); - PolarisModels.ListNamespacesResponse.Namespace ns1 = - new PolarisModels.ListNamespacesResponse.Namespace(); - ns1.setNamespace(Arrays.asList("test_catalog", "schema1")); - PolarisModels.ListNamespacesResponse.Namespace ns2 = - new PolarisModels.ListNamespacesResponse.Namespace(); - ns2.setNamespace(Arrays.asList("test_catalog", "schema2")); - mockResponse.setNamespaces(Arrays.asList(ns1, ns2)); - - when(restClient.get(eq("/namespaces"), eq(PolarisModels.ListNamespacesResponse.class))) + List> namespaces = + Arrays.asList(Arrays.asList("schema1"), 
Arrays.asList("schema2")); + mockResponse.setNamespaces(namespaces); + + when(restClient.get( + eq("/v1/test_catalog/namespaces"), eq(PolarisModels.ListNamespacesResponse.class))) .thenReturn(mockResponse); ListNamespacesRequest request = new ListNamespacesRequest(); + request.setId(Collections.singletonList("test_catalog")); ListNamespacesResponse response = namespace.listNamespaces(request); @@ -179,14 +178,14 @@ public void testDropNamespace() throws IOException { DropNamespaceResponse response = namespace.dropNamespace(request); - verify(restClient).delete("/namespaces/test_catalog.schema1"); + verify(restClient).delete("/v1/test_catalog/namespaces/schema1"); // Response doesn't have getId() method, just verify the delete was called } @Test public void testNamespaceExists() throws IOException { when(restClient.get( - eq("/namespaces/test_catalog.schema1"), eq(PolarisModels.NamespaceResponse.class))) + eq("/v1/test_catalog/namespaces/schema1"), eq(PolarisModels.NamespaceResponse.class))) .thenReturn(new PolarisModels.NamespaceResponse()); NamespaceExistsRequest request = new NamespaceExistsRequest(); @@ -199,8 +198,8 @@ public void testNamespaceExists() throws IOException { @Test public void testNamespaceNotExists() throws IOException { when(restClient.get( - eq("/namespaces/test_catalog.schema1"), eq(PolarisModels.NamespaceResponse.class))) - .thenThrow(new IOException("404 Not Found")); + eq("/v1/test_catalog/namespaces/schema1"), eq(PolarisModels.NamespaceResponse.class))) + .thenThrow(new org.lance.namespace.rest.RestClientException(404, "Not Found")); NamespaceExistsRequest request = new NamespaceExistsRequest(); request.setId(Arrays.asList("test_catalog", "schema1")); @@ -226,7 +225,7 @@ public void testCreateEmptyTable() throws IOException { mockResponse.setTable(mockTable); when(restClient.post( - eq("/namespaces/test_catalog.schema1/generic-tables"), + eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables"), 
any(PolarisModels.CreateGenericTableRequest.class), eq(PolarisModels.LoadGenericTableResponse.class))) .thenReturn(mockResponse); @@ -234,13 +233,10 @@ public void testCreateEmptyTable() throws IOException { CreateEmptyTableRequest request = new CreateEmptyTableRequest(); request.setId(Arrays.asList("test_catalog", "schema1", "test_table")); request.setLocation("s3://bucket/path/to/table"); - request.setProperties(Collections.singletonMap("comment", "Test table")); CreateEmptyTableResponse response = namespace.createEmptyTable(request); assertThat(response.getLocation()).isEqualTo("s3://bucket/path/to/table"); - assertThat(response.getProperties()).containsEntry("managed_by", "storage"); - assertThat(response.getProperties()).containsEntry("comment", "Test table"); } @Test @@ -259,7 +255,7 @@ public void testDescribeTable() throws IOException { mockResponse.setTable(mockTable); when(restClient.get( - eq("/namespaces/test_catalog.schema1/generic-tables/test_table"), + eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables/test_table"), eq(PolarisModels.LoadGenericTableResponse.class))) .thenReturn(mockResponse); @@ -269,8 +265,6 @@ public void testDescribeTable() throws IOException { DescribeTableResponse response = namespace.describeTable(request); assertThat(response.getLocation()).isEqualTo("s3://bucket/path/to/table"); - assertThat(response.getProperties()).containsEntry("comment", "Test table"); - assertThat(response.getProperties()).containsEntry("managed_by", "storage"); } @Test @@ -285,7 +279,7 @@ public void testDescribeTableNotLanceFormat() throws IOException { mockResponse.setTable(mockTable); when(restClient.get( - eq("/namespaces/test_catalog.schema1/generic-tables/test_table"), + eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables/test_table"), eq(PolarisModels.LoadGenericTableResponse.class))) .thenReturn(mockResponse); @@ -300,11 +294,11 @@ public void testDescribeTableNotLanceFormat() throws IOException { @Test public void 
testListTables() throws IOException { PolarisModels.TableIdentifier id1 = new PolarisModels.TableIdentifier(); - id1.setNamespace("test_catalog.schema1"); + id1.setNamespace(Collections.singletonList("schema1")); id1.setName("table1"); PolarisModels.TableIdentifier id2 = new PolarisModels.TableIdentifier(); - id2.setNamespace("test_catalog.schema1"); + id2.setNamespace(Collections.singletonList("schema1")); id2.setName("table2"); PolarisModels.ListGenericTablesResponse mockResponse = @@ -312,7 +306,7 @@ public void testListTables() throws IOException { mockResponse.setIdentifiers(Arrays.asList(id1, id2)); when(restClient.get( - eq("/namespaces/test_catalog.schema1/generic-tables"), + eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables"), eq(PolarisModels.ListGenericTablesResponse.class))) .thenReturn(mockResponse); @@ -326,13 +320,28 @@ public void testListTables() throws IOException { } @Test - public void testDropTable() throws IOException { - DropTableRequest request = new DropTableRequest(); + public void testDeregisterTable() throws IOException { + PolarisModels.GenericTable mockTable = new PolarisModels.GenericTable(); + mockTable.setName("test_table"); + mockTable.setFormat("lance"); + mockTable.setBaseLocation("s3://bucket/path/to/table"); + + PolarisModels.LoadGenericTableResponse mockResponse = + new PolarisModels.LoadGenericTableResponse(); + mockResponse.setTable(mockTable); + + when(restClient.get( + eq("/polaris/v1/test_catalog/namespaces/schema1/generic-tables/test_table"), + eq(PolarisModels.LoadGenericTableResponse.class))) + .thenReturn(mockResponse); + + DeregisterTableRequest request = new DeregisterTableRequest(); request.setId(Arrays.asList("test_catalog", "schema1", "test_table")); - DropTableResponse response = namespace.dropTable(request); + DeregisterTableResponse response = namespace.deregisterTable(request); - verify(restClient).delete("/namespaces/test_catalog.schema1/generic-tables/test_table"); - // Response doesn't have 
getId() method, just verify the delete was called + assertThat(response.getLocation()).isEqualTo("s3://bucket/path/to/table"); + verify(restClient) + .delete("/polaris/v1/test_catalog/namespaces/schema1/generic-tables/test_table"); } } diff --git a/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespaceIntegration.java b/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespaceIntegration.java index 06a3815..e7a4804 100644 --- a/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespaceIntegration.java +++ b/java/lance-namespace-polaris/src/test/java/org/lance/namespace/polaris/TestPolarisNamespaceIntegration.java @@ -13,17 +13,17 @@ */ package org.lance.namespace.polaris; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.LanceNamespaceException; +import org.lance.namespace.model.CreateEmptyTableRequest; +import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; +import org.lance.namespace.model.DeregisterTableRequest; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; -import org.lance.namespace.model.DropTableRequest; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; @@ -74,8 +74,8 @@ public class TestPolarisNamespaceIntegration { public static void checkPolarisAvailable() { try { // Try to check if Polaris API is available by checking a known endpoint - // 
We'll try the namespaces endpoint which should exist - URL url = new URL(POLARIS_ENDPOINT + "/api/catalog/v1/namespaces"); + // We'll try the namespaces endpoint with the test catalog warehouse + URL url = new URL(POLARIS_ENDPOINT + "/api/catalog/v1/test_catalog/namespaces"); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("GET"); conn.setConnectTimeout(1000); @@ -224,9 +224,9 @@ public void testNamespaceOperations() { // List namespaces ListNamespacesRequest listRequest = new ListNamespacesRequest(); + listRequest.setId(Collections.singletonList(testCatalog)); ListNamespacesResponse listResponse = namespace.listNamespaces(listRequest); - assertThat(listResponse.getNamespaces()) - .anyMatch(ns -> ns.equals(Arrays.asList(testCatalog, testNamespace))); + assertThat(listResponse.getNamespaces()).contains(testCatalog + "." + testNamespace); // Drop namespace DropNamespaceRequest dropRequest = new DropNamespaceRequest(); @@ -236,7 +236,7 @@ public void testNamespaceOperations() { // Verify namespace doesn't exist assertThatThrownBy(() -> namespace.namespaceExists(existsRequest)) .isInstanceOf(LanceNamespaceException.class) - .hasMessageContaining("404"); + .hasMessageContaining("not found"); } @Test @@ -248,22 +248,20 @@ public void testTableOperations() { String tableName = "test_table_" + UUID.randomUUID().toString().substring(0, 8); - // Create table - CreateTableRequest createRequest = new CreateTableRequest(); + // Create empty table + CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); createRequest.setId(Arrays.asList(testCatalog, testNamespace, tableName)); - createRequest.setLocation("s3://test-bucket/lance/" + tableName); - createRequest.setProperties(Collections.singletonMap("comment", "Test table")); + createRequest.setLocation("/tmp/polaris-test/" + testNamespace + "/" + tableName); - CreateTableResponse createResponse = namespace.createTable(createRequest, new byte[0]); - 
assertThat(createResponse.getLocation()).isEqualTo("s3://test-bucket/lance/" + tableName); - assertThat(createResponse.getProperties()).containsEntry("comment", "Test table"); + CreateEmptyTableResponse createResponse = namespace.createEmptyTable(createRequest); + assertThat(createResponse.getLocation()).isNotNull(); // Describe table DescribeTableRequest describeRequest = new DescribeTableRequest(); describeRequest.setId(Arrays.asList(testCatalog, testNamespace, tableName)); DescribeTableResponse describeResponse = namespace.describeTable(describeRequest); - assertThat(describeResponse.getLocation()).isEqualTo("s3://test-bucket/lance/" + tableName); + assertThat(describeResponse.getLocation()).isNotNull(); // Check table exists TableExistsRequest existsRequest = new TableExistsRequest(); @@ -277,34 +275,31 @@ public void testTableOperations() { ListTablesResponse listResponse = namespace.listTables(listRequest); assertThat(listResponse.getTables()).contains(tableName); - // Drop table - DropTableRequest dropRequest = new DropTableRequest(); - dropRequest.setId(Arrays.asList(testCatalog, testNamespace, tableName)); - namespace.dropTable(dropRequest); + // Deregister table + DeregisterTableRequest deregisterRequest = new DeregisterTableRequest(); + deregisterRequest.setId(Arrays.asList(testCatalog, testNamespace, tableName)); + namespace.deregisterTable(deregisterRequest); // Verify table doesn't exist assertThatThrownBy(() -> namespace.tableExists(existsRequest)) .isInstanceOf(LanceNamespaceException.class) - .hasMessageContaining("404"); + .hasMessageContaining("not found"); } @Test - public void testCreateTableWithInvalidFormat() { + public void testCreateEmptyTableWithLocation() { // Create namespace first CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); nsRequest.setId(Arrays.asList(testCatalog, testNamespace)); namespace.createNamespace(nsRequest); - // Try to describe a non-Lance table (would need to be created through Polaris directly) - // 
This test demonstrates the format validation - - // For now, just verify Lance tables work correctly + // Create an empty Lance table with location String tableName = "lance_table"; - CreateTableRequest createRequest = new CreateTableRequest(); + CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); createRequest.setId(Arrays.asList(testCatalog, testNamespace, tableName)); - createRequest.setLocation("s3://test-bucket/lance/" + tableName); + createRequest.setLocation("/tmp/polaris-test/" + testNamespace + "/" + tableName); - CreateTableResponse response = namespace.createTable(createRequest, new byte[0]); - assertThat(response.getProperties()).containsEntry("table_type", "lance"); + CreateEmptyTableResponse response = namespace.createEmptyTable(createRequest); + assertThat(response.getLocation()).isNotNull(); } } diff --git a/java/lance-namespace-unity/README.md b/java/lance-namespace-unity/README.md index c42e2c0..1b2a39c 100644 --- a/java/lance-namespace-unity/README.md +++ b/java/lance-namespace-unity/README.md @@ -70,11 +70,11 @@ CreateNamespaceRequest request = new CreateNamespaceRequest(); request.setId(Arrays.asList("unity", "my_schema")); namespace.createNamespace(request); -// Create a table -CreateTableRequest tableRequest = new CreateTableRequest(); +// Declare a table entry (use lance SDK to create the actual table) +DeclareTableRequest tableRequest = new DeclareTableRequest(); tableRequest.setId(Arrays.asList("unity", "my_schema", "my_table")); -tableRequest.setJsonArrowSchema(arrowSchema); -namespace.createTable(tableRequest); +tableRequest.setLocation("/path/to/storage/my_schema/my_table"); +namespace.declareTable(tableRequest); ``` ## Integration with Unity Catalog diff --git a/java/lance-namespace-unity/pom.xml b/java/lance-namespace-unity/pom.xml index 3ffe9d6..767707a 100644 --- a/java/lance-namespace-unity/pom.xml +++ b/java/lance-namespace-unity/pom.xml @@ -18,10 +18,18 @@ Unity Catalog namespace implementation for Lance + + 
org.lance + lance-namespace-impls-core + org.lance lance-core + + org.lance + lance-namespace-core + org.lance lance-namespace-apache-client @@ -54,6 +62,18 @@ test + + org.junit.jupiter + junit-jupiter + test + + + + org.assertj + assertj-core + test + + org.mockito mockito-core diff --git a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java index 0579ef3..6dc5d1e 100644 --- a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java +++ b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespace.java @@ -13,26 +13,25 @@ */ package org.lance.namespace.unity; -import com.lancedb.lance.Dataset; -import com.lancedb.lance.WriteParams; import org.lance.namespace.LanceNamespace; -import org.lance.namespace.LanceNamespaceException; -import org.lance.namespace.ObjectIdentifier; +import org.lance.namespace.errors.InternalException; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.NamespaceAlreadyExistsException; +import org.lance.namespace.errors.NamespaceNotFoundException; +import org.lance.namespace.errors.TableAlreadyExistsException; +import org.lance.namespace.errors.TableNotFoundException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; +import org.lance.namespace.model.DeregisterTableRequest; +import org.lance.namespace.model.DeregisterTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import 
org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; -import org.lance.namespace.model.JsonArrowSchema; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; @@ -40,8 +39,8 @@ import org.lance.namespace.model.NamespaceExistsRequest; import org.lance.namespace.model.TableExistsRequest; import org.lance.namespace.rest.RestClient; -import org.lance.namespace.util.ArrowIpcUtil; -import org.lance.namespace.util.JsonArrowSchemaConverter; +import org.lance.namespace.rest.RestClientException; +import org.lance.namespace.util.ObjectIdentifier; import org.lance.namespace.util.ValidationUtil; import org.apache.arrow.memory.BufferAllocator; @@ -52,6 +51,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; @@ -60,10 +60,11 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; /** Unity Catalog namespace implementation for Lance. 
*/ -public class UnityNamespace implements LanceNamespace { +public class UnityNamespace implements LanceNamespace, Closeable { private static final Logger LOG = LoggerFactory.getLogger(UnityNamespace.class); private static final String TABLE_TYPE_LANCE = "lance"; private static final String TABLE_TYPE_EXTERNAL = "EXTERNAL"; @@ -80,19 +81,15 @@ public void initialize(Map configProperties, BufferAllocator all this.allocator = allocator; this.config = new UnityNamespaceConfig(configProperties); - // Build REST client with authentication if provided RestClient.Builder clientBuilder = RestClient.builder() .baseUrl(config.getFullApiUrl()) - .connectTimeout(config.getConnectTimeout()) - .readTimeout(config.getReadTimeout()) + .connectTimeout(config.getConnectTimeout(), TimeUnit.MILLISECONDS) + .readTimeout(config.getReadTimeout(), TimeUnit.MILLISECONDS) .maxRetries(config.getMaxRetries()); - // Add auth token if provided if (config.getAuthToken() != null) { - Map headers = new HashMap<>(); - headers.put("Authorization", "Bearer " + config.getAuthToken()); - clientBuilder.defaultHeaders(headers); + clientBuilder.authToken(config.getAuthToken()); } this.restClient = clientBuilder.build(); @@ -110,7 +107,6 @@ public String namespaceId() { public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { ObjectIdentifier nsId = ObjectIdentifier.of(request.getId()); - // Unity supports 3-level namespace: catalog.schema.table ValidationUtil.checkArgument( nsId.levels() <= 2, "Expect at most 2-level namespace but get %s", nsId); @@ -118,30 +114,24 @@ public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { List namespaces; if (nsId.levels() == 0) { - // Return the configured catalog as the only top-level namespace namespaces = Collections.singletonList(config.getCatalog()); } else if (nsId.levels() == 1) { - // List schemas in the catalog String catalog = nsId.levelAtListPos(0); if (!catalog.equals(config.getCatalog())) { - throw 
LanceNamespaceException.notFound( - "Catalog not found", - "CATALOG_NOT_FOUND", - catalog, - "Expected: " + config.getCatalog()); + throw new NamespaceNotFoundException( + "Catalog not found. Expected: " + config.getCatalog()); } - Map params = new HashMap<>(); - params.put("catalog_name", catalog); + String path = "/schemas?catalog_name=" + catalog; if (request.getLimit() != null) { - params.put("max_results", request.getLimit().toString()); + path += "&max_results=" + request.getLimit(); } if (request.getPageToken() != null) { - params.put("page_token", request.getPageToken()); + path += "&page_token=" + request.getPageToken(); } UnityModels.ListSchemasResponse response = - restClient.get("/schemas", params, UnityModels.ListSchemasResponse.class); + restClient.get(path, UnityModels.ListSchemasResponse.class); if (response != null && response.getSchemas() != null) { namespaces = @@ -162,8 +152,8 @@ public ListNamespacesResponse listNamespaces(ListNamespacesRequest request) { response.setNamespaces(resultNamespaces); return response; - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to list namespaces: " + e.getMessage()); + } catch (RestClientException e) { + throw new InternalException("Failed to list namespaces: " + e.getMessage()); } } @@ -176,11 +166,8 @@ public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { String schema = nsId.levelAtListPos(1); if (!catalog.equals(config.getCatalog())) { - throw LanceNamespaceException.badRequest( - "Cannot create namespace in catalog", - "INVALID_CATALOG", - catalog, - "Expected: " + config.getCatalog()); + throw new InvalidInputException( + "Cannot create namespace in catalog. 
Expected: " + config.getCatalog()); } try { @@ -196,17 +183,12 @@ public CreateNamespaceResponse createNamespace(CreateNamespaceRequest request) { response.setProperties(schemaInfo.getProperties()); return response; - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 409) { - throw LanceNamespaceException.conflict( - "Namespace already exists", - "NAMESPACE_EXISTS", - request.getId().toString(), - e.getResponseBody()); + } catch (RestClientException e) { + if (e.isConflict()) { + throw new NamespaceAlreadyExistsException( + "Namespace already exists: " + request.getId().toString()); } - throw new LanceNamespaceException(500, "Failed to create namespace: " + e.getMessage()); - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to create namespace: " + e.getMessage()); + throw new InternalException("Failed to create namespace: " + e.getMessage()); } } @@ -219,8 +201,8 @@ public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest requ String schema = nsId.levelAtListPos(1); if (!catalog.equals(config.getCatalog())) { - throw LanceNamespaceException.notFound( - "Catalog not found", "CATALOG_NOT_FOUND", catalog, "Expected: " + config.getCatalog()); + throw new NamespaceNotFoundException( + "Catalog not found: " + catalog + ". 
Expected: " + config.getCatalog()); } try { @@ -232,17 +214,11 @@ public DescribeNamespaceResponse describeNamespace(DescribeNamespaceRequest requ response.setProperties(schemaInfo.getProperties()); return response; - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 404) { - throw LanceNamespaceException.notFound( - "Namespace not found", - "NAMESPACE_NOT_FOUND", - request.getId().toString(), - e.getResponseBody()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new NamespaceNotFoundException("Namespace not found: " + request.getId().toString()); } - throw new LanceNamespaceException(500, "Failed to describe namespace: " + e.getMessage()); - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to describe namespace: " + e.getMessage()); + throw new InternalException("Failed to describe namespace: " + e.getMessage()); } } @@ -253,6 +229,10 @@ public void namespaceExists(NamespaceExistsRequest request) { @Override public DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { + if ("Cascade".equalsIgnoreCase(request.getBehavior())) { + throw new InvalidInputException("Cascade behavior is not supported for this implementation"); + } + ObjectIdentifier nsId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument(nsId.levels() == 2, "Expect a 2-level namespace but get %s", nsId); @@ -260,34 +240,22 @@ public DropNamespaceResponse dropNamespace(DropNamespaceRequest request) { String schema = nsId.levelAtListPos(1); if (!catalog.equals(config.getCatalog())) { - throw LanceNamespaceException.badRequest( - "Cannot drop namespace in catalog", - "INVALID_CATALOG", - catalog, - "Expected: " + config.getCatalog()); + throw new InvalidInputException( + "Cannot drop namespace in catalog. Expected: " + config.getCatalog()); } try { String fullName = catalog + "." 
+ schema; - Map params = new HashMap<>(); - if (request.getBehavior() != null - && request.getBehavior() == DropNamespaceRequest.BehaviorEnum.CASCADE) { - params.put("force", "true"); - } - - restClient.delete("/schemas/" + fullName, params); + String path = "/schemas/" + fullName; - DropNamespaceResponse response = new DropNamespaceResponse(); - return response; + restClient.delete(path); + return new DropNamespaceResponse(); - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 404) { - DropNamespaceResponse response = new DropNamespaceResponse(); - return response; + } catch (RestClientException e) { + if (e.isNotFound()) { + return new DropNamespaceResponse(); } - throw new LanceNamespaceException(500, "Failed to drop namespace: " + e.getMessage()); - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to drop namespace: " + e.getMessage()); + throw new InternalException("Failed to drop namespace: " + e.getMessage()); } } @@ -300,27 +268,24 @@ public ListTablesResponse listTables(ListTablesRequest request) { String schema = nsId.levelAtListPos(1); if (!catalog.equals(config.getCatalog())) { - throw LanceNamespaceException.notFound( - "Catalog not found", "CATALOG_NOT_FOUND", catalog, "Expected: " + config.getCatalog()); + throw new NamespaceNotFoundException( + "Catalog not found: " + catalog + ". 
Expected: " + config.getCatalog()); } try { - Map params = new HashMap<>(); - params.put("catalog_name", catalog); - params.put("schema_name", schema); + String path = "/tables?catalog_name=" + catalog + "&schema_name=" + schema; if (request.getLimit() != null) { - params.put("max_results", request.getLimit().toString()); + path += "&max_results=" + request.getLimit(); } if (request.getPageToken() != null) { - params.put("page_token", request.getPageToken()); + path += "&page_token=" + request.getPageToken(); } UnityModels.ListTablesResponse unityResponse = - restClient.get("/tables", params, UnityModels.ListTablesResponse.class); + restClient.get(path, UnityModels.ListTablesResponse.class); List tables = Collections.emptyList(); if (unityResponse != null && unityResponse.getTables() != null) { - // Filter only Lance tables tables = unityResponse.getTables().stream() .filter(this::isLanceTable) @@ -335,96 +300,8 @@ public ListTablesResponse listTables(ListTablesRequest request) { response.setTables(resultTables); return response; - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to list tables: " + e.getMessage()); - } - } - - @Override - public CreateTableResponse createTable(CreateTableRequest request, byte[] requestData) { - // Validate that requestData is a valid Arrow IPC stream - ValidationUtil.checkNotNull( - requestData, "Request data (Arrow IPC stream) is required for createTable"); - ValidationUtil.checkArgument( - requestData.length > 0, "Request data (Arrow IPC stream) cannot be empty"); - - ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); - ValidationUtil.checkArgument( - tableId.levels() == 3, "Expect a 3-level table identifier but get %s", tableId); - - String catalog = tableId.levelAtListPos(0); - String schema = tableId.levelAtListPos(1); - String table = tableId.levelAtListPos(2); - - if (!catalog.equals(config.getCatalog())) { - throw LanceNamespaceException.badRequest( - "Cannot create table in 
catalog", - "INVALID_CATALOG", - catalog, - "Expected: " + config.getCatalog()); - } - - try { - // First create an empty Lance table dataset - String tablePath = config.getRoot() + "/" + catalog + "/" + schema + "/" + table; - // Extract schema from Arrow IPC stream - JsonArrowSchema jsonSchema; - try { - jsonSchema = ArrowIpcUtil.extractSchemaFromIpc(requestData); - } catch (IOException e) { - throw LanceNamespaceException.badRequest( - "Invalid Arrow IPC stream: " + e.getMessage(), - "INVALID_ARROW_IPC", - catalog + "." + schema + "." + table, - "Failed to extract schema from Arrow IPC stream"); - } - Schema arrowSchema = JsonArrowSchemaConverter.convertToArrowSchema(jsonSchema); - - WriteParams writeParams = - new WriteParams.Builder().withMode(WriteParams.WriteMode.CREATE).build(); - - Dataset dataset = Dataset.create(allocator, tablePath, arrowSchema, writeParams); - dataset.close(); - - // Create Unity table metadata - UnityModels.CreateTable createTable = new UnityModels.CreateTable(); - createTable.setName(table); - createTable.setCatalogName(catalog); - createTable.setSchemaName(schema); - createTable.setTableType(TABLE_TYPE_EXTERNAL); - // Unity doesn't recognize LANCE format, use TEXT as a generic format for external tables - // The actual format is determined by the table_type=lance property - createTable.setDataSourceFormat("TEXT"); - createTable.setColumns(convertArrowSchemaToUnityColumns(arrowSchema)); - createTable.setStorageLocation(tablePath); - - Map properties = new HashMap<>(); - properties.put(TABLE_TYPE_KEY, TABLE_TYPE_LANCE); - if (request.getProperties() != null) { - properties.putAll(request.getProperties()); - } - createTable.setProperties(properties); - - UnityModels.TableInfo tableInfo = - restClient.post("/tables", createTable, UnityModels.TableInfo.class); - - CreateTableResponse response = new CreateTableResponse(); - response.setLocation(tablePath); - response.setVersion(1L); - response.setProperties(tableInfo.getProperties()); - 
return response; - - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 409) { - throw LanceNamespaceException.conflict( - "Table already exists", - "TABLE_EXISTS", - request.getId().toString(), - e.getResponseBody()); - } - throw new LanceNamespaceException(500, "Failed to create table: " + e.getMessage()); - } catch (Exception e) { - throw new LanceNamespaceException(500, "Failed to create table: " + e.getMessage()); + } catch (RestClientException e) { + throw new InternalException("Failed to list tables: " + e.getMessage()); } } @@ -439,34 +316,28 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request String table = tableId.levelAtListPos(2); if (!catalog.equals(config.getCatalog())) { - throw LanceNamespaceException.badRequest( - "Cannot create empty table in catalog", - "INVALID_CATALOG", - catalog, - "Expected: " + config.getCatalog()); + throw new InvalidInputException( + "Cannot create empty table in catalog. Expected: " + config.getCatalog()); } try { - // Determine table location String tablePath = request.getLocation(); if (tablePath == null || tablePath.isEmpty()) { tablePath = config.getRoot() + "/" + catalog + "/" + schema + "/" + table; } - // Create Unity table metadata without creating Lance dataset UnityModels.CreateTable createTable = new UnityModels.CreateTable(); createTable.setName(table); createTable.setCatalogName(catalog); createTable.setSchemaName(schema); createTable.setTableType(TABLE_TYPE_EXTERNAL); - // Unity doesn't recognize LANCE format, use TEXT as a generic format for external tables createTable.setDataSourceFormat("TEXT"); - // For empty table, create minimal schema with just an ID column + List columns = new ArrayList<>(); UnityModels.ColumnInfo idColumn = new UnityModels.ColumnInfo(); idColumn.setName("__placeholder_id"); - idColumn.setTypeText("BIGINT"); - idColumn.setTypeName("BIGINT"); + idColumn.setTypeText("LONG"); + idColumn.setTypeName("LONG"); 
idColumn.setTypeJson("{\"type\":\"long\"}"); idColumn.setPosition(0); idColumn.setNullable(true); @@ -476,9 +347,6 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request Map properties = new HashMap<>(); properties.put(TABLE_TYPE_KEY, TABLE_TYPE_LANCE); - if (request.getProperties() != null) { - properties.putAll(request.getProperties()); - } createTable.setProperties(properties); UnityModels.TableInfo tableInfo = @@ -486,25 +354,24 @@ public CreateEmptyTableResponse createEmptyTable(CreateEmptyTableRequest request CreateEmptyTableResponse response = new CreateEmptyTableResponse(); response.setLocation(tablePath); - response.setProperties(tableInfo.getProperties()); return response; - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 409) { - throw LanceNamespaceException.conflict( - "Table already exists", - "TABLE_EXISTS", - request.getId().toString(), - e.getResponseBody()); + } catch (RestClientException e) { + if (e.isConflict()) { + throw new TableAlreadyExistsException( + "Table already exists: " + request.getId().toString()); } - throw new LanceNamespaceException(500, "Failed to create empty table: " + e.getMessage()); - } catch (Exception e) { - throw new LanceNamespaceException(500, "Failed to create empty table: " + e.getMessage()); + throw new InternalException("Failed to create empty table: " + e.getMessage()); } } @Override public DescribeTableResponse describeTable(DescribeTableRequest request) { + if (Boolean.TRUE.equals(request.getLoadDetailedMetadata())) { + throw new InvalidInputException( + "load_detailed_metadata=true is not supported for this implementation"); + } + ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( tableId.levels() == 3, "Expect a 3-level table identifier but get %s", tableId); @@ -514,8 +381,8 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { String table = tableId.levelAtListPos(2); if 
(!catalog.equals(config.getCatalog())) { - throw LanceNamespaceException.notFound( - "Catalog not found", "CATALOG_NOT_FOUND", catalog, "Expected: " + config.getCatalog()); + throw new NamespaceNotFoundException( + "Catalog not found: " + catalog + ". Expected: " + config.getCatalog()); } try { @@ -524,27 +391,19 @@ public DescribeTableResponse describeTable(DescribeTableRequest request) { restClient.get("/tables/" + fullName, UnityModels.TableInfo.class); if (!isLanceTable(tableInfo)) { - throw LanceNamespaceException.badRequest( - "Not a Lance table", - "INVALID_TABLE", - request.getId().toString(), - "Table is not managed by Lance"); + throw new InvalidInputException("Not a Lance table: " + request.getId().toString()); } DescribeTableResponse response = new DescribeTableResponse(); response.setLocation(tableInfo.getStorageLocation()); - response.setProperties(tableInfo.getProperties()); - + response.setStorageOptions(tableInfo.getProperties()); return response; - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 404) { - throw LanceNamespaceException.notFound( - "Table not found", "TABLE_NOT_FOUND", request.getId().toString(), e.getResponseBody()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new TableNotFoundException("Table not found: " + request.getId().toString()); } - throw new LanceNamespaceException(500, "Failed to describe table: " + e.getMessage()); - } catch (Exception e) { - throw new LanceNamespaceException(500, "Failed to describe table: " + e.getMessage()); + throw new InternalException("Failed to describe table: " + e.getMessage()); } } @@ -554,7 +413,7 @@ public void tableExists(TableExistsRequest request) { } @Override - public DropTableResponse dropTable(DropTableRequest request) { + public DeregisterTableResponse deregisterTable(DeregisterTableRequest request) { ObjectIdentifier tableId = ObjectIdentifier.of(request.getId()); ValidationUtil.checkArgument( tableId.levels() == 3, "Expect a 3-level 
table identifier but get %s", tableId); @@ -564,61 +423,35 @@ public DropTableResponse dropTable(DropTableRequest request) { String table = tableId.levelAtListPos(2); if (!catalog.equals(config.getCatalog())) { - throw LanceNamespaceException.badRequest( - "Cannot drop table in catalog", - "INVALID_CATALOG", - catalog, - "Expected: " + config.getCatalog()); + throw new NamespaceNotFoundException( + "Catalog not found: " + catalog + ". Expected: " + config.getCatalog()); } try { String fullName = catalog + "." + schema + "." + table; - - // First get the table info to check if it's a Lance table - UnityModels.TableInfo tableInfo = null; - try { - tableInfo = restClient.get("/tables/" + fullName, UnityModels.TableInfo.class); - } catch (RestClient.RestClientException e) { - if (e.getStatusCode() == 404) { - DropTableResponse response = new DropTableResponse(); - response.setId(request.getId()); - return response; - } - throw e; - } + UnityModels.TableInfo tableInfo = + restClient.get("/tables/" + fullName, UnityModels.TableInfo.class); if (!isLanceTable(tableInfo)) { - throw LanceNamespaceException.badRequest( - "Not a Lance table", - "INVALID_TABLE", - request.getId().toString(), - "Table is not managed by Lance"); + throw new InvalidInputException("Not a Lance table: " + request.getId().toString()); } - // Delete from Unity + String location = tableInfo.getStorageLocation(); restClient.delete("/tables/" + fullName); - // Delete Lance dataset data - try { - Dataset.drop(tableInfo.getStorageLocation(), Collections.emptyMap()); - } catch (Exception e) { - // Log warning but continue - Unity metadata already deleted - LOG.warn( - "Failed to delete Lance dataset at {}: {}", - tableInfo.getStorageLocation(), - e.getMessage()); - } - - DropTableResponse response = new DropTableResponse(); - response.setId(request.getId()); - response.setLocation(tableInfo.getStorageLocation()); + DeregisterTableResponse response = new DeregisterTableResponse(); + 
response.setLocation(location); return response; - } catch (IOException e) { - throw new LanceNamespaceException(500, "Failed to drop table: " + e.getMessage()); + } catch (RestClientException e) { + if (e.isNotFound()) { + throw new TableNotFoundException("Table not found: " + request.getId().toString()); + } + throw new InternalException("Failed to deregister table: " + e.getMessage()); } } + @Override public void close() throws IOException { if (restClient != null) { restClient.close(); @@ -642,13 +475,8 @@ private List convertArrowSchemaToUnityColumns(Schema arr columnInfo.setTypeText(unityType); columnInfo.setTypeJson(convertArrowTypeToUnityTypeJson(field.getType())); columnInfo.setTypeName(unityType); - columnInfo.setTypeScale(null); - columnInfo.setTypePrecision(null); - columnInfo.setTypeIntervalType(null); columnInfo.setPosition(columns.size()); - columnInfo.setComment(null); columnInfo.setNullable(field.isNullable()); - columnInfo.setPartitionIndex(null); columns.add(columnInfo); } return columns; @@ -662,7 +490,7 @@ private String convertArrowTypeToUnityType(ArrowType arrowType) { if (intType.getBitWidth() == 32) { return "INT"; } else if (intType.getBitWidth() == 64) { - return "BIGINT"; + return "LONG"; } } else if (arrowType instanceof ArrowType.FloatingPoint) { ArrowType.FloatingPoint fpType = (ArrowType.FloatingPoint) arrowType; @@ -678,7 +506,6 @@ private String convertArrowTypeToUnityType(ArrowType arrowType) { } else if (arrowType instanceof ArrowType.Timestamp) { return "TIMESTAMP"; } - // Default fallback return "STRING"; } @@ -706,7 +533,6 @@ private String convertArrowTypeToUnityTypeJson(ArrowType arrowType) { } else if (arrowType instanceof ArrowType.Timestamp) { return "{\"type\":\"timestamp\"}"; } - // Default fallback return "{\"type\":\"string\"}"; } } diff --git a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java 
b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java index 5d1c2a4..76343b8 100644 --- a/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java +++ b/java/lance-namespace-unity/src/main/java/org/lance/namespace/unity/UnityNamespaceConfig.java @@ -13,8 +13,7 @@ */ package org.lance.namespace.unity; -import org.lance.namespace.util.PropertyUtil; - +import java.util.HashMap; import java.util.Map; /** Configuration for Unity Catalog namespace. */ @@ -46,22 +45,40 @@ public class UnityNamespaceConfig { public UnityNamespaceConfig(Map properties) { this.properties = properties; - this.endpoint = PropertyUtil.propertyAsString(properties, ENDPOINT, null); + this.endpoint = properties.get(ENDPOINT); if (endpoint == null) { throw new IllegalArgumentException("Unity endpoint is required"); } - this.apiPath = PropertyUtil.propertyAsString(properties, API_PATH, DEFAULT_API_PATH); - this.authToken = PropertyUtil.propertyAsString(properties, AUTH_TOKEN, null); - this.catalog = PropertyUtil.propertyAsString(properties, CATALOG, null); + this.apiPath = properties.getOrDefault(API_PATH, DEFAULT_API_PATH); + this.authToken = properties.get(AUTH_TOKEN); + this.catalog = properties.get(CATALOG); if (catalog == null) { throw new IllegalArgumentException("Unity catalog is required"); } + + // Inline PropertyUtil.propertyAsInt + String connectTimeoutStr = properties.get(CONNECT_TIMEOUT); this.connectTimeout = - PropertyUtil.propertyAsInt(properties, CONNECT_TIMEOUT, DEFAULT_CONNECT_TIMEOUT); - this.readTimeout = PropertyUtil.propertyAsInt(properties, READ_TIMEOUT, DEFAULT_READ_TIMEOUT); - this.maxRetries = PropertyUtil.propertyAsInt(properties, MAX_RETRIES, DEFAULT_MAX_RETRIES); - this.root = PropertyUtil.propertyAsString(properties, ROOT, System.getProperty("user.dir")); - this.storageProperties = PropertyUtil.propertiesWithPrefix(properties, "storage."); + connectTimeoutStr != null ? 
Integer.parseInt(connectTimeoutStr) : DEFAULT_CONNECT_TIMEOUT; + + String readTimeoutStr = properties.get(READ_TIMEOUT); + this.readTimeout = + readTimeoutStr != null ? Integer.parseInt(readTimeoutStr) : DEFAULT_READ_TIMEOUT; + + String maxRetriesStr = properties.get(MAX_RETRIES); + this.maxRetries = maxRetriesStr != null ? Integer.parseInt(maxRetriesStr) : DEFAULT_MAX_RETRIES; + + this.root = properties.getOrDefault(ROOT, System.getProperty("user.dir")); + + // Inline PropertyUtil.propertiesWithPrefix + Map filteredStorageProperties = new HashMap<>(); + for (Map.Entry entry : properties.entrySet()) { + if (entry.getKey().startsWith("storage.")) { + filteredStorageProperties.put( + entry.getKey().substring("storage.".length()), entry.getValue()); + } + } + this.storageProperties = filteredStorageProperties; } public String getEndpoint() { diff --git a/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespace.java b/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespace.java index efee14f..b935abb 100644 --- a/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespace.java +++ b/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespace.java @@ -14,28 +14,16 @@ package org.lance.namespace.unity; import org.lance.namespace.LanceNamespace; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.LanceNamespaceException; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; -import org.lance.namespace.model.CreateTableRequest; -import org.lance.namespace.model.CreateTableResponse; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; -import org.lance.namespace.model.DescribeTableRequest; -import org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; 
import org.lance.namespace.model.DropNamespaceResponse; -import org.lance.namespace.model.DropTableRequest; -import org.lance.namespace.model.DropTableResponse; -import org.lance.namespace.model.JsonArrowDataType; -import org.lance.namespace.model.JsonArrowField; -import org.lance.namespace.model.JsonArrowSchema; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; -import org.lance.namespace.model.ListTablesRequest; -import org.lance.namespace.model.ListTablesResponse; import org.lance.namespace.model.NamespaceExistsRequest; -import org.lance.namespace.model.TableExistsRequest; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -50,11 +38,9 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.UUID; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -153,7 +139,7 @@ public void tearDown() throws IOException { try { DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Arrays.asList(UNITY_CATALOG, TEST_SCHEMA)); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); + dropRequest.setBehavior("Restrict"); namespace.dropNamespace(dropRequest); } catch (Exception e) { // Ignore cleanup errors @@ -243,90 +229,6 @@ public void testCreateDuplicateSchema() { } } - @Test - public void testTableLifecycle() throws IOException { - // Create schema first - CreateNamespaceRequest createNsRequest = new CreateNamespaceRequest(); - createNsRequest.setId(Arrays.asList(UNITY_CATALOG, TEST_SCHEMA)); - namespace.createNamespace(createNsRequest); - - String tableName = "test_table_" + UUID.randomUUID().toString().replace("-", ""); - List tableId = Arrays.asList(UNITY_CATALOG, TEST_SCHEMA, tableName); - - // Create table - 
CreateTableRequest createTableRequest = new CreateTableRequest(); - createTableRequest.setId(tableId); - - // Create a simple Arrow schema - JsonArrowSchema arrowSchema = new JsonArrowSchema(); - JsonArrowField field1 = new JsonArrowField(); - field1.setName("id"); - - JsonArrowDataType intType = new JsonArrowDataType(); - intType.setType("int32"); - field1.setType(intType); - field1.setNullable(false); - - JsonArrowField field2 = new JsonArrowField(); - field2.setName("name"); - - JsonArrowDataType stringType = new JsonArrowDataType(); - stringType.setType("utf8"); - field2.setType(stringType); - field2.setNullable(true); - - arrowSchema.setFields(Arrays.asList(field1, field2)); - - Map tableProps = new HashMap<>(); - tableProps.put("custom_prop", "custom_value"); - createTableRequest.setProperties(tableProps); - // Unity tables are always managed by storage - - // Create proper Arrow IPC stream from the schema - byte[] arrowData = - org.lance.namespace.util.ArrowIpcUtil.createEmptyArrowIpcStream(arrowSchema); - CreateTableResponse createTableResponse = namespace.createTable(createTableRequest, arrowData); - assertNotNull(createTableResponse); - // Table created successfully - just verify non-null response - assertNotNull(createTableResponse.getLocation()); - - // Check table exists - TableExistsRequest existsRequest = new TableExistsRequest(); - existsRequest.setId(tableId); - namespace.tableExists(existsRequest); - - // List tables - ListTablesRequest listRequest = new ListTablesRequest(); - listRequest.setId(Arrays.asList(UNITY_CATALOG, TEST_SCHEMA)); - ListTablesResponse listResponse = namespace.listTables(listRequest); - assertNotNull(listResponse); - assertTrue(listResponse.getTables().contains(tableName)); - - // Describe table - DescribeTableRequest describeRequest = new DescribeTableRequest(); - describeRequest.setId(tableId); - DescribeTableResponse describeResponse = namespace.describeTable(describeRequest); - assertNotNull(describeResponse); - 
assertNotNull(describeResponse.getLocation()); - assertNotNull(describeResponse.getProperties()); - assertEquals("storage", describeResponse.getProperties().get("managed_by")); - - // Drop table - DropTableRequest dropRequest = new DropTableRequest(); - dropRequest.setId(tableId); - DropTableResponse dropResponse = namespace.dropTable(dropRequest); - assertNotNull(dropResponse); - assertEquals(tableId, dropResponse.getId()); - - // Check table no longer exists - should throw exception - try { - namespace.tableExists(existsRequest); - fail("Expected table not found exception"); - } catch (LanceNamespaceException e) { - assertTrue(e.getMessage().contains("not found")); - } - } - @Test public void testInvalidCatalog() { CreateNamespaceRequest request = new CreateNamespaceRequest(); diff --git a/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceIntegration.java b/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceIntegration.java index e4442b1..21033ae 100644 --- a/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceIntegration.java +++ b/java/lance-namespace-unity/src/test/java/org/lance/namespace/unity/TestUnityNamespaceIntegration.java @@ -13,17 +13,18 @@ */ package org.lance.namespace.unity; -import org.lance.namespace.LanceNamespaceException; +import org.lance.namespace.errors.InvalidInputException; +import org.lance.namespace.errors.LanceNamespaceException; import org.lance.namespace.model.CreateEmptyTableRequest; import org.lance.namespace.model.CreateEmptyTableResponse; import org.lance.namespace.model.CreateNamespaceRequest; import org.lance.namespace.model.CreateNamespaceResponse; +import org.lance.namespace.model.DeregisterTableRequest; import org.lance.namespace.model.DescribeNamespaceRequest; import org.lance.namespace.model.DescribeNamespaceResponse; import org.lance.namespace.model.DescribeTableRequest; import 
org.lance.namespace.model.DescribeTableResponse; import org.lance.namespace.model.DropNamespaceRequest; -import org.lance.namespace.model.DropTableRequest; import org.lance.namespace.model.ListNamespacesRequest; import org.lance.namespace.model.ListNamespacesResponse; import org.lance.namespace.model.ListTablesRequest; @@ -119,7 +120,8 @@ private static void ensureTestCatalogExists() { createConn.setRequestProperty("Content-Type", "application/json"); createConn.setDoOutput(true); - String body = String.format("{\"name\": \"%s\", \"comment\": \"Test catalog\"}", TEST_CATALOG); + String body = + String.format("{\"name\": \"%s\", \"comment\": \"Test catalog\"}", TEST_CATALOG); createConn.getOutputStream().write(body.getBytes()); int createResponse = createConn.getResponseCode(); @@ -160,7 +162,7 @@ public void tearDown() { // Clean up test schema DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Arrays.asList(TEST_CATALOG, testSchema)); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); + dropRequest.setBehavior("Restrict"); namespace.dropNamespace(dropRequest); } catch (Exception e) { // Ignore cleanup errors @@ -231,7 +233,8 @@ public void testTableOperations() { nsRequest.setId(Arrays.asList(TEST_CATALOG, testSchema)); namespace.createNamespace(nsRequest); - String tableName = "test_table_" + UUID.randomUUID().toString().substring(0, 8).replace("-", ""); + String tableName = + "test_table_" + UUID.randomUUID().toString().substring(0, 8).replace("-", ""); // Create empty table CreateEmptyTableRequest createRequest = new CreateEmptyTableRequest(); @@ -247,7 +250,6 @@ public void testTableOperations() { DescribeTableResponse describeResponse = namespace.describeTable(describeRequest); assertThat(describeResponse.getLocation()).contains(tableName); - assertThat(describeResponse.getProperties()).containsEntry("table_type", "lance"); // List tables ListTablesRequest listRequest = new ListTablesRequest(); @@ -256,10 
+258,10 @@ public void testTableOperations() { ListTablesResponse listResponse = namespace.listTables(listRequest); assertThat(listResponse.getTables()).contains(tableName); - // Drop table - DropTableRequest dropRequest = new DropTableRequest(); - dropRequest.setId(Arrays.asList(TEST_CATALOG, testSchema, tableName)); - namespace.dropTable(dropRequest); + // Deregister table + DeregisterTableRequest deregisterRequest = new DeregisterTableRequest(); + deregisterRequest.setId(Arrays.asList(TEST_CATALOG, testSchema, tableName)); + namespace.deregisterTable(deregisterRequest); // Verify table doesn't exist assertThatThrownBy(() -> namespace.describeTable(describeRequest)) @@ -267,29 +269,14 @@ public void testTableOperations() { } @Test - public void testCascadeDropSchema() { - // Create schema - CreateNamespaceRequest nsRequest = new CreateNamespaceRequest(); - nsRequest.setId(Arrays.asList(TEST_CATALOG, testSchema)); - namespace.createNamespace(nsRequest); - - // Create a table in the schema - String tableName = "cascade_test_table"; - CreateEmptyTableRequest tableRequest = new CreateEmptyTableRequest(); - tableRequest.setId(Arrays.asList(TEST_CATALOG, testSchema, tableName)); - tableRequest.setLocation("/tmp/lance-integration-test/" + testSchema + "/" + tableName); - namespace.createEmptyTable(tableRequest); - - // Drop schema with cascade + public void testCascadeDropSchemaRejected() { + // Drop schema with cascade - should be rejected DropNamespaceRequest dropRequest = new DropNamespaceRequest(); dropRequest.setId(Arrays.asList(TEST_CATALOG, testSchema)); - dropRequest.setBehavior(DropNamespaceRequest.BehaviorEnum.CASCADE); - namespace.dropNamespace(dropRequest); + dropRequest.setBehavior("Cascade"); - // Verify schema doesn't exist - DescribeNamespaceRequest describeRequest = new DescribeNamespaceRequest(); - describeRequest.setId(Arrays.asList(TEST_CATALOG, testSchema)); - assertThatThrownBy(() -> namespace.describeNamespace(describeRequest)) - 
.isInstanceOf(LanceNamespaceException.class); + assertThatThrownBy(() -> namespace.dropNamespace(dropRequest)) + .isInstanceOf(InvalidInputException.class) + .hasMessageContaining("Cascade behavior is not supported"); } } diff --git a/java/pom.xml b/java/pom.xml index f0e3baf..7e57514 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -57,8 +57,8 @@ UTF-8 - 1.0.0-beta.10 - 0.0.21 + 2.0.0-beta.4 + 0.4.2 15.0.0 5.8.2 @@ -86,6 +86,7 @@ + lance-namespace-impls-core lance-namespace-glue lance-namespace-hive2 lance-namespace-hive3 @@ -96,15 +97,25 @@ + + org.lance + lance-namespace-impls-core + ${project.version} + org.lance lance-core ${lance-core.version} + + org.lance + lance-namespace-core + ${lance-namespace.version} + org.lance lance-namespace-apache-client - ${lance-namespace-client.version} + ${lance-namespace.version} org.apache.arrow diff --git a/python/Makefile b/python/Makefile index 6f06f43..3408479 100644 --- a/python/Makefile +++ b/python/Makefile @@ -10,34 +10,108 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-.PHONY: install -install: - uv pip install -e ".[all,dev]" +# ============================================================================ +# Glue +# ============================================================================ + +.PHONY: lint-glue +lint-glue: + uv run ruff check src/lance_namespace_impls/glue.py tests/test_glue.py tests/test_glue_integration.py + uv run ruff format --check src/lance_namespace_impls/glue.py tests/test_glue.py tests/test_glue_integration.py .PHONY: install-glue install-glue: - uv pip install -e ".[glue,dev]" - -.PHONY: install-hive2 -install-hive2: - uv pip install -e ".[hive2,dev]" - -.PHONY: test -test: - uv run pytest tests/ + uv sync --extra glue --extra dev .PHONY: test-glue test-glue: uv run pytest tests/test_glue.py +# ============================================================================ +# Hive2 +# ============================================================================ + +.PHONY: lint-hive +lint-hive: + uv run ruff check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_integration.py tests/test_hive3_integration.py + uv run ruff format --check src/lance_namespace_impls/hive2.py src/lance_namespace_impls/hive3.py tests/test_hive2.py tests/test_hive3.py tests/test_hive2_integration.py tests/test_hive3_integration.py + +.PHONY: install-hive +install-hive: + uv sync --extra hive2 --extra hive3 --extra dev + .PHONY: test-hive test-hive: - uv run pytest tests/test_hive.py + uv run pytest tests/test_hive2.py tests/test_hive3.py + +# ============================================================================ +# Unity +# ============================================================================ + +.PHONY: lint-unity +lint-unity: + uv run ruff check src/lance_namespace_impls/unity.py tests/test_unity.py tests/test_unity_integration.py + uv run ruff format --check src/lance_namespace_impls/unity.py tests/test_unity.py 
tests/test_unity_integration.py + +.PHONY: install-unity +install-unity: + uv sync --extra unity --extra dev .PHONY: test-unity test-unity: uv run pytest tests/test_unity.py +# ============================================================================ +# Iceberg +# ============================================================================ + +.PHONY: lint-iceberg +lint-iceberg: + uv run ruff check src/lance_namespace_impls/iceberg.py tests/test_iceberg.py tests/test_iceberg_integration.py + uv run ruff format --check src/lance_namespace_impls/iceberg.py tests/test_iceberg.py tests/test_iceberg_integration.py + +.PHONY: install-iceberg +install-iceberg: + uv sync --extra iceberg --extra dev + +.PHONY: test-iceberg +test-iceberg: + uv run pytest tests/test_iceberg.py + +# ============================================================================ +# Polaris +# ============================================================================ + +.PHONY: lint-polaris +lint-polaris: + uv run ruff check src/lance_namespace_impls/polaris.py tests/test_polaris.py tests/test_polaris_integration.py + uv run ruff format --check src/lance_namespace_impls/polaris.py tests/test_polaris.py tests/test_polaris_integration.py + +.PHONY: install-polaris +install-polaris: + uv sync --extra polaris --extra dev + +.PHONY: test-polaris +test-polaris: + uv run pytest tests/test_polaris.py + +# ============================================================================ +# All +# ============================================================================ + +.PHONY: lint +lint: + uv run ruff check . + uv run ruff format --check . + +.PHONY: install +install: + uv sync --extra all --extra dev + +.PHONY: test +test: + uv run pytest tests/ + .PHONY: clean clean: rm -rf dist/ @@ -48,9 +122,45 @@ clean: find . 
-type f -name "*.pyc" -delete .PHONY: build -build: install +build: uv build .PHONY: publish publish: uv build + +# ============================================================================ +# Integration tests +# ============================================================================ + +.PHONY: integ-test +integ-test: + uv run pytest tests/ -m integration + +.PHONY: integ-test-hive +integ-test-hive: + uv run pytest tests/test_hive2_integration.py tests/test_hive3_integration.py -v + +.PHONY: integ-test-hive2 +integ-test-hive2: + uv run pytest tests/test_hive2_integration.py -v + +.PHONY: integ-test-hive3 +integ-test-hive3: + uv run pytest tests/test_hive3_integration.py -v + +.PHONY: integ-test-polaris +integ-test-polaris: + uv run pytest tests/test_polaris_integration.py -v + +.PHONY: integ-test-unity +integ-test-unity: + uv run pytest tests/test_unity_integration.py -v + +.PHONY: integ-test-iceberg +integ-test-iceberg: + uv run pytest tests/test_iceberg_integration.py -v + +.PHONY: integ-test-glue +integ-test-glue: + uv run pytest tests/test_glue_integration.py -v diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..7ff4607 --- /dev/null +++ b/python/README.md @@ -0,0 +1,3 @@ +# Lance Namespace Implementations (Python) + +Third-party catalog implementations for Lance Namespace. 
diff --git a/python/pyproject.toml b/python/pyproject.toml index a93ef8d..055d965 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -12,7 +12,7 @@ requires-python = ">=3.10" dependencies = [ "pylance>=0.26.0", - "lance-namespace-urllib3-client>=0.0.21", + "lance-namespace-urllib3-client>=0.4.2", "pyarrow>=15.0.0", "typing-extensions>=4.5.0", ] @@ -26,6 +26,12 @@ hive2 = [ "thrift>=0.13.0", "hive-metastore-client>=1.0.0", ] +hive3 = [ + "thrift>=0.13.0", + "hive-metastore-client>=1.0.0", +] +iceberg = [] +polaris = [] unity = [] all = [ "boto3>=1.35.0", @@ -36,6 +42,12 @@ all = [ dev = [ "pytest>=7.0.0", "pytest-cov>=4.0.0", + "ruff>=0.4.0", +] + +[tool.pytest.ini_options] +markers = [ + "integration: marks tests as integration tests (deselect with '-m \"not integration\"')", ] [tool.hatch.build.targets.wheel] diff --git a/python/src/lance_namespace_impls/__init__.py b/python/src/lance_namespace_impls/__init__.py index 6d65254..ceacc6f 100644 --- a/python/src/lance_namespace_impls/__init__.py +++ b/python/src/lance_namespace_impls/__init__.py @@ -7,13 +7,49 @@ This package provides third-party catalog implementations for Lance Namespace: - GlueNamespace: AWS Glue Data Catalog - Hive2Namespace: Apache Hive 2.x Metastore +- Hive3Namespace: Apache Hive 3.x Metastore (with catalog support) - IcebergNamespace: Apache Iceberg REST Catalog +- PolarisNamespace: Apache Polaris Catalog - UnityNamespace: Unity Catalog + +Shared infrastructure: +- RestClient: Reusable HTTP client for REST API implementations +- RestClientException: Exception raised by RestClient +- NamespaceException: Base exception for namespace operations """ from lance_namespace_impls.glue import GlueNamespace -from lance_namespace_impls.hive import Hive2Namespace +from lance_namespace_impls.hive2 import Hive2Namespace +from lance_namespace_impls.hive3 import Hive3Namespace from lance_namespace_impls.iceberg import IcebergNamespace +from lance_namespace_impls.polaris import PolarisNamespace 
from lance_namespace_impls.unity import UnityNamespace +from lance_namespace_impls.rest_client import ( + RestClient, + RestClientException, + NamespaceException, + NamespaceNotFoundException, + NamespaceAlreadyExistsException, + TableNotFoundException, + TableAlreadyExistsException, + InvalidInputException, + InternalException, +) -__all__ = ["GlueNamespace", "Hive2Namespace", "IcebergNamespace", "UnityNamespace"] +__all__ = [ + "GlueNamespace", + "Hive2Namespace", + "Hive3Namespace", + "IcebergNamespace", + "PolarisNamespace", + "UnityNamespace", + "RestClient", + "RestClientException", + "NamespaceException", + "NamespaceNotFoundException", + "NamespaceAlreadyExistsException", + "TableNotFoundException", + "TableAlreadyExistsException", + "InvalidInputException", + "InternalException", +] diff --git a/python/src/lance_namespace_impls/glue.py b/python/src/lance_namespace_impls/glue.py index 3401b43..5ec803e 100644 --- a/python/src/lance_namespace_impls/glue.py +++ b/python/src/lance_namespace_impls/glue.py @@ -1,27 +1,20 @@ """ Lance Glue Namespace implementation using AWS Glue Data Catalog. 
""" -from typing import Dict, List, Optional, Any, Union -from urllib.parse import urlparse -import os + +from typing import Dict, List, Optional, Any try: import boto3 from botocore.config import Config + HAS_BOTO3 = True except ImportError: boto3 = None Config = None HAS_BOTO3 = False -import lance -import pyarrow as pa - from lance.namespace import LanceNamespace -from lance_namespace_impls.schema import ( - convert_json_arrow_schema_to_pyarrow, - convert_json_arrow_type_to_pyarrow, -) from lance_namespace_urllib3_client.models import ( ListNamespacesRequest, ListNamespacesResponse, @@ -31,27 +24,17 @@ CreateNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, - NamespaceExistsRequest, ListTablesRequest, ListTablesResponse, - CreateTableRequest, - CreateTableResponse, CreateEmptyTableRequest, CreateEmptyTableResponse, - DropTableRequest, - DropTableResponse, DescribeTableRequest, DescribeTableResponse, - RegisterTableRequest, - RegisterTableResponse, DeregisterTableRequest, DeregisterTableResponse, - TableExistsRequest, - JsonArrowSchema, - JsonArrowField, - JsonArrowDataType, ) +from lance_namespace_impls.rest_client import InvalidInputException LANCE_TABLE_TYPE = "LANCE" TABLE_TYPE = "table_type" @@ -61,59 +44,59 @@ class GlueNamespace(LanceNamespace): """Lance Glue Namespace implementation using AWS Glue Data Catalog. - + This namespace implementation integrates Lance with AWS Glue Data Catalog, allowing you to manage Lance table metadata in a centralized AWS service. - + Usage Examples: - + >>> from lance_namespace import connect - + >>> # Connect using default AWS credentials >>> namespace = connect("glue", { ... "region": "us-east-1" ... }) - + >>> # Connect with specific credentials >>> namespace = connect("glue", { ... "region": "us-east-1", ... "access_key_id": "YOUR_ACCESS_KEY", ... "secret_access_key": "YOUR_SECRET_KEY" ... }) - + >>> # Connect with custom catalog ID and endpoint >>> namespace = connect("glue", { ... 
"region": "us-east-1", ... "catalog_id": "123456789012", ... "endpoint": "https://glue.example.com" ... }) - + >>> # Create a database (namespace) >>> from lance_namespace_urllib3_client.models import CreateNamespaceRequest >>> namespace.create_namespace(CreateNamespaceRequest( ... id=["my_database"], ... properties={"description": "My Lance tables"} ... )) - + >>> # List databases >>> from lance_namespace_urllib3_client.models import ListNamespacesRequest >>> response = namespace.list_namespaces(ListNamespacesRequest()) >>> print(response.namespaces) - + >>> # Create a table >>> from lance_namespace_urllib3_client.models import CreateTableRequest >>> namespace.create_table(CreateTableRequest( ... id=["my_database", "my_table"], ... var_schema=arrow_schema # PyArrow schema ... ), data_bytes) - + Note: Requires boto3 to be installed: pip install lance-namespace[glue] """ - + def __init__(self, **properties): """Initialize the Glue namespace. - + Args: catalog_id: Glue catalog ID (AWS account ID) endpoint: Optional custom Glue endpoint @@ -133,7 +116,7 @@ def __init__(self, **properties): "boto3 is required for GlueNamespace. 
" "Install with: pip install lance-namespace[glue]" ) - + self.config = GlueNamespaceConfig(properties) self._glue = None # Lazy initialization to support pickling @@ -149,7 +132,7 @@ def glue(self): if self._glue is None: self._glue = self._initialize_glue_client() return self._glue - + def _initialize_glue_client(self): """Initialize the AWS Glue client.""" session = boto3.Session( @@ -159,555 +142,354 @@ def _initialize_glue_client(self): aws_secret_access_key=self.config.secret_access_key, aws_session_token=self.config.session_token, ) - + config_kwargs = {} if self.config.max_retries: - config_kwargs['retries'] = { - 'max_attempts': self.config.max_retries, - 'mode': self.config.retry_mode or 'standard' + config_kwargs["retries"] = { + "max_attempts": self.config.max_retries, + "mode": self.config.retry_mode or "standard", } - + glue_client = session.client( - 'glue', + "glue", endpoint_url=self.config.endpoint, - config=Config(**config_kwargs) if config_kwargs else None + config=Config(**config_kwargs) if config_kwargs else None, ) - + # Register catalog ID if provided if self.config.catalog_id: self._register_catalog_id(glue_client, self.config.catalog_id) - + return glue_client - + def _register_catalog_id(self, glue_client, catalog_id): """Register the Glue Catalog ID with the client.""" event_system = glue_client.meta.events - + def add_catalog_id(params, **kwargs): - if 'CatalogId' not in params: - params['CatalogId'] = catalog_id - - event_system.register('provide-client-params.glue', add_catalog_id) - + if "CatalogId" not in params: + params["CatalogId"] = catalog_id + + event_system.register("provide-client-params.glue", add_catalog_id) + def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse: """List namespaces (databases) in Glue.""" # Only list databases if we're at root namespace (no id or empty id) if request.id and len(request.id) > 0: # Hierarchical namespaces are not supported in Glue return 
ListNamespacesResponse(namespaces=[]) - + try: databases = [] next_token = None - + while True: if next_token: response = self.glue.get_databases(NextToken=next_token) else: response = self.glue.get_databases() - - for db in response.get('DatabaseList', []): - databases.append(db['Name']) - - next_token = response.get('NextToken') + + for db in response.get("DatabaseList", []): + databases.append(db["Name"]) + + next_token = response.get("NextToken") if not next_token: break - + return ListNamespacesResponse(namespaces=databases) except Exception as e: raise RuntimeError(f"Failed to list namespaces: {e}") - - def describe_namespace(self, request: DescribeNamespaceRequest) -> DescribeNamespaceResponse: + + def describe_namespace( + self, request: DescribeNamespaceRequest + ) -> DescribeNamespaceResponse: """Describe a namespace (database) in Glue.""" # Handle root namespace if not request.id or len(request.id) == 0: # Root namespace always exists properties = {} if self.config.root: - properties['location'] = self.config.root - properties['description'] = 'Root Glue catalog namespace' + properties["location"] = self.config.root + properties["description"] = "Root Glue catalog namespace" return DescribeNamespaceResponse(properties=properties) - + if len(request.id) != 1: raise ValueError("Glue namespace requires exactly one level identifier") - + database_name = request.id[0] - + try: response = self.glue.get_database(Name=database_name) - database = response['Database'] - - properties = database.get('Parameters', {}) - if 'LocationUri' in database: - properties['location'] = database['LocationUri'] - if 'Description' in database: - properties['description'] = database['Description'] - + database = response["Database"] + + properties = database.get("Parameters", {}) + if "LocationUri" in database: + properties["location"] = database["LocationUri"] + if "Description" in database: + properties["description"] = database["Description"] + return 
DescribeNamespaceResponse(properties=properties) except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'EntityNotFoundException': + error_name = e.__class__.__name__ if hasattr(e, "__class__") else "" + if error_name == "EntityNotFoundException": raise RuntimeError(f"Namespace does not exist: {database_name}") raise RuntimeError(f"Failed to describe namespace: {e}") - - def create_namespace(self, request: CreateNamespaceRequest) -> CreateNamespaceResponse: + + def create_namespace( + self, request: CreateNamespaceRequest + ) -> CreateNamespaceResponse: """Create a namespace (database) in Glue.""" # Handle root namespace if not request.id or len(request.id) == 0: raise RuntimeError("Root namespace already exists") - + if len(request.id) != 1: raise ValueError("Glue namespace requires exactly one level identifier") - + database_name = request.id[0] - database_input = {'Name': database_name} - + database_input = {"Name": database_name} + if request.properties: parameters = {} for key, value in request.properties.items(): - if key == 'description': - database_input['Description'] = value - elif key == 'location': - database_input['LocationUri'] = value + if key == "description": + database_input["Description"] = value + elif key == "location": + database_input["LocationUri"] = value else: parameters[key] = value if parameters: - database_input['Parameters'] = parameters - + database_input["Parameters"] = parameters + try: self.glue.create_database(DatabaseInput=database_input) return CreateNamespaceResponse() except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'AlreadyExistsException': + error_name = e.__class__.__name__ if hasattr(e, "__class__") else "" + if error_name == "AlreadyExistsException": raise RuntimeError(f"Namespace already exists: {database_name}") raise RuntimeError(f"Failed to create namespace: {e}") - + def drop_namespace(self, 
request: DropNamespaceRequest) -> DropNamespaceResponse: """Drop a namespace (database) in Glue.""" + if request.behavior and request.behavior.lower() == "cascade": + raise InvalidInputException( + "Cascade behavior is not supported for this implementation" + ) + # Handle root namespace if not request.id or len(request.id) == 0: raise RuntimeError("Cannot drop root namespace") - + if len(request.id) != 1: raise ValueError("Glue namespace requires exactly one level identifier") - + database_name = request.id[0] - + try: # Check if database is empty tables_response = self.glue.get_tables(DatabaseName=database_name) - if tables_response.get('TableList'): + if tables_response.get("TableList"): raise RuntimeError(f"Cannot drop non-empty namespace: {database_name}") - + self.glue.delete_database(Name=database_name) return DropNamespaceResponse() except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'EntityNotFoundException': + error_name = e.__class__.__name__ if hasattr(e, "__class__") else "" + if error_name == "EntityNotFoundException": raise RuntimeError(f"Namespace does not exist: {database_name}") if isinstance(e, RuntimeError): raise raise RuntimeError(f"Failed to drop namespace: {e}") - - def namespace_exists(self, request: NamespaceExistsRequest) -> None: - """Check if a namespace exists.""" - # Handle root namespace - it always exists - if not request.id or len(request.id) == 0: - return # Root namespace always exists - - if len(request.id) != 1: - raise ValueError("Glue namespace requires exactly one level identifier") - - database_name = request.id[0] - - try: - self.glue.get_database(Name=database_name) - except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'EntityNotFoundException': - raise RuntimeError(f"Namespace does not exist: {database_name}") - raise RuntimeError(f"Failed to check namespace existence: {e}") - + def list_tables(self, 
request: ListTablesRequest) -> ListTablesResponse: """List tables in a namespace.""" # Handle root namespace - no tables at root level if not request.id or len(request.id) == 0: return ListTablesResponse(tables=[]) - + if len(request.id) != 1: raise ValueError("Glue namespace requires exactly one level identifier") - + database_name = request.id[0] - + try: tables = [] next_token = None - + while True: if next_token: response = self.glue.get_tables( - DatabaseName=database_name, - NextToken=next_token + DatabaseName=database_name, NextToken=next_token ) else: response = self.glue.get_tables(DatabaseName=database_name) - - for table in response.get('TableList', []): + + for table in response.get("TableList", []): # Only include Lance tables if self._is_lance_table(table): - tables.append(table['Name']) - - next_token = response.get('NextToken') + tables.append(table["Name"]) + + next_token = response.get("NextToken") if not next_token: break - + return ListTablesResponse(tables=tables) except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'EntityNotFoundException': + error_name = e.__class__.__name__ if hasattr(e, "__class__") else "" + if error_name == "EntityNotFoundException": raise RuntimeError(f"Namespace does not exist: {database_name}") raise RuntimeError(f"Failed to list tables: {e}") - + def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: """Describe a table.""" + if request.load_detailed_metadata: + raise RuntimeError( + "load_detailed_metadata=true is not supported for this implementation" + ) + database_name, table_name = self._parse_table_identifier(request.id) - + try: - response = self.glue.get_table( - DatabaseName=database_name, - Name=table_name - ) - table = response['Table'] - + response = self.glue.get_table(DatabaseName=database_name, Name=table_name) + table = response["Table"] + if not self._is_lance_table(table): - raise RuntimeError(f"Table is not a 
Lance table: {database_name}.{table_name}") - - location = table.get('StorageDescriptor', {}).get('Location') + raise RuntimeError( + f"Table is not a Lance table: {database_name}.{table_name}" + ) + + location = table.get("StorageDescriptor", {}).get("Location") if not location: - raise RuntimeError(f"Table has no location: {database_name}.{table_name}") - - return DescribeTableResponse(location=location) + raise RuntimeError( + f"Table has no location: {database_name}.{table_name}" + ) + + return DescribeTableResponse( + location=location, storage_options=self.config.storage_options + ) except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'EntityNotFoundException': - raise RuntimeError(f"Table does not exist: {database_name}.{table_name}") + error_name = e.__class__.__name__ if hasattr(e, "__class__") else "" + if error_name == "EntityNotFoundException": + raise RuntimeError( + f"Table does not exist: {database_name}.{table_name}" + ) if isinstance(e, RuntimeError): raise raise RuntimeError(f"Failed to describe table: {e}") - - def create_table(self, request: CreateTableRequest, request_data: bytes) -> CreateTableResponse: - """Create a table with data from Arrow IPC stream.""" - database_name, table_name = self._parse_table_identifier(request.id) - - if not request_data: - raise ValueError("Request data (Arrow IPC stream) is required for create_table") - - # Determine table location - if request.location: - table_location = request.location - else: - # Use default location pattern - db_response = self.glue.get_database(Name=database_name) - db_location = db_response['Database'].get('LocationUri', '') - if db_location: - table_location = f"{db_location}/{table_name}.lance" - else: - # Use S3 default location - table_location = f"s3://lance-namespace/{database_name}/{table_name}.lance" - - # Extract table from Arrow IPC stream - try: - reader = pa.ipc.open_stream(pa.py_buffer(request_data)) - table = 
reader.read_all() - schema = table.schema - except Exception as e: - raise ValueError(f"Invalid Arrow IPC stream: {e}") - - # Write Lance dataset - lance.write_dataset(table, table_location, storage_options=self.config.storage_options) - - # Create Glue table entry - table_input = { - 'Name': table_name, - 'TableType': EXTERNAL_TABLE, - 'Parameters': { - TABLE_TYPE: LANCE_TABLE_TYPE, - }, - 'StorageDescriptor': { - 'Location': table_location, - 'Columns': self._convert_pyarrow_schema_to_glue_columns(schema) - } - } - - try: - self.glue.create_table( - DatabaseName=database_name, - TableInput=table_input - ) - return CreateTableResponse(location=table_location, version=1) - except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'AlreadyExistsException': - raise RuntimeError(f"Table already exists: {database_name}.{table_name}") - raise RuntimeError(f"Failed to create table: {e}") - - def create_empty_table(self, request: CreateEmptyTableRequest) -> CreateEmptyTableResponse: + + def create_empty_table( + self, request: CreateEmptyTableRequest + ) -> CreateEmptyTableResponse: """Create an empty table (metadata only) in Glue catalog.""" database_name, table_name = self._parse_table_identifier(request.id) - + # Determine table location - if request.location: - table_location = request.location - else: + table_location = getattr(request, "location", None) + if not table_location: # Use default location pattern db_response = self.glue.get_database(Name=database_name) - db_location = db_response['Database'].get('LocationUri', '') + db_location = db_response["Database"].get("LocationUri", "") if db_location: table_location = f"{db_location}/{table_name}.lance" else: # Use S3 default location - table_location = f"s3://lance-namespace/{database_name}/{table_name}.lance" - + table_location = ( + f"s3://lance-namespace/{database_name}/{table_name}.lance" + ) + # Create a minimal schema for Glue (placeholder schema) 
glue_columns = [ { - 'Name': '__placeholder_id', - 'Type': 'bigint', - 'Comment': 'Placeholder column for empty table' + "Name": "__placeholder_id", + "Type": "bigint", + "Comment": "Placeholder column for empty table", } ] - + # Create Glue table entry without creating actual Lance dataset table_input = { - 'Name': table_name, - 'TableType': EXTERNAL_TABLE, - 'Parameters': { + "Name": table_name, + "TableType": EXTERNAL_TABLE, + "Parameters": { TABLE_TYPE: LANCE_TABLE_TYPE, - 'empty_table': 'true', # Mark as empty table + "empty_table": "true", # Mark as empty table + }, + "StorageDescriptor": { + "Location": table_location, + "Columns": glue_columns, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" + }, }, - 'StorageDescriptor': { - 'Location': table_location, - 'Columns': glue_columns, - 'InputFormat': 'org.apache.hadoop.mapred.TextInputFormat', - 'OutputFormat': 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', - 'SerdeInfo': { - 'SerializationLibrary': 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - } - } } - + # Add additional properties if specified if request.properties: - table_input['Parameters'].update(request.properties) - + table_input["Parameters"].update(request.properties) + try: - self.glue.create_table( - DatabaseName=database_name, - TableInput=table_input - ) + self.glue.create_table(DatabaseName=database_name, TableInput=table_input) except Exception as e: - if 'AlreadyExistsException' in str(e): - raise RuntimeError(f"Table already exists: {database_name}.{table_name}") + if "AlreadyExistsException" in str(e): + raise RuntimeError( + f"Table already exists: {database_name}.{table_name}" + ) raise RuntimeError(f"Failed to create empty table: {e}") - + return CreateEmptyTableResponse(location=table_location) - - def drop_table(self, request: 
DropTableRequest) -> DropTableResponse: - """Drop a table - deletes both the Lance dataset and Glue catalog entry.""" - database_name, table_name = self._parse_table_identifier(request.id) - - try: - # First get the table to find its location - response = self.glue.get_table( - DatabaseName=database_name, - Name=table_name - ) - table = response['Table'] - - # Verify it's a Lance table - if not self._is_lance_table(table): - raise RuntimeError(f"Table is not a Lance table: {database_name}.{table_name}") - - # Get the table location - location = table.get('StorageDescriptor', {}).get('Location') - if not location: - raise RuntimeError(f"Table has no location: {database_name}.{table_name}") - - # Drop the Lance dataset first - lance_dataset = lance.dataset(location, storage_options=self.config.storage_options) - lance_dataset.delete() - - # Then remove from Glue catalog - self.glue.delete_table( - DatabaseName=database_name, - Name=table_name - ) - return DropTableResponse() - except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'EntityNotFoundException': - raise RuntimeError(f"Table does not exist: {database_name}.{table_name}") - if isinstance(e, RuntimeError): - raise - raise RuntimeError(f"Failed to drop table: {e}") - - def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse: - """Register an existing Lance table in Glue.""" - database_name, table_name = self._parse_table_identifier(request.id) - - if not request.location: - raise ValueError("Location is required to register a table") - - # Read Lance dataset to get schema - try: - dataset = lance.dataset(request.location, storage_options=self.config.storage_options) - schema = dataset.schema - except Exception as e: - raise RuntimeError(f"Failed to read Lance dataset at {request.location}: {e}") - - # Create Glue table entry - table_input = { - 'Name': table_name, - 'TableType': EXTERNAL_TABLE, - 'Parameters': { - TABLE_TYPE: 
LANCE_TABLE_TYPE, - }, - 'StorageDescriptor': { - 'Location': request.location, - 'Columns': self._convert_pyarrow_schema_to_glue_columns(schema) - } - } - - try: - self.glue.create_table( - DatabaseName=database_name, - TableInput=table_input - ) - return RegisterTableResponse(location=request.location) - except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'AlreadyExistsException': - raise RuntimeError(f"Table already exists: {database_name}.{table_name}") - raise RuntimeError(f"Failed to register table: {e}") - - def deregister_table(self, request: DeregisterTableRequest) -> DeregisterTableResponse: + + def deregister_table( + self, request: DeregisterTableRequest + ) -> DeregisterTableResponse: """Deregister a table - removes only the Glue catalog entry, keeps the Lance dataset.""" database_name, table_name = self._parse_table_identifier(request.id) - + try: # Only remove from Glue catalog, don't delete the Lance dataset - self.glue.delete_table( - DatabaseName=database_name, - Name=table_name - ) + self.glue.delete_table(DatabaseName=database_name, Name=table_name) return DeregisterTableResponse() except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'EntityNotFoundException': - raise RuntimeError(f"Table does not exist: {database_name}.{table_name}") + error_name = e.__class__.__name__ if hasattr(e, "__class__") else "" + if error_name == "EntityNotFoundException": + raise RuntimeError( + f"Table does not exist: {database_name}.{table_name}" + ) raise RuntimeError(f"Failed to deregister table: {e}") - - def table_exists(self, request: TableExistsRequest) -> None: - """Check if a table exists.""" - database_name, table_name = self._parse_table_identifier(request.id) - - try: - response = self.glue.get_table( - DatabaseName=database_name, - Name=table_name - ) - if not self._is_lance_table(response['Table']): - raise RuntimeError(f"Table is not 
a Lance table: {database_name}.{table_name}") - except Exception as e: - error_name = e.__class__.__name__ if hasattr(e, '__class__') else '' - if error_name == 'EntityNotFoundException': - raise RuntimeError(f"Table does not exist: {database_name}.{table_name}") - if isinstance(e, RuntimeError): - raise - raise RuntimeError(f"Failed to check table existence: {e}") - + def _parse_table_identifier(self, identifier: List[str]) -> tuple[str, str]: """Parse table identifier into database and table name.""" if not identifier or len(identifier) != 2: - raise ValueError("Table identifier must have exactly 2 parts: [database, table]") + raise ValueError( + "Table identifier must have exactly 2 parts: [database, table]" + ) return identifier[0], identifier[1] - + def _is_lance_table(self, glue_table: Dict[str, Any]) -> bool: """Check if a Glue table is a Lance table.""" - return glue_table.get('Parameters', {}).get(TABLE_TYPE, '').upper() == LANCE_TABLE_TYPE - - def _convert_pyarrow_schema_to_glue_columns(self, schema: pa.Schema) -> List[Dict[str, str]]: - """Convert PyArrow schema to Glue column definitions.""" - columns = [] - for field in schema: - column = { - 'Name': field.name, - 'Type': self._convert_pyarrow_type_to_glue_type(field.type) - } - columns.append(column) - return columns - - def _convert_pyarrow_type_to_glue_type(self, arrow_type: pa.DataType) -> str: - """Convert PyArrow type to Glue/Hive type string.""" - if pa.types.is_boolean(arrow_type): - return 'boolean' - elif pa.types.is_int8(arrow_type) or pa.types.is_uint8(arrow_type): - return 'tinyint' - elif pa.types.is_int16(arrow_type) or pa.types.is_uint16(arrow_type): - return 'smallint' - elif pa.types.is_int32(arrow_type) or pa.types.is_uint32(arrow_type): - return 'int' - elif pa.types.is_int64(arrow_type) or pa.types.is_uint64(arrow_type): - return 'bigint' - elif pa.types.is_float32(arrow_type): - return 'float' - elif pa.types.is_float64(arrow_type): - return 'double' - elif 
pa.types.is_string(arrow_type): - return 'string' - elif pa.types.is_binary(arrow_type): - return 'binary' - elif pa.types.is_date32(arrow_type) or pa.types.is_date64(arrow_type): - return 'date' - elif pa.types.is_timestamp(arrow_type): - return 'timestamp' - elif pa.types.is_decimal(arrow_type): - return f'decimal({arrow_type.precision},{arrow_type.scale})' - elif pa.types.is_list(arrow_type): - element_type = self._convert_pyarrow_type_to_glue_type(arrow_type.value_type) - return f'array<{element_type}>' - elif pa.types.is_struct(arrow_type): - field_strs = [] - for field in arrow_type: - field_type = self._convert_pyarrow_type_to_glue_type(field.type) - field_strs.append(f'{field.name}:{field_type}') - return f'struct<{",".join(field_strs)}>' - elif pa.types.is_map(arrow_type): - key_type = self._convert_pyarrow_type_to_glue_type(arrow_type.key_type) - value_type = self._convert_pyarrow_type_to_glue_type(arrow_type.item_type) - return f'map<{key_type},{value_type}>' - else: - # Default to string for unknown types - return 'string' - + return ( + glue_table.get("Parameters", {}).get(TABLE_TYPE, "").upper() + == LANCE_TABLE_TYPE + ) + def __getstate__(self): """Prepare instance for pickling by excluding unpickleable objects.""" state = self.__dict__.copy() # Remove the unpickleable Glue client - state['_glue'] = None + state["_glue"] = None return state - + def __setstate__(self, state): """Restore instance from pickled state.""" self.__dict__.update(state) # The Glue client will be re-initialized lazily via the property - class GlueNamespaceConfig: """Configuration for GlueNamespace.""" - + # Glue configuration keys (without prefix as per documentation) CATALOG_ID = "catalog_id" ENDPOINT = "endpoint" @@ -719,22 +501,22 @@ class GlueNamespaceConfig: MAX_RETRIES = "max_retries" RETRY_MODE = "retry_mode" ROOT = "root" - + # Storage configuration prefix STORAGE_OPTIONS_PREFIX = "storage." 
- + def __init__(self, properties: Optional[Dict[str, str]] = None): """Initialize configuration from properties. - + Args: properties: Dictionary of configuration properties """ if properties is None: properties = {} - + # Store raw properties for pickling support self._properties = properties.copy() - + self._catalog_id = properties.get(self.CATALOG_ID) self._endpoint = properties.get(self.ENDPOINT) self._region = properties.get(self.REGION) @@ -743,71 +525,71 @@ def __init__(self, properties: Optional[Dict[str, str]] = None): self._session_token = properties.get(self.SESSION_TOKEN) self._profile_name = properties.get(self.PROFILE_NAME) self._root = properties.get(self.ROOT) - + # Parse max retries max_retries_str = properties.get(self.MAX_RETRIES) self._max_retries = int(max_retries_str) if max_retries_str else None - + self._retry_mode = properties.get(self.RETRY_MODE) - + # Extract storage options self._storage_options = self._extract_storage_options(properties) - + def _extract_storage_options(self, properties: Dict[str, str]) -> Dict[str, str]: """Extract storage configuration properties by removing the prefix.""" storage_options = {} for key, value in properties.items(): if key.startswith(self.STORAGE_OPTIONS_PREFIX): - storage_key = key[len(self.STORAGE_OPTIONS_PREFIX):] + storage_key = key[len(self.STORAGE_OPTIONS_PREFIX) :] storage_options[storage_key] = value return storage_options - + @property def catalog_id(self) -> Optional[str]: return self._catalog_id - + @property def endpoint(self) -> Optional[str]: return self._endpoint - + @property def region(self) -> Optional[str]: return self._region - + @property def access_key_id(self) -> Optional[str]: return self._access_key_id - + @property def secret_access_key(self) -> Optional[str]: return self._secret_access_key - + @property def session_token(self) -> Optional[str]: return self._session_token - + @property def profile_name(self) -> Optional[str]: return self._profile_name - + @property def 
max_retries(self) -> Optional[int]: return self._max_retries - + @property def retry_mode(self) -> Optional[str]: return self._retry_mode - + @property def root(self) -> Optional[str]: return self._root - + @property def storage_options(self) -> Dict[str, str]: """Get the storage configuration properties.""" return self._storage_options.copy() - + @property def properties(self) -> Dict[str, str]: """Get the raw properties dictionary.""" - return self._properties.copy() \ No newline at end of file + return self._properties.copy() diff --git a/python/src/lance_namespace_impls/hive.py b/python/src/lance_namespace_impls/hive.py deleted file mode 100644 index 28065d3..0000000 --- a/python/src/lance_namespace_impls/hive.py +++ /dev/null @@ -1,810 +0,0 @@ -""" -Lance Hive2 Namespace implementation using Hive Metastore. - -This module provides integration with Apache Hive Metastore for managing Lance tables. -Lance tables are registered as external tables in Hive with specific metadata properties -to identify them as Lance format. 
- -Installation: - pip install 'lance-namespace[hive2]' - -Usage: - from lance_namespace import connect - - # Connect to Hive Metastore - namespace = connect("hive2", { - "uri": "thrift://localhost:9083", - "root": "/my/dir", # Or "s3://bucket/prefix" - "ugi": "user:group1,group2" # Optional user/group info - }) - - # List databases - from lance_namespace import ListNamespacesRequest - response = namespace.list_namespaces(ListNamespacesRequest()) - - # Create a table - from lance_namespace import CreateTableRequest - import pyarrow as pa - import io - - data = pa.table({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) - buf = io.BytesIO() - with pa.ipc.new_stream(buf, data.schema) as writer: - writer.write_table(data) - - request = CreateTableRequest( - id=["my_database", "my_table"], - mode="create" - ) - response = namespace.create_table(request, buf.getvalue()) - - # Register existing Lance table - from lance_namespace import RegisterTableRequest - request = RegisterTableRequest( - id=["my_database", "existing_table"], - location="/path/to/lance/table" - ) - response = namespace.register_table(request) - -Configuration Properties: - uri (str): Hive Metastore Thrift URI (e.g., "thrift://localhost:9083") - root (str): Storage root location of the lakehouse on Hive catalog (default: current working directory) - ugi (str): Optional User Group Information for authentication (format: "user:group1,group2") - client.pool-size (int): Size of the HMS client connection pool (default: 3) - storage.* (str): Additional storage configurations to access table -""" -from typing import Dict, List, Optional, Any -from urllib.parse import urlparse, unquote -import os -import logging - -try: - from hive_metastore.ThriftHiveMetastore import Client - from hive_metastore.ttypes import ( - Database as HiveDatabase, - Table as HiveTable, - StorageDescriptor, - SerDeInfo, - FieldSchema, - NoSuchObjectException, - AlreadyExistsException, - InvalidOperationException, - MetaException, - ) - from 
thrift.protocol import TBinaryProtocol - from thrift.transport import TSocket, TTransport - HIVE_AVAILABLE = True -except ImportError: - HIVE_AVAILABLE = False - Client = None - HiveDatabase = None - HiveTable = None - StorageDescriptor = None - SerDeInfo = None - FieldSchema = None - NoSuchObjectException = None - AlreadyExistsException = None - InvalidOperationException = None - MetaException = None - -import lance -import pyarrow as pa - -from lance.namespace import LanceNamespace -from lance_namespace_urllib3_client.models import ( - ListNamespacesRequest, - ListNamespacesResponse, - DescribeNamespaceRequest, - DescribeNamespaceResponse, - CreateNamespaceRequest, - CreateNamespaceResponse, - DropNamespaceRequest, - DropNamespaceResponse, - NamespaceExistsRequest, - ListTablesRequest, - ListTablesResponse, - CreateTableRequest, - CreateTableResponse, - CreateEmptyTableRequest, - CreateEmptyTableResponse, - DropTableRequest, - DropTableResponse, - DescribeTableRequest, - DescribeTableResponse, - RegisterTableRequest, - RegisterTableResponse, - DeregisterTableRequest, - DeregisterTableResponse, - TableExistsRequest, - JsonArrowSchema, - JsonArrowField, - JsonArrowDataType, -) - -logger = logging.getLogger(__name__) - -# Table properties used by Lance (per hive.md specification) -TABLE_TYPE_KEY = "table_type" # Case insensitive -LANCE_TABLE_FORMAT = "lance" # Case insensitive -MANAGED_BY_KEY = "managed_by" # Case insensitive, values: "storage" or "impl" -VERSION_KEY = "version" # Numeric version number -EXTERNAL_TABLE = "EXTERNAL_TABLE" - - -class HiveMetastoreClient: - """Helper class to manage Hive Metastore client connections.""" - - def __init__(self, uri: str, ugi: Optional[str] = None): - if not HIVE_AVAILABLE: - raise ImportError( - "Hive dependencies not installed. 
Please install with: " - "pip install 'lance-namespace[hive2]'" - ) - - self._uri = uri - self._ugi = ugi.split(":") if ugi else None - self._transport = None - self._client = None - self._init_client() - - def _init_client(self): - """Initialize the Thrift client connection.""" - url_parts = urlparse(self._uri) - socket = TSocket.TSocket(url_parts.hostname, url_parts.port or 9083) - self._transport = TTransport.TBufferedTransport(socket) - protocol = TBinaryProtocol.TBinaryProtocol(self._transport) - self._client = Client(protocol) - - if not self._transport.isOpen(): - self._transport.open() - - if self._ugi: - self._client.set_ugi(*self._ugi) - - def __enter__(self): - """Enter context manager.""" - if not self._transport or not self._transport.isOpen(): - self._init_client() - return self._client - - def __exit__(self, exc_type, exc_val, exc_tb): - """Exit context manager.""" - if self._transport and self._transport.isOpen(): - self._transport.close() - - def close(self): - """Close the client connection.""" - if self._transport and self._transport.isOpen(): - self._transport.close() - - -class Hive2Namespace(LanceNamespace): - """Lance Hive2 Namespace implementation using Hive Metastore.""" - - def __init__(self, **properties): - """Initialize the Hive2 namespace. - - Args: - uri: The Hive Metastore URI (e.g., "thrift://localhost:9083") - root: Storage root location of the lakehouse on Hive catalog (optional) - ugi: User Group Information for authentication (optional, format: "user:group1,group2") - client.pool-size: Size of the HMS client connection pool (optional, default: 3) - storage.*: Additional storage configurations to access table - **properties: Additional configuration properties - """ - if not HIVE_AVAILABLE: - raise ImportError( - "Hive dependencies not installed. 
Please install with: " - "pip install 'lance-namespace[hive2]'" - ) - - self.uri = properties.get("uri", "thrift://localhost:9083") - self.ugi = properties.get("ugi") - self.root = properties.get("root", os.getcwd()) - self.pool_size = int(properties.get("client.pool-size", "3")) - # Extract storage properties - self.storage_properties = {k[8:]: v for k, v in properties.items() if k.startswith("storage.")} - - # Store properties for pickling support - self._properties = properties.copy() - - # Lazy initialization to support pickling - self._client = None - - def namespace_id(self) -> str: - """Return a human-readable unique identifier for this namespace instance.""" - return f"Hive2Namespace {{ uri: {self.uri!r} }}" - - @property - def client(self): - """Get the Hive client, initializing it if necessary.""" - if self._client is None: - self._client = HiveMetastoreClient(self.uri, self.ugi) - return self._client - - def _normalize_identifier(self, identifier: List[str]) -> tuple: - """Normalize identifier to (database, table) tuple.""" - if len(identifier) == 1: - return ("default", identifier[0]) - elif len(identifier) == 2: - return (identifier[0], identifier[1]) - else: - raise ValueError(f"Invalid identifier: {identifier}") - - def _is_root_namespace(self, identifier: Optional[List[str]]) -> bool: - """Check if the identifier refers to the root namespace.""" - return not identifier or len(identifier) == 0 - - def _get_table_location(self, database: str, table: str) -> str: - """Get the location for a table.""" - return os.path.join(self.root, f"{database}.db", table) - - def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse: - """List all databases in the Hive Metastore.""" - try: - # Only list namespaces if we're at the root level - if not self._is_root_namespace(request.id): - # Non-root namespaces don't have children in Hive2 - return ListNamespacesResponse(namespaces=[]) - - with self.client as client: - databases = 
client.get_all_databases() - # Return just database names as strings (excluding default) - namespaces = [db for db in databases if db != "default"] - - return ListNamespacesResponse(namespaces=namespaces) - except Exception as e: - logger.error(f"Failed to list namespaces: {e}") - raise - - def describe_namespace(self, request: DescribeNamespaceRequest) -> DescribeNamespaceResponse: - """Describe a database in the Hive Metastore.""" - try: - # Handle root namespace - if self._is_root_namespace(request.id): - properties = { - "location": self.root, - "description": "Root namespace (Hive Metastore)" - } - if self.ugi: - properties["ugi"] = self.ugi - return DescribeNamespaceResponse(properties=properties) - - if len(request.id) != 1: - raise ValueError(f"Invalid namespace identifier: {request.id}") - - database_name = request.id[0] - - with self.client as client: - database = client.get_database(database_name) - - properties = {} - if database.description: - properties["comment"] = database.description - if database.ownerName: - properties["owner"] = database.ownerName - if database.locationUri: - properties["location"] = database.locationUri - if database.parameters: - properties.update(database.parameters) - - return DescribeNamespaceResponse( - properties=properties - ) - except Exception as e: - if NoSuchObjectException and isinstance(e, NoSuchObjectException): - raise ValueError(f"Namespace {request.id} does not exist") - logger.error(f"Failed to describe namespace {request.id}: {e}") - raise - - def create_namespace(self, request: CreateNamespaceRequest) -> CreateNamespaceResponse: - """Create a new database in the Hive Metastore.""" - try: - # Cannot create root namespace - if self._is_root_namespace(request.id): - raise ValueError("Root namespace already exists") - - if len(request.id) != 1: - raise ValueError(f"Invalid namespace identifier: {request.id}") - - database_name = request.id[0] - - # Create database object - if not HiveDatabase: - raise 
ImportError("Hive dependencies not available") - database = HiveDatabase() - database.name = database_name - database.description = request.properties.get("comment", "") - database.ownerName = request.properties.get("owner", os.getenv("USER", "")) - database.locationUri = request.properties.get( - "location", - os.path.join(self.root, f"{database_name}.db") - ) - database.parameters = { - k: v for k, v in request.properties.items() - if k not in ["comment", "owner", "location"] - } - - with self.client as client: - client.create_database(database) - - return CreateNamespaceResponse() - except Exception as e: - if AlreadyExistsException and isinstance(e, AlreadyExistsException): - raise ValueError(f"Namespace {request.id} already exists") - logger.error(f"Failed to create namespace {request.id}: {e}") - raise - - def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse: - """Drop a database from the Hive Metastore.""" - try: - # Cannot drop root namespace - if self._is_root_namespace(request.id): - raise ValueError("Cannot drop root namespace") - - if len(request.id) != 1: - raise ValueError(f"Invalid namespace identifier: {request.id}") - - database_name = request.id[0] - - with self.client as client: - # Check if database is empty - tables = client.get_all_tables(database_name) - cascade = request.behavior == "CASCADE" if request.behavior else False - if tables and not cascade: - raise ValueError(f"Namespace {request.id} is not empty") - - # Drop database - client.drop_database(database_name, deleteData=True, cascade=cascade) - - return DropNamespaceResponse() - except Exception as e: - if NoSuchObjectException and isinstance(e, NoSuchObjectException): - raise ValueError(f"Namespace {request.id} does not exist") - logger.error(f"Failed to drop namespace {request.id}: {e}") - raise - - def namespace_exists(self, request: NamespaceExistsRequest) -> None: - """Check if a database exists in the Hive Metastore.""" - try: - # Root namespace 
always exists - if self._is_root_namespace(request.id): - return - - if len(request.id) != 1: - raise ValueError(f"Invalid namespace identifier: {request.id}") - - database_name = request.id[0] - - with self.client as client: - client.get_database(database_name) - except Exception as e: - if NoSuchObjectException and isinstance(e, NoSuchObjectException): - raise ValueError(f"Namespace {request.id} does not exist") - logger.error(f"Failed to check namespace existence {request.id}: {e}") - raise - - def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: - """List tables in a database.""" - try: - # Root namespace has no tables - if self._is_root_namespace(request.id): - return ListTablesResponse(tables=[]) - - if len(request.id) != 1: - raise ValueError(f"Invalid namespace identifier: {request.id}") - - database_name = request.id[0] - - with self.client as client: - table_names = client.get_all_tables(database_name) - - # Filter for Lance tables if needed - tables = [] - for table_name in table_names: - try: - table = client.get_table(database_name, table_name) - # Check if it's a Lance table (case insensitive) - if table.parameters: - table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() - if table_type == LANCE_TABLE_FORMAT: - # Return just table name, not full identifier - tables.append(table_name) - except Exception: - # Skip tables we can't read - continue - - return ListTablesResponse(tables=tables) - except Exception as e: - if NoSuchObjectException and isinstance(e, NoSuchObjectException): - raise ValueError(f"Namespace {request.id} does not exist") - logger.error(f"Failed to list tables in namespace {request.id}: {e}") - raise - - def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: - """Describe a table in the Hive Metastore.""" - try: - database, table_name = self._normalize_identifier(request.id) - - with self.client as client: - table = client.get_table(database, table_name) - - # Check if it's a 
Lance table (case insensitive) - if not table.parameters: - raise ValueError(f"Table {request.id} is not a Lance table") - table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() - if table_type != LANCE_TABLE_FORMAT: - raise ValueError(f"Table {request.id} is not a Lance table") - - # Get table location - location = table.sd.location if table.sd else None - if not location: - raise ValueError(f"Table {request.id} has no location") - - # Build properties from Hive metadata - properties = {} - if table.parameters: - properties.update(table.parameters) - if table.owner: - properties["owner"] = table.owner - - # Get version from table parameters if available - version = None - if table.parameters and VERSION_KEY in table.parameters: - try: - version = int(table.parameters[VERSION_KEY]) - except (ValueError, TypeError): - pass - - # Note: We don't load the Lance dataset here, just return Hive metadata - # Schema will be None as we're not opening the dataset - return DescribeTableResponse( - var_schema=None, - location=location, - version=version, - properties=properties - ) - except Exception as e: - if NoSuchObjectException and isinstance(e, NoSuchObjectException): - raise ValueError(f"Table {request.id} does not exist") - logger.error(f"Failed to describe table {request.id}: {e}") - raise - - def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse: - """Register an existing Lance table in the Hive Metastore. - - Note: This will open the Lance dataset to get schema and version information. - If you want to avoid opening the dataset, you can provide 'version' in properties. 
- """ - try: - database, table_name = self._normalize_identifier(request.id) - - # Determine managed_by value - managed_by = request.properties.get(MANAGED_BY_KEY, "storage") if request.properties else "storage" - - # We always need to open the dataset to get schema for Hive columns - dataset = lance.dataset(request.location) - schema = dataset.schema - - # Only track version if managed_by is "impl" - version = None - if managed_by == "impl": - # Get version from properties or dataset - version = request.properties.get(VERSION_KEY) if request.properties else None - if version is None: - version = str(dataset.version) - - # Create Hive table object - if not HiveTable: - raise ImportError("Hive dependencies not available") - hive_table = HiveTable() - hive_table.dbName = database - hive_table.tableName = table_name - hive_table.owner = request.properties.get("owner", os.getenv("USER", "")) if request.properties else os.getenv("USER", "") - # Use current time if file doesn't exist yet - import time - current_time = int(time.time()) - try: - hive_table.createTime = int(os.path.getctime(request.location)) - hive_table.lastAccessTime = int(os.path.getatime(request.location)) - except (OSError, FileNotFoundError): - hive_table.createTime = current_time - hive_table.lastAccessTime = current_time - hive_table.tableType = EXTERNAL_TABLE - - # Set storage descriptor - if not StorageDescriptor: - raise ImportError("Hive dependencies not available") - sd = StorageDescriptor() - sd.location = request.location - sd.inputFormat = "com.lancedb.lance.mapred.LanceInputFormat" - sd.outputFormat = "com.lancedb.lance.mapred.LanceOutputFormat" - sd.compressed = False - sd.cols = self._pyarrow_schema_to_hive_fields(schema) - - # Set SerDe info - if not SerDeInfo: - raise ImportError("Hive dependencies not available") - serde = SerDeInfo() - serde.serializationLib = "com.lancedb.lance.mapred.LanceSerDe" - sd.serdeInfo = serde - - hive_table.sd = sd - - # Set table parameters per hive.md 
specification - hive_table.parameters = { - TABLE_TYPE_KEY: LANCE_TABLE_FORMAT, - MANAGED_BY_KEY: managed_by, - } - - # Only set version if managed_by is "impl" - if managed_by == "impl" and version is not None: - hive_table.parameters[VERSION_KEY] = version - - if request.properties: - # Add other properties but don't override the required ones - for k, v in request.properties.items(): - if k not in [TABLE_TYPE_KEY, MANAGED_BY_KEY, VERSION_KEY]: - hive_table.parameters[k] = v - - with self.client as client: - client.create_table(hive_table) - - return RegisterTableResponse( - location=request.location, - properties=request.properties - ) - except Exception as e: - if AlreadyExistsException and isinstance(e, AlreadyExistsException): - raise ValueError(f"Table {request.id} already exists") - logger.error(f"Failed to register table {request.id}: {e}") - raise - - def table_exists(self, request: TableExistsRequest) -> None: - """Check if a table exists in the Hive Metastore.""" - try: - database, table_name = self._normalize_identifier(request.id) - - with self.client as client: - table = client.get_table(database, table_name) - - # Check if it's a Lance table (case insensitive) - if not table.parameters: - raise ValueError(f"Table {request.id} is not a Lance table") - table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() - if table_type != LANCE_TABLE_FORMAT: - raise ValueError(f"Table {request.id} is not a Lance table") - except Exception as e: - if NoSuchObjectException and isinstance(e, NoSuchObjectException): - raise ValueError(f"Table {request.id} does not exist") - logger.error(f"Failed to check table existence {request.id}: {e}") - raise - - def drop_table(self, request: DropTableRequest) -> DropTableResponse: - """Drop a table from the Hive Metastore.""" - try: - database, table_name = self._normalize_identifier(request.id) - - with self.client as client: - # Get table to check if it's a Lance table - table = client.get_table(database, table_name) - - 
# Check if it's a Lance table (case insensitive) - if not table.parameters: - raise ValueError(f"Table {request.id} is not a Lance table") - table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() - if table_type != LANCE_TABLE_FORMAT: - raise ValueError(f"Table {request.id} is not a Lance table") - - # Drop the table (always delete data for Lance tables) - client.drop_table(database, table_name, deleteData=True) - - return DropTableResponse() - except Exception as e: - if NoSuchObjectException and isinstance(e, NoSuchObjectException): - raise ValueError(f"Table {request.id} does not exist") - logger.error(f"Failed to drop table {request.id}: {e}") - raise - - def deregister_table(self, request: DeregisterTableRequest) -> DeregisterTableResponse: - """Deregister a table from the Hive Metastore without deleting data.""" - try: - database, table_name = self._normalize_identifier(request.id) - - with self.client as client: - # Get table to check if it's a Lance table - table = client.get_table(database, table_name) - - # Check if it's a Lance table (case insensitive) - if not table.parameters: - raise ValueError(f"Table {request.id} is not a Lance table") - table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() - if table_type != LANCE_TABLE_FORMAT: - raise ValueError(f"Table {request.id} is not a Lance table") - - location = table.sd.location if table.sd else None - - # Drop the table metadata only (don't delete data) - client.drop_table(database, table_name, deleteData=False) - - return DeregisterTableResponse(location=location) - except Exception as e: - if NoSuchObjectException and isinstance(e, NoSuchObjectException): - raise ValueError(f"Table {request.id} does not exist") - logger.error(f"Failed to deregister table {request.id}: {e}") - raise - - def create_table(self, request: CreateTableRequest, request_data: bytes) -> CreateTableResponse: - """Create a new Lance table and register it in the Hive Metastore.""" - try: - database, table_name = 
self._normalize_identifier(request.id) - - if not request_data: - raise ValueError("Request data (Arrow IPC stream) is required for create_table") - - # Determine table location - location = request.location - if not location: - location = self._get_table_location(database, table_name) - - # Extract table from Arrow IPC stream - try: - reader = pa.ipc.open_stream(request_data) - table = reader.read_all() - except Exception as e: - raise ValueError(f"Invalid Arrow IPC stream: {e}") - - # Create Lance dataset - if request.mode == "create": - # Check if dataset already exists - if os.path.exists(location): - raise ValueError(f"Table {request.id} already exists at {location}") - dataset = lance.write_dataset(table, location) - elif request.mode == "create_or_replace": - dataset = lance.write_dataset(table, location, mode="overwrite") - else: - raise ValueError(f"Unsupported create mode: {request.mode}") - - # Register in Hive Metastore - register_request = RegisterTableRequest( - id=request.id, - location=location, - properties=request.properties - ) - self.register_table(register_request) - - return CreateTableResponse( - id=request.id, - location=location, - version=dataset.version - ) - except Exception as e: - logger.error(f"Failed to create table {request.id}: {e}") - raise - - def create_empty_table(self, request: CreateEmptyTableRequest) -> CreateEmptyTableResponse: - """Create an empty table (metadata only) in Hive metastore.""" - try: - database, table_name = self._normalize_identifier(request.id) - - # Determine table location - location = request.location - if not location: - location = self._get_table_location(database, table_name) - - # Create a minimal schema for Hive (placeholder schema) - if not FieldSchema: - raise ImportError("Hive dependencies not available") - - fields = [ - FieldSchema( - name='__placeholder_id', - type='bigint', - comment='Placeholder column for empty table' - ) - ] - - # Create Hive table metadata without creating actual Lance 
dataset - storage_descriptor = StorageDescriptor( - cols=fields, - location=location, - inputFormat='org.apache.hadoop.mapred.TextInputFormat', - outputFormat='org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', - serdeInfo=SerDeInfo( - serializationLib='org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - ) - ) - - # Set table parameters to identify it as Lance table - parameters = { - TABLE_TYPE_KEY: "LANCE", - MANAGED_BY_KEY: "storage", - 'empty_table': 'true', # Mark as empty table - } - - if request.properties: - parameters.update(request.properties) - - hive_table = HiveTable( - tableName=table_name, - dbName=database, - sd=storage_descriptor, - parameters=parameters, - tableType='EXTERNAL_TABLE' - ) - - # Create table in Hive - with self.client_pool.get_client() as client: - client.create_table(hive_table) - - return CreateEmptyTableResponse(location=location) - - except AlreadyExistsException: - raise ValueError(f"Table {request.id} already exists") - except Exception as e: - logger.error(f"Failed to create empty table {request.id}: {e}") - raise - - def _pyarrow_schema_to_hive_fields(self, schema: pa.Schema) -> List[FieldSchema]: - """Convert PyArrow schema to Hive field schemas.""" - fields = [] - for field in schema: - hive_type = self._pyarrow_type_to_hive_type(field.type) - if not FieldSchema: - raise ImportError("Hive dependencies not available") - hive_field = FieldSchema( - name=field.name, - type=hive_type, - comment="" - ) - fields.append(hive_field) - return fields - - def _pyarrow_type_to_hive_type(self, dtype: pa.DataType) -> str: - """Convert PyArrow data type to Hive type string.""" - if pa.types.is_boolean(dtype): - return "boolean" - elif pa.types.is_int8(dtype): - return "tinyint" - elif pa.types.is_int16(dtype): - return "smallint" - elif pa.types.is_int32(dtype): - return "int" - elif pa.types.is_int64(dtype): - return "bigint" - elif pa.types.is_float32(dtype): - return "float" - elif pa.types.is_float64(dtype): - return 
"double" - elif pa.types.is_string(dtype): - return "string" - elif pa.types.is_binary(dtype): - return "binary" - elif pa.types.is_timestamp(dtype): - return "timestamp" - elif pa.types.is_date32(dtype) or pa.types.is_date64(dtype): - return "date" - elif pa.types.is_list(dtype): - inner_type = self._pyarrow_type_to_hive_type(dtype.value_type) - return f"array<{inner_type}>" - elif pa.types.is_struct(dtype): - field_strs = [] - for i in range(dtype.num_fields): - field = dtype.field(i) - field_type = self._pyarrow_type_to_hive_type(field.type) - field_strs.append(f"{field.name}:{field_type}") - return f"struct<{','.join(field_strs)}>" - else: - return "string" # Default to string for unknown types - - def __getstate__(self): - """Prepare instance for pickling by excluding unpickleable objects.""" - state = self.__dict__.copy() - # Remove the unpickleable Hive client - state['_client'] = None - return state - - def __setstate__(self, state): - """Restore instance from pickled state.""" - self.__dict__.update(state) - # The Hive client will be re-initialized lazily via the property \ No newline at end of file diff --git a/python/src/lance_namespace_impls/hive2.py b/python/src/lance_namespace_impls/hive2.py new file mode 100644 index 0000000..68bd488 --- /dev/null +++ b/python/src/lance_namespace_impls/hive2.py @@ -0,0 +1,519 @@ +""" +Lance Hive2 Namespace implementation using Hive Metastore. + +This module provides integration with Apache Hive Metastore for managing Lance tables. +Lance tables are registered as external tables in Hive with specific metadata properties +to identify them as Lance format. 
+ +Installation: + pip install 'lance-namespace[hive2]' + +Usage: + from lance_namespace import connect + + # Connect to Hive Metastore + namespace = connect("hive2", { + "uri": "thrift://localhost:9083", + "root": "/my/dir", # Or "s3://bucket/prefix" + "ugi": "user:group1,group2" # Optional user/group info + }) + + # List databases + from lance_namespace import ListNamespacesRequest + response = namespace.list_namespaces(ListNamespacesRequest()) + +Configuration Properties: + uri (str): Hive Metastore Thrift URI (e.g., "thrift://localhost:9083") + root (str): Storage root location of the lakehouse on Hive catalog (default: current working directory) + ugi (str): Optional User Group Information for authentication (format: "user:group1,group2") + client.pool-size (int): Size of the HMS client connection pool (default: 3) + storage.* (str): Additional storage configurations to access table +""" + +from typing import List, Optional +from urllib.parse import urlparse +import os +import logging + +try: + from hive_metastore_client import HiveMetastoreClient as Client + from thrift_files.libraries.thrift_hive_metastore_client.ttypes import ( + Database as HiveDatabase, + Table as HiveTable, + StorageDescriptor, + SerDeInfo, + FieldSchema, + NoSuchObjectException, + AlreadyExistsException, + InvalidOperationException, + MetaException, + ) + + HIVE_AVAILABLE = True +except ImportError: + HIVE_AVAILABLE = False + Client = None + HiveDatabase = None + HiveTable = None + StorageDescriptor = None + SerDeInfo = None + FieldSchema = None + NoSuchObjectException = None + AlreadyExistsException = None + InvalidOperationException = None + MetaException = None + +from lance.namespace import LanceNamespace +from lance_namespace_urllib3_client.models import ( + ListNamespacesRequest, + ListNamespacesResponse, + DescribeNamespaceRequest, + DescribeNamespaceResponse, + CreateNamespaceRequest, + CreateNamespaceResponse, + DropNamespaceRequest, + DropNamespaceResponse, + 
ListTablesRequest, + ListTablesResponse, + CreateEmptyTableRequest, + CreateEmptyTableResponse, + DescribeTableRequest, + DescribeTableResponse, + DeregisterTableRequest, + DeregisterTableResponse, +) + +from lance_namespace_impls.rest_client import InvalidInputException + +logger = logging.getLogger(__name__) + +# Table properties used by Lance (per hive.md specification) +TABLE_TYPE_KEY = "table_type" # Case insensitive +LANCE_TABLE_FORMAT = "lance" # Case insensitive +MANAGED_BY_KEY = "managed_by" # Case insensitive, values: "storage" or "impl" +VERSION_KEY = "version" # Numeric version number +EXTERNAL_TABLE = "EXTERNAL_TABLE" + + +class HiveMetastoreClientWrapper: + """Helper class to manage Hive Metastore client connections.""" + + def __init__(self, uri: str, ugi: Optional[str] = None): + if not HIVE_AVAILABLE: + raise ImportError( + "Hive dependencies not installed. Please install with: " + "pip install 'lance-namespace[hive2]'" + ) + + self._uri = uri + self._ugi = ugi.split(":") if ugi else None + url_parts = urlparse(self._uri) + self._host = url_parts.hostname or "localhost" + self._port = url_parts.port or 9083 + self._client = None + + def __enter__(self): + """Enter context manager.""" + self._client = Client(host=self._host, port=self._port) + self._client.open() + if self._ugi: + self._client.set_ugi(*self._ugi) + return self._client + + def __exit__(self, exc_type, exc_val, exc_tb): + """Exit context manager.""" + if self._client: + self._client.close() + self._client = None + + def close(self): + """Close the client connection.""" + if self._client: + self._client.close() + self._client = None + + +class Hive2Namespace(LanceNamespace): + """Lance Hive2 Namespace implementation using Hive Metastore.""" + + def __init__(self, **properties): + """Initialize the Hive2 namespace. 
+ + Args: + uri: The Hive Metastore URI (e.g., "thrift://localhost:9083") + root: Storage root location of the lakehouse on Hive catalog (optional) + ugi: User Group Information for authentication (optional, format: "user:group1,group2") + client.pool-size: Size of the HMS client connection pool (optional, default: 3) + storage.*: Additional storage configurations to access table + **properties: Additional configuration properties + """ + if not HIVE_AVAILABLE: + raise ImportError( + "Hive dependencies not installed. Please install with: " + "pip install 'lance-namespace[hive2]'" + ) + + self.uri = properties.get("uri", "thrift://localhost:9083") + self.ugi = properties.get("ugi") + self.root = properties.get("root", os.getcwd()) + self.pool_size = int(properties.get("client.pool-size", "3")) + # Extract storage properties + self.storage_properties = { + k[8:]: v for k, v in properties.items() if k.startswith("storage.") + } + + # Store properties for pickling support + self._properties = properties.copy() + + # Lazy initialization to support pickling + self._client = None + + def namespace_id(self) -> str: + """Return a human-readable unique identifier for this namespace instance.""" + return f"Hive2Namespace {{ uri: {self.uri!r} }}" + + @property + def client(self): + """Get the Hive client, initializing it if necessary.""" + if self._client is None: + self._client = HiveMetastoreClientWrapper(self.uri, self.ugi) + return self._client + + def _normalize_identifier(self, identifier: List[str]) -> tuple: + """Normalize identifier to (database, table) tuple.""" + if len(identifier) == 1: + return ("default", identifier[0]) + elif len(identifier) == 2: + return (identifier[0], identifier[1]) + else: + raise ValueError(f"Invalid identifier: {identifier}") + + def _is_root_namespace(self, identifier: Optional[List[str]]) -> bool: + """Check if the identifier refers to the root namespace.""" + return not identifier or len(identifier) == 0 + + def 
_get_table_location(self, database: str, table: str) -> str: + """Get the location for a table.""" + return os.path.join(self.root, f"{database}.db", table) + + def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse: + """List all databases in the Hive Metastore.""" + try: + # Only list namespaces if we're at the root level + if not self._is_root_namespace(request.id): + # Non-root namespaces don't have children in Hive2 + return ListNamespacesResponse(namespaces=[]) + + with self.client as client: + databases = client.get_all_databases() + # Return just database names as strings (excluding default) + namespaces = [db for db in databases if db != "default"] + + return ListNamespacesResponse(namespaces=namespaces) + except Exception as e: + logger.error(f"Failed to list namespaces: {e}") + raise + + def describe_namespace( + self, request: DescribeNamespaceRequest + ) -> DescribeNamespaceResponse: + """Describe a database in the Hive Metastore.""" + try: + # Handle root namespace + if self._is_root_namespace(request.id): + properties = { + "location": self.root, + "description": "Root namespace (Hive Metastore)", + } + if self.ugi: + properties["ugi"] = self.ugi + return DescribeNamespaceResponse(properties=properties) + + if len(request.id) != 1: + raise ValueError(f"Invalid namespace identifier: {request.id}") + + database_name = request.id[0] + + with self.client as client: + database = client.get_database(database_name) + + properties = {} + if database.description: + properties["comment"] = database.description + if database.ownerName: + properties["owner"] = database.ownerName + if database.locationUri: + properties["location"] = database.locationUri + if database.parameters: + properties.update(database.parameters) + + return DescribeNamespaceResponse(properties=properties) + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Namespace {request.id} does not exist") + 
logger.error(f"Failed to describe namespace {request.id}: {e}") + raise + + def create_namespace( + self, request: CreateNamespaceRequest + ) -> CreateNamespaceResponse: + """Create a new database in the Hive Metastore.""" + try: + # Cannot create root namespace + if self._is_root_namespace(request.id): + raise ValueError("Root namespace already exists") + + if len(request.id) != 1: + raise ValueError(f"Invalid namespace identifier: {request.id}") + + database_name = request.id[0] + + # Create database object + if not HiveDatabase: + raise ImportError("Hive dependencies not available") + + props = request.properties or {} + database = HiveDatabase() + database.name = database_name + database.description = props.get("comment", "") + database.ownerName = props.get("owner", os.getenv("USER", "")) + database.locationUri = props.get( + "location", os.path.join(self.root, f"{database_name}.db") + ) + database.parameters = { + k: v + for k, v in props.items() + if k not in ["comment", "owner", "location"] + } + + with self.client as client: + client.create_database(database) + + return CreateNamespaceResponse() + except Exception as e: + if AlreadyExistsException and isinstance(e, AlreadyExistsException): + raise ValueError(f"Namespace {request.id} already exists") + logger.error(f"Failed to create namespace {request.id}: {e}") + raise + + def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse: + """Drop a database from the Hive Metastore. 
Only RESTRICT mode is supported.""" + if request.behavior and request.behavior.lower() == "cascade": + raise InvalidInputException( + "Cascade behavior is not supported for this implementation" + ) + + try: + # Cannot drop root namespace + if self._is_root_namespace(request.id): + raise ValueError("Cannot drop root namespace") + + if len(request.id) != 1: + raise ValueError(f"Invalid namespace identifier: {request.id}") + + database_name = request.id[0] + + with self.client as client: + # Check if database is empty (RESTRICT mode only) + tables = client.get_all_tables(database_name) + if tables: + raise ValueError(f"Namespace {request.id} is not empty") + + # Drop database + client.drop_database(database_name, deleteData=True, cascade=False) + + return DropNamespaceResponse() + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Namespace {request.id} does not exist") + logger.error(f"Failed to drop namespace {request.id}: {e}") + raise + + def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: + """List tables in a database.""" + try: + # Root namespace has no tables + if self._is_root_namespace(request.id): + return ListTablesResponse(tables=[]) + + if len(request.id) != 1: + raise ValueError(f"Invalid namespace identifier: {request.id}") + + database_name = request.id[0] + + with self.client as client: + table_names = client.get_all_tables(database_name) + + # Filter for Lance tables if needed + tables = [] + for table_name in table_names: + try: + table = client.get_table(database_name, table_name) + # Check if it's a Lance table (case insensitive) + if table.parameters: + table_type = table.parameters.get( + TABLE_TYPE_KEY, "" + ).lower() + if table_type == LANCE_TABLE_FORMAT: + # Return just table name, not full identifier + tables.append(table_name) + except Exception: + # Skip tables we can't read + continue + + return ListTablesResponse(tables=tables) + except Exception as e: + 
if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Namespace {request.id} does not exist") + logger.error(f"Failed to list tables in namespace {request.id}: {e}") + raise + + def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: + """Describe a table in the Hive Metastore. + + Only load_detailed_metadata=false is supported. Returns location and storage_options only. + """ + if request.load_detailed_metadata: + raise ValueError( + "load_detailed_metadata=true is not supported for this implementation" + ) + + try: + database, table_name = self._normalize_identifier(request.id) + + with self.client as client: + table = client.get_table(database, table_name) + + # Check if it's a Lance table (case insensitive) + if not table.parameters: + raise ValueError(f"Table {request.id} is not a Lance table") + table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() + if table_type != LANCE_TABLE_FORMAT: + raise ValueError(f"Table {request.id} is not a Lance table") + + # Get table location + location = table.sd.location if table.sd else None + if not location: + raise ValueError(f"Table {request.id} has no location") + + return DescribeTableResponse( + location=location, storage_options=self.storage_properties + ) + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Table {request.id} does not exist") + logger.error(f"Failed to describe table {request.id}: {e}") + raise + + def deregister_table( + self, request: DeregisterTableRequest + ) -> DeregisterTableResponse: + """Deregister a table from the Hive Metastore without deleting data.""" + try: + database, table_name = self._normalize_identifier(request.id) + + with self.client as client: + # Get table to check if it's a Lance table + table = client.get_table(database, table_name) + + # Check if it's a Lance table (case insensitive) + if not table.parameters: + raise ValueError(f"Table 
{request.id} is not a Lance table") + table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() + if table_type != LANCE_TABLE_FORMAT: + raise ValueError(f"Table {request.id} is not a Lance table") + + location = table.sd.location if table.sd else None + + # Drop the table metadata only (don't delete data) + client.drop_table(database, table_name, deleteData=False) + + return DeregisterTableResponse(location=location) + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Table {request.id} does not exist") + logger.error(f"Failed to deregister table {request.id}: {e}") + raise + + def create_empty_table( + self, request: CreateEmptyTableRequest + ) -> CreateEmptyTableResponse: + """Create an empty table (metadata only) in Hive metastore.""" + try: + database, table_name = self._normalize_identifier(request.id) + + # Determine table location + location = request.location + if not location: + location = self._get_table_location(database, table_name) + + # Create a minimal schema for Hive (placeholder schema) + if not FieldSchema: + raise ImportError("Hive dependencies not available") + + fields = [ + FieldSchema( + name="__placeholder_id", + type="bigint", + comment="Placeholder column for empty table", + ) + ] + + # Create Hive table metadata without creating actual Lance dataset + storage_descriptor = StorageDescriptor( + cols=fields, + location=location, + inputFormat="org.apache.hadoop.mapred.TextInputFormat", + outputFormat="org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + serdeInfo=SerDeInfo( + serializationLib="org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" + ), + ) + + # Set table parameters to identify it as Lance table + parameters = { + TABLE_TYPE_KEY: "LANCE", + MANAGED_BY_KEY: "storage", + "empty_table": "true", # Mark as empty table + } + + if hasattr(request, "properties") and request.properties: + parameters.update(request.properties) + + hive_table = HiveTable( + 
tableName=table_name, + dbName=database, + sd=storage_descriptor, + parameters=parameters, + tableType="EXTERNAL_TABLE", + ) + + # Create table in Hive + with self.client as client: + client.create_table(hive_table) + + return CreateEmptyTableResponse(location=location) + + except AlreadyExistsException: + raise ValueError(f"Table {request.id} already exists") + except Exception as e: + logger.error(f"Failed to create empty table {request.id}: {e}") + raise + + def __getstate__(self): + """Prepare instance for pickling by excluding unpickleable objects.""" + state = self.__dict__.copy() + # Remove the unpickleable Hive client + state["_client"] = None + return state + + def __setstate__(self, state): + """Restore instance from pickled state.""" + self.__dict__.update(state) + # The Hive client will be re-initialized lazily via the property + + def close(self): + """Close the Hive Metastore client connection.""" + if self._client is not None: + self._client.close() + self._client = None diff --git a/python/src/lance_namespace_impls/hive3.py b/python/src/lance_namespace_impls/hive3.py new file mode 100644 index 0000000..1b44f7f --- /dev/null +++ b/python/src/lance_namespace_impls/hive3.py @@ -0,0 +1,584 @@ +""" +Lance Hive3 Namespace implementation using Hive 3.x Metastore. + +This module provides integration with Apache Hive 3.x Metastore for managing Lance tables. +Hive3 supports a 3-level namespace hierarchy: catalog > database > table. 
+ +Installation: + pip install 'lance-namespace[hive3]' + +Usage: + from lance_namespace import connect + + # Connect to Hive 3.x Metastore + namespace = connect("hive3", { + "uri": "thrift://localhost:9083", + "root": "/my/dir", # Or "s3://bucket/prefix" + "ugi": "user:group1,group2" # Optional user/group info + }) + + # List catalogs (root level) + from lance_namespace import ListNamespacesRequest + response = namespace.list_namespaces(ListNamespacesRequest()) + + # List databases in a catalog + response = namespace.list_namespaces(ListNamespacesRequest(id=["my_catalog"])) + +Configuration Properties: + uri (str): Hive Metastore Thrift URI (e.g., "thrift://localhost:9083") + root (str): Storage root location of the lakehouse (default: current working directory) + ugi (str): Optional User Group Information for authentication (format: "user:group1,group2") + client.pool-size (int): Size of the HMS client connection pool (default: 3) + storage.* (str): Additional storage configurations +""" + +from typing import List, Optional +from urllib.parse import urlparse +import os +import logging + +try: + from hive_metastore_client import HiveMetastoreClient as Client + from thrift_files.libraries.thrift_hive_metastore_client.ttypes import ( + Database as HiveDatabase, + Table as HiveTable, + StorageDescriptor, + SerDeInfo, + FieldSchema, + NoSuchObjectException, + AlreadyExistsException, + InvalidOperationException, + MetaException, + ) + + HIVE_AVAILABLE = True +except ImportError: + HIVE_AVAILABLE = False + Client = None + HiveDatabase = None + HiveTable = None + StorageDescriptor = None + SerDeInfo = None + FieldSchema = None + NoSuchObjectException = None + AlreadyExistsException = None + InvalidOperationException = None + MetaException = None + +from lance.namespace import LanceNamespace +from lance_namespace_urllib3_client.models import ( + ListNamespacesRequest, + ListNamespacesResponse, + DescribeNamespaceRequest, + DescribeNamespaceResponse, + 
CreateNamespaceRequest, + CreateNamespaceResponse, + DropNamespaceRequest, + DropNamespaceResponse, + ListTablesRequest, + ListTablesResponse, + CreateEmptyTableRequest, + CreateEmptyTableResponse, + DescribeTableRequest, + DescribeTableResponse, + DeregisterTableRequest, + DeregisterTableResponse, +) + +from lance_namespace_impls.rest_client import InvalidInputException + +logger = logging.getLogger(__name__) + +TABLE_TYPE_KEY = "table_type" +LANCE_TABLE_FORMAT = "lance" +MANAGED_BY_KEY = "managed_by" +VERSION_KEY = "version" +EXTERNAL_TABLE = "EXTERNAL_TABLE" +DEFAULT_CATALOG = "hive" + + +class Hive3MetastoreClientWrapper: + """Helper class to manage Hive 3.x Metastore client connections.""" + + def __init__(self, uri: str, ugi: Optional[str] = None): + if not HIVE_AVAILABLE: + raise ImportError( + "Hive dependencies not installed. Please install with: " + "pip install 'lance-namespace[hive3]'" + ) + + self._uri = uri + self._ugi = ugi.split(":") if ugi else None + url_parts = urlparse(self._uri) + self._host = url_parts.hostname or "localhost" + self._port = url_parts.port or 9083 + self._client = None + + def __enter__(self): + """Enter context manager.""" + self._client = Client(host=self._host, port=self._port) + self._client.open() + if self._ugi: + self._client.set_ugi(*self._ugi) + return self._client + + def __exit__(self, exc_type, exc_val, exc_tb): + """Exit context manager.""" + if self._client: + self._client.close() + self._client = None + + def close(self): + """Close the client connection.""" + if self._client: + self._client.close() + self._client = None + + +class Hive3Namespace(LanceNamespace): + """Lance Hive3 Namespace implementation using Hive 3.x Metastore. + + Supports 3-level namespace hierarchy: catalog > database > table. + """ + + def __init__(self, **properties): + """Initialize the Hive3 namespace. 
+ + Args: + uri: The Hive Metastore URI (e.g., "thrift://localhost:9083") + root: Storage root location (optional) + ugi: User Group Information for authentication (optional) + client.pool-size: Size of the HMS client connection pool (optional, default: 3) + storage.*: Additional storage configurations + **properties: Additional configuration properties + """ + if not HIVE_AVAILABLE: + raise ImportError( + "Hive dependencies not installed. Please install with: " + "pip install 'lance-namespace[hive3]'" + ) + + self.uri = properties.get("uri", "thrift://localhost:9083") + self.ugi = properties.get("ugi") + self.root = properties.get("root", os.getcwd()) + self.pool_size = int(properties.get("client.pool-size", "3")) + self.storage_properties = { + k[8:]: v for k, v in properties.items() if k.startswith("storage.") + } + + self._properties = properties.copy() + self._client = None + + def namespace_id(self) -> str: + """Return a human-readable unique identifier for this namespace instance.""" + return f"Hive3Namespace {{ uri: {self.uri!r} }}" + + @property + def client(self): + """Get the Hive client, initializing it if necessary.""" + if self._client is None: + self._client = Hive3MetastoreClientWrapper(self.uri, self.ugi) + return self._client + + def _normalize_identifier(self, identifier: List[str]) -> tuple: + """Normalize identifier to (catalog, database, table) tuple.""" + if len(identifier) == 1: + return (DEFAULT_CATALOG, "default", identifier[0]) + elif len(identifier) == 2: + return (DEFAULT_CATALOG, identifier[0], identifier[1]) + elif len(identifier) == 3: + return (identifier[0], identifier[1], identifier[2]) + else: + raise ValueError(f"Invalid identifier: {identifier}") + + def _is_root_namespace(self, identifier: Optional[List[str]]) -> bool: + """Check if the identifier refers to the root namespace.""" + return not identifier or len(identifier) == 0 + + def _get_table_location(self, catalog: str, database: str, table: str) -> str: + """Get the 
location for a table.""" + return os.path.join(self.root, database, f"{table}.lance") + + def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse: + """List namespaces at the given level. + + - Root level: lists catalogs + - Catalog level: lists databases in that catalog + """ + try: + ns_id = request.id if request.id else [] + + if self._is_root_namespace(ns_id): + # List catalogs + with self.client as client: + # Try to get catalogs if supported (Hive 3.x) + try: + catalogs = ( + client.get_catalogs().names + if hasattr(client, "get_catalogs") + else [] + ) + except Exception: + # Fall back to default catalog + catalogs = [DEFAULT_CATALOG] + return ListNamespacesResponse(namespaces=catalogs) + + elif len(ns_id) == 1: + # List databases in catalog + # Note: Hive 2.x Metastore API doesn't support catalog operations, + # so we ignore the catalog name and list all databases + _catalog = ns_id[0].lower() # noqa: F841 + with self.client as client: + try: + databases = client.get_all_databases() + except Exception: + databases = [] + # Exclude 'default' database from list + namespaces = [db for db in databases if db != "default"] + return ListNamespacesResponse(namespaces=namespaces) + + else: + # 2+ level namespaces don't have children + return ListNamespacesResponse(namespaces=[]) + + except Exception as e: + logger.error(f"Failed to list namespaces: {e}") + raise + + def describe_namespace( + self, request: DescribeNamespaceRequest + ) -> DescribeNamespaceResponse: + """Describe a namespace (catalog or database).""" + try: + if self._is_root_namespace(request.id): + properties = { + "location": self.root, + "description": "Root namespace (Hive 3.x Metastore)", + } + if self.ugi: + properties["ugi"] = self.ugi + return DescribeNamespaceResponse(properties=properties) + + if len(request.id) == 1: + # Describe catalog + catalog_name = request.id[0].lower() + properties = { + "description": f"Catalog: {catalog_name}", + "catalog.location.uri": 
os.path.join(self.root, catalog_name), + } + return DescribeNamespaceResponse(properties=properties) + + elif len(request.id) == 2: + # Describe database + catalog_name = request.id[0].lower() + database_name = request.id[1].lower() + + with self.client as client: + database = client.get_database(database_name) + + properties = {} + if database.description: + properties["comment"] = database.description + if database.ownerName: + properties["owner"] = database.ownerName + if database.locationUri: + properties["location"] = database.locationUri + if database.parameters: + properties.update(database.parameters) + + return DescribeNamespaceResponse(properties=properties) + else: + raise ValueError(f"Invalid namespace identifier: {request.id}") + + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Namespace {request.id} does not exist") + logger.error(f"Failed to describe namespace {request.id}: {e}") + raise + + def create_namespace( + self, request: CreateNamespaceRequest + ) -> CreateNamespaceResponse: + """Create a new namespace (catalog or database).""" + try: + if self._is_root_namespace(request.id): + raise ValueError("Root namespace already exists") + + mode = request.mode.lower() if request.mode else "create" + + if len(request.id) == 1: + # Create catalog (Hive 3.x) + # Note: Python Hive client may not support catalog creation + catalog_name = request.id[0].lower() + logger.warning(f"Catalog creation may not be supported: {catalog_name}") + return CreateNamespaceResponse() + + elif len(request.id) == 2: + # Create database + catalog_name = request.id[0].lower() + database_name = request.id[1].lower() + + if not HiveDatabase: + raise ImportError("Hive dependencies not available") + + database = HiveDatabase() + database.name = database_name + database.description = ( + request.properties.get("comment", "") if request.properties else "" + ) + database.ownerName = ( + request.properties.get("owner", 
os.getenv("USER", "")) + if request.properties + else os.getenv("USER", "") + ) + database.locationUri = ( + request.properties.get( + "location", os.path.join(self.root, database_name) + ) + if request.properties + else os.path.join(self.root, database_name) + ) + + if request.properties: + database.parameters = { + k: v + for k, v in request.properties.items() + if k not in ["comment", "owner", "location"] + } + + with self.client as client: + try: + client.create_database(database) + except AlreadyExistsException: + if mode == "create": + raise ValueError(f"Namespace {request.id} already exists") + elif mode in ("exist_ok", "existok"): + pass # OK to exist + elif mode == "overwrite": + client.drop_database( + database_name, deleteData=True, cascade=True + ) + client.create_database(database) + + return CreateNamespaceResponse() + else: + raise ValueError(f"Invalid namespace identifier: {request.id}") + + except Exception as e: + if AlreadyExistsException and isinstance(e, AlreadyExistsException): + raise ValueError(f"Namespace {request.id} already exists") + logger.error(f"Failed to create namespace {request.id}: {e}") + raise + + def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse: + """Drop a namespace (catalog or database). 
Only RESTRICT mode is supported.""" + if request.behavior and request.behavior.lower() == "cascade": + raise InvalidInputException( + "Cascade behavior is not supported for this implementation" + ) + + try: + if self._is_root_namespace(request.id): + raise ValueError("Cannot drop root namespace") + + if len(request.id) == 1: + # Drop catalog (Hive 3.x) + catalog_name = request.id[0].lower() + logger.warning(f"Catalog drop may not be supported: {catalog_name}") + return DropNamespaceResponse() + + elif len(request.id) == 2: + # Drop database + database_name = request.id[1].lower() + + with self.client as client: + # Check if database is empty (RESTRICT mode only) + tables = client.get_all_tables(database_name) + if tables: + raise ValueError(f"Namespace {request.id} is not empty") + + client.drop_database(database_name, deleteData=True, cascade=False) + + return DropNamespaceResponse() + else: + raise ValueError(f"Invalid namespace identifier: {request.id}") + + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Namespace {request.id} does not exist") + logger.error(f"Failed to drop namespace {request.id}: {e}") + raise + + def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: + """List tables in a database.""" + try: + if self._is_root_namespace(request.id) or len(request.id) < 2: + return ListTablesResponse(tables=[]) + + # Note: Hive 2.x Metastore API doesn't support catalog operations, + # so we ignore the catalog name + _catalog_name = request.id[0].lower() # noqa: F841 + database_name = request.id[1].lower() + + with self.client as client: + table_names = client.get_all_tables(database_name) + + # Filter for Lance tables + tables = [] + for table_name in table_names: + try: + table = client.get_table(database_name, table_name) + if table.parameters: + table_type = table.parameters.get( + TABLE_TYPE_KEY, "" + ).lower() + if table_type == LANCE_TABLE_FORMAT: + 
tables.append(table_name) + except Exception: + continue + + return ListTablesResponse(tables=tables) + + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Namespace {request.id} does not exist") + logger.error(f"Failed to list tables in namespace {request.id}: {e}") + raise + + def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: + """Describe a table. + + Only load_detailed_metadata=false is supported. Returns location and storage_options only. + """ + if request.load_detailed_metadata: + raise ValueError( + "load_detailed_metadata=true is not supported for this implementation" + ) + + try: + catalog, database, table_name = self._normalize_identifier(request.id) + + with self.client as client: + table = client.get_table(database, table_name) + + if not table.parameters: + raise ValueError(f"Table {request.id} is not a Lance table") + table_type = table.parameters.get(TABLE_TYPE_KEY, "").lower() + if table_type != LANCE_TABLE_FORMAT: + raise ValueError(f"Table {request.id} is not a Lance table") + + location = table.sd.location if table.sd else None + if not location: + raise ValueError(f"Table {request.id} has no location") + + return DescribeTableResponse( + location=location, storage_options=self.storage_properties + ) + + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Table {request.id} does not exist") + logger.error(f"Failed to describe table {request.id}: {e}") + raise + + def deregister_table( + self, request: DeregisterTableRequest + ) -> DeregisterTableResponse: + """Deregister a table without deleting data.""" + try: + catalog, database, table_name = self._normalize_identifier(request.id) + + with self.client as client: + table = client.get_table(database, table_name) + + if not table.parameters: + raise ValueError(f"Table {request.id} is not a Lance table") + table_type = 
table.parameters.get(TABLE_TYPE_KEY, "").lower() + if table_type != LANCE_TABLE_FORMAT: + raise ValueError(f"Table {request.id} is not a Lance table") + + location = table.sd.location if table.sd else None + + client.drop_table(database, table_name, deleteData=False) + + return DeregisterTableResponse(location=location) + + except Exception as e: + if NoSuchObjectException and isinstance(e, NoSuchObjectException): + raise ValueError(f"Table {request.id} does not exist") + logger.error(f"Failed to deregister table {request.id}: {e}") + raise + + def create_empty_table( + self, request: CreateEmptyTableRequest + ) -> CreateEmptyTableResponse: + """Create an empty table (metadata only).""" + try: + catalog, database, table_name = self._normalize_identifier(request.id) + + location = request.location + if not location: + location = self._get_table_location(catalog, database, table_name) + + if not FieldSchema: + raise ImportError("Hive dependencies not available") + + fields = [ + FieldSchema( + name="__placeholder_id", type="bigint", comment="Placeholder column" + ) + ] + + storage_descriptor = StorageDescriptor( + cols=fields, + location=location, + inputFormat="org.apache.hadoop.mapred.TextInputFormat", + outputFormat="org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + serdeInfo=SerDeInfo( + serializationLib="org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" + ), + ) + + parameters = { + TABLE_TYPE_KEY: LANCE_TABLE_FORMAT, + MANAGED_BY_KEY: "storage", + "empty_table": "true", + } + + if hasattr(request, "properties") and request.properties: + parameters.update(request.properties) + + hive_table = HiveTable( + tableName=table_name, + dbName=database, + sd=storage_descriptor, + parameters=parameters, + tableType="EXTERNAL_TABLE", + ) + + with self.client as client: + client.create_table(hive_table) + + return CreateEmptyTableResponse(location=location) + + except AlreadyExistsException: + raise ValueError(f"Table {request.id} already exists") + except 
Exception as e: + logger.error(f"Failed to create empty table {request.id}: {e}") + raise + + def __getstate__(self): + """Prepare instance for pickling.""" + state = self.__dict__.copy() + state["_client"] = None + return state + + def __setstate__(self, state): + """Restore instance from pickled state.""" + self.__dict__.update(state) + + def close(self): + """Close the Hive Metastore client connection.""" + if self._client is not None: + self._client.close() + self._client = None diff --git a/python/src/lance_namespace_impls/iceberg.py b/python/src/lance_namespace_impls/iceberg.py index 69d389c..c655eee 100644 --- a/python/src/lance_namespace_impls/iceberg.py +++ b/python/src/lance_namespace_impls/iceberg.py @@ -1,193 +1,95 @@ """ Iceberg REST Catalog namespace implementation for Lance. + +The prefix (typically a warehouse or catalog name) is treated as part of the +namespace identifier, similar to how Polaris handles catalog names. +For example: [warehouse_name, namespace1, namespace2, ..., table_name] """ -import json import logging -import os -from typing import Dict, List, Optional, Any -from dataclasses import dataclass, field -import urllib3 import urllib.parse +import warnings +from dataclasses import dataclass +from typing import Any, Dict, List, Optional +from lance.namespace import LanceNamespace from lance_namespace_urllib3_client.models import ( - ListNamespacesRequest, - ListNamespacesResponse, - DescribeNamespaceRequest, - DescribeNamespaceResponse, + CreateEmptyTableRequest, + CreateEmptyTableResponse, CreateNamespaceRequest, CreateNamespaceResponse, + DeclareTableRequest, + DeclareTableResponse, + DeregisterTableRequest, + DeregisterTableResponse, + DescribeNamespaceRequest, + DescribeNamespaceResponse, + DescribeTableRequest, + DescribeTableResponse, DropNamespaceRequest, DropNamespaceResponse, - NamespaceExistsRequest, + ListNamespacesRequest, + ListNamespacesResponse, ListTablesRequest, ListTablesResponse, - DescribeTableRequest, - 
DescribeTableResponse, - TableExistsRequest, - DropTableRequest, - DropTableResponse, - CreateEmptyTableRequest, - CreateEmptyTableResponse, ) -from lance.namespace import LanceNamespace - +from lance_namespace_impls.rest_client import ( + RestClient, + RestClientException, + InternalException, + InvalidInputException, + NamespaceAlreadyExistsException, + NamespaceNotFoundException, + TableAlreadyExistsException, + TableNotFoundException, +) logger = logging.getLogger(__name__) -NAMESPACE_SEPARATOR = '\x1F' +NAMESPACE_SEPARATOR = "\x1f" @dataclass class IcebergNamespaceConfig: """Configuration for Iceberg REST Catalog namespace.""" - ENDPOINT = "iceberg.endpoint" - WAREHOUSE = "iceberg.warehouse" - PREFIX = "iceberg.prefix" - AUTH_TOKEN = "iceberg.auth_token" - CREDENTIAL = "iceberg.credential" - CONNECT_TIMEOUT = "iceberg.connect_timeout_millis" - READ_TIMEOUT = "iceberg.read_timeout_millis" - MAX_RETRIES = "iceberg.max_retries" - ROOT = "iceberg.root" + ENDPOINT = "endpoint" + WAREHOUSE = "warehouse" + AUTH_TOKEN = "auth_token" + CREDENTIAL = "credential" + CONNECT_TIMEOUT = "connect_timeout" + READ_TIMEOUT = "read_timeout" + MAX_RETRIES = "max_retries" + ROOT = "root" endpoint: str warehouse: Optional[str] = None - prefix: str = "" auth_token: Optional[str] = None credential: Optional[str] = None connect_timeout: int = 10000 read_timeout: int = 30000 max_retries: int = 3 - root: str = "/tmp/lance" + root: str = "" def __init__(self, properties: Dict[str, str]): + import os + self.endpoint = properties.get(self.ENDPOINT) if not self.endpoint: raise ValueError(f"Required property {self.ENDPOINT} is not set") self.warehouse = properties.get(self.WAREHOUSE) - self.prefix = properties.get(self.PREFIX, "") self.auth_token = properties.get(self.AUTH_TOKEN) self.credential = properties.get(self.CREDENTIAL) self.connect_timeout = int(properties.get(self.CONNECT_TIMEOUT, "10000")) self.read_timeout = int(properties.get(self.READ_TIMEOUT, "30000")) self.max_retries = 
int(properties.get(self.MAX_RETRIES, "3")) - self.root = properties.get(self.ROOT, "/tmp/lance") - - def get_full_api_url(self) -> str: - """Get the full API URL with prefix.""" - base = self.endpoint.rstrip('/') - if self.prefix: - return f"{base}/{self.prefix}" - return base - - -class RestClient: - """Simple REST client for Iceberg REST Catalog API.""" - - def __init__(self, base_url: str, headers: Optional[Dict[str, str]] = None, - connect_timeout: int = 10, read_timeout: int = 30, max_retries: int = 3): - self.base_url = base_url.rstrip('/') - self.headers = headers or {} - self.headers['Content-Type'] = 'application/json' - self.headers['Accept'] = 'application/json' - - timeout = urllib3.Timeout(connect=connect_timeout/1000, read=read_timeout/1000) - self.http = urllib3.PoolManager( - timeout=timeout, - retries=urllib3.Retry(total=max_retries, backoff_factor=0.3) - ) - - def _make_request(self, method: str, path: str, params: Optional[Dict[str, str]] = None, - body: Optional[Any] = None) -> Any: - """Make HTTP request to Iceberg API.""" - url = f"{self.base_url}{path}" - - if params: - query_string = urllib.parse.urlencode(params) - url = f"{url}?{query_string}" - - body_data = None - if body is not None: - body_data = json.dumps(body).encode('utf-8') - - try: - response = self.http.request( - method, - url, - headers=self.headers, - body=body_data - ) - - if response.status >= 400: - raise RestClientException(response.status, response.data.decode('utf-8')) + self.root = properties.get(self.ROOT, os.getcwd()) - if response.data: - return json.loads(response.data.decode('utf-8')) - return None - - except urllib3.exceptions.HTTPError as e: - raise RestClientException(500, str(e)) - - def get(self, path: str, params: Optional[Dict[str, str]] = None) -> Any: - """Make GET request.""" - return self._make_request('GET', path, params=params) - - def post(self, path: str, body: Any) -> Any: - """Make POST request.""" - return self._make_request('POST', path, 
body=body) - - def delete(self, path: str, params: Optional[Dict[str, str]] = None) -> None: - """Make DELETE request.""" - self._make_request('DELETE', path, params=params) - - def close(self): - """Close the HTTP connection pool.""" - self.http.clear() - - -class RestClientException(Exception): - """Exception raised by REST client.""" - - def __init__(self, status_code: int, response_body: str): - self.status_code = status_code - self.response_body = response_body - super().__init__(f"HTTP {status_code}: {response_body}") - - -class LanceNamespaceException(Exception): - """Exception for Lance namespace operations.""" - - def __init__(self, status_code: int, message: str): - self.status_code = status_code - super().__init__(message) - - @classmethod - def not_found(cls, message: str, error_code: str, resource: str, details: str = ""): - """Create a not found exception.""" - full_message = f"{message} [{error_code}]: {resource}" - if details: - full_message += f" - {details}" - return cls(404, full_message) - - @classmethod - def bad_request(cls, message: str, error_code: str, resource: str, details: str = ""): - """Create a bad request exception.""" - full_message = f"{message} [{error_code}]: {resource}" - if details: - full_message += f" - {details}" - return cls(400, full_message) - - @classmethod - def conflict(cls, message: str, error_code: str, resource: str, details: str = ""): - """Create a conflict exception.""" - full_message = f"{message} [{error_code}]: {resource}" - if details: - full_message += f" - {details}" - return cls(409, full_message) + def get_base_api_url(self) -> str: + """Get the base API URL without prefix.""" + return self.endpoint.rstrip("/") def create_dummy_schema() -> Dict[str, Any]: @@ -195,19 +97,20 @@ def create_dummy_schema() -> Dict[str, Any]: return { "type": "struct", "schema-id": 0, - "fields": [ - { - "id": 1, - "name": "dummy", - "required": False, - "type": "string" - } - ] + "fields": [{"id": 1, "name": "dummy", 
"required": False, "type": "string"}], } class IcebergNamespace(LanceNamespace): - """Iceberg REST Catalog namespace implementation for Lance.""" + """ + Iceberg REST Catalog namespace implementation for Lance. + + The prefix (warehouse) is included in the namespace identifier: + - Namespace ID format: [prefix, namespace1, namespace2, ...] + - Table ID format: [prefix, namespace1, namespace2, ..., table_name] + + This is consistent with how Polaris handles catalog names. + """ TABLE_TYPE_LANCE = "lance" TABLE_TYPE_KEY = "table_type" @@ -215,22 +118,25 @@ class IcebergNamespace(LanceNamespace): def __init__(self, **properties): """Initialize Iceberg namespace with configuration properties.""" self.config = IcebergNamespaceConfig(properties) + self._prefix_cache: Dict[str, str] = {} headers = {} if self.config.auth_token: - headers['Authorization'] = f"Bearer {self.config.auth_token}" + headers["Authorization"] = f"Bearer {self.config.auth_token}" if self.config.warehouse: - headers['X-Iceberg-Access-Delegation'] = 'vended-credentials' + headers["X-Iceberg-Access-Delegation"] = "vended-credentials" self.rest_client = RestClient( - base_url=self.config.get_full_api_url(), + base_url=self.config.get_base_api_url(), headers=headers, connect_timeout=self.config.connect_timeout, read_timeout=self.config.read_timeout, - max_retries=self.config.max_retries + max_retries=self.config.max_retries, ) - logger.info(f"Initialized Iceberg namespace with endpoint: {self.config.endpoint}") + logger.info( + f"Initialized Iceberg namespace with endpoint: {self.config.endpoint}" + ) def namespace_id(self) -> str: """Return a human-readable unique identifier for this namespace instance.""" @@ -238,330 +144,452 @@ def namespace_id(self) -> str: def _encode_namespace(self, namespace: List[str]) -> str: """Encode namespace for URL path.""" - encoded_parts = [urllib.parse.quote(s, safe='') for s in namespace] + encoded_parts = [urllib.parse.quote(s, safe="") for s in namespace] joined = 
NAMESPACE_SEPARATOR.join(encoded_parts) - return urllib.parse.quote(joined, safe='') + return urllib.parse.quote(joined, safe="") + + def _resolve_prefix(self, warehouse: str) -> str: + """Resolve warehouse name to actual API prefix. + + Some Iceberg REST catalogs (like Lakekeeper) use a different prefix + (e.g., warehouse UUID) than the warehouse name. This method calls + the config endpoint to get the actual prefix. + """ + if warehouse in self._prefix_cache: + return self._prefix_cache[warehouse] + + try: + response = self.rest_client.get( + "/v1/config", params={"warehouse": warehouse} + ) + if response and "defaults" in response: + prefix = response["defaults"].get("prefix") + if prefix: + self._prefix_cache[warehouse] = prefix + logger.debug( + f"Resolved warehouse '{warehouse}' to prefix '{prefix}'" + ) + return prefix + except Exception as e: + logger.debug(f"Failed to resolve prefix for warehouse '{warehouse}': {e}") + + self._prefix_cache[warehouse] = warehouse + return warehouse + + def _get_prefix_path(self, warehouse: str) -> str: + """Get the API path with prefix.""" + prefix = self._resolve_prefix(warehouse) + return f"/v1/{prefix}" def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse: - """List namespaces.""" + """List namespaces. + + The first element of request.id is treated as the prefix (warehouse). + Remaining elements specify the parent namespace to list children of. 
+ """ ns_id = self._parse_identifier(request.id) + if not ns_id: + raise InvalidInputException("Must specify at least the prefix (warehouse)") + try: + prefix = ns_id[0] + parent_ns = ns_id[1:] if len(ns_id) > 1 else [] + prefix_path = self._get_prefix_path(prefix) + params = {} - if ns_id: - parent = self._encode_namespace(ns_id) - params['parent'] = parent + if parent_ns: + parent = self._encode_namespace(parent_ns) + params["parent"] = parent if request.page_token: - params['pageToken'] = request.page_token + params["pageToken"] = request.page_token - response = self.rest_client.get('/namespaces', params=params if params else None) + response = self.rest_client.get( + f"{prefix_path}/namespaces", params=params if params else None + ) namespaces = [] - if response and 'namespaces' in response: - for ns in response['namespaces']: + if response and "namespaces" in response: + for ns in response["namespaces"]: if ns: - namespaces.append(ns[-1]) + full_ns = [prefix] + list(ns) + namespaces.append(".".join(full_ns)) namespaces = sorted(set(namespaces)) - result = ListNamespacesResponse() - result.namespaces = namespaces - return result + return ListNamespacesResponse(namespaces=namespaces) + except RestClientException as e: + raise InternalException(f"Failed to list namespaces: {e}") + except InvalidInputException: + raise except Exception as e: - if isinstance(e, LanceNamespaceException): - raise - raise LanceNamespaceException(500, f"Failed to list namespaces: {e}") + raise InternalException(f"Failed to list namespaces: {e}") - def create_namespace(self, request: CreateNamespaceRequest) -> CreateNamespaceResponse: - """Create a new namespace.""" + def create_namespace( + self, request: CreateNamespaceRequest + ) -> CreateNamespaceResponse: + """Create a new namespace. + + The first element of request.id is treated as the prefix (warehouse). + Remaining elements are the namespace to create. 
+ """ ns_id = self._parse_identifier(request.id) - if not ns_id: - raise ValueError("Namespace must have at least one level") + if len(ns_id) < 2: + raise InvalidInputException( + "Namespace must have at least prefix and namespace levels" + ) try: + prefix = ns_id[0] + namespace = ns_id[1:] + prefix_path = self._get_prefix_path(prefix) + create_request = { - "namespace": ns_id, - "properties": request.properties or {} + "namespace": namespace, + "properties": request.properties or {}, } - response = self.rest_client.post('/namespaces', create_request) + response = self.rest_client.post( + f"{prefix_path}/namespaces", create_request + ) - result = CreateNamespaceResponse() - result.properties = response.get('properties') if response else None - return result + logger.info(f"Created namespace: {prefix}.{'.'.join(namespace)}") + + properties = response.get("properties") if response else {} + return CreateNamespaceResponse(properties=properties) except RestClientException as e: - if e.status_code == 409: - raise LanceNamespaceException.conflict( - "Namespace already exists", - "NAMESPACE_EXISTS", - '.'.join(request.id), - e.response_body + if e.is_conflict(): + raise NamespaceAlreadyExistsException( + f"Namespace already exists: {'.'.join(request.id)}" ) - raise LanceNamespaceException(500, f"Failed to create namespace: {e}") + raise InternalException(f"Failed to create namespace: {e}") + except (NamespaceAlreadyExistsException, InvalidInputException): + raise except Exception as e: - raise LanceNamespaceException(500, f"Failed to create namespace: {e}") + raise InternalException(f"Failed to create namespace: {e}") + + def describe_namespace( + self, request: DescribeNamespaceRequest + ) -> DescribeNamespaceResponse: + """Describe a namespace. - def describe_namespace(self, request: DescribeNamespaceRequest) -> DescribeNamespaceResponse: - """Describe a namespace.""" + The first element of request.id is treated as the prefix (warehouse). 
+ Remaining elements are the namespace to describe. + """ ns_id = self._parse_identifier(request.id) - if not ns_id: - raise ValueError("Namespace must have at least one level") + if len(ns_id) < 2: + raise InvalidInputException( + "Namespace must have at least prefix and namespace levels" + ) try: - namespace_path = self._encode_namespace(ns_id) - response = self.rest_client.get(f"/namespaces/{namespace_path}") + prefix = ns_id[0] + namespace = ns_id[1:] + prefix_path = self._get_prefix_path(prefix) + namespace_path = self._encode_namespace(namespace) + + response = self.rest_client.get( + f"{prefix_path}/namespaces/{namespace_path}" + ) - result = DescribeNamespaceResponse() - result.properties = response.get('properties') if response else None - return result + properties = response.get("properties") if response else {} + return DescribeNamespaceResponse(properties=properties) except RestClientException as e: - if e.status_code == 404: - raise LanceNamespaceException.not_found( - "Namespace not found", - "NAMESPACE_NOT_FOUND", - '.'.join(request.id), - e.response_body + if e.is_not_found(): + raise NamespaceNotFoundException( + f"Namespace not found: {'.'.join(request.id)}" ) - raise LanceNamespaceException(500, f"Failed to describe namespace: {e}") + raise InternalException(f"Failed to describe namespace: {e}") + except (NamespaceNotFoundException, InvalidInputException): + raise except Exception as e: - raise LanceNamespaceException(500, f"Failed to describe namespace: {e}") - - def namespace_exists(self, request: NamespaceExistsRequest) -> None: - """Check if a namespace exists.""" - describe_request = DescribeNamespaceRequest() - describe_request.id = request.id - self.describe_namespace(describe_request) + raise InternalException(f"Failed to describe namespace: {e}") def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse: - """Drop a namespace.""" + """Drop a namespace. 
+ + The first element of request.id is treated as the prefix (warehouse). + Remaining elements are the namespace to drop. + """ + if request.behavior and request.behavior.lower() == "cascade": + raise InvalidInputException( + "Cascade behavior is not supported for this implementation" + ) + ns_id = self._parse_identifier(request.id) - if not ns_id: - raise ValueError("Namespace must have at least one level") + if len(ns_id) < 2: + raise InvalidInputException( + "Namespace must have at least prefix and namespace levels" + ) try: - namespace_path = self._encode_namespace(ns_id) - self.rest_client.delete(f"/namespaces/{namespace_path}") + prefix = ns_id[0] + namespace = ns_id[1:] + prefix_path = self._get_prefix_path(prefix) + namespace_path = self._encode_namespace(namespace) + + self.rest_client.delete(f"{prefix_path}/namespaces/{namespace_path}") - return DropNamespaceResponse() + logger.info(f"Dropped namespace: {prefix}.{'.'.join(namespace)}") + + return DropNamespaceResponse(properties={}) except RestClientException as e: - if e.status_code == 404: - return DropNamespaceResponse() - if e.status_code == 409: - raise LanceNamespaceException.conflict( - "Namespace not empty", - "NAMESPACE_NOT_EMPTY", - '.'.join(request.id), - e.response_body - ) - raise LanceNamespaceException(500, f"Failed to drop namespace: {e}") + if e.is_not_found(): + return DropNamespaceResponse(properties={}) + if e.is_conflict(): + raise InternalException(f"Namespace not empty: {'.'.join(request.id)}") + raise InternalException(f"Failed to drop namespace: {e}") + except InvalidInputException: + raise except Exception as e: - raise LanceNamespaceException(500, f"Failed to drop namespace: {e}") + raise InternalException(f"Failed to drop namespace: {e}") def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: - """List tables in a namespace.""" + """List tables in a namespace. + + The first element of request.id is treated as the prefix (warehouse). 
+ Remaining elements are the namespace to list tables from. + """ ns_id = self._parse_identifier(request.id) - if not ns_id: - raise ValueError("Namespace must have at least one level") + if len(ns_id) < 2: + raise InvalidInputException("Must specify at least prefix and namespace") try: - namespace_path = self._encode_namespace(ns_id) + prefix = ns_id[0] + namespace = ns_id[1:] + prefix_path = self._get_prefix_path(prefix) + namespace_path = self._encode_namespace(namespace) + params = {} if request.page_token: - params['pageToken'] = request.page_token + params["pageToken"] = request.page_token response = self.rest_client.get( - f"/namespaces/{namespace_path}/tables", - params=params if params else None + f"{prefix_path}/namespaces/{namespace_path}/tables", + params=params if params else None, ) tables = [] - if response and 'identifiers' in response: - for table_id in response['identifiers']: - table_name = table_id.get('name') - if table_name and self._is_lance_table(ns_id, table_name): + if response and "identifiers" in response: + for table_id in response["identifiers"]: + table_name = table_id.get("name") + if table_name and self._is_lance_table( + prefix, namespace, table_name + ): tables.append(table_name) tables = sorted(set(tables)) - result = ListTablesResponse() - result.tables = tables - return result + return ListTablesResponse(tables=tables) + except RestClientException as e: + if e.is_not_found(): + raise NamespaceNotFoundException( + f"Namespace not found: {'.'.join(ns_id)}" + ) + raise InternalException(f"Failed to list tables: {e}") + except (NamespaceNotFoundException, InvalidInputException): + raise except Exception as e: - if isinstance(e, LanceNamespaceException): - raise - raise LanceNamespaceException(500, f"Failed to list tables: {e}") + raise InternalException(f"Failed to list tables: {e}") + + def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse: + """Declare a table (metadata only operation). 
- def create_empty_table(self, request: CreateEmptyTableRequest) -> CreateEmptyTableResponse: - """Create an empty table (metadata only operation).""" + The first element of request.id is treated as the prefix (warehouse). + Middle elements are the namespace, last element is the table name. + """ table_id = self._parse_identifier(request.id) - if len(table_id) < 2: - raise ValueError("Table identifier must have at least namespace and table name") + if len(table_id) < 3: + raise InvalidInputException( + "Table identifier must have prefix, namespace, and table name" + ) - namespace = table_id[:-1] + prefix = table_id[0] + namespace = table_id[1:-1] table_name = table_id[-1] try: + prefix_path = self._get_prefix_path(prefix) + table_path = request.location if not table_path: - table_path = f"{self.config.root}/{'/'.join(namespace)}/{table_name}" + table_path = ( + f"{self.config.root}/{'/'.join(table_id[:-1])}/{table_name}" + ) - properties = { - self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE - } - if request.properties: - properties.update(request.properties) + properties = {self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE} create_request = { "name": table_name, "location": table_path, "schema": create_dummy_schema(), - "properties": properties + "properties": properties, } namespace_path = self._encode_namespace(namespace) - response = self.rest_client.post( - f"/namespaces/{namespace_path}/tables", - create_request + self.rest_client.post( + f"{prefix_path}/namespaces/{namespace_path}/tables", create_request ) - result = CreateEmptyTableResponse() - result.location = table_path - if response and 'metadata' in response: - result.properties = response['metadata'].get('properties') - return result + logger.info(f"Declared table: {'.'.join(table_id)}") + + return DeclareTableResponse(location=table_path) except RestClientException as e: - if e.status_code == 409: - raise LanceNamespaceException.conflict( - "Table already exists", - "TABLE_EXISTS", - '.'.join(request.id), - 
e.response_body + if e.is_conflict(): + raise TableAlreadyExistsException( + f"Table already exists: {'.'.join(request.id)}" ) - if e.status_code == 404: - raise LanceNamespaceException.not_found( - "Namespace not found", - "NAMESPACE_NOT_FOUND", - '.'.join(namespace), - e.response_body + if e.is_not_found(): + raise NamespaceNotFoundException( + f"Namespace not found: {prefix}.{'.'.join(namespace)}" ) - raise LanceNamespaceException(500, f"Failed to create empty table: {e}") + raise InternalException(f"Failed to declare table: {e}") + except ( + TableAlreadyExistsException, + NamespaceNotFoundException, + InvalidInputException, + ): + raise except Exception as e: - raise LanceNamespaceException(500, f"Failed to create empty table: {e}") + raise InternalException(f"Failed to declare table: {e}") + + def create_empty_table( + self, request: CreateEmptyTableRequest + ) -> CreateEmptyTableResponse: + """Create an empty table (metadata only operation). + + .. deprecated:: + Use :meth:`declare_table` instead. + """ + warnings.warn( + "create_empty_table is deprecated, use declare_table instead", + DeprecationWarning, + stacklevel=2, + ) + declare_request = DeclareTableRequest() + declare_request.id = request.id + declare_request.location = request.location + response = self.declare_table(declare_request) + return CreateEmptyTableResponse(location=response.location) def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: - """Describe a table.""" + """Describe a table. + + The first element of request.id is treated as the prefix (warehouse). + Middle elements are the namespace, last element is the table name. 
+ """ + if request.load_detailed_metadata: + raise InvalidInputException( + "load_detailed_metadata=true is not supported for this implementation" + ) + table_id = self._parse_identifier(request.id) - if len(table_id) < 2: - raise ValueError("Table identifier must have at least namespace and table name") + if len(table_id) < 3: + raise InvalidInputException( + "Table identifier must have prefix, namespace, and table name" + ) - namespace = table_id[:-1] + prefix = table_id[0] + namespace = table_id[1:-1] table_name = table_id[-1] try: + prefix_path = self._get_prefix_path(prefix) namespace_path = self._encode_namespace(namespace) - encoded_table_name = urllib.parse.quote(table_name, safe='') + encoded_table_name = urllib.parse.quote(table_name, safe="") response = self.rest_client.get( - f"/namespaces/{namespace_path}/tables/{encoded_table_name}" + f"{prefix_path}/namespaces/{namespace_path}/tables/{encoded_table_name}" ) - if not response or 'metadata' not in response: - raise LanceNamespaceException.not_found( - "Table not found", - "TABLE_NOT_FOUND", - '.'.join(request.id), - "No metadata" - ) + if not response or "metadata" not in response: + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") - metadata = response['metadata'] - props = metadata.get('properties', {}) + metadata = response["metadata"] + props = metadata.get("properties", {}) - if not props.get(self.TABLE_TYPE_KEY, '').lower() == self.TABLE_TYPE_LANCE.lower(): - raise LanceNamespaceException.bad_request( - "Not a Lance table", - "INVALID_TABLE", - '.'.join(request.id), - "Table is not managed by Lance" + if ( + not props.get(self.TABLE_TYPE_KEY, "").lower() + == self.TABLE_TYPE_LANCE.lower() + ): + raise InvalidInputException( + f"Table {'.'.join(request.id)} is not a Lance table" ) - result = DescribeTableResponse() - result.location = metadata.get('location') - result.properties = props - return result + return DescribeTableResponse( + location=metadata.get("location"), 
storage_options=props + ) except RestClientException as e: - if e.status_code == 404: - raise LanceNamespaceException.not_found( - "Table not found", - "TABLE_NOT_FOUND", - '.'.join(request.id), - e.response_body - ) - raise LanceNamespaceException(500, f"Failed to describe table: {e}") + if e.is_not_found(): + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + raise InternalException(f"Failed to describe table: {e}") + except (TableNotFoundException, InvalidInputException): + raise except Exception as e: - if isinstance(e, LanceNamespaceException): - raise - raise LanceNamespaceException(500, f"Failed to describe table: {e}") - - def table_exists(self, request: TableExistsRequest) -> None: - """Check if a table exists.""" - describe_request = DescribeTableRequest() - describe_request.id = request.id - self.describe_table(describe_request) - - def drop_table(self, request: DropTableRequest) -> DropTableResponse: - """Drop a table.""" + raise InternalException(f"Failed to describe table: {e}") + + def deregister_table( + self, request: DeregisterTableRequest + ) -> DeregisterTableResponse: + """Deregister a table (remove from catalog without deleting data). + + The first element of request.id is treated as the prefix (warehouse). + Middle elements are the namespace, last element is the table name. 
+ """ table_id = self._parse_identifier(request.id) - if len(table_id) < 2: - raise ValueError("Table identifier must have at least namespace and table name") + if len(table_id) < 3: + raise InvalidInputException( + "Table identifier must have prefix, namespace, and table name" + ) - namespace = table_id[:-1] + prefix = table_id[0] + namespace = table_id[1:-1] table_name = table_id[-1] try: + prefix_path = self._get_prefix_path(prefix) namespace_path = self._encode_namespace(namespace) - encoded_table_name = urllib.parse.quote(table_name, safe='') + encoded_table_name = urllib.parse.quote(table_name, safe="") + + response = self.rest_client.get( + f"{prefix_path}/namespaces/{namespace_path}/tables/{encoded_table_name}" + ) table_location = None - try: - response = self.rest_client.get( - f"/namespaces/{namespace_path}/tables/{encoded_table_name}" - ) - if response and 'metadata' in response: - table_location = response['metadata'].get('location') - except RestClientException as e: - if e.status_code == 404: - result = DropTableResponse() - result.id = request.id - return result + if response and "metadata" in response: + table_location = response["metadata"].get("location") self.rest_client.delete( - f"/namespaces/{namespace_path}/tables/{encoded_table_name}", - params={'purgeRequested': 'false'} + f"{prefix_path}/namespaces/{namespace_path}/tables/{encoded_table_name}", + params={"purgeRequested": "false"}, ) - result = DropTableResponse() - result.id = request.id - result.location = table_location - return result + logger.info(f"Deregistered table: {'.'.join(table_id)}") + return DeregisterTableResponse(location=table_location) + + except RestClientException as e: + if e.is_not_found(): + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + raise InternalException(f"Failed to deregister table: {e}") + except (TableNotFoundException, InvalidInputException): + raise except Exception as e: - if isinstance(e, LanceNamespaceException): - raise - 
raise LanceNamespaceException(500, f"Failed to drop table: {e}") + raise InternalException(f"Failed to deregister table: {e}") def close(self): """Close the namespace connection.""" @@ -572,19 +600,25 @@ def _parse_identifier(self, identifier: List[str]) -> List[str]: """Parse identifier list.""" return identifier if identifier else [] - def _is_lance_table(self, namespace: List[str], table_name: str) -> bool: + def _is_lance_table( + self, prefix: str, namespace: List[str], table_name: str + ) -> bool: """Check if a table is a Lance table.""" try: + prefix_path = self._get_prefix_path(prefix) namespace_path = self._encode_namespace(namespace) - encoded_table_name = urllib.parse.quote(table_name, safe='') + encoded_table_name = urllib.parse.quote(table_name, safe="") response = self.rest_client.get( - f"/namespaces/{namespace_path}/tables/{encoded_table_name}" + f"{prefix_path}/namespaces/{namespace_path}/tables/{encoded_table_name}" ) - if response and 'metadata' in response: - props = response['metadata'].get('properties', {}) - return props.get(self.TABLE_TYPE_KEY, '').lower() == self.TABLE_TYPE_LANCE.lower() + if response and "metadata" in response: + props = response["metadata"].get("properties", {}) + return ( + props.get(self.TABLE_TYPE_KEY, "").lower() + == self.TABLE_TYPE_LANCE.lower() + ) except Exception as e: logger.debug(f"Failed to check if table is Lance table: {e}") return False diff --git a/python/src/lance_namespace_impls/polaris.py b/python/src/lance_namespace_impls/polaris.py new file mode 100644 index 0000000..2dbcc21 --- /dev/null +++ b/python/src/lance_namespace_impls/polaris.py @@ -0,0 +1,448 @@ +""" +Polaris Catalog namespace implementation for Lance. 
+""" + +import logging +from dataclasses import dataclass +from typing import Dict, List, Optional + +from lance.namespace import LanceNamespace +from lance_namespace_urllib3_client.models import ( + CreateEmptyTableRequest, + CreateEmptyTableResponse, + CreateNamespaceRequest, + CreateNamespaceResponse, + DeregisterTableRequest, + DeregisterTableResponse, + DescribeNamespaceRequest, + DescribeNamespaceResponse, + DescribeTableRequest, + DescribeTableResponse, + DropNamespaceRequest, + DropNamespaceResponse, + ListNamespacesRequest, + ListNamespacesResponse, + ListTablesRequest, + ListTablesResponse, +) + +from lance_namespace_impls.rest_client import ( + RestClient, + RestClientException, + InternalException, + InvalidInputException, + NamespaceAlreadyExistsException, + NamespaceNotFoundException, + TableAlreadyExistsException, + TableNotFoundException, +) + +logger = logging.getLogger(__name__) + + +@dataclass +class PolarisNamespaceConfig: + """Configuration for Polaris Catalog namespace.""" + + ENDPOINT = "polaris.endpoint" + AUTH_TOKEN = "polaris.auth_token" + CONNECT_TIMEOUT = "polaris.connect_timeout_millis" + READ_TIMEOUT = "polaris.read_timeout_millis" + MAX_RETRIES = "polaris.max_retries" + ROOT = "polaris.root" + + endpoint: str + auth_token: Optional[str] = None + connect_timeout: int = 10000 + read_timeout: int = 30000 + max_retries: int = 3 + root: str = "/tmp/lance" + + def __init__(self, properties: Dict[str, str]): + self.endpoint = properties.get(self.ENDPOINT) + if not self.endpoint: + raise ValueError(f"Required property {self.ENDPOINT} is not set") + + self.auth_token = properties.get(self.AUTH_TOKEN) + self.connect_timeout = int(properties.get(self.CONNECT_TIMEOUT, "10000")) + self.read_timeout = int(properties.get(self.READ_TIMEOUT, "30000")) + self.max_retries = int(properties.get(self.MAX_RETRIES, "3")) + self.root = properties.get(self.ROOT, "/tmp/lance") + + def get_full_api_url(self) -> str: + """Get the full API URL for Polaris catalog 
operations.""" + return self.endpoint.rstrip("/") + "/api/catalog" + + +class PolarisNamespace(LanceNamespace): + """Polaris Catalog namespace implementation for Lance.""" + + TABLE_FORMAT_LANCE = "lance" + TABLE_TYPE_KEY = "table_type" + + def __init__(self, **properties): + """Initialize Polaris namespace with configuration properties.""" + self.config = PolarisNamespaceConfig(properties) + + headers = {} + if self.config.auth_token: + headers["Authorization"] = f"Bearer {self.config.auth_token}" + + self.rest_client = RestClient( + base_url=self.config.get_full_api_url(), + headers=headers, + connect_timeout=self.config.connect_timeout, + read_timeout=self.config.read_timeout, + max_retries=self.config.max_retries, + ) + + logger.info( + f"Initialized Polaris namespace with endpoint: {self.config.endpoint}" + ) + + def namespace_id(self) -> str: + """Return a human-readable unique identifier for this namespace instance.""" + return f"PolarisNamespace {{ endpoint: {self.config.endpoint!r} }}" + + def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse: + """List namespaces.""" + ns_id = self._parse_identifier(request.id) + + if not ns_id: + raise InvalidInputException("Must specify at least the catalog") + + try: + catalog = ns_id[0] + if len(ns_id) == 1: + # List namespaces at catalog level + path = f"/v1/{catalog}/namespaces" + else: + # List nested namespaces + parent_path = ".".join(ns_id[1:]) + path = f"/v1/{catalog}/namespaces/{parent_path}/namespaces" + + response = self.rest_client.get(path) + + namespaces = [] + if response and "namespaces" in response: + for ns in response["namespaces"]: + if ns: + # Prefix with catalog name + full_ns = [catalog] + list(ns) + namespaces.append(".".join(full_ns)) + + namespaces = sorted(set(namespaces)) + + return ListNamespacesResponse(namespaces=namespaces) + + except RestClientException as e: + raise InternalException(f"Failed to list namespaces: {e}") + except InvalidInputException: + 
raise + except Exception as e: + raise InternalException(f"Failed to list namespaces: {e}") + + def create_namespace( + self, request: CreateNamespaceRequest + ) -> CreateNamespaceResponse: + """Create a new namespace.""" + ns_id = self._parse_identifier(request.id) + + if len(ns_id) < 2: + raise InvalidInputException( + "Namespace must have at least catalog and namespace levels" + ) + + try: + catalog = ns_id[0] + namespace = ns_id[1:] + + create_request = { + "namespace": namespace, + "properties": request.properties or {}, + } + + response = self.rest_client.post( + f"/v1/{catalog}/namespaces", create_request + ) + + logger.info(f"Created namespace: {catalog}.{'.'.join(namespace)}") + + properties = response.get("properties") if response else {} + return CreateNamespaceResponse(properties=properties) + + except RestClientException as e: + if e.is_conflict(): + raise NamespaceAlreadyExistsException( + f"Namespace already exists: {'.'.join(request.id)}" + ) + raise InternalException(f"Failed to create namespace: {e}") + except (NamespaceAlreadyExistsException, InvalidInputException): + raise + except Exception as e: + raise InternalException(f"Failed to create namespace: {e}") + + def describe_namespace( + self, request: DescribeNamespaceRequest + ) -> DescribeNamespaceResponse: + """Describe a namespace.""" + ns_id = self._parse_identifier(request.id) + + if len(ns_id) < 2: + raise InvalidInputException( + "Namespace must have at least catalog and namespace levels" + ) + + try: + catalog = ns_id[0] + namespace_path = ".".join(ns_id[1:]) + response = self.rest_client.get( + f"/v1/{catalog}/namespaces/{namespace_path}" + ) + + properties = response.get("properties") if response else {} + return DescribeNamespaceResponse(properties=properties) + + except RestClientException as e: + if e.is_not_found(): + raise NamespaceNotFoundException( + f"Namespace not found: {'.'.join(request.id)}" + ) + raise InternalException(f"Failed to describe namespace: {e}") + except 
(NamespaceNotFoundException, InvalidInputException): + raise + except Exception as e: + raise InternalException(f"Failed to describe namespace: {e}") + + def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse: + """Drop a namespace. Only RESTRICT mode is supported.""" + if request.behavior and request.behavior.lower() == "cascade": + raise InvalidInputException( + "Cascade behavior is not supported for this implementation" + ) + + ns_id = self._parse_identifier(request.id) + + if len(ns_id) < 2: + raise InvalidInputException( + "Namespace must have at least catalog and namespace levels" + ) + + try: + catalog = ns_id[0] + namespace_path = ".".join(ns_id[1:]) + self.rest_client.delete(f"/v1/{catalog}/namespaces/{namespace_path}") + + logger.info(f"Dropped namespace: {catalog}.{namespace_path}") + + return DropNamespaceResponse(properties={}) + + except RestClientException as e: + if e.is_not_found(): + return DropNamespaceResponse(properties={}) + raise InternalException(f"Failed to drop namespace: {e}") + except InvalidInputException: + raise + except Exception as e: + raise InternalException(f"Failed to drop namespace: {e}") + + def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: + """List tables in a namespace.""" + ns_id = self._parse_identifier(request.id) + + if len(ns_id) < 2: + raise InvalidInputException("Must specify at least catalog and namespace") + + try: + catalog = ns_id[0] + namespace_path = ".".join(ns_id[1:]) + response = self.rest_client.get( + f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables" + ) + + tables = [] + if response and "identifiers" in response: + for table_id in response["identifiers"]: + table_name = table_id.get("name") + if table_name: + tables.append(table_name) + + tables = sorted(set(tables)) + + return ListTablesResponse(tables=tables) + + except RestClientException as e: + if e.is_not_found(): + raise NamespaceNotFoundException( + f"Namespace not found: 
{'.'.join(ns_id)}" + ) + raise InternalException(f"Failed to list tables: {e}") + except (NamespaceNotFoundException, InvalidInputException): + raise + except Exception as e: + raise InternalException(f"Failed to list tables: {e}") + + def create_empty_table( + self, request: CreateEmptyTableRequest + ) -> CreateEmptyTableResponse: + """Create an empty table (metadata only operation).""" + table_id = self._parse_identifier(request.id) + + if len(table_id) < 3: + raise InvalidInputException( + "Table identifier must have catalog, namespace, and table name" + ) + + catalog = table_id[0] + namespace = table_id[1:-1] + table_name = table_id[-1] + + try: + table_path = request.location + if not table_path: + table_path = ( + f"{self.config.root}/{'/'.join(table_id[:-1])}/{table_name}" + ) + + properties = {self.TABLE_TYPE_KEY: self.TABLE_FORMAT_LANCE} + + create_request = { + "name": table_name, + "format": self.TABLE_FORMAT_LANCE, + "base-location": table_path, + "properties": properties, + } + + namespace_path = ".".join(namespace) + self.rest_client.post( + f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables", + create_request, + ) + + logger.info(f"Created table: {'.'.join(table_id)}") + + return CreateEmptyTableResponse(location=table_path) + + except RestClientException as e: + if e.is_conflict(): + raise TableAlreadyExistsException( + f"Table already exists: {'.'.join(request.id)}" + ) + if e.is_not_found(): + raise NamespaceNotFoundException( + f"Namespace not found: {catalog}.{'.'.join(namespace)}" + ) + raise InternalException(f"Failed to create empty table: {e}") + except ( + TableAlreadyExistsException, + NamespaceNotFoundException, + InvalidInputException, + ): + raise + except Exception as e: + raise InternalException(f"Failed to create empty table: {e}") + + def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: + """Describe a table. + + Only load_detailed_metadata=false is supported. 
Returns location and storage_options only. + """ + if request.load_detailed_metadata: + raise InvalidInputException( + "load_detailed_metadata=true is not supported for this implementation" + ) + + table_id = self._parse_identifier(request.id) + + if len(table_id) < 3: + raise InvalidInputException( + "Table identifier must have catalog, namespace, and table name" + ) + + catalog = table_id[0] + namespace = table_id[1:-1] + table_name = table_id[-1] + + try: + namespace_path = ".".join(namespace) + + response = self.rest_client.get( + f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}" + ) + + if not response or "table" not in response: + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + + table = response["table"] + table_format = table.get("format", "") + + if table_format.lower() != self.TABLE_FORMAT_LANCE: + raise InvalidInputException( + f"Table {'.'.join(request.id)} is not a Lance table (format: {table_format})" + ) + + return DescribeTableResponse( + location=table.get("base-location"), + storage_options=table.get("properties", {}), + ) + + except RestClientException as e: + if e.is_not_found(): + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + raise InternalException(f"Failed to describe table: {e}") + except (TableNotFoundException, InvalidInputException): + raise + except Exception as e: + raise InternalException(f"Failed to describe table: {e}") + + def deregister_table( + self, request: DeregisterTableRequest + ) -> DeregisterTableResponse: + """Deregister a table (remove from catalog without deleting data).""" + table_id = self._parse_identifier(request.id) + + if len(table_id) < 3: + raise InvalidInputException( + "Table identifier must have catalog, namespace, and table name" + ) + + catalog = table_id[0] + namespace = table_id[1:-1] + table_name = table_id[-1] + + try: + namespace_path = ".".join(namespace) + + response = self.rest_client.get( + 
f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}" + ) + + table_location = None + if response and "table" in response: + table_location = response["table"].get("base-location") + + self.rest_client.delete( + f"/polaris/v1/{catalog}/namespaces/{namespace_path}/generic-tables/{table_name}" + ) + + logger.info(f"Deregistered table: {'.'.join(table_id)}") + + return DeregisterTableResponse(location=table_location) + + except RestClientException as e: + if e.is_not_found(): + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + raise InternalException(f"Failed to deregister table: {e}") + except (TableNotFoundException, InvalidInputException): + raise + except Exception as e: + raise InternalException(f"Failed to deregister table: {e}") + + def close(self): + """Close the namespace connection.""" + if self.rest_client: + self.rest_client.close() + + def _parse_identifier(self, identifier: List[str]) -> List[str]: + """Parse identifier list.""" + return identifier if identifier else [] diff --git a/python/src/lance_namespace_impls/rest_client.py b/python/src/lance_namespace_impls/rest_client.py new file mode 100644 index 0000000..d2634ad --- /dev/null +++ b/python/src/lance_namespace_impls/rest_client.py @@ -0,0 +1,294 @@ +""" +Shared REST client infrastructure for Lance namespace implementations. 
+""" + +import json +import logging +from typing import Any, Callable, Dict, Optional, Type, TypeVar + +import urllib3 +import urllib.parse + +logger = logging.getLogger(__name__) + +T = TypeVar("T") + + +class RestClientException(Exception): + """Exception raised by REST client.""" + + def __init__(self, status_code: int, response_body: str): + self.status_code = status_code + self.response_body = response_body + super().__init__(f"HTTP {status_code}: {response_body}") + + def is_not_found(self) -> bool: + """Check if this is a 404 Not Found error.""" + return self.status_code == 404 + + def is_conflict(self) -> bool: + """Check if this is a 409 Conflict error.""" + return self.status_code == 409 + + def is_bad_request(self) -> bool: + """Check if this is a 400 Bad Request error.""" + return self.status_code == 400 + + +class RestClient: + """ + A reusable REST client for making HTTP requests to REST APIs. + + This client provides: + - Connection pooling for efficient HTTP connections + - Configurable timeouts for connect and read operations + - Retry logic with exponential backoff + - JSON serialization/deserialization + - Support for common HTTP methods (GET, POST, DELETE) + + Example usage: + client = RestClient( + base_url="http://localhost:8080/api", + headers={"Authorization": "Bearer token"}, + connect_timeout=10000, + read_timeout=30000, + max_retries=3 + ) + response = client.get("/resource") + """ + + def __init__( + self, + base_url: str, + headers: Optional[Dict[str, str]] = None, + connect_timeout: int = 10000, + read_timeout: int = 30000, + max_retries: int = 3, + ): + """ + Initialize the REST client. 
+ + Args: + base_url: The base URL for all requests (e.g., "http://localhost:8080/api") + headers: Default headers to include with every request + connect_timeout: Connection timeout in milliseconds (default: 10000) + read_timeout: Read timeout in milliseconds (default: 30000) + max_retries: Maximum number of retry attempts (default: 3) + """ + self.base_url = base_url.rstrip("/") + self.headers = headers.copy() if headers else {} + self.headers.setdefault("Content-Type", "application/json") + self.headers.setdefault("Accept", "application/json") + + timeout = urllib3.Timeout( + connect=connect_timeout / 1000, read=read_timeout / 1000 + ) + self.http = urllib3.PoolManager( + timeout=timeout, + retries=urllib3.Retry(total=max_retries, backoff_factor=0.3), + ) + + def _make_request( + self, + method: str, + path: str, + params: Optional[Dict[str, str]] = None, + body: Optional[Any] = None, + ) -> Any: + """Make HTTP request.""" + url = f"{self.base_url}{path}" + + if params: + query_string = urllib.parse.urlencode(params) + url = f"{url}?{query_string}" + + body_data = None + if body is not None: + if hasattr(body, "__dict__"): + body_dict = self._dataclass_to_dict(body) + elif isinstance(body, dict): + body_dict = body + else: + body_dict = body + body_data = json.dumps(body_dict).encode("utf-8") + + try: + response = self.http.request( + method, url, headers=self.headers, body=body_data + ) + + if response.status >= 400: + raise RestClientException( + response.status, response.data.decode("utf-8") + ) + + if response.data: + data = response.data.decode("utf-8") + # Handle empty or non-JSON responses (e.g., "200 OK" for DELETE) + if not data or data.strip() in ("", "200 OK", "OK"): + return None + try: + return json.loads(data) + except json.JSONDecodeError: + # If it's not valid JSON, return None for successful responses + if response.status < 300: + return None + raise + return None + + except urllib3.exceptions.HTTPError as e: + raise RestClientException(500, 
str(e)) + + def _dataclass_to_dict(self, obj: Any) -> Dict[str, Any]: + """Convert dataclass to dictionary, handling nested structures.""" + if hasattr(obj, "__dict__"): + result = {} + for key, value in obj.__dict__.items(): + if value is not None: + if isinstance(value, list): + result[key] = [self._dataclass_to_dict(item) for item in value] + elif hasattr(value, "__dict__"): + result[key] = self._dataclass_to_dict(value) + else: + result[key] = value + return result + return obj + + def get( + self, + path: str, + params: Optional[Dict[str, str]] = None, + response_class: Optional[Type[T]] = None, + response_converter: Optional[Callable[[Dict[str, Any]], T]] = None, + ) -> Any: + """ + Make GET request. + + Args: + path: The URL path (will be appended to base_url) + params: Optional query parameters + response_class: Optional class to instantiate with response data + response_converter: Optional function to convert response dict to object + + Returns: + The response data (as dict, or converted to response_class/using response_converter) + """ + response = self._make_request("GET", path, params=params) + if response_converter and response: + return response_converter(response) + if response_class and response: + return response_class(**response) + return response + + def post( + self, + path: str, + body: Any, + response_class: Optional[Type[T]] = None, + response_converter: Optional[Callable[[Dict[str, Any]], T]] = None, + ) -> Any: + """ + Make POST request. 
+ + Args: + path: The URL path (will be appended to base_url) + body: The request body (will be JSON serialized) + response_class: Optional class to instantiate with response data + response_converter: Optional function to convert response dict to object + + Returns: + The response data (as dict, or converted to response_class/using response_converter) + """ + response = self._make_request("POST", path, body=body) + if response_converter and response: + return response_converter(response) + if response_class and response: + return response_class(**response) + return response + + def put( + self, + path: str, + body: Any, + response_class: Optional[Type[T]] = None, + response_converter: Optional[Callable[[Dict[str, Any]], T]] = None, + ) -> Any: + """ + Make PUT request. + + Args: + path: The URL path (will be appended to base_url) + body: The request body (will be JSON serialized) + response_class: Optional class to instantiate with response data + response_converter: Optional function to convert response dict to object + + Returns: + The response data (as dict, or converted to response_class/using response_converter) + """ + response = self._make_request("PUT", path, body=body) + if response_converter and response: + return response_converter(response) + if response_class and response: + return response_class(**response) + return response + + def delete( + self, + path: str, + params: Optional[Dict[str, str]] = None, + ) -> None: + """ + Make DELETE request. 
+ + Args: + path: The URL path (will be appended to base_url) + params: Optional query parameters + """ + self._make_request("DELETE", path, params=params) + + def close(self) -> None: + """Close the HTTP connection pool.""" + self.http.clear() + + +class NamespaceException(Exception): + """Base exception for namespace operations.""" + + def __init__(self, message: str): + super().__init__(message) + + +class NamespaceNotFoundException(NamespaceException): + """Exception raised when a namespace is not found.""" + + pass + + +class NamespaceAlreadyExistsException(NamespaceException): + """Exception raised when a namespace already exists.""" + + pass + + +class TableNotFoundException(NamespaceException): + """Exception raised when a table is not found.""" + + pass + + +class TableAlreadyExistsException(NamespaceException): + """Exception raised when a table already exists.""" + + pass + + +class InvalidInputException(NamespaceException): + """Exception raised for invalid input.""" + + pass + + +class InternalException(NamespaceException): + """Exception raised for internal errors.""" + + pass diff --git a/python/src/lance_namespace_impls/schema.py b/python/src/lance_namespace_impls/schema.py index 17c9c2a..20d7f6e 100644 --- a/python/src/lance_namespace_impls/schema.py +++ b/python/src/lance_namespace_impls/schema.py @@ -8,6 +8,7 @@ try: import pyarrow as pa + HAS_PYARROW = True except ImportError: pa = None @@ -22,72 +23,72 @@ def convert_json_arrow_schema_to_pyarrow(json_schema: JsonArrowSchema) -> "pa.Schema": """Convert JsonArrowSchema to PyArrow Schema. 
- + Args: json_schema: JsonArrowSchema from the client models - + Returns: PyArrow Schema object - + Raises: ImportError: If PyArrow is not available ValueError: If unsupported Arrow type is encountered """ if not HAS_PYARROW: raise ImportError("PyArrow is required for schema conversion") - + fields = [] for json_field in json_schema.fields: arrow_type = convert_json_arrow_type_to_pyarrow(json_field.type) field = pa.field(json_field.name, arrow_type, nullable=json_field.nullable) fields.append(field) - + return pa.schema(fields, metadata=json_schema.metadata) def convert_pyarrow_schema_to_json_arrow(schema: "pa.Schema") -> JsonArrowSchema: """Convert PyArrow schema to JSON Arrow schema. - + Args: schema: PyArrow schema to convert - + Returns: JsonArrowSchema object - + Raises: ImportError: If PyArrow is not available """ if not HAS_PYARROW: raise ImportError("PyArrow is required for schema conversion") - + fields = [] for field in schema: json_field = JsonArrowField( name=field.name, nullable=field.nullable, type=convert_pyarrow_type_to_json_arrow(field.type), - metadata=field.metadata + metadata=field.metadata, ) fields.append(json_field) - + return JsonArrowSchema(fields=fields) def convert_pyarrow_type_to_json_arrow(dtype: "pa.DataType") -> JsonArrowDataType: """Convert PyArrow data type to JSON Arrow data type. 
- + Args: dtype: PyArrow data type to convert - + Returns: JsonArrowDataType object - + Raises: ImportError: If PyArrow is not available """ if not HAS_PYARROW: raise ImportError("PyArrow is required for schema conversion") - + if pa.types.is_boolean(dtype): return JsonArrowDataType(name="bool") elif pa.types.is_int8(dtype): @@ -115,20 +116,14 @@ def convert_pyarrow_type_to_json_arrow(dtype: "pa.DataType") -> JsonArrowDataTyp elif pa.types.is_binary(dtype): return JsonArrowDataType(name="binary") elif pa.types.is_timestamp(dtype): - return JsonArrowDataType( - name="timestamp", - unit=dtype.unit, - timezone=dtype.tz - ) + return JsonArrowDataType(name="timestamp", unit=dtype.unit, timezone=dtype.tz) elif pa.types.is_date32(dtype): return JsonArrowDataType(name="date", unit="DAY") elif pa.types.is_date64(dtype): return JsonArrowDataType(name="date", unit="MILLISECOND") elif pa.types.is_decimal(dtype): return JsonArrowDataType( - name="decimal", - precision=dtype.precision, - scale=dtype.scale + name="decimal", precision=dtype.precision, scale=dtype.scale ) elif pa.types.is_list(dtype): return JsonArrowDataType(name="list") @@ -142,24 +137,24 @@ def convert_pyarrow_type_to_json_arrow(dtype: "pa.DataType") -> JsonArrowDataTyp def convert_json_arrow_type_to_pyarrow(json_type: JsonArrowDataType) -> "pa.DataType": """Convert JsonArrowDataType to PyArrow DataType. 
- + Args: json_type: JsonArrowDataType from the client models - + Returns: PyArrow DataType object - + Raises: ImportError: If PyArrow is not available ValueError: If unsupported Arrow type is encountered """ if not HAS_PYARROW: raise ImportError("PyArrow is required for type conversion") - + # Convert type name to lowercase but preserve timezone case type_name = json_type.type type_name_lower = type_name.lower() - + if type_name_lower == "null": return pa.null() elif type_name_lower in ["bool", "boolean"]: @@ -196,13 +191,14 @@ def convert_json_arrow_type_to_pyarrow(json_type: JsonArrowDataType) -> "pa.Data # Handle timestamp with timezone if "tz=" in type_name: tz = type_name.split("tz=")[1].rstrip("]") - return pa.timestamp('us', tz=tz) + return pa.timestamp("us", tz=tz) else: - return pa.timestamp('us') + return pa.timestamp("us") elif type_name_lower.startswith("decimal"): # Parse decimal(precision, scale) import re - match = re.match(r'decimal\((\d+),\s*(\d+)\)', type_name) + + match = re.match(r"decimal\((\d+),\s*(\d+)\)", type_name) if match: precision = int(match.group(1)) scale = int(match.group(2)) @@ -210,4 +206,4 @@ def convert_json_arrow_type_to_pyarrow(json_type: JsonArrowDataType) -> "pa.Data else: return pa.decimal128(38, 10) # Default precision/scale else: - raise ValueError(f"Unsupported Arrow type: {type_name_lower}") \ No newline at end of file + raise ValueError(f"Unsupported Arrow type: {type_name_lower}") diff --git a/python/src/lance_namespace_impls/unity.py b/python/src/lance_namespace_impls/unity.py index c089461..d70b4b2 100644 --- a/python/src/lance_namespace_impls/unity.py +++ b/python/src/lance_namespace_impls/unity.py @@ -2,45 +2,44 @@ Unity Catalog namespace implementation for Lance. 
""" -import json +import io import logging -import os -from typing import Dict, List, Optional, Any from dataclasses import dataclass, field -import urllib3 -import urllib.parse -from urllib.error import HTTPError -import io +from typing import Any, Dict, List, Optional import pyarrow as pa import pyarrow.ipc as ipc -import lance +from lance.namespace import LanceNamespace from lance_namespace_urllib3_client.models import ( - ListNamespacesRequest, - ListNamespacesResponse, - DescribeNamespaceRequest, - DescribeNamespaceResponse, + CreateEmptyTableRequest, + CreateEmptyTableResponse, CreateNamespaceRequest, CreateNamespaceResponse, + DeregisterTableRequest, + DeregisterTableResponse, + DescribeNamespaceRequest, + DescribeNamespaceResponse, + DescribeTableRequest, + DescribeTableResponse, DropNamespaceRequest, DropNamespaceResponse, - NamespaceExistsRequest, + ListNamespacesRequest, + ListNamespacesResponse, ListTablesRequest, ListTablesResponse, - DescribeTableRequest, - DescribeTableResponse, - TableExistsRequest, - DropTableRequest, - DropTableResponse, - CreateTableRequest, - CreateTableResponse, - CreateEmptyTableRequest, - CreateEmptyTableResponse, ) -from lance.namespace import LanceNamespace - +from lance_namespace_impls.rest_client import ( + RestClient, + RestClientException, + InternalException, + InvalidInputException, + NamespaceAlreadyExistsException, + NamespaceNotFoundException, + TableAlreadyExistsException, + TableNotFoundException, +) logger = logging.getLogger(__name__) @@ -48,46 +47,47 @@ @dataclass class UnityNamespaceConfig: """Configuration for Unity Catalog namespace.""" - + ENDPOINT = "unity.endpoint" - CATALOG = "unity.catalog" ROOT = "unity.root" AUTH_TOKEN = "unity.auth_token" CONNECT_TIMEOUT = "unity.connect_timeout_millis" READ_TIMEOUT = "unity.read_timeout_millis" MAX_RETRIES = "unity.max_retries" - + endpoint: str - catalog: str root: str auth_token: Optional[str] = None connect_timeout: int = 10000 read_timeout: int = 300000 
max_retries: int = 3 - + def __init__(self, properties: Dict[str, str]): self.endpoint = properties.get(self.ENDPOINT) if not self.endpoint: raise ValueError(f"Required property {self.ENDPOINT} is not set") - - self.catalog = properties.get(self.CATALOG, "unity") + self.root = properties.get(self.ROOT, "/tmp/lance") self.auth_token = properties.get(self.AUTH_TOKEN) self.connect_timeout = int(properties.get(self.CONNECT_TIMEOUT, "10000")) self.read_timeout = int(properties.get(self.READ_TIMEOUT, "300000")) self.max_retries = int(properties.get(self.MAX_RETRIES, "3")) - + def get_full_api_url(self) -> str: - """Get the full API URL with /api/2.1 path.""" - base = self.endpoint.rstrip('/') - if not base.endswith('/api/2.1'): - base = f"{base}/api/2.1" + """Get the full API URL with /api/2.1/unity-catalog path.""" + base = self.endpoint.rstrip("/") + if not base.endswith("/api/2.1/unity-catalog"): + if base.endswith("/api/2.1"): + base = f"{base}/unity-catalog" + else: + base = f"{base}/api/2.1/unity-catalog" return base @dataclass class SchemaInfo: """Unity schema information.""" + name: str catalog_name: str comment: Optional[str] = None @@ -101,6 +101,7 @@ class SchemaInfo: @dataclass class ColumnInfo: """Unity column information.""" + name: str type_text: str type_json: str @@ -117,6 +118,7 @@ class ColumnInfo: @dataclass class TableInfo: """Unity table information.""" + name: str catalog_name: str schema_name: str @@ -135,6 +137,7 @@ class TableInfo: @dataclass class CreateSchema: """Request to create a schema.""" + name: str catalog_name: str properties: Optional[Dict[str, str]] = None @@ -143,6 +146,7 @@ class CreateSchema: @dataclass class CreateTable: """Request to create a table.""" + name: str catalog_name: str schema_name: str @@ -153,526 +157,307 @@ class CreateTable: properties: Optional[Dict[str, str]] = None -class RestClient: - """Simple REST client for Unity Catalog API.""" - - def __init__(self, base_url: str, headers: Optional[Dict[str, str]] = 
None, - connect_timeout: int = 10, read_timeout: int = 300, max_retries: int = 3): - self.base_url = base_url.rstrip('/') - self.headers = headers or {} - self.headers['Content-Type'] = 'application/json' - self.headers['Accept'] = 'application/json' - - # Create urllib3 pool manager - timeout = urllib3.Timeout(connect=connect_timeout/1000, read=read_timeout/1000) - self.http = urllib3.PoolManager( - timeout=timeout, - retries=urllib3.Retry(total=max_retries, backoff_factor=0.3) - ) - - def _make_request(self, method: str, path: str, params: Optional[Dict[str, str]] = None, - body: Optional[Any] = None) -> Any: - """Make HTTP request to Unity API.""" - url = f"{self.base_url}{path}" - - # Add query parameters - if params: - query_string = urllib.parse.urlencode(params) - url = f"{url}?{query_string}" - - # Prepare body - body_data = None - if body is not None: - if hasattr(body, '__dict__'): - # Convert dataclass to dict - body_dict = self._dataclass_to_dict(body) - else: - body_dict = body - body_data = json.dumps(body_dict).encode('utf-8') - - try: - response = self.http.request( - method, - url, - headers=self.headers, - body=body_data - ) - - if response.status >= 400: - raise RestClientException(response.status, response.data.decode('utf-8')) - - if response.data: - return json.loads(response.data.decode('utf-8')) - return None - - except urllib3.exceptions.HTTPError as e: - raise RestClientException(500, str(e)) - - def _dataclass_to_dict(self, obj: Any) -> Dict[str, Any]: - """Convert dataclass to dictionary, handling nested structures.""" - if hasattr(obj, '__dict__'): - result = {} - for key, value in obj.__dict__.items(): - if value is not None: - if isinstance(value, list): - result[key] = [self._dataclass_to_dict(item) for item in value] - elif hasattr(value, '__dict__'): - result[key] = self._dataclass_to_dict(value) - else: - result[key] = value - return result - return obj - - def get(self, path: str, params: Optional[Dict[str, str]] = None, - 
response_class: Optional[type] = None) -> Any: - """Make GET request.""" - response = self._make_request('GET', path, params=params) - if response_class and response: - return self._dict_to_dataclass(response, response_class) - return response - - def post(self, path: str, body: Any, response_class: Optional[type] = None) -> Any: - """Make POST request.""" - response = self._make_request('POST', path, body=body) - if response_class and response: - return self._dict_to_dataclass(response, response_class) - return response - - def delete(self, path: str, params: Optional[Dict[str, str]] = None) -> None: - """Make DELETE request.""" - self._make_request('DELETE', path, params=params) - - def _dict_to_dataclass(self, data: Dict[str, Any], cls: type) -> Any: - """Convert dictionary to dataclass instance.""" - if cls == SchemaInfo: - return SchemaInfo(**data) - elif cls == TableInfo: - # Handle nested ColumnInfo objects - columns_data = data.get('columns', []) - columns = [ColumnInfo(**col) for col in columns_data] - data['columns'] = columns - return TableInfo(**data) - return data - - def close(self): - """Close the HTTP connection pool.""" - self.http.clear() - - -class RestClientException(Exception): - """Exception raised by REST client.""" - - def __init__(self, status_code: int, response_body: str): - self.status_code = status_code - self.response_body = response_body - super().__init__(f"HTTP {status_code}: {response_body}") - - -class LanceNamespaceException(Exception): - """Exception for Lance namespace operations.""" - - def __init__(self, status_code: int, message: str): - self.status_code = status_code - super().__init__(message) - - @classmethod - def not_found(cls, message: str, error_code: str, resource: str, details: str = ""): - """Create a not found exception.""" - full_message = f"{message} [{error_code}]: {resource}" - if details: - full_message += f" - {details}" - return cls(404, full_message) - - @classmethod - def bad_request(cls, message: str, 
error_code: str, resource: str, details: str = ""): - """Create a bad request exception.""" - full_message = f"{message} [{error_code}]: {resource}" - if details: - full_message += f" - {details}" - return cls(400, full_message) - - @classmethod - def conflict(cls, message: str, error_code: str, resource: str, details: str = ""): - """Create a conflict exception.""" - full_message = f"{message} [{error_code}]: {resource}" - if details: - full_message += f" - {details}" - return cls(409, full_message) +def _parse_schema_info(data: Dict[str, Any]) -> SchemaInfo: + """Parse SchemaInfo from response dict.""" + return SchemaInfo( + name=data.get("name", ""), + catalog_name=data.get("catalog_name", ""), + comment=data.get("comment"), + properties=data.get("properties", {}), + full_name=data.get("full_name"), + created_at=data.get("created_at"), + updated_at=data.get("updated_at"), + schema_id=data.get("schema_id"), + ) + + +def _parse_table_info(data: Dict[str, Any]) -> TableInfo: + """Parse TableInfo from response dict.""" + columns_data = data.get("columns", []) + columns = [ColumnInfo(**col) for col in columns_data] + return TableInfo( + name=data.get("name", ""), + catalog_name=data.get("catalog_name", ""), + schema_name=data.get("schema_name", ""), + table_type=data.get("table_type", ""), + data_source_format=data.get("data_source_format", ""), + columns=columns, + storage_location=data.get("storage_location", ""), + comment=data.get("comment"), + properties=data.get("properties", {}), + created_at=data.get("created_at"), + updated_at=data.get("updated_at"), + table_id=data.get("table_id"), + full_name=data.get("full_name"), + ) class UnityNamespace(LanceNamespace): """Unity Catalog namespace implementation for Lance.""" - + TABLE_TYPE_LANCE = "lance" TABLE_TYPE_EXTERNAL = "EXTERNAL" MANAGED_BY_KEY = "managed_by" TABLE_TYPE_KEY = "table_type" VERSION_KEY = "version" - + def __init__(self, **properties): """Initialize Unity namespace with configuration properties.""" 
self.config = UnityNamespaceConfig(properties) - - # Build REST client with authentication if provided + headers = {} if self.config.auth_token: - headers['Authorization'] = f"Bearer {self.config.auth_token}" - + headers["Authorization"] = f"Bearer {self.config.auth_token}" + self.rest_client = RestClient( base_url=self.config.get_full_api_url(), headers=headers, connect_timeout=self.config.connect_timeout, read_timeout=self.config.read_timeout, - max_retries=self.config.max_retries + max_retries=self.config.max_retries, ) - logger.info(f"Initialized Unity namespace with endpoint: {self.config.endpoint}") + logger.info( + f"Initialized Unity namespace with endpoint: {self.config.endpoint}" + ) def namespace_id(self) -> str: """Return a human-readable unique identifier for this namespace instance.""" - return f"UnityNamespace {{ endpoint: {self.config.endpoint!r}, catalog: {self.config.catalog!r} }}" + return f"UnityNamespace {{ endpoint: {self.config.endpoint!r} }}" def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse: """List namespaces.""" ns_id = self._parse_identifier(request.id) - - # Unity supports 3-level namespace: catalog.schema.table - if len(ns_id) > 2: - raise ValueError(f"Expect at most 2-level namespace but get {'.'.join(ns_id)}") - + + if len(ns_id) > 1: + raise InvalidInputException( + f"Expect at most 1-level namespace but get {'.'.join(ns_id)}" + ) + try: namespaces = [] - + if len(ns_id) == 0: - # Return the configured catalog as the only top-level namespace - namespaces = [self.config.catalog] + # List all catalogs + params = {} + if request.limit: + params["max_results"] = str(request.limit) + if request.page_token: + params["page_token"] = request.page_token + + response = self.rest_client.get( + "/catalogs", params=params if params else None + ) + + if response and "catalogs" in response: + namespaces = [catalog["name"] for catalog in response["catalogs"]] + elif len(ns_id) == 1: - # List schemas in the catalog 
+ # List schemas in a catalog catalog = ns_id[0] - if catalog != self.config.catalog: - raise LanceNamespaceException.not_found( - "Catalog not found", - "CATALOG_NOT_FOUND", - catalog, - f"Expected: {self.config.catalog}" - ) - - params = {'catalog_name': catalog} + + params = {"catalog_name": catalog} if request.limit: - params['max_results'] = str(request.limit) + params["max_results"] = str(request.limit) if request.page_token: - params['page_token'] = request.page_token - - response = self.rest_client.get('/schemas', params=params) - - if response and 'schemas' in response: - namespaces = [schema['name'] for schema in response['schemas']] - - # Sort and deduplicate + params["page_token"] = request.page_token + + response = self.rest_client.get("/schemas", params=params) + + if response and "schemas" in response: + namespaces = [schema["name"] for schema in response["schemas"]] + namespaces = sorted(set(namespaces)) - - response = ListNamespacesResponse() - response.namespaces = namespaces - return response - + + return ListNamespacesResponse(namespaces=namespaces) + + except RestClientException as e: + if e.is_not_found(): + raise NamespaceNotFoundException( + f"Namespace not found: {'.'.join(ns_id)}" + ) + raise InternalException(f"Failed to list namespaces: {e}") + except InvalidInputException: + raise except Exception as e: - if isinstance(e, LanceNamespaceException): - raise - raise LanceNamespaceException(500, f"Failed to list namespaces: {e}") - - def create_namespace(self, request: CreateNamespaceRequest) -> CreateNamespaceResponse: + raise InternalException(f"Failed to list namespaces: {e}") + + def create_namespace( + self, request: CreateNamespaceRequest + ) -> CreateNamespaceResponse: """Create a new namespace.""" ns_id = self._parse_identifier(request.id) - + if len(ns_id) != 2: - raise ValueError(f"Expect a 2-level namespace but get {'.'.join(ns_id)}") - + raise InvalidInputException( + f"Expect a 2-level namespace (catalog.schema) but get 
{'.'.join(ns_id)}" + ) + catalog = ns_id[0] schema = ns_id[1] - - if catalog != self.config.catalog: - raise LanceNamespaceException.bad_request( - "Cannot create namespace in catalog", - "INVALID_CATALOG", - catalog, - f"Expected: {self.config.catalog}" - ) - + try: create_schema = CreateSchema( - name=schema, - catalog_name=catalog, - properties=request.properties + name=schema, catalog_name=catalog, properties=request.properties ) - - schema_info = self.rest_client.post('/schemas', create_schema, SchemaInfo) - - response = CreateNamespaceResponse() - response.properties = schema_info.properties - return response - + + schema_info = self.rest_client.post( + "/schemas", create_schema, response_converter=_parse_schema_info + ) + + logger.info(f"Created namespace: {catalog}.{schema}") + + return CreateNamespaceResponse(properties=schema_info.properties) + except RestClientException as e: - if e.status_code == 409: - raise LanceNamespaceException.conflict( - "Namespace already exists", - "NAMESPACE_EXISTS", - '.'.join(request.id), - e.response_body + if e.is_conflict(): + raise NamespaceAlreadyExistsException( + f"Namespace already exists: {'.'.join(request.id)}" ) - raise LanceNamespaceException(500, f"Failed to create namespace: {e}") + raise InternalException(f"Failed to create namespace: {e}") except Exception as e: - raise LanceNamespaceException(500, f"Failed to create namespace: {e}") - - def describe_namespace(self, request: DescribeNamespaceRequest) -> DescribeNamespaceResponse: + if isinstance(e, (NamespaceAlreadyExistsException, InvalidInputException)): + raise + raise InternalException(f"Failed to create namespace: {e}") + + def describe_namespace( + self, request: DescribeNamespaceRequest + ) -> DescribeNamespaceResponse: """Describe a namespace.""" ns_id = self._parse_identifier(request.id) - + if len(ns_id) != 2: - raise ValueError(f"Expect a 2-level namespace but get {'.'.join(ns_id)}") - + raise InvalidInputException( + f"Expect a 2-level namespace 
(catalog.schema) but get {'.'.join(ns_id)}" + ) + catalog = ns_id[0] schema = ns_id[1] - - if catalog != self.config.catalog: - raise LanceNamespaceException.not_found( - "Catalog not found", - "CATALOG_NOT_FOUND", - catalog, - f"Expected: {self.config.catalog}" - ) - + try: full_name = f"{catalog}.{schema}" - schema_info = self.rest_client.get(f"/schemas/{full_name}", response_class=SchemaInfo) - - response = DescribeNamespaceResponse() - response.properties = schema_info.properties - return response - + schema_info = self.rest_client.get( + f"/schemas/{full_name}", response_converter=_parse_schema_info + ) + + return DescribeNamespaceResponse(properties=schema_info.properties) + except RestClientException as e: - if e.status_code == 404: - raise LanceNamespaceException.not_found( - "Namespace not found", - "NAMESPACE_NOT_FOUND", - '.'.join(request.id), - e.response_body + if e.is_not_found(): + raise NamespaceNotFoundException( + f"Namespace not found: {'.'.join(request.id)}" ) - raise LanceNamespaceException(500, f"Failed to describe namespace: {e}") + raise InternalException(f"Failed to describe namespace: {e}") except Exception as e: - raise LanceNamespaceException(500, f"Failed to describe namespace: {e}") - - def namespace_exists(self, request: NamespaceExistsRequest) -> None: - """Check if a namespace exists.""" - describe_request = DescribeNamespaceRequest() - describe_request.id = request.id - self.describe_namespace(describe_request) - + if isinstance(e, (NamespaceNotFoundException, InvalidInputException)): + raise + raise InternalException(f"Failed to describe namespace: {e}") + def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse: """Drop a namespace.""" + if request.behavior and request.behavior.lower() == "cascade": + raise InvalidInputException( + "Cascade behavior is not supported for this implementation" + ) + ns_id = self._parse_identifier(request.id) - + if len(ns_id) != 2: - raise ValueError(f"Expect a 2-level 
namespace but get {'.'.join(ns_id)}") - + raise InvalidInputException( + f"Expect a 2-level namespace (catalog.schema) but get {'.'.join(ns_id)}" + ) + catalog = ns_id[0] schema = ns_id[1] - - if catalog != self.config.catalog: - raise LanceNamespaceException.bad_request( - "Cannot drop namespace in catalog", - "INVALID_CATALOG", - catalog, - f"Expected: {self.config.catalog}" - ) - + try: full_name = f"{catalog}.{schema}" - params = {} - if request.behavior == DropNamespaceRequest.BehaviorEnum.CASCADE: - params['force'] = 'true' - - self.rest_client.delete(f"/schemas/{full_name}", params=params) - - return DropNamespaceResponse() - + self.rest_client.delete(f"/schemas/{full_name}") + logger.info(f"Dropped namespace: {full_name}") + + return DropNamespaceResponse(properties={}) + except RestClientException as e: - if e.status_code == 404: - # Namespace doesn't exist, return success - return DropNamespaceResponse() - raise LanceNamespaceException(500, f"Failed to drop namespace: {e}") + if e.is_not_found(): + return DropNamespaceResponse(properties={}) + raise InternalException(f"Failed to drop namespace: {e}") except Exception as e: - raise LanceNamespaceException(500, f"Failed to drop namespace: {e}") - + if isinstance(e, InvalidInputException): + raise + raise InternalException(f"Failed to drop namespace: {e}") + def list_tables(self, request: ListTablesRequest) -> ListTablesResponse: """List tables in a namespace.""" ns_id = self._parse_identifier(request.id) - + if len(ns_id) != 2: - raise ValueError(f"Expect a 2-level namespace but get {'.'.join(ns_id)}") - + raise InvalidInputException( + f"Expect a 2-level namespace (catalog.schema) but get {'.'.join(ns_id)}" + ) + catalog = ns_id[0] schema = ns_id[1] - - if catalog != self.config.catalog: - raise LanceNamespaceException.not_found( - "Catalog not found", - "CATALOG_NOT_FOUND", - catalog, - f"Expected: {self.config.catalog}" - ) - + try: - params = { - 'catalog_name': catalog, - 'schema_name': schema - } + 
params = {"catalog_name": catalog, "schema_name": schema} if request.limit: - params['max_results'] = str(request.limit) + params["max_results"] = str(request.limit) if request.page_token: - params['page_token'] = request.page_token - - response = self.rest_client.get('/tables', params=params) - + params["page_token"] = request.page_token + + response = self.rest_client.get("/tables", params=params) + tables = [] - if response and 'tables' in response: - # Filter only Lance tables - for table_data in response['tables']: + if response and "tables" in response: + for table_data in response["tables"]: if self._is_lance_table(table_data): - tables.append(table_data['name']) - - # Sort and deduplicate + tables.append(table_data["name"]) + tables = sorted(set(tables)) - response = ListTablesResponse() - response.tables = tables - return response - - except Exception as e: - raise LanceNamespaceException(500, f"Failed to list tables: {e}") - - def create_table(self, request: CreateTableRequest, request_data: bytes) -> CreateTableResponse: - """Create a new table with data from Arrow IPC stream.""" - if not request_data: - raise ValueError("Request data (Arrow IPC stream) is required for createTable") - - table_id = self._parse_identifier(request.id) - - if len(table_id) != 3: - raise ValueError(f"Expect a 3-level table identifier but get {'.'.join(table_id)}") - - catalog = table_id[0] - schema = table_id[1] - table = table_id[2] - - if catalog != self.config.catalog: - raise LanceNamespaceException.bad_request( - "Cannot create table in catalog", - "INVALID_CATALOG", - catalog, - f"Expected: {self.config.catalog}" - ) - - try: - # First create an empty Lance table dataset - table_path = f"{self.config.root}/{catalog}/{schema}/{table}" - - # Extract schema from Arrow IPC stream - arrow_schema = self._extract_schema_from_ipc(request_data) - - # Create Lance dataset - lance.write_dataset( - pa.table([], schema=arrow_schema), - table_path, - mode="create" - ) - - # Create 
Unity table metadata - columns = self._convert_arrow_schema_to_unity_columns(arrow_schema) - - properties = { - self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE, - self.MANAGED_BY_KEY: "storage", - self.VERSION_KEY: "0" - } - if request.properties: - properties.update(request.properties) - - create_table = CreateTable( - name=table, - catalog_name=catalog, - schema_name=schema, - table_type=self.TABLE_TYPE_EXTERNAL, - data_source_format="TEXT", # Unity doesn't recognize LANCE format - columns=columns, - storage_location=table_path, - properties=properties - ) - - table_info = self.rest_client.post('/tables', create_table, TableInfo) - - response = CreateTableResponse() - response.location = table_path - response.version = 1 - response.properties = table_info.properties - return response - - except RestClientException as e: - if e.status_code == 409: - raise LanceNamespaceException.conflict( - "Table already exists", - "TABLE_EXISTS", - '.'.join(request.id), - e.response_body - ) - raise LanceNamespaceException(500, f"Failed to create table: {e}") + return ListTablesResponse(tables=tables) + + except (NamespaceNotFoundException, InvalidInputException): + raise except Exception as e: - raise LanceNamespaceException(500, f"Failed to create table: {e}") - - def create_empty_table(self, request: CreateEmptyTableRequest) -> CreateEmptyTableResponse: + raise InternalException(f"Failed to list tables: {e}") + + def create_empty_table( + self, request: CreateEmptyTableRequest + ) -> CreateEmptyTableResponse: """Create an empty table (metadata only operation).""" table_id = self._parse_identifier(request.id) - + if len(table_id) != 3: - raise ValueError(f"Expect a 3-level table identifier but get {'.'.join(table_id)}") - + raise InvalidInputException( + f"Expect a 3-level table identifier (catalog.schema.table) but get {'.'.join(table_id)}" + ) + catalog = table_id[0] schema = table_id[1] table = table_id[2] - - if catalog != self.config.catalog: - raise 
LanceNamespaceException.bad_request( - "Cannot create empty table in catalog", - "INVALID_CATALOG", - catalog, - f"Expected: {self.config.catalog}" - ) - + try: - # Determine table location table_path = request.location if not table_path: table_path = f"{self.config.root}/{catalog}/{schema}/{table}" - - # Create Unity table metadata without creating Lance dataset - # For empty table, create minimal schema with just an ID column + columns = [ ColumnInfo( name="__placeholder_id", - type_text="BIGINT", + type_text="LONG", type_json='{"type":"long"}', - type_name="BIGINT", + type_name="LONG", position=0, - nullable=True + nullable=True, ) ] - + properties = { self.TABLE_TYPE_KEY: self.TABLE_TYPE_LANCE, - self.MANAGED_BY_KEY: "catalog" + self.MANAGED_BY_KEY: "catalog", } - if request.properties: - properties.update(request.properties) - + create_table = CreateTable( name=table, catalog_name=catalog, @@ -681,206 +466,176 @@ def create_empty_table(self, request: CreateEmptyTableRequest) -> CreateEmptyTab data_source_format="TEXT", columns=columns, storage_location=table_path, - properties=properties + properties=properties, + ) + + self.rest_client.post( + "/tables", create_table, response_converter=_parse_table_info ) - - table_info = self.rest_client.post('/tables', create_table, TableInfo) - - response = CreateEmptyTableResponse() - response.location = table_path - response.properties = table_info.properties - return response - + + logger.info(f"Created empty table: {catalog}.{schema}.{table}") + + return CreateEmptyTableResponse(location=table_path) + except RestClientException as e: - if e.status_code == 409: - raise LanceNamespaceException.conflict( - "Table already exists", - "TABLE_EXISTS", - '.'.join(request.id), - e.response_body + if e.is_conflict(): + raise TableAlreadyExistsException( + f"Table already exists: {'.'.join(request.id)}" ) - raise LanceNamespaceException(500, f"Failed to create empty table: {e}") + raise InternalException(f"Failed to create empty 
table: {e}") except Exception as e: - raise LanceNamespaceException(500, f"Failed to create empty table: {e}") - + if isinstance(e, (TableAlreadyExistsException, InvalidInputException)): + raise + raise InternalException(f"Failed to create empty table: {e}") + def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse: """Describe a table.""" + if request.load_detailed_metadata: + raise InvalidInputException( + "load_detailed_metadata=true is not supported for this implementation" + ) + table_id = self._parse_identifier(request.id) - + if len(table_id) != 3: - raise ValueError(f"Expect a 3-level table identifier but get {'.'.join(table_id)}") - + raise InvalidInputException( + f"Expect a 3-level table identifier (catalog.schema.table) but get {'.'.join(table_id)}" + ) + catalog = table_id[0] schema = table_id[1] table = table_id[2] - - if catalog != self.config.catalog: - raise LanceNamespaceException.not_found( - "Catalog not found", - "CATALOG_NOT_FOUND", - catalog, - f"Expected: {self.config.catalog}" - ) - + try: full_name = f"{catalog}.{schema}.{table}" - table_info = self.rest_client.get(f"/tables/{full_name}", response_class=TableInfo) - + table_info = self.rest_client.get( + f"/tables/{full_name}", response_converter=_parse_table_info + ) + if not self._is_lance_table_info(table_info): - raise LanceNamespaceException.bad_request( - "Not a Lance table", - "INVALID_TABLE", - '.'.join(request.id), - "Table is not managed by Lance" + raise InvalidInputException( + f"Table {'.'.join(request.id)} is not a Lance table" ) - - # Get the actual schema from the Lance dataset - dataset = lance.dataset(table_info.storage_location) - arrow_schema = dataset.schema - - response = DescribeTableResponse() - response.location = table_info.storage_location - response.properties = table_info.properties - # TODO: Convert Arrow schema to JsonArrowSchema if needed - - return response - + + return DescribeTableResponse( + location=table_info.storage_location, 
+ storage_options=table_info.properties, + ) + except RestClientException as e: - if e.status_code == 404: - raise LanceNamespaceException.not_found( - "Table not found", - "TABLE_NOT_FOUND", - '.'.join(request.id), - e.response_body - ) - raise LanceNamespaceException(500, f"Failed to describe table: {e}") + if e.is_not_found(): + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + raise InternalException(f"Failed to describe table: {e}") except Exception as e: - raise LanceNamespaceException(500, f"Failed to describe table: {e}") - - def table_exists(self, request: TableExistsRequest) -> None: - """Check if a table exists.""" - describe_request = DescribeTableRequest() - describe_request.id = request.id - self.describe_table(describe_request) - - def drop_table(self, request: DropTableRequest) -> DropTableResponse: - """Drop a table.""" + if isinstance( + e, + ( + TableNotFoundException, + NamespaceNotFoundException, + InvalidInputException, + ), + ): + raise + raise InternalException(f"Failed to describe table: {e}") + + def deregister_table( + self, request: DeregisterTableRequest + ) -> DeregisterTableResponse: + """Deregister a table (remove from catalog without deleting data).""" table_id = self._parse_identifier(request.id) - + if len(table_id) != 3: - raise ValueError(f"Expect a 3-level table identifier but get {'.'.join(table_id)}") - + raise InvalidInputException( + f"Expect a 3-level table identifier (catalog.schema.table) but get {'.'.join(table_id)}" + ) + catalog = table_id[0] schema = table_id[1] table = table_id[2] - - if catalog != self.config.catalog: - raise LanceNamespaceException.bad_request( - "Cannot drop table in catalog", - "INVALID_CATALOG", - catalog, - f"Expected: {self.config.catalog}" - ) - + try: full_name = f"{catalog}.{schema}.{table}" - - # First get the table info to check if it's a Lance table - try: - table_info = self.rest_client.get(f"/tables/{full_name}", response_class=TableInfo) - except 
RestClientException as e: - if e.status_code == 404: - response = DropTableResponse() - response.id = request.id - return response - raise - + + table_info = self.rest_client.get( + f"/tables/{full_name}", response_converter=_parse_table_info + ) + if not self._is_lance_table_info(table_info): - raise LanceNamespaceException.bad_request( - "Not a Lance table", - "INVALID_TABLE", - '.'.join(request.id), - "Table is not managed by Lance" + raise InvalidInputException( + f"Table {'.'.join(request.id)} is not a Lance table" ) - - # Delete from Unity + + location = table_info.storage_location self.rest_client.delete(f"/tables/{full_name}") - - # Delete Lance dataset data - try: - import shutil - if os.path.exists(table_info.storage_location): - shutil.rmtree(table_info.storage_location) - except Exception as e: - # Log warning but continue - Unity metadata already deleted - logger.warning(f"Failed to delete Lance dataset at {table_info.storage_location}: {e}") - - response = DropTableResponse() - response.id = request.id - response.location = table_info.storage_location - return response - + + logger.info(f"Deregistered table: {full_name}") + + return DeregisterTableResponse(location=location) + + except RestClientException as e: + if e.is_not_found(): + raise TableNotFoundException(f"Table not found: {'.'.join(request.id)}") + raise InternalException(f"Failed to deregister table: {e}") except Exception as e: - if isinstance(e, LanceNamespaceException): + if isinstance(e, (TableNotFoundException, InvalidInputException)): raise - raise LanceNamespaceException(500, f"Failed to drop table: {e}") - + raise InternalException(f"Failed to deregister table: {e}") + def close(self): """Close the namespace connection.""" if self.rest_client: self.rest_client.close() - + def _parse_identifier(self, identifier: List[str]) -> List[str]: """Parse identifier list.""" return identifier if identifier else [] - + def _is_lance_table(self, table_data: Dict[str, Any]) -> bool: """Check if 
a table dictionary represents a Lance table.""" - if not table_data or 'properties' not in table_data: + if not table_data or "properties" not in table_data: return False - properties = table_data.get('properties', {}) + properties = table_data.get("properties", {}) table_type = properties.get(self.TABLE_TYPE_KEY) return table_type and table_type.lower() == self.TABLE_TYPE_LANCE.lower() - + def _is_lance_table_info(self, table_info: TableInfo) -> bool: """Check if a TableInfo represents a Lance table.""" if not table_info or not table_info.properties: return False table_type = table_info.properties.get(self.TABLE_TYPE_KEY) return table_type and table_type.lower() == self.TABLE_TYPE_LANCE.lower() - + def _extract_schema_from_ipc(self, ipc_data: bytes) -> pa.Schema: """Extract Arrow schema from IPC stream.""" try: reader = ipc.open_stream(io.BytesIO(ipc_data)) return reader.schema except Exception as e: - raise LanceNamespaceException.bad_request( - f"Invalid Arrow IPC stream: {e}", - "INVALID_ARROW_IPC", - "", - "Failed to extract schema from Arrow IPC stream" - ) - - def _convert_arrow_schema_to_unity_columns(self, arrow_schema: pa.Schema) -> List[ColumnInfo]: + raise InvalidInputException(f"Invalid Arrow IPC stream: {e}") + + def _convert_arrow_schema_to_unity_columns( + self, arrow_schema: pa.Schema + ) -> List[ColumnInfo]: """Convert Arrow schema to Unity column definitions.""" columns = [] - for i, field in enumerate(arrow_schema): - unity_type = self._convert_arrow_type_to_unity_type(field.type) - unity_type_json = self._convert_arrow_type_to_unity_type_json(field.type) - + for i, arrow_field in enumerate(arrow_schema): + unity_type = self._convert_arrow_type_to_unity_type(arrow_field.type) + unity_type_json = self._convert_arrow_type_to_unity_type_json( + arrow_field.type + ) + column = ColumnInfo( - name=field.name, + name=arrow_field.name, type_text=unity_type, type_json=unity_type_json, type_name=unity_type, position=i, - nullable=field.nullable + 
nullable=arrow_field.nullable, ) columns.append(column) - + return columns - + def _convert_arrow_type_to_unity_type(self, arrow_type: pa.DataType) -> str: """Convert Arrow type to Unity type string.""" if pa.types.is_string(arrow_type) or pa.types.is_large_string(arrow_type): @@ -888,7 +643,7 @@ def _convert_arrow_type_to_unity_type(self, arrow_type: pa.DataType) -> str: elif pa.types.is_int32(arrow_type): return "INT" elif pa.types.is_int64(arrow_type): - return "BIGINT" + return "LONG" elif pa.types.is_float32(arrow_type): return "FLOAT" elif pa.types.is_float64(arrow_type): @@ -900,9 +655,8 @@ def _convert_arrow_type_to_unity_type(self, arrow_type: pa.DataType) -> str: elif pa.types.is_timestamp(arrow_type): return "TIMESTAMP" else: - # Default fallback return "STRING" - + def _convert_arrow_type_to_unity_type_json(self, arrow_type: pa.DataType) -> str: """Convert Arrow type to Unity type JSON string.""" if pa.types.is_string(arrow_type) or pa.types.is_large_string(arrow_type): @@ -922,5 +676,4 @@ def _convert_arrow_type_to_unity_type_json(self, arrow_type: pa.DataType) -> str elif pa.types.is_timestamp(arrow_type): return '{"type":"timestamp"}' else: - # Default fallback - return '{"type":"string"}' \ No newline at end of file + return '{"type":"string"}' diff --git a/python/tests/__init__.py b/python/tests/__init__.py index ce0f5ed..37e3b4c 100644 --- a/python/tests/__init__.py +++ b/python/tests/__init__.py @@ -1 +1 @@ -"""Tests for lance_namespace package.""" \ No newline at end of file +"""Tests for lance_namespace package.""" diff --git a/python/tests/test_glue.py b/python/tests/test_glue.py index cc5b70c..0893bfb 100644 --- a/python/tests/test_glue.py +++ b/python/tests/test_glue.py @@ -1,94 +1,76 @@ """ Tests for Lance Glue Namespace implementation. 
""" + import pytest -from unittest.mock import Mock, MagicMock, patch -import pyarrow as pa +from unittest.mock import MagicMock, patch -from lance_namespace.glue import GlueNamespace, GlueNamespaceConfig +from lance_namespace_impls.glue import GlueNamespace, GlueNamespaceConfig from lance_namespace_urllib3_client.models import ( ListNamespacesRequest, CreateNamespaceRequest, DescribeNamespaceRequest, DropNamespaceRequest, - NamespaceExistsRequest, ListTablesRequest, - CreateTableRequest, - DropTableRequest, DescribeTableRequest, - RegisterTableRequest, DeregisterTableRequest, - TableExistsRequest, - JsonArrowSchema, - JsonArrowField, - JsonArrowDataType, ) @pytest.fixture def mock_boto3(): """Mock boto3 module.""" - with patch('lance_namespace.glue.boto3') as mock: + with patch("lance_namespace_impls.glue.boto3") as mock: mock.Session.return_value.client.return_value = MagicMock() yield mock @pytest.fixture -def mock_lance(): - """Mock lance module.""" - with patch('lance_namespace.glue.lance') as mock: - yield mock - - -@pytest.fixture -def glue_namespace(mock_boto3, mock_lance): +def glue_namespace(mock_boto3): """Create a GlueNamespace instance with mocked dependencies.""" - properties = { - 'region': 'us-east-1', - 'catalog_id': '123456789012' - } + properties = {"region": "us-east-1", "catalog_id": "123456789012"} namespace = GlueNamespace(**properties) return namespace class TestGlueNamespaceConfig: """Test GlueNamespaceConfig class.""" - + def test_config_initialization(self): """Test configuration initialization.""" properties = { - 'catalog_id': '123456789012', - 'endpoint': 'https://glue.example.com', - 'region': 'us-west-2', - 'access_key_id': 'AKIAEXAMPLE', - 'secret_access_key': 'secret', - 'session_token': 'token', - 'profile_name': 'default', - 'max_retries': '5', - 'retry_mode': 'adaptive', - 'root': 's3://bucket/path', - 'storage.key1': 'value1', - 'storage.key2': 'value2', + "catalog_id": "123456789012", + "endpoint": "https://glue.example.com", 
+ "region": "us-west-2", + "access_key_id": "AKIAEXAMPLE", + "secret_access_key": "secret", + "session_token": "token", + "profile_name": "default", + "max_retries": "5", + "retry_mode": "adaptive", + "root": "s3://bucket/path", + "storage.key1": "value1", + "storage.key2": "value2", } - + config = GlueNamespaceConfig(properties) - - assert config.catalog_id == '123456789012' - assert config.endpoint == 'https://glue.example.com' - assert config.region == 'us-west-2' - assert config.access_key_id == 'AKIAEXAMPLE' - assert config.secret_access_key == 'secret' - assert config.session_token == 'token' - assert config.profile_name == 'default' + + assert config.catalog_id == "123456789012" + assert config.endpoint == "https://glue.example.com" + assert config.region == "us-west-2" + assert config.access_key_id == "AKIAEXAMPLE" + assert config.secret_access_key == "secret" + assert config.session_token == "token" + assert config.profile_name == "default" assert config.max_retries == 5 - assert config.retry_mode == 'adaptive' - assert config.root == 's3://bucket/path' - assert config.storage_options == {'key1': 'value1', 'key2': 'value2'} - + assert config.retry_mode == "adaptive" + assert config.root == "s3://bucket/path" + assert config.storage_options == {"key1": "value1", "key2": "value2"} + def test_config_with_empty_properties(self): """Test configuration with empty properties.""" config = GlueNamespaceConfig({}) - + assert config.catalog_id is None assert config.endpoint is None assert config.region is None @@ -99,549 +81,291 @@ def test_config_with_empty_properties(self): class TestGlueNamespace: """Test GlueNamespace class.""" - + def test_initialization_without_boto3(self): """Test that initialization fails without boto3.""" - with patch('lance_namespace.glue.HAS_BOTO3', False): + with patch("lance_namespace_impls.glue.HAS_BOTO3", False): with pytest.raises(ImportError, match="boto3 is required"): GlueNamespace() - + def test_list_namespaces(self, 
glue_namespace): """Test listing namespaces.""" glue_namespace.glue.get_databases.return_value = { - 'DatabaseList': [ - {'Name': 'db1'}, - {'Name': 'db2'}, + "DatabaseList": [ + {"Name": "db1"}, + {"Name": "db2"}, ] } - + request = ListNamespacesRequest() response = glue_namespace.list_namespaces(request) - - assert response.namespaces == ['db1', 'db2'] + + assert response.namespaces == ["db1", "db2"] glue_namespace.glue.get_databases.assert_called_once() - + def test_list_namespaces_with_pagination(self, glue_namespace): """Test listing namespaces with pagination.""" glue_namespace.glue.get_databases.side_effect = [ + {"DatabaseList": [{"Name": "db1"}], "NextToken": "token1"}, { - 'DatabaseList': [{'Name': 'db1'}], - 'NextToken': 'token1' + "DatabaseList": [{"Name": "db2"}], }, - { - 'DatabaseList': [{'Name': 'db2'}], - } ] - + request = ListNamespacesRequest() response = glue_namespace.list_namespaces(request) - - assert response.namespaces == ['db1', 'db2'] + + assert response.namespaces == ["db1", "db2"] assert glue_namespace.glue.get_databases.call_count == 2 - + def test_list_namespaces_hierarchical_not_supported(self, glue_namespace): """Test that hierarchical namespaces are not supported.""" - request = ListNamespacesRequest(id=['parent']) + request = ListNamespacesRequest(id=["parent"]) response = glue_namespace.list_namespaces(request) - + assert response.namespaces == [] glue_namespace.glue.get_databases.assert_not_called() - + def test_list_namespaces_root(self, glue_namespace): """Test listing namespaces at root level.""" glue_namespace.glue.get_databases.return_value = { - 'DatabaseList': [ - {'Name': 'db1'}, - {'Name': 'db2'}, + "DatabaseList": [ + {"Name": "db1"}, + {"Name": "db2"}, ] } - + # Empty id means root namespace request = ListNamespacesRequest(id=[]) response = glue_namespace.list_namespaces(request) - - assert response.namespaces == ['db1', 'db2'] + + assert response.namespaces == ["db1", "db2"] 
glue_namespace.glue.get_databases.assert_called_once() - + def test_create_namespace(self, glue_namespace): """Test creating a namespace.""" request = CreateNamespaceRequest( - id=['test_db'], - properties={'description': 'Test database', 'location': 's3://bucket/path'} + id=["test_db"], + properties={"description": "Test database", "location": "s3://bucket/path"}, ) - - response = glue_namespace.create_namespace(request) - + + glue_namespace.create_namespace(request) + glue_namespace.glue.create_database.assert_called_once() call_args = glue_namespace.glue.create_database.call_args - assert call_args[1]['DatabaseInput']['Name'] == 'test_db' - assert call_args[1]['DatabaseInput']['Description'] == 'Test database' - assert call_args[1]['DatabaseInput']['LocationUri'] == 's3://bucket/path' - + assert call_args[1]["DatabaseInput"]["Name"] == "test_db" + assert call_args[1]["DatabaseInput"]["Description"] == "Test database" + assert call_args[1]["DatabaseInput"]["LocationUri"] == "s3://bucket/path" + def test_create_namespace_root(self, glue_namespace): """Test creating root namespace fails.""" request = CreateNamespaceRequest(id=[]) - + with pytest.raises(RuntimeError, match="Root namespace already exists"): glue_namespace.create_namespace(request) - + glue_namespace.glue.create_database.assert_not_called() - + def test_create_namespace_already_exists(self, glue_namespace): """Test creating a namespace that already exists.""" + # Create a custom exception with the right name class AlreadyExistsException(Exception): pass - + glue_namespace.glue.exceptions.AlreadyExistsException = AlreadyExistsException - glue_namespace.glue.create_database.side_effect = AlreadyExistsException("Already exists") - - request = CreateNamespaceRequest(id=['test_db']) - + glue_namespace.glue.create_database.side_effect = AlreadyExistsException( + "Already exists" + ) + + request = CreateNamespaceRequest(id=["test_db"]) + with pytest.raises(RuntimeError, match="Namespace already exists"): 
glue_namespace.create_namespace(request) - + def test_describe_namespace_root(self, glue_namespace): """Test describing root namespace.""" request = DescribeNamespaceRequest(id=[]) response = glue_namespace.describe_namespace(request) - - assert response.properties['description'] == 'Root Glue catalog namespace' + + assert response.properties["description"] == "Root Glue catalog namespace" glue_namespace.glue.get_database.assert_not_called() - + def test_describe_namespace(self, glue_namespace): """Test describing a namespace.""" glue_namespace.glue.get_database.return_value = { - 'Database': { - 'Name': 'test_db', - 'Description': 'Test database', - 'LocationUri': 's3://bucket/path', - 'Parameters': {'key': 'value'} + "Database": { + "Name": "test_db", + "Description": "Test database", + "LocationUri": "s3://bucket/path", + "Parameters": {"key": "value"}, } } - - request = DescribeNamespaceRequest(id=['test_db']) + + request = DescribeNamespaceRequest(id=["test_db"]) response = glue_namespace.describe_namespace(request) - - assert response.properties['description'] == 'Test database' - assert response.properties['location'] == 's3://bucket/path' - assert response.properties['key'] == 'value' - + + assert response.properties["description"] == "Test database" + assert response.properties["location"] == "s3://bucket/path" + assert response.properties["key"] == "value" + def test_drop_namespace_root(self, glue_namespace): """Test dropping root namespace fails.""" request = DropNamespaceRequest(id=[]) - + with pytest.raises(RuntimeError, match="Cannot drop root namespace"): glue_namespace.drop_namespace(request) - + glue_namespace.glue.get_tables.assert_not_called() glue_namespace.glue.delete_database.assert_not_called() - + def test_drop_namespace(self, glue_namespace): """Test dropping an empty namespace.""" - glue_namespace.glue.get_tables.return_value = {'TableList': []} - - request = DropNamespaceRequest(id=['test_db']) - response = 
glue_namespace.drop_namespace(request) - - glue_namespace.glue.get_tables.assert_called_once_with(DatabaseName='test_db') - glue_namespace.glue.delete_database.assert_called_once_with(Name='test_db') - + glue_namespace.glue.get_tables.return_value = {"TableList": []} + + request = DropNamespaceRequest(id=["test_db"]) + glue_namespace.drop_namespace(request) + + glue_namespace.glue.get_tables.assert_called_once_with(DatabaseName="test_db") + glue_namespace.glue.delete_database.assert_called_once_with(Name="test_db") + def test_drop_namespace_not_empty(self, glue_namespace): """Test dropping a non-empty namespace.""" glue_namespace.glue.get_tables.return_value = { - 'TableList': [{'Name': 'table1'}] + "TableList": [{"Name": "table1"}] } - - request = DropNamespaceRequest(id=['test_db']) - + + request = DropNamespaceRequest(id=["test_db"]) + with pytest.raises(RuntimeError, match="Cannot drop non-empty namespace"): glue_namespace.drop_namespace(request) - - def test_namespace_exists_root(self, glue_namespace): - """Test checking if root namespace exists.""" - request = NamespaceExistsRequest(id=[]) - glue_namespace.namespace_exists(request) # Should not raise - - glue_namespace.glue.get_database.assert_not_called() - - def test_namespace_exists(self, glue_namespace): - """Test checking if a namespace exists.""" - glue_namespace.glue.get_database.return_value = {'Database': {'Name': 'test_db'}} - - request = NamespaceExistsRequest(id=['test_db']) - glue_namespace.namespace_exists(request) # Should not raise - - glue_namespace.glue.get_database.assert_called_once_with(Name='test_db') - - def test_namespace_not_exists(self, glue_namespace): - """Test checking if a namespace doesn't exist.""" - # Create a custom exception with the right name - class EntityNotFoundException(Exception): - pass - - glue_namespace.glue.exceptions.EntityNotFoundException = EntityNotFoundException - glue_namespace.glue.get_database.side_effect = EntityNotFoundException("Not found") - - request 
= NamespaceExistsRequest(id=['test_db']) - - with pytest.raises(RuntimeError, match="Namespace does not exist"): - glue_namespace.namespace_exists(request) - + def test_list_tables_root(self, glue_namespace): """Test listing tables at root namespace returns empty.""" request = ListTablesRequest(id=[]) response = glue_namespace.list_tables(request) - + assert response.tables == [] glue_namespace.glue.get_tables.assert_not_called() - + def test_list_tables(self, glue_namespace): """Test listing tables in a namespace.""" glue_namespace.glue.get_tables.return_value = { - 'TableList': [ - {'Name': 'table1', 'Parameters': {'table_type': 'LANCE'}}, - {'Name': 'table2', 'Parameters': {'table_type': 'LANCE'}}, - {'Name': 'table3', 'Parameters': {'table_type': 'HIVE'}}, # Not a Lance table + "TableList": [ + {"Name": "table1", "Parameters": {"table_type": "LANCE"}}, + {"Name": "table2", "Parameters": {"table_type": "LANCE"}}, + { + "Name": "table3", + "Parameters": {"table_type": "HIVE"}, + }, # Not a Lance table ] } - - request = ListTablesRequest(id=['test_db']) + + request = ListTablesRequest(id=["test_db"]) response = glue_namespace.list_tables(request) - - assert response.tables == ['table1', 'table2'] - glue_namespace.glue.get_tables.assert_called_once_with(DatabaseName='test_db') - - def test_create_table(self, glue_namespace, mock_lance): - """Test creating a table.""" - glue_namespace.glue.get_database.return_value = { - 'Database': {'LocationUri': 's3://bucket/db'} - } - - schema = JsonArrowSchema( - fields=[ - JsonArrowField(name='id', type=JsonArrowDataType(type='int64'), nullable=False), - JsonArrowField(name='name', type=JsonArrowDataType(type='utf8'), nullable=True), - ] - ) - - request = CreateTableRequest( - id=['test_db', 'test_table'], - var_schema=schema - ) - - # Create mock Arrow IPC stream data - arrow_schema = pa.schema([ - pa.field('id', pa.int64(), nullable=False), - pa.field('name', pa.string(), nullable=True), - ]) - table = pa.table({'id': [1, 
2], 'name': ['Alice', 'Bob']}, schema=arrow_schema) - - # Convert to IPC stream bytes - with pa.BufferOutputStream() as sink: - with pa.ipc.new_stream(sink, arrow_schema) as writer: - writer.write_table(table) - request_data = sink.getvalue().to_pybytes() - - response = glue_namespace.create_table(request, request_data) - - assert response.location == 's3://bucket/db/test_table.lance' - assert response.version == 1 - - # Verify Lance dataset was written - mock_lance.write_dataset.assert_called_once() - - # Verify Glue table was created - glue_namespace.glue.create_table.assert_called_once() - call_args = glue_namespace.glue.create_table.call_args - assert call_args[1]['DatabaseName'] == 'test_db' - assert call_args[1]['TableInput']['Name'] == 'test_table' - assert call_args[1]['TableInput']['Parameters']['table_type'] == 'LANCE' - - def test_create_table_empty_data(self, glue_namespace, mock_lance): - """Test creating a table with empty data.""" - import pyarrow as pa - import io - - glue_namespace.glue.get_database.return_value = { - 'Database': {'LocationUri': 's3://bucket/db'} - } - - # Create an empty Arrow table with schema - arrow_schema = pa.schema([ - pa.field('id', pa.int64(), nullable=False), - pa.field('name', pa.utf8(), nullable=True), - ]) - # Create empty arrays for each field - empty_arrays = [ - pa.array([], type=pa.int64()), - pa.array([], type=pa.utf8()) - ] - empty_table = pa.table(empty_arrays, schema=arrow_schema) - - # Convert to Arrow IPC stream - buffer = io.BytesIO() - with pa.ipc.RecordBatchStreamWriter(buffer, arrow_schema) as writer: - writer.write_table(empty_table) - ipc_data = buffer.getvalue() - - request = CreateTableRequest( - id=['test_db', 'test_table'] - ) - - # Test with empty IPC stream - response = glue_namespace.create_table(request, ipc_data) - - assert response.location == 's3://bucket/db/test_table.lance' - assert response.version == 1 - - # Verify Lance dataset was written with empty table - 
mock_lance.write_dataset.assert_called_once() - written_table = mock_lance.write_dataset.call_args[0][0] - assert written_table.num_rows == 0 - assert len(written_table.schema) == 2 # id and name columns - - def test_drop_table(self, glue_namespace, mock_lance): - """Test dropping a table.""" - # Mock the Glue get_table response - glue_namespace.glue.get_table.return_value = { - 'Table': { - 'Name': 'test_table', - 'Parameters': {'table_type': 'LANCE'}, - 'StorageDescriptor': {'Location': 's3://bucket/table.lance'} - } - } - - # Mock the Lance dataset - mock_dataset = mock_lance.dataset.return_value - - request = DropTableRequest(id=['test_db', 'test_table']) - response = glue_namespace.drop_table(request) - - # Verify Lance dataset was deleted first - mock_lance.dataset.assert_called_once_with( - 's3://bucket/table.lance', - storage_options={} - ) - mock_dataset.delete.assert_called_once() - - # Then verify Glue table was deleted - glue_namespace.glue.delete_table.assert_called_once_with( - DatabaseName='test_db', - Name='test_table' - ) - - def test_deregister_table(self, glue_namespace, mock_lance): + + assert response.tables == ["table1", "table2"] + glue_namespace.glue.get_tables.assert_called_once_with(DatabaseName="test_db") + + def test_deregister_table(self, glue_namespace): """Test deregistering a table (only removes from Glue, keeps Lance dataset).""" - request = DeregisterTableRequest(id=['test_db', 'test_table']) - response = glue_namespace.deregister_table(request) - - # Verify only Glue table was deleted (no Lance operations) - mock_lance.dataset.assert_not_called() + request = DeregisterTableRequest(id=["test_db", "test_table"]) + glue_namespace.deregister_table(request) + glue_namespace.glue.delete_table.assert_called_once_with( - DatabaseName='test_db', - Name='test_table' + DatabaseName="test_db", Name="test_table" ) - + def test_describe_table(self, glue_namespace): """Test describing a table.""" glue_namespace.glue.get_table.return_value = { - 
'Table': { - 'Name': 'test_table', - 'Parameters': {'table_type': 'LANCE'}, - 'StorageDescriptor': {'Location': 's3://bucket/table.lance'} + "Table": { + "Name": "test_table", + "Parameters": {"table_type": "LANCE"}, + "StorageDescriptor": {"Location": "s3://bucket/table.lance"}, } } - - request = DescribeTableRequest(id=['test_db', 'test_table']) + + request = DescribeTableRequest(id=["test_db", "test_table"]) response = glue_namespace.describe_table(request) - - assert response.location == 's3://bucket/table.lance' - + + assert response.location == "s3://bucket/table.lance" + def test_describe_table_not_lance(self, glue_namespace): """Test describing a non-Lance table.""" glue_namespace.glue.get_table.return_value = { - 'Table': { - 'Name': 'test_table', - 'Parameters': {'table_type': 'HIVE'}, - 'StorageDescriptor': {'Location': 's3://bucket/table'} + "Table": { + "Name": "test_table", + "Parameters": {"table_type": "HIVE"}, + "StorageDescriptor": {"Location": "s3://bucket/table"}, } } - - request = DescribeTableRequest(id=['test_db', 'test_table']) - + + request = DescribeTableRequest(id=["test_db", "test_table"]) + with pytest.raises(RuntimeError, match="Table is not a Lance table"): glue_namespace.describe_table(request) - - def test_register_table(self, glue_namespace, mock_lance): - """Test registering an existing table.""" - # Mock Lance dataset - mock_dataset = MagicMock() - mock_dataset.schema = pa.schema([ - pa.field('id', pa.int64()), - pa.field('name', pa.string()), - ]) - mock_lance.dataset.return_value = mock_dataset - - request = RegisterTableRequest( - id=['test_db', 'test_table'], - location='s3://bucket/existing_table.lance' - ) - - response = glue_namespace.register_table(request) - - assert response.location == 's3://bucket/existing_table.lance' - - # Verify Lance dataset was read - mock_lance.dataset.assert_called_once_with( - 's3://bucket/existing_table.lance', - storage_options={} - ) - - # Verify Glue table was created - 
glue_namespace.glue.create_table.assert_called_once() - call_args = glue_namespace.glue.create_table.call_args - assert call_args[1]['DatabaseName'] == 'test_db' - assert call_args[1]['TableInput']['Name'] == 'test_table' - assert call_args[1]['TableInput']['Parameters']['table_type'] == 'LANCE' - - def test_table_exists(self, glue_namespace): - """Test checking if a table exists.""" - glue_namespace.glue.get_table.return_value = { - 'Table': { - 'Name': 'test_table', - 'Parameters': {'table_type': 'LANCE'} - } - } - - request = TableExistsRequest(id=['test_db', 'test_table']) - glue_namespace.table_exists(request) # Should not raise - - glue_namespace.glue.get_table.assert_called_once_with( - DatabaseName='test_db', - Name='test_table' - ) - - def test_table_not_exists(self, glue_namespace): - """Test checking if a table doesn't exist.""" - # Create a custom exception with the right name - class EntityNotFoundException(Exception): - pass - - glue_namespace.glue.exceptions.EntityNotFoundException = EntityNotFoundException - glue_namespace.glue.get_table.side_effect = EntityNotFoundException("Not found") - - request = TableExistsRequest(id=['test_db', 'test_table']) - - with pytest.raises(RuntimeError, match="Table does not exist"): - glue_namespace.table_exists(request) - + def test_parse_table_identifier(self, glue_namespace): """Test parsing table identifier.""" - db, table = glue_namespace._parse_table_identifier(['db', 'table']) - assert db == 'db' - assert table == 'table' - + db, table = glue_namespace._parse_table_identifier(["db", "table"]) + assert db == "db" + assert table == "table" + with pytest.raises(ValueError, match="exactly 2 parts"): - glue_namespace._parse_table_identifier(['db']) - + glue_namespace._parse_table_identifier(["db"]) + with pytest.raises(ValueError, match="exactly 2 parts"): - glue_namespace._parse_table_identifier(['db', 'schema', 'table']) - + glue_namespace._parse_table_identifier(["db", "schema", "table"]) + def 
test_is_lance_table(self, glue_namespace): """Test checking if a Glue table is a Lance table.""" - lance_table = {'Parameters': {'table_type': 'LANCE'}} + lance_table = {"Parameters": {"table_type": "LANCE"}} assert glue_namespace._is_lance_table(lance_table) is True - - lance_table_lower = {'Parameters': {'table_type': 'lance'}} + + lance_table_lower = {"Parameters": {"table_type": "lance"}} assert glue_namespace._is_lance_table(lance_table_lower) is True - - hive_table = {'Parameters': {'table_type': 'HIVE'}} + + hive_table = {"Parameters": {"table_type": "HIVE"}} assert glue_namespace._is_lance_table(hive_table) is False - + no_params = {} assert glue_namespace._is_lance_table(no_params) is False - - def test_pyarrow_type_conversions(self, glue_namespace): - """Test PyArrow to Glue type conversions.""" - # Test basic types - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.bool_()) == 'boolean' - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.int32()) == 'int' - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.int64()) == 'bigint' - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.float32()) == 'float' - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.float64()) == 'double' - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.string()) == 'string' - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.binary()) == 'binary' - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.date32()) == 'date' - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.timestamp('us')) == 'timestamp' - - # Test complex types - assert glue_namespace._convert_pyarrow_type_to_glue_type(pa.list_(pa.int32())) == 'array' - assert glue_namespace._convert_pyarrow_type_to_glue_type( - pa.struct([pa.field('a', pa.int32()), pa.field('b', pa.string())]) - ) == 'struct' - assert glue_namespace._convert_pyarrow_type_to_glue_type( - pa.map_(pa.string(), pa.int32()) - ) == 'map' - - # Test decimal - assert 
glue_namespace._convert_pyarrow_type_to_glue_type(pa.decimal128(10, 2)) == 'decimal(10,2)' - - def test_pyarrow_schema_to_glue_columns(self, glue_namespace): - """Test conversion of PyArrow schema to Glue column definitions.""" - schema = pa.schema([ - pa.field('id', pa.int64()), - pa.field('name', pa.string()), - pa.field('scores', pa.list_(pa.float32())), - pa.field('metadata', pa.struct([ - pa.field('created', pa.timestamp('us')), - pa.field('version', pa.int32()) - ])) - ]) - - columns = glue_namespace._convert_pyarrow_schema_to_glue_columns(schema) - - assert len(columns) == 4 - assert columns[0] == {'Name': 'id', 'Type': 'bigint'} - assert columns[1] == {'Name': 'name', 'Type': 'string'} - assert columns[2] == {'Name': 'scores', 'Type': 'array'} - assert columns[3] == {'Name': 'metadata', 'Type': 'struct'} - + def test_pickle_support(self, mock_boto3): """Test that GlueNamespace can be pickled and unpickled for Ray compatibility.""" import pickle - + # Create a GlueNamespace instance properties = { - 'region': 'us-east-1', - 'catalog_id': '123456789012', - 'endpoint': 'https://glue.example.com', - 'storage.access_key_id': 'test-key', - 'storage.secret_access_key': 'test-secret' + "region": "us-east-1", + "catalog_id": "123456789012", + "endpoint": "https://glue.example.com", + "storage.access_key_id": "test-key", + "storage.secret_access_key": "test-secret", } namespace = GlueNamespace(**properties) - + # Test pickling pickled = pickle.dumps(namespace) assert pickled is not None - + # Test unpickling restored = pickle.loads(pickled) assert isinstance(restored, GlueNamespace) - + # Verify configuration is preserved - assert restored.config.region == 'us-east-1' - assert restored.config.catalog_id == '123456789012' - assert restored.config.endpoint == 'https://glue.example.com' - assert restored.config.storage_options['access_key_id'] == 'test-key' - assert restored.config.storage_options['secret_access_key'] == 'test-secret' - + assert restored.config.region 
== "us-east-1" + assert restored.config.catalog_id == "123456789012" + assert restored.config.endpoint == "https://glue.example.com" + assert restored.config.storage_options["access_key_id"] == "test-key" + assert restored.config.storage_options["secret_access_key"] == "test-secret" + # Verify glue client is None after unpickling (will be lazily initialized) assert restored._glue is None - + # Test that glue client can be re-initialized after unpickling # This will create a new mock client when accessed client = restored.glue assert client is not None assert restored._glue is not None - diff --git a/python/tests/test_glue_integration.py b/python/tests/test_glue_integration.py new file mode 100644 index 0000000..a6a212a --- /dev/null +++ b/python/tests/test_glue_integration.py @@ -0,0 +1,235 @@ +""" +Integration tests for AWS Glue namespace implementation. + +To run these tests locally: + 1. Configure AWS credentials (via environment variables, ~/.aws/credentials, or IAM role) + 2. Set AWS_S3_BUCKET_NAME environment variable + 3. Run: make integ-test-glue + +Tests are automatically skipped if AWS credentials are not available. 
+""" + +import os +import uuid +import unittest + +import pytest + +AWS_REGION = os.environ.get("AWS_REGION", "us-east-1") +AWS_S3_BUCKET_NAME = os.environ.get("AWS_S3_BUCKET_NAME") + + +def check_aws_credentials_available(): + """Check if AWS credentials and S3 bucket are available.""" + if not AWS_S3_BUCKET_NAME: + return False + + if os.environ.get("AWS_ACCESS_KEY_ID") and os.environ.get("AWS_SECRET_ACCESS_KEY"): + return True + + try: + import boto3 + + sts = boto3.client("sts", region_name=AWS_REGION) + sts.get_caller_identity() + return True + except Exception: + return False + + +aws_credentials_available = check_aws_credentials_available() + + +@pytest.mark.integration +@unittest.skipUnless(aws_credentials_available, "AWS credentials are not available") +class TestGlueNamespaceIntegration(unittest.TestCase): + """Integration tests for GlueNamespace against a real AWS Glue catalog.""" + + @classmethod + def setUpClass(cls): + """Set up class-level resources.""" + from lance_namespace_impls.glue import GlueNamespace + + cls.unique_id = uuid.uuid4().hex[:8] + cls.test_database = f"lance_test_db_{cls.unique_id}" + cls.s3_root = f"s3://{AWS_S3_BUCKET_NAME}/lance_glue_test_{cls.unique_id}" + + properties = { + "region": AWS_REGION, + "root": cls.s3_root, + } + + cls.namespace = GlueNamespace(**properties) + + @classmethod + def tearDownClass(cls): + """Clean up class-level resources.""" + if hasattr(cls, "namespace") and cls.namespace: + try: + cls._cleanup_database(cls.test_database) + except Exception: + pass + + @classmethod + def _cleanup_database(cls, database_name): + """Helper to clean up a database and all its tables.""" + from lance_namespace_urllib3_client.models import ( + DropNamespaceRequest, + ListTablesRequest, + DeregisterTableRequest, + ) + + try: + list_request = ListTablesRequest() + list_request.id = [database_name] + response = cls.namespace.list_tables(list_request) + + for table_name in response.tables: + try: + dereg_request = 
DeregisterTableRequest() + dereg_request.id = [database_name, table_name] + cls.namespace.deregister_table(dereg_request) + except Exception: + pass + + drop_request = DropNamespaceRequest() + drop_request.id = [database_name] + cls.namespace.drop_namespace(drop_request) + except Exception: + pass + + def setUp(self): + """Set up test fixtures.""" + self.created_databases = [] + + def tearDown(self): + """Clean up test resources.""" + for db_name in self.created_databases: + try: + self._cleanup_database(db_name) + except Exception: + pass + + def _create_test_database(self, suffix=""): + """Helper to create a test database with tracking for cleanup.""" + from lance_namespace_urllib3_client.models import CreateNamespaceRequest + + db_name = f"lance_test_{uuid.uuid4().hex[:8]}{suffix}" + self.created_databases.append(db_name) + + create_request = CreateNamespaceRequest() + create_request.id = [db_name] + create_request.properties = {"description": "Lance integration test database"} + self.namespace.create_namespace(create_request) + return db_name + + def test_namespace_operations(self): + """Test namespace (database) CRUD operations.""" + from lance_namespace_urllib3_client.models import ( + CreateNamespaceRequest, + DescribeNamespaceRequest, + DropNamespaceRequest, + ListNamespacesRequest, + ) + + db_name = f"lance_test_{uuid.uuid4().hex[:8]}" + self.created_databases.append(db_name) + + create_request = CreateNamespaceRequest() + create_request.id = [db_name] + create_request.properties = {"description": "Test database for Lance"} + + create_response = self.namespace.create_namespace(create_request) + self.assertIsNotNone(create_response) + + describe_request = DescribeNamespaceRequest() + describe_request.id = [db_name] + + describe_response = self.namespace.describe_namespace(describe_request) + self.assertIsNotNone(describe_response) + self.assertEqual( + describe_response.properties.get("description"), "Test database for Lance" + ) + + list_request = 
ListNamespacesRequest() + list_request.id = [] + list_response = self.namespace.list_namespaces(list_request) + self.assertIn(db_name, list_response.namespaces) + + drop_request = DropNamespaceRequest() + drop_request.id = [db_name] + self.namespace.drop_namespace(drop_request) + self.created_databases.remove(db_name) + + def test_table_operations(self): + """Test table CRUD operations.""" + from lance_namespace_urllib3_client.models import ( + CreateEmptyTableRequest, + DescribeTableRequest, + DeregisterTableRequest, + ListTablesRequest, + ) + + db_name = self._create_test_database() + table_name = f"test_table_{uuid.uuid4().hex[:8]}" + table_location = f"{self.s3_root}/{db_name}/{table_name}.lance" + + # Create empty table (DeclareTable) + create_request = CreateEmptyTableRequest() + create_request.id = [db_name, table_name] + create_request.location = table_location + + create_response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(create_response.location) + self.assertEqual(create_response.location, table_location) + + describe_request = DescribeTableRequest() + describe_request.id = [db_name, table_name] + + describe_response = self.namespace.describe_table(describe_request) + self.assertIsNotNone(describe_response.location) + self.assertEqual(describe_response.location, table_location) + + list_request = ListTablesRequest() + list_request.id = [db_name] + + list_response = self.namespace.list_tables(list_request) + self.assertIn(table_name, list_response.tables) + + deregister_request = DeregisterTableRequest() + deregister_request.id = [db_name, table_name] + self.namespace.deregister_table(deregister_request) + + def test_multiple_tables_in_namespace(self): + """Test creating and listing multiple tables in a namespace.""" + from lance_namespace_urllib3_client.models import ( + CreateEmptyTableRequest, + DeregisterTableRequest, + ListTablesRequest, + ) + + db_name = self._create_test_database() + table_names = 
[f"table_{i}_{uuid.uuid4().hex[:6]}" for i in range(3)] + + for table_name in table_names: + table_location = f"{self.s3_root}/{db_name}/{table_name}.lance" + create_request = CreateEmptyTableRequest() + create_request.id = [db_name, table_name] + create_request.location = table_location + self.namespace.create_empty_table(create_request) + + list_request = ListTablesRequest() + list_request.id = [db_name] + list_response = self.namespace.list_tables(list_request) + + for table_name in table_names: + self.assertIn(table_name, list_response.tables) + + for table_name in table_names: + deregister_request = DeregisterTableRequest() + deregister_request.id = [db_name, table_name] + self.namespace.deregister_table(deregister_request) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_hive.py b/python/tests/test_hive.py deleted file mode 100644 index 637a628..0000000 --- a/python/tests/test_hive.py +++ /dev/null @@ -1,480 +0,0 @@ -""" -Tests for Lance Hive2 Namespace implementation. 
-""" -import os -import pytest -import tempfile -from unittest.mock import Mock, MagicMock, patch -import pyarrow as pa - -from lance_namespace import connect -from lance_namespace_urllib3_client.models import ( - ListNamespacesRequest, - DescribeNamespaceRequest, - CreateNamespaceRequest, - DropNamespaceRequest, - NamespaceExistsRequest, - ListTablesRequest, - DescribeTableRequest, - RegisterTableRequest, - DeregisterTableRequest, - TableExistsRequest, - DropTableRequest, - CreateTableRequest, -) - - -@pytest.fixture -def mock_hive_client(): - """Create a mock Hive client.""" - with patch("lance_namespace.hive.HIVE_AVAILABLE", True): - with patch("lance_namespace.hive.HiveMetastoreClient") as mock_client_class: - mock_client = MagicMock() - mock_client_class.return_value = mock_client - yield mock_client - - -@pytest.fixture -def hive_namespace(mock_hive_client): - """Create a Hive2Namespace instance with mocked client.""" - with patch("lance_namespace.hive.HIVE_AVAILABLE", True): - namespace = connect("hive2", { - "uri": "thrift://localhost:9083", - "root": "/tmp/warehouse" - }) - namespace._client = mock_hive_client - return namespace - - -class TestHive2Namespace: - """Test cases for Hive2Namespace.""" - - def test_initialization(self): - """Test namespace initialization.""" - with patch("lance_namespace.hive.HIVE_AVAILABLE", True): - with patch("lance_namespace.hive.HiveMetastoreClient") as mock_client: - namespace = connect("hive2", { - "uri": "thrift://localhost:9083", - "root": "/tmp/warehouse", - "ugi": "user:group1,group2" - }) - - assert namespace.uri == "thrift://localhost:9083" - assert namespace.root == "/tmp/warehouse" - assert namespace.ugi == "user:group1,group2" - - # Client should not be initialized yet (lazy loading) - mock_client.assert_not_called() - - # Access the client property to trigger initialization - _ = namespace.client - mock_client.assert_called_once_with("thrift://localhost:9083", "user:group1,group2") - - def 
test_initialization_without_hive_deps(self): - """Test that initialization fails gracefully without Hive dependencies.""" - with patch("lance_namespace.hive.HIVE_AVAILABLE", False): - with pytest.raises(ValueError, match="Hive dependencies not installed"): - connect("hive2", {"uri": "thrift://localhost:9083"}) - - def test_list_namespaces(self, hive_namespace, mock_hive_client): - """Test listing namespaces (databases).""" - mock_client_instance = MagicMock() - mock_client_instance.get_all_databases.return_value = ["default", "test_db", "prod_db"] - mock_hive_client.__enter__.return_value = mock_client_instance - - request = ListNamespacesRequest() - response = hive_namespace.list_namespaces(request) - - assert response.namespaces == ["test_db", "prod_db"] - mock_client_instance.get_all_databases.assert_called_once() - - def test_describe_namespace(self, hive_namespace, mock_hive_client): - """Test describing a namespace (database).""" - mock_database = MagicMock() - mock_database.description = "Test database" - mock_database.ownerName = "test_user" - mock_database.locationUri = "/tmp/warehouse/test_db.db" - mock_database.parameters = {"key": "value"} - - mock_client_instance = MagicMock() - mock_client_instance.get_database.return_value = mock_database - mock_hive_client.__enter__.return_value = mock_client_instance - - request = DescribeNamespaceRequest(id=["test_db"]) - response = hive_namespace.describe_namespace(request) - - # Response doesn't include id, only properties - assert response.properties["comment"] == "Test database" - assert response.properties["owner"] == "test_user" - assert response.properties["location"] == "/tmp/warehouse/test_db.db" - assert response.properties["key"] == "value" - mock_client_instance.get_database.assert_called_once_with("test_db") - - def test_create_namespace(self, hive_namespace, mock_hive_client): - """Test creating a namespace (database).""" - mock_client_instance = MagicMock() - mock_hive_client.__enter__.return_value 
= mock_client_instance - - # Mock HiveDatabase class - with patch("lance_namespace.hive.HiveDatabase") as mock_hive_db_class: - mock_hive_db = MagicMock() - mock_hive_db_class.return_value = mock_hive_db - - request = CreateNamespaceRequest( - id=["test_db"], - properties={ - "comment": "Test database", - "owner": "test_user", - "location": "/custom/location" - } - ) - response = hive_namespace.create_namespace(request) - - # Response doesn't include id - mock_client_instance.create_database.assert_called_once_with(mock_hive_db) - - # Verify the database object properties were set - assert mock_hive_db.name == "test_db" - assert mock_hive_db.description == "Test database" - assert mock_hive_db.ownerName == "test_user" - assert mock_hive_db.locationUri == "/custom/location" - - def test_drop_namespace(self, hive_namespace, mock_hive_client): - """Test dropping a namespace (database).""" - mock_client_instance = MagicMock() - mock_client_instance.get_all_tables.return_value = [] - mock_hive_client.__enter__.return_value = mock_client_instance - - request = DropNamespaceRequest(id=["test_db"]) - response = hive_namespace.drop_namespace(request) - - mock_client_instance.get_all_tables.assert_called_once_with("test_db") - mock_client_instance.drop_database.assert_called_once_with( - "test_db", deleteData=True, cascade=False - ) - - def test_drop_namespace_cascade(self, hive_namespace, mock_hive_client): - """Test dropping a non-empty namespace with cascade.""" - mock_client_instance = MagicMock() - mock_client_instance.get_all_tables.return_value = ["table1", "table2"] - mock_hive_client.__enter__.return_value = mock_client_instance - - request = DropNamespaceRequest(id=["test_db"], behavior="CASCADE") - response = hive_namespace.drop_namespace(request) - - mock_client_instance.drop_database.assert_called_once_with( - "test_db", deleteData=True, cascade=True - ) - - def test_namespace_exists(self, hive_namespace, mock_hive_client): - """Test checking if a namespace 
exists.""" - mock_client_instance = MagicMock() - mock_hive_client.__enter__.return_value = mock_client_instance - - request = NamespaceExistsRequest(id=["test_db"]) - hive_namespace.namespace_exists(request) - - mock_client_instance.get_database.assert_called_once_with("test_db") - - def test_list_tables(self, hive_namespace, mock_hive_client): - """Test listing tables in a namespace.""" - mock_table1 = MagicMock() - mock_table1.parameters = {"table_type": "lance"} - - mock_table2 = MagicMock() - mock_table2.parameters = {"other_type": "OTHER"} - - mock_table3 = MagicMock() - mock_table3.parameters = {"table_type": "lance"} - - mock_client_instance = MagicMock() - mock_client_instance.get_all_tables.return_value = ["table1", "table2", "table3"] - mock_client_instance.get_table.side_effect = [mock_table1, mock_table2, mock_table3] - mock_hive_client.__enter__.return_value = mock_client_instance - - request = ListTablesRequest(id=["test_db"]) - response = hive_namespace.list_tables(request) - - # Should only return Lance table names - assert response.tables == ["table1", "table3"] - mock_client_instance.get_all_tables.assert_called_once_with("test_db") - - def test_describe_table(self, hive_namespace, mock_hive_client): - """Test describing a table returns Hive metadata without opening Lance dataset.""" - mock_table = MagicMock() - mock_table.sd.location = "/tmp/warehouse/test_db.db/test_table" - mock_table.owner = "table_owner" # Set owner on table object - mock_table.parameters = { - "table_type": "lance", - "version": "42", # Use 'version' not 'lance.version' per hive.md spec - "created_time": "2024-01-01" - } - - mock_client_instance = MagicMock() - mock_client_instance.get_table.return_value = mock_table - mock_hive_client.__enter__.return_value = mock_client_instance - - request = DescribeTableRequest(id=["test_db", "test_table"]) - response = hive_namespace.describe_table(request) - - # Verify response contains Hive metadata - assert response.location == 
"/tmp/warehouse/test_db.db/test_table" - assert response.version == 42 # Parsed from lance.version - assert response.var_schema is None # No schema since we don't open Lance dataset - assert response.properties["owner"] == "table_owner" # From table.owner - assert response.properties["created_time"] == "2024-01-01" - # Properties should include all parameters from Hive - assert response.properties["table_type"] == "lance" - assert response.properties["version"] == "42" - - # Verify we called get_table but didn't try to open Lance dataset - mock_client_instance.get_table.assert_called_once_with("test_db", "test_table") - - def test_register_table(self, hive_namespace, mock_hive_client): - """Test registering a Lance table.""" - with tempfile.TemporaryDirectory() as tmpdir: - # Create a mock Lance dataset - table_path = os.path.join(tmpdir, "test_table") - - # Create sample data - data = pa.table({ - "id": [1, 2, 3], - "name": ["Alice", "Bob", "Charlie"] - }) - - with patch("lance_namespace.hive.lance.dataset") as mock_dataset_func: - mock_dataset = MagicMock() - mock_dataset.schema = data.schema - mock_dataset.version = 1 - mock_dataset_func.return_value = mock_dataset - - mock_client_instance = MagicMock() - mock_hive_client.__enter__.return_value = mock_client_instance - - # Mock all Hive classes - with patch("lance_namespace.hive.HiveTable") as mock_hive_table_class, \ - patch("lance_namespace.hive.StorageDescriptor") as mock_sd_class, \ - patch("lance_namespace.hive.SerDeInfo") as mock_serde_class, \ - patch("lance_namespace.hive.FieldSchema") as mock_field_class: - - mock_hive_table = MagicMock() - mock_hive_table_class.return_value = mock_hive_table - mock_sd = MagicMock() - mock_sd_class.return_value = mock_sd - mock_serde = MagicMock() - mock_serde_class.return_value = mock_serde - mock_field_class.return_value = MagicMock() - - request = RegisterTableRequest( - id=["test_db", "test_table"], - location=table_path, - properties={"owner": "test_user"} - ) - 
response = hive_namespace.register_table(request) - - # Response only includes location - assert response.location == table_path - - mock_client_instance.create_table.assert_called_once_with(mock_hive_table) - - # Verify the table object properties were set - assert mock_hive_table.dbName == "test_db" - assert mock_hive_table.tableName == "test_table" - assert mock_hive_table.tableType == "EXTERNAL_TABLE" - assert mock_sd.location == table_path - # Verify Lance-specific input/output formats - assert mock_sd.inputFormat == "com.lancedb.lance.mapred.LanceInputFormat" - assert mock_sd.outputFormat == "com.lancedb.lance.mapred.LanceOutputFormat" - # Verify SerDe configuration - assert mock_sd.serdeInfo == mock_serde - assert mock_serde.serializationLib == "com.lancedb.lance.mapred.LanceSerDe" - assert mock_hive_table.parameters["table_type"] == "lance" - assert mock_hive_table.parameters["managed_by"] == "storage" # Default - assert "version" not in mock_hive_table.parameters # Not set for storage-managed - assert "EXTERNAL" not in mock_hive_table.parameters # Should not be present - assert mock_hive_table.parameters["owner"] == "test_user" - - def test_register_table_impl_managed(self, hive_namespace, mock_hive_client): - """Test registering a Lance table with managed_by=impl.""" - with tempfile.TemporaryDirectory() as tmpdir: - # Create a mock Lance dataset - table_path = os.path.join(tmpdir, "test_table") - - # Create sample data - data = pa.table({ - "id": [1, 2, 3], - "name": ["Alice", "Bob", "Charlie"] - }) - - with patch("lance_namespace.hive.lance.dataset") as mock_dataset_func: - mock_dataset = MagicMock() - mock_dataset.schema = data.schema - mock_dataset.version = 42 - mock_dataset_func.return_value = mock_dataset - - mock_client_instance = MagicMock() - mock_hive_client.__enter__.return_value = mock_client_instance - - # Mock all Hive classes - with patch("lance_namespace.hive.HiveTable") as mock_hive_table_class, \ - 
patch("lance_namespace.hive.StorageDescriptor") as mock_sd_class, \ - patch("lance_namespace.hive.SerDeInfo") as mock_serde_class, \ - patch("lance_namespace.hive.FieldSchema") as mock_field_class: - - mock_hive_table = MagicMock() - mock_hive_table_class.return_value = mock_hive_table - mock_sd = MagicMock() - mock_sd_class.return_value = mock_sd - mock_serde = MagicMock() - mock_serde_class.return_value = mock_serde - mock_field_class.return_value = MagicMock() - - request = RegisterTableRequest( - id=["test_db", "test_table"], - location=table_path, - properties={"owner": "test_user", "managed_by": "impl"} - ) - response = hive_namespace.register_table(request) - - # Verify version is set when managed_by is "impl" - assert mock_hive_table.parameters["table_type"] == "lance" - assert mock_hive_table.parameters["managed_by"] == "impl" - assert mock_hive_table.parameters["version"] == "42" # Version should be set - assert "EXTERNAL" not in mock_hive_table.parameters # Should not be present - assert mock_hive_table.parameters["owner"] == "test_user" - - def test_table_exists(self, hive_namespace, mock_hive_client): - """Test checking if a table exists.""" - mock_table = MagicMock() - mock_table.parameters = {"table_type": "lance"} - - mock_client_instance = MagicMock() - mock_client_instance.get_table.return_value = mock_table - mock_hive_client.__enter__.return_value = mock_client_instance - - request = TableExistsRequest(id=["test_db", "test_table"]) - hive_namespace.table_exists(request) - - mock_client_instance.get_table.assert_called_once_with("test_db", "test_table") - - def test_drop_table(self, hive_namespace, mock_hive_client): - """Test dropping a table.""" - mock_table = MagicMock() - mock_table.parameters = {"table_type": "lance"} - - mock_client_instance = MagicMock() - mock_client_instance.get_table.return_value = mock_table - mock_hive_client.__enter__.return_value = mock_client_instance - - request = DropTableRequest(id=["test_db", "test_table"]) - 
response = hive_namespace.drop_table(request) - - mock_client_instance.get_table.assert_called_once_with("test_db", "test_table") - mock_client_instance.drop_table.assert_called_once_with( - "test_db", "test_table", deleteData=True - ) - - def test_deregister_table(self, hive_namespace, mock_hive_client): - """Test deregistering a table without deleting data.""" - mock_table = MagicMock() - mock_table.parameters = {"table_type": "lance"} - mock_table.sd.location = "/tmp/test_table" - - mock_client_instance = MagicMock() - mock_client_instance.get_table.return_value = mock_table - mock_hive_client.__enter__.return_value = mock_client_instance - - request = DeregisterTableRequest(id=["test_db", "test_table"]) - response = hive_namespace.deregister_table(request) - - assert response.location == "/tmp/test_table" - mock_client_instance.drop_table.assert_called_once_with( - "test_db", "test_table", deleteData=False - ) - - def test_normalize_identifier(self, hive_namespace): - """Test identifier normalization.""" - # Single element should default to "default" database - assert hive_namespace._normalize_identifier(["test_table"]) == ("default", "test_table") - - # Two elements should be (database, table) - assert hive_namespace._normalize_identifier(["test_db", "test_table"]) == ("test_db", "test_table") - - # More than two elements should raise an error - with pytest.raises(ValueError, match="Invalid identifier"): - hive_namespace._normalize_identifier(["a", "b", "c"]) - - def test_get_table_location(self, hive_namespace): - """Test getting table location.""" - location = hive_namespace._get_table_location("test_db", "test_table") - assert location == "/tmp/warehouse/test_db.db/test_table" - - def test_root_namespace_operations(self, hive_namespace): - """Test root namespace operations.""" - # Test namespace_exists for root - request = NamespaceExistsRequest(id=[]) - hive_namespace.namespace_exists(request) # Should not raise - - # Test describe_namespace for root - 
request = DescribeNamespaceRequest(id=[]) - response = hive_namespace.describe_namespace(request) - assert response.properties["location"] == "/tmp/warehouse" - assert "Root namespace" in response.properties["description"] - - # Test list_tables for root (should be empty) - request = ListTablesRequest(id=[]) - response = hive_namespace.list_tables(request) - assert response.tables == [] - - # Test create_namespace for root (should fail) - request = CreateNamespaceRequest(id=[]) - with pytest.raises(ValueError, match="Root namespace already exists"): - hive_namespace.create_namespace(request) - - # Test drop_namespace for root (should fail) - request = DropNamespaceRequest(id=[]) - with pytest.raises(ValueError, match="Cannot drop root namespace"): - hive_namespace.drop_namespace(request) - - def test_pickle_support(self): - """Test that Hive2Namespace can be pickled and unpickled for Ray compatibility.""" - import pickle - - with patch("lance_namespace.hive.HIVE_AVAILABLE", True): - with patch("lance_namespace.hive.HiveMetastoreClient"): - # Create a Hive2Namespace instance - namespace = connect("hive2", { - "uri": "thrift://localhost:9083", - "root": "/tmp/warehouse", - "ugi": "user:group1,group2", - "client.pool-size": "5", - "storage.access_key_id": "test-key", - "storage.secret_access_key": "test-secret" - }) - - # Test pickling - pickled = pickle.dumps(namespace) - assert pickled is not None - - # Test unpickling - restored = pickle.loads(pickled) - assert isinstance(restored, namespace.__class__) - - # Verify configuration is preserved - assert restored.uri == "thrift://localhost:9083" - assert restored.root == "/tmp/warehouse" - assert restored.ugi == "user:group1,group2" - assert restored.pool_size == 5 - assert restored.storage_properties["access_key_id"] == "test-key" - assert restored.storage_properties["secret_access_key"] == "test-secret" - - # Verify client is None after unpickling (will be lazily initialized) - assert restored._client is None - - # 
Test that client can be re-initialized after unpickling - with patch("lance_namespace.hive.HiveMetastoreClient") as mock_client: - # This will create a new mock client when accessed - client = restored.client - assert client is not None - assert restored._client is not None - mock_client.assert_called_once_with("thrift://localhost:9083", "user:group1,group2") \ No newline at end of file diff --git a/python/tests/test_hive2.py b/python/tests/test_hive2.py new file mode 100644 index 0000000..5811570 --- /dev/null +++ b/python/tests/test_hive2.py @@ -0,0 +1,331 @@ +""" +Tests for Lance Hive2 Namespace implementation. +""" + +import pytest +from unittest.mock import MagicMock, patch + +from lance_namespace_impls.hive2 import Hive2Namespace +from lance_namespace_urllib3_client.models import ( + ListNamespacesRequest, + DescribeNamespaceRequest, + CreateNamespaceRequest, + DropNamespaceRequest, + ListTablesRequest, + DescribeTableRequest, + DeregisterTableRequest, +) + + +@pytest.fixture +def mock_hive_client(): + """Create a mock Hive client.""" + with patch("lance_namespace_impls.hive2.HIVE_AVAILABLE", True): + with patch( + "lance_namespace_impls.hive2.HiveMetastoreClientWrapper" + ) as mock_client_class: + mock_client = MagicMock() + mock_client_class.return_value = mock_client + yield mock_client + + +@pytest.fixture +def hive_namespace(mock_hive_client): + """Create a Hive2Namespace instance with mocked client.""" + with patch("lance_namespace_impls.hive2.HIVE_AVAILABLE", True): + namespace = Hive2Namespace(uri="thrift://localhost:9083", root="/tmp/warehouse") + namespace._client = mock_hive_client + return namespace + + +class TestHive2Namespace: + """Test cases for Hive2Namespace.""" + + def test_initialization(self): + """Test namespace initialization.""" + with patch("lance_namespace_impls.hive2.HIVE_AVAILABLE", True): + with patch( + "lance_namespace_impls.hive2.HiveMetastoreClientWrapper" + ) as mock_client: + namespace = Hive2Namespace( + 
uri="thrift://localhost:9083", + root="/tmp/warehouse", + ugi="user:group1,group2", + ) + + assert namespace.uri == "thrift://localhost:9083" + assert namespace.root == "/tmp/warehouse" + assert namespace.ugi == "user:group1,group2" + + # Client should not be initialized yet (lazy loading) + mock_client.assert_not_called() + + # Access the client property to trigger initialization + _ = namespace.client + mock_client.assert_called_once_with( + "thrift://localhost:9083", "user:group1,group2" + ) + + def test_initialization_without_hive_deps(self): + """Test that initialization fails gracefully without Hive dependencies.""" + with patch("lance_namespace_impls.hive2.HIVE_AVAILABLE", False): + with pytest.raises(ImportError, match="Hive dependencies not installed"): + Hive2Namespace(uri="thrift://localhost:9083") + + def test_list_namespaces(self, hive_namespace, mock_hive_client): + """Test listing namespaces (databases).""" + mock_client_instance = MagicMock() + mock_client_instance.get_all_databases.return_value = [ + "default", + "test_db", + "prod_db", + ] + mock_hive_client.__enter__.return_value = mock_client_instance + + request = ListNamespacesRequest() + response = hive_namespace.list_namespaces(request) + + assert response.namespaces == ["test_db", "prod_db"] + mock_client_instance.get_all_databases.assert_called_once() + + def test_describe_namespace(self, hive_namespace, mock_hive_client): + """Test describing a namespace (database).""" + mock_database = MagicMock() + mock_database.description = "Test database" + mock_database.ownerName = "test_user" + mock_database.locationUri = "/tmp/warehouse/test_db.db" + mock_database.parameters = {"key": "value"} + + mock_client_instance = MagicMock() + mock_client_instance.get_database.return_value = mock_database + mock_hive_client.__enter__.return_value = mock_client_instance + + request = DescribeNamespaceRequest(id=["test_db"]) + response = hive_namespace.describe_namespace(request) + + # Response doesn't 
include id, only properties + assert response.properties["comment"] == "Test database" + assert response.properties["owner"] == "test_user" + assert response.properties["location"] == "/tmp/warehouse/test_db.db" + assert response.properties["key"] == "value" + mock_client_instance.get_database.assert_called_once_with("test_db") + + def test_create_namespace(self, hive_namespace, mock_hive_client): + """Test creating a namespace (database).""" + mock_client_instance = MagicMock() + mock_hive_client.__enter__.return_value = mock_client_instance + + # Mock HiveDatabase class + with patch("lance_namespace_impls.hive2.HiveDatabase") as mock_hive_db_class: + mock_hive_db = MagicMock() + mock_hive_db_class.return_value = mock_hive_db + + request = CreateNamespaceRequest( + id=["test_db"], + properties={ + "comment": "Test database", + "owner": "test_user", + "location": "/custom/location", + }, + ) + hive_namespace.create_namespace(request) + mock_client_instance.create_database.assert_called_once_with(mock_hive_db) + + # Verify the database object properties were set + assert mock_hive_db.name == "test_db" + assert mock_hive_db.description == "Test database" + assert mock_hive_db.ownerName == "test_user" + assert mock_hive_db.locationUri == "/custom/location" + + def test_drop_namespace(self, hive_namespace, mock_hive_client): + """Test dropping a namespace (database).""" + mock_client_instance = MagicMock() + mock_client_instance.get_all_tables.return_value = [] + mock_hive_client.__enter__.return_value = mock_client_instance + + request = DropNamespaceRequest(id=["test_db"]) + hive_namespace.drop_namespace(request) + + mock_client_instance.get_all_tables.assert_called_once_with("test_db") + mock_client_instance.drop_database.assert_called_once_with( + "test_db", deleteData=True, cascade=False + ) + + def test_drop_namespace_not_empty_fails(self, hive_namespace, mock_hive_client): + """Test that dropping a non-empty namespace fails (only RESTRICT mode is supported).""" 
+ mock_client_instance = MagicMock() + mock_client_instance.get_all_tables.return_value = ["table1", "table2"] + mock_hive_client.__enter__.return_value = mock_client_instance + + request = DropNamespaceRequest(id=["test_db"]) + + # Should fail because namespace is not empty and CASCADE is not supported + with pytest.raises(ValueError, match="is not empty"): + hive_namespace.drop_namespace(request) + + def test_list_tables(self, hive_namespace, mock_hive_client): + """Test listing tables in a namespace.""" + mock_table1 = MagicMock() + mock_table1.parameters = {"table_type": "lance"} + + mock_table2 = MagicMock() + mock_table2.parameters = {"other_type": "OTHER"} + + mock_table3 = MagicMock() + mock_table3.parameters = {"table_type": "lance"} + + mock_client_instance = MagicMock() + mock_client_instance.get_all_tables.return_value = [ + "table1", + "table2", + "table3", + ] + mock_client_instance.get_table.side_effect = [ + mock_table1, + mock_table2, + mock_table3, + ] + mock_hive_client.__enter__.return_value = mock_client_instance + + request = ListTablesRequest(id=["test_db"]) + response = hive_namespace.list_tables(request) + + # Should only return Lance table names + assert response.tables == ["table1", "table3"] + mock_client_instance.get_all_tables.assert_called_once_with("test_db") + + def test_describe_table(self, hive_namespace, mock_hive_client): + """Test describing a table returns location and storage_options only. + + Note: load_detailed_metadata=false is the only supported mode, which means + only location and storage_options are returned. Other fields (version, schema, etc.) + are not populated. 
+ """ + mock_table = MagicMock() + mock_table.sd.location = "/tmp/warehouse/test_db.db/test_table" + mock_table.owner = "table_owner" + mock_table.parameters = { + "table_type": "lance", + "version": "42", + } + + mock_client_instance = MagicMock() + mock_client_instance.get_table.return_value = mock_table + mock_hive_client.__enter__.return_value = mock_client_instance + + request = DescribeTableRequest(id=["test_db", "test_table"]) + response = hive_namespace.describe_table(request) + + assert response.location == "/tmp/warehouse/test_db.db/test_table" + # Only location and storage_options are returned (load_detailed_metadata=false) + assert ( + response.storage_options == {} + ) # Empty since no storage.* properties configured + + mock_client_instance.get_table.assert_called_once_with("test_db", "test_table") + + def test_deregister_table(self, hive_namespace, mock_hive_client): + """Test deregistering a table without deleting data.""" + mock_table = MagicMock() + mock_table.parameters = {"table_type": "lance"} + mock_table.sd.location = "/tmp/test_table" + + mock_client_instance = MagicMock() + mock_client_instance.get_table.return_value = mock_table + mock_hive_client.__enter__.return_value = mock_client_instance + + request = DeregisterTableRequest(id=["test_db", "test_table"]) + response = hive_namespace.deregister_table(request) + + assert response.location == "/tmp/test_table" + mock_client_instance.drop_table.assert_called_once_with( + "test_db", "test_table", deleteData=False + ) + + def test_normalize_identifier(self, hive_namespace): + """Test identifier normalization.""" + # Single element should default to "default" database + assert hive_namespace._normalize_identifier(["test_table"]) == ( + "default", + "test_table", + ) + + # Two elements should be (database, table) + assert hive_namespace._normalize_identifier(["test_db", "test_table"]) == ( + "test_db", + "test_table", + ) + + # More than two elements should raise an error + with 
pytest.raises(ValueError, match="Invalid identifier"): + hive_namespace._normalize_identifier(["a", "b", "c"]) + + def test_get_table_location(self, hive_namespace): + """Test getting table location.""" + location = hive_namespace._get_table_location("test_db", "test_table") + assert location == "/tmp/warehouse/test_db.db/test_table" + + def test_root_namespace_operations(self, hive_namespace): + """Test root namespace operations.""" + # Test describe_namespace for root + request = DescribeNamespaceRequest(id=[]) + response = hive_namespace.describe_namespace(request) + assert response.properties["location"] == "/tmp/warehouse" + assert "Root namespace" in response.properties["description"] + + # Test list_tables for root (should be empty) + request = ListTablesRequest(id=[]) + response = hive_namespace.list_tables(request) + assert response.tables == [] + + # Test create_namespace for root (should fail) + request = CreateNamespaceRequest(id=[]) + with pytest.raises(ValueError, match="Root namespace already exists"): + hive_namespace.create_namespace(request) + + # Test drop_namespace for root (should fail) + request = DropNamespaceRequest(id=[]) + with pytest.raises(ValueError, match="Cannot drop root namespace"): + hive_namespace.drop_namespace(request) + + def test_pickle_support(self): + """Test that Hive2Namespace can be pickled and unpickled for Ray compatibility.""" + import pickle + + with patch("lance_namespace_impls.hive2.HIVE_AVAILABLE", True): + with patch("lance_namespace_impls.hive2.HiveMetastoreClientWrapper"): + namespace = Hive2Namespace( + uri="thrift://localhost:9083", + root="/tmp/warehouse", + ugi="user:group1,group2", + **{ + "client.pool-size": "5", + "storage.access_key_id": "test-key", + "storage.secret_access_key": "test-secret", + }, + ) + + pickled = pickle.dumps(namespace) + assert pickled is not None + + restored = pickle.loads(pickled) + assert isinstance(restored, Hive2Namespace) + + assert restored.uri == "thrift://localhost:9083" + 
assert restored.root == "/tmp/warehouse" + assert restored.ugi == "user:group1,group2" + assert restored.pool_size == 5 + assert restored.storage_properties["access_key_id"] == "test-key" + assert restored.storage_properties["secret_access_key"] == "test-secret" + + assert restored._client is None + + with patch( + "lance_namespace_impls.hive2.HiveMetastoreClientWrapper" + ) as mock_client: + client = restored.client + assert client is not None + assert restored._client is not None + mock_client.assert_called_once_with( + "thrift://localhost:9083", "user:group1,group2" + ) diff --git a/python/tests/test_hive2_integration.py b/python/tests/test_hive2_integration.py new file mode 100644 index 0000000..216d713 --- /dev/null +++ b/python/tests/test_hive2_integration.py @@ -0,0 +1,180 @@ +""" +Integration tests for Hive2 Namespace implementation. + +To run these tests, start Hive2 Metastore with: + cd docker/hive2 && docker-compose up -d + +Tests are automatically skipped if Hive2 Metastore is not available. 
+""" + +import os +import socket +import uuid +import unittest + +import pytest + +from lance_namespace_impls.hive2 import Hive2Namespace +from lance_namespace_urllib3_client.models import ( + CreateEmptyTableRequest, + CreateNamespaceRequest, + DeregisterTableRequest, + DescribeNamespaceRequest, + DescribeTableRequest, + DropNamespaceRequest, + ListNamespacesRequest, + ListTablesRequest, +) + + +HIVE_HOST = os.environ.get("HIVE_HOST", "localhost") +HIVE_PORT = int(os.environ.get("HIVE_PORT", "9083")) +HIVE_URI = f"thrift://{HIVE_HOST}:{HIVE_PORT}" + + +def check_hive_available(): + """Check if Hive Metastore is available.""" + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(2) + result = sock.connect_ex((HIVE_HOST, HIVE_PORT)) + sock.close() + return result == 0 + except Exception: + return False + + +hive_available = check_hive_available() + + +@pytest.mark.integration +@unittest.skipUnless(hive_available, f"Hive2 Metastore is not available at {HIVE_URI}") +class TestHive2NamespaceIntegration(unittest.TestCase): + """Integration tests for Hive2Namespace against a running Hive2 Metastore.""" + + def setUp(self): + """Set up test fixtures.""" + unique_id = uuid.uuid4().hex[:8] + self.test_database = f"test_db_{unique_id}" + + properties = { + "uri": HIVE_URI, + "root": "/tmp/lance", + } + + self.namespace = Hive2Namespace(**properties) + + def tearDown(self): + """Clean up test resources.""" + try: + drop_request = DropNamespaceRequest() + drop_request.id = [self.test_database] + self.namespace.drop_namespace(drop_request) + except Exception: + pass + + if self.namespace: + self.namespace.close() + + def test_list_databases(self): + """Test listing databases at root level.""" + list_request = ListNamespacesRequest() + list_request.id = [] + + response = self.namespace.list_namespaces(list_request) + + # Should return a list of databases (may be empty initially) + self.assertIsNotNone(response.namespaces) + 
self.assertIsInstance(response.namespaces, list) + + def test_namespace_operations(self): + """Test namespace CRUD operations.""" + # Create namespace (database) + create_request = CreateNamespaceRequest() + create_request.id = [self.test_database] + create_request.properties = {"comment": "Test database for integration tests"} + + create_response = self.namespace.create_namespace(create_request) + self.assertIsNotNone(create_response) + + # Describe namespace + describe_request = DescribeNamespaceRequest() + describe_request.id = [self.test_database] + + describe_response = self.namespace.describe_namespace(describe_request) + self.assertIsNotNone(describe_response) + self.assertEqual( + describe_response.properties.get("comment"), + "Test database for integration tests", + ) + + # List namespaces (databases) + list_request = ListNamespacesRequest() + list_request.id = [] + list_response = self.namespace.list_namespaces(list_request) + self.assertIn(self.test_database, list_response.namespaces) + + # Drop namespace + drop_request = DropNamespaceRequest() + drop_request.id = [self.test_database] + self.namespace.drop_namespace(drop_request) + + def test_table_operations(self): + """Test table CRUD operations.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [self.test_database] + self.namespace.create_namespace(ns_request) + + table_name = f"test_table_{uuid.uuid4().hex[:8]}" + + # Create empty table (DeclareTable) + create_request = CreateEmptyTableRequest() + create_request.id = [self.test_database, table_name] + create_request.location = f"/tmp/lance/{self.test_database}/{table_name}" + + create_response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(create_response.location) + + # Describe table + describe_request = DescribeTableRequest() + describe_request.id = [self.test_database, table_name] + + describe_response = self.namespace.describe_table(describe_request) + 
self.assertIsNotNone(describe_response.location) + + # List tables + list_request = ListTablesRequest() + list_request.id = [self.test_database] + + list_response = self.namespace.list_tables(list_request) + self.assertIn(table_name, list_response.tables) + + # Deregister table + deregister_request = DeregisterTableRequest() + deregister_request.id = [self.test_database, table_name] + self.namespace.deregister_table(deregister_request) + + def test_create_empty_table_with_location(self): + """Test creating an empty table with a specific location.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [self.test_database] + self.namespace.create_namespace(ns_request) + + table_name = "lance_table" + create_request = CreateEmptyTableRequest() + create_request.id = [self.test_database, table_name] + create_request.location = f"/tmp/lance/{self.test_database}/{table_name}" + + response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(response.location) + + # Clean up table + deregister_request = DeregisterTableRequest() + deregister_request.id = [self.test_database, table_name] + self.namespace.deregister_table(deregister_request) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_hive3.py b/python/tests/test_hive3.py new file mode 100644 index 0000000..9291ffa --- /dev/null +++ b/python/tests/test_hive3.py @@ -0,0 +1,329 @@ +""" +Tests for Lance Hive3 Namespace implementation. 
+""" + +import pytest +from unittest.mock import MagicMock, patch + +from lance_namespace_impls.hive3 import Hive3Namespace +from lance_namespace_urllib3_client.models import ( + ListNamespacesRequest, + DescribeNamespaceRequest, + CreateNamespaceRequest, + DropNamespaceRequest, + ListTablesRequest, + DescribeTableRequest, + DeregisterTableRequest, +) + + +@pytest.fixture +def mock_hive_client(): + """Create a mock Hive client.""" + with patch("lance_namespace_impls.hive3.HIVE_AVAILABLE", True): + with patch( + "lance_namespace_impls.hive3.Hive3MetastoreClientWrapper" + ) as mock_client_class: + mock_client = MagicMock() + mock_client_class.return_value = mock_client + yield mock_client + + +@pytest.fixture +def hive_namespace(mock_hive_client): + """Create a Hive3Namespace instance with mocked client.""" + with patch("lance_namespace_impls.hive3.HIVE_AVAILABLE", True): + namespace = Hive3Namespace(uri="thrift://localhost:9083", root="/tmp/warehouse") + namespace._client = mock_hive_client + return namespace + + +class TestHive3Namespace: + """Test cases for Hive3Namespace.""" + + def test_initialization(self): + """Test namespace initialization.""" + with patch("lance_namespace_impls.hive3.HIVE_AVAILABLE", True): + with patch( + "lance_namespace_impls.hive3.Hive3MetastoreClientWrapper" + ) as mock_client: + namespace = Hive3Namespace( + uri="thrift://localhost:9083", + root="/tmp/warehouse", + ugi="user:group1,group2", + ) + + assert namespace.uri == "thrift://localhost:9083" + assert namespace.root == "/tmp/warehouse" + assert namespace.ugi == "user:group1,group2" + + mock_client.assert_not_called() + + _ = namespace.client + mock_client.assert_called_once_with( + "thrift://localhost:9083", "user:group1,group2" + ) + + def test_initialization_without_hive_deps(self): + """Test that initialization fails gracefully without Hive dependencies.""" + with patch("lance_namespace_impls.hive3.HIVE_AVAILABLE", False): + with pytest.raises(ImportError, match="Hive 
dependencies not installed"): + Hive3Namespace(uri="thrift://localhost:9083") + + def test_list_namespaces_root(self, hive_namespace, mock_hive_client): + """Test listing catalogs at root level.""" + mock_client_instance = MagicMock() + mock_catalogs = MagicMock() + mock_catalogs.names = ["hive", "custom_catalog"] + mock_client_instance.get_catalogs.return_value = mock_catalogs + mock_hive_client.__enter__.return_value = mock_client_instance + + request = ListNamespacesRequest() + response = hive_namespace.list_namespaces(request) + + assert "hive" in response.namespaces + assert "custom_catalog" in response.namespaces + + def test_list_namespaces_catalog_level(self, hive_namespace, mock_hive_client): + """Test listing databases in a catalog.""" + mock_client_instance = MagicMock() + mock_client_instance.get_all_databases.return_value = [ + "default", + "test_db", + "prod_db", + ] + mock_hive_client.__enter__.return_value = mock_client_instance + + request = ListNamespacesRequest(id=["hive"]) + response = hive_namespace.list_namespaces(request) + + assert response.namespaces == ["test_db", "prod_db"] + mock_client_instance.get_all_databases.assert_called_once() + + def test_describe_namespace_catalog(self, hive_namespace, mock_hive_client): + """Test describing a catalog namespace.""" + request = DescribeNamespaceRequest(id=["hive"]) + response = hive_namespace.describe_namespace(request) + + assert "Catalog: hive" in response.properties["description"] + assert "catalog.location.uri" in response.properties + + def test_describe_namespace_database(self, hive_namespace, mock_hive_client): + """Test describing a database namespace.""" + mock_database = MagicMock() + mock_database.description = "Test database" + mock_database.ownerName = "test_user" + mock_database.locationUri = "/tmp/warehouse/test_db" + mock_database.parameters = {"key": "value"} + + mock_client_instance = MagicMock() + mock_client_instance.get_database.return_value = mock_database + 
mock_hive_client.__enter__.return_value = mock_client_instance + + request = DescribeNamespaceRequest(id=["hive", "test_db"]) + response = hive_namespace.describe_namespace(request) + + assert response.properties["comment"] == "Test database" + assert response.properties["owner"] == "test_user" + assert response.properties["location"] == "/tmp/warehouse/test_db" + mock_client_instance.get_database.assert_called_once_with("test_db") + + def test_create_namespace_database(self, hive_namespace, mock_hive_client): + """Test creating a database namespace.""" + mock_client_instance = MagicMock() + mock_hive_client.__enter__.return_value = mock_client_instance + + with patch("lance_namespace_impls.hive3.HiveDatabase") as mock_hive_db_class: + mock_hive_db = MagicMock() + mock_hive_db_class.return_value = mock_hive_db + + request = CreateNamespaceRequest( + id=["hive", "test_db"], + properties={"comment": "Test database", "owner": "test_user"}, + ) + hive_namespace.create_namespace(request) + + mock_client_instance.create_database.assert_called_once_with(mock_hive_db) + assert mock_hive_db.name == "test_db" + + def test_drop_namespace_database(self, hive_namespace, mock_hive_client): + """Test dropping a database namespace.""" + mock_client_instance = MagicMock() + mock_client_instance.get_all_tables.return_value = [] + mock_hive_client.__enter__.return_value = mock_client_instance + + request = DropNamespaceRequest(id=["hive", "test_db"]) + hive_namespace.drop_namespace(request) + + mock_client_instance.drop_database.assert_called_once_with( + "test_db", deleteData=True, cascade=False + ) + + def test_list_tables(self, hive_namespace, mock_hive_client): + """Test listing tables in a database.""" + mock_table1 = MagicMock() + mock_table1.parameters = {"table_type": "lance"} + + mock_table2 = MagicMock() + mock_table2.parameters = {"other_type": "OTHER"} + + mock_table3 = MagicMock() + mock_table3.parameters = {"table_type": "lance"} + + mock_client_instance = MagicMock() + 
mock_client_instance.get_all_tables.return_value = [ + "table1", + "table2", + "table3", + ] + mock_client_instance.get_table.side_effect = [ + mock_table1, + mock_table2, + mock_table3, + ] + mock_hive_client.__enter__.return_value = mock_client_instance + + request = ListTablesRequest(id=["hive", "test_db"]) + response = hive_namespace.list_tables(request) + + assert response.tables == ["table1", "table3"] + mock_client_instance.get_all_tables.assert_called_once_with("test_db") + + def test_describe_table(self, hive_namespace, mock_hive_client): + """Test describing a table returns location and storage_options only. + + Note: load_detailed_metadata=false is the only supported mode, which means + only location and storage_options are returned. Other fields (version, schema, etc.) + are not populated. + """ + mock_table = MagicMock() + mock_table.sd.location = "/tmp/warehouse/test_db/test_table" + mock_table.parameters = { + "table_type": "lance", + "version": "42", + } + + mock_client_instance = MagicMock() + mock_client_instance.get_table.return_value = mock_table + mock_hive_client.__enter__.return_value = mock_client_instance + + request = DescribeTableRequest(id=["hive", "test_db", "test_table"]) + response = hive_namespace.describe_table(request) + + assert response.location == "/tmp/warehouse/test_db/test_table" + # Only location and storage_options are returned (load_detailed_metadata=false) + assert ( + response.storage_options == {} + ) # Empty since no storage.* properties configured + + mock_client_instance.get_table.assert_called_once_with("test_db", "test_table") + + def test_deregister_table(self, hive_namespace, mock_hive_client): + """Test deregistering a table with 3-level identifier.""" + mock_table = MagicMock() + mock_table.parameters = {"table_type": "lance"} + mock_table.sd.location = "/tmp/test_table" + + mock_client_instance = MagicMock() + mock_client_instance.get_table.return_value = mock_table + mock_hive_client.__enter__.return_value = 
mock_client_instance + + request = DeregisterTableRequest(id=["hive", "test_db", "test_table"]) + response = hive_namespace.deregister_table(request) + + assert response.location == "/tmp/test_table" + mock_client_instance.drop_table.assert_called_once_with( + "test_db", "test_table", deleteData=False + ) + + def test_normalize_identifier(self, hive_namespace): + """Test identifier normalization for 3-level hierarchy.""" + # Single element defaults to (hive, default, table) + assert hive_namespace._normalize_identifier(["test_table"]) == ( + "hive", + "default", + "test_table", + ) + + # Two elements defaults to (hive, database, table) + assert hive_namespace._normalize_identifier(["test_db", "test_table"]) == ( + "hive", + "test_db", + "test_table", + ) + + # Three elements is (catalog, database, table) + assert hive_namespace._normalize_identifier( + ["my_cat", "test_db", "test_table"] + ) == ("my_cat", "test_db", "test_table") + + # More than three elements should raise an error + with pytest.raises(ValueError, match="Invalid identifier"): + hive_namespace._normalize_identifier(["a", "b", "c", "d"]) + + def test_get_table_location(self, hive_namespace): + """Test getting table location for 3-level hierarchy.""" + location = hive_namespace._get_table_location("hive", "test_db", "test_table") + assert location == "/tmp/warehouse/test_db/test_table.lance" + + def test_root_namespace_operations(self, hive_namespace): + """Test root namespace operations.""" + # describe_namespace for root + request = DescribeNamespaceRequest(id=[]) + response = hive_namespace.describe_namespace(request) + assert response.properties["location"] == "/tmp/warehouse" + + # list_tables for root should be empty + request = ListTablesRequest(id=[]) + response = hive_namespace.list_tables(request) + assert response.tables == [] + + # create_namespace for root should fail + request = CreateNamespaceRequest(id=[]) + with pytest.raises(ValueError, match="Root namespace already exists"): + 
hive_namespace.create_namespace(request) + + # drop_namespace for root should fail + request = DropNamespaceRequest(id=[]) + with pytest.raises(ValueError, match="Cannot drop root namespace"): + hive_namespace.drop_namespace(request) + + def test_pickle_support(self): + """Test that Hive3Namespace can be pickled and unpickled.""" + import pickle + + with patch("lance_namespace_impls.hive3.HIVE_AVAILABLE", True): + with patch("lance_namespace_impls.hive3.Hive3MetastoreClientWrapper"): + namespace = Hive3Namespace( + uri="thrift://localhost:9083", + root="/tmp/warehouse", + ugi="user:group1,group2", + **{ + "client.pool-size": "5", + "storage.access_key_id": "test-key", + "storage.secret_access_key": "test-secret", + }, + ) + + pickled = pickle.dumps(namespace) + assert pickled is not None + + restored = pickle.loads(pickled) + assert isinstance(restored, Hive3Namespace) + + assert restored.uri == "thrift://localhost:9083" + assert restored.root == "/tmp/warehouse" + assert restored.ugi == "user:group1,group2" + assert restored.pool_size == 5 + assert restored.storage_properties["access_key_id"] == "test-key" + + assert restored._client is None + + with patch( + "lance_namespace_impls.hive3.Hive3MetastoreClientWrapper" + ) as mock_client: + client = restored.client + assert client is not None + mock_client.assert_called_once_with( + "thrift://localhost:9083", "user:group1,group2" + ) diff --git a/python/tests/test_hive3_integration.py b/python/tests/test_hive3_integration.py new file mode 100644 index 0000000..80ac949 --- /dev/null +++ b/python/tests/test_hive3_integration.py @@ -0,0 +1,194 @@ +""" +Integration tests for Hive3 Namespace implementation. + +To run these tests, start Hive3 Metastore with: + cd docker/hive3 && docker-compose up -d + +Tests are automatically skipped if Hive3 Metastore is not available. 
+""" + +import os +import socket +import uuid +import unittest + +import pytest + +from lance_namespace_impls.hive3 import Hive3Namespace +from lance_namespace_urllib3_client.models import ( + CreateEmptyTableRequest, + CreateNamespaceRequest, + DeregisterTableRequest, + DescribeNamespaceRequest, + DescribeTableRequest, + DropNamespaceRequest, + ListNamespacesRequest, + ListTablesRequest, +) + + +HIVE_HOST = os.environ.get("HIVE3_HOST", "localhost") +HIVE_PORT = int(os.environ.get("HIVE3_PORT", "9084")) +HIVE_URI = f"thrift://{HIVE_HOST}:{HIVE_PORT}" +DEFAULT_CATALOG = "hive" + + +def check_hive_available(): + """Check if Hive Metastore is available.""" + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(2) + result = sock.connect_ex((HIVE_HOST, HIVE_PORT)) + sock.close() + return result == 0 + except Exception: + return False + + +hive_available = check_hive_available() + + +@pytest.mark.integration +@unittest.skipUnless(hive_available, f"Hive3 Metastore is not available at {HIVE_URI}") +class TestHive3NamespaceIntegration(unittest.TestCase): + """Integration tests for Hive3Namespace against a running Hive3 Metastore.""" + + def setUp(self): + """Set up test fixtures.""" + unique_id = uuid.uuid4().hex[:8] + self.test_catalog = DEFAULT_CATALOG + self.test_database = f"test_db_{unique_id}" + + properties = { + "uri": HIVE_URI, + "root": "/tmp/lance", + } + + self.namespace = Hive3Namespace(**properties) + + def tearDown(self): + """Clean up test resources.""" + try: + drop_request = DropNamespaceRequest() + drop_request.id = [self.test_catalog, self.test_database] + self.namespace.drop_namespace(drop_request) + except Exception: + pass + + if self.namespace: + self.namespace.close() + + def test_list_catalogs(self): + """Test listing catalogs at root level.""" + list_request = ListNamespacesRequest() + list_request.id = [] + + response = self.namespace.list_namespaces(list_request) + + # Should return a list of catalogs + 
self.assertIsNotNone(response.namespaces) + self.assertIsInstance(response.namespaces, list) + self.assertIn(DEFAULT_CATALOG, response.namespaces) + + def test_list_databases(self): + """Test listing databases at catalog level.""" + list_request = ListNamespacesRequest() + list_request.id = [self.test_catalog] + + response = self.namespace.list_namespaces(list_request) + + # Should return a list of databases (may be empty initially) + self.assertIsNotNone(response.namespaces) + self.assertIsInstance(response.namespaces, list) + + def test_namespace_operations(self): + """Test namespace CRUD operations.""" + # Create namespace (database) + create_request = CreateNamespaceRequest() + create_request.id = [self.test_catalog, self.test_database] + create_request.properties = {"comment": "Test database for integration tests"} + + create_response = self.namespace.create_namespace(create_request) + self.assertIsNotNone(create_response) + + # Describe namespace + describe_request = DescribeNamespaceRequest() + describe_request.id = [self.test_catalog, self.test_database] + + describe_response = self.namespace.describe_namespace(describe_request) + self.assertIsNotNone(describe_response) + self.assertEqual( + describe_response.properties.get("comment"), + "Test database for integration tests", + ) + + # List namespaces (databases) + list_request = ListNamespacesRequest() + list_request.id = [self.test_catalog] + list_response = self.namespace.list_namespaces(list_request) + self.assertIn(self.test_database, list_response.namespaces) + + # Drop namespace + drop_request = DropNamespaceRequest() + drop_request.id = [self.test_catalog, self.test_database] + self.namespace.drop_namespace(drop_request) + + def test_table_operations(self): + """Test table CRUD operations.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [self.test_catalog, self.test_database] + self.namespace.create_namespace(ns_request) + + table_name = 
f"test_table_{uuid.uuid4().hex[:8]}" + + # Create empty table (DeclareTable) + create_request = CreateEmptyTableRequest() + create_request.id = [self.test_catalog, self.test_database, table_name] + create_request.location = f"/tmp/lance/{self.test_database}/{table_name}" + + create_response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(create_response.location) + + # Describe table + describe_request = DescribeTableRequest() + describe_request.id = [self.test_catalog, self.test_database, table_name] + + describe_response = self.namespace.describe_table(describe_request) + self.assertIsNotNone(describe_response.location) + + # List tables + list_request = ListTablesRequest() + list_request.id = [self.test_catalog, self.test_database] + + list_response = self.namespace.list_tables(list_request) + self.assertIn(table_name, list_response.tables) + + # Deregister table + deregister_request = DeregisterTableRequest() + deregister_request.id = [self.test_catalog, self.test_database, table_name] + self.namespace.deregister_table(deregister_request) + + def test_create_empty_table_with_location(self): + """Test creating an empty table with a specific location.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [self.test_catalog, self.test_database] + self.namespace.create_namespace(ns_request) + + table_name = "lance_table" + create_request = CreateEmptyTableRequest() + create_request.id = [self.test_catalog, self.test_database, table_name] + create_request.location = f"/tmp/lance/{self.test_database}/{table_name}" + + response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(response.location) + + # Clean up table + deregister_request = DeregisterTableRequest() + deregister_request.id = [self.test_catalog, self.test_database, table_name] + self.namespace.deregister_table(deregister_request) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_iceberg.py 
b/python/tests/test_iceberg.py new file mode 100644 index 0000000..f3a1061 --- /dev/null +++ b/python/tests/test_iceberg.py @@ -0,0 +1,542 @@ +""" +Tests for Iceberg REST Catalog namespace implementation. +""" + +import unittest +from unittest.mock import MagicMock, patch + +from lance_namespace_impls.iceberg import ( + IcebergNamespace, + IcebergNamespaceConfig, + create_dummy_schema, +) +from lance_namespace_impls.rest_client import ( + RestClientException, + NamespaceNotFoundException, + NamespaceAlreadyExistsException, + TableNotFoundException, + TableAlreadyExistsException, + InvalidInputException, +) +from lance_namespace_urllib3_client.models import ( + ListNamespacesRequest, + CreateNamespaceRequest, + DescribeNamespaceRequest, + DropNamespaceRequest, + ListTablesRequest, + CreateEmptyTableRequest, + DescribeTableRequest, + DeregisterTableRequest, +) + + +class TestIcebergNamespaceConfig(unittest.TestCase): + """Test Iceberg namespace configuration.""" + + def test_config_initialization(self): + """Test configuration initialization with required properties.""" + properties = { + "endpoint": "https://iceberg.example.com", + "root": "/data/lance", + "auth_token": "test_token", + "warehouse": "test_warehouse", + } + + config = IcebergNamespaceConfig(properties) + + self.assertEqual(config.endpoint, "https://iceberg.example.com") + self.assertEqual(config.root, "/data/lance") + self.assertEqual(config.auth_token, "test_token") + self.assertEqual(config.warehouse, "test_warehouse") + + def test_config_defaults(self): + """Test configuration with default values.""" + import os + + properties = {"endpoint": "https://iceberg.example.com"} + + config = IcebergNamespaceConfig(properties) + + self.assertEqual(config.root, os.getcwd()) + self.assertIsNone(config.auth_token) + self.assertIsNone(config.warehouse) + self.assertEqual(config.connect_timeout, 10000) + self.assertEqual(config.read_timeout, 30000) + self.assertEqual(config.max_retries, 3) + + def 
test_config_missing_endpoint(self): + """Test configuration fails without endpoint.""" + properties = {} + + with self.assertRaises(ValueError) as context: + IcebergNamespaceConfig(properties) + + self.assertIn("endpoint", str(context.exception)) + + def test_get_base_api_url(self): + """Test API URL generation.""" + properties = {"endpoint": "https://iceberg.example.com/"} + config = IcebergNamespaceConfig(properties) + + self.assertEqual(config.get_base_api_url(), "https://iceberg.example.com") + + +class TestIcebergNamespace(unittest.TestCase): + """Test Iceberg namespace implementation.""" + + def setUp(self): + """Set up test fixtures.""" + self.properties = { + "endpoint": "https://iceberg.example.com", + "root": "/data/lance", + } + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_namespace_id(self, mock_rest_client_class): + """Test namespace ID generation.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = IcebergNamespace(**self.properties) + ns_id = namespace.namespace_id() + + self.assertIn("IcebergNamespace", ns_id) + self.assertIn("iceberg.example.com", ns_id) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_list_namespaces_prefix_level(self, mock_rest_client_class): + """Test listing namespaces at prefix level.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + {"namespaces": [["ns1"], ["ns2"], ["ns3"]]}, + ] + + namespace = IcebergNamespace(**self.properties) + + request = ListNamespacesRequest() + request.id = ["warehouse1"] + + response = namespace.list_namespaces(request) + + self.assertEqual( + sorted(response.namespaces), + ["warehouse1.ns1", "warehouse1.ns2", "warehouse1.ns3"], + ) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_list_namespaces_nested(self, mock_rest_client_class): + """Test listing nested namespaces.""" + mock_client = 
MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + {"namespaces": [["parent", "child1"], ["parent", "child2"]]}, + ] + + namespace = IcebergNamespace(**self.properties) + + request = ListNamespacesRequest() + request.id = ["warehouse1", "parent"] + + response = namespace.list_namespaces(request) + + self.assertEqual( + sorted(response.namespaces), + ["warehouse1.parent.child1", "warehouse1.parent.child2"], + ) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_list_namespaces_empty_id(self, mock_rest_client_class): + """Test listing namespaces without prefix fails.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = IcebergNamespace(**self.properties) + + request = ListNamespacesRequest() + request.id = [] + + with self.assertRaises(InvalidInputException): + namespace.list_namespaces(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_create_namespace(self, mock_rest_client_class): + """Test creating a namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"defaults": {"prefix": "warehouse1"}} + mock_client.post.return_value = {"properties": {"key": "value"}} + + namespace = IcebergNamespace(**self.properties) + + request = CreateNamespaceRequest() + request.id = ["warehouse1", "test_namespace"] + request.properties = {"key": "value"} + + response = namespace.create_namespace(request) + + self.assertEqual(response.properties, {"key": "value"}) + mock_client.post.assert_called_once_with( + "/v1/warehouse1/namespaces", + {"namespace": ["test_namespace"], "properties": {"key": "value"}}, + ) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_create_namespace_already_exists(self, mock_rest_client_class): + """Test creating a namespace that already exists.""" + mock_client = MagicMock() + 
mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"defaults": {"prefix": "warehouse1"}} + mock_client.post.side_effect = RestClientException( + status_code=409, response_body="Conflict" + ) + + namespace = IcebergNamespace(**self.properties) + + request = CreateNamespaceRequest() + request.id = ["warehouse1", "existing_namespace"] + + with self.assertRaises(NamespaceAlreadyExistsException): + namespace.create_namespace(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_create_namespace_invalid_id(self, mock_rest_client_class): + """Test creating namespace with invalid ID fails.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = IcebergNamespace(**self.properties) + + request = CreateNamespaceRequest() + request.id = ["only_prefix"] + + with self.assertRaises(InvalidInputException): + namespace.create_namespace(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_describe_namespace(self, mock_rest_client_class): + """Test describing a namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + {"properties": {"key": "value"}}, + ] + + namespace = IcebergNamespace(**self.properties) + + request = DescribeNamespaceRequest() + request.id = ["warehouse1", "test_namespace"] + + response = namespace.describe_namespace(request) + + self.assertEqual(response.properties, {"key": "value"}) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_describe_namespace_not_found(self, mock_rest_client_class): + """Test describing a namespace that doesn't exist.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + RestClientException(status_code=404, response_body="Not found"), + ] + + namespace = 
IcebergNamespace(**self.properties) + + request = DescribeNamespaceRequest() + request.id = ["warehouse1", "nonexistent"] + + with self.assertRaises(NamespaceNotFoundException): + namespace.describe_namespace(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_drop_namespace(self, mock_rest_client_class): + """Test dropping a namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"defaults": {"prefix": "warehouse1"}} + + namespace = IcebergNamespace(**self.properties) + + request = DropNamespaceRequest() + request.id = ["warehouse1", "test_namespace"] + + response = namespace.drop_namespace(request) + + self.assertIsNotNone(response) + mock_client.delete.assert_called_once() + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_drop_namespace_not_found(self, mock_rest_client_class): + """Test dropping a namespace that doesn't exist returns success.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"defaults": {"prefix": "warehouse1"}} + mock_client.delete.side_effect = RestClientException( + status_code=404, response_body="Not found" + ) + + namespace = IcebergNamespace(**self.properties) + + request = DropNamespaceRequest() + request.id = ["warehouse1", "nonexistent"] + + response = namespace.drop_namespace(request) + + self.assertIsNotNone(response) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_list_tables(self, mock_rest_client_class): + """Test listing tables in a namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + {"identifiers": [{"name": "table1"}, {"name": "table2"}]}, + {"metadata": {"properties": {"table_type": "lance"}}}, + {"metadata": {"properties": {"table_type": "lance"}}}, + ] + + namespace = IcebergNamespace(**self.properties) + + 
request = ListTablesRequest() + request.id = ["warehouse1", "test_namespace"] + + response = namespace.list_tables(request) + + self.assertEqual(sorted(response.tables), ["table1", "table2"]) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_list_tables_invalid_id(self, mock_rest_client_class): + """Test listing tables with invalid ID fails.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = IcebergNamespace(**self.properties) + + request = ListTablesRequest() + request.id = ["only_prefix"] + + with self.assertRaises(InvalidInputException): + namespace.list_tables(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_create_empty_table(self, mock_rest_client_class): + """Test creating an empty table.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"defaults": {"prefix": "warehouse1"}} + mock_client.post.return_value = {} + + namespace = IcebergNamespace(**self.properties) + + request = CreateEmptyTableRequest() + request.id = ["warehouse1", "test_namespace", "test_table"] + request.location = None + + response = namespace.create_empty_table(request) + + self.assertEqual( + response.location, "/data/lance/warehouse1/test_namespace/test_table" + ) + mock_client.post.assert_called_once() + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_create_empty_table_with_location(self, mock_rest_client_class): + """Test creating an empty table with custom location.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"defaults": {"prefix": "warehouse1"}} + mock_client.post.return_value = {} + + namespace = IcebergNamespace(**self.properties) + + request = CreateEmptyTableRequest() + request.id = ["warehouse1", "test_namespace", "test_table"] + request.location = "/custom/path/test_table" + + response = namespace.create_empty_table(request) + + 
self.assertEqual(response.location, "/custom/path/test_table") + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_create_empty_table_already_exists(self, mock_rest_client_class): + """Test creating a table that already exists.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"defaults": {"prefix": "warehouse1"}} + mock_client.post.side_effect = RestClientException( + status_code=409, response_body="Conflict" + ) + + namespace = IcebergNamespace(**self.properties) + + request = CreateEmptyTableRequest() + request.id = ["warehouse1", "test_namespace", "existing_table"] + + with self.assertRaises(TableAlreadyExistsException): + namespace.create_empty_table(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_create_empty_table_invalid_id(self, mock_rest_client_class): + """Test creating table with invalid ID fails.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = IcebergNamespace(**self.properties) + + request = CreateEmptyTableRequest() + request.id = ["warehouse1", "only_namespace"] + + with self.assertRaises(InvalidInputException): + namespace.create_empty_table(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_describe_table(self, mock_rest_client_class): + """Test describing a table.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + { + "metadata": { + "location": "/data/lance/ns/table", + "properties": {"table_type": "lance", "key": "value"}, + } + }, + ] + + namespace = IcebergNamespace(**self.properties) + + request = DescribeTableRequest() + request.id = ["warehouse1", "test_namespace", "test_table"] + + response = namespace.describe_table(request) + + self.assertEqual(response.location, "/data/lance/ns/table") + self.assertEqual( + response.storage_options, 
{"table_type": "lance", "key": "value"} + ) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_describe_table_not_lance(self, mock_rest_client_class): + """Test describing a table that is not a Lance table.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + { + "metadata": { + "location": "/data/iceberg/ns/table", + "properties": {"table_type": "iceberg"}, + } + }, + ] + + namespace = IcebergNamespace(**self.properties) + + request = DescribeTableRequest() + request.id = ["warehouse1", "test_namespace", "test_table"] + + with self.assertRaises(InvalidInputException): + namespace.describe_table(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_describe_table_not_found(self, mock_rest_client_class): + """Test describing a table that doesn't exist.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + RestClientException(status_code=404, response_body="Not found"), + ] + + namespace = IcebergNamespace(**self.properties) + + request = DescribeTableRequest() + request.id = ["warehouse1", "test_namespace", "nonexistent"] + + with self.assertRaises(TableNotFoundException): + namespace.describe_table(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_deregister_table(self, mock_rest_client_class): + """Test deregistering a table.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + {"metadata": {"location": "/data/lance/ns/table"}}, + ] + + namespace = IcebergNamespace(**self.properties) + + request = DeregisterTableRequest() + request.id = ["warehouse1", "test_namespace", "test_table"] + + response = namespace.deregister_table(request) + + self.assertEqual(response.location, 
"/data/lance/ns/table") + mock_client.delete.assert_called_once() + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_deregister_table_not_found(self, mock_rest_client_class): + """Test deregistering a table that doesn't exist.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = [ + {"defaults": {"prefix": "warehouse1"}}, + RestClientException(status_code=404, response_body="Not found"), + ] + + namespace = IcebergNamespace(**self.properties) + + request = DeregisterTableRequest() + request.id = ["warehouse1", "test_namespace", "nonexistent"] + + with self.assertRaises(TableNotFoundException): + namespace.deregister_table(request) + + @patch("lance_namespace_impls.iceberg.RestClient") + def test_close(self, mock_rest_client_class): + """Test closing the namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = IcebergNamespace(**self.properties) + namespace.close() + + mock_client.close.assert_called_once() + + def test_create_dummy_schema(self): + """Test dummy schema creation.""" + schema = create_dummy_schema() + + self.assertEqual(schema["type"], "struct") + self.assertEqual(schema["schema-id"], 0) + self.assertEqual(len(schema["fields"]), 1) + self.assertEqual(schema["fields"][0]["name"], "dummy") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_iceberg_integration.py b/python/tests/test_iceberg_integration.py new file mode 100644 index 0000000..b31c1e2 --- /dev/null +++ b/python/tests/test_iceberg_integration.py @@ -0,0 +1,199 @@ +""" +Integration tests for Iceberg REST Catalog namespace implementation. + +This test uses Lakekeeper as the Iceberg REST Catalog implementation. +To run these tests, start the catalog with: + cd docker/iceberg && docker-compose up -d + +Tests are automatically skipped if the catalog is not available. 
+""" + +import os +import uuid +import urllib.request +import urllib.error +import unittest + +import pytest + +from lance_namespace_impls.iceberg import IcebergNamespace +from lance_namespace_urllib3_client.models import ( + CreateEmptyTableRequest, + CreateNamespaceRequest, + DeregisterTableRequest, + DescribeNamespaceRequest, + DescribeTableRequest, + DropNamespaceRequest, + ListNamespacesRequest, + ListTablesRequest, +) + + +ICEBERG_ENDPOINT = os.environ.get("ICEBERG_ENDPOINT", "http://localhost:8282/catalog") +ICEBERG_WAREHOUSE = os.environ.get("ICEBERG_WAREHOUSE", "test_warehouse") + + +def check_iceberg_available(): + """Check if Iceberg REST Catalog is available.""" + try: + url = f"{ICEBERG_ENDPOINT}/v1/config?warehouse={ICEBERG_WAREHOUSE}" + req = urllib.request.Request(url, method="GET") + try: + with urllib.request.urlopen(req, timeout=5) as response: + return response.status == 200 + except urllib.error.HTTPError: + return False + except Exception: + return False + + +iceberg_available = check_iceberg_available() + + +@pytest.mark.integration +@unittest.skipUnless( + iceberg_available, f"Iceberg REST Catalog is not available at {ICEBERG_ENDPOINT}" +) +class TestIcebergNamespaceIntegration(unittest.TestCase): + """Integration tests for IcebergNamespace against a running Iceberg REST Catalog.""" + + def setUp(self): + """Set up test fixtures.""" + unique_id = uuid.uuid4().hex[:8] + self.test_warehouse = ICEBERG_WAREHOUSE + self.test_namespace = f"test_ns_{unique_id}" + + properties = { + "endpoint": ICEBERG_ENDPOINT, + "root": "s3://warehouse", + } + + self.namespace = IcebergNamespace(**properties) + + def tearDown(self): + """Clean up test resources.""" + try: + drop_request = DropNamespaceRequest() + drop_request.id = [self.test_warehouse, self.test_namespace] + self.namespace.drop_namespace(drop_request) + except Exception: + pass + + if self.namespace: + self.namespace.close() + + def test_namespace_operations(self): + """Test namespace CRUD 
operations.""" + # Create namespace + create_request = CreateNamespaceRequest() + create_request.id = [self.test_warehouse, self.test_namespace] + create_request.properties = {"description": "Test namespace"} + + create_response = self.namespace.create_namespace(create_request) + self.assertIsNotNone(create_response) + + # Describe namespace + describe_request = DescribeNamespaceRequest() + describe_request.id = [self.test_warehouse, self.test_namespace] + + describe_response = self.namespace.describe_namespace(describe_request) + self.assertIsNotNone(describe_response) + + # List namespaces + list_request = ListNamespacesRequest() + list_request.id = [self.test_warehouse] + list_response = self.namespace.list_namespaces(list_request) + full_ns_name = f"{self.test_warehouse}.{self.test_namespace}" + self.assertIn(full_ns_name, list_response.namespaces) + + # Drop namespace + drop_request = DropNamespaceRequest() + drop_request.id = [self.test_warehouse, self.test_namespace] + self.namespace.drop_namespace(drop_request) + + def test_table_operations(self): + """Test table CRUD operations.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [self.test_warehouse, self.test_namespace] + self.namespace.create_namespace(ns_request) + + table_name = f"test_table_{uuid.uuid4().hex[:8]}" + + # Create empty table (DeclareTable) + create_request = CreateEmptyTableRequest() + create_request.id = [self.test_warehouse, self.test_namespace, table_name] + create_request.location = f"s3://warehouse/{self.test_namespace}/{table_name}" + + create_response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(create_response.location) + + # Describe table + describe_request = DescribeTableRequest() + describe_request.id = [self.test_warehouse, self.test_namespace, table_name] + + describe_response = self.namespace.describe_table(describe_request) + self.assertIsNotNone(describe_response.location) + + # List tables + list_request 
= ListTablesRequest() + list_request.id = [self.test_warehouse, self.test_namespace] + + list_response = self.namespace.list_tables(list_request) + self.assertIn(table_name, list_response.tables) + + # Deregister table + deregister_request = DeregisterTableRequest() + deregister_request.id = [self.test_warehouse, self.test_namespace, table_name] + self.namespace.deregister_table(deregister_request) + + def test_create_empty_table_with_location(self): + """Test creating an empty table with a specific location.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [self.test_warehouse, self.test_namespace] + self.namespace.create_namespace(ns_request) + + table_name = "lance_table" + create_request = CreateEmptyTableRequest() + create_request.id = [self.test_warehouse, self.test_namespace, table_name] + create_request.location = f"s3://warehouse/{self.test_namespace}/{table_name}" + + response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(response.location) + + # Clean up table + deregister_request = DeregisterTableRequest() + deregister_request.id = [self.test_warehouse, self.test_namespace, table_name] + self.namespace.deregister_table(deregister_request) + + def test_nested_namespace(self): + """Test nested namespace operations.""" + nested_ns = f"nested_{uuid.uuid4().hex[:8]}" + + # Create parent namespace + parent_request = CreateNamespaceRequest() + parent_request.id = [self.test_warehouse, self.test_namespace] + self.namespace.create_namespace(parent_request) + + # Create nested namespace + nested_request = CreateNamespaceRequest() + nested_request.id = [self.test_warehouse, self.test_namespace, nested_ns] + nested_request.properties = {"description": "Nested namespace"} + self.namespace.create_namespace(nested_request) + + # List nested namespaces + list_request = ListNamespacesRequest() + list_request.id = [self.test_warehouse, self.test_namespace] + list_response = 
self.namespace.list_namespaces(list_request) + expected_ns = f"{self.test_warehouse}.{self.test_namespace}.{nested_ns}" + self.assertIn(expected_ns, list_response.namespaces) + + # Drop nested namespace first + drop_nested = DropNamespaceRequest() + drop_nested.id = [self.test_warehouse, self.test_namespace, nested_ns] + self.namespace.drop_namespace(drop_nested) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_namespace.py b/python/tests/test_namespace.py index 03583f0..c502e34 100644 --- a/python/tests/test_namespace.py +++ b/python/tests/test_namespace.py @@ -4,7 +4,6 @@ class MockNamespace(LanceNamespace): - def __init__(self, **kwargs): self.properties = kwargs @@ -12,10 +11,11 @@ def namespace_id(self) -> str: """Return a mock namespace ID.""" return "MockNamespace { }" + def test_connect_with_short_name_rest(): - with patch('lance_namespace.namespace.importlib.import_module') as mock_import: + with patch("lance_namespace.importlib.import_module") as mock_import: mock_module = Mock() - mock_module.LanceRestNamespace = MockNamespace + mock_module.RestNamespace = MockNamespace mock_import.return_value = mock_module ns = connect("rest", {"uri": "http://localhost:8080"}) @@ -25,7 +25,7 @@ def test_connect_with_short_name_rest(): def test_connect_with_full_class_path(): - with patch('lance_namespace.namespace.importlib.import_module') as mock_import: + with patch("lance_namespace.importlib.import_module") as mock_import: mock_module = Mock() mock_module.CustomNamespace = MockNamespace mock_import.return_value = mock_module @@ -37,7 +37,7 @@ def test_connect_with_full_class_path(): def test_connect_invalid_implementation(): - with patch('lance_namespace.namespace.importlib.import_module') as mock_import: + with patch("lance_namespace.importlib.import_module") as mock_import: mock_import.side_effect = ImportError("Module not found") with pytest.raises(ValueError) as exc_info: @@ -47,7 +47,7 @@ def 
test_connect_invalid_implementation(): def test_connect_non_namespace_class(): - with patch('lance_namespace.namespace.importlib.import_module') as mock_import: + with patch("lance_namespace.importlib.import_module") as mock_import: mock_module = Mock() mock_module.NotANamespace = str mock_import.return_value = mock_module @@ -58,7 +58,8 @@ def test_connect_non_namespace_class(): assert "does not implement LanceNamespace interface" in str(exc_info.value) -def test_default_methods_raise_not_implemented(): +def test_default_methods_raise_unsupported(): + from lance_namespace import UnsupportedOperationError from lance_namespace_urllib3_client.models import ( ListNamespacesRequest, DescribeNamespaceRequest, @@ -67,11 +68,11 @@ def test_default_methods_raise_not_implemented(): ns = MockNamespace() - with pytest.raises(NotImplementedError): + with pytest.raises(UnsupportedOperationError): ns.list_namespaces(ListNamespacesRequest()) - with pytest.raises(NotImplementedError): + with pytest.raises(UnsupportedOperationError): ns.describe_namespace(DescribeNamespaceRequest()) - with pytest.raises(NotImplementedError): - ns.create_namespace(CreateNamespaceRequest()) \ No newline at end of file + with pytest.raises(UnsupportedOperationError): + ns.create_namespace(CreateNamespaceRequest(id=["test"])) diff --git a/python/tests/test_polaris.py b/python/tests/test_polaris.py new file mode 100644 index 0000000..3e2d81f --- /dev/null +++ b/python/tests/test_polaris.py @@ -0,0 +1,498 @@ +""" +Tests for Polaris Catalog namespace implementation. 
+""" + +import unittest +from unittest.mock import MagicMock, patch + +from lance_namespace_impls.polaris import ( + PolarisNamespace, + PolarisNamespaceConfig, +) +from lance_namespace_impls.rest_client import ( + RestClientException, + NamespaceNotFoundException, + NamespaceAlreadyExistsException, + TableNotFoundException, + TableAlreadyExistsException, + InvalidInputException, +) +from lance_namespace_urllib3_client.models import ( + ListNamespacesRequest, + CreateNamespaceRequest, + DescribeNamespaceRequest, + DropNamespaceRequest, + ListTablesRequest, + CreateEmptyTableRequest, + DescribeTableRequest, + DeregisterTableRequest, +) + + +class TestPolarisNamespaceConfig(unittest.TestCase): + """Test Polaris namespace configuration.""" + + def test_config_initialization(self): + """Test configuration initialization with required properties.""" + properties = { + "polaris.endpoint": "https://polaris.example.com", + "polaris.root": "/data/lance", + "polaris.auth_token": "test_token", + } + + config = PolarisNamespaceConfig(properties) + + self.assertEqual(config.endpoint, "https://polaris.example.com") + self.assertEqual(config.root, "/data/lance") + self.assertEqual(config.auth_token, "test_token") + + def test_config_defaults(self): + """Test configuration with default values.""" + properties = {"polaris.endpoint": "https://polaris.example.com"} + + config = PolarisNamespaceConfig(properties) + + self.assertEqual(config.root, "/tmp/lance") + self.assertIsNone(config.auth_token) + self.assertEqual(config.connect_timeout, 10000) + self.assertEqual(config.read_timeout, 30000) + self.assertEqual(config.max_retries, 3) + + def test_config_missing_endpoint(self): + """Test configuration fails without endpoint.""" + properties = {} + + with self.assertRaises(ValueError) as context: + PolarisNamespaceConfig(properties) + + self.assertIn("polaris.endpoint", str(context.exception)) + + def test_get_full_api_url(self): + """Test API URL generation.""" + properties = 
{"polaris.endpoint": "https://polaris.example.com/"} + config = PolarisNamespaceConfig(properties) + + self.assertEqual( + config.get_full_api_url(), "https://polaris.example.com/api/catalog" + ) + + +class TestPolarisNamespace(unittest.TestCase): + """Test Polaris namespace implementation.""" + + def setUp(self): + """Set up test fixtures.""" + self.properties = { + "polaris.endpoint": "https://polaris.example.com", + "polaris.root": "/data/lance", + } + + @patch("lance_namespace_impls.polaris.RestClient") + def test_namespace_id(self, mock_rest_client_class): + """Test namespace ID generation.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = PolarisNamespace(**self.properties) + ns_id = namespace.namespace_id() + + self.assertIn("PolarisNamespace", ns_id) + self.assertIn("polaris.example.com", ns_id) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_list_namespaces_catalog_level(self, mock_rest_client_class): + """Test listing namespaces at catalog level.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"namespaces": [["ns1"], ["ns2"], ["ns3"]]} + + namespace = PolarisNamespace(**self.properties) + + request = ListNamespacesRequest() + request.id = ["test_catalog"] + + response = namespace.list_namespaces(request) + + self.assertEqual( + sorted(response.namespaces), + ["test_catalog.ns1", "test_catalog.ns2", "test_catalog.ns3"], + ) + mock_client.get.assert_called_once_with("/v1/test_catalog/namespaces") + + @patch("lance_namespace_impls.polaris.RestClient") + def test_list_namespaces_nested(self, mock_rest_client_class): + """Test listing nested namespaces.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = { + "namespaces": [["parent", "child1"], ["parent", "child2"]] + } + + namespace = PolarisNamespace(**self.properties) + + request = ListNamespacesRequest() + 
request.id = ["test_catalog", "parent"] + + response = namespace.list_namespaces(request) + + self.assertEqual( + sorted(response.namespaces), + ["test_catalog.parent.child1", "test_catalog.parent.child2"], + ) + mock_client.get.assert_called_once_with( + "/v1/test_catalog/namespaces/parent/namespaces" + ) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_create_namespace(self, mock_rest_client_class): + """Test creating a namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.post.return_value = {"properties": {"key": "value"}} + + namespace = PolarisNamespace(**self.properties) + + request = CreateNamespaceRequest() + request.id = ["test_catalog", "test_namespace"] + request.properties = {"key": "value"} + + response = namespace.create_namespace(request) + + self.assertEqual(response.properties, {"key": "value"}) + mock_client.post.assert_called_once_with( + "/v1/test_catalog/namespaces", + {"namespace": ["test_namespace"], "properties": {"key": "value"}}, + ) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_create_namespace_already_exists(self, mock_rest_client_class): + """Test creating a namespace that already exists.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.post.side_effect = RestClientException( + status_code=409, response_body="Conflict" + ) + + namespace = PolarisNamespace(**self.properties) + + request = CreateNamespaceRequest() + request.id = ["test_catalog", "existing_namespace"] + + with self.assertRaises(NamespaceAlreadyExistsException): + namespace.create_namespace(request) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_describe_namespace(self, mock_rest_client_class): + """Test describing a namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = {"properties": {"key": "value"}} + + namespace = 
PolarisNamespace(**self.properties) + + request = DescribeNamespaceRequest() + request.id = ["test_catalog", "test_namespace"] + + response = namespace.describe_namespace(request) + + self.assertEqual(response.properties, {"key": "value"}) + mock_client.get.assert_called_once_with( + "/v1/test_catalog/namespaces/test_namespace" + ) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_describe_namespace_not_found(self, mock_rest_client_class): + """Test describing a namespace that doesn't exist.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = RestClientException( + status_code=404, response_body="Not found" + ) + + namespace = PolarisNamespace(**self.properties) + + request = DescribeNamespaceRequest() + request.id = ["test_catalog", "nonexistent"] + + with self.assertRaises(NamespaceNotFoundException): + namespace.describe_namespace(request) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_drop_namespace(self, mock_rest_client_class): + """Test dropping a namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = PolarisNamespace(**self.properties) + + request = DropNamespaceRequest() + request.id = ["test_catalog", "test_namespace"] + + response = namespace.drop_namespace(request) + + self.assertIsNotNone(response) + mock_client.delete.assert_called_once_with( + "/v1/test_catalog/namespaces/test_namespace" + ) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_drop_namespace_not_found(self, mock_rest_client_class): + """Test dropping a namespace that doesn't exist returns success.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.delete.side_effect = RestClientException( + status_code=404, response_body="Not found" + ) + + namespace = PolarisNamespace(**self.properties) + + request = DropNamespaceRequest() + request.id = ["test_catalog", "nonexistent"] + 
+ response = namespace.drop_namespace(request) + + self.assertIsNotNone(response) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_list_tables(self, mock_rest_client_class): + """Test listing tables in a namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = { + "identifiers": [ + {"name": "table1"}, + {"name": "table2"}, + {"name": "table3"}, + ] + } + + namespace = PolarisNamespace(**self.properties) + + request = ListTablesRequest() + request.id = ["test_catalog", "test_namespace"] + + response = namespace.list_tables(request) + + self.assertEqual(sorted(response.tables), ["table1", "table2", "table3"]) + mock_client.get.assert_called_once_with( + "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables" + ) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_create_empty_table(self, mock_rest_client_class): + """Test creating an empty table.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.post.return_value = {} + + namespace = PolarisNamespace(**self.properties) + + request = CreateEmptyTableRequest() + request.id = ["test_catalog", "test_namespace", "test_table"] + request.location = None + + response = namespace.create_empty_table(request) + + self.assertEqual( + response.location, "/data/lance/test_catalog/test_namespace/test_table" + ) + mock_client.post.assert_called_once() + + @patch("lance_namespace_impls.polaris.RestClient") + def test_create_empty_table_with_location(self, mock_rest_client_class): + """Test creating an empty table with custom location.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.post.return_value = {} + + namespace = PolarisNamespace(**self.properties) + + request = CreateEmptyTableRequest() + request.id = ["test_catalog", "test_namespace", "test_table"] + request.location = "/custom/path/test_table" + + response = 
namespace.create_empty_table(request) + + self.assertEqual(response.location, "/custom/path/test_table") + mock_client.post.assert_called_once_with( + "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables", + { + "name": "test_table", + "format": "lance", + "base-location": "/custom/path/test_table", + "properties": {"table_type": "lance"}, + }, + ) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_create_empty_table_already_exists(self, mock_rest_client_class): + """Test creating a table that already exists.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.post.side_effect = RestClientException( + status_code=409, response_body="Conflict" + ) + + namespace = PolarisNamespace(**self.properties) + + request = CreateEmptyTableRequest() + request.id = ["test_catalog", "test_namespace", "existing_table"] + + with self.assertRaises(TableAlreadyExistsException): + namespace.create_empty_table(request) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_describe_table(self, mock_rest_client_class): + """Test describing a table.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = { + "table": { + "format": "lance", + "base-location": "/data/lance/ns/table", + "properties": {"key": "value"}, + } + } + + namespace = PolarisNamespace(**self.properties) + + request = DescribeTableRequest() + request.id = ["test_catalog", "test_namespace", "test_table"] + + response = namespace.describe_table(request) + + self.assertEqual(response.location, "/data/lance/ns/table") + self.assertEqual(response.storage_options, {"key": "value"}) + mock_client.get.assert_called_once_with( + "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables/test_table" + ) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_describe_table_not_lance(self, mock_rest_client_class): + """Test describing a table that is not a Lance 
table.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = { + "table": { + "format": "iceberg", + "base-location": "/data/iceberg/ns/table", + "properties": {}, + } + } + + namespace = PolarisNamespace(**self.properties) + + request = DescribeTableRequest() + request.id = ["test_catalog", "test_namespace", "test_table"] + + with self.assertRaises(InvalidInputException): + namespace.describe_table(request) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_describe_table_not_found(self, mock_rest_client_class): + """Test describing a table that doesn't exist.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = RestClientException( + status_code=404, response_body="Not found" + ) + + namespace = PolarisNamespace(**self.properties) + + request = DescribeTableRequest() + request.id = ["test_catalog", "test_namespace", "nonexistent"] + + with self.assertRaises(TableNotFoundException): + namespace.describe_table(request) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_deregister_table(self, mock_rest_client_class): + """Test deregistering a table.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.return_value = { + "table": {"base-location": "/data/lance/ns/table"} + } + + namespace = PolarisNamespace(**self.properties) + + request = DeregisterTableRequest() + request.id = ["test_catalog", "test_namespace", "test_table"] + + response = namespace.deregister_table(request) + + self.assertEqual(response.location, "/data/lance/ns/table") + mock_client.get.assert_called_once_with( + "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables/test_table" + ) + mock_client.delete.assert_called_once_with( + "/polaris/v1/test_catalog/namespaces/test_namespace/generic-tables/test_table" + ) + + @patch("lance_namespace_impls.polaris.RestClient") + def 
test_deregister_table_not_found(self, mock_rest_client_class): + """Test deregistering a table that doesn't exist.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + mock_client.get.side_effect = RestClientException( + status_code=404, response_body="Not found" + ) + + namespace = PolarisNamespace(**self.properties) + + request = DeregisterTableRequest() + request.id = ["test_catalog", "test_namespace", "nonexistent"] + + with self.assertRaises(TableNotFoundException): + namespace.deregister_table(request) + + @patch("lance_namespace_impls.polaris.RestClient") + def test_close(self, mock_rest_client_class): + """Test closing the namespace.""" + mock_client = MagicMock() + mock_rest_client_class.return_value = mock_client + + namespace = PolarisNamespace(**self.properties) + namespace.close() + + mock_client.close.assert_called_once() + + def test_invalid_table_id(self): + """Test that table operations fail with invalid identifiers.""" + namespace = PolarisNamespace(**self.properties) + + request = CreateEmptyTableRequest() + request.id = ["catalog", "only_namespace"] # Missing table name + + with self.assertRaises(InvalidInputException): + namespace.create_empty_table(request) + + def test_invalid_namespace_id(self): + """Test that namespace operations fail with invalid identifiers.""" + namespace = PolarisNamespace(**self.properties) + + request = CreateNamespaceRequest() + request.id = ["only_catalog"] # Missing namespace level + + with self.assertRaises(InvalidInputException): + namespace.create_namespace(request) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_polaris_integration.py b/python/tests/test_polaris_integration.py new file mode 100644 index 0000000..af35f06 --- /dev/null +++ b/python/tests/test_polaris_integration.py @@ -0,0 +1,204 @@ +""" +Integration tests for Polaris Catalog namespace implementation. 
+ +To run these tests, start Polaris with: + cd docker/polaris && docker-compose up -d + +Tests are automatically skipped if Polaris is not available. +""" + +import os +import uuid +import urllib.request +import urllib.error +import unittest + +import pytest + +from lance_namespace_impls.polaris import PolarisNamespace +from lance_namespace_urllib3_client.models import ( + CreateEmptyTableRequest, + CreateNamespaceRequest, + DeregisterTableRequest, + DescribeNamespaceRequest, + DescribeTableRequest, + DropNamespaceRequest, + ListNamespacesRequest, + ListTablesRequest, +) + + +POLARIS_ENDPOINT = os.environ.get("POLARIS_ENDPOINT", "http://localhost:8181") +CLIENT_ID = os.environ.get("POLARIS_CLIENT_ID", "root") +CLIENT_SECRET = os.environ.get("POLARIS_CLIENT_SECRET", "s3cr3t") + + +def check_polaris_available(): + """Check if Polaris is available.""" + try: + url = f"{POLARIS_ENDPOINT}/api/catalog/v1/test_catalog/namespaces" + req = urllib.request.Request(url, method="GET") + try: + with urllib.request.urlopen(req, timeout=2) as response: + return response.status != 404 + except urllib.error.HTTPError as e: + # 401/403 means server is up but needs auth, 404 means not found + return e.code != 404 and e.code > 0 + except Exception: + return False + + +def get_oauth_token(): + """Get OAuth token from Polaris.""" + try: + url = f"{POLARIS_ENDPOINT}/api/catalog/v1/oauth/tokens" + data = f"grant_type=client_credentials&client_id={CLIENT_ID}&client_secret={CLIENT_SECRET}&scope=PRINCIPAL_ROLE:ALL" + req = urllib.request.Request( + url, + data=data.encode("utf-8"), + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=5) as response: + import json + + body = response.read().decode("utf-8") + token_data = json.loads(body) + return token_data.get("access_token") + except Exception as e: + print(f"Failed to get OAuth token: {e}") + return None + + +polaris_available = check_polaris_available() + + 
+@pytest.mark.integration +@unittest.skipUnless( + polaris_available, f"Polaris is not available at {POLARIS_ENDPOINT}" +) +class TestPolarisNamespaceIntegration(unittest.TestCase): + """Integration tests for PolarisNamespace against a running Polaris instance.""" + + @classmethod + def setUpClass(cls): + """Set up class-level resources.""" + cls.token = get_oauth_token() + if not cls.token: + raise unittest.SkipTest("Failed to get OAuth token from Polaris") + + def setUp(self): + """Set up test fixtures.""" + unique_id = uuid.uuid4().hex[:8] + self.test_catalog = "test_catalog" + self.test_namespace = f"test_ns_{unique_id}" + + properties = { + "polaris.endpoint": POLARIS_ENDPOINT, + "polaris.auth_token": self.token, + "polaris.root": "/data/warehouse", + } + + self.namespace = PolarisNamespace(**properties) + + def tearDown(self): + """Clean up test resources.""" + try: + # Drop test namespace if it exists + drop_request = DropNamespaceRequest() + drop_request.id = [self.test_catalog, self.test_namespace] + self.namespace.drop_namespace(drop_request) + except Exception: + pass + + if self.namespace: + self.namespace.close() + + def test_namespace_operations(self): + """Test namespace CRUD operations.""" + # Create namespace + create_request = CreateNamespaceRequest() + create_request.id = [self.test_catalog, self.test_namespace] + create_request.properties = {"description": "Test namespace"} + + create_response = self.namespace.create_namespace(create_request) + self.assertIsNotNone(create_response) + + # Describe namespace + describe_request = DescribeNamespaceRequest() + describe_request.id = [self.test_catalog, self.test_namespace] + + describe_response = self.namespace.describe_namespace(describe_request) + self.assertIsNotNone(describe_response) + + # List namespaces + list_request = ListNamespacesRequest() + list_request.id = [self.test_catalog] + list_response = self.namespace.list_namespaces(list_request) + full_ns_name = 
f"{self.test_catalog}.{self.test_namespace}" + self.assertIn(full_ns_name, list_response.namespaces) + + # Drop namespace + drop_request = DropNamespaceRequest() + drop_request.id = [self.test_catalog, self.test_namespace] + self.namespace.drop_namespace(drop_request) + + def test_table_operations(self): + """Test table CRUD operations.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [self.test_catalog, self.test_namespace] + self.namespace.create_namespace(ns_request) + + table_name = f"test_table_{uuid.uuid4().hex[:8]}" + + # Create empty table (DeclareTable) + create_request = CreateEmptyTableRequest() + create_request.id = [self.test_catalog, self.test_namespace, table_name] + create_request.location = f"/data/warehouse/{self.test_namespace}/{table_name}" + + create_response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(create_response.location) + + # Describe table + describe_request = DescribeTableRequest() + describe_request.id = [self.test_catalog, self.test_namespace, table_name] + + describe_response = self.namespace.describe_table(describe_request) + self.assertIsNotNone(describe_response.location) + + # List tables + list_request = ListTablesRequest() + list_request.id = [self.test_catalog, self.test_namespace] + + list_response = self.namespace.list_tables(list_request) + self.assertIn(table_name, list_response.tables) + + # Deregister table + deregister_request = DeregisterTableRequest() + deregister_request.id = [self.test_catalog, self.test_namespace, table_name] + self.namespace.deregister_table(deregister_request) + + def test_create_empty_table_with_location(self): + """Test creating an empty table with a specific location.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [self.test_catalog, self.test_namespace] + self.namespace.create_namespace(ns_request) + + table_name = "lance_table" + create_request = CreateEmptyTableRequest() + 
create_request.id = [self.test_catalog, self.test_namespace, table_name] + create_request.location = f"/data/warehouse/{self.test_namespace}/{table_name}" + + response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(response.location) + + # Clean up table + deregister_request = DeregisterTableRequest() + deregister_request.id = [self.test_catalog, self.test_namespace, table_name] + self.namespace.deregister_table(deregister_request) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py index 561ff73..13a9155 100644 --- a/python/tests/test_schema.py +++ b/python/tests/test_schema.py @@ -1,10 +1,11 @@ """ Tests for schema conversion utilities. """ + import pytest import pyarrow as pa -from lance_namespace.schema import ( +from lance_namespace_impls.schema import ( convert_json_arrow_schema_to_pyarrow, convert_json_arrow_type_to_pyarrow, ) @@ -17,90 +18,149 @@ class TestJsonArrowToPyArrow: """Test JSON Arrow to PyArrow conversions.""" - + def test_convert_basic_types(self): """Test conversion of basic Arrow types.""" # Test null - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='null')) == pa.null() - + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="null")) + == pa.null() + ) + # Test boolean - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='bool')) == pa.bool_() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='boolean')) == pa.bool_() - + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="bool")) + == pa.bool_() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="boolean")) + == pa.bool_() + ) + # Test integers - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='int8')) == pa.int8() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='uint8')) == pa.uint8() - assert 
convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='int16')) == pa.int16() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='uint16')) == pa.uint16() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='int32')) == pa.int32() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='uint32')) == pa.uint32() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='int64')) == pa.int64() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='uint64')) == pa.uint64() - + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="int8")) + == pa.int8() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="uint8")) + == pa.uint8() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="int16")) + == pa.int16() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="uint16")) + == pa.uint16() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="int32")) + == pa.int32() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="uint32")) + == pa.uint32() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="int64")) + == pa.int64() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="uint64")) + == pa.uint64() + ) + # Test floats - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='float32')) == pa.float32() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='float64')) == pa.float64() - + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="float32")) + == pa.float32() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="float64")) + == pa.float64() + ) + # Test strings and binary - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='utf8')) == pa.utf8() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='binary')) == pa.binary() - + assert ( + 
convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="utf8")) + == pa.utf8() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="binary")) + == pa.binary() + ) + # Test dates - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='date32')) == pa.date32() - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='date64')) == pa.date64() - + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="date32")) + == pa.date32() + ) + assert ( + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="date64")) + == pa.date64() + ) + def test_convert_timestamp_types(self): """Test conversion of timestamp types.""" # Without timezone - assert convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='timestamp')) == pa.timestamp('us') - + assert convert_json_arrow_type_to_pyarrow( + JsonArrowDataType(type="timestamp") + ) == pa.timestamp("us") + # With timezone assert convert_json_arrow_type_to_pyarrow( - JsonArrowDataType(type='timestamp[tz=UTC]') - ) == pa.timestamp('us', tz='UTC') - + JsonArrowDataType(type="timestamp[tz=UTC]") + ) == pa.timestamp("us", tz="UTC") + assert convert_json_arrow_type_to_pyarrow( - JsonArrowDataType(type='timestamp[tz=America/New_York]') - ) == pa.timestamp('us', tz='America/New_York') - + JsonArrowDataType(type="timestamp[tz=America/New_York]") + ) == pa.timestamp("us", tz="America/New_York") + def test_convert_decimal_types(self): """Test conversion of decimal types.""" # With precision and scale assert convert_json_arrow_type_to_pyarrow( - JsonArrowDataType(type='decimal(10, 2)') + JsonArrowDataType(type="decimal(10, 2)") ) == pa.decimal128(10, 2) - + assert convert_json_arrow_type_to_pyarrow( - JsonArrowDataType(type='decimal(38,10)') + JsonArrowDataType(type="decimal(38,10)") ) == pa.decimal128(38, 10) - + # Default precision/scale assert convert_json_arrow_type_to_pyarrow( - JsonArrowDataType(type='decimal') + JsonArrowDataType(type="decimal") ) == pa.decimal128(38, 10) 
- + def test_convert_unsupported_type(self): """Test that unsupported types raise an error.""" with pytest.raises(ValueError, match="Unsupported Arrow type: unknown_type"): - convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type='unknown_type')) - + convert_json_arrow_type_to_pyarrow(JsonArrowDataType(type="unknown_type")) + def test_convert_json_arrow_schema(self): """Test conversion of complete JSON Arrow schema.""" json_schema = JsonArrowSchema( fields=[ - JsonArrowField(name='id', type=JsonArrowDataType(type='int64'), nullable=False), - JsonArrowField(name='name', type=JsonArrowDataType(type='utf8'), nullable=True), - JsonArrowField(name='score', type=JsonArrowDataType(type='float64'), nullable=True), + JsonArrowField( + name="id", type=JsonArrowDataType(type="int64"), nullable=False + ), + JsonArrowField( + name="name", type=JsonArrowDataType(type="utf8"), nullable=True + ), + JsonArrowField( + name="score", type=JsonArrowDataType(type="float64"), nullable=True + ), ], - metadata={'created_by': 'test'} + metadata={"created_by": "test"}, ) - + pyarrow_schema = convert_json_arrow_schema_to_pyarrow(json_schema) - + assert len(pyarrow_schema) == 3 - assert pyarrow_schema.field('id').type == pa.int64() - assert pyarrow_schema.field('id').nullable == False - assert pyarrow_schema.field('name').type == pa.utf8() - assert pyarrow_schema.field('name').nullable == True - assert pyarrow_schema.field('score').type == pa.float64() - assert pyarrow_schema.metadata == {b'created_by': b'test'} \ No newline at end of file + assert pyarrow_schema.field("id").type == pa.int64() + assert not pyarrow_schema.field("id").nullable + assert pyarrow_schema.field("name").type == pa.utf8() + assert pyarrow_schema.field("name").nullable + assert pyarrow_schema.field("score").type == pa.float64() + assert pyarrow_schema.metadata == {b"created_by": b"test"} diff --git a/python/tests/test_unity.py b/python/tests/test_unity.py index cbfc672..4a9f5e0 100644 --- a/python/tests/test_unity.py 
+++ b/python/tests/test_unity.py @@ -3,22 +3,19 @@ """ import unittest -from unittest.mock import Mock, patch, MagicMock -import json -import io +from unittest.mock import patch, MagicMock import pyarrow as pa -import pyarrow.ipc as ipc -from lance_namespace.unity import ( +from lance_namespace_impls.unity import ( UnityNamespace, UnityNamespaceConfig, - RestClient, - RestClientException, - LanceNamespaceException, SchemaInfo, TableInfo, - ColumnInfo, +) +from lance_namespace_impls.rest_client import ( + RestClient, + RestClientException, ) from lance_namespace_urllib3_client.models import ( ListNamespacesRequest, @@ -26,341 +23,290 @@ DescribeNamespaceRequest, DropNamespaceRequest, ListTablesRequest, - CreateTableRequest, CreateEmptyTableRequest, DescribeTableRequest, - DropTableRequest, ) class TestUnityNamespaceConfig(unittest.TestCase): """Test Unity namespace configuration.""" - + def test_config_initialization(self): """Test configuration initialization with required properties.""" properties = { "unity.endpoint": "https://unity.example.com", - "unity.catalog": "test_catalog", "unity.root": "/data/lance", - "unity.auth_token": "test_token" + "unity.auth_token": "test_token", } - + config = UnityNamespaceConfig(properties) - + self.assertEqual(config.endpoint, "https://unity.example.com") - self.assertEqual(config.catalog, "test_catalog") self.assertEqual(config.root, "/data/lance") self.assertEqual(config.auth_token, "test_token") - + def test_config_defaults(self): """Test configuration with default values.""" - properties = { - "unity.endpoint": "https://unity.example.com" - } - + properties = {"unity.endpoint": "https://unity.example.com"} + config = UnityNamespaceConfig(properties) - - self.assertEqual(config.catalog, "unity") + self.assertEqual(config.root, "/tmp/lance") self.assertIsNone(config.auth_token) self.assertEqual(config.connect_timeout, 10000) self.assertEqual(config.read_timeout, 300000) self.assertEqual(config.max_retries, 3) - + def 
test_config_missing_endpoint(self): """Test configuration fails without endpoint.""" properties = {} - + with self.assertRaises(ValueError) as context: UnityNamespaceConfig(properties) - + self.assertIn("unity.endpoint", str(context.exception)) - + def test_get_full_api_url(self): """Test API URL generation.""" - properties = { - "unity.endpoint": "https://unity.example.com" - } + properties = {"unity.endpoint": "https://unity.example.com"} config = UnityNamespaceConfig(properties) - - self.assertEqual(config.get_full_api_url(), "https://unity.example.com/api/2.1") - + + self.assertEqual( + config.get_full_api_url(), "https://unity.example.com/api/2.1/unity-catalog" + ) + # Test with endpoint already containing /api/2.1 + properties = {"unity.endpoint": "https://unity.example.com/api/2.1"} + config = UnityNamespaceConfig(properties) + + self.assertEqual( + config.get_full_api_url(), "https://unity.example.com/api/2.1/unity-catalog" + ) + + # Test with endpoint already containing full path properties = { - "unity.endpoint": "https://unity.example.com/api/2.1" + "unity.endpoint": "https://unity.example.com/api/2.1/unity-catalog" } config = UnityNamespaceConfig(properties) - - self.assertEqual(config.get_full_api_url(), "https://unity.example.com/api/2.1") + + self.assertEqual( + config.get_full_api_url(), "https://unity.example.com/api/2.1/unity-catalog" + ) class TestRestClient(unittest.TestCase): """Test REST client functionality.""" - - @patch('lance_namespace.unity.urllib3.PoolManager') + + @patch("lance_namespace_impls.rest_client.urllib3.PoolManager") def test_get_request(self, mock_pool_manager): """Test GET request.""" mock_http = MagicMock() mock_pool_manager.return_value = mock_http - + mock_response = MagicMock() mock_response.status = 200 mock_response.data = b'{"name": "test_schema"}' mock_http.request.return_value = mock_response - + client = RestClient("https://api.example.com") result = client.get("/schemas/test") - + self.assertEqual(result, {"name": 
"test_schema"}) mock_http.request.assert_called_once() - - @patch('lance_namespace.unity.urllib3.PoolManager') + + @patch("lance_namespace_impls.rest_client.urllib3.PoolManager") def test_post_request(self, mock_pool_manager): """Test POST request.""" mock_http = MagicMock() mock_pool_manager.return_value = mock_http - + mock_response = MagicMock() mock_response.status = 201 mock_response.data = b'{"id": "123"}' mock_http.request.return_value = mock_response - + client = RestClient("https://api.example.com") result = client.post("/schemas", {"name": "test"}) - + self.assertEqual(result, {"id": "123"}) mock_http.request.assert_called_once() - - @patch('lance_namespace.unity.urllib3.PoolManager') + + @patch("lance_namespace_impls.rest_client.urllib3.PoolManager") def test_delete_request(self, mock_pool_manager): """Test DELETE request.""" mock_http = MagicMock() mock_pool_manager.return_value = mock_http - + mock_response = MagicMock() mock_response.status = 204 - mock_response.data = b'' + mock_response.data = b"" mock_http.request.return_value = mock_response - + client = RestClient("https://api.example.com") client.delete("/schemas/test") - + mock_http.request.assert_called_once() - - @patch('lance_namespace.unity.urllib3.PoolManager') + + @patch("lance_namespace_impls.rest_client.urllib3.PoolManager") def test_error_response(self, mock_pool_manager): """Test error response handling.""" mock_http = MagicMock() mock_pool_manager.return_value = mock_http - + mock_response = MagicMock() mock_response.status = 404 mock_response.data = b'{"error": "Not found"}' mock_http.request.return_value = mock_response - + client = RestClient("https://api.example.com") - + with self.assertRaises(RestClientException) as context: client.get("/schemas/test") - + self.assertEqual(context.exception.status_code, 404) self.assertIn("Not found", context.exception.response_body) class TestUnityNamespace(unittest.TestCase): """Test Unity namespace implementation.""" - + def setUp(self): 
"""Set up test fixtures.""" self.properties = { "unity.endpoint": "https://unity.example.com", - "unity.catalog": "test_catalog", - "unity.root": "/data/lance" + "unity.root": "/data/lance", } - - @patch('lance_namespace.unity.RestClient') + + @patch("lance_namespace_impls.unity.RestClient") def test_list_namespaces_top_level(self, mock_rest_client_class): """Test listing top-level namespaces (catalogs).""" mock_client = MagicMock() mock_rest_client_class.return_value = mock_client - + + mock_client.get.return_value = { + "catalogs": [{"name": "catalog1"}, {"name": "catalog2"}] + } + namespace = UnityNamespace(**self.properties) - + request = ListNamespacesRequest() request.id = [] - + response = namespace.list_namespaces(request) - - self.assertEqual(response.namespaces, ["test_catalog"]) - mock_client.get.assert_not_called() - - @patch('lance_namespace.unity.RestClient') + + self.assertEqual(sorted(response.namespaces), ["catalog1", "catalog2"]) + mock_client.get.assert_called_once_with("/catalogs", params=None) + + @patch("lance_namespace_impls.unity.RestClient") def test_list_namespaces_schemas(self, mock_rest_client_class): """Test listing schemas in a catalog.""" mock_client = MagicMock() mock_rest_client_class.return_value = mock_client - + mock_client.get.return_value = { - "schemas": [ - {"name": "schema1"}, - {"name": "schema2"} - ] + "schemas": [{"name": "schema1"}, {"name": "schema2"}] } - + namespace = UnityNamespace(**self.properties) - + request = ListNamespacesRequest() request.id = ["test_catalog"] - + response = namespace.list_namespaces(request) - + self.assertEqual(sorted(response.namespaces), ["schema1", "schema2"]) mock_client.get.assert_called_once_with( - '/schemas', - params={'catalog_name': 'test_catalog'} + "/schemas", params={"catalog_name": "test_catalog"} ) - - @patch('lance_namespace.unity.RestClient') + + @patch("lance_namespace_impls.unity.RestClient") def test_create_namespace(self, mock_rest_client_class): """Test creating a 
namespace.""" mock_client = MagicMock() mock_rest_client_class.return_value = mock_client - + mock_schema_info = SchemaInfo( - name="test_schema", - catalog_name="test_catalog", - properties={"key": "value"} + name="test_schema", catalog_name="test_catalog", properties={"key": "value"} ) mock_client.post.return_value = mock_schema_info - + namespace = UnityNamespace(**self.properties) - + request = CreateNamespaceRequest() request.id = ["test_catalog", "test_schema"] request.properties = {"key": "value"} - + response = namespace.create_namespace(request) - + self.assertEqual(response.properties, {"key": "value"}) - - @patch('lance_namespace.unity.RestClient') + + @patch("lance_namespace_impls.unity.RestClient") def test_describe_namespace(self, mock_rest_client_class): """Test describing a namespace.""" mock_client = MagicMock() mock_rest_client_class.return_value = mock_client - + mock_schema_info = SchemaInfo( - name="test_schema", - catalog_name="test_catalog", - properties={"key": "value"} + name="test_schema", catalog_name="test_catalog", properties={"key": "value"} ) mock_client.get.return_value = mock_schema_info - + namespace = UnityNamespace(**self.properties) - + request = DescribeNamespaceRequest() request.id = ["test_catalog", "test_schema"] - + response = namespace.describe_namespace(request) - + self.assertEqual(response.properties, {"key": "value"}) - mock_client.get.assert_called_once_with( - "/schemas/test_catalog.test_schema", - response_class=SchemaInfo - ) - - @patch('lance_namespace.unity.RestClient') + mock_client.get.assert_called_once() + + @patch("lance_namespace_impls.unity.RestClient") def test_drop_namespace(self, mock_rest_client_class): """Test dropping a namespace.""" mock_client = MagicMock() mock_rest_client_class.return_value = mock_client - + namespace = UnityNamespace(**self.properties) - + request = DropNamespaceRequest() request.id = ["test_catalog", "test_schema"] - + response = namespace.drop_namespace(request) - + 
self.assertIsNotNone(response) - mock_client.delete.assert_called_once_with( - "/schemas/test_catalog.test_schema", - params={} - ) - - @patch('lance_namespace.unity.RestClient') + mock_client.delete.assert_called_once() + + @patch("lance_namespace_impls.unity.RestClient") def test_list_tables(self, mock_rest_client_class): """Test listing tables in a namespace.""" mock_client = MagicMock() mock_rest_client_class.return_value = mock_client - + mock_client.get.return_value = { "tables": [ {"name": "table1", "properties": {"table_type": "lance"}}, {"name": "table2", "properties": {"table_type": "delta"}}, - {"name": "table3", "properties": {"table_type": "lance"}} + {"name": "table3", "properties": {"table_type": "lance"}}, ] } - + namespace = UnityNamespace(**self.properties) - + request = ListTablesRequest() request.id = ["test_catalog", "test_schema"] - + response = namespace.list_tables(request) - + # Should only return Lance tables self.assertEqual(sorted(response.tables), ["table1", "table3"]) - - @patch('lance_namespace.unity.lance') - @patch('lance_namespace.unity.RestClient') - def test_create_table(self, mock_rest_client_class, mock_lance): - """Test creating a table.""" - mock_client = MagicMock() - mock_rest_client_class.return_value = mock_client - - # Create test Arrow schema and IPC data - arrow_schema = pa.schema([ - pa.field("id", pa.int64()), - pa.field("name", pa.string()) - ]) - - # Create IPC stream data - buf = io.BytesIO() - writer = ipc.new_stream(buf, arrow_schema) - writer.close() - ipc_data = buf.getvalue() - - mock_table_info = TableInfo( - name="test_table", - catalog_name="test_catalog", - schema_name="test_schema", - table_type="EXTERNAL", - data_source_format="TEXT", - columns=[], - storage_location="/data/lance/test_catalog/test_schema/test_table", - properties={"table_type": "lance", "version": "0"} - ) - mock_client.post.return_value = mock_table_info - - namespace = UnityNamespace(**self.properties) - - request = 
CreateTableRequest() - request.id = ["test_catalog", "test_schema", "test_table"] - request.properties = {"custom": "property"} - - response = namespace.create_table(request, ipc_data) - - self.assertEqual(response.location, "/data/lance/test_catalog/test_schema/test_table") - self.assertEqual(response.version, 1) - mock_lance.write_dataset.assert_called_once() - - @patch('lance_namespace.unity.RestClient') + + @patch("lance_namespace_impls.unity.RestClient") def test_create_empty_table(self, mock_rest_client_class): """Test creating an empty table.""" mock_client = MagicMock() mock_rest_client_class.return_value = mock_client - + mock_table_info = TableInfo( name="test_table", catalog_name="test_catalog", @@ -369,26 +315,27 @@ def test_create_empty_table(self, mock_rest_client_class): data_source_format="TEXT", columns=[], storage_location="/data/lance/test_catalog/test_schema/test_table", - properties={"table_type": "lance"} + properties={"table_type": "lance"}, ) mock_client.post.return_value = mock_table_info - + namespace = UnityNamespace(**self.properties) - + request = CreateEmptyTableRequest() request.id = ["test_catalog", "test_schema", "test_table"] - + response = namespace.create_empty_table(request) - - self.assertEqual(response.location, "/data/lance/test_catalog/test_schema/test_table") - - @patch('lance_namespace.unity.lance') - @patch('lance_namespace.unity.RestClient') - def test_describe_table(self, mock_rest_client_class, mock_lance): + + self.assertEqual( + response.location, "/data/lance/test_catalog/test_schema/test_table" + ) + + @patch("lance_namespace_impls.unity.RestClient") + def test_describe_table(self, mock_rest_client_class): """Test describing a table.""" mock_client = MagicMock() mock_rest_client_class.return_value = mock_client - + mock_table_info = TableInfo( name="test_table", catalog_name="test_catalog", @@ -397,85 +344,87 @@ def test_describe_table(self, mock_rest_client_class, mock_lance): data_source_format="TEXT", 
columns=[], storage_location="/data/lance/test_catalog/test_schema/test_table", - properties={"table_type": "lance"} + properties={"table_type": "lance"}, ) mock_client.get.return_value = mock_table_info - - # Mock Lance dataset - mock_dataset = MagicMock() - mock_dataset.schema = pa.schema([pa.field("id", pa.int64())]) - mock_lance.dataset.return_value = mock_dataset - + namespace = UnityNamespace(**self.properties) - + request = DescribeTableRequest() request.id = ["test_catalog", "test_schema", "test_table"] - + response = namespace.describe_table(request) - - self.assertEqual(response.location, "/data/lance/test_catalog/test_schema/test_table") - self.assertEqual(response.properties, {"table_type": "lance"}) - - @patch('lance_namespace.unity.os') - @patch('lance_namespace.unity.shutil') - @patch('lance_namespace.unity.RestClient') - def test_drop_table(self, mock_rest_client_class, mock_shutil, mock_os): - """Test dropping a table.""" - mock_client = MagicMock() - mock_rest_client_class.return_value = mock_client - - mock_table_info = TableInfo( - name="test_table", - catalog_name="test_catalog", - schema_name="test_schema", - table_type="EXTERNAL", - data_source_format="TEXT", - columns=[], - storage_location="/data/lance/test_catalog/test_schema/test_table", - properties={"table_type": "lance"} + + self.assertEqual( + response.location, "/data/lance/test_catalog/test_schema/test_table" ) - mock_client.get.return_value = mock_table_info - mock_os.path.exists.return_value = True - - namespace = UnityNamespace(**self.properties) - - request = DropTableRequest() - request.id = ["test_catalog", "test_schema", "test_table"] - - response = namespace.drop_table(request) - - self.assertEqual(response.location, "/data/lance/test_catalog/test_schema/test_table") - mock_client.delete.assert_called_once_with("/tables/test_catalog.test_schema.test_table") - mock_shutil.rmtree.assert_called_once_with("/data/lance/test_catalog/test_schema/test_table") - + def 
test_arrow_type_conversion(self): """Test Arrow type to Unity type conversion.""" namespace = UnityNamespace(**self.properties) - + # Test various Arrow types - self.assertEqual(namespace._convert_arrow_type_to_unity_type(pa.string()), "STRING") + self.assertEqual( + namespace._convert_arrow_type_to_unity_type(pa.string()), "STRING" + ) self.assertEqual(namespace._convert_arrow_type_to_unity_type(pa.int32()), "INT") - self.assertEqual(namespace._convert_arrow_type_to_unity_type(pa.int64()), "BIGINT") - self.assertEqual(namespace._convert_arrow_type_to_unity_type(pa.float32()), "FLOAT") - self.assertEqual(namespace._convert_arrow_type_to_unity_type(pa.float64()), "DOUBLE") - self.assertEqual(namespace._convert_arrow_type_to_unity_type(pa.bool_()), "BOOLEAN") - self.assertEqual(namespace._convert_arrow_type_to_unity_type(pa.date32()), "DATE") - self.assertEqual(namespace._convert_arrow_type_to_unity_type(pa.timestamp('us')), "TIMESTAMP") - + self.assertEqual( + namespace._convert_arrow_type_to_unity_type(pa.int64()), "LONG" + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type(pa.float32()), "FLOAT" + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type(pa.float64()), "DOUBLE" + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type(pa.bool_()), "BOOLEAN" + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type(pa.date32()), "DATE" + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type(pa.timestamp("us")), "TIMESTAMP" + ) + def test_arrow_type_to_json_conversion(self): """Test Arrow type to Unity JSON type conversion.""" namespace = UnityNamespace(**self.properties) - + # Test various Arrow types - self.assertEqual(namespace._convert_arrow_type_to_unity_type_json(pa.string()), '{"type":"string"}') - self.assertEqual(namespace._convert_arrow_type_to_unity_type_json(pa.int32()), '{"type":"integer"}') - self.assertEqual(namespace._convert_arrow_type_to_unity_type_json(pa.int64()), '{"type":"long"}') 
- self.assertEqual(namespace._convert_arrow_type_to_unity_type_json(pa.float32()), '{"type":"float"}') - self.assertEqual(namespace._convert_arrow_type_to_unity_type_json(pa.float64()), '{"type":"double"}') - self.assertEqual(namespace._convert_arrow_type_to_unity_type_json(pa.bool_()), '{"type":"boolean"}') - self.assertEqual(namespace._convert_arrow_type_to_unity_type_json(pa.date32()), '{"type":"date"}') - self.assertEqual(namespace._convert_arrow_type_to_unity_type_json(pa.timestamp('us')), '{"type":"timestamp"}') - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file + self.assertEqual( + namespace._convert_arrow_type_to_unity_type_json(pa.string()), + '{"type":"string"}', + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type_json(pa.int32()), + '{"type":"integer"}', + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type_json(pa.int64()), + '{"type":"long"}', + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type_json(pa.float32()), + '{"type":"float"}', + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type_json(pa.float64()), + '{"type":"double"}', + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type_json(pa.bool_()), + '{"type":"boolean"}', + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type_json(pa.date32()), + '{"type":"date"}', + ) + self.assertEqual( + namespace._convert_arrow_type_to_unity_type_json(pa.timestamp("us")), + '{"type":"timestamp"}', + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/test_unity_integration.py b/python/tests/test_unity_integration.py new file mode 100644 index 0000000..31f99c4 --- /dev/null +++ b/python/tests/test_unity_integration.py @@ -0,0 +1,203 @@ +""" +Integration tests for Unity Catalog namespace implementation. + +To run these tests, start Unity Catalog with: + cd docker/unity && docker-compose up -d + +Tests are automatically skipped if Unity Catalog is not available. 
+""" + +import os +import uuid +import urllib.request +import urllib.error +import unittest + +import pytest + +from lance_namespace_impls.unity import UnityNamespace +from lance_namespace_urllib3_client.models import ( + CreateEmptyTableRequest, + CreateNamespaceRequest, + DeregisterTableRequest, + DescribeNamespaceRequest, + DescribeTableRequest, + DropNamespaceRequest, + ListNamespacesRequest, + ListTablesRequest, +) + + +UNITY_ENDPOINT = os.environ.get("UNITY_ENDPOINT", "http://localhost:8080") +UNITY_CATALOG = os.environ.get("UNITY_CATALOG", "lance_test") + + +def check_unity_available(): + """Check if Unity Catalog is available.""" + try: + url = f"{UNITY_ENDPOINT}/api/2.1/unity-catalog/catalogs" + req = urllib.request.Request(url, method="GET") + try: + with urllib.request.urlopen(req, timeout=2) as response: + return response.status == 200 + except urllib.error.HTTPError as e: + return e.code != 404 and e.code > 0 + except Exception: + return False + + +def check_catalog_exists(): + """Check if the test catalog exists.""" + try: + url = f"{UNITY_ENDPOINT}/api/2.1/unity-catalog/catalogs/{UNITY_CATALOG}" + req = urllib.request.Request(url, method="GET") + with urllib.request.urlopen(req, timeout=2) as response: + return response.status == 200 + except Exception: + return False + + +unity_available = check_unity_available() + + +@pytest.mark.integration +@unittest.skipUnless( + unity_available, f"Unity Catalog is not available at {UNITY_ENDPOINT}" +) +class TestUnityNamespaceIntegration(unittest.TestCase): + """Integration tests for UnityNamespace against a running Unity Catalog instance.""" + + @classmethod + def setUpClass(cls): + """Set up class-level resources.""" + if not check_catalog_exists(): + raise unittest.SkipTest( + f"Test catalog '{UNITY_CATALOG}' does not exist in Unity Catalog" + ) + + def setUp(self): + """Set up test fixtures.""" + unique_id = uuid.uuid4().hex[:8] + self.test_schema = f"test_schema_{unique_id}" + + properties = { + 
"unity.endpoint": UNITY_ENDPOINT, + "unity.root": "/tmp/lance", + } + + self.namespace = UnityNamespace(**properties) + + def tearDown(self): + """Clean up test resources.""" + try: + # Drop test schema if it exists + drop_request = DropNamespaceRequest() + drop_request.id = [UNITY_CATALOG, self.test_schema] + self.namespace.drop_namespace(drop_request) + except Exception: + pass + + if self.namespace: + self.namespace.close() + + def test_list_catalogs(self): + """Test listing catalogs at root level.""" + list_request = ListNamespacesRequest() + list_request.id = [] + + response = self.namespace.list_namespaces(list_request) + + # Should list all catalogs including our test catalog + self.assertIn(UNITY_CATALOG, response.namespaces) + + def test_namespace_operations(self): + """Test namespace CRUD operations.""" + # Create namespace (schema) + create_request = CreateNamespaceRequest() + create_request.id = [UNITY_CATALOG, self.test_schema] + create_request.properties = {} + + create_response = self.namespace.create_namespace(create_request) + self.assertIsNotNone(create_response) + + # Describe namespace + describe_request = DescribeNamespaceRequest() + describe_request.id = [UNITY_CATALOG, self.test_schema] + + describe_response = self.namespace.describe_namespace(describe_request) + self.assertIsNotNone(describe_response) + + # List namespaces (schemas) + list_request = ListNamespacesRequest() + list_request.id = [UNITY_CATALOG] + list_response = self.namespace.list_namespaces(list_request) + self.assertIn(self.test_schema, list_response.namespaces) + + # Drop namespace + drop_request = DropNamespaceRequest() + drop_request.id = [UNITY_CATALOG, self.test_schema] + self.namespace.drop_namespace(drop_request) + + def test_table_operations(self): + """Test table CRUD operations.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [UNITY_CATALOG, self.test_schema] + self.namespace.create_namespace(ns_request) + + table_name = 
f"test_table_{uuid.uuid4().hex[:8]}" + + # Create empty table (DeclareTable) + create_request = CreateEmptyTableRequest() + create_request.id = [UNITY_CATALOG, self.test_schema, table_name] + create_request.location = ( + f"/tmp/lance/{UNITY_CATALOG}/{self.test_schema}/{table_name}" + ) + + create_response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(create_response.location) + + # Describe table + describe_request = DescribeTableRequest() + describe_request.id = [UNITY_CATALOG, self.test_schema, table_name] + + describe_response = self.namespace.describe_table(describe_request) + self.assertIsNotNone(describe_response.location) + + # List tables + list_request = ListTablesRequest() + list_request.id = [UNITY_CATALOG, self.test_schema] + + list_response = self.namespace.list_tables(list_request) + self.assertIn(table_name, list_response.tables) + + # Deregister table + deregister_request = DeregisterTableRequest() + deregister_request.id = [UNITY_CATALOG, self.test_schema, table_name] + self.namespace.deregister_table(deregister_request) + + def test_create_empty_table_with_location(self): + """Test creating an empty table with a specific location.""" + # Create namespace first + ns_request = CreateNamespaceRequest() + ns_request.id = [UNITY_CATALOG, self.test_schema] + self.namespace.create_namespace(ns_request) + + table_name = "lance_table" + create_request = CreateEmptyTableRequest() + create_request.id = [UNITY_CATALOG, self.test_schema, table_name] + create_request.location = ( + f"/tmp/lance/{UNITY_CATALOG}/{self.test_schema}/{table_name}" + ) + + response = self.namespace.create_empty_table(create_request) + self.assertIsNotNone(response.location) + + # Clean up table + deregister_request = DeregisterTableRequest() + deregister_request.id = [UNITY_CATALOG, self.test_schema, table_name] + self.namespace.deregister_table(deregister_request) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/uv.lock 
b/python/uv.lock new file mode 100644 index 0000000..fad9ad7 --- /dev/null +++ b/python/uv.lock @@ -0,0 +1,846 @@ +version = 1 +revision = 3 +requires-python = ">=3.10" +resolution-markers = [ + "python_full_version >= '3.11'", + "python_full_version < '3.11'", +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "boto3" +version = "1.42.16" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/37/22c82e9d314d769a6eaf600ce5e08357927b5c6a614bfbeb1e7b7e7aa036/boto3-1.42.16.tar.gz", hash = "sha256:811391611db88c8a061f6e6fabbd7ca784ad9de04490a879f091cbaa9de7de74", size = 112834, upload-time = "2025-12-23T20:44:21.286Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/93/78d175e7d40941c4b608a6701a14215aeeb1db58499dbbc40467a6fd6116/boto3-1.42.16-py3-none-any.whl", hash = "sha256:37a43d42aebd06a8f93ee801ea1b7b5181ac42a30869ef403c9dadc160a748e5", size = 140574, upload-time = "2025-12-23T20:44:20.121Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.16" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { 
url = "https://files.pythonhosted.org/packages/cf/eb/d91fb1fb288ba896392d68f89881f5f26bc5b51f8da28697c77f05bc44e8/botocore-1.42.16.tar.gz", hash = "sha256:29ee8555cd5d5023350405387cedcf3fe1c7f02fcb8060bf9e01602487482c25", size = 14914600, upload-time = "2025-12-23T20:44:11.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/98/c7c26ff399994e2b1119cc36027aaae46b9d646a49b70a82c2622e44c94b/botocore-1.42.16-py3-none-any.whl", hash = "sha256:b1f584a0f8645c12e07bf6ec9c18e05221a789f2a9b2d3c6291deb42f8c1c542", size = 14585775, upload-time = "2025-12-23T20:44:08.092Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "coverage" +version = "7.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/45/2c665ca77ec32ad67e25c77daf1cee28ee4558f3bc571cdbaf88a00b9f23/coverage-7.13.0.tar.gz", hash = "sha256:a394aa27f2d7ff9bc04cf703817773a59ad6dfbd577032e690f961d2460ee936", size = 820905, upload-time = "2025-12-08T13:14:38.055Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/08/bdd7ccca14096f7eb01412b87ac11e5d16e4cb54b6e328afc9dee8bdaec1/coverage-7.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:02d9fb9eccd48f6843c98a37bd6817462f130b86da8660461e8f5e54d4c06070", size = 217979, upload-time = 
"2025-12-08T13:12:14.505Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f0/d1302e3416298a28b5663ae1117546a745d9d19fde7e28402b2c5c3e2109/coverage-7.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:367449cf07d33dc216c083f2036bb7d976c6e4903ab31be400ad74ad9f85ce98", size = 218496, upload-time = "2025-12-08T13:12:16.237Z" }, + { url = "https://files.pythonhosted.org/packages/07/26/d36c354c8b2a320819afcea6bffe72839efd004b98d1d166b90801d49d57/coverage-7.13.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cdb3c9f8fef0a954c632f64328a3935988d33a6604ce4bf67ec3e39670f12ae5", size = 245237, upload-time = "2025-12-08T13:12:17.858Z" }, + { url = "https://files.pythonhosted.org/packages/91/52/be5e85631e0eec547873d8b08dd67a5f6b111ecfe89a86e40b89b0c1c61c/coverage-7.13.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d10fd186aac2316f9bbb46ef91977f9d394ded67050ad6d84d94ed6ea2e8e54e", size = 247061, upload-time = "2025-12-08T13:12:19.132Z" }, + { url = "https://files.pythonhosted.org/packages/0f/45/a5e8fa0caf05fbd8fa0402470377bff09cc1f026d21c05c71e01295e55ab/coverage-7.13.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f88ae3e69df2ab62fb0bc5219a597cb890ba5c438190ffa87490b315190bb33", size = 248928, upload-time = "2025-12-08T13:12:20.702Z" }, + { url = "https://files.pythonhosted.org/packages/f5/42/ffb5069b6fd1b95fae482e02f3fecf380d437dd5a39bae09f16d2e2e7e01/coverage-7.13.0-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c4be718e51e86f553bcf515305a158a1cd180d23b72f07ae76d6017c3cc5d791", size = 245931, upload-time = "2025-12-08T13:12:22.243Z" }, + { url = "https://files.pythonhosted.org/packages/95/6e/73e809b882c2858f13e55c0c36e94e09ce07e6165d5644588f9517efe333/coverage-7.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a00d3a393207ae12f7c49bb1c113190883b500f48979abb118d8b72b8c95c032", 
size = 246968, upload-time = "2025-12-08T13:12:23.52Z" }, + { url = "https://files.pythonhosted.org/packages/87/08/64ebd9e64b6adb8b4a4662133d706fbaccecab972e0b3ccc23f64e2678ad/coverage-7.13.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a7b1cd820e1b6116f92c6128f1188e7afe421c7e1b35fa9836b11444e53ebd9", size = 244972, upload-time = "2025-12-08T13:12:24.781Z" }, + { url = "https://files.pythonhosted.org/packages/12/97/f4d27c6fe0cb375a5eced4aabcaef22de74766fb80a3d5d2015139e54b22/coverage-7.13.0-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:37eee4e552a65866f15dedd917d5e5f3d59805994260720821e2c1b51ac3248f", size = 245241, upload-time = "2025-12-08T13:12:28.041Z" }, + { url = "https://files.pythonhosted.org/packages/0c/94/42f8ae7f633bf4c118bf1038d80472f9dade88961a466f290b81250f7ab7/coverage-7.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:62d7c4f13102148c78d7353c6052af6d899a7f6df66a32bddcc0c0eb7c5326f8", size = 245847, upload-time = "2025-12-08T13:12:29.337Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2f/6369ca22b6b6d933f4f4d27765d313d8914cc4cce84f82a16436b1a233db/coverage-7.13.0-cp310-cp310-win32.whl", hash = "sha256:24e4e56304fdb56f96f80eabf840eab043b3afea9348b88be680ec5986780a0f", size = 220573, upload-time = "2025-12-08T13:12:30.905Z" }, + { url = "https://files.pythonhosted.org/packages/f1/dc/a6a741e519acceaeccc70a7f4cfe5d030efc4b222595f0677e101af6f1f3/coverage-7.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:74c136e4093627cf04b26a35dab8cbfc9b37c647f0502fc313376e11726ba303", size = 221509, upload-time = "2025-12-08T13:12:32.09Z" }, + { url = "https://files.pythonhosted.org/packages/f1/dc/888bf90d8b1c3d0b4020a40e52b9f80957d75785931ec66c7dfaccc11c7d/coverage-7.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0dfa3855031070058add1a59fdfda0192fd3e8f97e7c81de0596c145dea51820", size = 218104, upload-time = "2025-12-08T13:12:33.333Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/ea/069d51372ad9c380214e86717e40d1a743713a2af191cfba30a0911b0a4a/coverage-7.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4fdb6f54f38e334db97f72fa0c701e66d8479af0bc3f9bfb5b90f1c30f54500f", size = 218606, upload-time = "2025-12-08T13:12:34.498Z" }, + { url = "https://files.pythonhosted.org/packages/68/09/77b1c3a66c2aa91141b6c4471af98e5b1ed9b9e6d17255da5eb7992299e3/coverage-7.13.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7e442c013447d1d8d195be62852270b78b6e255b79b8675bad8479641e21fd96", size = 248999, upload-time = "2025-12-08T13:12:36.02Z" }, + { url = "https://files.pythonhosted.org/packages/0a/32/2e2f96e9d5691eaf1181d9040f850b8b7ce165ea10810fd8e2afa534cef7/coverage-7.13.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ed5630d946859de835a85e9a43b721123a8a44ec26e2830b296d478c7fd4259", size = 250925, upload-time = "2025-12-08T13:12:37.221Z" }, + { url = "https://files.pythonhosted.org/packages/7b/45/b88ddac1d7978859b9a39a8a50ab323186148f1d64bc068f86fc77706321/coverage-7.13.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7f15a931a668e58087bc39d05d2b4bf4b14ff2875b49c994bbdb1c2217a8daeb", size = 253032, upload-time = "2025-12-08T13:12:38.763Z" }, + { url = "https://files.pythonhosted.org/packages/71/cb/e15513f94c69d4820a34b6bf3d2b1f9f8755fa6021be97c7065442d7d653/coverage-7.13.0-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:30a3a201a127ea57f7e14ba43c93c9c4be8b7d17a26e03bb49e6966d019eede9", size = 249134, upload-time = "2025-12-08T13:12:40.382Z" }, + { url = "https://files.pythonhosted.org/packages/09/61/d960ff7dc9e902af3310ce632a875aaa7860f36d2bc8fc8b37ee7c1b82a5/coverage-7.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a485ff48fbd231efa32d58f479befce52dcb6bfb2a88bb7bf9a0b89b1bc8030", size = 250731, upload-time = 
"2025-12-08T13:12:41.992Z" }, + { url = "https://files.pythonhosted.org/packages/98/34/c7c72821794afc7c7c2da1db8f00c2c98353078aa7fb6b5ff36aac834b52/coverage-7.13.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:22486cdafba4f9e471c816a2a5745337742a617fef68e890d8baf9f3036d7833", size = 248795, upload-time = "2025-12-08T13:12:43.331Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5b/e0f07107987a43b2def9aa041c614ddb38064cbf294a71ef8c67d43a0cdd/coverage-7.13.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:263c3dbccc78e2e331e59e90115941b5f53e85cfcc6b3b2fbff1fd4e3d2c6ea8", size = 248514, upload-time = "2025-12-08T13:12:44.546Z" }, + { url = "https://files.pythonhosted.org/packages/71/c2/c949c5d3b5e9fc6dd79e1b73cdb86a59ef14f3709b1d72bf7668ae12e000/coverage-7.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e5330fa0cc1f5c3c4c3bb8e101b742025933e7848989370a1d4c8c5e401ea753", size = 249424, upload-time = "2025-12-08T13:12:45.759Z" }, + { url = "https://files.pythonhosted.org/packages/11/f1/bbc009abd6537cec0dffb2cc08c17a7f03de74c970e6302db4342a6e05af/coverage-7.13.0-cp311-cp311-win32.whl", hash = "sha256:0f4872f5d6c54419c94c25dd6ae1d015deeb337d06e448cd890a1e89a8ee7f3b", size = 220597, upload-time = "2025-12-08T13:12:47.378Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f6/d9977f2fb51c10fbaed0718ce3d0a8541185290b981f73b1d27276c12d91/coverage-7.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51a202e0f80f241ccb68e3e26e19ab5b3bf0f813314f2c967642f13ebcf1ddfe", size = 221536, upload-time = "2025-12-08T13:12:48.7Z" }, + { url = "https://files.pythonhosted.org/packages/be/ad/3fcf43fd96fb43e337a3073dea63ff148dcc5c41ba7a14d4c7d34efb2216/coverage-7.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:d2a9d7f1c11487b1c69367ab3ac2d81b9b3721f097aa409a3191c3e90f8f3dd7", size = 220206, upload-time = "2025-12-08T13:12:50.365Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/f1/2619559f17f31ba00fc40908efd1fbf1d0a5536eb75dc8341e7d660a08de/coverage-7.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0b3d67d31383c4c68e19a88e28fc4c2e29517580f1b0ebec4a069d502ce1e0bf", size = 218274, upload-time = "2025-12-08T13:12:52.095Z" }, + { url = "https://files.pythonhosted.org/packages/2b/11/30d71ae5d6e949ff93b2a79a2c1b4822e00423116c5c6edfaeef37301396/coverage-7.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:581f086833d24a22c89ae0fe2142cfaa1c92c930adf637ddf122d55083fb5a0f", size = 218638, upload-time = "2025-12-08T13:12:53.418Z" }, + { url = "https://files.pythonhosted.org/packages/79/c2/fce80fc6ded8d77e53207489d6065d0fed75db8951457f9213776615e0f5/coverage-7.13.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0a3a30f0e257df382f5f9534d4ce3d4cf06eafaf5192beb1a7bd066cb10e78fb", size = 250129, upload-time = "2025-12-08T13:12:54.744Z" }, + { url = "https://files.pythonhosted.org/packages/5b/b6/51b5d1eb6fcbb9a1d5d6984e26cbe09018475c2922d554fd724dd0f056ee/coverage-7.13.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:583221913fbc8f53b88c42e8dbb8fca1d0f2e597cb190ce45916662b8b9d9621", size = 252885, upload-time = "2025-12-08T13:12:56.401Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f8/972a5affea41de798691ab15d023d3530f9f56a72e12e243f35031846ff7/coverage-7.13.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f5d9bd30756fff3e7216491a0d6d520c448d5124d3d8e8f56446d6412499e74", size = 253974, upload-time = "2025-12-08T13:12:57.718Z" }, + { url = "https://files.pythonhosted.org/packages/8a/56/116513aee860b2c7968aa3506b0f59b22a959261d1dbf3aea7b4450a7520/coverage-7.13.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a23e5a1f8b982d56fa64f8e442e037f6ce29322f1f9e6c2344cd9e9f4407ee57", size = 250538, upload-time = 
"2025-12-08T13:12:59.254Z" }, + { url = "https://files.pythonhosted.org/packages/d6/75/074476d64248fbadf16dfafbf93fdcede389ec821f74ca858d7c87d2a98c/coverage-7.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9b01c22bc74a7fb44066aaf765224c0d933ddf1f5047d6cdfe4795504a4493f8", size = 251912, upload-time = "2025-12-08T13:13:00.604Z" }, + { url = "https://files.pythonhosted.org/packages/f2/d2/aa4f8acd1f7c06024705c12609d8698c51b27e4d635d717cd1934c9668e2/coverage-7.13.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:898cce66d0836973f48dda4e3514d863d70142bdf6dfab932b9b6a90ea5b222d", size = 250054, upload-time = "2025-12-08T13:13:01.892Z" }, + { url = "https://files.pythonhosted.org/packages/19/98/8df9e1af6a493b03694a1e8070e024e7d2cdc77adedc225a35e616d505de/coverage-7.13.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:3ab483ea0e251b5790c2aac03acde31bff0c736bf8a86829b89382b407cd1c3b", size = 249619, upload-time = "2025-12-08T13:13:03.236Z" }, + { url = "https://files.pythonhosted.org/packages/d8/71/f8679231f3353018ca66ef647fa6fe7b77e6bff7845be54ab84f86233363/coverage-7.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1d84e91521c5e4cb6602fe11ece3e1de03b2760e14ae4fcf1a4b56fa3c801fcd", size = 251496, upload-time = "2025-12-08T13:13:04.511Z" }, + { url = "https://files.pythonhosted.org/packages/04/86/9cb406388034eaf3c606c22094edbbb82eea1fa9d20c0e9efadff20d0733/coverage-7.13.0-cp312-cp312-win32.whl", hash = "sha256:193c3887285eec1dbdb3f2bd7fbc351d570ca9c02ca756c3afbc71b3c98af6ef", size = 220808, upload-time = "2025-12-08T13:13:06.422Z" }, + { url = "https://files.pythonhosted.org/packages/1c/59/af483673df6455795daf5f447c2f81a3d2fcfc893a22b8ace983791f6f34/coverage-7.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:4f3e223b2b2db5e0db0c2b97286aba0036ca000f06aca9b12112eaa9af3d92ae", size = 221616, upload-time = "2025-12-08T13:13:07.95Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/b0/959d582572b30a6830398c60dd419c1965ca4b5fb38ac6b7093a0d50ca8d/coverage-7.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:086cede306d96202e15a4b77ace8472e39d9f4e5f9fd92dd4fecdfb2313b2080", size = 220261, upload-time = "2025-12-08T13:13:09.581Z" }, + { url = "https://files.pythonhosted.org/packages/7c/cc/bce226595eb3bf7d13ccffe154c3c487a22222d87ff018525ab4dd2e9542/coverage-7.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:28ee1c96109974af104028a8ef57cec21447d42d0e937c0275329272e370ebcf", size = 218297, upload-time = "2025-12-08T13:13:10.977Z" }, + { url = "https://files.pythonhosted.org/packages/3b/9f/73c4d34600aae03447dff3d7ad1d0ac649856bfb87d1ca7d681cfc913f9e/coverage-7.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d1e97353dcc5587b85986cda4ff3ec98081d7e84dd95e8b2a6d59820f0545f8a", size = 218673, upload-time = "2025-12-08T13:13:12.562Z" }, + { url = "https://files.pythonhosted.org/packages/63/ab/8fa097db361a1e8586535ae5073559e6229596b3489ec3ef2f5b38df8cb2/coverage-7.13.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:99acd4dfdfeb58e1937629eb1ab6ab0899b131f183ee5f23e0b5da5cba2fec74", size = 249652, upload-time = "2025-12-08T13:13:13.909Z" }, + { url = "https://files.pythonhosted.org/packages/90/3a/9bfd4de2ff191feb37ef9465855ca56a6f2f30a3bca172e474130731ac3d/coverage-7.13.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ff45e0cd8451e293b63ced93161e189780baf444119391b3e7d25315060368a6", size = 252251, upload-time = "2025-12-08T13:13:15.553Z" }, + { url = "https://files.pythonhosted.org/packages/df/61/b5d8105f016e1b5874af0d7c67542da780ccd4a5f2244a433d3e20ceb1ad/coverage-7.13.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f4f72a85316d8e13234cafe0a9f81b40418ad7a082792fa4165bd7d45d96066b", size = 253492, upload-time = "2025-12-08T13:13:16.849Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/b8/0fad449981803cc47a4694768b99823fb23632150743f9c83af329bb6090/coverage-7.13.0-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:11c21557d0e0a5a38632cbbaca5f008723b26a89d70db6315523df6df77d6232", size = 249850, upload-time = "2025-12-08T13:13:18.142Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e9/8d68337c3125014d918cf4327d5257553a710a2995a6a6de2ac77e5aa429/coverage-7.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76541dc8d53715fb4f7a3a06b34b0dc6846e3c69bc6204c55653a85dd6220971", size = 251633, upload-time = "2025-12-08T13:13:19.56Z" }, + { url = "https://files.pythonhosted.org/packages/55/14/d4112ab26b3a1bc4b3c1295d8452dcf399ed25be4cf649002fb3e64b2d93/coverage-7.13.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6e9e451dee940a86789134b6b0ffbe31c454ade3b849bb8a9d2cca2541a8e91d", size = 249586, upload-time = "2025-12-08T13:13:20.883Z" }, + { url = "https://files.pythonhosted.org/packages/2c/a9/22b0000186db663b0d82f86c2f1028099ae9ac202491685051e2a11a5218/coverage-7.13.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:5c67dace46f361125e6b9cace8fe0b729ed8479f47e70c89b838d319375c8137", size = 249412, upload-time = "2025-12-08T13:13:22.22Z" }, + { url = "https://files.pythonhosted.org/packages/a1/2e/42d8e0d9e7527fba439acdc6ed24a2b97613b1dc85849b1dd935c2cffef0/coverage-7.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f59883c643cb19630500f57016f76cfdcd6845ca8c5b5ea1f6e17f74c8e5f511", size = 251191, upload-time = "2025-12-08T13:13:23.899Z" }, + { url = "https://files.pythonhosted.org/packages/a4/af/8c7af92b1377fd8860536aadd58745119252aaaa71a5213e5a8e8007a9f5/coverage-7.13.0-cp313-cp313-win32.whl", hash = "sha256:58632b187be6f0be500f553be41e277712baa278147ecb7559983c6d9faf7ae1", size = 220829, upload-time = "2025-12-08T13:13:25.182Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/f9/725e8bf16f343d33cbe076c75dc8370262e194ff10072c0608b8e5cf33a3/coverage-7.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:73419b89f812f498aca53f757dd834919b48ce4799f9d5cad33ca0ae442bdb1a", size = 221640, upload-time = "2025-12-08T13:13:26.836Z" }, + { url = "https://files.pythonhosted.org/packages/8a/ff/e98311000aa6933cc79274e2b6b94a2fe0fe3434fca778eba82003675496/coverage-7.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:eb76670874fdd6091eedcc856128ee48c41a9bbbb9c3f1c7c3cf169290e3ffd6", size = 220269, upload-time = "2025-12-08T13:13:28.116Z" }, + { url = "https://files.pythonhosted.org/packages/cf/cf/bbaa2e1275b300343ea865f7d424cc0a2e2a1df6925a070b2b2d5d765330/coverage-7.13.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6e63ccc6e0ad8986386461c3c4b737540f20426e7ec932f42e030320896c311a", size = 218990, upload-time = "2025-12-08T13:13:29.463Z" }, + { url = "https://files.pythonhosted.org/packages/21/1d/82f0b3323b3d149d7672e7744c116e9c170f4957e0c42572f0366dbb4477/coverage-7.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:494f5459ffa1bd45e18558cd98710c36c0b8fbfa82a5eabcbe671d80ecffbfe8", size = 219340, upload-time = "2025-12-08T13:13:31.524Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e3/fe3fd4702a3832a255f4d43013eacb0ef5fc155a5960ea9269d8696db28b/coverage-7.13.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:06cac81bf10f74034e055e903f5f946e3e26fc51c09fc9f584e4a1605d977053", size = 260638, upload-time = "2025-12-08T13:13:32.965Z" }, + { url = "https://files.pythonhosted.org/packages/ad/01/63186cb000307f2b4da463f72af9b85d380236965574c78e7e27680a2593/coverage-7.13.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f2ffc92b46ed6e6760f1d47a71e56b5664781bc68986dbd1836b2b70c0ce2071", size = 262705, upload-time = "2025-12-08T13:13:34.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/a1/c0dacef0cc865f2455d59eed3548573ce47ed603205ffd0735d1d78b5906/coverage-7.13.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0602f701057c6823e5db1b74530ce85f17c3c5be5c85fc042ac939cbd909426e", size = 265125, upload-time = "2025-12-08T13:13:35.73Z" }, + { url = "https://files.pythonhosted.org/packages/ef/92/82b99223628b61300bd382c205795533bed021505eab6dd86e11fb5d7925/coverage-7.13.0-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:25dc33618d45456ccb1d37bce44bc78cf269909aa14c4db2e03d63146a8a1493", size = 259844, upload-time = "2025-12-08T13:13:37.69Z" }, + { url = "https://files.pythonhosted.org/packages/cf/2c/89b0291ae4e6cd59ef042708e1c438e2290f8c31959a20055d8768349ee2/coverage-7.13.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:71936a8b3b977ddd0b694c28c6a34f4fff2e9dd201969a4ff5d5fc7742d614b0", size = 262700, upload-time = "2025-12-08T13:13:39.525Z" }, + { url = "https://files.pythonhosted.org/packages/bf/f9/a5f992efae1996245e796bae34ceb942b05db275e4b34222a9a40b9fbd3b/coverage-7.13.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:936bc20503ce24770c71938d1369461f0c5320830800933bc3956e2a4ded930e", size = 260321, upload-time = "2025-12-08T13:13:41.172Z" }, + { url = "https://files.pythonhosted.org/packages/4c/89/a29f5d98c64fedbe32e2ac3c227fbf78edc01cc7572eee17d61024d89889/coverage-7.13.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:af0a583efaacc52ae2521f8d7910aff65cdb093091d76291ac5820d5e947fc1c", size = 259222, upload-time = "2025-12-08T13:13:43.282Z" }, + { url = "https://files.pythonhosted.org/packages/b3/c3/940fe447aae302a6701ee51e53af7e08b86ff6eed7631e5740c157ee22b9/coverage-7.13.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f1c23e24a7000da892a312fb17e33c5f94f8b001de44b7cf8ba2e36fbd15859e", size = 261411, upload-time = "2025-12-08T13:13:44.72Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/31/12a4aec689cb942a89129587860ed4d0fd522d5fda81237147fde554b8ae/coverage-7.13.0-cp313-cp313t-win32.whl", hash = "sha256:5f8a0297355e652001015e93be345ee54393e45dc3050af4a0475c5a2b767d46", size = 221505, upload-time = "2025-12-08T13:13:46.332Z" }, + { url = "https://files.pythonhosted.org/packages/65/8c/3b5fe3259d863572d2b0827642c50c3855d26b3aefe80bdc9eba1f0af3b0/coverage-7.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6abb3a4c52f05e08460bd9acf04fec027f8718ecaa0d09c40ffbc3fbd70ecc39", size = 222569, upload-time = "2025-12-08T13:13:47.79Z" }, + { url = "https://files.pythonhosted.org/packages/b0/39/f71fa8316a96ac72fc3908839df651e8eccee650001a17f2c78cdb355624/coverage-7.13.0-cp313-cp313t-win_arm64.whl", hash = "sha256:3ad968d1e3aa6ce5be295ab5fe3ae1bf5bb4769d0f98a80a0252d543a2ef2e9e", size = 220841, upload-time = "2025-12-08T13:13:49.243Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4b/9b54bedda55421449811dcd5263a2798a63f48896c24dfb92b0f1b0845bd/coverage-7.13.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:453b7ec753cf5e4356e14fe858064e5520c460d3bbbcb9c35e55c0d21155c256", size = 218343, upload-time = "2025-12-08T13:13:50.811Z" }, + { url = "https://files.pythonhosted.org/packages/59/df/c3a1f34d4bba2e592c8979f924da4d3d4598b0df2392fbddb7761258e3dc/coverage-7.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:af827b7cbb303e1befa6c4f94fd2bf72f108089cfa0f8abab8f4ca553cf5ca5a", size = 218672, upload-time = "2025-12-08T13:13:52.284Z" }, + { url = "https://files.pythonhosted.org/packages/07/62/eec0659e47857698645ff4e6ad02e30186eb8afd65214fd43f02a76537cb/coverage-7.13.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9987a9e4f8197a1000280f7cc089e3ea2c8b3c0a64d750537809879a7b4ceaf9", size = 249715, upload-time = "2025-12-08T13:13:53.791Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/2d/3c7ff8b2e0e634c1f58d095f071f52ed3c23ff25be524b0ccae8b71f99f8/coverage-7.13.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3188936845cd0cb114fa6a51842a304cdbac2958145d03be2377ec41eb285d19", size = 252225, upload-time = "2025-12-08T13:13:55.274Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ac/fb03b469d20e9c9a81093575003f959cf91a4a517b783aab090e4538764b/coverage-7.13.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2bdb3babb74079f021696cb46b8bb5f5661165c385d3a238712b031a12355be", size = 253559, upload-time = "2025-12-08T13:13:57.161Z" }, + { url = "https://files.pythonhosted.org/packages/29/62/14afa9e792383c66cc0a3b872a06ded6e4ed1079c7d35de274f11d27064e/coverage-7.13.0-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7464663eaca6adba4175f6c19354feea61ebbdd735563a03d1e472c7072d27bb", size = 249724, upload-time = "2025-12-08T13:13:58.692Z" }, + { url = "https://files.pythonhosted.org/packages/31/b7/333f3dab2939070613696ab3ee91738950f0467778c6e5a5052e840646b7/coverage-7.13.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8069e831f205d2ff1f3d355e82f511eb7c5522d7d413f5db5756b772ec8697f8", size = 251582, upload-time = "2025-12-08T13:14:00.642Z" }, + { url = "https://files.pythonhosted.org/packages/81/cb/69162bda9381f39b2287265d7e29ee770f7c27c19f470164350a38318764/coverage-7.13.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:6fb2d5d272341565f08e962cce14cdf843a08ac43bd621783527adb06b089c4b", size = 249538, upload-time = "2025-12-08T13:14:02.556Z" }, + { url = "https://files.pythonhosted.org/packages/e0/76/350387b56a30f4970abe32b90b2a434f87d29f8b7d4ae40d2e8a85aacfb3/coverage-7.13.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:5e70f92ef89bac1ac8a99b3324923b4749f008fdbd7aa9cb35e01d7a284a04f9", size = 249349, upload-time = "2025-12-08T13:14:04.015Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/0d/7f6c42b8d59f4c7e43ea3059f573c0dcfed98ba46eb43c68c69e52ae095c/coverage-7.13.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4b5de7d4583e60d5fd246dd57fcd3a8aa23c6e118a8c72b38adf666ba8e7e927", size = 251011, upload-time = "2025-12-08T13:14:05.505Z" }, + { url = "https://files.pythonhosted.org/packages/d7/f1/4bb2dff379721bb0b5c649d5c5eaf438462cad824acf32eb1b7ca0c7078e/coverage-7.13.0-cp314-cp314-win32.whl", hash = "sha256:a6c6e16b663be828a8f0b6c5027d36471d4a9f90d28444aa4ced4d48d7d6ae8f", size = 221091, upload-time = "2025-12-08T13:14:07.127Z" }, + { url = "https://files.pythonhosted.org/packages/ba/44/c239da52f373ce379c194b0ee3bcc121020e397242b85f99e0afc8615066/coverage-7.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:0900872f2fdb3ee5646b557918d02279dc3af3dfb39029ac4e945458b13f73bc", size = 221904, upload-time = "2025-12-08T13:14:08.542Z" }, + { url = "https://files.pythonhosted.org/packages/89/1f/b9f04016d2a29c2e4a0307baefefad1a4ec5724946a2b3e482690486cade/coverage-7.13.0-cp314-cp314-win_arm64.whl", hash = "sha256:3a10260e6a152e5f03f26db4a407c4c62d3830b9af9b7c0450b183615f05d43b", size = 220480, upload-time = "2025-12-08T13:14:10.958Z" }, + { url = "https://files.pythonhosted.org/packages/16/d4/364a1439766c8e8647860584171c36010ca3226e6e45b1753b1b249c5161/coverage-7.13.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9097818b6cc1cfb5f174e3263eba4a62a17683bcfe5c4b5d07f4c97fa51fbf28", size = 219074, upload-time = "2025-12-08T13:14:13.345Z" }, + { url = "https://files.pythonhosted.org/packages/ce/f4/71ba8be63351e099911051b2089662c03d5671437a0ec2171823c8e03bec/coverage-7.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0018f73dfb4301a89292c73be6ba5f58722ff79f51593352759c1790ded1cabe", size = 219342, upload-time = "2025-12-08T13:14:15.02Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/25/127d8ed03d7711a387d96f132589057213e3aef7475afdaa303412463f22/coverage-7.13.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:166ad2a22ee770f5656e1257703139d3533b4a0b6909af67c6b4a3adc1c98657", size = 260713, upload-time = "2025-12-08T13:14:16.907Z" }, + { url = "https://files.pythonhosted.org/packages/fd/db/559fbb6def07d25b2243663b46ba9eb5a3c6586c0c6f4e62980a68f0ee1c/coverage-7.13.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f6aaef16d65d1787280943f1c8718dc32e9cf141014e4634d64446702d26e0ff", size = 262825, upload-time = "2025-12-08T13:14:18.68Z" }, + { url = "https://files.pythonhosted.org/packages/37/99/6ee5bf7eff884766edb43bd8736b5e1c5144d0fe47498c3779326fe75a35/coverage-7.13.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e999e2dcc094002d6e2c7bbc1fb85b58ba4f465a760a8014d97619330cdbbbf3", size = 265233, upload-time = "2025-12-08T13:14:20.55Z" }, + { url = "https://files.pythonhosted.org/packages/d8/90/92f18fe0356ea69e1f98f688ed80cec39f44e9f09a1f26a1bbf017cc67f2/coverage-7.13.0-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:00c3d22cf6fb1cf3bf662aaaa4e563be8243a5ed2630339069799835a9cc7f9b", size = 259779, upload-time = "2025-12-08T13:14:22.367Z" }, + { url = "https://files.pythonhosted.org/packages/90/5d/b312a8b45b37a42ea7d27d7d3ff98ade3a6c892dd48d1d503e773503373f/coverage-7.13.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22ccfe8d9bb0d6134892cbe1262493a8c70d736b9df930f3f3afae0fe3ac924d", size = 262700, upload-time = "2025-12-08T13:14:24.309Z" }, + { url = "https://files.pythonhosted.org/packages/63/f8/b1d0de5c39351eb71c366f872376d09386640840a2e09b0d03973d791e20/coverage-7.13.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:9372dff5ea15930fea0445eaf37bbbafbc771a49e70c0aeed8b4e2c2614cc00e", size = 260302, upload-time = 
"2025-12-08T13:14:26.068Z" }, + { url = "https://files.pythonhosted.org/packages/aa/7c/d42f4435bc40c55558b3109a39e2d456cddcec37434f62a1f1230991667a/coverage-7.13.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:69ac2c492918c2461bc6ace42d0479638e60719f2a4ef3f0815fa2df88e9f940", size = 259136, upload-time = "2025-12-08T13:14:27.604Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d3/23413241dc04d47cfe19b9a65b32a2edd67ecd0b817400c2843ebc58c847/coverage-7.13.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:739c6c051a7540608d097b8e13c76cfa85263ced467168dc6b477bae3df7d0e2", size = 261467, upload-time = "2025-12-08T13:14:29.09Z" }, + { url = "https://files.pythonhosted.org/packages/13/e6/6e063174500eee216b96272c0d1847bf215926786f85c2bd024cf4d02d2f/coverage-7.13.0-cp314-cp314t-win32.whl", hash = "sha256:fe81055d8c6c9de76d60c94ddea73c290b416e061d40d542b24a5871bad498b7", size = 221875, upload-time = "2025-12-08T13:14:31.106Z" }, + { url = "https://files.pythonhosted.org/packages/3b/46/f4fb293e4cbe3620e3ac2a3e8fd566ed33affb5861a9b20e3dd6c1896cbc/coverage-7.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:445badb539005283825959ac9fa4a28f712c214b65af3a2c464f1adc90f5fcbc", size = 222982, upload-time = "2025-12-08T13:14:33.1Z" }, + { url = "https://files.pythonhosted.org/packages/68/62/5b3b9018215ed9733fbd1ae3b2ed75c5de62c3b55377a52cae732e1b7805/coverage-7.13.0-cp314-cp314t-win_arm64.whl", hash = "sha256:de7f6748b890708578fc4b7bb967d810aeb6fcc9bff4bb77dbca77dab2f9df6a", size = 221016, upload-time = "2025-12-08T13:14:34.601Z" }, + { url = "https://files.pythonhosted.org/packages/8d/4c/1968f32fb9a2604645827e11ff84a31e59d532e01995f904723b4f5328b3/coverage-7.13.0-py3-none-any.whl", hash = "sha256:850d2998f380b1e266459ca5b47bc9e7daf9af1d070f66317972f382d46f1904", size = 210068, upload-time = "2025-12-08T13:14:36.236Z" }, +] + +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version <= '3.11'" }, +] + +[[package]] +name = 
"exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + +[[package]] +name = "hive-metastore-client" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "thrift" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/07/ed/f15d3f3e4f3676368993beedde77f499456a64debc7fd244e9278f5b9631/hive_metastore_client-1.0.9.tar.gz", hash = "sha256:b7e00a7cbdd8b2907cb68a219b086f1dbcbeb2010d8c0b530c99c961c4512fe9", size = 122852, upload-time = "2021-08-23T17:55:28.623Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/40/1dc3ced1f1487cb298a465a0c30685569058b912d554e696394837430017/hive_metastore_client-1.0.9-py3-none-any.whl", hash = "sha256:62a6657804a772398574bba64d46cd7bff10babb7bdc601abe1118ca916810a5", size = 137883, upload-time = "2021-08-23T17:55:27.117Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "jmespath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, +] + +[[package]] +name = "lance-namespace" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lance-namespace-urllib3-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/af/a77e7c9bc16ccf8a793df06bb87a559198f5b5dfb7ca03f4f32e1fe9cc15/lance_namespace-0.4.2.tar.gz", hash = "sha256:6830d0fb0f3f6dc0388ace2aa1a29f1b8e22c62f22e592a8b578c5da92980e7b", size = 9828, upload-time = "2025-12-31T08:31:02.488Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/9a/13242c23f932d8a61288e7c3e5bf5929cd509947535175d0f1ff188c6562/lance_namespace-0.4.2-py3-none-any.whl", hash = "sha256:ad0705dc0fdf37494cccc7163272472b773ad08572b146173114167187e5825e", size = 11702, upload-time = "2025-12-31T08:31:05.309Z" }, +] + +[[package]] +name = "lance-namespace-impls" +version = "0.0.1" +source = { editable = "." 
} +dependencies = [ + { name = "lance-namespace-urllib3-client" }, + { name = "pyarrow" }, + { name = "pylance" }, + { name = "typing-extensions" }, +] + +[package.optional-dependencies] +all = [ + { name = "boto3" }, + { name = "botocore" }, + { name = "hive-metastore-client" }, + { name = "thrift" }, +] +dev = [ + { name = "pytest" }, + { name = "pytest-cov" }, + { name = "ruff" }, +] +glue = [ + { name = "boto3" }, + { name = "botocore" }, +] +hive2 = [ + { name = "hive-metastore-client" }, + { name = "thrift" }, +] +hive3 = [ + { name = "hive-metastore-client" }, + { name = "thrift" }, +] + +[package.metadata] +requires-dist = [ + { name = "boto3", marker = "extra == 'all'", specifier = ">=1.35.0" }, + { name = "boto3", marker = "extra == 'glue'", specifier = ">=1.35.0" }, + { name = "botocore", marker = "extra == 'all'", specifier = ">=1.35.0" }, + { name = "botocore", marker = "extra == 'glue'", specifier = ">=1.35.0" }, + { name = "hive-metastore-client", marker = "extra == 'all'", specifier = ">=1.0.0" }, + { name = "hive-metastore-client", marker = "extra == 'hive2'", specifier = ">=1.0.0" }, + { name = "hive-metastore-client", marker = "extra == 'hive3'", specifier = ">=1.0.0" }, + { name = "lance-namespace-urllib3-client", specifier = ">=0.4.2" }, + { name = "pyarrow", specifier = ">=15.0.0" }, + { name = "pylance", specifier = ">=0.26.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, + { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" }, + { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" }, + { name = "thrift", marker = "extra == 'all'", specifier = ">=0.13.0" }, + { name = "thrift", marker = "extra == 'hive2'", specifier = ">=0.13.0" }, + { name = "thrift", marker = "extra == 'hive3'", specifier = ">=0.13.0" }, + { name = "typing-extensions", specifier = ">=4.5.0" }, +] +provides-extras = ["glue", "hive2", "hive3", "iceberg", "polaris", "unity", "all", "dev"] + +[[package]] +name = 
"lance-namespace-urllib3-client" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dateutil" }, + { name = "typing-extensions" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d0/33/3f533d87b8ad0867181a86cb17517cabed277d6816ca66a676dd98076064/lance_namespace_urllib3_client-0.4.2.tar.gz", hash = "sha256:294bfd2579f640053486008a77c2b7d43b8bf9614217941eda51b6f1c0f42f28", size = 155837, upload-time = "2025-12-31T08:31:04.605Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/79/c67652374a99c14e751fc4c16592b3d20c2f855ffc52c77132b3b48e356c/lance_namespace_urllib3_client-0.4.2-py3-none-any.whl", hash = "sha256:da885fd62b37af8653dba7ed22322bfd0a92a60ce78214eebca76783041af668", size = 262107, upload-time = "2025-12-31T08:31:03.431Z" }, +] + +[[package]] +name = "numpy" +version = "2.2.6" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245, upload-time = "2025-05-17T21:27:58.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048, upload-time = "2025-05-17T21:28:21.406Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542, upload-time = "2025-05-17T21:28:30.931Z" }, + { url = "https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301, upload-time = "2025-05-17T21:28:41.613Z" }, + { url = "https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320, upload-time = "2025-05-17T21:29:02.78Z" }, + { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050, upload-time = "2025-05-17T21:29:27.675Z" }, + { url = "https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034, upload-time = "2025-05-17T21:29:51.102Z" }, + { url = "https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185, upload-time = "2025-05-17T21:30:18.703Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149, upload-time = "2025-05-17T21:30:29.788Z" }, + { url = "https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620, upload-time = "2025-05-17T21:30:48.994Z" }, + { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963, upload-time = "2025-05-17T21:31:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743, upload-time = "2025-05-17T21:31:41.087Z" }, + { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616, upload-time = "2025-05-17T21:31:50.072Z" }, + { url = "https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579, upload-time = "2025-05-17T21:32:01.712Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005, upload-time = "2025-05-17T21:32:23.332Z" }, + { url = "https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570, upload-time = "2025-05-17T21:32:47.991Z" }, + { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548, upload-time = "2025-05-17T21:33:11.728Z" }, + { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521, upload-time = "2025-05-17T21:33:39.139Z" }, + { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866, upload-time = "2025-05-17T21:33:50.273Z" }, + { url = "https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455, upload-time = "2025-05-17T21:34:09.135Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, + { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, + { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, + { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, + { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, + { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, + { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, + { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, + { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, + { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", 
hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, + { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, + { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, + { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, + { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, + { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, 
upload-time = "2025-05-17T21:43:46.099Z" }, + { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, + { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, + { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, + { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, + { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, + { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391, upload-time = "2025-05-17T21:44:35.948Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754, upload-time = "2025-05-17T21:44:47.446Z" }, + { url = "https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476, upload-time = "2025-05-17T21:45:11.871Z" }, + { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.11'", +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/7a/6a3d14e205d292b738db449d0de649b373a59edb0d0b4493821d0a3e8718/numpy-2.4.0.tar.gz", hash = "sha256:6e504f7b16118198f138ef31ba24d985b124c2c469fe8467007cf30fd992f934", size = 20685720, upload-time = "2025-12-20T16:18:19.023Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/7e/7bae7cbcc2f8132271967aa03e03954fc1e48aa1f3bf32b29ca95fbef352/numpy-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:316b2f2584682318539f0bcaca5a496ce9ca78c88066579ebd11fd06f8e4741e", size = 16940166, upload-time = "2025-12-20T16:15:43.434Z" }, + { url = "https://files.pythonhosted.org/packages/0f/27/6c13f5b46776d6246ec884ac5817452672156a506d08a1f2abb39961930a/numpy-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2718c1de8504121714234b6f8241d0019450353276c88b9453c9c3d92e101db", size = 12641781, 
upload-time = "2025-12-20T16:15:45.701Z" }, + { url = "https://files.pythonhosted.org/packages/14/1c/83b4998d4860d15283241d9e5215f28b40ac31f497c04b12fa7f428ff370/numpy-2.4.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:21555da4ec4a0c942520ead42c3b0dc9477441e085c42b0fbdd6a084869a6f6b", size = 5470247, upload-time = "2025-12-20T16:15:47.943Z" }, + { url = "https://files.pythonhosted.org/packages/54/08/cbce72c835d937795571b0464b52069f869c9e78b0c076d416c5269d2718/numpy-2.4.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:413aa561266a4be2d06cd2b9665e89d9f54c543f418773076a76adcf2af08bc7", size = 6799807, upload-time = "2025-12-20T16:15:49.795Z" }, + { url = "https://files.pythonhosted.org/packages/ff/be/2e647961cd8c980591d75cdcd9e8f647d69fbe05e2a25613dc0a2ea5fb1a/numpy-2.4.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0feafc9e03128074689183031181fac0897ff169692d8492066e949041096548", size = 14701992, upload-time = "2025-12-20T16:15:51.615Z" }, + { url = "https://files.pythonhosted.org/packages/a2/fb/e1652fb8b6fd91ce6ed429143fe2e01ce714711e03e5b762615e7b36172c/numpy-2.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8fdfed3deaf1928fb7667d96e0567cdf58c2b370ea2ee7e586aa383ec2cb346", size = 16646871, upload-time = "2025-12-20T16:15:54.129Z" }, + { url = "https://files.pythonhosted.org/packages/62/23/d841207e63c4322842f7cd042ae981cffe715c73376dcad8235fb31debf1/numpy-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e06a922a469cae9a57100864caf4f8a97a1026513793969f8ba5b63137a35d25", size = 16487190, upload-time = "2025-12-20T16:15:56.147Z" }, + { url = "https://files.pythonhosted.org/packages/bc/a0/6a842c8421ebfdec0a230e65f61e0dabda6edbef443d999d79b87c273965/numpy-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:927ccf5cd17c48f801f4ed43a7e5673a2724bd2171460be3e3894e6e332ef83a", size = 18580762, upload-time = "2025-12-20T16:15:58.524Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/d1/c79e0046641186f2134dde05e6181825b911f8bdcef31b19ddd16e232847/numpy-2.4.0-cp311-cp311-win32.whl", hash = "sha256:882567b7ae57c1b1a0250208cc21a7976d8cbcc49d5a322e607e6f09c9e0bd53", size = 6233359, upload-time = "2025-12-20T16:16:00.938Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f0/74965001d231f28184d6305b8cdc1b6fcd4bf23033f6cb039cfe76c9fca7/numpy-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:8b986403023c8f3bf8f487c2e6186afda156174d31c175f747d8934dfddf3479", size = 12601132, upload-time = "2025-12-20T16:16:02.484Z" }, + { url = "https://files.pythonhosted.org/packages/65/32/55408d0f46dfebce38017f5bd931affa7256ad6beac1a92a012e1fbc67a7/numpy-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:3f3096405acc48887458bbf9f6814d43785ac7ba2a57ea6442b581dedbc60ce6", size = 10573977, upload-time = "2025-12-20T16:16:04.77Z" }, + { url = "https://files.pythonhosted.org/packages/8b/ff/f6400ffec95de41c74b8e73df32e3fff1830633193a7b1e409be7fb1bb8c/numpy-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a8b6bb8369abefb8bd1801b054ad50e02b3275c8614dc6e5b0373c305291037", size = 16653117, upload-time = "2025-12-20T16:16:06.709Z" }, + { url = "https://files.pythonhosted.org/packages/fd/28/6c23e97450035072e8d830a3c411bf1abd1f42c611ff9d29e3d8f55c6252/numpy-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e284ca13d5a8367e43734148622caf0b261b275673823593e3e3634a6490f83", size = 12369711, upload-time = "2025-12-20T16:16:08.758Z" }, + { url = "https://files.pythonhosted.org/packages/bc/af/acbef97b630ab1bb45e6a7d01d1452e4251aa88ce680ac36e56c272120ec/numpy-2.4.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:49ff32b09f5aa0cd30a20c2b39db3e669c845589f2b7fc910365210887e39344", size = 5198355, upload-time = "2025-12-20T16:16:10.902Z" }, + { url = "https://files.pythonhosted.org/packages/c1/c8/4e0d436b66b826f2e53330adaa6311f5cac9871a5b5c31ad773b27f25a74/numpy-2.4.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = 
"sha256:36cbfb13c152b1c7c184ddac43765db8ad672567e7bafff2cc755a09917ed2e6", size = 6545298, upload-time = "2025-12-20T16:16:12.607Z" }, + { url = "https://files.pythonhosted.org/packages/ef/27/e1f5d144ab54eac34875e79037011d511ac57b21b220063310cb96c80fbc/numpy-2.4.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:35ddc8f4914466e6fc954c76527aa91aa763682a4f6d73249ef20b418fe6effb", size = 14398387, upload-time = "2025-12-20T16:16:14.257Z" }, + { url = "https://files.pythonhosted.org/packages/67/64/4cb909dd5ab09a9a5d086eff9586e69e827b88a5585517386879474f4cf7/numpy-2.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc578891de1db95b2a35001b695451767b580bb45753717498213c5ff3c41d63", size = 16363091, upload-time = "2025-12-20T16:16:17.32Z" }, + { url = "https://files.pythonhosted.org/packages/9d/9c/8efe24577523ec6809261859737cf117b0eb6fdb655abdfdc81b2e468ce4/numpy-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98e81648e0b36e325ab67e46b5400a7a6d4a22b8a7c8e8bbfe20e7db7906bf95", size = 16176394, upload-time = "2025-12-20T16:16:19.524Z" }, + { url = "https://files.pythonhosted.org/packages/61/f0/1687441ece7b47a62e45a1f82015352c240765c707928edd8aef875d5951/numpy-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d57b5046c120561ba8fa8e4030fbb8b822f3063910fa901ffadf16e2b7128ad6", size = 18287378, upload-time = "2025-12-20T16:16:22.866Z" }, + { url = "https://files.pythonhosted.org/packages/d3/6f/f868765d44e6fc466467ed810ba9d8d6db1add7d4a748abfa2a4c99a3194/numpy-2.4.0-cp312-cp312-win32.whl", hash = "sha256:92190db305a6f48734d3982f2c60fa30d6b5ee9bff10f2887b930d7b40119f4c", size = 5955432, upload-time = "2025-12-20T16:16:25.06Z" }, + { url = "https://files.pythonhosted.org/packages/d4/b5/94c1e79fcbab38d1ca15e13777477b2914dd2d559b410f96949d6637b085/numpy-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:680060061adb2d74ce352628cb798cfdec399068aa7f07ba9fb818b2b3305f98", size = 12306201, upload-time = 
"2025-12-20T16:16:26.979Z" }, + { url = "https://files.pythonhosted.org/packages/70/09/c39dadf0b13bb0768cd29d6a3aaff1fb7c6905ac40e9aaeca26b1c086e06/numpy-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:39699233bc72dd482da1415dcb06076e32f60eddc796a796c5fb6c5efce94667", size = 10308234, upload-time = "2025-12-20T16:16:29.417Z" }, + { url = "https://files.pythonhosted.org/packages/a7/0d/853fd96372eda07c824d24adf02e8bc92bb3731b43a9b2a39161c3667cc4/numpy-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a152d86a3ae00ba5f47b3acf3b827509fd0b6cb7d3259665e63dafbad22a75ea", size = 16649088, upload-time = "2025-12-20T16:16:31.421Z" }, + { url = "https://files.pythonhosted.org/packages/e3/37/cc636f1f2a9f585434e20a3e6e63422f70bfe4f7f6698e941db52ea1ac9a/numpy-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:39b19251dec4de8ff8496cd0806cbe27bf0684f765abb1f4809554de93785f2d", size = 12364065, upload-time = "2025-12-20T16:16:33.491Z" }, + { url = "https://files.pythonhosted.org/packages/ed/69/0b78f37ca3690969beee54103ce5f6021709134e8020767e93ba691a72f1/numpy-2.4.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:009bd0ea12d3c784b6639a8457537016ce5172109e585338e11334f6a7bb88ee", size = 5192640, upload-time = "2025-12-20T16:16:35.636Z" }, + { url = "https://files.pythonhosted.org/packages/1d/2a/08569f8252abf590294dbb09a430543ec8f8cc710383abfb3e75cc73aeda/numpy-2.4.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5fe44e277225fd3dff6882d86d3d447205d43532c3627313d17e754fb3905a0e", size = 6541556, upload-time = "2025-12-20T16:16:37.276Z" }, + { url = "https://files.pythonhosted.org/packages/93/e9/a949885a4e177493d61519377952186b6cbfdf1d6002764c664ba28349b5/numpy-2.4.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f935c4493eda9069851058fa0d9e39dbf6286be690066509305e52912714dbb2", size = 14396562, upload-time = "2025-12-20T16:16:38.953Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/98/9d4ad53b0e9ef901c2ef1d550d2136f5ac42d3fd2988390a6def32e23e48/numpy-2.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8cfa5f29a695cb7438965e6c3e8d06e0416060cf0d709c1b1c1653a939bf5c2a", size = 16351719, upload-time = "2025-12-20T16:16:41.503Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/5f3711a38341d6e8dd619f6353251a0cdd07f3d6d101a8fd46f4ef87f895/numpy-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ba0cb30acd3ef11c94dc27fbfba68940652492bc107075e7ffe23057f9425681", size = 16176053, upload-time = "2025-12-20T16:16:44.552Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5b/2a3753dc43916501b4183532e7ace862e13211042bceafa253afb5c71272/numpy-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:60e8c196cd82cbbd4f130b5290007e13e6de3eca79f0d4d38014769d96a7c475", size = 18277859, upload-time = "2025-12-20T16:16:47.174Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c5/a18bcdd07a941db3076ef489d036ab16d2bfc2eae0cf27e5a26e29189434/numpy-2.4.0-cp313-cp313-win32.whl", hash = "sha256:5f48cb3e88fbc294dc90e215d86fbaf1c852c63dbdb6c3a3e63f45c4b57f7344", size = 5953849, upload-time = "2025-12-20T16:16:49.554Z" }, + { url = "https://files.pythonhosted.org/packages/4f/f1/719010ff8061da6e8a26e1980cf090412d4f5f8060b31f0c45d77dd67a01/numpy-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:a899699294f28f7be8992853c0c60741f16ff199205e2e6cdca155762cbaa59d", size = 12302840, upload-time = "2025-12-20T16:16:51.227Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5a/b3d259083ed8b4d335270c76966cb6cf14a5d1b69e1a608994ac57a659e6/numpy-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:9198f447e1dc5647d07c9a6bbe2063cc0132728cc7175b39dbc796da5b54920d", size = 10308509, upload-time = "2025-12-20T16:16:53.313Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/01/95edcffd1bb6c0633df4e808130545c4f07383ab629ac7e316fb44fff677/numpy-2.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74623f2ab5cc3f7c886add4f735d1031a1d2be4a4ae63c0546cfd74e7a31ddf6", size = 12491815, upload-time = "2025-12-20T16:16:55.496Z" }, + { url = "https://files.pythonhosted.org/packages/59/ea/5644b8baa92cc1c7163b4b4458c8679852733fa74ca49c942cfa82ded4e0/numpy-2.4.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:0804a8e4ab070d1d35496e65ffd3cf8114c136a2b81f61dfab0de4b218aacfd5", size = 5320321, upload-time = "2025-12-20T16:16:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/26/4e/e10938106d70bc21319bd6a86ae726da37edc802ce35a3a71ecdf1fdfe7f/numpy-2.4.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:02a2038eb27f9443a8b266a66911e926566b5a6ffd1a689b588f7f35b81e7dc3", size = 6641635, upload-time = "2025-12-20T16:16:59.379Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8d/a8828e3eaf5c0b4ab116924df82f24ce3416fa38d0674d8f708ddc6c8aac/numpy-2.4.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1889b3a3f47a7b5bee16bc25a2145bd7cb91897f815ce3499db64c7458b6d91d", size = 14456053, upload-time = "2025-12-20T16:17:01.768Z" }, + { url = "https://files.pythonhosted.org/packages/68/a1/17d97609d87d4520aa5ae2dcfb32305654550ac6a35effb946d303e594ce/numpy-2.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85eef4cb5625c47ee6425c58a3502555e10f45ee973da878ac8248ad58c136f3", size = 16401702, upload-time = "2025-12-20T16:17:04.235Z" }, + { url = "https://files.pythonhosted.org/packages/18/32/0f13c1b2d22bea1118356b8b963195446f3af124ed7a5adfa8fdecb1b6ca/numpy-2.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6dc8b7e2f4eb184b37655195f421836cfae6f58197b67e3ffc501f1333d993fa", size = 16242493, upload-time = "2025-12-20T16:17:06.856Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/23/48f21e3d309fbc137c068a1475358cbd3a901b3987dcfc97a029ab3068e2/numpy-2.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:44aba2f0cafd287871a495fb3163408b0bd25bbce135c6f621534a07f4f7875c", size = 18324222, upload-time = "2025-12-20T16:17:09.392Z" }, + { url = "https://files.pythonhosted.org/packages/ac/52/41f3d71296a3dcaa4f456aaa3c6fc8e745b43d0552b6bde56571bb4b4a0f/numpy-2.4.0-cp313-cp313t-win32.whl", hash = "sha256:20c115517513831860c573996e395707aa9fb691eb179200125c250e895fcd93", size = 6076216, upload-time = "2025-12-20T16:17:11.437Z" }, + { url = "https://files.pythonhosted.org/packages/35/ff/46fbfe60ab0710d2a2b16995f708750307d30eccbb4c38371ea9e986866e/numpy-2.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b48e35f4ab6f6a7597c46e301126ceba4c44cd3280e3750f85db48b082624fa4", size = 12444263, upload-time = "2025-12-20T16:17:13.182Z" }, + { url = "https://files.pythonhosted.org/packages/a3/e3/9189ab319c01d2ed556c932ccf55064c5d75bb5850d1df7a482ce0badead/numpy-2.4.0-cp313-cp313t-win_arm64.whl", hash = "sha256:4d1cfce39e511069b11e67cd0bd78ceff31443b7c9e5c04db73c7a19f572967c", size = 10378265, upload-time = "2025-12-20T16:17:15.211Z" }, + { url = "https://files.pythonhosted.org/packages/ab/ed/52eac27de39d5e5a6c9aadabe672bc06f55e24a3d9010cd1183948055d76/numpy-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c95eb6db2884917d86cde0b4d4cf31adf485c8ec36bf8696dd66fa70de96f36b", size = 16647476, upload-time = "2025-12-20T16:17:17.671Z" }, + { url = "https://files.pythonhosted.org/packages/77/c0/990ce1b7fcd4e09aeaa574e2a0a839589e4b08b2ca68070f1acb1fea6736/numpy-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:65167da969cd1ec3a1df31cb221ca3a19a8aaa25370ecb17d428415e93c1935e", size = 12374563, upload-time = "2025-12-20T16:17:20.216Z" }, + { url = "https://files.pythonhosted.org/packages/37/7c/8c5e389c6ae8f5fd2277a988600d79e9625db3fff011a2d87ac80b881a4c/numpy-2.4.0-cp314-cp314-macosx_14_0_arm64.whl", hash = 
"sha256:3de19cfecd1465d0dcf8a5b5ea8b3155b42ed0b639dba4b71e323d74f2a3be5e", size = 5203107, upload-time = "2025-12-20T16:17:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/e6/94/ca5b3bd6a8a70a5eec9a0b8dd7f980c1eff4b8a54970a9a7fef248ef564f/numpy-2.4.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6c05483c3136ac4c91b4e81903cb53a8707d316f488124d0398499a4f8e8ef51", size = 6538067, upload-time = "2025-12-20T16:17:24.001Z" }, + { url = "https://files.pythonhosted.org/packages/79/43/993eb7bb5be6761dde2b3a3a594d689cec83398e3f58f4758010f3b85727/numpy-2.4.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36667db4d6c1cea79c8930ab72fadfb4060feb4bfe724141cd4bd064d2e5f8ce", size = 14411926, upload-time = "2025-12-20T16:17:25.822Z" }, + { url = "https://files.pythonhosted.org/packages/03/75/d4c43b61de473912496317a854dac54f1efec3eeb158438da6884b70bb90/numpy-2.4.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a818668b674047fd88c4cddada7ab8f1c298812783e8328e956b78dc4807f9f", size = 16354295, upload-time = "2025-12-20T16:17:28.308Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0a/b54615b47ee8736a6461a4bb6749128dd3435c5a759d5663f11f0e9af4ac/numpy-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1ee32359fb7543b7b7bd0b2f46294db27e29e7bbdf70541e81b190836cd83ded", size = 16190242, upload-time = "2025-12-20T16:17:30.993Z" }, + { url = "https://files.pythonhosted.org/packages/98/ce/ea207769aacad6246525ec6c6bbd66a2bf56c72443dc10e2f90feed29290/numpy-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e493962256a38f58283de033d8af176c5c91c084ea30f15834f7545451c42059", size = 18280875, upload-time = "2025-12-20T16:17:33.327Z" }, + { url = "https://files.pythonhosted.org/packages/17/ef/ec409437aa962ea372ed601c519a2b141701683ff028f894b7466f0ab42b/numpy-2.4.0-cp314-cp314-win32.whl", hash = "sha256:6bbaebf0d11567fa8926215ae731e1d58e6ec28a8a25235b8a47405d301332db", size = 6002530, 
upload-time = "2025-12-20T16:17:35.729Z" }, + { url = "https://files.pythonhosted.org/packages/5f/4a/5cb94c787a3ed1ac65e1271b968686521169a7b3ec0b6544bb3ca32960b0/numpy-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:3d857f55e7fdf7c38ab96c4558c95b97d1c685be6b05c249f5fdafcbd6f9899e", size = 12435890, upload-time = "2025-12-20T16:17:37.599Z" }, + { url = "https://files.pythonhosted.org/packages/48/a0/04b89db963af9de1104975e2544f30de89adbf75b9e75f7dd2599be12c79/numpy-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:bb50ce5fb202a26fd5404620e7ef820ad1ab3558b444cb0b55beb7ef66cd2d63", size = 10591892, upload-time = "2025-12-20T16:17:39.649Z" }, + { url = "https://files.pythonhosted.org/packages/53/e5/d74b5ccf6712c06c7a545025a6a71bfa03bdc7e0568b405b0d655232fd92/numpy-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:355354388cba60f2132df297e2d53053d4063f79077b67b481d21276d61fc4df", size = 12494312, upload-time = "2025-12-20T16:17:41.714Z" }, + { url = "https://files.pythonhosted.org/packages/c2/08/3ca9cc2ddf54dfee7ae9a6479c071092a228c68aef08252aa08dac2af002/numpy-2.4.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:1d8f9fde5f6dc1b6fc34df8162f3b3079365468703fee7f31d4e0cc8c63baed9", size = 5322862, upload-time = "2025-12-20T16:17:44.145Z" }, + { url = "https://files.pythonhosted.org/packages/87/74/0bb63a68394c0c1e52670cfff2e309afa41edbe11b3327d9af29e4383f34/numpy-2.4.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e0434aa22c821f44eeb4c650b81c7fbdd8c0122c6c4b5a576a76d5a35625ecd9", size = 6644986, upload-time = "2025-12-20T16:17:46.203Z" }, + { url = "https://files.pythonhosted.org/packages/06/8f/9264d9bdbcf8236af2823623fe2f3981d740fc3461e2787e231d97c38c28/numpy-2.4.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40483b2f2d3ba7aad426443767ff5632ec3156ef09742b96913787d13c336471", size = 14457958, upload-time = "2025-12-20T16:17:48.017Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/d9/f9a69ae564bbc7236a35aa883319364ef5fd41f72aa320cc1cbe66148fe2/numpy-2.4.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6a7664ddd9746e20b7325351fe1a8408d0a2bf9c63b5e898290ddc8f09544", size = 16398394, upload-time = "2025-12-20T16:17:50.409Z" }, + { url = "https://files.pythonhosted.org/packages/34/c7/39241501408dde7f885d241a98caba5421061a2c6d2b2197ac5e3aa842d8/numpy-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ecb0019d44f4cdb50b676c5d0cb4b1eae8e15d1ed3d3e6639f986fc92b2ec52c", size = 16241044, upload-time = "2025-12-20T16:17:52.661Z" }, + { url = "https://files.pythonhosted.org/packages/7c/95/cae7effd90e065a95e59fe710eeee05d7328ed169776dfdd9f789e032125/numpy-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d0ffd9e2e4441c96a9c91ec1783285d80bf835b677853fc2770a89d50c1e48ac", size = 18321772, upload-time = "2025-12-20T16:17:54.947Z" }, + { url = "https://files.pythonhosted.org/packages/96/df/3c6c279accd2bfb968a76298e5b276310bd55d243df4fa8ac5816d79347d/numpy-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:77f0d13fa87036d7553bf81f0e1fe3ce68d14c9976c9851744e4d3e91127e95f", size = 6148320, upload-time = "2025-12-20T16:17:57.249Z" }, + { url = "https://files.pythonhosted.org/packages/92/8d/f23033cce252e7a75cae853d17f582e86534c46404dea1c8ee094a9d6d84/numpy-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b1f5b45829ac1848893f0ddf5cb326110604d6df96cdc255b0bf9edd154104d4", size = 12623460, upload-time = "2025-12-20T16:17:58.963Z" }, + { url = "https://files.pythonhosted.org/packages/a4/4f/1f8475907d1a7c4ef9020edf7f39ea2422ec896849245f00688e4b268a71/numpy-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:23a3e9d1a6f360267e8fbb38ba5db355a6a7e9be71d7fce7ab3125e88bb646c8", size = 10661799, upload-time = "2025-12-20T16:18:01.078Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/ef/088e7c7342f300aaf3ee5f2c821c4b9996a1bef2aaf6a49cc8ab4883758e/numpy-2.4.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b54c83f1c0c0f1d748dca0af516062b8829d53d1f0c402be24b4257a9c48ada6", size = 16819003, upload-time = "2025-12-20T16:18:03.41Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ce/a53017b5443b4b84517182d463fc7bcc2adb4faa8b20813f8e5f5aeb5faa/numpy-2.4.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:aabb081ca0ec5d39591fc33018cd4b3f96e1a2dd6756282029986d00a785fba4", size = 12567105, upload-time = "2025-12-20T16:18:05.594Z" }, + { url = "https://files.pythonhosted.org/packages/77/58/5ff91b161f2ec650c88a626c3905d938c89aaadabd0431e6d9c1330c83e2/numpy-2.4.0-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:8eafe7c36c8430b7794edeab3087dec7bf31d634d92f2af9949434b9d1964cba", size = 5395590, upload-time = "2025-12-20T16:18:08.031Z" }, + { url = "https://files.pythonhosted.org/packages/1d/4e/f1a084106df8c2df8132fc437e56987308e0524836aa7733721c8429d4fe/numpy-2.4.0-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2f585f52b2baf07ff3356158d9268ea095e221371f1074fadea2f42544d58b4d", size = 6709947, upload-time = "2025-12-20T16:18:09.836Z" }, + { url = "https://files.pythonhosted.org/packages/63/09/3d8aeb809c0332c3f642da812ac2e3d74fc9252b3021f8c30c82e99e3f3d/numpy-2.4.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32ed06d0fe9cae27d8fb5f400c63ccee72370599c75e683a6358dd3a4fb50aaf", size = 14535119, upload-time = "2025-12-20T16:18:12.105Z" }, + { url = "https://files.pythonhosted.org/packages/fd/7f/68f0fc43a2cbdc6bb239160c754d87c922f60fbaa0fa3cd3d312b8a7f5ee/numpy-2.4.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:57c540ed8fb1f05cb997c6761cd56db72395b0d6985e90571ff660452ade4f98", size = 16475815, upload-time = "2025-12-20T16:18:14.433Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/73/edeacba3167b1ca66d51b1a5a14697c2c40098b5ffa01811c67b1785a5ab/numpy-2.4.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a39fb973a726e63223287adc6dafe444ce75af952d711e400f3bf2b36ef55a7b", size = 12489376, upload-time = "2025-12-20T16:18:16.524Z" }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pyarrow" +version = "22.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = 
"sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968, upload-time = "2025-10-24T10:03:31.21Z" }, + { url = "https://files.pythonhosted.org/packages/6c/41/3184b8192a120306270c5307f105b70320fdaa592c99843c5ef78aaefdcf/pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace", size = 35942085, upload-time = "2025-10-24T10:03:38.146Z" }, + { url = "https://files.pythonhosted.org/packages/d9/3d/a1eab2f6f08001f9fb714b8ed5cfb045e2fe3e3e3c0c221f2c9ed1e6d67d/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce", size = 44964613, upload-time = "2025-10-24T10:03:46.516Z" }, + { url = "https://files.pythonhosted.org/packages/46/46/a1d9c24baf21cfd9ce994ac820a24608decf2710521b29223d4334985127/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48", size = 47627059, upload-time = "2025-10-24T10:03:55.353Z" }, + { url = "https://files.pythonhosted.org/packages/3a/4c/f711acb13075c1391fd54bc17e078587672c575f8de2a6e62509af026dcf/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340", size = 47947043, upload-time = "2025-10-24T10:04:05.408Z" }, + { url = "https://files.pythonhosted.org/packages/4e/70/1f3180dd7c2eab35c2aca2b29ace6c519f827dcd4cfeb8e0dca41612cf7a/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653", size = 50206505, 
upload-time = "2025-10-24T10:04:15.786Z" }, + { url = "https://files.pythonhosted.org/packages/80/07/fea6578112c8c60ffde55883a571e4c4c6bc7049f119d6b09333b5cc6f73/pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84", size = 28101641, upload-time = "2025-10-24T10:04:22.57Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b7/18f611a8cdc43417f9394a3ccd3eace2f32183c08b9eddc3d17681819f37/pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a", size = 34272022, upload-time = "2025-10-24T10:04:28.973Z" }, + { url = "https://files.pythonhosted.org/packages/26/5c/f259e2526c67eb4b9e511741b19870a02363a47a35edbebc55c3178db22d/pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e", size = 35995834, upload-time = "2025-10-24T10:04:35.467Z" }, + { url = "https://files.pythonhosted.org/packages/50/8d/281f0f9b9376d4b7f146913b26fac0aa2829cd1ee7e997f53a27411bbb92/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215", size = 45030348, upload-time = "2025-10-24T10:04:43.366Z" }, + { url = "https://files.pythonhosted.org/packages/f5/e5/53c0a1c428f0976bf22f513d79c73000926cb00b9c138d8e02daf2102e18/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d", size = 47699480, upload-time = "2025-10-24T10:04:51.486Z" }, + { url = "https://files.pythonhosted.org/packages/95/e1/9dbe4c465c3365959d183e6345d0a8d1dc5b02ca3f8db4760b3bc834cf25/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8", size = 48011148, upload-time = "2025-10-24T10:04:59.585Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/b4/7caf5d21930061444c3cf4fa7535c82faf5263e22ce43af7c2759ceb5b8b/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016", size = 50276964, upload-time = "2025-10-24T10:05:08.175Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f3/cec89bd99fa3abf826f14d4e53d3d11340ce6f6af4d14bdcd54cd83b6576/pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c", size = 28106517, upload-time = "2025-10-24T10:05:14.314Z" }, + { url = "https://files.pythonhosted.org/packages/af/63/ba23862d69652f85b615ca14ad14f3bcfc5bf1b99ef3f0cd04ff93fdad5a/pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d", size = 34211578, upload-time = "2025-10-24T10:05:21.583Z" }, + { url = "https://files.pythonhosted.org/packages/b1/d0/f9ad86fe809efd2bcc8be32032fa72e8b0d112b01ae56a053006376c5930/pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8", size = 35989906, upload-time = "2025-10-24T10:05:29.485Z" }, + { url = "https://files.pythonhosted.org/packages/b4/a8/f910afcb14630e64d673f15904ec27dd31f1e009b77033c365c84e8c1e1d/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5", size = 45021677, upload-time = "2025-10-24T10:05:38.274Z" }, + { url = "https://files.pythonhosted.org/packages/13/95/aec81f781c75cd10554dc17a25849c720d54feafb6f7847690478dcf5ef8/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe", size = 47726315, upload-time = "2025-10-24T10:05:47.314Z" }, + { url = 
"https://files.pythonhosted.org/packages/bb/d4/74ac9f7a54cfde12ee42734ea25d5a3c9a45db78f9def949307a92720d37/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e", size = 47990906, upload-time = "2025-10-24T10:05:58.254Z" }, + { url = "https://files.pythonhosted.org/packages/2e/71/fedf2499bf7a95062eafc989ace56572f3343432570e1c54e6599d5b88da/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9", size = 50306783, upload-time = "2025-10-24T10:06:08.08Z" }, + { url = "https://files.pythonhosted.org/packages/68/ed/b202abd5a5b78f519722f3d29063dda03c114711093c1995a33b8e2e0f4b/pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d", size = 27972883, upload-time = "2025-10-24T10:06:14.204Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d6/d0fac16a2963002fc22c8fa75180a838737203d558f0ed3b564c4a54eef5/pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a", size = 34204629, upload-time = "2025-10-24T10:06:20.274Z" }, + { url = "https://files.pythonhosted.org/packages/c6/9c/1d6357347fbae062ad3f17082f9ebc29cc733321e892c0d2085f42a2212b/pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901", size = 35985783, upload-time = "2025-10-24T10:06:27.301Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c0/782344c2ce58afbea010150df07e3a2f5fdad299cd631697ae7bd3bac6e3/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691", size = 45020999, upload-time = "2025-10-24T10:06:35.387Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/8b/5362443737a5307a7b67c1017c42cd104213189b4970bf607e05faf9c525/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a", size = 47724601, upload-time = "2025-10-24T10:06:43.551Z" }, + { url = "https://files.pythonhosted.org/packages/69/4d/76e567a4fc2e190ee6072967cb4672b7d9249ac59ae65af2d7e3047afa3b/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6", size = 48001050, upload-time = "2025-10-24T10:06:52.284Z" }, + { url = "https://files.pythonhosted.org/packages/01/5e/5653f0535d2a1aef8223cee9d92944cb6bccfee5cf1cd3f462d7cb022790/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941", size = 50307877, upload-time = "2025-10-24T10:07:02.405Z" }, + { url = "https://files.pythonhosted.org/packages/2d/f8/1d0bd75bf9328a3b826e24a16e5517cd7f9fbf8d34a3184a4566ef5a7f29/pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145", size = 27977099, upload-time = "2025-10-24T10:08:07.259Z" }, + { url = "https://files.pythonhosted.org/packages/90/81/db56870c997805bf2b0f6eeeb2d68458bf4654652dccdcf1bf7a42d80903/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1", size = 34336685, upload-time = "2025-10-24T10:07:11.47Z" }, + { url = "https://files.pythonhosted.org/packages/1c/98/0727947f199aba8a120f47dfc229eeb05df15bcd7a6f1b669e9f882afc58/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f", size = 36032158, upload-time = "2025-10-24T10:07:18.626Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/b4/9babdef9c01720a0785945c7cf550e4acd0ebcd7bdd2e6f0aa7981fa85e2/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d", size = 44892060, upload-time = "2025-10-24T10:07:26.002Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ca/2f8804edd6279f78a37062d813de3f16f29183874447ef6d1aadbb4efa0f/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f", size = 47504395, upload-time = "2025-10-24T10:07:34.09Z" }, + { url = "https://files.pythonhosted.org/packages/b9/f0/77aa5198fd3943682b2e4faaf179a674f0edea0d55d326d83cb2277d9363/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746", size = 48066216, upload-time = "2025-10-24T10:07:43.528Z" }, + { url = "https://files.pythonhosted.org/packages/79/87/a1937b6e78b2aff18b706d738c9e46ade5bfcf11b294e39c87706a0089ac/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95", size = 50288552, upload-time = "2025-10-24T10:07:53.519Z" }, + { url = "https://files.pythonhosted.org/packages/60/ae/b5a5811e11f25788ccfdaa8f26b6791c9807119dffcf80514505527c384c/pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc", size = 28262504, upload-time = "2025-10-24T10:08:00.932Z" }, + { url = "https://files.pythonhosted.org/packages/bd/b0/0fa4d28a8edb42b0a7144edd20befd04173ac79819547216f8a9f36f9e50/pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d", size = 34224062, upload-time = "2025-10-24T10:08:14.101Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/a8/7a719076b3c1be0acef56a07220c586f25cd24de0e3f3102b438d18ae5df/pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9", size = 35990057, upload-time = "2025-10-24T10:08:21.842Z" }, + { url = "https://files.pythonhosted.org/packages/89/3c/359ed54c93b47fb6fe30ed16cdf50e3f0e8b9ccfb11b86218c3619ae50a8/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7", size = 45068002, upload-time = "2025-10-24T10:08:29.034Z" }, + { url = "https://files.pythonhosted.org/packages/55/fc/4945896cc8638536ee787a3bd6ce7cec8ec9acf452d78ec39ab328efa0a1/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde", size = 47737765, upload-time = "2025-10-24T10:08:38.559Z" }, + { url = "https://files.pythonhosted.org/packages/cd/5e/7cb7edeb2abfaa1f79b5d5eb89432356155c8426f75d3753cbcb9592c0fd/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc", size = 48048139, upload-time = "2025-10-24T10:08:46.784Z" }, + { url = "https://files.pythonhosted.org/packages/88/c6/546baa7c48185f5e9d6e59277c4b19f30f48c94d9dd938c2a80d4d6b067c/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0", size = 50314244, upload-time = "2025-10-24T10:08:55.771Z" }, + { url = "https://files.pythonhosted.org/packages/3c/79/755ff2d145aafec8d347bf18f95e4e81c00127f06d080135dfc86aea417c/pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730", size = 28757501, upload-time = "2025-10-24T10:09:59.891Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/d2/237d75ac28ced3147912954e3c1a174df43a95f4f88e467809118a8165e0/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2", size = 34355506, upload-time = "2025-10-24T10:09:02.953Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/733dfffe6d3069740f98e57ff81007809067d68626c5faef293434d11bd6/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70", size = 36047312, upload-time = "2025-10-24T10:09:10.334Z" }, + { url = "https://files.pythonhosted.org/packages/7c/2b/29d6e3782dc1f299727462c1543af357a0f2c1d3c160ce199950d9ca51eb/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754", size = 45081609, upload-time = "2025-10-24T10:09:18.61Z" }, + { url = "https://files.pythonhosted.org/packages/8d/42/aa9355ecc05997915af1b7b947a7f66c02dcaa927f3203b87871c114ba10/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91", size = 47703663, upload-time = "2025-10-24T10:09:27.369Z" }, + { url = "https://files.pythonhosted.org/packages/ee/62/45abedde480168e83a1de005b7b7043fd553321c1e8c5a9a114425f64842/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c", size = 48066543, upload-time = "2025-10-24T10:09:34.908Z" }, + { url = "https://files.pythonhosted.org/packages/84/e9/7878940a5b072e4f3bf998770acafeae13b267f9893af5f6d4ab3904b67e/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80", size = 50288838, upload-time = "2025-10-24T10:09:44.394Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, + { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, + { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, + { url = "https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, + { url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = 
"2025-11-04T13:39:17.403Z" }, + { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, upload-time = "2025-11-04T13:39:19.351Z" }, + { url = "https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, + { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 2341833, upload-time = "2025-11-04T13:39:22.606Z" }, + { url = "https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, + { url = "https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, + { url = "https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, + { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, + { 
url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = "2025-11-04T13:39:42.523Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = 
"https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url 
= "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, 
upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { 
url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = 
"https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { 
url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, + { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, + { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, + { url = "https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, + { url = "https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, + { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = 
"sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, + { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, + { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, + { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", 
hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pylance" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lance-namespace" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pyarrow" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/5c/501e3a5d73b8ef1247045ce959fa6f8932753eacf192b7a122f394a063a0/pylance-1.0.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f1d70a59868dcee62862545f9f0846b328ee013f845bec536ff6d8aac23e3bfb", size = 49829642, upload-time = "2025-12-12T21:42:52.81Z" }, + { url = "https://files.pythonhosted.org/packages/22/74/a30ad89ce6bf818c9551224ce0d2bfe4f67d7d99b3f8298f8860b12e3de6/pylance-1.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29f2af7d4eed932334b98c991b1d0c105de89a706f95ae40cce48385c6f5589e", size = 52193853, upload-time = "2025-12-12T21:51:49.609Z" }, + { url = "https://files.pythonhosted.org/packages/e8/4d/160ca42beb5e903dd1dc6526fb8b0b3a0fe4750e9f04d3f16531ef23b158/pylance-1.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05196823a7698571c122f861038193a591fe55d42a0532c1183756a9f1602cf3", size = 55557899, upload-time = "2025-12-12T21:58:02.104Z" }, + { url = "https://files.pythonhosted.org/packages/a8/4e/6fd71a0e0ba8560061d3222773c9d9406beb4d9f12dc8dcdce36964d6884/pylance-1.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:78db3a4270f0171870cfbfc13abe6af16e50565f111a8fe57b551600cfa27566", size = 52217155, upload-time = "2025-12-12T21:51:13.615Z" }, + { url = 
"https://files.pythonhosted.org/packages/cc/a5/5c3c0605fb93d38d889e4219a8987e46863ab42e4ac46b8922afea0a5263/pylance-1.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4564edbe124052272c802bfc7d43de9a7448fe8ee25d10376dcfeed2f3c42ff8", size = 55530328, upload-time = "2025-12-12T21:58:36.733Z" }, + { url = "https://files.pythonhosted.org/packages/f5/05/2fd1188e0ccb419e45e30788c033ff6fd98fc3b8ccc204ef7c67bcc82146/pylance-1.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:cfc3e03709e64f255fc5c9dd9ac8847d8c24cce971cf290cffe92068b320188d", size = 59355812, upload-time = "2025-12-12T22:18:08.83Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "pytest-cov" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = 
"sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "ruff" +version = "0.14.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/08/52232a877978dd8f9cf2aeddce3e611b40a63287dfca29b6b8da791f5e8d/ruff-0.14.10.tar.gz", hash = "sha256:9a2e830f075d1a42cd28420d7809ace390832a490ed0966fe373ba288e77aaf4", size = 5859763, upload-time = "2025-12-18T19:28:57.98Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/01/933704d69f3f05ee16ef11406b78881733c186fe14b6a46b05cfcaf6d3b2/ruff-0.14.10-py3-none-linux_armv6l.whl", hash = "sha256:7a3ce585f2ade3e1f29ec1b92df13e3da262178df8c8bdf876f48fa0e8316c49", size = 13527080, upload-time = "2025-12-18T19:29:25.642Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/58/a0349197a7dfa603ffb7f5b0470391efa79ddc327c1e29c4851e85b09cc5/ruff-0.14.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:674f9be9372907f7257c51f1d4fc902cb7cf014b9980152b802794317941f08f", size = 13797320, upload-time = "2025-12-18T19:29:02.571Z" }, + { url = "https://files.pythonhosted.org/packages/7b/82/36be59f00a6082e38c23536df4e71cdbc6af8d7c707eade97fcad5c98235/ruff-0.14.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d85713d522348837ef9df8efca33ccb8bd6fcfc86a2cde3ccb4bc9d28a18003d", size = 12918434, upload-time = "2025-12-18T19:28:51.202Z" }, + { url = "https://files.pythonhosted.org/packages/a6/00/45c62a7f7e34da92a25804f813ebe05c88aa9e0c25e5cb5a7d23dd7450e3/ruff-0.14.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6987ebe0501ae4f4308d7d24e2d0fe3d7a98430f5adfd0f1fead050a740a3a77", size = 13371961, upload-time = "2025-12-18T19:29:04.991Z" }, + { url = "https://files.pythonhosted.org/packages/40/31/a5906d60f0405f7e57045a70f2d57084a93ca7425f22e1d66904769d1628/ruff-0.14.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16a01dfb7b9e4eee556fbfd5392806b1b8550c9b4a9f6acd3dbe6812b193c70a", size = 13275629, upload-time = "2025-12-18T19:29:21.381Z" }, + { url = "https://files.pythonhosted.org/packages/3e/60/61c0087df21894cf9d928dc04bcd4fb10e8b2e8dca7b1a276ba2155b2002/ruff-0.14.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7165d31a925b7a294465fa81be8c12a0e9b60fb02bf177e79067c867e71f8b1f", size = 14029234, upload-time = "2025-12-18T19:29:00.132Z" }, + { url = "https://files.pythonhosted.org/packages/44/84/77d911bee3b92348b6e5dab5a0c898d87084ea03ac5dc708f46d88407def/ruff-0.14.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c561695675b972effb0c0a45db233f2c816ff3da8dcfbe7dfc7eed625f218935", size = 15449890, upload-time = "2025-12-18T19:28:53.573Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/36/480206eaefa24a7ec321582dda580443a8f0671fdbf6b1c80e9c3e93a16a/ruff-0.14.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bb98fcbbc61725968893682fd4df8966a34611239c9fd07a1f6a07e7103d08e", size = 15123172, upload-time = "2025-12-18T19:29:23.453Z" }, + { url = "https://files.pythonhosted.org/packages/5c/38/68e414156015ba80cef5473d57919d27dfb62ec804b96180bafdeaf0e090/ruff-0.14.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f24b47993a9d8cb858429e97bdf8544c78029f09b520af615c1d261bf827001d", size = 14460260, upload-time = "2025-12-18T19:29:27.808Z" }, + { url = "https://files.pythonhosted.org/packages/b3/19/9e050c0dca8aba824d67cc0db69fb459c28d8cd3f6855b1405b3f29cc91d/ruff-0.14.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59aabd2e2c4fd614d2862e7939c34a532c04f1084476d6833dddef4afab87e9f", size = 14229978, upload-time = "2025-12-18T19:29:11.32Z" }, + { url = "https://files.pythonhosted.org/packages/51/eb/e8dd1dd6e05b9e695aa9dd420f4577debdd0f87a5ff2fedda33c09e9be8c/ruff-0.14.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:213db2b2e44be8625002dbea33bb9c60c66ea2c07c084a00d55732689d697a7f", size = 14338036, upload-time = "2025-12-18T19:29:09.184Z" }, + { url = "https://files.pythonhosted.org/packages/6a/12/f3e3a505db7c19303b70af370d137795fcfec136d670d5de5391e295c134/ruff-0.14.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b914c40ab64865a17a9a5b67911d14df72346a634527240039eb3bd650e5979d", size = 13264051, upload-time = "2025-12-18T19:29:13.431Z" }, + { url = "https://files.pythonhosted.org/packages/08/64/8c3a47eaccfef8ac20e0484e68e0772013eb85802f8a9f7603ca751eb166/ruff-0.14.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1484983559f026788e3a5c07c81ef7d1e97c1c78ed03041a18f75df104c45405", size = 13283998, upload-time = "2025-12-18T19:29:06.994Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/84/534a5506f4074e5cc0529e5cd96cfc01bb480e460c7edf5af70d2bcae55e/ruff-0.14.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c70427132db492d25f982fffc8d6c7535cc2fd2c83fc8888f05caaa248521e60", size = 13601891, upload-time = "2025-12-18T19:28:55.811Z" }, + { url = "https://files.pythonhosted.org/packages/0d/1e/14c916087d8598917dbad9b2921d340f7884824ad6e9c55de948a93b106d/ruff-0.14.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5bcf45b681e9f1ee6445d317ce1fa9d6cba9a6049542d1c3d5b5958986be8830", size = 14336660, upload-time = "2025-12-18T19:29:16.531Z" }, + { url = "https://files.pythonhosted.org/packages/f2/1c/d7b67ab43f30013b47c12b42d1acd354c195351a3f7a1d67f59e54227ede/ruff-0.14.10-py3-none-win32.whl", hash = "sha256:104c49fc7ab73f3f3a758039adea978869a918f31b73280db175b43a2d9b51d6", size = 13196187, upload-time = "2025-12-18T19:29:19.006Z" }, + { url = "https://files.pythonhosted.org/packages/fb/9c/896c862e13886fae2af961bef3e6312db9ebc6adc2b156fe95e615dee8c1/ruff-0.14.10-py3-none-win_amd64.whl", hash = "sha256:466297bd73638c6bdf06485683e812db1c00c7ac96d4ddd0294a338c62fdc154", size = 14661283, upload-time = "2025-12-18T19:29:30.16Z" }, + { url = "https://files.pythonhosted.org/packages/74/31/b0e29d572670dca3674eeee78e418f20bdf97fa8aa9ea71380885e175ca0/ruff-0.14.10-py3-none-win_arm64.whl", hash = "sha256:e51d046cf6dda98a4633b8a8a771451107413b0f07183b2bef03f075599e44e6", size = 13729839, upload-time = "2025-12-18T19:28:48.636Z" }, +] + +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + +[[package]] +name = "thrift" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/1e/3284d19d7be99305eda145b8aa46b0c33244e4a496ec66440dac19f8274d/thrift-0.13.0.tar.gz", hash = "sha256:9af1c86bf73433afc6010ed376a6c6aca2b54099cc0d61895f640870a9ae7d89", size = 59911, upload-time = "2019-11-18T04:50:57.646Z" } + +[[package]] +name = "tomli" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392, upload-time = "2025-10-08T22:01:47.119Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236, upload-time = "2025-10-08T22:01:00.137Z" }, + { url = "https://files.pythonhosted.org/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084, upload-time = "2025-10-08T22:01:01.63Z" }, + { url = "https://files.pythonhosted.org/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832, upload-time = "2025-10-08T22:01:02.543Z" }, + { url = "https://files.pythonhosted.org/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052, upload-time = "2025-10-08T22:01:03.836Z" }, + { url = "https://files.pythonhosted.org/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555, upload-time = "2025-10-08T22:01:04.834Z" }, + { url = "https://files.pythonhosted.org/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128, upload-time = "2025-10-08T22:01:05.84Z" }, + { url = "https://files.pythonhosted.org/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = 
"sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445, upload-time = "2025-10-08T22:01:06.896Z" }, + { url = "https://files.pythonhosted.org/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165, upload-time = "2025-10-08T22:01:08.107Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891, upload-time = "2025-10-08T22:01:09.082Z" }, + { url = "https://files.pythonhosted.org/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796, upload-time = "2025-10-08T22:01:10.266Z" }, + { url = "https://files.pythonhosted.org/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121, upload-time = "2025-10-08T22:01:11.332Z" }, + { url = "https://files.pythonhosted.org/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070, upload-time = "2025-10-08T22:01:12.498Z" }, + { url = "https://files.pythonhosted.org/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859, upload-time = "2025-10-08T22:01:13.551Z" }, + { url = "https://files.pythonhosted.org/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296, upload-time = "2025-10-08T22:01:14.614Z" }, + { url = "https://files.pythonhosted.org/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124, upload-time = "2025-10-08T22:01:15.629Z" }, + { url = "https://files.pythonhosted.org/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698, upload-time = "2025-10-08T22:01:16.51Z" }, + { url = "https://files.pythonhosted.org/packages/89/48/06ee6eabe4fdd9ecd48bf488f4ac783844fd777f547b8d1b61c11939974e/tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b", size = 154819, upload-time = "2025-10-08T22:01:17.964Z" }, + { url = "https://files.pythonhosted.org/packages/f1/01/88793757d54d8937015c75dcdfb673c65471945f6be98e6a0410fba167ed/tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae", size = 148766, upload-time = "2025-10-08T22:01:18.959Z" }, + { url = "https://files.pythonhosted.org/packages/42/17/5e2c956f0144b812e7e107f94f1cc54af734eb17b5191c0bbfb72de5e93e/tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b", size = 240771, upload-time = 
"2025-10-08T22:01:20.106Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f4/0fbd014909748706c01d16824eadb0307115f9562a15cbb012cd9b3512c5/tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf", size = 248586, upload-time = "2025-10-08T22:01:21.164Z" }, + { url = "https://files.pythonhosted.org/packages/30/77/fed85e114bde5e81ecf9bc5da0cc69f2914b38f4708c80ae67d0c10180c5/tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f", size = 244792, upload-time = "2025-10-08T22:01:22.417Z" }, + { url = "https://files.pythonhosted.org/packages/55/92/afed3d497f7c186dc71e6ee6d4fcb0acfa5f7d0a1a2878f8beae379ae0cc/tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05", size = 248909, upload-time = "2025-10-08T22:01:23.859Z" }, + { url = "https://files.pythonhosted.org/packages/f8/84/ef50c51b5a9472e7265ce1ffc7f24cd4023d289e109f669bdb1553f6a7c2/tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606", size = 96946, upload-time = "2025-10-08T22:01:24.893Z" }, + { url = "https://files.pythonhosted.org/packages/b2/b7/718cd1da0884f281f95ccfa3a6cc572d30053cba64603f79d431d3c9b61b/tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999", size = 107705, upload-time = "2025-10-08T22:01:26.153Z" }, + { url = "https://files.pythonhosted.org/packages/19/94/aeafa14a52e16163008060506fcb6aa1949d13548d13752171a755c65611/tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e", size = 154244, upload-time = "2025-10-08T22:01:27.06Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/e4/1e58409aa78eefa47ccd19779fc6f36787edbe7d4cd330eeeedb33a4515b/tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3", size = 148637, upload-time = "2025-10-08T22:01:28.059Z" }, + { url = "https://files.pythonhosted.org/packages/26/b6/d1eccb62f665e44359226811064596dd6a366ea1f985839c566cd61525ae/tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc", size = 241925, upload-time = "2025-10-08T22:01:29.066Z" }, + { url = "https://files.pythonhosted.org/packages/70/91/7cdab9a03e6d3d2bb11beae108da5bdc1c34bdeb06e21163482544ddcc90/tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0", size = 249045, upload-time = "2025-10-08T22:01:31.98Z" }, + { url = "https://files.pythonhosted.org/packages/15/1b/8c26874ed1f6e4f1fcfeb868db8a794cbe9f227299402db58cfcc858766c/tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879", size = 245835, upload-time = "2025-10-08T22:01:32.989Z" }, + { url = "https://files.pythonhosted.org/packages/fd/42/8e3c6a9a4b1a1360c1a2a39f0b972cef2cc9ebd56025168c4137192a9321/tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005", size = 253109, upload-time = "2025-10-08T22:01:34.052Z" }, + { url = "https://files.pythonhosted.org/packages/22/0c/b4da635000a71b5f80130937eeac12e686eefb376b8dee113b4a582bba42/tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463", size = 97930, upload-time = "2025-10-08T22:01:35.082Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/74/cb1abc870a418ae99cd5c9547d6bce30701a954e0e721821df483ef7223c/tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8", size = 107964, upload-time = "2025-10-08T22:01:36.057Z" }, + { url = "https://files.pythonhosted.org/packages/54/78/5c46fff6432a712af9f792944f4fcd7067d8823157949f4e40c56b8b3c83/tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77", size = 163065, upload-time = "2025-10-08T22:01:37.27Z" }, + { url = "https://files.pythonhosted.org/packages/39/67/f85d9bd23182f45eca8939cd2bc7050e1f90c41f4a2ecbbd5963a1d1c486/tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf", size = 159088, upload-time = "2025-10-08T22:01:38.235Z" }, + { url = "https://files.pythonhosted.org/packages/26/5a/4b546a0405b9cc0659b399f12b6adb750757baf04250b148d3c5059fc4eb/tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530", size = 268193, upload-time = "2025-10-08T22:01:39.712Z" }, + { url = "https://files.pythonhosted.org/packages/42/4f/2c12a72ae22cf7b59a7fe75b3465b7aba40ea9145d026ba41cb382075b0e/tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b", size = 275488, upload-time = "2025-10-08T22:01:40.773Z" }, + { url = "https://files.pythonhosted.org/packages/92/04/a038d65dbe160c3aa5a624e93ad98111090f6804027d474ba9c37c8ae186/tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67", size = 272669, upload-time = "2025-10-08T22:01:41.824Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/2f/8b7c60a9d1612a7cbc39ffcca4f21a73bf368a80fc25bccf8253e2563267/tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f", size = 279709, upload-time = "2025-10-08T22:01:43.177Z" }, + { url = "https://files.pythonhosted.org/packages/7e/46/cc36c679f09f27ded940281c38607716c86cf8ba4a518d524e349c8b4874/tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0", size = 107563, upload-time = "2025-10-08T22:01:44.233Z" }, + { url = "https://files.pythonhosted.org/packages/84/ff/426ca8683cf7b753614480484f6437f568fd2fda2edbdf57a2d3d8b27a0b/tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba", size = 119756, upload-time = "2025-10-08T22:01:45.234Z" }, + { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry 
= "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/24/a2a2ed9addd907787d7aa0355ba36a6cadf1768b934c652ea78acbd59dcd/urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797", size = 432930, upload-time = "2025-12-11T15:56:40.252Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/b9/4095b668ea3678bf6a0af005527f39de12fb026516fb3df17495a733b7f8/urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd", size = 131182, upload-time = "2025-12-11T15:56:38.584Z" }, +]