diff --git a/.github/bin/download_nodejs b/.github/bin/download_nodejs index 773e3f74f18e1..6fa4d62c332a4 100755 --- a/.github/bin/download_nodejs +++ b/.github/bin/download_nodejs @@ -1,4 +1,13 @@ #!/usr/bin/env bash +# Download Node.js and Yarn to Maven cache for frontend-maven-plugin +# ================================================================== +# Presto's web UI (presto-ui module) uses frontend-maven-plugin which expects +# Node.js and Yarn at specific paths in the Maven repository. This script +# pre-downloads them to avoid flaky downloads during the build. +# +# The tarballs are stored at: +# ${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-*-${OS}-${ARCH}.tar.gz +# ${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-*.tar.gz set -euo pipefail @@ -47,27 +56,27 @@ get_arch() { } download_node() { - if [[ -a "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" ]]; then + if [[ -a "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" ]]; then echo "Node binary exists. Skipped download" return 0 fi - - if ! wget_retry 3 10 "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" \ + + if ! 
wget_retry 3 10 "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" \ "https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" "node"; then - rm "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" + rm "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" return 1 fi } download_yarn() { - if [[ -a "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" ]]; then + if [[ -a "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" ]]; then echo "Yarn binary exists. Skipped download" return 0 fi - if ! wget_retry 3 10 "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" \ + if ! wget_retry 3 10 "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" \ "https://github.com/yarnpkg/yarn/releases/download/v${YARN_VERSION}/yarn-v${YARN_VERSION}.tar.gz" "yarn"; then - rm "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" + rm "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" return 1 fi } @@ -75,8 +84,11 @@ download_yarn() { NODE_OS=$(get_os) NODE_ARCH=$(get_arch) -mkdir -p "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}" -mkdir -p "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}" +# Use MAVEN_REPO if set, otherwise fall back to default .m2/repository +MAVEN_REPO="${MAVEN_REPO:-${HOME}/.m2/repository}" + +mkdir -p "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}" +mkdir -p "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}" if download_node; then echo "node-v${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz is ready for use" diff --git a/.github/dockerfiles/yscope-presto-builder.dockerfile 
b/.github/dockerfiles/yscope-presto-builder.dockerfile new file mode 100644 index 0000000000000..d57d0ab3b0f2f --- /dev/null +++ b/.github/dockerfiles/yscope-presto-builder.dockerfile @@ -0,0 +1,142 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# YScope Presto Builder Image +# =========================== +# A unified builder for presto (Java) and prestocpp (C++). +# +# Adapted from upstream's ubuntu-22.04-dependency.dockerfile, with additions: +# - Pre-warmed ccache for faster C++ builds +# - Pre-downloaded Maven dependencies for faster Java builds +# - Pre-downloaded Node.js/Yarn for frontend builds +# +# Tagged by hash of dependency files, rebuilt only when deps change. 
+ +FROM ghcr.io/y-scope/docker-github-actions-runner:ubuntu-jammy + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# ============================================================================ +# Dependency Installation (from upstream ubuntu-22.04-dependency.dockerfile) +# ============================================================================ + +COPY ./presto-native-execution/scripts /presto/scripts/ +COPY ./presto-native-execution/velox/scripts /presto/velox/scripts/ + +# Required to avoid tzdata prompting for region selection +ARG DEBIAN_FRONTEND="noninteractive" +ARG tz="Etc/UTC" +ENV TZ=${tz} +ENV PROMPT_ALWAYS_RESPOND=n +ENV SUDO=" " + +# Build parallelism for 32-core self-hosted runners +# See: https://github.com/y-scope/velox/pull/45 +ARG NUM_THREADS=16 +ARG MAX_HIGH_MEM_JOBS=16 +ARG MAX_LINK_JOBS=12 +ENV MAX_HIGH_MEM_JOBS=${MAX_HIGH_MEM_JOBS} +ENV MAX_LINK_JOBS=${MAX_LINK_JOBS} + +# Install CMake 3.28.3 (required - setup script's pip cmake causes fastfloat issues) +RUN apt-get update && \ + apt-get install -y --no-install-recommends wget && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ + wget -q https://github.com/Kitware/CMake/releases/download/v3.28.3/cmake-3.28.3-linux-x86_64.tar.gz && \ + tar -xzf cmake-3.28.3-linux-x86_64.tar.gz -C /opt && \ + rm cmake-3.28.3-linux-x86_64.tar.gz && \ + ln -sf /opt/cmake-3.28.3-linux-x86_64/bin/cmake /usr/local/bin/cmake && \ + ln -sf /opt/cmake-3.28.3-linux-x86_64/bin/ctest /usr/local/bin/ctest && \ + ln -sf /opt/cmake-3.28.3-linux-x86_64/bin/cpack /usr/local/bin/cpack + +# Run setup scripts - same pattern as upstream ubuntu-22.04-dependency.dockerfile +# rpm is needed for MinIO installation (S3-compatible storage for tests) +RUN mkdir -p /build && \ + cd /build && \ + /presto/scripts/setup-ubuntu.sh && \ + apt install -y rpm && \ + /presto/velox/scripts/setup-ubuntu.sh install_adapters && \ + /presto/scripts/setup-adapters.sh && \ + rm -rf /build + +ENV PATH="/presto/.venv/bin:${PATH}" +ENV 
VIRTUAL_ENV="/presto/.venv" + +# ============================================================================ +# ccache Warmup (YScope addition for faster C++ builds) +# See: https://github.com/y-scope/velox/pull/45 +# ============================================================================ + +# ccache settings for portable cache (works across different checkout paths) +# - CCACHE_DIR: Standard location in /var/cache for system caches +# - CCACHE_BASEDIR: Set at runtime via GITHUB_WORKSPACE for portability +# - CCACHE_COMPRESSLEVEL=0: Disabled for faster CI execution (disk space not a concern) +# - CCACHE_NOHASHDIR: Ignore directory paths in hash for cache hits across checkouts +ENV CCACHE_DIR=/var/cache/ccache +ENV CCACHE_COMPRESSLEVEL=0 +ENV CCACHE_MAX_SIZE=5G +ENV CCACHE_NOHASHDIR=true + +RUN mkdir -p ${CCACHE_DIR} && chmod 777 ${CCACHE_DIR} + +COPY . /workspace/ +WORKDIR /workspace + +# Build prestocpp once to populate ccache +# Build flags must match CI builds exactly for cache hits (see prestocpp-linux-build-and-unit-test.yml) +# CCACHE_BASEDIR set to /workspace for the warmup build +RUN ccache -z && \ + export CCACHE_BASEDIR=/workspace && \ + cd presto-native-execution && \ + cmake \ + -B _build/release \ + -GNinja \ + -DTREAT_WARNINGS_AS_ERRORS=1 \ + -DENABLE_ALL_WARNINGS=1 \ + -DCMAKE_BUILD_TYPE=Release \ + -DPRESTO_ENABLE_PARQUET=ON \ + -DPRESTO_ENABLE_REMOTE_FUNCTIONS=ON \ + -DPRESTO_ENABLE_JWT=ON \ + -DPRESTO_STATS_REPORTER_TYPE=PROMETHEUS \ + -DPRESTO_MEMORY_CHECKER_TYPE=LINUX_MEMORY_CHECKER \ + -DCMAKE_PREFIX_PATH=/usr/local \ + -DThrift_ROOT=/usr/local \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DMAX_LINK_JOBS=${MAX_LINK_JOBS} && \ + ninja -C _build/release -j ${NUM_THREADS} && \ + ccache -svz + +# ============================================================================ +# Maven/Node.js Cache (YScope addition for faster Java builds) +# ============================================================================ + +ENV 
MAVEN_REPO=/opt/maven/repository +RUN mkdir -p ${MAVEN_REPO} + +# Download dependencies using temporary Java installation +RUN wget -q https://github.com/adoptium/temurin8-binaries/releases/download/jdk8u442-b06/OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz && \ + tar -xzf OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz -C /tmp && \ + rm OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz && \ + export JAVA_HOME=/tmp/jdk8u442-b06 && \ + export PATH=${JAVA_HOME}/bin:${PATH} && \ + export RUNNER_OS=Linux && \ + export RUNNER_ARCH=X64 && \ + cd /workspace && \ + .github/bin/download_nodejs && \ + ./mvnw dependency:resolve-plugins dependency:resolve -B --no-transfer-progress \ + -Dmaven.repo.local=${MAVEN_REPO} || true && \ + rm -rf /tmp/jdk8u442-b06 + +# Clean up source, keep only caches +RUN rm -rf /workspace/* + +WORKDIR /workspace diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000000..8b3e55e13e665 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,277 @@ +name: ci + +# ============================================================================== +# CI Workflow Overview +# ============================================================================== +# Main orchestrator for all build and test jobs. Builds Java coordinator (presto) +# and C++ native worker (prestissimo) in parallel, then runs integration tests. +# +# Terminology: +# - presto: Java-based query coordinator (runs queries, manages workers) +# - prestocpp: C++ worker implementation (source in presto-native-execution/) +# - prestissimo: Runtime name for prestocpp (Docker image and binary) +# +# Job Dependency Graph: +# config ─► create-builder-image ─┬─► prestocpp ────┬─► integration-tests +# ├─► presto ───────┘ +# └─► presto-tests +# +# ============================================================================== +# Key Design Decisions +# ============================================================================== +# +# 1. 
CACHING STRATEGY: Bake caches into Docker image layers +# -------------------------------------------------------- +# Problem: Downloading ccache (~2GB+) and Maven cache from remote storage on +# every CI run causes bandwidth bottlenecks with parallel builds. +# +# Solution: Bake caches into Docker image layers. +# - Docker layer caching: each host downloads cache layer ONCE, then reuses it +# - Builder image based on GitHub runner image (always pre-cached on self-hosted) +# - Parallel jobs on same host share cached layers with zero network traffic +# +# 2. IMAGE TAGGING: Dual tags + version streams +# ------------------------------------------- +# Problem: Branch-only tags can't pin specific builds for production while also +# providing a "latest" tag for development. +# +# Solution: Every build gets two tags: +# - Immutable: <version>-<type>-<timestamp>-<sha> (e.g., 0.293-BETA-20250522140509-abc123) +# Use for: Production deployments, reproducible environments +# - Mutable: <version>-<type>-SNAPSHOT (e.g., 0.293-BETA-SNAPSHOT) +# Use for: Always pulling latest without knowing exact version +# +# Version streams (RELEASE, BETA, DEV) let users choose stability level. +# +# 3. BUILDER IMAGE TAG: Auto-computed dependency hash +# ------------------------------------------------- +# The unified-builder image tag is a hash of dependency files (setup scripts, +# pom.xml, etc.). Image is only rebuilt when dependencies change. +# +# 4. CENTRALIZED CONFIG JOB: Single source of truth +# ----------------------------------------------- +# Problem: GitHub Actions doesn't allow `${{ env.* }}` in `with:` blocks when +# calling reusable workflows. Also, version tag computation was duplicated +# across presto-build.yml and prestocpp workflows. 
+# +# Solution: A `config` job computes all shared configuration upfront: +# - builder-image: Hash-based tag for the builder image +# - runtime-version-tag: Immutable tag (e.g., 0.293-BETA-20250522140509-abc123) +# - runtime-snapshot-tag: Mutable tag (e.g., 0.293-BETA-SNAPSHOT) +# +# All downstream jobs reference these outputs via `needs.config.outputs.*`. +# +# ============================================================================== +# Comparison with Upstream (prestodb/presto) +# ============================================================================== +# |---------------------|----------------------------------------|-----------------------------------------------| +# | Aspect | Upstream | This Fork (yscope) | +# |---------------------|----------------------------------------|-----------------------------------------------| +# | Runners | GitHub-hosted (ephemeral) | Self-hosted (ephemeral) | +# | CI Structure | Separate independent workflows | Unified ci.yml orchestrator (parallel) | +# | Builder Image | presto-native-dependency (C++ only) | unified-builder (Java + C++ + caches) | +# | Builder Image Tag | Pinned version-timestamp-hash | Auto-computed dependency hash | +# | Runtime Image Tag | Release version only (e.g., 0.292) | version-TYPE-timestamp-hash per build | +# | ccache Strategy | Stash/restore via Apache Infra | Pre-warmed in builder image | +# | Image Publishing | On release only | On every push (presto + prestissimo) | +# |---------------------|----------------------------------------|-----------------------------------------------| +# +# ============================================================================== +# Configuration +# ============================================================================== +# IMAGE_VERSION_TYPE (env variable below) +# Controls version stream for Docker images. 
Values: RELEASE, BETA, DEV +# +# ARTIFACT_JAVA_VERSION (GitHub repo variable: Settings > Secrets and variables) +# Controls which Java version uploads artifacts. Values: 8, 17. Default: 8 +# +# ============================================================================== +# Outputs +# ============================================================================== +# Artifacts (1-day retention, shared between jobs): +# | Artifact | Contents | +# |---------------------|---------------------------------------------------| +# | presto-server | presto-server-*.tar.gz | +# | presto-cli | presto-cli-*-executable.jar | +# | presto-native-build | presto_server, velox_functions_remote_server_main | +# +# Docker Images (ghcr.io, pushed on push events only): +# | Image | Description | +# |-----------------|--------------------------------| +# | unified-builder | Build environment with deps | +# | presto | Java coordinator runtime | +# | prestissimo | C++ native worker runtime | + +# ============================================================================== +# Triggers: When does this workflow run? 
+# ============================================================================== +on: + workflow_dispatch: # Manual trigger from GitHub UI (Actions tab -> Run workflow) + pull_request: # On every pull request + push: # On every push to any branch + paths-ignore: + - 'presto-docs/**' # Skip CI for docs-only changes (docs have their own workflow) + +# ============================================================================== +# Environment Variables +# ============================================================================== +env: + # Docker image version type for presto and prestissimo images + # See "Configuration" section above for details + IMAGE_VERSION_TYPE: 'BETA' + + # Maven JVM settings (not inherited by called workflows) + MAVEN_OPTS: "-Xmx1024M -XX:+ExitOnOutOfMemoryError" + MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" + RETRY: .github/bin/retry + +# ============================================================================== +# Concurrency Control +# ============================================================================== +# Prevents multiple CI runs for the same branch from running simultaneously. +# If you push twice quickly, the first run is cancelled and only the second runs. 
+concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true + +# ============================================================================== +# Jobs +# ============================================================================== +jobs: + # ---------------------------------------------------------------------------- + # Step 1: Config - Compute Builder Tag and Runtime Version Tags + # ---------------------------------------------------------------------------- + # Centralizes all configuration computation: + # - builder-image: Hash-based tag for the builder image (only rebuilds when dependencies change) + # - runtime-version-tag: Immutable tag for runtime images (version-TYPE-timestamp-hash) + # - runtime-snapshot-tag: Mutable SNAPSHOT tag for runtime images (version-TYPE-SNAPSHOT) + config: + runs-on: ubuntu-latest + outputs: + builder-image: ${{ steps.builder.outputs.image }} + runtime-version-tag: ${{ steps.version.outputs.runtime-version-tag }} + runtime-snapshot-tag: ${{ steps.version.outputs.runtime-snapshot-tag }} + steps: + - uses: actions/checkout@v4 + with: + show-progress: false + + - name: Initialize submodules (needed for velox scripts hash) + run: | + # Try shallow clone first (fast), fall back to full clone if pinned commit not in shallow history + git submodule update --init --recursive --depth=1 || \ + git submodule update --init --recursive + + - name: Compute builder image tag + id: builder + run: | + # Compute hash of files that affect the builder image: + # - Builder dockerfile itself + # - C++ setup scripts (native worker dependencies) + # - pom.xml files (Maven/Java dependencies) + # - download_nodejs script (Node.js/Yarn for frontend) + DOCKERFILE_HASH=$(sha256sum .github/dockerfiles/yscope-presto-builder.dockerfile | cut -c1-8) + SCRIPTS_HASH=$(find presto-native-execution/scripts presto-native-execution/velox/scripts -type f -exec sha256sum {} \; | sort | sha256sum | cut -c1-8) + POM_HASH=$(find . 
-name "pom.xml" -exec sha256sum {} \; | sort | sha256sum | cut -c1-8) + NODEJS_HASH=$(sha256sum .github/bin/download_nodejs | cut -c1-8) + TAG="${DOCKERFILE_HASH}-${SCRIPTS_HASH}-${POM_HASH}-${NODEJS_HASH}" + echo "image=ghcr.io/${{ github.repository }}/unified-builder:${TAG}" >> $GITHUB_OUTPUT + + - name: Compute runtime version tags + id: version + run: | + # Extract base version from pom.xml (e.g., "0.293" from "0.293-SNAPSHOT") + BASE_VERSION=$(grep '<version>' pom.xml | head -1 | sed 's/.*<version>\(.*\)<\/version>.*/\1/' | sed 's/-SNAPSHOT//') + + # Get commit timestamp for immutable tag + TIMESTAMP=$(git show -s --format=%cd --date=format:'%Y%m%d%H%M%S' HEAD) + SHORT_SHA=$(git rev-parse --short HEAD) + + # Output tags: + # - runtime-version-tag: e.g., 0.293-BETA-20250529140509-484b00e (immutable) + # - runtime-snapshot-tag: e.g., 0.293-BETA-SNAPSHOT (mutable, always latest) + echo "runtime-version-tag=${BASE_VERSION}-${{ env.IMAGE_VERSION_TYPE }}-${TIMESTAMP}-${SHORT_SHA}" >> $GITHUB_OUTPUT + echo "runtime-snapshot-tag=${BASE_VERSION}-${{ env.IMAGE_VERSION_TYPE }}-SNAPSHOT" >> $GITHUB_OUTPUT + + # ---------------------------------------------------------------------------- + # Step 2: Create Builder Image (if needed) + # ---------------------------------------------------------------------------- + # Checks if a builder image with this tag exists. If not, builds and pushes it. + # The image contains all C++ and Java dependencies pre-installed. + create-builder-image: + needs: config + uses: ./.github/workflows/create-builder-image.yml + with: + builder-image: ${{ needs.config.outputs.builder-image }} + + # ---------------------------------------------------------------------------- + # Step 3a: Build Presto (Java) + # ---------------------------------------------------------------------------- + # Builds the Java coordinator with both Java 8 and Java 17 in parallel. 
+ # Only the version matching ARTIFACT_JAVA_VERSION (default: '8') will: + # - Upload artifacts (presto-server, presto-cli) for integration tests + # - Build and push the presto Docker image + presto: + name: presto${{ matrix.java-major }} + needs: [config, create-builder-image] + strategy: + matrix: + include: + - java-major: '8' + java-version: '8.0.442' + - java-major: '17' + java-version: '17.0.13' + uses: ./.github/workflows/presto-build.yml + with: + builder-image: ${{ needs.config.outputs.builder-image }} + java-version: ${{ matrix.java-version }} + should-upload-artifacts: ${{ matrix.java-major == (vars.ARTIFACT_JAVA_VERSION || '8') }} + should-build-image: ${{ matrix.java-major == (vars.ARTIFACT_JAVA_VERSION || '8') }} + runtime-version-tag: ${{ needs.config.outputs.runtime-version-tag }} + runtime-snapshot-tag: ${{ needs.config.outputs.runtime-snapshot-tag }} + secrets: inherit + + # ---------------------------------------------------------------------------- + # Step 3b: Presto Unit Tests (Java) + # ---------------------------------------------------------------------------- + # Runs Java unit tests in parallel with builds. Uses matrix strategy to run + # multiple test modules in parallel across Java 8 and Java 17. + presto-tests: + name: presto-tests + needs: [config, create-builder-image] + uses: ./.github/workflows/tests.yml + with: + builder-image: ${{ needs.config.outputs.builder-image }} + secrets: inherit + + # ---------------------------------------------------------------------------- + # Step 3c: Build and Test Prestocpp (C++) + # ---------------------------------------------------------------------------- + # Builds the C++ native worker (prestocpp), runs C++ unit tests, and builds + # the prestissimo runtime Docker image. + # Uses pre-warmed ccache from builder image for fast incremental builds. + # Uploads the compiled binary as an artifact for integration tests. 
+ prestocpp: + name: prestocpp + needs: [config, create-builder-image] + uses: ./.github/workflows/prestocpp-linux-build-and-unit-test.yml + with: + builder-image: ${{ needs.config.outputs.builder-image }} + runtime-version-tag: ${{ needs.config.outputs.runtime-version-tag }} + runtime-snapshot-tag: ${{ needs.config.outputs.runtime-snapshot-tag }} + secrets: inherit + + # ---------------------------------------------------------------------------- + # Step 4: Integration Tests + # ---------------------------------------------------------------------------- + # End-to-end tests that run the Java coordinator with the C++ native worker. + # Requires artifacts from both: + # - presto: presto-server tarball (Java coordinator) + # - prestocpp: presto_server binary (C++ native worker) + integration-tests: + name: integration-tests + needs: [config, create-builder-image, presto, prestocpp] + uses: ./.github/workflows/integration-tests.yml + with: + builder-image: ${{ needs.config.outputs.builder-image }} + secrets: inherit diff --git a/.github/workflows/create-builder-image.yml b/.github/workflows/create-builder-image.yml new file mode 100644 index 0000000000000..c91a2bcb2f257 --- /dev/null +++ b/.github/workflows/create-builder-image.yml @@ -0,0 +1,79 @@ +name: create-builder-image + +# ============================================================================== +# Create Builder Image +# ============================================================================== +# This workflow creates the unified builder Docker image if it doesn't already exist. +# +# The builder image contains: +# - All C++ dependencies (boost, folly, etc.) pre-compiled +# - Pre-warmed ccache (compiler cache) with prestocpp already built once +# - Pre-downloaded Maven dependencies +# - Pre-downloaded Node.js/Yarn +# +# Building this image takes ~1 hour, but we only do it when dependencies change. +# Most CI runs will skip this step because the image already exists. 
+ +on: + workflow_call: + inputs: + builder-image: + description: 'Full builder image URI with tag' + required: true + type: string + +jobs: + create-builder-image: + name: "create-builder-image" + # Use self-hosted runner with 32 cores for faster builds + # The [self-hosted, cores=32] syntax is a label filter - it selects runners with both labels + runs-on: [self-hosted, cores=32] + timeout-minutes: 180 + # Concurrency control: If two workflows try to build the same image simultaneously, + # only one will run. The second will wait (cancel-in-progress: false means don't cancel). + # This prevents duplicate builds of the same image. + concurrency: + group: ${{ inputs.builder-image }} + cancel-in-progress: false + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive # Also checkout velox submodule (needed for build) + show-progress: false + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx + # Buildx is Docker's extended build tool with better caching and multi-platform support + uses: docker/setup-buildx-action@v3 + + - name: Check if builder image exists + id: check-image + run: | + # Check if the image already exists in the registry + # docker manifest inspect queries the registry without pulling the image + IMAGE_TAG="${{ inputs.builder-image }}" + if docker manifest inspect $IMAGE_TAG > /dev/null 2>&1; then + echo "exists=true" >> $GITHUB_OUTPUT + echo "Builder image already exists: $IMAGE_TAG" + else + echo "exists=false" >> $GITHUB_OUTPUT + echo "Builder image does not exist: $IMAGE_TAG" + fi + + - name: Build and push unified builder image + # Only build if image doesn't exist (skip if cached) + if: steps.check-image.outputs.exists == 'false' + uses: docker/build-push-action@v6 + with: + context: . 
# Build context is the entire repo (needed for setup scripts) + file: ./.github/dockerfiles/yscope-presto-builder.dockerfile + push: true # Push to ghcr.io after building + tags: ${{ inputs.builder-image }} + labels: | + org.opencontainers.image.source=https://github.com/${{ github.repository }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 46565491c504b..13cc727dcbe3c 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -20,9 +20,9 @@ env: concurrency: group: "${{github.workflow}}-${{github.ref}}" - # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so - # that each commit to release-0.293-clp-connector is checked completely. - cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}" + # Cancel in-progress jobs for efficiency. Exclude branches with `release-0.293-clp-connector-snapshot` prefix so + # that each commit to these branches is checked completely. + cancel-in-progress: "${{!startsWith(github.ref, 'refs/heads/release-0.293-clp-connector-snapshot')}}" jobs: test: diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 0000000000000..708341a64f803 --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,459 @@ +name: integration-tests + +# ============================================================================== +# Integration Tests +# ============================================================================== +# End-to-end tests that run the Java coordinator with the C++ native worker. +# +# These tests verify that the Java coordinator and C++ native worker work together +# correctly. 
They require artifacts from both: +# - presto-java8: presto-server tarball (Java coordinator) +# - prestocpp: presto_server binary (C++ native worker) +# +# Test Categories: +# ---------------- +# - integration-e2e-java: End-to-end tests (TestPrestoNative*.java) +# - integration-storage-java: Storage format tests (PARQUET, DWRF) +# - integration-sidecar-java: Sidecar plugin tests +# +# Adapted from upstream's prestocpp-linux-build-and-unit-test.yml jobs: +# - prestocpp-linux-presto-e2e-tests -> integration-e2e-java +# - prestocpp-linux-presto-native-tests -> integration-storage-java +# - prestocpp-linux-presto-sidecar-tests -> integration-sidecar-java +# +# Key differences from upstream: +# 1. Uses pre-built presto-server artifact instead of building from source +# (upstream runs: ./mvnw install -pl 'presto-native-execution' -am) +# 2. Uses unified builder image with pre-cached Maven dependencies +# 3. Artifact download path handling differs due to actions/download-artifact@v4 +# stripping common path prefixes +# 4. Adds PRESTO_SERVER_DIR and additionalClasspath for presto-server JARs +# 5. 
Sets LD_LIBRARY_PATH explicitly for native library discovery + +on: + workflow_call: + inputs: + builder-image: + description: 'Full builder image URI with tag' + required: true + type: string + +jobs: + # Upstream: prestocpp-linux-presto-e2e-tests + # Tests: TestPrestoNative*.java in presto-native-execution module + integration-e2e-java: + name: "integration-e2e-java" + runs-on: self-hosted + timeout-minutes: 180 + container: + image: ${{ inputs.builder-image }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + env: + MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError" + MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end" + # Use pre-cached Maven repository from builder image + MAVEN_REPO: /opt/maven/repository + steps: + - uses: actions/checkout@v4 + with: + show-progress: false + + - name: Setup Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '8.0.442' + + # setup-java may override MAVEN_REPO, so explicitly set it after + - name: Configure Maven repository + run: | + echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV + + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: presto-native-build + path: . + + # Permissions are lost when uploading. Details here: https://github.com/actions/upload-artifact/issues/38 + - name: Restore execute permissions and library path + run: | + # actions/download-artifact@v4 strips common path prefix when downloading + # Uploaded: presto-native-execution/_build/release/presto_cpp/main/presto_server + # Downloaded to: presto_cpp/main/presto_server + echo "Setting up binary directory structure..." 
+ mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/ + mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/ + + # Move and restore execute permissions for presto_server + if [ -f "${GITHUB_WORKSPACE}/presto_cpp/main/presto_server" ]; then + echo "Found presto_server, moving to expected location..." + mv ${GITHUB_WORKSPACE}/presto_cpp/main/presto_server ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/ + chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server + echo "presto_server moved and made executable" + else + echo "ERROR: presto_server not found at ${GITHUB_WORKSPACE}/presto_cpp/main/presto_server" + echo "Contents of workspace:" + ls -la ${GITHUB_WORKSPACE}/ + echo "Searching for presto_server:" + find ${GITHUB_WORKSPACE} -name "presto_server" -type f 2>/dev/null + exit 1 + fi + + # Move and restore execute permissions for velox remote server if it exists + if [ -f "${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main" ]; then + echo "Found velox_functions_remote_server_main, moving to expected location..." + mv ${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/ + chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main + echo "velox_functions_remote_server_main moved and made executable" + else + echo "Warning: velox_functions_remote_server_main not found (might not be built in this configuration)" + fi + + # Note: All required libraries (boost, snappy, geos, etc.) are already installed + # in the builder container and available via system library paths. No additional + # library configuration needed. 
+ + # Download pre-built presto-server from presto-java8 job + # This replaces upstream's: ./mvnw install -pl 'presto-native-execution' -am + - name: Download presto-server artifact + uses: actions/download-artifact@v4 + with: + name: presto-server + path: . + + - name: Extract presto-server + run: | + echo "Extracting presto-server..." + PRESTO_SERVER_TAR=$(ls presto-server-*.tar.gz) + tar -xzf ${PRESTO_SERVER_TAR} + PRESTO_SERVER_DIR=$(ls -d presto-server-*/ | head -1 | sed 's:/$::') + echo "Presto server extracted to ${PRESTO_SERVER_DIR}/" + echo "PRESTO_SERVER_DIR=${GITHUB_WORKSPACE}/${PRESTO_SERVER_DIR}" >> $GITHUB_ENV + + # YScope-specific: presto-clp is not in upstream, required for CLP UDF tests + - name: Build required dependencies + env: + MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" + run: | + ./mvnw install -B -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true \ + --no-transfer-progress -Dmaven.repo.local=${{ env.MAVEN_REPO }} \ + -pl 'presto-clp' + + - name: Compile integration test classes + env: + MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" + run: | + ./mvnw test-compile -Dmaven.repo.local=${{ env.MAVEN_REPO }} \ + -pl 'presto-native-execution' + + - name: Run presto-native e2e tests + run: | + # First verify the binary exists at expected location + export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server" + if [ ! 
-f "${PRESTO_SERVER_PATH}" ]; then + echo "ERROR: presto_server binary not found at: ${PRESTO_SERVER_PATH}" + echo "Current directory structure:" + ls -la ${GITHUB_WORKSPACE}/ + echo "Checking presto-native-execution directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/ || echo "Directory not found" + echo "Checking _build directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/ || echo "Directory not found" + echo "Checking release directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/ || echo "Directory not found" + exit 1 + fi + echo "Found presto_server binary at: ${PRESTO_SERVER_PATH}" + + export TESTFILES=`find ./presto-native-execution/src/test -type f -name 'TestPrestoNative*.java'` + # Convert file paths to comma separated class names + export TESTCLASSES= + for test_file in $TESTFILES + do + tmp=${test_file##*/} + test_class=${tmp%%\.*} + export TESTCLASSES="${TESTCLASSES},$test_class" + done + export TESTCLASSES=${TESTCLASSES#,} + echo "TESTCLASSES = $TESTCLASSES" + + # Add presto-server JARs to classpath for tests + export PRESTO_CLASSPATH="${PRESTO_SERVER_DIR}/plugin/*:${PRESTO_SERVER_DIR}/lib/*" + + # Set LD_LIBRARY_PATH for native workers to find libraries + # This must be exported so child processes (native workers) inherit it + export LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" + + ./mvnw test \ + ${MAVEN_TEST} \ + -Dmaven.repo.local=${{ env.MAVEN_REPO }} \ + -pl 'presto-native-execution' \ + -Dtest="${TESTCLASSES}" \ + -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \ + -DPRESTO_SERVER_DIR=${PRESTO_SERVER_DIR} \ + -DDATA_DIR=${RUNNER_TEMP} \ + -Duser.timezone=America/Bahia_Banderas \ + -DadditionalClasspath="${PRESTO_CLASSPATH}" \ + -T1C + + # Upstream: prestocpp-linux-presto-native-tests + # Tests: Test*.java in presto-native-tests module with storage format matrix + integration-storage-java: + name: "integration-storage-java" + runs-on: self-hosted + 
timeout-minutes: 180 + container: + image: ${{ inputs.builder-image }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + strategy: + fail-fast: false + matrix: + storage-format: ["PARQUET", "DWRF"] + env: + MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError" + MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end" + MAVEN_REPO: /opt/maven/repository + steps: + - uses: actions/checkout@v4 + with: + show-progress: false + + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: presto-native-build + path: . + + # Restore artifact permissions and move to expected paths + # (same as integration-e2e-java, see comments there for details) + - name: Restore execute permissions and library path + run: | + mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/ + mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/ + mv ${GITHUB_WORKSPACE}/presto_cpp/main/presto_server ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/ + chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server + if [ -f "${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main" ]; then + mv ${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/ + chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main + fi + + - name: Setup Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '8.0.442' + + # setup-java may override MAVEN_REPO, so explicitly set it after + - name: Configure Maven repository + run: | + echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV + + # 
Download pre-built presto-server from presto-java8 job + # This replaces upstream's: ./mvnw install -pl 'presto-native-tests' -am + - name: Download presto-server artifact + uses: actions/download-artifact@v4 + with: + name: presto-server + path: . + + - name: Extract presto-server + run: | + echo "Extracting presto-server..." + PRESTO_SERVER_TAR=$(ls presto-server-*.tar.gz) + tar -xzf ${PRESTO_SERVER_TAR} + PRESTO_SERVER_DIR=$(ls -d presto-server-*/ | head -1 | sed 's:/$::') + echo "Presto server extracted to ${PRESTO_SERVER_DIR}/" + echo "PRESTO_SERVER_DIR=${GITHUB_WORKSPACE}/${PRESTO_SERVER_DIR}" >> $GITHUB_ENV + + - name: Compile integration test classes only + env: + MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" + run: | + # Presto JARs already available from presto-server artifact + ./mvnw test-compile -Dmaven.repo.local=${{ env.MAVEN_REPO }} -pl 'presto-native-tests' + + - name: Run presto-native tests + run: | + # First verify the binary exists at expected location + export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server" + if [ ! 
-f "${PRESTO_SERVER_PATH}" ]; then + echo "ERROR: presto_server binary not found at: ${PRESTO_SERVER_PATH}" + echo "Current directory structure:" + ls -la ${GITHUB_WORKSPACE}/ + echo "Checking presto-native-execution directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/ || echo "Directory not found" + echo "Checking _build directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/ || echo "Directory not found" + echo "Checking release directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/ || echo "Directory not found" + exit 1 + fi + echo "Found presto_server binary at: ${PRESTO_SERVER_PATH}" + + export TESTFILES=`find ./presto-native-tests/src/test -type f -name 'Test*.java'` + # Convert file paths to comma separated class names + export TESTCLASSES= + for test_file in $TESTFILES + do + tmp=${test_file##*/} + test_class=${tmp%%\.*} + export TESTCLASSES="${TESTCLASSES},$test_class" + done + export TESTCLASSES=${TESTCLASSES#,} + echo "TESTCLASSES = $TESTCLASSES" + + # Add presto-server JARs to classpath for tests + export PRESTO_CLASSPATH="${PRESTO_SERVER_DIR}/plugin/*:${PRESTO_SERVER_DIR}/lib/*" + + # Set LD_LIBRARY_PATH for native workers to find libraries + export LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" + + ./mvnw test \ + ${MAVEN_TEST} \ + -Dmaven.repo.local=${{ env.MAVEN_REPO }} \ + -pl 'presto-native-tests' \ + -DstorageFormat=${{ matrix.storage-format }} \ + -Dtest="${TESTCLASSES}" \ + -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \ + -DPRESTO_SERVER_DIR=${PRESTO_SERVER_DIR} \ + -DDATA_DIR=${RUNNER_TEMP} \ + -Duser.timezone=America/Bahia_Banderas \ + -DadditionalClasspath="${PRESTO_CLASSPATH}" \ + -DLD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ + -T1C + + # Upstream: prestocpp-linux-presto-sidecar-tests + # Tests: Test*.java in presto-native-sidecar-plugin module + integration-sidecar-java: + name: "integration-sidecar-java" + runs-on: self-hosted + timeout-minutes: 
180 + container: + image: ${{ inputs.builder-image }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + env: + MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError" + MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end" + MAVEN_REPO: /opt/maven/repository + steps: + - uses: actions/checkout@v4 + with: + show-progress: false + + - name: Setup Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '8.0.442' + + # setup-java may override MAVEN_REPO, so explicitly set it after + - name: Configure Maven repository + run: | + echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV + + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: presto-native-build + path: . + + # Restore artifact permissions and move to expected paths + # (same as integration-e2e-java, see comments there for details) + - name: Restore execute permissions and library path + run: | + mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/ + mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/ + mv ${GITHUB_WORKSPACE}/presto_cpp/main/presto_server ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/ + chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server + if [ -f "${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main" ]; then + mv ${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/ + chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main + fi + + # Download pre-built presto-server from presto-java8 job + # This replaces upstream's: ./mvnw install -pl 
'presto-native-execution' -am + - name: Download presto-server artifact + uses: actions/download-artifact@v4 + with: + name: presto-server + path: . + + - name: Extract presto-server + run: | + echo "Extracting presto-server..." + PRESTO_SERVER_TAR=$(ls presto-server-*.tar.gz) + tar -xzf ${PRESTO_SERVER_TAR} + PRESTO_SERVER_DIR=$(ls -d presto-server-*/ | head -1 | sed 's:/$::') + echo "Presto server extracted to ${PRESTO_SERVER_DIR}/" + echo "PRESTO_SERVER_DIR=${GITHUB_WORKSPACE}/${PRESTO_SERVER_DIR}" >> $GITHUB_ENV + + # YScope-specific: presto-clp is not in upstream, required for CLP UDF tests + - name: Build required dependencies + env: + MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" + run: | + ./mvnw install -B -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true \ + --no-transfer-progress -Dmaven.repo.local=${{ env.MAVEN_REPO }} \ + -pl 'presto-clp' + + - name: Compile integration test classes + env: + MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" + run: | + ./mvnw test-compile -Dmaven.repo.local=${{ env.MAVEN_REPO }} \ + -pl 'presto-native-sidecar-plugin' + + - name: Run presto-native sidecar tests + run: | + # First verify the binary exists at expected location + export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server" + if [ ! 
-f "${PRESTO_SERVER_PATH}" ]; then + echo "ERROR: presto_server binary not found at: ${PRESTO_SERVER_PATH}" + echo "Current directory structure:" + ls -la ${GITHUB_WORKSPACE}/ + echo "Checking presto-native-execution directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/ || echo "Directory not found" + echo "Checking _build directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/ || echo "Directory not found" + echo "Checking release directory:" + ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/ || echo "Directory not found" + exit 1 + fi + echo "Found presto_server binary at: ${PRESTO_SERVER_PATH}" + + export TESTFILES=`find ./presto-native-sidecar-plugin/src/test -type f -name 'Test*.java'` + # Convert file paths to comma separated class names + export TESTCLASSES= + for test_file in $TESTFILES + do + tmp=${test_file##*/} + test_class=${tmp%%\.*} + export TESTCLASSES="${TESTCLASSES},$test_class" + done + export TESTCLASSES=${TESTCLASSES#,} + echo "TESTCLASSES = $TESTCLASSES" + + # Add presto-server JARs to classpath for tests + export PRESTO_CLASSPATH="${PRESTO_SERVER_DIR}/plugin/*:${PRESTO_SERVER_DIR}/lib/*" + + # Set LD_LIBRARY_PATH for native workers to find libraries + export LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" + + ./mvnw test \ + ${MAVEN_TEST} \ + -Dmaven.repo.local=${{ env.MAVEN_REPO }} \ + -pl 'presto-native-sidecar-plugin' \ + -Dtest="${TESTCLASSES}" \ + -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \ + -DPRESTO_SERVER_DIR=${PRESTO_SERVER_DIR} \ + -DDATA_DIR=${RUNNER_TEMP} \ + -Duser.timezone=America/Bahia_Banderas \ + -DadditionalClasspath="${PRESTO_CLASSPATH}" \ + -DLD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ + -T1C \ No newline at end of file diff --git a/.github/workflows/maven-checks.yml b/.github/workflows/maven-checks.yml deleted file mode 100644 index 2295c413bef7b..0000000000000 --- a/.github/workflows/maven-checks.yml +++ /dev/null @@ -1,115 +0,0 @@ -name: 
maven checks - -on: - pull_request: - push: - -env: - # An envar that signals to tests we are executing in the CI environment - CONTINUOUS_INTEGRATION: true - MAVEN_OPTS: "-Xmx1024M -XX:+ExitOnOutOfMemoryError" - MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" - RETRY: .github/bin/retry - -concurrency: - group: "${{github.workflow}}-${{github.ref}}" - - # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so - # that each commit to release-0.293-clp-connector is checked completely. - cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}" - -jobs: - maven-checks: - strategy: - fail-fast: false - matrix: - java: [ 8.0.442, 17.0.13 ] - runs-on: ubuntu-latest - timeout-minutes: 45 - steps: - - name: Free Disk Space - run: | - df -h - sudo apt-get clean - df -h - - uses: actions/checkout@v4 - with: - show-progress: false - - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: ${{ matrix.java }} - cache: 'maven' - - name: Download nodejs to maven cache - run: .github/bin/download_nodejs - - name: Maven Checks - run: | - export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}" - ./mvnw install -B -V -T 1C -DskipTests -Dmaven.javadoc.skip=true --no-transfer-progress -P ci -pl '!presto-test-coverage,!:presto-docs' - - name: "Upload presto-server" - if: matrix.java == '8.0.442' - uses: "actions/upload-artifact@v4" - with: - name: "presto-server" - path: "presto-server/target/presto-server-0.293.tar.gz" - if-no-files-found: "error" - retention-days: 1 - - name: "Upload presto-cli" - if: matrix.java == '8.0.442' - uses: "actions/upload-artifact@v4" - with: - name: "presto-cli" - path: "presto-cli/target/presto-cli-0.293-executable.jar" - if-no-files-found: "error" - retention-days: 1 - - name: "Clean Maven output" - run: "./mvnw clean -pl '!:presto-server,!:presto-cli,!presto-test-coverage'" - - presto-coordinator-image: - name: "presto-coordinator-image" - needs: "maven-checks" - runs-on: 
"ubuntu-22.04" - steps: - - uses: "actions/checkout@v4" - with: - submodules: "recursive" - - - name: "Download presto-server" - uses: "actions/download-artifact@v4" - with: - name: "presto-server" - path: "./docker" - - - name: "Download presto-cli" - uses: "actions/download-artifact@v4" - with: - name: "presto-cli" - path: "./docker" - - - name: "Login to image registry" - uses: "docker/login-action@v3" - with: - registry: "ghcr.io" - username: "${{github.actor}}" - password: "${{secrets.GITHUB_TOKEN}}" - - - name: "Set up container image metadata" - id: "meta" - uses: "docker/metadata-action@v5" - with: - images: "ghcr.io/${{github.repository}}/coordinator" - tags: "type=raw,value=dev" - - - name: "Build and push" - uses: "docker/build-push-action@v6" - with: - build-args: |- - JMX_PROMETHEUS_JAVA_AGENT_VERSION=0.20.0 - PRESTO_VERSION=0.293 - context: "./docker" - file: "./docker/Dockerfile" - push: >- - ${{github.event_name != 'pull_request' - && github.ref == 'refs/heads/release-0.293-clp-connector'}} - tags: "${{steps.meta.outputs.tags}}" - labels: "${{steps.meta.outputs.labels}}" diff --git a/.github/workflows/pr-title-checks.yaml b/.github/workflows/pr-title-checks.yaml index 886249e6348c0..eaddf713902c6 100644 --- a/.github/workflows/pr-title-checks.yaml +++ b/.github/workflows/pr-title-checks.yaml @@ -8,7 +8,7 @@ on: # pull request triggered by this event. # - Each job has `permissions` set to only those necessary. 
types: ["edited", "opened", "reopened"] - branches: ["release-0.293-clp-connector"] + branches: ["release-0.293-clp-connector-snapshot*"] permissions: {} diff --git a/.github/workflows/prestissimo-worker-images-build.yml b/.github/workflows/prestissimo-worker-images-build.yml deleted file mode 100644 index b36dcb71949be..0000000000000 --- a/.github/workflows/prestissimo-worker-images-build.yml +++ /dev/null @@ -1,70 +0,0 @@ -name: "prestissimo-worker-images-build" - -on: - pull_request: - push: - -jobs: - prestissimo-worker-images-build: - name: "prestissimo-worker-images-build" - runs-on: "ubuntu-22.04" - steps: - - uses: "actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683" - with: - submodules: "recursive" - - - name: "Login to image registry" - uses: "docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772" - with: - registry: "ghcr.io" - username: "${{github.actor}}" - password: "${{secrets.GITHUB_TOKEN}}" - - - name: "Set up metadata for dependency image" - id: "metadata-deps-image" - uses: "docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804" - with: - images: "ghcr.io/${{github.repository}}/prestissimo-worker-dev-env" - tags: "type=raw,value=dev" - - - name: "Build and push dependency image" - uses: "docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4" - with: - context: "./presto-native-execution" - file: "./presto-native-execution/scripts/dockerfiles/ubuntu-22.04-dependency.dockerfile" - push: >- - ${{github.event_name != 'pull_request' - && github.ref == 'refs/heads/release-0.293-clp-connector'}} - tags: "${{steps.metadata-deps-image.outputs.tags}}" - labels: "${{steps.metadata-deps-image.outputs.labels}}" - - - name: "Set up metadata for runtime image" - id: "metadata-runtime-image" - uses: "docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804" - with: - images: "ghcr.io/${{github.repository}}/prestissimo-worker" - tags: "type=raw,value=dev" - - - name: "Get number of cores" - id: "get-cores" - run: |- - 
echo "num_cores=$(nproc)" >> $GITHUB_OUTPUT - - - name: "Build and push runtime image" - uses: "docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4" - with: - build-args: |- - BASE_IMAGE=ubuntu:22.04 - DEPENDENCY_IMAGE=${{steps.metadata-deps-image.outputs.tags}} - EXTRA_CMAKE_FLAGS=-DPRESTO_ENABLE_TESTING=OFF \ - -DPRESTO_ENABLE_PARQUET=ON \ - -DPRESTO_ENABLE_S3=ON - NUM_THREADS=${{steps.get-cores.outputs.num_cores}} - OSNAME=ubuntu - context: "./presto-native-execution" - file: "./presto-native-execution/scripts/dockerfiles/prestissimo-runtime.dockerfile" - push: >- - ${{github.event_name != 'pull_request' - && github.ref == 'refs/heads/release-0.293-clp-connector'}} - tags: "${{steps.metadata-runtime-image.outputs.tags}}" - labels: "${{steps.metadata-runtime-image.outputs.labels}}" diff --git a/.github/workflows/presto-build.yml b/.github/workflows/presto-build.yml new file mode 100644 index 0000000000000..26e7d37335ec1 --- /dev/null +++ b/.github/workflows/presto-build.yml @@ -0,0 +1,219 @@ +name: presto-build + +# ============================================================================== +# Presto Java Build Workflow +# ============================================================================== +# Builds the Java components of Presto (coordinator, CLI, plugins) and optionally +# builds and pushes a Docker image. +# +# What gets built? +# ---------------- +# - presto-server: The coordinator that accepts queries and manages workers +# - presto-cli: Command-line interface for running queries +# - Various plugins (connectors, functions, etc.) +# +# Build Strategy: +# --------------- +# Uses Maven with a pre-populated local repository from the builder image. +# The builder image contains all Maven dependencies pre-downloaded, so builds +# don't need to fetch from the internet (faster and more reliable). 
+# +# Artifacts: +# ---------- +# When should-upload-artifacts is true, this workflow uploads: +# - presto-server tarball: Used by integration tests and Docker image build +# - presto-cli JAR: The CLI executable +# +# Docker Image: +# ------------- +# When should-build-image is true, builds and pushes a Presto Docker image +# tagged with version, type (RELEASE/BETA/DEV), timestamp, and commit hash. + +on: + workflow_call: + inputs: + builder-image: + description: 'Full builder image URI with tag' + required: true + type: string + java-version: + description: 'Java version to use for build (e.g., 8.0.442, 17.0.13)' + required: false + default: '8.0.442' + type: string + should-upload-artifacts: + description: 'Whether this build should upload artifacts' + required: false + default: true + type: boolean + runtime-version-tag: + description: 'Immutable runtime image tag (e.g., 0.293-BETA-20250529140509-484b00e)' + required: false + default: '' + type: string + runtime-snapshot-tag: + description: 'Mutable SNAPSHOT tag (e.g., 0.293-BETA-SNAPSHOT)' + required: false + default: '' + type: string + should-build-image: + description: 'Whether to build and push Docker image' + required: false + default: false + type: boolean + # Outputs allow the caller workflow to reference artifact names + outputs: + presto-server-artifact: + description: "Name of the presto-server artifact" + value: presto-server + presto-cli-artifact: + description: "Name of the presto-cli artifact" + value: presto-cli + +jobs: + # -------------------------------------------------------------------------- + # Build Job: Compile all Java modules + # -------------------------------------------------------------------------- + build: + name: "presto-build" + runs-on: self-hosted + timeout-minutes: 45 + container: + image: ${{ inputs.builder-image }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + env: + # Use the Maven cache pre-populated in the builder image + # 
This contains all dependencies pre-downloaded, avoiding network fetches + MAVEN_REPO: /opt/maven/repository + # JVM options for Maven: 2GB heap, crash immediately on OOM (don't hang) + MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" + steps: + - uses: actions/checkout@v4 + with: + show-progress: false + + - name: Setup Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: ${{ inputs.java-version }} + + # setup-java may override MAVEN_REPO, so explicitly set it after + - name: Configure Maven repository + run: | + echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV + + # Maven build with parallel compilation (-T 1C = 1 thread per CPU core) + # Key flags: + # - -DskipTests: Only compile, don't run tests (separate workflow handles tests) + # - -P ci: Activate the 'ci' profile (may have CI-specific settings in pom.xml) + # - -pl '!...': Exclude modules we don't need (test coverage, docs) + - name: Maven Build + run: | + export MAVEN_OPTS="${MAVEN_INSTALL_OPTS} -Dmaven.repo.local=${{ env.MAVEN_REPO }}" + ./mvnw install -B -V -T 1C -DskipTests -Dmaven.javadoc.skip=true --no-transfer-progress -P ci -pl '!presto-test-coverage,!:presto-docs' + + # Upload artifacts for other jobs to download + # Artifacts are stored by GitHub and available for the duration of the workflow run + # retention-days: 1 means they're deleted after 1 day to save storage + - name: "Upload presto-server" + if: inputs.should-upload-artifacts + uses: "actions/upload-artifact@v4" + with: + name: "presto-server" + path: "presto-server/target/presto-server-*.tar.gz" + if-no-files-found: "error" + retention-days: 1 + + - name: "Upload presto-cli" + if: inputs.should-upload-artifacts + uses: "actions/upload-artifact@v4" + with: + name: "presto-cli" + path: "presto-cli/target/presto-cli-*-executable.jar" + if-no-files-found: "error" + retention-days: 1 + + # -------------------------------------------------------------------------- + # Docker Image Job: Build and 
push Presto Docker image + # -------------------------------------------------------------------------- + # This job creates a Docker image containing the Presto coordinator. + # The image is pushed to GitHub Container Registry (ghcr.io). + # + # Image tagging strategy: + # - Full tag: 0.293-RELEASE-20250522140509-484b00e (version-type-timestamp-hash) + # - Snapshot tag: 0.293-RELEASE-SNAPSHOT (always points to latest build) + build-image: + name: "presto-image" + needs: build # Wait for build job to complete and upload artifacts + if: inputs.should-build-image && inputs.should-upload-artifacts + runs-on: self-hosted + steps: + # Sparse checkout: Only fetch the docker/ directory (not entire repo) + # This is faster when we only need specific files + - name: "Download Docker context files" + uses: actions/checkout@v4 + with: + sparse-checkout: | + docker + sparse-checkout-cone-mode: false + show-progress: false + + # Download artifacts from the build job + # These go into ./docker so they're available in the Docker build context + - name: "Download presto-server" + uses: actions/download-artifact@v4 + with: + name: presto-server + path: ./docker + + - name: "Download presto-cli" + uses: actions/download-artifact@v4 + with: + name: presto-cli + path: ./docker + + # Extract base version from artifact filename (needed for Docker build args) + - name: "Extract base version" + id: "extract-version" + run: | + VERSION=$(ls docker/presto-server-*.tar.gz | sed 's/.*presto-server-\(.*\)\.tar\.gz/\1/') + echo "base-version=${VERSION}" >> $GITHUB_OUTPUT + + # Login to GitHub Container Registry (ghcr.io) + # GITHUB_TOKEN is automatically provided by GitHub Actions + - name: "Login to image registry" + uses: "docker/login-action@v3" + with: + registry: "ghcr.io" + username: "${{github.actor}}" + password: "${{secrets.GITHUB_TOKEN}}" + + # docker/metadata-action generates Docker image tags and labels + - name: "Set up container image metadata" + id: "meta" + uses: 
"docker/metadata-action@v5" + with: + images: "ghcr.io/${{github.repository}}/presto" + # Two tags passed from config job: + # 1. runtime-version-tag: Immutable (e.g., 0.293-BETA-20250529140509-484b00e) + # 2. runtime-snapshot-tag: Mutable (e.g., 0.293-BETA-SNAPSHOT) + tags: | + type=raw,value=${{inputs.runtime-version-tag}} + type=raw,value=${{inputs.runtime-snapshot-tag}} + + # Build the Docker image and push to registry + # Note: push is disabled for pull_request events (security: PRs shouldn't push images) + - name: "Build and push" + uses: "docker/build-push-action@v6" + with: + build-args: |- + JMX_PROMETHEUS_JAVA_AGENT_VERSION=0.20.0 + PRESTO_VERSION=${{steps.extract-version.outputs.base-version}} + context: "./docker" + file: "./docker/Dockerfile" + push: ${{github.event_name != 'pull_request'}} + tags: "${{steps.meta.outputs.tags}}" + labels: "${{steps.meta.outputs.labels}}" diff --git a/.github/workflows/prestocpp-format-and-header-check.yml b/.github/workflows/prestocpp-format-and-header-check.yml index c554ee8785786..b89a7f25efd8d 100644 --- a/.github/workflows/prestocpp-format-and-header-check.yml +++ b/.github/workflows/prestocpp-format-and-header-check.yml @@ -14,9 +14,9 @@ on: concurrency: group: "${{github.workflow}}-${{github.ref}}" - # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so - # that each commit to release-0.293-clp-connector is checked completely. - cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}" + # Cancel in-progress jobs for efficiency. Exclude branches with `release-0.293-clp-connector-snapshot` prefix so + # that each commit to these branches is checked completely. 
+ cancel-in-progress: "${{!startsWith(github.ref, 'refs/heads/release-0.293-clp-connector-snapshot')}}" jobs: prestocpp-format-and-header-check: diff --git a/.github/workflows/prestocpp-linux-build-and-unit-test.yml b/.github/workflows/prestocpp-linux-build-and-unit-test.yml index e26e330403ec2..23db1ec5720bb 100644 --- a/.github/workflows/prestocpp-linux-build-and-unit-test.yml +++ b/.github/workflows/prestocpp-linux-build-and-unit-test.yml @@ -1,59 +1,78 @@ name: prestocpp-linux-build-and-unit-test -on: - workflow_dispatch: - pull_request: - paths: - - 'presto-native-execution/**' - - 'presto-native-sidecar-plugin/**' - - '.github/workflows/prestocpp-linux-build-and-unit-test.yml' - push: - paths-ignore: - - 'presto-docs/**' - -concurrency: - group: "${{github.workflow}}-${{github.ref}}" +# ============================================================================== +# Prestocpp (C++ Native Worker) Build, Tests, and Image +# ============================================================================== +# This workflow builds the C++ native worker (prestocpp), runs unit tests, and +# builds the prestissimo runtime Docker image. +# +# What is Prestocpp/Prestissimo? +# ------------------------------ +# - Prestocpp: The C++ source code for the native Presto worker +# - Prestissimo: The runtime/deployment name for the compiled C++ worker +# +# Job Dependency Graph: +# --------------------- +# prestocpp-linux-build-and-test ─► prestissimo-image +# +# Build Strategy: +# --------------- +# Uses a pre-built Docker image ("builder image") with: +# - All C++ dependencies pre-installed (boost, folly, glog, etc.) +# - Pre-warmed ccache containing compiled object files from a previous build - # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so - # that each commit to release-0.293-clp-connector is checked completely. 
- cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}" +on: + workflow_call: + inputs: + builder-image: + description: 'Full builder image URI with tag' + required: true + type: string + runtime-version-tag: + description: 'Immutable runtime image tag (e.g., 0.293-BETA-20250529140509-484b00e)' + required: false + default: '' + type: string + runtime-snapshot-tag: + description: 'Mutable SNAPSHOT tag (e.g., 0.293-BETA-SNAPSHOT)' + required: false + default: '' + type: string + +# Note: concurrency is handled by the parent ci.yml workflow jobs: - prestocpp-linux-build-for-test: - runs-on: ubuntu-22.04 + # ---------------------------------------------------------------------------- + # Job 1: Build and Test Prestocpp + # ---------------------------------------------------------------------------- + # Builds the prestocpp binary, runs unit tests, and uploads artifacts. + prestocpp-linux-build-and-test: + name: "prestocpp-linux-build-and-test" + runs-on: self-hosted + timeout-minutes: 180 container: - image: prestodb/presto-native-dependency:0.293-20250522140509-484b00e + image: ${{ inputs.builder-image }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} env: - CCACHE_DIR: "${{ github.workspace }}/ccache" + CCACHE_DIR: /var/cache/ccache + CCACHE_BASEDIR: ${{ github.workspace }} steps: - uses: actions/checkout@v4 + with: + show-progress: false - name: Fix git permissions - # Usually actions/checkout does this but as we run in a container - # it doesn't work run: git config --global --add safe.directory ${GITHUB_WORKSPACE} - - name: Update velox + - name: Update velox submodule run: | cd presto-native-execution make velox-submodule - - name: Install Github CLI for using apache/infrastructure-actions/stash - run: | - curl -L https://github.com/cli/cli/releases/download/v2.63.2/gh_2.63.2_linux_amd64.rpm > gh_2.63.2_linux_amd64.rpm - rpm -iv gh_2.63.2_linux_amd64.rpm - - - uses: 
apache/infrastructure-actions/stash/restore@4ab8682fbd4623d2b4fc1c98db38aba5091924c3 - with: - path: '${{ env.CCACHE_DIR }}' - key: ccache-prestocpp-linux-build-for-test - - - name: Zero ccache statistics - run: ccache -sz - - name: Build engine run: | - source /opt/rh/gcc-toolset-12/enable cd presto-native-execution cmake \ -B _build/release \ @@ -72,243 +91,75 @@ jobs: -DMAX_LINK_JOBS=4 ninja -C _build/release -j $(getconf _NPROCESSORS_ONLN) - - name: Ccache after + - name: Show ccache statistics run: ccache -s - - uses: apache/infrastructure-actions/stash/save@4ab8682fbd4623d2b4fc1c98db38aba5091924c3 - with: - path: '${{ env.CCACHE_DIR }}' - key: ccache-prestocpp-linux-build-for-test - - - name: Run Unit Tests + - name: Run unit tests run: | - # Ensure transitive dependency libboost-iostreams is found. ldconfig /usr/local/lib cd presto-native-execution/_build/release ctest -j $(getconf _NPROCESSORS_ONLN) -VV --output-on-failure --exclude-regex velox.* + # Upload compiled binaries as artifacts for integration tests and image building - name: Upload artifacts uses: actions/upload-artifact@v4 with: name: presto-native-build + retention-days: 1 path: | presto-native-execution/_build/release/presto_cpp/main/presto_server presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main - prestocpp-linux-presto-e2e-tests: - needs: prestocpp-linux-build-for-test - runs-on: ubuntu-22.04 - container: - image: prestodb/presto-native-dependency:0.293-20250522140509-484b00e - env: - MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError" - MAVEN_FAST_INSTALL: "-B -V --quiet -T 1C -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true" - MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end" + # ---------------------------------------------------------------------------- + # Job 2: Build Prestissimo Runtime Image + # 
---------------------------------------------------------------------------- + # Downloads the pre-built binary and packages it into a minimal Docker image. + # The runtime image contains only the binary and runtime dependencies (no build tools). + prestissimo-image: + name: "prestissimo-image" + needs: prestocpp-linux-build-and-test + runs-on: self-hosted + timeout-minutes: 30 steps: - uses: actions/checkout@v4 - - - name: Fix git permissions - # Usually actions/checkout does this but as we run in a container - # it doesn't work - run: git config --global --add safe.directory ${GITHUB_WORKSPACE} - - - name: Download artifacts - uses: actions/download-artifact@v4 with: - name: presto-native-build - path: presto-native-execution/_build/release - - # Permissions are lost when uploading. Details here: https://github.com/actions/upload-artifact/issues/38 - - name: Restore execute permissions and library path - run: | - chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server - chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main - # Ensure transitive dependency libboost-iostreams is found. - ldconfig /usr/local/lib - - - name: Install OpenJDK8 - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: 8.0.442 - cache: 'maven' - - name: Download nodejs to maven cache - run: .github/bin/download_nodejs - - - name: Maven install - env: - # Use different Maven options to install. - MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" - run: | - for i in $(seq 1 3); do ./mvnw clean install $MAVEN_FAST_INSTALL -pl 'presto-native-execution' -am && s=0 && break || s=$? 
&& sleep 10; done; (exit $s) - - - name: Run presto-native e2e tests - run: | - export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server" - export TESTFILES=`find ./presto-native-execution/src/test -type f -name 'TestPrestoNative*.java'` - # Convert file paths to comma separated class names - export TESTCLASSES= - for test_file in $TESTFILES - do - tmp=${test_file##*/} - test_class=${tmp%%\.*} - export TESTCLASSES="${TESTCLASSES},$test_class" - done - export TESTCLASSES=${TESTCLASSES#,} - echo "TESTCLASSES = $TESTCLASSES" - # TODO: neeed to enable remote function tests with - # "-Ppresto-native-execution-remote-functions" once - # > https://github.com/facebookincubator/velox/discussions/6163 - # is fixed. - - mvn test \ - ${MAVEN_TEST} \ - -pl 'presto-native-execution' \ - -Dtest="${TESTCLASSES}" \ - -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \ - -DDATA_DIR=${RUNNER_TEMP} \ - -Duser.timezone=America/Bahia_Banderas \ - -T1C + show-progress: false - prestocpp-linux-presto-native-tests: - needs: prestocpp-linux-build-for-test - runs-on: ubuntu-22.04 - strategy: - fail-fast: false - matrix: - storage-format: [ "PARQUET", "DWRF" ] - container: - image: prestodb/presto-native-dependency:0.293-20250522140509-484b00e - env: - MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError" - MAVEN_FAST_INSTALL: "-B -V --quiet -T 1C -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true" - MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end" - steps: - - uses: actions/checkout@v4 - - - name: Fix git permissions - # Usually actions/checkout does this but as we run in a container - # it doesn't work - run: git config --global --add safe.directory ${GITHUB_WORKSPACE} - - - name: Download artifacts + - name: Download build artifacts uses: actions/download-artifact@v4 with: name: presto-native-build path: presto-native-execution/_build/release - # Permissions are lost when 
uploading. Details here: https://github.com/actions/upload-artifact/issues/38 - - name: Restore execute permissions and library path + - name: Restore execute permissions run: | - chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server - chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main - # Ensure transitive dependency libboost-iostreams is found. - ldconfig /usr/local/lib + chmod +x presto-native-execution/_build/release/presto_cpp/main/presto_server + chmod +x presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main || true + ls -lh presto-native-execution/_build/release/presto_cpp/main/presto_server - - name: Install OpenJDK8 - uses: actions/setup-java@v4 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 with: - distribution: 'temurin' - java-version: '8.0.442' - cache: 'maven' - - name: Download nodejs to maven cache - run: .github/bin/download_nodejs - - - name: Maven install - env: - # Use different Maven options to install. - MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" - run: | - for i in $(seq 1 3); do ./mvnw clean install $MAVEN_FAST_INSTALL -pl 'presto-native-tests' -am && s=0 && break || s=$? 
&& sleep 10; done; (exit $s) - - - name: Run presto-native tests - run: | - export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server" - export TESTFILES=`find ./presto-native-tests/src/test -type f -name 'Test*.java'` - # Convert file paths to comma separated class names - export TESTCLASSES= - for test_file in $TESTFILES - do - tmp=${test_file##*/} - test_class=${tmp%%\.*} - export TESTCLASSES="${TESTCLASSES},$test_class" - done - export TESTCLASSES=${TESTCLASSES#,} - echo "TESTCLASSES = $TESTCLASSES" - - mvn test \ - ${MAVEN_TEST} \ - -pl 'presto-native-tests' \ - -DstorageFormat=${{ matrix.storage-format }} \ - -Dtest="${TESTCLASSES}" \ - -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \ - -DDATA_DIR=${RUNNER_TEMP} \ - -Duser.timezone=America/Bahia_Banderas \ - -T1C - - prestocpp-linux-presto-sidecar-tests: - needs: prestocpp-linux-build-for-test - runs-on: ubuntu-22.04 - container: - image: prestodb/presto-native-dependency:0.293-20250522140509-484b00e - env: - MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError" - MAVEN_FAST_INSTALL: "-B -V --quiet -T 1C -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true" - MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end" - steps: - - uses: actions/checkout@v4 - - name: Fix git permissions - # Usually actions/checkout does this but as we run in a container - # it doesn't work - run: git config --global --add safe.directory ${GITHUB_WORKSPACE} + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - - name: Download artifacts - uses: actions/download-artifact@v4 - with: - name: presto-native-build - path: presto-native-execution/_build/release + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 - # Permissions are lost when uploading. 
Details here: https://github.com/actions/upload-artifact/issues/38 - - name: Restore execute permissions and library path - run: | - chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server - chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main - # Ensure transitive dependency libboost-iostreams is found. - ldconfig /usr/local/lib - - name: Install OpenJDK8 - uses: actions/setup-java@v4 + - name: Build and push prestissimo runtime image + uses: docker/build-push-action@v6 with: - distribution: 'temurin' - java-version: '8.0.442' - cache: 'maven' - - name: Download nodejs to maven cache - run: .github/bin/download_nodejs - - - name: Maven install - env: - # Use different Maven options to install. - MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" - run: | - for i in $(seq 1 3); do ./mvnw clean install $MAVEN_FAST_INSTALL -pl 'presto-native-execution' -am && s=0 && break || s=$? && sleep 10; done; (exit $s) - - name: Run presto-native sidecar tests - run: | - export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server" - export TESTFILES=`find ./presto-native-sidecar-plugin/src/test -type f -name 'Test*.java'` - # Convert file paths to comma separated class names - export TESTCLASSES= - for test_file in $TESTFILES - do - tmp=${test_file##*/} - test_class=${tmp%%\.*} - export TESTCLASSES="${TESTCLASSES},$test_class" - done - export TESTCLASSES=${TESTCLASSES#,} - echo "TESTCLASSES = $TESTCLASSES" - mvn test \ - ${MAVEN_TEST} \ - -pl 'presto-native-sidecar-plugin' \ - -Dtest="${TESTCLASSES}" \ - -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \ - -DDATA_DIR=${RUNNER_TEMP} \ - -Duser.timezone=America/Bahia_Banderas \ - -T1C + context: . 
+ file: ./presto-native-execution/scripts/dockerfiles/prestissimo-runtime.dockerfile + build-args: | + BUILDER_IMAGE=${{ inputs.builder-image }} + push: ${{ github.event_name != 'pull_request' }} + # Two tags passed from config job: + # 1. runtime-version-tag: Immutable (e.g., 0.293-BETA-20250529140509-484b00e) + # 2. runtime-snapshot-tag: Mutable (e.g., 0.293-BETA-SNAPSHOT) + tags: | + ghcr.io/${{ github.repository }}/prestissimo:${{ inputs.runtime-version-tag }} + ghcr.io/${{ github.repository }}/prestissimo:${{ inputs.runtime-snapshot-tag }} + labels: | + org.opencontainers.image.source=https://github.com/${{ github.repository }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6f829502fbbb8..ef33819365825 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,52 +1,46 @@ -name: test +name: tests -on: - pull_request: - push: - -env: - # An envar that signals to tests we are executing in the CI environment - CONTINUOUS_INTEGRATION: true - MAVEN_OPTS: "-Xmx1024M -XX:+ExitOnOutOfMemoryError" - MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" - MAVEN_FAST_INSTALL: "-B -V --quiet -T 1C -DskipTests -Dair.check.skip-all --no-transfer-progress -Dmaven.javadoc.skip=true" - MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --no-transfer-progress --fail-at-end" - RETRY: .github/bin/retry - -concurrency: - group: "${{github.workflow}}-${{github.ref}}" +# ============================================================================== +# Presto Java Unit Tests +# ============================================================================== +# Runs unit tests for the Java components of Presto using a matrix strategy. +# +# Matrix Strategy Explained: +# -------------------------- +# GitHub Actions "matrix" runs the same job multiple times with different parameters. 
+# This workflow creates a matrix of: +# - java: ['8.0.442', '17.0.13'] (2 versions) +# - modules: 11 different test modules/profiles +# +# This results in 2 × 11 = 22 parallel test jobs! Each combination runs independently. +# +# Test Modules: +# ------------- +# Tests are split by Maven profile (-P flag) to parallelize and categorize: +# - presto-tests-execution-memory: Memory management tests +# - presto-tests-general: General functionality tests +# - ci-only-*: Test profiles that only run in CI (too slow/resource-intensive for local) +# - presto-main-base, presto-main: Core module tests +# +# fail-fast: false means if one matrix job fails, others continue running. +# This helps identify all failing tests rather than stopping at the first failure. - # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so - # that each commit to release-0.293-clp-connector is checked completely. - cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}" +on: + workflow_call: + inputs: + builder-image: + description: 'Full builder image URI with tag' + required: true + type: string jobs: - changes: - runs-on: ubuntu-latest - # Required permissions - permissions: - pull-requests: read - # Set job outputs to values from filter step - outputs: - codechange: ${{ steps.filter.outputs.codechange }} - steps: - - uses: "actions/checkout@v4" - with: - submodules: "recursive" - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - codechange: - - '!presto-docs/**' - test: - runs-on: ubuntu-latest - needs: changes + name: "presto-tests" + runs-on: self-hosted strategy: - fail-fast: false + fail-fast: false # Continue other jobs even if one fails matrix: - java: [8.0.442, 17.0.13] + java: ['8.0.442', '17.0.13'] modules: - ":presto-tests -P presto-tests-execution-memory" - ":presto-tests -P presto-tests-general" @@ -56,29 +50,50 @@ jobs: - ":presto-tests -P ci-only-distributed-queries" - ":presto-tests -P 
ci-only-aggregation-queries" - ":presto-tests -P ci-only-plan-determinism" - - ":presto-tests -P ci-only-resource-manager" + - ":presto-tests -P ci-only-resource-manager" - ":presto-main-base" - ":presto-main" timeout-minutes: 80 + container: + image: ${{ inputs.builder-image }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + env: + # Maven test flags: + # - -Dair.check.skip-all: Skip static analysis checks (already done in build) + # - -DLogTestDurationListener.enabled=true: Log how long each test takes + # - --fail-at-end: Run all tests even if some fail, report failures at end + MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --no-transfer-progress --fail-at-end" + MAVEN_REPO: /opt/maven/repository + MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError" steps: - uses: actions/checkout@v4 - if: needs.changes.outputs.codechange == 'true' with: show-progress: false - - uses: actions/setup-java@v4 - if: needs.changes.outputs.codechange == 'true' + + # Setup Java version from the matrix (runs twice: once for Java 8, once for Java 17) + - name: Setup Java + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: ${{ matrix.java }} - cache: 'maven' - - name: Download nodejs to maven cache - if: needs.changes.outputs.codechange == 'true' - run: .github/bin/download_nodejs + + # setup-java may override MAVEN_REPO, so explicitly set it after + - name: Configure Maven repository + run: | + echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV + + # First, compile the test module and its dependencies (-am = also make dependencies) + # The cut command extracts just the module name (before any -P profile flags) + # Example: ":presto-tests -P presto-tests-general" -> ":presto-tests" - name: Maven Install - if: needs.changes.outputs.codechange == 'true' run: | export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}" - ./mvnw install ${MAVEN_FAST_INSTALL} -am -pl $(echo '${{ 
matrix.modules }}' | cut -d' ' -f1) + ./mvnw install -B -V --quiet -T 1C -DskipTests -Dair.check.skip-all --no-transfer-progress -Dmaven.javadoc.skip=true -Dmaven.repo.local=${{ env.MAVEN_REPO }} -am -pl $(echo '${{ matrix.modules }}' | cut -d' ' -f1) + + # Run the actual tests for this matrix combination + # -pl specifies both the module and the profile (e.g., ":presto-tests -P ci-only-local-queries") - name: Maven Tests - if: needs.changes.outputs.codechange == 'true' - run: ./mvnw test ${MAVEN_TEST} -pl ${{ matrix.modules }} + run: | + ./mvnw test ${MAVEN_TEST} -Dmaven.repo.local=${{ env.MAVEN_REPO }} -pl ${{ matrix.modules }} diff --git a/pom.xml b/pom.xml index 57dc0439a8401..3b1f4d7554789 100644 --- a/pom.xml +++ b/pom.xml @@ -55,7 +55,7 @@ 1.43 - 2.12.7 + 2.13.1 1.54 7.5 8.11.3 @@ -93,7 +93,7 @@ + + + diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java index 121eb0d5ff17b..21366e90db156 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java @@ -33,6 +33,8 @@ public class ClpConfig private long metadataRefreshInterval = 60; private long metadataExpireInterval = 600; + private String metadataYamlPath; + private String splitFilterConfig; private SplitFilterProviderType splitFilterProviderType = SplitFilterProviderType.MYSQL; private SplitProviderType splitProviderType = SplitProviderType.MYSQL; @@ -151,6 +153,18 @@ public ClpConfig setMetadataExpireInterval(long metadataExpireInterval) return this; } + public String getMetadataYamlPath() + { + return metadataYamlPath; + } + + @Config("clp.metadata-yaml-path") + public ClpConfig setMetadataYamlPath(String metadataYamlPath) + { + this.metadataYamlPath = metadataYamlPath; + return this; + } + public String getSplitFilterConfig() { return splitFilterConfig; @@ -189,16 +203,21 @@ public ClpConfig 
setSplitProviderType(SplitProviderType splitProviderType) public enum MetadataProviderType { - MYSQL + MYSQL, + YAML } public enum SplitFilterProviderType { - MYSQL + MYSQL, + PINOT, + PINOT_UBER } public enum SplitProviderType { - MYSQL + MYSQL, + PINOT, + PINOT_UBER } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java index 2530c013455cc..fb6626de25a61 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java @@ -28,6 +28,7 @@ public enum ClpErrorCode CLP_UNSUPPORTED_SPLIT_SOURCE(2, EXTERNAL), CLP_UNSUPPORTED_TYPE(3, EXTERNAL), CLP_UNSUPPORTED_CONFIG_OPTION(4, EXTERNAL), + CLP_UNSUPPORTED_TABLE_SCHEMA_YAML(5, EXTERNAL), CLP_SPLIT_FILTER_CONFIG_NOT_FOUND(10, USER_ERROR), CLP_MANDATORY_SPLIT_FILTER_NOT_VALID(11, USER_ERROR), diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java index 1f9962a3456d9..a63b7dc3f9770 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java @@ -81,7 +81,7 @@ public ClpMetadata(ClpConfig clpConfig, ClpMetadataProvider clpMetadataProvider) @Override public List listSchemaNames(ConnectorSession session) { - return ImmutableList.of(DEFAULT_SCHEMA_NAME); + return clpMetadataProvider.listSchemaNames(); } @Override diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java index bf801d0d87242..0ed988d16a76c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java @@ -16,14 +16,22 @@ import 
com.facebook.airlift.configuration.AbstractConfigurationAwareModule; import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider; +import com.facebook.presto.plugin.clp.metadata.ClpYamlMetadataProvider; import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpPinotSplitProvider; import com.facebook.presto.plugin.clp.split.ClpSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpUberPinotSplitProvider; import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.filter.ClpPinotSplitFilterProvider; import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.filter.ClpUberPinotSplitFilterProvider; import com.facebook.presto.spi.PrestoException; +import com.google.common.collect.ImmutableMap; import com.google.inject.Binder; import com.google.inject.Scopes; +import java.util.Map; + import static com.facebook.airlift.configuration.ConfigBinder.configBinder; import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType; import static com.facebook.presto.plugin.clp.ClpConfig.SplitFilterProviderType; @@ -35,6 +43,27 @@ public class ClpModule extends AbstractConfigurationAwareModule { + // Provider mappings for cleaner configuration binding + private static final Map> SPLIT_FILTER_PROVIDER_MAPPINGS = + ImmutableMap.>builder() + .put(SplitFilterProviderType.MYSQL, ClpMySqlSplitFilterProvider.class) + .put(SplitFilterProviderType.PINOT, ClpPinotSplitFilterProvider.class) + .put(SplitFilterProviderType.PINOT_UBER, ClpUberPinotSplitFilterProvider.class) + .build(); + + private static final Map> METADATA_PROVIDER_MAPPINGS = + ImmutableMap.>builder() + .put(MetadataProviderType.MYSQL, ClpMySqlMetadataProvider.class) + .put(MetadataProviderType.YAML, ClpYamlMetadataProvider.class) + .build(); + + 
private static final Map> SPLIT_PROVIDER_MAPPINGS = + ImmutableMap.>builder() + .put(SplitProviderType.MYSQL, ClpMySqlSplitProvider.class) + .put(SplitProviderType.PINOT, ClpPinotSplitProvider.class) + .put(SplitProviderType.PINOT_UBER, ClpUberPinotSplitProvider.class) + .build(); + @Override protected void setup(Binder binder) { @@ -46,25 +75,31 @@ protected void setup(Binder binder) ClpConfig config = buildConfigObject(ClpConfig.class); - if (SplitFilterProviderType.MYSQL == config.getSplitFilterProviderType()) { - binder.bind(ClpSplitFilterProvider.class).to(ClpMySqlSplitFilterProvider.class).in(Scopes.SINGLETON); - } - else { - throw new PrestoException(CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE, "Unsupported split filter provider type: " + config.getSplitFilterProviderType()); + // Bind split filter provider + Class splitFilterProviderClass = + SPLIT_FILTER_PROVIDER_MAPPINGS.get(config.getSplitFilterProviderType()); + if (splitFilterProviderClass == null) { + throw new PrestoException(CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE, + "Unsupported split filter provider type: " + config.getSplitFilterProviderType()); } + binder.bind(ClpSplitFilterProvider.class).to(splitFilterProviderClass).in(Scopes.SINGLETON); - if (config.getMetadataProviderType() == MetadataProviderType.MYSQL) { - binder.bind(ClpMetadataProvider.class).to(ClpMySqlMetadataProvider.class).in(Scopes.SINGLETON); - } - else { - throw new PrestoException(CLP_UNSUPPORTED_METADATA_SOURCE, "Unsupported metadata provider type: " + config.getMetadataProviderType()); + // Bind metadata provider + Class metadataProviderClass = + METADATA_PROVIDER_MAPPINGS.get(config.getMetadataProviderType()); + if (metadataProviderClass == null) { + throw new PrestoException(CLP_UNSUPPORTED_METADATA_SOURCE, + "Unsupported metadata provider type: " + config.getMetadataProviderType()); } + binder.bind(ClpMetadataProvider.class).to(metadataProviderClass).in(Scopes.SINGLETON); - if (config.getSplitProviderType() == SplitProviderType.MYSQL) { 
- binder.bind(ClpSplitProvider.class).to(ClpMySqlSplitProvider.class).in(Scopes.SINGLETON); - } - else { - throw new PrestoException(CLP_UNSUPPORTED_SPLIT_SOURCE, "Unsupported split provider type: " + config.getSplitProviderType()); + // Bind split provider + Class splitProviderClass = + SPLIT_PROVIDER_MAPPINGS.get(config.getSplitProviderType()); + if (splitProviderClass == null) { + throw new PrestoException(CLP_UNSUPPORTED_SPLIT_SOURCE, + "Unsupported split provider type: " + config.getSplitProviderType()); } + binder.bind(ClpSplitProvider.class).to(splitProviderClass).in(Scopes.SINGLETON); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java index 2e35840971c11..7b2d42bb0635d 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import static com.facebook.presto.spi.schedule.NodeSelectionStrategy.NO_PREFERENCE; @@ -77,6 +78,25 @@ public List getPreferredNodes(NodeProvider nodeProvider) return ImmutableList.of(); } + @Override + public int hashCode() + { + return Objects.hash(path, type, kqlQuery); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ClpSplit other = (ClpSplit) obj; + return this.type == other.type && this.path.equals(other.path) && this.kqlQuery.equals(other.kqlQuery); + } + @Override public Map getInfo() { diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java index b82932f0c30fd..902c9dfe37176 100644 --- 
a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.plugin.clp; +import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec; import com.facebook.presto.spi.ConnectorTableLayoutHandle; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -28,13 +29,34 @@ public class ClpTableLayoutHandle private final ClpTableHandle table; private final Optional kqlQuery; private final Optional metadataSql; + private final boolean metadataQueryOnly; + private final Optional topN; @JsonCreator - public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, @JsonProperty("kqlQuery") Optional kqlQuery, @JsonProperty("metadataFilterQuery") Optional metadataSql) + public ClpTableLayoutHandle( + @JsonProperty("table") ClpTableHandle table, + @JsonProperty("kqlQuery") Optional kqlQuery, + @JsonProperty("metadataFilterQuery") Optional metadataSql, + @JsonProperty("metadataQueryOnly") boolean metadataQueryOnly, + @JsonProperty("topN") Optional topN) { this.table = table; this.kqlQuery = kqlQuery; this.metadataSql = metadataSql; + this.metadataQueryOnly = metadataQueryOnly; + this.topN = topN; + } + + public ClpTableLayoutHandle( + @JsonProperty("table") ClpTableHandle table, + @JsonProperty("kqlQuery") Optional kqlQuery, + @JsonProperty("metadataFilterQuery") Optional metadataSql) + { + this.table = table; + this.kqlQuery = kqlQuery; + this.metadataSql = metadataSql; + this.metadataQueryOnly = false; + this.topN = Optional.empty(); } @JsonProperty @@ -55,6 +77,18 @@ public Optional getMetadataSql() return metadataSql; } + @JsonProperty + public boolean isMetadataQueryOnly() + { + return metadataQueryOnly; + } + + @JsonProperty + public Optional getTopN() + { + return topN; + } + @Override public boolean equals(Object o) { @@ -67,13 +101,15 @@ public 
boolean equals(Object o) ClpTableLayoutHandle that = (ClpTableLayoutHandle) o; return Objects.equals(table, that.table) && Objects.equals(kqlQuery, that.kqlQuery) && - Objects.equals(metadataSql, that.metadataSql); + Objects.equals(metadataSql, that.metadataSql) && + Objects.equals(metadataQueryOnly, that.metadataQueryOnly) && + Objects.equals(topN, that.topN); } @Override public int hashCode() { - return Objects.hash(table, kqlQuery, metadataSql); + return Objects.hash(table, kqlQuery, metadataSql, metadataQueryOnly, topN); } @Override @@ -83,6 +119,8 @@ public String toString() .add("table", table) .add("kqlQuery", kqlQuery) .add("metadataSql", metadataSql) + .add("metadataQueryOnly", metadataQueryOnly) + .add("topN", topN) .toString(); } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java index 33e4b748a30d4..28f8494de4a7b 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java @@ -16,15 +16,32 @@ import com.facebook.presto.plugin.clp.ClpColumnHandle; import com.facebook.presto.plugin.clp.ClpTableHandle; import com.facebook.presto.spi.SchemaTableName; +import com.google.common.collect.ImmutableList; import java.util.List; +import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; + /** * A provider for metadata that describes what tables exist in the CLP connector, and what columns * exist in each of those tables. */ public interface ClpMetadataProvider { + /** + * Returns the list of schema names available in this connector. + *

+ * The default implementation returns only the default schema. Implementations can override + * this method to support multiple schemas by querying their metadata source (e.g., YAML file + * or database) to discover available schemas. + * + * @return the list of schema names available in this connector + */ + default List listSchemaNames() + { + return ImmutableList.of(DEFAULT_SCHEMA_NAME); + } + /** * @param schemaTableName the name of the schema and the table * @return the list of column handles for the given table. diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpYamlMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpYamlMetadataProvider.java new file mode 100644 index 0000000000000..2c455291d9aba --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpYamlMetadataProvider.java @@ -0,0 +1,277 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.facebook.presto.plugin.clp.metadata;
+
+import com.facebook.airlift.log.Logger;
+import com.facebook.presto.plugin.clp.ClpColumnHandle;
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.facebook.presto.plugin.clp.ClpTableHandle;
+import com.facebook.presto.spi.PrestoException;
+import com.facebook.presto.spi.SchemaTableName;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+
+import javax.inject.Inject;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME;
+import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_TABLE_SCHEMA_YAML;
+import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME;
+import static java.lang.String.format;
+
+public class ClpYamlMetadataProvider
+        implements ClpMetadataProvider
+{
+    private static final Logger log = Logger.get(ClpYamlMetadataProvider.class);
+    private final ClpConfig config;
+    private final ObjectMapper yamlMapper;
+
+    // Thread-safe cache for schema names to avoid repeated file parsing
+    private volatile List<String> cachedSchemaNames;
+
+    // Thread-safe cache for table schema mappings per schema
+    // Outer map: schema name -> inner map
+    // Inner map: table name -> YAML schema file path
+    private final Map<String, Map<String, String>> tableSchemaYamlMapPerSchema = new HashMap<>();
+
+    @Inject
+    public ClpYamlMetadataProvider(ClpConfig config)
+    {
+        this.config = config;
+        // Reuse ObjectMapper instance for better performance
+        this.yamlMapper = new ObjectMapper(new YAMLFactory());
+    }
+
+    @Override
+    public List<String> listSchemaNames()
+    {
+        // Use cached result if available to improve performance
+        List<String> cached = cachedSchemaNames;
+        if (cached != null) {
+            return cached;
+        }
+
+        // Double-checked locking for thread-safe lazy initialization
+        synchronized (this) {
+            // Check again inside synchronized block
+            cached = cachedSchemaNames;
+            if (cached != null) {
+                return cached;
+            }
+
+            // Check if YAML path is configured
+            // If not configured, fall back to default schema for backward compatibility
+            if (config.getMetadataYamlPath() == null) {
+                log.warn("Metadata YAML path not configured, returning default schema only");
+                cachedSchemaNames = ImmutableList.of(DEFAULT_SCHEMA_NAME);
+                return cachedSchemaNames;
+            }
+
+            // Prepare to parse the YAML metadata file
+            Path tablesSchemaPath = Paths.get(config.getMetadataYamlPath());
+
+            try {
+                // Parse the YAML file into a nested Map structure
+                // Expected structure:
+                // clp:
+                //   default:
+                //     table1: /path/to/schema1.yaml
+                //   dev:
+                //     table2: /path/to/schema2.yaml
+                Map<String, Object> root = yamlMapper.readValue(
+                        new File(tablesSchemaPath.toString()),
+                        new TypeReference<Map<String, Object>>() {});
+
+                // Extract the catalog object (e.g., "clp")
+                // This contains all schemas as keys
+                Object catalogObj = root.get(CONNECTOR_NAME);
+                if (!(catalogObj instanceof Map)) {
+                    // Log error and fall back to default schema for graceful degradation
+                    log.error("The metadata YAML does not contain valid catalog field: %s, returning default schema only", CONNECTOR_NAME);
+                    List<String> defaultSchema = ImmutableList.of(DEFAULT_SCHEMA_NAME);
+                    cachedSchemaNames = defaultSchema;
+                    return defaultSchema;
+                }
+
+                // Extract schema names from the catalog Map
+                // Each key represents a schema name (e.g., "default", "dev", "prod")
+                Map<String, Object> catalogMap = (Map<String, Object>) catalogObj;
+                List<String> schemas = ImmutableList.copyOf(catalogMap.keySet());
+                log.info("Discovered %d schema(s) from YAML metadata: %s", schemas.size(), schemas);
+
+                // Cache the result for future calls
+                cachedSchemaNames = schemas;
+                return schemas;
+            }
+            catch (IOException e) {
+                // If YAML parsing fails (file not found, malformed, etc.), fall back to default schema
+                // This ensures the connector still works even with configuration errors
+                log.error(e, "Failed to parse metadata YAML file: %s, returning default schema only", config.getMetadataYamlPath());
+                List<String> defaultSchema = ImmutableList.of(DEFAULT_SCHEMA_NAME);
+                cachedSchemaNames = defaultSchema;
+                return defaultSchema;
+            }
+        }
+    }
+
+    @Override
+    public List<ClpColumnHandle> listColumnHandles(SchemaTableName schemaTableName)
+    {
+        String schemaName = schemaTableName.getSchemaName();
+        String tableName = schemaTableName.getTableName();
+
+        // Get the schema-specific map
+        Map<String, String> tablesInSchema;
+        synchronized (tableSchemaYamlMapPerSchema) {
+            tablesInSchema = tableSchemaYamlMapPerSchema.get(schemaName);
+        }
+
+        if (tablesInSchema == null) {
+            log.error("No tables loaded for schema: %s", schemaName);
+            return Collections.emptyList();
+        }
+
+        String schemaPath = tablesInSchema.get(tableName);
+        if (schemaPath == null) {
+            log.error("No schema path found for table: %s.%s", schemaName, tableName);
+            return Collections.emptyList();
+        }
+
+        Path tableSchemaPath = Paths.get(schemaPath);
+        ClpSchemaTree schemaTree = new ClpSchemaTree(config.isPolymorphicTypeEnabled());
+
+        try {
+            // Use the shared yamlMapper for better performance
+            Map<String, Object> root = yamlMapper.readValue(
+                    new File(tableSchemaPath.toString()),
+                    new TypeReference<Map<String, Object>>() {});
+            ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
+            ImmutableList.Builder<Byte> typesBuilder = ImmutableList.builder();
+            collectTypes(root, "", namesBuilder, typesBuilder);
+            ImmutableList<String> names = namesBuilder.build();
+            ImmutableList<Byte> types = typesBuilder.build();
+            // The names and types should have same sizes
+            for (int i = 0; i < names.size(); i++) {
+                schemaTree.addColumn(names.get(i), types.get(i));
+            }
+            return schemaTree.collectColumnHandles();
+        }
+        catch (IOException e) {
+            log.error(format("Failed to parse table schema file %s, error: %s", tableSchemaPath, e.getMessage()), e);
+        }
+        return Collections.emptyList();
+    }
+
+    @Override
+    public List<ClpTableHandle> listTableHandles(String schemaName)
+    {
+        // Check if YAML path is configured
+        if (config.getMetadataYamlPath() == null) {
+            log.warn("Metadata YAML path not configured");
+            return Collections.emptyList();
+        }
+
+        Path tablesSchemaPath = Paths.get(config.getMetadataYamlPath());
+
+        try {
+            // Use the shared yamlMapper for better performance
+            Map<String, Object> root = yamlMapper.readValue(new File(tablesSchemaPath.toString()),
+                    new TypeReference<Map<String, Object>>() {});
+
+            Object catalogObj = root.get(CONNECTOR_NAME);
+            if (!(catalogObj instanceof Map)) {
+                throw new PrestoException(CLP_UNSUPPORTED_TABLE_SCHEMA_YAML, format("The table schema does not contain field: %s", CONNECTOR_NAME));
+            }
+
+            Object schemaObj = ((Map<String, Object>) catalogObj).get(schemaName);
+            if (schemaObj == null) {
+                log.warn("Schema '%s' not found in metadata YAML", schemaName);
+                return Collections.emptyList();
+            }
+
+            if (!(schemaObj instanceof Map)) {
+                log.error("Schema '%s' is not a valid map structure", schemaName);
+                return Collections.emptyList();
+            }
+
+            ImmutableList.Builder<ClpTableHandle> tableHandlesBuilder = new ImmutableList.Builder<>();
+            ImmutableMap.Builder<String, String> tableToYamlPathBuilder = new ImmutableMap.Builder<>();
+
+            for (Map.Entry<String, Object> schemaEntry : ((Map<String, Object>) schemaObj).entrySet()) {
+                String tableName = schemaEntry.getKey();
+                String tableSchemaYamlPath = schemaEntry.getValue().toString();
+
+                // Resolve relative paths relative to the directory containing tables-schema.yaml
+                Path resolvedPath = Paths.get(tableSchemaYamlPath);
+                if (!resolvedPath.isAbsolute()) {
+                    // If relative, resolve it relative to the parent directory of tables-schema.yaml
+                    Path parentDir = tablesSchemaPath.getParent();
+                    if (parentDir != null) {
+                        resolvedPath = parentDir.resolve(tableSchemaYamlPath).normalize();
+                    }
+                }
+
+                // The splits' absolute paths will be stored in the CLP metadata database
+                SchemaTableName schemaTableName = new SchemaTableName(schemaName, tableName);
+                tableHandlesBuilder.add(new ClpTableHandle(schemaTableName, ""));
+                tableToYamlPathBuilder.put(tableName, resolvedPath.toString());
+            }
+
+            // Thread-safe update of the schema-specific table map
+            synchronized (tableSchemaYamlMapPerSchema) {
+                tableSchemaYamlMapPerSchema.put(schemaName, tableToYamlPathBuilder.build());
+            }
+
+            return tableHandlesBuilder.build();
+        }
+        catch (IOException e) {
+            log.error(format("Failed to parse metadata file: %s, error: %s", config.getMetadataYamlPath(), e.getMessage()), e);
+        }
+        return Collections.emptyList();
+    }
+
+    private void collectTypes(Object node, String prefix, ImmutableList.Builder<String> namesBuilder, ImmutableList.Builder<Byte> typesBuilder)
+    {
+        if (node instanceof Number) {
+            namesBuilder.add(prefix);
+            typesBuilder.add(((Number) node).byteValue());
+            return;
+        }
+        if (node instanceof List) {
+            for (Number type : (List<Number>) node) {
+                namesBuilder.add(prefix);
+                typesBuilder.add(type.byteValue());
+            }
+            return;
+        }
+        for (Map.Entry<String, Object> entry : ((Map<String, Object>) node).entrySet()) {
+            if (!prefix.isEmpty()) {
+                collectTypes(entry.getValue(), format("%s.%s", prefix, entry.getKey()), namesBuilder, typesBuilder);
+                continue;
+            }
+            collectTypes(entry.getValue(), entry.getKey(), namesBuilder, typesBuilder);
+        }
+    }
+}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java
index 2c216614af10f..c86ea0dbe4d7e 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java
@@ -14,7 +14,10 @@
 package com.facebook.presto.plugin.clp.optimization;
 
 import com.facebook.airlift.log.Logger;
-import com.facebook.presto.plugin.clp.ClpExpression;
+import com.facebook.presto.common.block.SortOrder;
+import com.facebook.presto.common.type.RowType;
+import 
com.facebook.presto.plugin.clp.ClpColumnHandle;
+import com.facebook.presto.plugin.clp.ClpMetadata;
 import com.facebook.presto.plugin.clp.ClpTableHandle;
 import com.facebook.presto.plugin.clp.ClpTableLayoutHandle;
 import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider;
@@ -22,25 +25,38 @@
 import com.facebook.presto.spi.ConnectorPlanOptimizer;
 import com.facebook.presto.spi.ConnectorPlanRewriter;
 import com.facebook.presto.spi.ConnectorSession;
+import com.facebook.presto.spi.ConnectorTableLayoutHandle;
 import com.facebook.presto.spi.TableHandle;
 import com.facebook.presto.spi.VariableAllocator;
 import com.facebook.presto.spi.function.FunctionMetadataManager;
 import com.facebook.presto.spi.function.StandardFunctionResolution;
 import com.facebook.presto.spi.plan.FilterNode;
+import com.facebook.presto.spi.plan.Ordering;
 import com.facebook.presto.spi.plan.PlanNode;
 import com.facebook.presto.spi.plan.PlanNodeIdAllocator;
+import com.facebook.presto.spi.plan.ProjectNode;
 import com.facebook.presto.spi.plan.TableScanNode;
+import com.facebook.presto.spi.plan.TopNNode;
+import com.facebook.presto.spi.relation.ConstantExpression;
 import com.facebook.presto.spi.relation.RowExpression;
+import com.facebook.presto.spi.relation.SpecialFormExpression;
 import com.facebook.presto.spi.relation.VariableReferenceExpression;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Deque;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 
 import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME;
 import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith;
+import static java.lang.Math.toIntExact;
 import static java.lang.String.format;
 import static java.util.Objects.requireNonNull;
@@ -67,7 +83,7 @@ public PlanNode optimize(PlanNode maxSubplan, ConnectorSession session, Variable
 
         // Throw exception if any required split filters are missing
         if (!rewriter.tableScopeSet.isEmpty() && !rewriter.hasVisitedFilter) {
-            splitFilterProvider.checkContainsRequiredFilters(rewriter.tableScopeSet, "");
+            splitFilterProvider.checkContainsRequiredFilters(rewriter.tableScopeSet, ImmutableSet.of());
         }
         return optimizedPlanNode;
     }
@@ -105,6 +121,156 @@ public PlanNode visitFilter(FilterNode node, RewriteContext<Void> context)
             return processFilter(node, (TableScanNode) node.getSource());
         }
 
+        @Override
+        public PlanNode visitTopN(TopNNode node, RewriteContext<Void> context)
+        {
+            PlanNode rewrittenSource = context.rewrite(node.getSource(), null);
+
+            ProjectNode project = null;
+            FilterNode filter = null;
+            PlanNode cursor = rewrittenSource;
+
+            if (cursor instanceof ProjectNode) {
+                project = (ProjectNode) cursor;
+                cursor = project.getSource();
+            }
+            if (cursor instanceof FilterNode) {
+                filter = (FilterNode) cursor;
+                cursor = filter.getSource();
+            }
+            if (!(cursor instanceof TableScanNode)) {
+                return node.replaceChildren(ImmutableList.of(rewrittenSource));
+            }
+
+            TableScanNode scan = (TableScanNode) cursor;
+            TableHandle tableHandle = scan.getTable();
+            if (!(tableHandle.getConnectorHandle() instanceof ClpTableHandle)) {
+                return node.replaceChildren(ImmutableList.of(rewrittenSource));
+            }
+
+            // only allow TopN pushdown when metadata-only is true
+            boolean metadataOnly = false;
+            Optional<ConnectorTableLayoutHandle> layout = tableHandle.getLayout();
+            Optional<String> kql = Optional.empty();
+            Optional<String> metadataSql = Optional.empty();
+            Optional<ClpTopNSpec> existingTopN = Optional.empty();
+            ClpTableHandle clpTableHandle = null;
+
+            if (layout.isPresent() && layout.get() instanceof ClpTableLayoutHandle) {
+                ClpTableLayoutHandle cl = (ClpTableLayoutHandle) layout.get();
+                metadataOnly = cl.isMetadataQueryOnly();
+                kql = cl.getKqlQuery();
+                metadataSql = cl.getMetadataSql();
+                existingTopN = cl.getTopN();
+                clpTableHandle = cl.getTable();
+            }
+
+            if (!metadataOnly) {
+                // Rule: skip TopN pushdown unless metadataQueryOnly is true
+                return node.replaceChildren(ImmutableList.of(rewrittenSource));
+            }
+
+            // Ensure ORDER BY items are plain variables (allow identity through Project)
+            List<Ordering> ords = node.getOrderingScheme().getOrderBy();
+            if (project != null && !areIdents(project, ords)) {
+                return node.replaceChildren(ImmutableList.of(rewrittenSource));
+            }
+
+            Map<VariableReferenceExpression, ColumnHandle> assignments = scan.getAssignments();
+            List<ClpTopNSpec.Ordering> newOrderings = new ArrayList<>(ords.size());
+            for (Ordering ord : ords) {
+                VariableReferenceExpression outVar = ord.getVariable();
+                Optional<String> columnNameOpt = buildOrderColumnName(project, outVar, assignments);
+                if (!columnNameOpt.isPresent()) {
+                    return node.replaceChildren(ImmutableList.of(rewrittenSource));
+                }
+
+                String tableScope = CONNECTOR_NAME + "." + (clpTableHandle != null ?
+                        clpTableHandle.getSchemaTableName().toString() : ClpMetadata.DEFAULT_SCHEMA_NAME);
+
+                List<String> remappedColumnName = splitFilterProvider.remapColumnName(tableScope, columnNameOpt.get());
+                newOrderings.add(new ClpTopNSpec.Ordering(remappedColumnName, toClpOrder(ord.getSortOrder())));
+            }
+
+            if (existingTopN.isPresent()) {
+                ClpTopNSpec ex = existingTopN.get();
+                if (!sameOrdering(ex.getOrderings(), newOrderings)) {
+                    return node.replaceChildren(ImmutableList.of(rewrittenSource)); // leave existing as-is
+                }
+                long mergedLimit = Math.min(ex.getLimit(), node.getCount());
+                if (mergedLimit == ex.getLimit()) {
+                    // No change needed; keep current layout/spec
+                    return node.replaceChildren(ImmutableList.of(rewrittenSource));
+                }
+
+                // Tighten the limit on the layout
+                ClpTopNSpec tightened = new ClpTopNSpec(mergedLimit, ex.getOrderings());
+                ClpTableHandle clpHandle = (ClpTableHandle) tableHandle.getConnectorHandle();
+                ClpTableLayoutHandle newLayout =
+                        new ClpTableLayoutHandle(clpHandle, kql, metadataSql, true, Optional.of(tightened));
+
+                TableScanNode newScan = new TableScanNode(
+                        scan.getSourceLocation(),
+                        idAllocator.getNextId(),
+                        new TableHandle(
+                                tableHandle.getConnectorId(),
+                                clpHandle,
+                                tableHandle.getTransaction(),
+                                Optional.of(newLayout)),
+                        scan.getOutputVariables(),
+                        scan.getAssignments(),
+                        scan.getTableConstraints(),
+                        scan.getCurrentConstraint(),
+                        scan.getEnforcedConstraint(),
+                        scan.getCteMaterializationInfo());
+
+                PlanNode newSource = newScan;
+                if (filter != null) {
+                    newSource = new FilterNode(filter.getSourceLocation(), idAllocator.getNextId(), newSource, filter.getPredicate());
+                }
+                if (project != null) {
+                    newSource = new ProjectNode(
+                            project.getSourceLocation(),
+                            idAllocator.getNextId(),
+                            newSource,
+                            project.getAssignments(),
+                            project.getLocality());
+                }
+
+                return new TopNNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, node.getCount(), node.getOrderingScheme(), node.getStep());
+            }
+
+            ClpTopNSpec spec = new ClpTopNSpec(node.getCount(), newOrderings);
+            ClpTableHandle clpHandle = (ClpTableHandle) tableHandle.getConnectorHandle();
+            ClpTableLayoutHandle newLayout =
+                    new ClpTableLayoutHandle(clpHandle, kql, metadataSql, true, Optional.of(spec));
+
+            TableScanNode newScanNode = new TableScanNode(
+                    scan.getSourceLocation(),
+                    idAllocator.getNextId(),
+                    new TableHandle(
+                            tableHandle.getConnectorId(),
+                            clpHandle,
+                            tableHandle.getTransaction(),
+                            Optional.of(newLayout)),
+                    scan.getOutputVariables(),
+                    scan.getAssignments(),
+                    scan.getTableConstraints(),
+                    scan.getCurrentConstraint(),
+                    scan.getEnforcedConstraint(),
+                    scan.getCteMaterializationInfo());
+
+            PlanNode newSource = newScanNode;
+            if (filter != null) {
+                newSource = new FilterNode(filter.getSourceLocation(), idAllocator.getNextId(), newSource, filter.getPredicate());
+            }
+            if (project != null) {
+                newSource = new ProjectNode(project.getSourceLocation(), idAllocator.getNextId(), newSource, project.getAssignments(), project.getLocality());
+            }
+
+            return new TopNNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, node.getCount(), node.getOrderingScheme(), node.getStep());
+        }
+
         private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNode)
         {
             hasVisitedFilter = true;
@@ -114,21 +280,23 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod
             String tableScope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString();
 
             Map<VariableReferenceExpression, ColumnHandle> assignments = tableScanNode.getAssignments();
+            Set<String> metadataColumnNames = splitFilterProvider.getColumnNames(tableScope);
             ClpExpression clpExpression = filterNode.getPredicate().accept(
                     new ClpFilterToKqlConverter(
                             functionResolution,
                             functionManager,
                             assignments,
-                            splitFilterProvider.getColumnNames(tableScope)),
+                            metadataColumnNames),
                     null);
+
             Optional<String> kqlQuery = clpExpression.getPushDownExpression();
             Optional<String> metadataSqlQuery = clpExpression.getMetadataSqlQuery();
             Optional<RowExpression> remainingPredicate = clpExpression.getRemainingExpression();
 
             // Perform required metadata filter checks before handling the KQL query (if kqlQuery
             // isn't present, we'll return early, skipping subsequent checks).
-            splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), metadataSqlQuery.orElse(""));
+            splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), clpExpression.getPushDownVariables());
 
             boolean hasMetadataFilter = metadataSqlQuery.isPresent() && !metadataSqlQuery.get().isEmpty();
             if (hasMetadataFilter) {
                 metadataSqlQuery = Optional.of(splitFilterProvider.remapSplitFilterPushDownExpression(tableScope, metadataSqlQuery.get()));
@@ -140,7 +308,12 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod
                 log.debug("KQL query: %s", kqlQuery.get());
             }
 
-            ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery);
+            ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(
+                    clpTableHandle,
+                    kqlQuery,
+                    metadataSqlQuery,
+                    metadataColumnNames.equals(clpExpression.getPushDownVariables()),
+                    Optional.empty());
             TableHandle newTableHandle = new TableHandle(
                     tableHandle.getConnectorId(),
                     clpTableHandle,
@@ -171,5 +344,141 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod
                 return tableScanNode;
             }
         }
+
+        private boolean sameOrdering(List<ClpTopNSpec.Ordering> a, List<ClpTopNSpec.Ordering> b)
+        {
+            if (a.size() != b.size()) {
+                return false;
+            }
+            for (int i = 0; i < a.size(); i++) {
+                ClpTopNSpec.Ordering x = a.get(i);
+                ClpTopNSpec.Ordering y = b.get(i);
+                if (!Objects.equals(x.getColumns(), y.getColumns())) {
+                    return false;
+                }
+                if (x.getOrder() != y.getOrder()) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /** Accept plain var or dereference-of-var passthroughs. */
+        private boolean areIdents(ProjectNode project, List<Ordering> vars)
+        {
+            for (Ordering ord : vars) {
+                VariableReferenceExpression out = ord.getVariable();
+                RowExpression expr = project.getAssignments().get(out);
+
+                if (expr instanceof VariableReferenceExpression) {
+                    continue;
+                }
+                if (isDereferenceChainOverVariable(expr)) {
+                    continue;
+                }
+                return false;
+            }
+            return true;
+        }
+
+        /** Build final column name string for CLP (e.g., "msg.timestamp"), or empty if not pushdownable. */
+        private Optional<String> buildOrderColumnName(
+                ProjectNode project,
+                VariableReferenceExpression outVar,
+                Map<VariableReferenceExpression, ColumnHandle> assignments)
+        {
+            if (project == null) {
+                // ORDER BY directly on scan var
+                ColumnHandle ch = assignments.get(outVar);
+                if (!(ch instanceof ClpColumnHandle)) {
+                    return Optional.empty();
+                }
+                return Optional.of(((ClpColumnHandle) ch).getOriginalColumnName());
+            }
+
+            RowExpression expr = project.getAssignments().get(outVar);
+            if (expr instanceof VariableReferenceExpression) {
+                ColumnHandle ch = assignments.get((VariableReferenceExpression) expr);
+                if (!(ch instanceof ClpColumnHandle)) {
+                    return Optional.empty();
+                }
+                return Optional.of(((ClpColumnHandle) ch).getOriginalColumnName());
+            }
+
+            // Handle DEREFERENCE chain: baseVar.field1.field2...
+            Deque<String> path = new ArrayDeque<>();
+            RowExpression cur = expr;
+
+            while (cur instanceof SpecialFormExpression
+                    && ((SpecialFormExpression) cur).getForm() == SpecialFormExpression.Form.DEREFERENCE) {
+                SpecialFormExpression s = (SpecialFormExpression) cur;
+                RowExpression base = s.getArguments().get(0);
+                RowExpression indexExpr = s.getArguments().get(1);
+
+                if (!(indexExpr instanceof ConstantExpression) || !(base.getType() instanceof RowType)) {
+                    return Optional.empty();
+                }
+                int idx;
+                Object v = ((ConstantExpression) indexExpr).getValue();
+                if (v instanceof Long) {
+                    idx = toIntExact((Long) v);
+                }
+                else if (v instanceof Integer) {
+                    idx = (Integer) v;
+                }
+                else {
+                    return Optional.empty();
+                }
+
+                RowType rowType = (RowType) base.getType();
+                if (idx < 0 || idx >= rowType.getFields().size()) {
+                    return Optional.empty();
+                }
+                String fname = rowType.getFields().get(idx).getName().orElse(String.valueOf(idx));
+                // We traverse outer->inner, so the outermost dereference is the LAST path
+                // segment; prepend to keep the segments in source order (a.b.c, not a.c.b)
+                path.addFirst(fname);
+
+                cur = base; // move up the chain
+            }
+
+            if (!(cur instanceof VariableReferenceExpression)) {
+                return Optional.empty();
+            }
+
+            ColumnHandle baseCh = assignments.get((VariableReferenceExpression) cur);
+            if (!(baseCh instanceof ClpColumnHandle)) {
+                return Optional.empty();
+            }
+
+            String baseName = ((ClpColumnHandle) baseCh).getOriginalColumnName();
+            if (path.isEmpty()) {
+                return Optional.of(baseName);
+            }
+            return Optional.of(baseName + "." + String.join(".", path));
+        }
+
+        /** True if expr is DEREFERENCE(... DEREFERENCE(baseVar, i) ..., j) with baseVar a VariableReferenceExpression. */
+        private boolean isDereferenceChainOverVariable(RowExpression expr)
+        {
+            RowExpression cur = expr;
+            while (cur instanceof SpecialFormExpression
+                    && ((SpecialFormExpression) cur).getForm() == SpecialFormExpression.Form.DEREFERENCE) {
+                cur = ((SpecialFormExpression) cur).getArguments().get(0);
+            }
+            return (cur instanceof VariableReferenceExpression);
+        }
+
+        private ClpTopNSpec.Order toClpOrder(SortOrder so)
+        {
+            switch (so) {
+                case ASC_NULLS_FIRST:
+                case ASC_NULLS_LAST:
+                    return ClpTopNSpec.Order.ASC;
+                case DESC_NULLS_FIRST:
+                case DESC_NULLS_LAST:
+                    return ClpTopNSpec.Order.DESC;
+                default: throw new IllegalArgumentException("Unknown sort order: " + so);
+            }
+        }
     }
 }
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java
similarity index 65%
rename from presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java
rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java
index e970f9848a9cf..571ecb028dc0a 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java
@@ -11,11 +11,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package com.facebook.presto.plugin.clp;
+package com.facebook.presto.plugin.clp.optimization;
 
 import com.facebook.presto.spi.relation.RowExpression;
+import com.google.common.collect.ImmutableSet;
 
 import java.util.Optional;
+import java.util.Set;
 
 /**
  * Represents the result of:
@@ -38,11 +40,19 @@ public class ClpExpression
     // The remaining (non-translatable) portion of the RowExpression, if any.
private final Optional remainingExpression; - public ClpExpression(String pushDownExpression, String metadataSqlQuery, RowExpression remainingExpression) + // Variables used in pushDownExpression + private final Set pushDownVariables; + + public ClpExpression( + String pushDownExpression, + String metadataSqlQuery, + RowExpression remainingExpression, + Set pushDownVariables) { this.pushDownExpression = Optional.ofNullable(pushDownExpression); this.metadataSqlQuery = Optional.ofNullable(metadataSqlQuery); this.remainingExpression = Optional.ofNullable(remainingExpression); + this.pushDownVariables = ImmutableSet.copyOf(pushDownVariables); } /** @@ -50,7 +60,7 @@ public ClpExpression(String pushDownExpression, String metadataSqlQuery, RowExpr */ public ClpExpression() { - this(null, null, null); + this(null, null, null, ImmutableSet.of()); } /** @@ -60,7 +70,18 @@ public ClpExpression() */ public ClpExpression(String pushDownExpression) { - this(pushDownExpression, null, null); + this(pushDownExpression, null, null, ImmutableSet.of()); + } + + /** + * Creates a ClpExpression from a fully translatable KQL query or column name. + * + * @param pushDownExpression + * @param pushDownVariables + */ + public ClpExpression(String pushDownExpression, Set pushDownVariables) + { + this(pushDownExpression, null, null, pushDownVariables); } /** @@ -72,7 +93,20 @@ public ClpExpression(String pushDownExpression) */ public ClpExpression(String pushDownExpression, String metadataSqlQuery) { - this(pushDownExpression, metadataSqlQuery, null); + this(pushDownExpression, metadataSqlQuery, null, ImmutableSet.of()); + } + + /** + * Creates a ClpExpression from a fully translatable KQL string or column name, as well as a + * metadata SQL string. 
+ * + * @param pushDownExpression + * @param metadataSqlQuery + * @param pushDownVariables + */ + public ClpExpression(String pushDownExpression, String metadataSqlQuery, Set pushDownVariables) + { + this(pushDownExpression, metadataSqlQuery, null, pushDownVariables); } /** @@ -82,7 +116,7 @@ public ClpExpression(String pushDownExpression, String metadataSqlQuery) */ public ClpExpression(RowExpression remainingExpression) { - this(null, null, remainingExpression); + this(null, null, remainingExpression, ImmutableSet.of()); } public Optional getPushDownExpression() @@ -99,4 +133,9 @@ public Optional getRemainingExpression() { return remainingExpression; } + + public Set getPushDownVariables() + { + return pushDownVariables; + } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java index b27a61ef0d65a..cca19114e8ed4 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java @@ -16,10 +16,10 @@ import com.facebook.presto.common.function.OperatorType; import com.facebook.presto.common.type.DecimalType; import com.facebook.presto.common.type.RowType; +import com.facebook.presto.common.type.TimestampType; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.VarcharType; import com.facebook.presto.plugin.clp.ClpColumnHandle; -import com.facebook.presto.plugin.clp.ClpExpression; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.function.FunctionHandle; @@ -65,6 +65,7 @@ import static java.lang.Integer.parseInt; import static java.lang.String.format; import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.SECONDS; /** * A 
translator to translate Presto {@link RowExpression}s into: @@ -163,7 +164,8 @@ public ClpExpression visitConstant(ConstantExpression node, Void context) @Override public ClpExpression visitVariableReference(VariableReferenceExpression node, Void context) { - return new ClpExpression(getVariableName(node)); + String variableName = getVariableName(node); + return new ClpExpression(variableName, ImmutableSet.of(variableName)); } @Override @@ -250,7 +252,8 @@ private ClpExpression handleBetween(CallExpression node) return new ClpExpression(node); } - Optional variableOpt = first.accept(this, null).getPushDownExpression(); + ClpExpression variableExpression = first.accept(this, null); + Optional variableOpt = variableExpression.getPushDownExpression(); if (!variableOpt.isPresent() || !(second instanceof ConstantExpression) || !(third instanceof ConstantExpression)) { @@ -258,13 +261,15 @@ private ClpExpression handleBetween(CallExpression node) } String variable = variableOpt.get(); - String lowerBound = getLiteralString((ConstantExpression) second); - String upperBound = getLiteralString((ConstantExpression) third); + Type lowerBoundType = second.getType(); + String lowerBound = tryEnsureNanosecondTimestamp(lowerBoundType, getLiteralString((ConstantExpression) second)); + Type upperBoundType = third.getType(); + String upperBound = tryEnsureNanosecondTimestamp(upperBoundType, getLiteralString((ConstantExpression) third)); String kql = String.format("%s >= %s AND %s <= %s", variable, lowerBound, variable, upperBound); String metadataSqlQuery = metadataFilterColumns.contains(variable) ? 
String.format("\"%s\" >= %s AND \"%s\" <= %s", variable, lowerBound, variable, upperBound) : null; - return new ClpExpression(kql, metadataSqlQuery); + return new ClpExpression(kql, metadataSqlQuery, variableExpression.getPushDownVariables()); } /** @@ -290,10 +295,10 @@ private ClpExpression handleNot(CallExpression node) } String notPushDownExpression = "NOT " + expression.getPushDownExpression().get(); if (expression.getMetadataSqlQuery().isPresent()) { - return new ClpExpression(notPushDownExpression, "NOT " + expression.getMetadataSqlQuery()); + return new ClpExpression(notPushDownExpression, "NOT " + expression.getMetadataSqlQuery().get(), expression.getPushDownVariables()); } else { - return new ClpExpression(notPushDownExpression); + return new ClpExpression(notPushDownExpression, expression.getPushDownVariables()); } } @@ -345,7 +350,7 @@ else if (argument instanceof CallExpression) { return new ClpExpression(node); } pattern = pattern.replace("%", "*").replace("_", "?"); - return new ClpExpression(format("%s: \"%s\"", variableName, pattern)); + return new ClpExpression(format("%s: \"%s\"", variableName, pattern), variable.getPushDownVariables()); } /** @@ -442,33 +447,45 @@ private ClpExpression buildClpExpression( RowExpression originalNode) { String metadataSqlQuery = null; + literalString = tryEnsureNanosecondTimestamp(literalType, literalString); if (operator.equals(EQUAL)) { if (literalType instanceof VarcharType) { - return new ClpExpression(format("%s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString))); + if (metadataFilterColumns.contains(variableName)) { + metadataSqlQuery = format("\"%s\" = '%s'", variableName, literalString); + } + return new ClpExpression( + format("%s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString)), metadataSqlQuery, + ImmutableSet.of(variableName)); } else { if (metadataFilterColumns.contains(variableName)) { metadataSqlQuery = format("\"%s\" = %s", variableName, literalString); 
 } - return new ClpExpression(format("%s: %s", variableName, literalString), metadataSqlQuery); + return new ClpExpression(format("%s: %s", variableName, literalString), metadataSqlQuery, ImmutableSet.of(variableName)); } } else if (operator.equals(NOT_EQUAL)) { if (literalType instanceof VarcharType) { - return new ClpExpression(format("NOT %s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString))); + if (metadataFilterColumns.contains(variableName)) { + metadataSqlQuery = format("\"%s\" != '%s'", variableName, literalString); + } + return new ClpExpression( + format("NOT %s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString)), metadataSqlQuery, + ImmutableSet.of(variableName)); } else { if (metadataFilterColumns.contains(variableName)) { metadataSqlQuery = format("NOT \"%s\" = %s", variableName, literalString); } - return new ClpExpression(format("NOT %s: %s", variableName, literalString), metadataSqlQuery); + return new ClpExpression(format("NOT %s: %s", variableName, literalString), metadataSqlQuery, ImmutableSet.of(variableName)); } } else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceof VarcharType)) { if (metadataFilterColumns.contains(variableName)) { - metadataSqlQuery = format("\"%s\" %s %s", variableName, operator.getOperator(), literalString); + metadataSqlQuery = format("\"%s\" %s %s", variableName, operator.getOperator(), literalString); } - return new ClpExpression(format("%s %s %s", variableName, operator.getOperator(), literalString), metadataSqlQuery); + return new ClpExpression( + format("%s %s %s", variableName, operator.getOperator(), literalString), metadataSqlQuery, ImmutableSet.of(variableName)); } return new ClpExpression(originalNode); } @@ -576,7 +593,7 @@ private Optional interpretSubstringEquality(SubstrInfo info, Stri result.append("?"); } result.append(targetString).append("*\""); - return Optional.of(new 
ClpExpression(result.toString())); + return Optional.of(new ClpExpression(result.toString(), ImmutableSet.of(info.variableName))); } } } @@ -590,11 +607,11 @@ private Optional interpretSubstringEquality(SubstrInfo info, Stri result.append("?"); } result.append(targetString).append("\""); - return Optional.of(new ClpExpression(result.toString())); + return Optional.of(new ClpExpression(result.toString(), ImmutableSet.of(info.variableName))); } if (start == -targetString.length()) { result.append(format("%s: \"*%s\"", info.variableName, targetString)); - return Optional.of(new ClpExpression(result.toString())); + return Optional.of(new ClpExpression(result.toString(), ImmutableSet.of(info.variableName))); } } } @@ -678,10 +695,12 @@ private ClpExpression handleAnd(SpecialFormExpression node) List remainingExpressions = new ArrayList<>(); boolean hasMetadataSql = false; boolean hasPushDownExpression = false; + ImmutableSet.Builder pushDownVariables = new ImmutableSet.Builder<>(); for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); if (expression.getPushDownExpression().isPresent()) { hasPushDownExpression = true; + pushDownVariables.addAll(expression.getPushDownVariables()); queryBuilder.append(expression.getPushDownExpression().get()); queryBuilder.append(" AND "); if (expression.getMetadataSqlQuery().isPresent()) { @@ -702,18 +721,21 @@ else if (!remainingExpressions.isEmpty()) { return new ClpExpression( queryBuilder.substring(0, queryBuilder.length() - 5) + ")", hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, - remainingExpressions.get(0)); + remainingExpressions.get(0), + pushDownVariables.build()); } else { return new ClpExpression( queryBuilder.substring(0, queryBuilder.length() - 5) + ")", hasMetadataSql ? 
metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, - new SpecialFormExpression(node.getSourceLocation(), AND, BOOLEAN, remainingExpressions)); + new SpecialFormExpression(node.getSourceLocation(), AND, BOOLEAN, remainingExpressions), + pushDownVariables.build()); } } // Remove the last " AND " from the query return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")", - hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null); + hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null, + pushDownVariables.build()); } /** @@ -736,6 +758,7 @@ private ClpExpression handleOr(SpecialFormExpression node) queryBuilder.append("("); boolean allPushedDown = true; boolean hasAllMetadataSql = true; + ImmutableSet.Builder pushDownVariables = new ImmutableSet.Builder<>(); for (RowExpression argument : node.getArguments()) { ClpExpression expression = argument.accept(this, null); // Note: It is possible in the future that an expression cannot be pushed down as a KQL query, but can be @@ -746,6 +769,7 @@ private ClpExpression handleOr(SpecialFormExpression node) } queryBuilder.append(expression.getPushDownExpression().get()); queryBuilder.append(" OR "); + pushDownVariables.addAll(expression.getPushDownVariables()); if (hasAllMetadataSql && expression.getMetadataSqlQuery().isPresent()) { metadataQueryBuilder.append(expression.getMetadataSqlQuery().get()); metadataQueryBuilder.append(" OR "); @@ -758,7 +782,8 @@ private ClpExpression handleOr(SpecialFormExpression node) // Remove the last " OR " from the query return new ClpExpression( queryBuilder.substring(0, queryBuilder.length() - 4) + ")", - hasAllMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 4) + ")" : null); + hasAllMetadataSql ? 
metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 4) + ")" : null, + pushDownVariables.build()); } return new ClpExpression(node); } @@ -798,7 +823,7 @@ private ClpExpression handleIn(SpecialFormExpression node) } // Remove the last " OR " from the query - return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")"); + return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")", variable.getPushDownVariables()); } /** @@ -823,7 +848,7 @@ private ClpExpression handleIsNull(SpecialFormExpression node) } String variableName = expression.getPushDownExpression().get(); - return new ClpExpression(format("NOT %s: *", variableName)); + return new ClpExpression(format("NOT %s: *", variableName), expression.getPushDownVariables()); } /** @@ -885,7 +910,7 @@ private ClpExpression handleDereference(RowExpression expression) if (!baseString.getPushDownExpression().isPresent()) { return new ClpExpression(expression); } - return new ClpExpression(baseString.getPushDownExpression().get() + "." + fieldName); + return new ClpExpression(baseString.getPushDownExpression().get() + "." 
+ fieldName, baseString.getPushDownVariables()); } /** @@ -925,6 +950,26 @@ public static boolean isClpCompatibleNumericType(Type type) || type instanceof DecimalType; } + private static String tryEnsureNanosecondTimestamp(Type type, String literalString) + { + if (type == TIMESTAMP) { + return ensureNanosecondTimestamp(TIMESTAMP, literalString); + } + else if (type == TIMESTAMP_MICROSECONDS) { + return ensureNanosecondTimestamp(TIMESTAMP_MICROSECONDS, literalString); + } + return literalString; + } + + private static String ensureNanosecondTimestamp(TimestampType type, String literalString) + { + long literalNumber = Long.parseLong(literalString); + long seconds = type.getEpochSecond(literalNumber); + long nanosecondFraction = type.getNanos(literalNumber); + long nanoseconds = SECONDS.toNanos(seconds) + nanosecondFraction; + return Long.toString(nanoseconds); + } + private static class SubstrInfo { String variableName; diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java index b536c95ad216a..bdf50eb0fb709 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java @@ -32,7 +32,10 @@ public class ClpPlanOptimizerProvider private final ClpSplitFilterProvider splitFilterProvider; @Inject - public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider) + public ClpPlanOptimizerProvider( + FunctionMetadataManager functionManager, + StandardFunctionResolution functionResolution, + ClpSplitFilterProvider splitFilterProvider) { this.functionManager = functionManager; this.functionResolution = functionResolution; diff --git 
a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpTopNSpec.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpTopNSpec.java new file mode 100644 index 0000000000000..de2f3ee2eab6c --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpTopNSpec.java @@ -0,0 +1,148 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.optimization; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.List; +import java.util.Objects; + +import static java.util.Objects.requireNonNull; + +/** + * Represents a Top-N specification for a query, including the limit of rows to return + * and the ordering of columns. + */ +public class ClpTopNSpec +{ + /** + * Enum representing the order direction: ascending or descending. + */ + public enum Order + { + ASC, + DESC + } + + /** + * Represents the ordering of one or more columns with a specified order (ASC or DESC). 
+ */ + public static final class Ordering + { + private final List columns; + private final Order order; + + @JsonCreator + public Ordering( + @JsonProperty("columns") List columns, + @JsonProperty("order") Order order) + { + this.columns = requireNonNull(columns, "column is null"); + this.order = requireNonNull(order, "order is null"); + } + + @JsonProperty("columns") + public List getColumns() + { + return columns; + } + + @JsonProperty("order") + public Order getOrder() + { + return order; + } + + @Override + public int hashCode() + { + return Objects.hash(columns, order); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + Ordering other = (Ordering) obj; + return this.order == other.order && this.columns.equals(other.columns); + } + + @Override + public String toString() + { + return columns + ":" + order; + } + } + + private final long limit; + private final List orderings; + + @JsonCreator + public ClpTopNSpec( + @JsonProperty("limit") long limit, + @JsonProperty("orderings") List orderings) + { + if (limit <= 0) { + throw new IllegalArgumentException("limit must be > 0"); + } + if (orderings == null || orderings.isEmpty()) { + throw new IllegalArgumentException("orderings must be non-empty"); + } + this.limit = limit; + this.orderings = orderings; + } + + @JsonProperty("limit") + public long getLimit() + { + return limit; + } + + @JsonProperty("orderings") + public List getOrderings() + { + return orderings; + } + + @Override + public int hashCode() + { + return Objects.hash(limit, orderings); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ClpTopNSpec other = (ClpTopNSpec) obj; + return this.limit == other.limit && this.orderings.equals(other.orderings); + } + + @Override + public String toString() + { + return 
"ClpTopNSpec (limit=" + limit + ", order=" + orderings + ")"; + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java index 75d0a66cc02c7..f691b8ecaa23c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java @@ -127,14 +127,40 @@ public PlanNode visitProject(ProjectNode node, RewriteContext context) rewriteClpUdfs(entry.getValue(), functionManager, variableAllocator, true)); } - PlanNode newSource = rewritePlanSubtree(node.getSource()); + PlanNode newSource = node.getSource().accept(this, context); return new ProjectNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, newAssignments.build(), node.getLocality()); } @Override public PlanNode visitFilter(FilterNode node, RewriteContext context) { - return buildNewFilterNode(node); + RowExpression newPredicate = rewriteClpUdfs(node.getPredicate(), functionManager, variableAllocator, false); + PlanNode newSource = node.getSource().accept(this, context); + return new FilterNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, newPredicate); + } + + @Override + public PlanNode visitTableScan(TableScanNode node, RewriteContext context) + { + Set outputVars = new LinkedHashSet<>(node.getOutputVariables()); + Map newAssignments = new HashMap<>(node.getAssignments()); + + // Add any missing variables for known handles + globalColumnVarMap.forEach((handle, var) -> { + outputVars.add(var); + newAssignments.put(var, handle); + }); + + return new TableScanNode( + node.getSourceLocation(), + idAllocator.getNextId(), + node.getTable(), + new ArrayList<>(outputVars), + newAssignments, + node.getTableConstraints(), + node.getCurrentConstraint(), + node.getEnforcedConstraint(), + node.getCteMaterializationInfo()); } /** @@ 
-220,29 +246,6 @@ else if (functionName.startsWith("CLP_GET_")) { return expression; } - /** - * Recursively rewrites the subtree of a plan node to include any new variables produced by - * CLP UDF rewrites. - * - * @param node the plan node to rewrite - * @return the rewritten plan node - */ - private PlanNode rewritePlanSubtree(PlanNode node) - { - if (node instanceof TableScanNode) { - return buildNewTableScanNode((TableScanNode) node); - } - else if (node instanceof FilterNode) { - return buildNewFilterNode((FilterNode) node); - } - - List rewrittenChildren = node.getSources().stream() - .map(source -> rewritePlanSubtree(source)) - .collect(toImmutableList()); - - return node.replaceChildren(rewrittenChildren); - } - /** * Encodes a JSON path into a valid variable name by replacing uppercase letters with * "_ux", dots with "_dot_", and underscores with "_und_". @@ -272,48 +275,5 @@ else if (c == '_') { } return sb.toString(); } - - /** - * Builds a new {@link TableScanNode} that includes additional - * {@link VariableReferenceExpression}s and {@link ColumnHandle}s for rewritten CLP UDFs. - * - * @param node the original table scan node - * @return the updated table scan node - */ - private TableScanNode buildNewTableScanNode(TableScanNode node) - { - Set outputVars = new LinkedHashSet<>(node.getOutputVariables()); - Map newAssignments = new HashMap<>(node.getAssignments()); - - // Add any missing variables for known handles - globalColumnVarMap.forEach((handle, var) -> { - outputVars.add(var); - newAssignments.put(var, handle); - }); - - return new TableScanNode( - node.getSourceLocation(), - idAllocator.getNextId(), - node.getTable(), - new ArrayList<>(outputVars), - newAssignments, - node.getTableConstraints(), - node.getCurrentConstraint(), - node.getEnforcedConstraint(), - node.getCteMaterializationInfo()); - } - - /** - * Builds a new {@link FilterNode} with its predicate rewritten to replace CLP UDF calls. 
- * - * @param node the original filter node - * @return the updated filter node - */ - private FilterNode buildNewFilterNode(FilterNode node) - { - RowExpression newPredicate = rewriteClpUdfs(node.getPredicate(), functionManager, variableAllocator, false); - PlanNode newSource = rewritePlanSubtree(node.getSource()); - return new FilterNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, newPredicate); - } } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java index 6b54218509c7f..13435e28a8b2c 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java @@ -18,6 +18,7 @@ import com.facebook.presto.plugin.clp.ClpSplit; import com.facebook.presto.plugin.clp.ClpTableHandle; import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; +import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec; import com.google.common.collect.ImmutableList; import javax.inject.Inject; @@ -27,22 +28,26 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; +import java.util.Optional; import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE; import static java.lang.String.format; +import static java.util.Comparator.comparingLong; public class ClpMySqlSplitProvider implements ClpSplitProvider { // Column names public static final String ARCHIVES_TABLE_COLUMN_ID = "id"; + public static final String ARCHIVES_TABLE_NUM_MESSAGES = "num_messages"; // Table suffixes public static final String ARCHIVES_TABLE_SUFFIX = "_archives"; // SQL templates - private static final String SQL_SELECT_ARCHIVES_TEMPLATE = format("SELECT `%s` FROM `%%s%%s%s` WHERE 1 = 1", ARCHIVES_TABLE_COLUMN_ID, 
ARCHIVES_TABLE_SUFFIX); + private static final String SQL_SELECT_ARCHIVES_TEMPLATE = format("SELECT * FROM `%%s%%s%s` WHERE 1 = 1", ARCHIVES_TABLE_SUFFIX); private static final Logger log = Logger.get(ClpMySqlSplitProvider.class); @@ -66,6 +71,7 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) { ImmutableList.Builder splits = new ImmutableList.Builder<>(); ClpTableHandle clpTableHandle = clpTableLayoutHandle.getTable(); + Optional topNSpecOptional = clpTableLayoutHandle.getTopN(); String tablePath = clpTableHandle.getTablePath(); String tableName = clpTableHandle.getSchemaTableName().getTableName(); String archivePathQuery = format(SQL_SELECT_ARCHIVES_TEMPLATE, config.getMetadataTablePrefix(), tableName); @@ -74,6 +80,25 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) String metadataFilterQuery = clpTableLayoutHandle.getMetadataSql().get(); archivePathQuery += " AND (" + metadataFilterQuery + ")"; } + + if (topNSpecOptional.isPresent()) { + ClpTopNSpec topNSpec = topNSpecOptional.get(); + // Only handles one range metadata column for now + ClpTopNSpec.Ordering ordering = topNSpec.getOrderings().get(0); + String col = ordering.getColumns().get(ordering.getColumns().size() - 1); + String dir = (ordering.getOrder() == ClpTopNSpec.Order.ASC) ? 
"ASC" : "DESC"; + archivePathQuery += " ORDER BY " + "`" + col + "` " + dir; + + List archiveMetaList = fetchArchiveMeta(archivePathQuery, ordering); + List selected = selectTopNArchives(archiveMetaList, topNSpec.getLimit(), ordering.getOrder()); + + for (ArchiveMeta a : selected) { + splits.add(new ClpSplit(tablePath + "/" + a.id, ARCHIVE, clpTableLayoutHandle.getKqlQuery())); + } + ImmutableList result = splits.build(); + log.debug("Number of splits: %s", result.size()); + return result; + } log.debug("Query for archive: %s", archivePathQuery); try (Connection connection = getConnection()) { @@ -105,4 +130,171 @@ private Connection getConnection() } return connection; } + + /** + * Fetches archive metadata from the database. + * + * @param query SQL query string that selects the archives + * @param ordering The top-N ordering specifying which columns contain lowerBound/upperBound + * @return List of ArchiveMeta objects representing archive metadata + */ + private List fetchArchiveMeta(String query, ClpTopNSpec.Ordering ordering) + { + List list = new ArrayList<>(); + try (Connection connection = getConnection(); + PreparedStatement stmt = connection.prepareStatement(query); + ResultSet rs = stmt.executeQuery()) { + while (rs.next()) { + list.add(new ArchiveMeta( + rs.getString(ARCHIVES_TABLE_COLUMN_ID), + rs.getLong(ordering.getColumns().get(0)), + rs.getLong(ordering.getColumns().get(1)), + rs.getLong(ARCHIVES_TABLE_NUM_MESSAGES))); + } + } + catch (SQLException e) { + log.warn("Database error while fetching archive metadata: %s", e); + } + return list; + } + + /** + * Selects the set of archives that must be scanned to guarantee the top-N results by timestamp + * (ASC or DESC), given only archive ranges and message counts. + *

    + *
  • Merges overlapping archives into groups (union of time ranges).
  • + *
  • For DESC: always include the newest group, then add older ones until their total + * message counts cover the limit.
  • + *
  • For ASC: symmetric — start from the oldest, then add newer ones.
  • + *
+ + * @param archives list of archives with [lowerBound, upperBound, messageCount] + * @param limit number of messages requested + * @param order ASC (earliest first) or DESC (latest first) + * @return archives that must be scanned + */ + private static List selectTopNArchives(List archives, long limit, ClpTopNSpec.Order order) + { + if (archives == null || archives.isEmpty() || limit <= 0) { + return ImmutableList.of(); + } + + // 1) Merge overlaps into groups + List groups = toArchiveGroups(archives); + + // 2) Pick minimal set of groups per order, then return all member archives + List selected = new ArrayList<>(); + if (order == ClpTopNSpec.Order.DESC) { + // newest group index + int k = groups.size() - 1; + + // must include newest group + selected.addAll(groups.get(k).members); + + // assume worst case: newest contributes 0 after filter; cover limit from older groups + long coveredByOlder = 0; + for (int i = k - 1; i >= 0 && coveredByOlder < limit; --i) { + selected.addAll(groups.get(i).members); + coveredByOlder += groups.get(i).count; + } + } + else { + // oldest group index + int k = 0; + + // must include oldest group + selected.addAll(groups.get(k).members); + + // assume worst case: oldest contributes 0; cover limit from newer groups + long coveredByNewer = 0; + for (int i = k + 1; i < groups.size() && coveredByNewer < limit; ++i) { + selected.addAll(groups.get(i).members); + coveredByNewer += groups.get(i).count; + } + } + + return selected; + } + + /** + * Groups overlapping archives into non-overlapping archive groups. 
+ * + * @param archives archives sorted by lowerBound + * @return merged groups + */ + private static List toArchiveGroups(List archives) + { + List sorted = new ArrayList<>(archives); + sorted.sort(comparingLong((ArchiveMeta a) -> a.lowerBound) + .thenComparingLong(a -> a.upperBound)); + + List groups = new ArrayList<>(); + ArchiveGroup cur = null; + + for (ArchiveMeta a : sorted) { + if (cur == null) { + cur = startArchiveGroup(a); + } + else if (overlaps(cur, a)) { + // extend current group + cur.end = Math.max(cur.end, a.upperBound); + cur.count += a.messageCount; + cur.members.add(a); + } + else { + // finalize current, start a new one + groups.add(cur); + cur = startArchiveGroup(a); + } + } + if (cur != null) { + groups.add(cur); + } + return groups; + } + + private static ArchiveGroup startArchiveGroup(ArchiveMeta a) + { + ArchiveGroup group = new ArchiveGroup(); + group.begin = a.lowerBound; + group.end = a.upperBound; + group.count = a.messageCount; + group.members.add(a); + return group; + } + + private static boolean overlaps(ArchiveGroup cur, ArchiveMeta a) + { + return a.lowerBound <= cur.end && a.upperBound >= cur.begin; + } + + /** + * Represents metadata of an archive, including its ID, timestamp bounds, and message count. + */ + private static class ArchiveMeta + { + final String id; + final long lowerBound; + final long upperBound; + final long messageCount; + + ArchiveMeta(String id, long lowerBound, long upperBound, long messageCount) + { + this.id = id; + this.lowerBound = lowerBound; + this.upperBound = upperBound; + this.messageCount = messageCount; + } + } + + /** + * Represents a group of overlapping archives treated as one logical unit. 
+ */ + private static final class ArchiveGroup + { + long begin; + long end; + long count; + final List members = new ArrayList<>(); + } } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpPinotSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpPinotSplitProvider.java new file mode 100644 index 0000000000000..1eca9a93f6b29 --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpPinotSplitProvider.java @@ -0,0 +1,432 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.plugin.clp.split; + +import com.facebook.airlift.log.Logger; +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.plugin.clp.ClpSplit; +import com.facebook.presto.plugin.clp.ClpTableHandle; +import com.facebook.presto.plugin.clp.ClpTableLayoutHandle; +import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec; +import com.facebook.presto.spi.SchemaTableName; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; + +import javax.inject.Inject; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; + +import static com.facebook.presto.plugin.clp.ClpSplit.SplitType; +import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE; +import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.IR; +import static java.lang.String.format; +import static java.util.Comparator.comparingLong; +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.TimeUnit.SECONDS; + +public class ClpPinotSplitProvider + implements ClpSplitProvider +{ + private static final Logger log = Logger.get(ClpPinotSplitProvider.class); + private static final String SQL_SELECT_SPLITS_TEMPLATE = "SELECT tpath FROM %s WHERE 1 = 1 AND (%s) LIMIT 999999"; + private static final String SQL_SELECT_SPLIT_META_TEMPLATE = "SELECT tpath, creationtime, lastmodifiedtime, num_messages FROM %s WHERE 1 = 1 AND (%s) ORDER BY %s %s LIMIT 999999"; + private final ClpConfig config; + private final URL pinotSqlQueryEndpointUrl; + + @Inject + public 
ClpPinotSplitProvider(ClpConfig config) + { + this.config = requireNonNull(config, "config is null"); + try { + this.pinotSqlQueryEndpointUrl = buildPinotSqlQueryEndpointUrl(config); + } + catch (MalformedURLException e) { + throw new IllegalArgumentException( + format("Failed to build Pinot sql query endpoint URL using the provided database url: %s", config.getMetadataDbUrl()), e); + } + } + + @Override + public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle) + { + ClpTableHandle clpTableHandle = clpTableLayoutHandle.getTable(); + Optional topNSpecOptional = clpTableLayoutHandle.getTopN(); + String tableName = inferMetadataTableName(clpTableHandle); + try { + ImmutableList.Builder splits = new ImmutableList.Builder<>(); + if (topNSpecOptional.isPresent()) { + ClpTopNSpec topNSpec = topNSpecOptional.get(); + // Only handles one range metadata column for now (first ordering) + ClpTopNSpec.Ordering ordering = topNSpec.getOrderings().get(0); + // Get the last column in the ordering (the primary sort column for nested fields) + String col = ordering.getColumns().get(ordering.getColumns().size() - 1); + String dir = (ordering.getOrder() == ClpTopNSpec.Order.ASC) ? 
"ASC" : "DESC"; + String splitMetaQuery = buildSplitMetadataQuery(tableName, clpTableLayoutHandle.getMetadataSql().orElse("1 = 1"), col, dir); + List archiveMetaList = fetchArchiveMeta(splitMetaQuery, ordering); + List selected = selectTopNArchives(archiveMetaList, topNSpec.getLimit(), ordering.getOrder()); + + for (ArchiveMeta a : selected) { + String splitPath = a.id; + splits.add(new ClpSplit(splitPath, determineSplitType(splitPath), clpTableLayoutHandle.getKqlQuery())); + } + + List filteredSplits = splits.build(); + log.debug("Number of topN filtered splits: %s", filteredSplits.size()); + return filteredSplits; + } + + String splitQuery = buildSplitSelectionQuery(tableName, clpTableLayoutHandle.getMetadataSql().orElse("1 = 1")); + List splitRows = getQueryResult(pinotSqlQueryEndpointUrl, splitQuery); + for (JsonNode row : splitRows) { + String splitPath = row.elements().next().asText(); + splits.add(new ClpSplit(splitPath, determineSplitType(splitPath), clpTableLayoutHandle.getKqlQuery())); + } + + List filteredSplits = splits.build(); + log.debug("Number of filtered splits: %s", filteredSplits.size()); + return filteredSplits; + } + catch (Exception e) { + log.error(e, "Failed to list splits for table %s", tableName); + throw new RuntimeException(format("Failed to list splits for table %s: %s", tableName, e.getMessage()), e); + } + } + + /** + * Infers the Pinot metadata table name from the CLP table handle. + *

+ * In the current Pinot metadata, tables across different schemas share the same metadata table. + * The metadata table name corresponds directly to the logical table name, + * regardless of which schema is being queried. This allows multiple schemas + * to have different views or access patterns on the same underlying data. + *

+ *

+ * For example: + *

    + *
  • Schema: "default", Table: "logs" → Pinot metadata table: "logs"
  • + *
  • Schema: "production", Table: "logs" → Pinot metadata table: "logs" (same table)
  • + *
  • Schema: "staging", Table: "events" → Pinot metadata table: "events"
  • + *
+ *

+ * + * @param tableHandle the CLP table handle containing schema and table information + * @return the Pinot metadata table name (just the table name without schema prefix) + * @throws NullPointerException if tableHandle is null + */ + protected String inferMetadataTableName(ClpTableHandle tableHandle) + { + requireNonNull(tableHandle, "tableHandle is null"); + SchemaTableName schemaTableName = tableHandle.getSchemaTableName(); + + // In Pinot, the metadata table name is just the table name + // Multiple schemas can reference the same underlying metadata table + return schemaTableName.getTableName(); + } + + /** + * Constructs the Pinot SQL query endpoint URL from configuration. + * Can be overridden by subclasses to customize URL construction. + * + * @param config the CLP configuration + * @return the Pinot SQL query endpoint URL + * @throws MalformedURLException if the constructed URL is invalid + */ + protected URL buildPinotSqlQueryEndpointUrl(ClpConfig config) throws MalformedURLException + { + return new URL(config.getMetadataDbUrl() + "/query/sql"); + } + + /** + * Fetches archive metadata from the database. + * + * @param query SQL query string that selects the archives + * @param ordering The top-N ordering specifying which columns contain lowerBound/upperBound + * @return List of ArchiveMeta objects representing archive metadata + */ + private List fetchArchiveMeta(String query, ClpTopNSpec.Ordering ordering) + { + ImmutableList.Builder archiveMetas = new ImmutableList.Builder<>(); + List rows = getQueryResult(pinotSqlQueryEndpointUrl, query); + for (JsonNode row : rows) { + archiveMetas.add(new ArchiveMeta( + row.get(0).asText(), + row.get(1).asLong(), + row.get(2).asLong(), + row.get(3).asLong())); + } + return archiveMetas.build(); + } + + /** + * Selects the set of archives that must be scanned to guarantee the top-N results by timestamp + * (ASC or DESC), given only archive ranges and message counts. + *
    + *
  • Merges overlapping archives into components (union of time ranges).
  • + *
  • For DESC: always include the newest component, then add older ones until their total + * message counts cover the limit.
  • + *
  • For ASC: symmetric — start from the oldest, then add newer ones.
  • + *
+ + * @param archives list of archives with [lowerBound, upperBound, messageCount] + * @param limit number of messages requested + * @param order ASC (earliest first) or DESC (latest first) + * @return archives that must be scanned + */ + private static List selectTopNArchives(List archives, long limit, ClpTopNSpec.Order order) + { + if (archives == null || archives.isEmpty() || limit <= 0) { + return ImmutableList.of(); + } + requireNonNull(order, "order is null"); + + // 1) Merge overlaps into groups + List groups = toArchiveGroups(archives); + + if (groups.isEmpty()) { + return ImmutableList.of(); + } + + // 2) Pick minimal set of groups per order, then return all member archives + List selected = new ArrayList<>(); + if (order == ClpTopNSpec.Order.DESC) { + // newest group index + int k = groups.size() - 1; + + // must include newest group + selected.addAll(groups.get(k).members); + + // assume worst case: newest contributes 0 after filter; cover limit from older groups + long coveredByOlder = 0; + for (int i = k - 1; i >= 0 && coveredByOlder < limit; --i) { + selected.addAll(groups.get(i).members); + coveredByOlder += groups.get(i).count; + } + } + else { + // oldest group index + int k = 0; + + // must include oldest group + selected.addAll(groups.get(k).members); + + // assume worst case: oldest contributes 0; cover limit from newer groups + long coveredByNewer = 0; + for (int i = k + 1; i < groups.size() && coveredByNewer < limit; ++i) { + selected.addAll(groups.get(i).members); + coveredByNewer += groups.get(i).count; + } + } + + return selected; + } + + /** + * Groups overlapping archives into non-overlapping archive groups. 
+ * + * @param archives archives sorted by lowerBound + * @return merged components + */ + private static List toArchiveGroups(List archives) + { + List sorted = new ArrayList<>(archives); + sorted.sort(comparingLong((ArchiveMeta a) -> a.lowerBound) + .thenComparingLong(a -> a.upperBound)); + + List groups = new ArrayList<>(); + ArchiveGroup cur = null; + + for (ArchiveMeta a : sorted) { + if (cur == null) { + cur = startArchiveGroup(a); + } + else if (overlaps(cur, a)) { + // extend current component + cur.end = Math.max(cur.end, a.upperBound); + cur.count += a.messageCount; + cur.members.add(a); + } + else { + // finalize current, start a new one + groups.add(cur); + cur = startArchiveGroup(a); + } + } + if (cur != null) { + groups.add(cur); + } + return groups; + } + + private static ArchiveGroup startArchiveGroup(ArchiveMeta a) + { + ArchiveGroup group = new ArchiveGroup(); + group.begin = a.lowerBound; + group.end = a.upperBound; + group.count = a.messageCount; + group.members.add(a); + return group; + } + + private static boolean overlaps(ArchiveGroup cur, ArchiveMeta a) + { + return a.lowerBound <= cur.end && a.upperBound >= cur.begin; + } + + /** + * Determines the split type based on file path extension. + * + * @param splitPath the file path + * @return IR for .clp.zst files, ARCHIVE otherwise + */ + private static SplitType determineSplitType(String splitPath) + { + return splitPath.endsWith(".clp.zst") ? IR : ARCHIVE; + } + + /** + * Factory method for building split selection SQL queries. + * Exposed for testing purposes. + * + * @param tableName the Pinot table name + * @param filterSql the filter SQL expression + * @return the complete SQL query for selecting splits + */ + @VisibleForTesting + protected String buildSplitSelectionQuery(String tableName, String filterSql) + { + return format(SQL_SELECT_SPLITS_TEMPLATE, tableName, filterSql); + } + + /** + * Factory method for building split metadata SQL queries. + * Exposed for testing purposes. 
+ * + * @param tableName the Pinot table name + * @param filterSql the filter SQL expression + * @param orderByColumn the column to order by + * @param orderDirection the order direction (ASC or DESC) + * @return the complete SQL query for selecting split metadata + */ + @VisibleForTesting + protected String buildSplitMetadataQuery(String tableName, String filterSql, String orderByColumn, String orderDirection) + { + return format(SQL_SELECT_SPLIT_META_TEMPLATE, tableName, filterSql, orderByColumn, orderDirection); + } + + private static List getQueryResult(URL url, String sql) + { + try { + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("POST"); + conn.setRequestProperty("Content-Type", "application/json"); + conn.setRequestProperty("Accept", "application/json"); + conn.setDoOutput(true); + conn.setConnectTimeout((int) SECONDS.toMillis(5)); + conn.setReadTimeout((int) SECONDS.toMillis(30)); + + log.info("Executing Pinot query: %s", sql); + ObjectMapper mapper = new ObjectMapper(); + String body = format("{\"sql\": %s }", mapper.writeValueAsString(sql)); + try (OutputStream os = conn.getOutputStream()) { + os.write(body.getBytes(StandardCharsets.UTF_8)); + } + + int code = conn.getResponseCode(); + InputStream is = (code >= 200 && code < 300) ? 
conn.getInputStream() : conn.getErrorStream(); + if (is == null) { + throw new IOException("Pinot HTTP " + code + " with empty body"); + } + + JsonNode root; + try (InputStream in = is) { + root = mapper.readTree(in); + } + JsonNode resultTable = root.get("resultTable"); + if (resultTable == null) { + throw new IllegalStateException("Pinot query response missing 'resultTable' field"); + } + JsonNode rows = resultTable.get("rows"); + if (rows == null) { + throw new IllegalStateException("Pinot query response missing 'rows' field in resultTable"); + } + ImmutableList.Builder resultBuilder = ImmutableList.builder(); + for (Iterator it = rows.elements(); it.hasNext(); ) { + JsonNode row = it.next(); + resultBuilder.add(row); + } + List results = resultBuilder.build(); + log.debug("Number of results: %s", results.size()); + return results; + } + catch (IOException e) { + log.error(e, "IO error executing Pinot query: %s", sql); + return Collections.emptyList(); + } + catch (Exception e) { + log.error(e, "Unexpected error executing Pinot query: %s", sql); + return Collections.emptyList(); + } + } + + /** + * Represents metadata of an archive, including its ID, timestamp bounds, and message count. 
+ */ + private static final class ArchiveMeta + { + private final String id; + private final long lowerBound; + private final long upperBound; + private final long messageCount; + + ArchiveMeta(String id, long lowerBound, long upperBound, long messageCount) + { + this.id = requireNonNull(id, "id is null"); + if (lowerBound > upperBound) { + throw new IllegalArgumentException( + format("Invalid archive bounds: lowerBound (%d) > upperBound (%d)", lowerBound, upperBound)); + } + if (messageCount < 0) { + throw new IllegalArgumentException( + format("Invalid message count: %d (must be >= 0)", messageCount)); + } + this.lowerBound = lowerBound; + this.upperBound = upperBound; + this.messageCount = messageCount; + } + } + + /** + * Represents a group of overlapping archives treated as one logical unit. + */ + private static final class ArchiveGroup + { + long begin; + long end; + long count; + final List members = new ArrayList<>(); + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpUberPinotSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpUberPinotSplitProvider.java new file mode 100644 index 0000000000000..7b2dd8bfb6bef --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpUberPinotSplitProvider.java @@ -0,0 +1,123 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.plugin.clp.split; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.plugin.clp.ClpTableHandle; +import com.facebook.presto.spi.SchemaTableName; +import com.google.common.annotations.VisibleForTesting; + +import javax.inject.Inject; + +import java.net.MalformedURLException; +import java.net.URL; + +import static java.util.Objects.requireNonNull; + +/** + * Uber-specific implementation of CLP Pinot split provider. + *

+ * At Uber, Pinot is accessed through Neutrino, a cross-region routing and aggregation service + * that provides a unified interface for querying distributed Pinot clusters. This implementation + * customizes the SQL query endpoint URL to use Neutrino's global statements API instead of + * the standard Pinot query endpoint. + *

+ */ +public class ClpUberPinotSplitProvider + extends ClpPinotSplitProvider +{ + /** + * Constructs an Uber CLP Pinot split provider with the given configuration. + * + * @param config the CLP configuration + */ + @Inject + public ClpUberPinotSplitProvider(ClpConfig config) + { + super(config); + } + + /** + * Constructs the Neutrino SQL query endpoint URL for Uber's Pinot infrastructure. + *

+ * Instead of using Pinot's standard {@code /query/sql} endpoint, this method constructs + * a URL pointing to Neutrino's {@code /v1/globalStatements} endpoint, which provides + * cross-region query routing and aggregation capabilities. + *

+ * + * @param config the CLP configuration containing the base Neutrino service URL + * @return the Neutrino global statements endpoint URL + * @throws MalformedURLException if the constructed URL is invalid + */ + @Override + protected URL buildPinotSqlQueryEndpointUrl(ClpConfig config) throws MalformedURLException + { + return new URL(config.getMetadataDbUrl() + "/v1/globalStatements"); + } + + /** + * Infers the Uber-specific Pinot metadata table name from the CLP table handle. + *

+ * At Uber, Pinot tables are organized under a specific namespace hierarchy. + * All logging-related metadata tables are prefixed with {@code "rta.logging."} + * to identify them within Uber's multi-tenant Pinot infrastructure. This prefix + * represents: + *

    + *
  • rta: Real-Time Analytics platform namespace
  • + *
  • logging: The logging subsystem within RTA
  • + *
+ *

+ *

+ * Unlike the standard Pinot implementation where schemas can affect table naming, + * Uber's approach uses a flat namespace where all logging tables share the same + * prefix regardless of the schema being queried. + *

+ *

+ * Examples: + *

    + *
  • Schema: "default", Table: "logs" → Pinot table: "rta.logging.logs"
  • + *
  • Schema: "production", Table: "events" → Pinot table: "rta.logging.events"
  • + *
  • Schema: "staging", Table: "metrics" → Pinot table: "rta.logging.metrics"
  • + *
+ *

+ * + * @param tableHandle the CLP table handle containing schema and table information + * @return the fully-qualified Pinot metadata table name with Uber's namespace prefix + * @throws NullPointerException if tableHandle is null + */ + @Override + protected String inferMetadataTableName(ClpTableHandle tableHandle) + { + requireNonNull(tableHandle, "tableHandle is null"); + SchemaTableName schemaTableName = tableHandle.getSchemaTableName(); + + // Uber's Pinot tables use a fixed namespace prefix for all logging tables + // Format: rta.logging. + String tableName = schemaTableName.getTableName(); + return buildUberTableName(tableName); + } + + /** + * Factory method for building Uber-specific table names. + * Exposed for testing purposes. + * + * @param tableName the base table name + * @return the fully-qualified Uber Pinot table name + */ + @VisibleForTesting + protected String buildUberTableName(String tableName) + { + return String.format("rta.logging.%s", tableName); + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java index 31d24fd4df71c..4bec8a79c9eed 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java @@ -15,6 +15,7 @@ import com.facebook.presto.plugin.clp.ClpConfig; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.inject.Inject; @@ -85,6 +86,30 @@ public String remapSplitFilterPushDownExpression(String scope, String pushDownEx return remappedSql; } + @Override + public List remapColumnName(String scope, String columnName) + { + String[] splitScope = scope.split("\\."); + + Map mappings = new 
HashMap<>(getAllMappingsFromFilters(filterMap.get(splitScope[0]))); + + if (1 < splitScope.length) { + mappings.putAll(getAllMappingsFromFilters(filterMap.get(splitScope[0] + "." + splitScope[1]))); + } + + if (3 == splitScope.length) { + mappings.putAll(getAllMappingsFromFilters(filterMap.get(scope))); + } + + if (mappings.containsKey(columnName)) { + ClpMySqlCustomSplitFilterOptions.RangeMapping value = mappings.get(columnName); + return ImmutableList.of(value.lowerBound, value.upperBound); + } + else { + return ImmutableList.of(columnName); + } + } + @Override protected Class getCustomSplitFilterOptionsClass() { diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpPinotSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpPinotSplitFilterProvider.java new file mode 100644 index 0000000000000..3ffc76b5ae59d --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpPinotSplitFilterProvider.java @@ -0,0 +1,33 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.google.inject.Inject; + +/** + * Split filter provider for metadata databases implemented with Pinot. + *

+ * Currently uses the same implementation as MySQL. This class exists to allow + * for future Pinot-specific customizations if needed. + */ +public class ClpPinotSplitFilterProvider + extends ClpMySqlSplitFilterProvider +{ + @Inject + public ClpPinotSplitFilterProvider(ClpConfig config) + { + super(config); + } +} diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java index 0609843aaf22f..7f19a5296b801 100644 --- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java @@ -91,20 +91,36 @@ public ClpSplitFilterProvider(ClpConfig config) */ public abstract String remapSplitFilterPushDownExpression(String scope, String pushDownExpression); + /** + * Rewrites {@code columnName} to remap column names based on the {@code "customOptions"} for + * the given scope. + *

+ * {@code scope} follows the format {@code catalog[.schema][.table]}, and determines which + * column mappings to apply, since mappings from more specific scopes (e.g., table-level) + * override or supplement those from broader scopes (e.g., catalog-level). For each scope + * (catalog, schema, table), this method collects all mappings defined in + * {@code "customOptions"}. + * + * @param scope the scope of the column mapping + * @param columnName the column name to be remapped + * @return the remapped column names + */ + public abstract List remapColumnName(String scope, String columnName); + /** * Checks for the given table, if {@code splitFilterPushDownExpression} contains all required * fields. * * @param tableScopeSet the set of scopes of the tables that are being queried - * @param splitFilterPushDownExpression the expression to be checked + * @param pushDownVariables the set of variables being pushed down */ - public void checkContainsRequiredFilters(Set tableScopeSet, String splitFilterPushDownExpression) + public void checkContainsRequiredFilters(Set tableScopeSet, Set pushDownVariables) { boolean hasRequiredSplitFilterColumns = true; ImmutableList.Builder notFoundListBuilder = ImmutableList.builder(); for (String tableScope : tableScopeSet) { for (String columnName : getRequiredColumnNames(tableScope)) { - if (!splitFilterPushDownExpression.contains(columnName)) { + if (!pushDownVariables.contains(columnName)) { hasRequiredSplitFilterColumns = false; notFoundListBuilder.add(columnName); } diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpUberPinotSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpUberPinotSplitFilterProvider.java new file mode 100644 index 0000000000000..4be5585a3e18c --- /dev/null +++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpUberPinotSplitFilterProvider.java @@ -0,0 +1,122 @@ +/* + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.google.inject.Inject; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static java.lang.String.format; + +/** + * Uber-specific split filter provider for Pinot metadata databases. + *

+ * This provider extends the standard Pinot filter provider and adds TEXT_MATCH + * transformations specific to Uber's Pinot infrastructure. It inherits all range + * mapping functionality from the parent class while adding support for transforming + * equality predicates into TEXT_MATCH expressions for efficient querying against + * Uber's merged text indices. + *

+ *

+ * Example transformations: + *

    + *
  • {@code "x" = 1} → {@code TEXT_MATCH("__mergedTextIndex", '/1:x/')}
  • + *
  • {@code "x" = 'abc'} → {@code TEXT_MATCH("__mergedTextIndex", '/abc:x/')}
  • + *
  • {@code "timestamp" >= 1234} → {@code end_timestamp >= 1234} (via inherited range mapping)
  • + *
+ *

+ */ +public class ClpUberPinotSplitFilterProvider + extends ClpPinotSplitFilterProvider +{ + private static final String MERGED_TEXT_INDEX_COLUMN = "__mergedTextIndex"; + + // Pattern to match quoted column = value expressions (both numeric and string values) + // Pre-compiled for performance + private static final Pattern EQUALITY_PATTERN = Pattern.compile( + "\"([^\"]+)\"\\s*=\\s*(?:(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)|'([^']*)')"); + + @Inject + public ClpUberPinotSplitFilterProvider(ClpConfig config) + { + super(config); + } + + /** + * Transforms SQL predicates into Uber Pinot-compatible TEXT_MATCH expressions. + *

+ * First applies inherited range mappings from the parent class, then transforms + * remaining equality predicates to TEXT_MATCH format for Uber's merged text indices. + *

+ * + * @param scope the filter's scope (catalog.schema.table) + * @param pushDownExpression the SQL expression to be transformed + * @return the transformed Uber Pinot-compatible expression + */ + @Override + public String remapSplitFilterPushDownExpression(String scope, String pushDownExpression) + { + // First, apply inherited range mappings from parent class + String remappedSql = super.remapSplitFilterPushDownExpression(scope, pushDownExpression); + + // Then, apply Uber-specific TEXT_MATCH transformations + // Range-mapped columns won't match our pattern since they've already been transformed + return transformToTextMatch(remappedSql); + } + + /** + * Transforms equality predicates to Pinot TEXT_MATCH expressions for Uber's infrastructure. + *

+ * Converts {@code "columnName" = value} to {@code TEXT_MATCH("__mergedTextIndex", '/value:columnName/')} + * This transformation enables efficient querying against Uber's merged text indices. + *

+ * + * @param expression the SQL expression to transform + * @return the expression with equality predicates transformed to TEXT_MATCH + */ + private String transformToTextMatch(String expression) + { + StringBuilder result = new StringBuilder(); + Matcher matcher = EQUALITY_PATTERN.matcher(expression); + int lastEnd = 0; + + while (matcher.find()) { + String columnName = matcher.group(1); + // Group 2 contains numeric value, Group 3 contains string value + String numericValue = matcher.group(2); + String stringValue = matcher.group(3); + String value = (numericValue != null) ? numericValue : stringValue; + + // Append text before the match + result.append(expression, lastEnd, matcher.start()); + + // Transform to TEXT_MATCH pattern: TEXT_MATCH("__mergedTextIndex", '/value:columnName/') + String textMatchExpr = format( + "TEXT_MATCH(\"%s\", '/%s:%s/')", + MERGED_TEXT_INDEX_COLUMN, + value, + columnName); + result.append(textMatchExpr); + + lastEnd = matcher.end(); + } + + // Append remaining text after last match + result.append(expression, lastEnd, expression.length()); + + return result.toString(); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java index d1d0ee6964c8e..ee207f9864004 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java @@ -37,6 +37,7 @@ import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_COLUMN_NAME; import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_SUFFIX; import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_COLUMN_ID; +import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_NUM_MESSAGES; import static 
com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_SUFFIX; import static java.lang.String.format; import static java.util.UUID.randomUUID; @@ -139,26 +140,30 @@ public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME); + } + + @Test + public void testMultiSchemaDiscovery() throws IOException + { + // Create a temporary YAML file with multiple schemas + File tempFile = File.createTempFile("clp-metadata-multi-", ".yaml"); + tempFile.deleteOnExit(); + + try (FileWriter writer = new FileWriter(tempFile)) { + writer.write("clp:\n"); + writer.write(" default:\n"); + writer.write(" logs_table: /path/to/default/logs.yaml\n"); + writer.write(" dev:\n"); + writer.write(" test_logs: /path/to/dev/test.yaml\n"); + writer.write(" prod:\n"); + writer.write(" production_logs: /path/to/prod/logs.yaml\n"); + } + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(tempFile.getAbsolutePath()); + + ClpMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 3); + Set schemaSet = ImmutableSet.copyOf(schemas); + assertTrue(schemaSet.contains("default")); + assertTrue(schemaSet.contains("dev")); + assertTrue(schemaSet.contains("prod")); + } + + @Test + public void testMissingYamlPathReturnsDefault() + { + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML); + // Note: not setting metadataYamlPath + + ClpMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME); + } + + @Test + public void testInvalidYamlPathReturnsDefault() + { + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + 
.setMetadataYamlPath("/nonexistent/path/to/metadata.yaml"); + + ClpMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpTopN.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpTopN.java new file mode 100644 index 0000000000000..f7e746a21f347 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpTopN.java @@ -0,0 +1,440 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.plugin.clp; + +import com.facebook.airlift.log.Logger; +import com.facebook.presto.Session; +import com.facebook.presto.common.transaction.TransactionId; +import com.facebook.presto.common.type.RowType; +import com.facebook.presto.cost.PlanNodeStatsEstimate; +import com.facebook.presto.cost.StatsAndCosts; +import com.facebook.presto.cost.StatsProvider; +import com.facebook.presto.metadata.FunctionAndTypeManager; +import com.facebook.presto.metadata.Metadata; +import com.facebook.presto.plugin.clp.optimization.ClpComputePushDown; +import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec; +import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec.Order; +import com.facebook.presto.plugin.clp.split.ClpSplitProvider; +import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider; +import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider; +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.spi.VariableAllocator; +import com.facebook.presto.spi.WarningCollector; +import com.facebook.presto.spi.plan.FilterNode; +import com.facebook.presto.spi.plan.OutputNode; +import com.facebook.presto.spi.plan.PlanNode; +import com.facebook.presto.spi.plan.PlanNodeIdAllocator; +import com.facebook.presto.spi.plan.ProjectNode; +import com.facebook.presto.spi.plan.TableScanNode; +import com.facebook.presto.spi.plan.TopNNode; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.planner.Plan; +import com.facebook.presto.sql.planner.assertions.MatchResult; +import com.facebook.presto.sql.planner.assertions.Matcher; +import com.facebook.presto.sql.planner.assertions.PlanAssert; +import com.facebook.presto.sql.planner.assertions.PlanMatchPattern; +import com.facebook.presto.sql.planner.assertions.SymbolAliases; +import com.facebook.presto.sql.planner.plan.ExchangeNode; +import 
com.facebook.presto.sql.planner.plan.SimplePlanRewriter; +import com.facebook.presto.sql.relational.FunctionResolution; +import com.facebook.presto.sql.tree.SymbolReference; +import com.facebook.presto.testing.LocalQueryRunner; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.apache.commons.math3.util.Pair; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +import static com.facebook.presto.common.Utils.checkState; +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.metadata.FunctionExtractor.extractFunctions; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_PASSWORD; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_TABLE_PREFIX; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_URL_TEMPLATE; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_USER; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.getDbHandle; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.setupMetadata; +import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.setupSplit; +import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE; +import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Boolean; +import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.ClpString; +import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Float; +import static 
com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Integer; +import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.VarString; +import static com.facebook.presto.plugin.clp.optimization.ClpTopNSpec.Order.ASC; +import static com.facebook.presto.plugin.clp.optimization.ClpTopNSpec.Order.DESC; +import static com.facebook.presto.sql.planner.assertions.MatchResult.NO_MATCH; +import static com.facebook.presto.sql.planner.assertions.MatchResult.match; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.anyTree; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.node; +import static com.facebook.presto.testing.TestingSession.testSessionBuilder; +import static java.lang.String.format; +import static org.testng.Assert.assertEquals; + +@Test(singleThreaded = true) +public class TestClpTopN + extends TestClpQueryBase +{ + private final Session defaultSession = testSessionBuilder() + .setCatalog("clp") + .setSchema(ClpMetadata.DEFAULT_SCHEMA_NAME) + .build(); + + private ClpMetadataDbSetUp.DbHandle dbHandle; + ClpTableHandle table; + + private static final Logger log = Logger.get(TestClpTopN.class); + + private LocalQueryRunner localQueryRunner; + private FunctionAndTypeManager functionAndTypeManager; + private FunctionResolution functionResolution; + private ClpSplitProvider splitProvider; + private ClpSplitFilterProvider splitFilterProvider; + private PlanNodeIdAllocator planNodeIdAllocator; + private VariableAllocator variableAllocator; + + @BeforeMethod + public void setUp() + { + dbHandle = getDbHandle("topn_query_testdb"); + final String tableName = "test"; + final String tablePath = ARCHIVES_STORAGE_DIRECTORY_BASE + tableName; + table = new ClpTableHandle(new SchemaTableName("default", tableName), tablePath); + + setupMetadata(dbHandle, + ImmutableMap.of( + tableName, + ImmutableList.of( + new Pair<>("msg.timestamp", Integer), + new Pair<>("city.Name", ClpString), + new 
Pair<>("city.Region.Id", Integer), + new Pair<>("city.Region.Name", VarString), + new Pair<>("fare", Float), + new Pair<>("isHoliday", Boolean)))); + + splitProvider = setupSplit(dbHandle, + ImmutableMap.of( + tableName, + ImmutableList.of( + new ClpMetadataDbSetUp.ArchivesTableRow("0", 100, 0, 100), + new ClpMetadataDbSetUp.ArchivesTableRow("1", 100, 50, 150), + new ClpMetadataDbSetUp.ArchivesTableRow("2", 100, 100, 200), + new ClpMetadataDbSetUp.ArchivesTableRow("3", 100, 201, 300), + new ClpMetadataDbSetUp.ArchivesTableRow("4", 100, 301, 400)))); + + URL resource = getClass().getClassLoader().getResource("test-topn-split-filter.json"); + if (resource == null) { + log.error("test-topn-split-filter.json not found in resources"); + return; + } + + String filterConfigPath; + try { + filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + } + catch (URISyntaxException e) { + log.error("test-topn-split-filter.json not found in resources"); + return; + } + + localQueryRunner = new LocalQueryRunner(defaultSession); + localQueryRunner.createCatalog("clp", new ClpConnectorFactory(), ImmutableMap.of( + "clp.metadata-db-url", format(METADATA_DB_URL_TEMPLATE, dbHandle.getDbPath()), + "clp.metadata-db-user", METADATA_DB_USER, + "clp.metadata-db-password", METADATA_DB_PASSWORD, + "clp.metadata-table-prefix", METADATA_DB_TABLE_PREFIX)); + localQueryRunner.getMetadata().registerBuiltInFunctions(extractFunctions(new ClpPlugin().getFunctions())); + functionAndTypeManager = localQueryRunner.getMetadata().getFunctionAndTypeManager(); + functionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver()); + splitFilterProvider = new ClpMySqlSplitFilterProvider(new ClpConfig().setSplitFilterConfig(filterConfigPath)); + planNodeIdAllocator = new PlanNodeIdAllocator(); + variableAllocator = new VariableAllocator(); + } + + @AfterMethod + public void tearDown() + { + localQueryRunner.close(); + ClpMetadataDbSetUp.tearDown(dbHandle); + } + 
+ @Test + public void test() + { + testTopNQueryPlanAndSplits( + "SELECT * FROM test WHERE msg.timestamp > 120 AND msg.timestamp < 240 ORDER BY msg.timestamp DESC LIMIT 100", + "(msg.timestamp > 120 AND msg.timestamp < 240)", + "(end_timestamp > 120 AND begin_timestamp < 240)", + 100, + DESC, + ImmutableSet.of("1", "2", "3")); + + testTopNQueryPlanAndSplits( + "SELECT * FROM test WHERE msg.timestamp > 120 AND msg.timestamp < 240 ORDER BY msg.timestamp ASC LIMIT 50", + "(msg.timestamp > 120 AND msg.timestamp < 240)", + "(end_timestamp > 120 AND begin_timestamp < 240)", + 50, + ASC, + ImmutableSet.of("1", "2", "3")); + + testTopNQueryPlanAndSplits( + "SELECT * FROM test WHERE msg.timestamp >= 180 AND msg.timestamp <= 260 ORDER BY msg.timestamp DESC LIMIT 100", + "(msg.timestamp >= 180 AND msg.timestamp <= 260)", + "(end_timestamp >= 180 AND begin_timestamp <= 260)", + 100, + DESC, + ImmutableSet.of("2", "3")); + + testTopNQueryPlanAndSplits( + "SELECT * FROM test WHERE msg.timestamp > 250 AND msg.timestamp < 290 ORDER BY msg.timestamp DESC LIMIT 10", + "(msg.timestamp > 250 AND msg.timestamp < 290)", + "(end_timestamp > 250 AND begin_timestamp < 290)", + 10, + DESC, + ImmutableSet.of("3")); + + testTopNQueryPlanAndSplits( + "SELECT * FROM test WHERE msg.timestamp > 1000 AND msg.timestamp < 1100 ORDER BY msg.timestamp DESC LIMIT 10", + "(msg.timestamp > 1000 AND msg.timestamp < 1100)", + "(end_timestamp > 1000 AND begin_timestamp < 1100)", + 10, + DESC, + ImmutableSet.of()); + + testTopNQueryPlanAndSplits( + "SELECT * FROM test WHERE msg.timestamp <= 300 ORDER BY msg.timestamp DESC LIMIT 1000", + "msg.timestamp <= 300", + "begin_timestamp <= 300", + 1000, + DESC, + ImmutableSet.of("0", "1", "2", "3")); + + testTopNQueryPlanAndSplits( + "SELECT * FROM test WHERE msg.timestamp <= 400 ORDER BY msg.timestamp DESC LIMIT 100", + "msg.timestamp <= 400", + "begin_timestamp <= 400", + 100, + DESC, + ImmutableSet.of("3", "4")); + } + + private void 
testTopNQueryPlanAndSplits(String sql, String kql, String metadataSql, long limit, Order order, Set splitIds) + { + TransactionId transactionId = localQueryRunner.getTransactionManager().beginTransaction(false); + Session session = testSessionBuilder().setCatalog("clp").setSchema("default").setTransactionId(transactionId).build(); + + Plan plan = localQueryRunner.createPlan( + session, + sql, + WarningCollector.NOOP); + ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider); + PlanNode optimizedPlan = optimizer.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); + PlanNode optimizedPlanWithUniqueId = freshenIds(optimizedPlan, new PlanNodeIdAllocator()); + + ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle( + table, + Optional.of(kql), + Optional.of(metadataSql), + true, + Optional.of(new ClpTopNSpec( + limit, + ImmutableList.of(new ClpTopNSpec.Ordering(ImmutableList.of("begin_timestamp", "end_timestamp"), order))))); + + PlanAssert.assertPlan( + session, + localQueryRunner.getMetadata(), + (node, sourceStats, lookup, s, types) -> PlanNodeStatsEstimate.unknown(), + new Plan(optimizedPlanWithUniqueId, plan.getTypes(), StatsAndCosts.empty()), + anyTree( + ClpTableScanMatcher.clpTableScanPattern( + clpTableLayoutHandle, + ImmutableSet.of( + city, + fare, + isHoliday, + new ClpColumnHandle( + "msg", + RowType.from(ImmutableList.of(new RowType.Field(Optional.of("timestamp"), BIGINT)))))))); + + assertEquals( + ImmutableSet.copyOf(splitProvider.listSplits(clpTableLayoutHandle)), + splitIds.stream() + .map(id -> new ClpSplit("/tmp/archives/test/" + id, ARCHIVE, Optional.of(kql))) + .collect(ImmutableSet.toImmutableSet())); + } + + /** + * Recursively rebuilds a query plan tree so that every {@link PlanNode} has a fresh, unique ID. + *

+ * This utility is mainly for testing, to avoid ID collisions that can occur when + * localQueryRunner.createPlan() and a custom optimizer each use separate + * {@link PlanNodeIdAllocator}s that start at the same seed, producing duplicate IDs. + * + * @param root the root of the plan + * @param idAlloc the plan node ID allocator + * @return the plan with a fresh, unique IDs. + */ + private static PlanNode freshenIds(PlanNode root, PlanNodeIdAllocator idAlloc) + { + return SimplePlanRewriter.rewriteWith(new SimplePlanRewriter() { + @Override + public PlanNode visitOutput(OutputNode node, RewriteContext ctx) + { + PlanNode src = ctx.rewrite(node.getSource(), null); + return new OutputNode( + node.getSourceLocation(), + idAlloc.getNextId(), + src, + node.getColumnNames(), + node.getOutputVariables()); + } + + @Override + public PlanNode visitExchange(ExchangeNode node, RewriteContext ctx) + { + List newSources = node.getSources().stream() + .map(s -> ctx.rewrite(s, null)) + .collect(com.google.common.collect.ImmutableList.toImmutableList()); + + return new ExchangeNode( + node.getSourceLocation(), + idAlloc.getNextId(), + node.getType(), + node.getScope(), + node.getPartitioningScheme(), + newSources, + node.getInputs(), + node.isEnsureSourceOrdering(), + node.getOrderingScheme()); + } + + @Override + public PlanNode visitProject(ProjectNode node, RewriteContext ctx) + { + PlanNode src = ctx.rewrite(node.getSource(), null); + return new ProjectNode(idAlloc.getNextId(), src, node.getAssignments()); + } + + @Override + public PlanNode visitFilter(FilterNode node, RewriteContext ctx) + { + PlanNode src = ctx.rewrite(node.getSource(), null); + return new FilterNode(node.getSourceLocation(), idAlloc.getNextId(), src, node.getPredicate()); + } + + @Override + public PlanNode visitTopN(TopNNode node, RewriteContext ctx) + { + PlanNode src = ctx.rewrite(node.getSource(), null); + return new TopNNode( + node.getSourceLocation(), + idAlloc.getNextId(), + src, + 
node.getCount(), + node.getOrderingScheme(), + node.getStep()); + } + + @Override + public PlanNode visitTableScan(TableScanNode node, RewriteContext ctx) + { + return new TableScanNode( + node.getSourceLocation(), + idAlloc.getNextId(), + node.getTable(), + node.getOutputVariables(), + node.getAssignments()); + } + + @Override + public PlanNode visitPlan(PlanNode node, RewriteContext ctx) + { + List newChildren = node.getSources().stream() + .map(ch -> ctx.rewrite(ch, null)) + .collect(com.google.common.collect.ImmutableList.toImmutableList()); + return node.replaceChildren(newChildren); + } + }, root, null); + } + + private static final class ClpTableScanMatcher + implements Matcher + { + private final ClpTableLayoutHandle expectedLayoutHandle; + private final Set expectedColumns; + + private ClpTableScanMatcher(ClpTableLayoutHandle expectedLayoutHandle, Set expectedColumns) + { + this.expectedLayoutHandle = expectedLayoutHandle; + this.expectedColumns = expectedColumns; + } + + static PlanMatchPattern clpTableScanPattern(ClpTableLayoutHandle layoutHandle, Set columns) + { + return node(TableScanNode.class).with(new ClpTableScanMatcher(layoutHandle, columns)); + } + + @Override + public boolean shapeMatches(PlanNode node) + { + return node instanceof TableScanNode; + } + + @Override + public MatchResult detailMatches( + PlanNode node, + StatsProvider stats, + Session session, + Metadata metadata, + SymbolAliases symbolAliases) + { + checkState(shapeMatches(node), "Plan testing framework error: shapeMatches returned false"); + TableScanNode tableScanNode = (TableScanNode) node; + ClpTableLayoutHandle actualLayoutHandle = (ClpTableLayoutHandle) tableScanNode.getTable().getLayout().get(); + + // Check layout handle + if (!expectedLayoutHandle.equals(actualLayoutHandle)) { + return NO_MATCH; + } + + // Check assignments contain expected columns + Map actualAssignments = tableScanNode.getAssignments(); + Set actualColumns = new HashSet<>(actualAssignments.values()); + 
+ if (!expectedColumns.equals(actualColumns)) { + return NO_MATCH; + } + + SymbolAliases.Builder aliasesBuilder = SymbolAliases.builder(); + for (VariableReferenceExpression variable : tableScanNode.getOutputVariables()) { + aliasesBuilder.put(variable.getName(), new SymbolReference(variable.getName())); + } + + return match(aliasesBuilder.build()); + } + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java index a6b6bc118ffee..3b83d1c95d10b 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java @@ -274,7 +274,7 @@ public void testClpGetJsonString() Plan plan = localQueryRunner.createPlan( session, - "SELECT CLP_GET_JSON_STRING() from test WHERE CLP_GET_BIGINT('user_id') = 0", + "SELECT CLP_GET_JSON_STRING() from test WHERE CLP_GET_BIGINT('user_id') = 0 ORDER BY fare", WarningCollector.NOOP); ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager); PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator); @@ -294,6 +294,7 @@ public void testClpGetJsonString() ClpTableScanMatcher.clpTableScanPattern( new ClpTableLayoutHandle(table, Optional.of("user_id: 0"), Optional.empty()), ImmutableSet.of( + fare, new ClpColumnHandle("user_id", BIGINT), new ClpColumnHandle(JSON_STRING_PLACEHOLDER, VARCHAR)))))); } diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpYamlMetadata.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpYamlMetadata.java new file mode 100644 index 0000000000000..f91174851d7e0 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpYamlMetadata.java @@ -0,0 +1,250 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may 
not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp; + +import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider; +import com.facebook.presto.plugin.clp.metadata.ClpYamlMetadataProvider; +import com.facebook.presto.plugin.clp.split.ClpPinotSplitProvider; +import com.facebook.presto.plugin.clp.split.ClpSplitProvider; +import com.facebook.presto.spi.ColumnMetadata; +import com.facebook.presto.spi.ConnectorTableMetadata; +import com.facebook.presto.spi.SchemaTableName; +import com.google.common.collect.ImmutableSet; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +import java.util.HashSet; +import java.util.List; +import java.util.Optional; + +import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType.YAML; +import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; +import static com.facebook.presto.testing.TestingConnectorSession.SESSION; +import static org.testng.Assert.assertEquals; + +public class TestClpYamlMetadata +{ + private static final String PINOT_BROKER_URL = "http://localhost:8099"; + private static final String TABLE_NAME = "cockroachdb"; + private static final String SCHEMA1_NAME = "schema1"; + private static final String SCHEMA2_NAME = "schema2"; + private static final String ORDERS_TABLE_NAME = "orders"; + private static final String USERS_TABLE_NAME = "users"; + private ClpMetadata metadata; + private ClpSplitProvider clpSplitProvider; + + @BeforeTest + public void setUp() throws Exception + { + // Load 
test resources from classpath + // ClpYamlMetadataProvider now supports relative paths, so we can use the resource file directly + java.net.URL tablesSchemaResource = getClass().getClassLoader().getResource("test-tables-schema.yaml"); + + if (tablesSchemaResource == null) { + throw new IllegalStateException("test-tables-schema.yaml not found in test resources"); + } + + // Get the absolute path to test-tables-schema.yaml + // Relative paths in the YAML will be resolved relative to this file's parent directory + String tablesSchemaPath = java.nio.file.Paths.get(tablesSchemaResource.toURI()).toString(); + + ClpConfig config = new ClpConfig() + .setPolymorphicTypeEnabled(true) + .setMetadataDbUrl(PINOT_BROKER_URL) + .setMetadataProviderType(YAML) + .setMetadataYamlPath(tablesSchemaPath); + ClpMetadataProvider metadataProvider = new ClpYamlMetadataProvider(config); + metadata = new ClpMetadata(config, metadataProvider); + clpSplitProvider = new ClpPinotSplitProvider(config); + } + + @Test + public void testListSchemaNames() + { + List schemaNames = metadata.listSchemaNames(SESSION); + assertEquals(new HashSet<>(schemaNames), ImmutableSet.of(DEFAULT_SCHEMA_NAME, SCHEMA1_NAME, SCHEMA2_NAME)); + } + + @Test + public void testListTables() + { + // When no schema is specified, listTables defaults to DEFAULT_SCHEMA_NAME + ImmutableSet defaultTables = ImmutableSet.of( + new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME)); + assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.empty())), defaultTables); + } + + @Test + public void testListTablesForSpecificSchema() + { + // Test listing tables for schema1 + ImmutableSet schema1Tables = ImmutableSet.of( + new SchemaTableName(SCHEMA1_NAME, ORDERS_TABLE_NAME), + new SchemaTableName(SCHEMA1_NAME, USERS_TABLE_NAME)); + assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.of(SCHEMA1_NAME))), schema1Tables); + + // Test listing tables for schema2 + ImmutableSet schema2Tables = ImmutableSet.of( + new 
SchemaTableName(SCHEMA2_NAME, ORDERS_TABLE_NAME)); + assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.of(SCHEMA2_NAME))), schema2Tables); + + // Test listing tables for default schema + ImmutableSet defaultTables = ImmutableSet.of( + new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME)); + assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.of(DEFAULT_SCHEMA_NAME))), defaultTables); + } + + @Test + public void testListSplits() + { + ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle( + new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME), ""), + Optional.empty(), + Optional.empty()); + List result = clpSplitProvider.listSplits(layoutHandle); + System.out.println("Hello world"); + } + + @Test + public void testGetTableMetadata() + { + ClpTableHandle clpTableHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME)); + ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle); +// ImmutableSet columnMetadata = ImmutableSet.builder() +// .add(ColumnMetadata.builder() +// .setName("a_bigint") +// .setType(BIGINT) +// .setNullable(true) +// .build()) +// .add(ColumnMetadata.builder() +// .setName("a_varchar") +// .setType(VARCHAR) +// .setNullable(true) +// .build()) +// .add(ColumnMetadata.builder() +// .setName("b_double") +// .setType(DOUBLE) +// .setNullable(true) +// .build()) +// .add(ColumnMetadata.builder() +// .setName("b_varchar") +// .setType(VARCHAR) +// .setNullable(true) +// .build()) +// .add(ColumnMetadata.builder() +// .setName("c") +// .setType(RowType.from(ImmutableList.of( +// RowType.field("d", BOOLEAN), +// RowType.field("e", VARCHAR)))) +// .setNullable(true) +// .build()) +// .add(ColumnMetadata.builder() +// .setName("f") +// .setType(RowType.from(ImmutableList.of( +// RowType.field("g", +// RowType.from(ImmutableList.of( +// RowType.field("h", new ArrayType(VARCHAR)))))))) +// .setNullable(true) +// 
.build()) +// .build(); +// assertEquals(columnMetadata, ImmutableSet.copyOf(tableMetadata.getColumns())); + ImmutableSet actual = ImmutableSet.copyOf(tableMetadata.getColumns()); + System.out.println("Hello world"); + } + + @Test + public void testGetTableHandleForDuplicateTableNames() + { + // Test that we can get distinct table handles for tables with the same name in different schemas + ClpTableHandle schema1OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA1_NAME, ORDERS_TABLE_NAME)); + ClpTableHandle schema2OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA2_NAME, ORDERS_TABLE_NAME)); + + // Verify both handles are not null + assertEquals(schema1OrdersHandle != null, true); + assertEquals(schema2OrdersHandle != null, true); + + // Verify the schema names are correctly set + assertEquals(schema1OrdersHandle.getSchemaTableName().getSchemaName(), SCHEMA1_NAME); + assertEquals(schema2OrdersHandle.getSchemaTableName().getSchemaName(), SCHEMA2_NAME); + + // Verify the table names are the same + assertEquals(schema1OrdersHandle.getSchemaTableName().getTableName(), ORDERS_TABLE_NAME); + assertEquals(schema2OrdersHandle.getSchemaTableName().getTableName(), ORDERS_TABLE_NAME); + } + + @Test + public void testGetTableMetadataForDuplicateTableNames() + { + // Get table handles for orders tables in both schemas + ClpTableHandle schema1OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA1_NAME, ORDERS_TABLE_NAME)); + ClpTableHandle schema2OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA2_NAME, ORDERS_TABLE_NAME)); + + // Get metadata for both tables + ConnectorTableMetadata schema1Metadata = metadata.getTableMetadata(SESSION, schema1OrdersHandle); + ConnectorTableMetadata schema2Metadata = metadata.getTableMetadata(SESSION, schema2OrdersHandle); + + // Extract column names from both tables + ImmutableSet 
schema1Columns = schema1Metadata.getColumns().stream() + .map(ColumnMetadata::getName) + .collect(ImmutableSet.toImmutableSet()); + ImmutableSet schema2Columns = schema2Metadata.getColumns().stream() + .map(ColumnMetadata::getName) + .collect(ImmutableSet.toImmutableSet()); + + // Verify schema1.orders has the expected columns (from test-orders-schema1.yaml) + ImmutableSet expectedSchema1Columns = ImmutableSet.of( + "order_id", "customer_id", "product_name", "quantity", "price"); + assertEquals(schema1Columns, expectedSchema1Columns); + + // Verify schema2.orders has the expected columns (from test-orders-schema2.yaml) + ImmutableSet expectedSchema2Columns = ImmutableSet.of( + "order_id", "vendor_id", "item_description", "total_amount", "is_paid", "shipping_address"); + assertEquals(schema2Columns, expectedSchema2Columns); + + // Verify that the two tables have different schemas (different columns) + assertEquals(schema1Columns.equals(schema2Columns), false); + } + + @Test + public void testGetTableMetadataForAllSchemas() + { + // Test default.cockroachdb + ClpTableHandle defaultTableHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME)); + ConnectorTableMetadata defaultMetadata = metadata.getTableMetadata(SESSION, defaultTableHandle); + assertEquals(defaultMetadata != null, true); + assertEquals(defaultMetadata.getTable().getSchemaName(), DEFAULT_SCHEMA_NAME); + assertEquals(defaultMetadata.getTable().getTableName(), TABLE_NAME); + + // Test schema1.orders + ClpTableHandle schema1OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA1_NAME, ORDERS_TABLE_NAME)); + ConnectorTableMetadata schema1OrdersMetadata = metadata.getTableMetadata(SESSION, schema1OrdersHandle); + assertEquals(schema1OrdersMetadata != null, true); + assertEquals(schema1OrdersMetadata.getTable().getSchemaName(), SCHEMA1_NAME); + assertEquals(schema1OrdersMetadata.getTable().getTableName(), 
ORDERS_TABLE_NAME); + + // Test schema1.users + ClpTableHandle schema1UsersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA1_NAME, USERS_TABLE_NAME)); + ConnectorTableMetadata schema1UsersMetadata = metadata.getTableMetadata(SESSION, schema1UsersHandle); + assertEquals(schema1UsersMetadata != null, true); + assertEquals(schema1UsersMetadata.getTable().getSchemaName(), SCHEMA1_NAME); + assertEquals(schema1UsersMetadata.getTable().getTableName(), USERS_TABLE_NAME); + + // Test schema2.orders + ClpTableHandle schema2OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA2_NAME, ORDERS_TABLE_NAME)); + ConnectorTableMetadata schema2OrdersMetadata = metadata.getTableMetadata(SESSION, schema2OrdersHandle); + assertEquals(schema2OrdersMetadata != null, true); + assertEquals(schema2OrdersMetadata.getTable().getSchemaName(), SCHEMA2_NAME); + assertEquals(schema2OrdersMetadata.getTable().getTableName(), ORDERS_TABLE_NAME); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/metadata/TestClpYamlMetadataProvider.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/metadata/TestClpYamlMetadataProvider.java new file mode 100644 index 0000000000000..7005f7dcce06a --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/metadata/TestClpYamlMetadataProvider.java @@ -0,0 +1,345 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.plugin.clp.metadata; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.plugin.clp.ClpTableHandle; +import com.google.common.collect.ImmutableSet; +import org.testng.annotations.AfterClass; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType.YAML; +import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +@Test(singleThreaded = true) +public class TestClpYamlMetadataProvider +{ + private final List tempFiles = new ArrayList<>(); + + @AfterClass + public void cleanup() + { + // Clean up temporary files + for (File file : tempFiles) { + if (file.exists()) { + file.delete(); + } + } + } + + /** + * Test that listSchemaNames returns only the default schema when YAML has single schema + */ + @Test + public void testListSchemaNamesSingleSchema() throws IOException + { + File metadataFile = createTempYamlFile( + "clp:\n" + + " default:\n" + + " table1: /path/to/table1.yaml\n" + + " table2: /path/to/table2.yaml\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME); + } + + /** + * Test that listSchemaNames discovers multiple schemas from YAML + */ + @Test + public void testListSchemaNamesMultipleSchemas() throws IOException + { + File metadataFile = createTempYamlFile( + "clp:\n" + + " default:\n" + + " logs: /path/to/default/logs.yaml\n" + + " dev:\n" + + " test_logs: 
/path/to/dev/logs.yaml\n" + + " staging:\n" + + " staging_logs: /path/to/staging/logs.yaml\n" + + " prod:\n" + + " production_logs: /path/to/prod/logs.yaml\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 4); + Set schemaSet = ImmutableSet.copyOf(schemas); + assertTrue(schemaSet.contains("default")); + assertTrue(schemaSet.contains("dev")); + assertTrue(schemaSet.contains("staging")); + assertTrue(schemaSet.contains("prod")); + } + + /** + * Test that listSchemaNames handles missing YAML path gracefully + */ + @Test + public void testListSchemaNamesNullPath() + { + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML); + // Note: not setting metadataYamlPath + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME); + } + + /** + * Test that listSchemaNames handles nonexistent file gracefully + */ + @Test + public void testListSchemaNamesNonexistentFile() + { + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath("/nonexistent/path/metadata.yaml"); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME); + } + + /** + * Test that listSchemaNames handles malformed YAML gracefully + */ + @Test + public void testListSchemaNamesMalformedYaml() throws IOException + { + File metadataFile = createTempYamlFile( + "this is not\n" + + " valid: yaml: content\n" + + " - with random structure\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + 
.setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME); + } + + /** + * Test that listSchemaNames handles YAML without catalog field + */ + @Test + public void testListSchemaNamesNoCatalogField() throws IOException + { + File metadataFile = createTempYamlFile( + "some_other_catalog:\n" + + " default:\n" + + " table1: /path/to/table1.yaml\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + // Should fall back to default schema on error + assertEquals(schemas.size(), 1); + assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME); + } + + /** + * Test that listTableHandles returns correct tables for a schema + */ + @Test + public void testListTableHandles() throws IOException + { + // Create schema YAML files + File table1Schema = createTempYamlFile("column1: 1\ncolumn2: 2\n"); + File table2Schema = createTempYamlFile("field1: 3\nfield2: 4\n"); + + File metadataFile = createTempYamlFile( + "clp:\n" + + " default:\n" + + " table1: " + table1Schema.getAbsolutePath() + "\n" + + " table2: " + table2Schema.getAbsolutePath() + "\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List tables = provider.listTableHandles(DEFAULT_SCHEMA_NAME); + + assertEquals(tables.size(), 2); + Set tableNames = ImmutableSet.of( + tables.get(0).getSchemaTableName().getTableName(), + tables.get(1).getSchemaTableName().getTableName()); + assertTrue(tableNames.contains("table1")); + assertTrue(tableNames.contains("table2")); + } + + /** + 
* Test that listTableHandles returns correct tables for multiple schemas + */ + @Test + public void testListTableHandlesMultipleSchemas() throws IOException + { + File devTable = createTempYamlFile("col: 1\n"); + File prodTable = createTempYamlFile("col: 2\n"); + + File metadataFile = createTempYamlFile( + "clp:\n" + + " dev:\n" + + " dev_logs: " + devTable.getAbsolutePath() + "\n" + + " prod:\n" + + " prod_logs: " + prodTable.getAbsolutePath() + "\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + + // Test dev schema + List devTables = provider.listTableHandles("dev"); + assertEquals(devTables.size(), 1); + assertEquals(devTables.get(0).getSchemaTableName().getTableName(), "dev_logs"); + assertEquals(devTables.get(0).getSchemaTableName().getSchemaName(), "dev"); + + // Test prod schema + List prodTables = provider.listTableHandles("prod"); + assertEquals(prodTables.size(), 1); + assertEquals(prodTables.get(0).getSchemaTableName().getTableName(), "prod_logs"); + assertEquals(prodTables.get(0).getSchemaTableName().getSchemaName(), "prod"); + } + + /** + * Test that schema names are returned in consistent order + */ + @Test + public void testSchemaNameConsistency() throws IOException + { + File metadataFile = createTempYamlFile( + "clp:\n" + + " schema_a:\n" + + " table: /path/a.yaml\n" + + " schema_b:\n" + + " table: /path/b.yaml\n" + + " schema_c:\n" + + " table: /path/c.yaml\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + + // Call multiple times to verify consistency + List schemas1 = provider.listSchemaNames(); + List schemas2 = provider.listSchemaNames(); + List schemas3 = provider.listSchemaNames(); + + assertEquals(schemas1, schemas2); + 
assertEquals(schemas2, schemas3); + } + + /** + * Test empty schema (no tables) + */ + @Test + public void testEmptySchema() throws IOException + { + File metadataFile = createTempYamlFile( + "clp:\n" + + " empty_schema:\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertTrue(schemas.contains("empty_schema")); + + List tables = provider.listTableHandles("empty_schema"); + assertTrue(tables.isEmpty()); + } + + /** + * Test that schemas with special characters in names are handled + */ + @Test + public void testSchemaWithSpecialCharacters() throws IOException + { + File metadataFile = createTempYamlFile( + "clp:\n" + + " schema_with_underscores:\n" + + " table: /path/table.yaml\n" + + " schema-with-dashes:\n" + + " table: /path/table2.yaml\n"); + + ClpConfig config = new ClpConfig() + .setMetadataProviderType(YAML) + .setMetadataYamlPath(metadataFile.getAbsolutePath()); + + ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config); + List schemas = provider.listSchemaNames(); + + assertEquals(schemas.size(), 2); + Set schemaSet = ImmutableSet.copyOf(schemas); + assertTrue(schemaSet.contains("schema_with_underscores")); + assertTrue(schemaSet.contains("schema-with-dashes")); + } + + /** + * Helper method to create temporary YAML files for testing + */ + private File createTempYamlFile(String content) throws IOException + { + File tempFile = Files.createTempFile("clp-test-", ".yaml").toFile(); + tempFile.deleteOnExit(); + tempFiles.add(tempFile); + + try (FileWriter writer = new FileWriter(tempFile)) { + writer.write(content); + } + + return tempFile; + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/optimization/TestClpFilterToKqlConverter.java 
b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/optimization/TestClpFilterToKqlConverter.java new file mode 100644 index 0000000000000..6eff28148ca29 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/optimization/TestClpFilterToKqlConverter.java @@ -0,0 +1,141 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.optimization; + +import com.facebook.presto.common.function.OperatorType; +import com.facebook.presto.plugin.clp.ClpColumnHandle; +import com.facebook.presto.plugin.clp.TestClpQueryBase; +import com.facebook.presto.spi.ColumnHandle; +import com.facebook.presto.spi.relation.CallExpression; +import com.facebook.presto.spi.relation.ConstantExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import io.airlift.slice.Slices; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +import static com.facebook.presto.common.type.BooleanType.BOOLEAN; +import static com.facebook.presto.common.type.IntegerType.INTEGER; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +/** + * Basic tests for ClpFilterToKqlConverter 
focusing on metadata SQL generation + * with string and numeric literals. + */ +@Test(singleThreaded = true) +public class TestClpFilterToKqlConverter + extends TestClpQueryBase +{ + private ClpFilterToKqlConverter converter; + private Map assignments; + private Set metadataFilterColumns; + + @BeforeMethod + public void setUp() + { + assignments = new HashMap<>(); + metadataFilterColumns = ImmutableSet.of("hostname", "status_code"); + converter = new ClpFilterToKqlConverter( + standardFunctionResolution, + functionAndTypeManager, + assignments, + metadataFilterColumns); + } + + /** + * Test string literal equality with metadata SQL generation. + * This is the main fix - ensuring string literals are handled correctly. + */ + @Test + public void testStringLiteralWithMetadataSql() + { + // Setup + VariableReferenceExpression hostnameVar = new VariableReferenceExpression( + Optional.empty(), "hostname", VARCHAR); + ClpColumnHandle hostnameColumn = new ClpColumnHandle("hostname", "hostname", VARCHAR); + assignments.put(hostnameVar, hostnameColumn); + + // Test: hostname = 'abc' + ConstantExpression stringLiteral = new ConstantExpression(Slices.utf8Slice("abc"), VARCHAR); + CallExpression equalCall = new CallExpression( + Optional.empty(), + "equal", + standardFunctionResolution.comparisonFunction(OperatorType.EQUAL, VARCHAR, VARCHAR), + BOOLEAN, + ImmutableList.of(hostnameVar, stringLiteral)); + + ClpExpression result = equalCall.accept(converter, null); + + // Verify + assertTrue(result.getPushDownExpression().isPresent()); + assertEquals(result.getPushDownExpression().get(), "hostname: \"abc\""); + assertTrue(result.getMetadataSqlQuery().isPresent()); + assertEquals(result.getMetadataSqlQuery().get(), "\"hostname\" = 'abc'"); + } + + /** + * Test numeric literal equality with metadata SQL generation. 
+ */ + @Test + public void testNumericLiteralWithMetadataSql() + { + // Setup + VariableReferenceExpression statusCodeVar = new VariableReferenceExpression( + Optional.empty(), "status_code", INTEGER); + ClpColumnHandle statusCodeColumn = new ClpColumnHandle("status_code", "status_code", INTEGER); + assignments.put(statusCodeVar, statusCodeColumn); + + // Test: status_code = 200 + ConstantExpression numericLiteral = new ConstantExpression(200L, INTEGER); + CallExpression equalCall = new CallExpression( + Optional.empty(), + "equal", + standardFunctionResolution.comparisonFunction(OperatorType.EQUAL, INTEGER, INTEGER), + BOOLEAN, + ImmutableList.of(statusCodeVar, numericLiteral)); + + ClpExpression result = equalCall.accept(converter, null); + + // Verify + assertTrue(result.getPushDownExpression().isPresent()); + assertEquals(result.getPushDownExpression().get(), "status_code: 200"); + assertTrue(result.getMetadataSqlQuery().isPresent()); + assertEquals(result.getMetadataSqlQuery().get(), "\"status_code\" = 200"); + } + + /** + * Test escaping special characters in KQL string values. 
+ */ + @Test + public void testEscapeKqlSpecialChars() + { + assertEquals( + ClpFilterToKqlConverter.escapeKqlSpecialCharsForStringValue("path\\to\\file"), + "path\\\\to\\\\file"); + assertEquals( + ClpFilterToKqlConverter.escapeKqlSpecialCharsForStringValue("file*.txt"), + "file\\*.txt"); + assertEquals( + ClpFilterToKqlConverter.escapeKqlSpecialCharsForStringValue("normal_string"), + "normal_string"); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/TestClpUberPinotSplitProvider.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/TestClpUberPinotSplitProvider.java new file mode 100644 index 0000000000000..4646c8d5f0f6b --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/TestClpUberPinotSplitProvider.java @@ -0,0 +1,249 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.plugin.clp.split; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.facebook.presto.plugin.clp.ClpTableHandle; +import com.facebook.presto.spi.SchemaTableName; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.lang.reflect.Method; +import java.net.MalformedURLException; +import java.net.URL; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +/** + * Unit tests for ClpUberPinotSplitProvider. + * Tests Uber-specific customizations including Neutrino endpoint URL construction + * and RTA table name prefixing. + */ +@Test(singleThreaded = true) +public class TestClpUberPinotSplitProvider +{ + private ClpUberPinotSplitProvider splitProvider; + private ClpConfig config; + + @BeforeMethod + public void setUp() + { + config = new ClpConfig(); + config.setMetadataDbUrl("https://neutrino.uber.com"); + config.setSplitProviderType(ClpConfig.SplitProviderType.PINOT_UBER); + splitProvider = new ClpUberPinotSplitProvider(config); + } + + /** + * Test that the Neutrino endpoint URL is correctly constructed. + */ + @Test + public void testBuildPinotSqlQueryEndpointUrl() throws Exception + { + // Use reflection to access the protected method + Method method = ClpUberPinotSplitProvider.class.getDeclaredMethod("buildPinotSqlQueryEndpointUrl", ClpConfig.class); + method.setAccessible(true); + + URL result = (URL) method.invoke(splitProvider, config); + + assertNotNull(result); + assertEquals(result.toString(), "https://neutrino.uber.com/v1/globalStatements"); + assertEquals(result.getProtocol(), "https"); + assertEquals(result.getHost(), "neutrino.uber.com"); + assertEquals(result.getPath(), "/v1/globalStatements"); + } + + /** + * Test URL construction with different base URLs. 
+ */ + @Test + public void testBuildPinotSqlQueryEndpointUrlVariations() throws Exception + { + Method method = ClpUberPinotSplitProvider.class.getDeclaredMethod("buildPinotSqlQueryEndpointUrl", ClpConfig.class); + method.setAccessible(true); + + // Test with trailing slash + config.setMetadataDbUrl("https://neutrino.uber.com/"); + URL result = (URL) method.invoke(splitProvider, config); + assertEquals(result.toString(), "https://neutrino.uber.com//v1/globalStatements"); + + // Test without protocol (should work as URL constructor handles it) + config.setMetadataDbUrl("http://neutrino-dev.uber.com"); + result = (URL) method.invoke(splitProvider, config); + assertEquals(result.toString(), "http://neutrino-dev.uber.com/v1/globalStatements"); + + // Test with port + config.setMetadataDbUrl("https://neutrino.uber.com:8080"); + result = (URL) method.invoke(splitProvider, config); + assertEquals(result.toString(), "https://neutrino.uber.com:8080/v1/globalStatements"); + } + + /** + * Test that invalid URLs throw MalformedURLException. + */ + @Test + public void testBuildPinotSqlQueryEndpointUrlInvalid() throws Exception + { + Method method = ClpUberPinotSplitProvider.class.getDeclaredMethod("buildPinotSqlQueryEndpointUrl", ClpConfig.class); + method.setAccessible(true); + + config.setMetadataDbUrl("not a valid url"); + try { + method.invoke(splitProvider, config); + fail("Expected MalformedURLException"); + } + catch (Exception e) { + assertTrue(e.getCause() instanceof MalformedURLException); + } + } + + /** + * Test that table names are correctly prefixed with "rta.logging." + */ + @Test + public void testInferMetadataTableName() + { + SchemaTableName schemaTableName = new SchemaTableName("default", "logs"); + ClpTableHandle tableHandle = new ClpTableHandle(schemaTableName, "test"); + + String result = splitProvider.inferMetadataTableName(tableHandle); + + assertEquals(result, "rta.logging.logs"); + } + + /** + * Test table name inference with different schemas. 
+ * Verifies that schema name doesn't affect the output (flat namespace). + */ + @Test + public void testInferMetadataTableNameDifferentSchemas() + { + // Test with default schema + SchemaTableName schemaTableName1 = new SchemaTableName("default", "events"); + ClpTableHandle tableHandle1 = new ClpTableHandle(schemaTableName1, "test"); + assertEquals(splitProvider.inferMetadataTableName(tableHandle1), "rta.logging.events"); + + // Test with production schema - should produce same result + SchemaTableName schemaTableName2 = new SchemaTableName("production", "events"); + ClpTableHandle tableHandle2 = new ClpTableHandle(schemaTableName2, "test"); + assertEquals(splitProvider.inferMetadataTableName(tableHandle2), "rta.logging.events"); + + // Test with staging schema + SchemaTableName schemaTableName3 = new SchemaTableName("staging", "metrics"); + ClpTableHandle tableHandle3 = new ClpTableHandle(schemaTableName3, "test"); + assertEquals(splitProvider.inferMetadataTableName(tableHandle3), "rta.logging.metrics"); + } + + /** + * Test table name inference with special characters. 
+ */ + @Test + public void testInferMetadataTableNameSpecialCharacters() + { + // Test with underscore + SchemaTableName schemaTableName1 = new SchemaTableName("default", "user_logs"); + ClpTableHandle tableHandle1 = new ClpTableHandle(schemaTableName1, "test"); + assertEquals(splitProvider.inferMetadataTableName(tableHandle1), "rta.logging.user_logs"); + + // Test with hyphen + SchemaTableName schemaTableName2 = new SchemaTableName("default", "app-logs"); + ClpTableHandle tableHandle2 = new ClpTableHandle(schemaTableName2, "test"); + assertEquals(splitProvider.inferMetadataTableName(tableHandle2), "rta.logging.app-logs"); + + // Test with numbers + SchemaTableName schemaTableName3 = new SchemaTableName("default", "logs2024"); + ClpTableHandle tableHandle3 = new ClpTableHandle(schemaTableName3, "test"); + assertEquals(splitProvider.inferMetadataTableName(tableHandle3), "rta.logging.logs2024"); + } + + /** + * Test that null table handle throws NullPointerException. + */ + @Test(expectedExceptions = NullPointerException.class, + expectedExceptionsMessageRegExp = "tableHandle is null") + public void testInferMetadataTableNameNull() + { + splitProvider.inferMetadataTableName(null); + } + + /** + * Test the factory method for building Uber table names. + */ + @Test + public void testBuildUberTableName() + { + assertEquals(splitProvider.buildUberTableName("logs"), "rta.logging.logs"); + assertEquals(splitProvider.buildUberTableName("events"), "rta.logging.events"); + assertEquals(splitProvider.buildUberTableName("metrics"), "rta.logging.metrics"); + assertEquals(splitProvider.buildUberTableName("user_activity"), "rta.logging.user_activity"); + assertEquals(splitProvider.buildUberTableName("app-logs"), "rta.logging.app-logs"); + } + + /** + * Test that the split provider is correctly instantiated with configuration. 
+ */ + @Test + public void testConstructor() + { + assertNotNull(splitProvider); + + // Verify it's an instance of the parent class + assertTrue(splitProvider instanceof ClpPinotSplitProvider); + assertTrue(splitProvider instanceof ClpSplitProvider); + } + + /** + * Test SQL query building methods inherited from parent. + */ + @Test + public void testInheritedSqlQueryMethods() + { + // Test buildSplitSelectionQuery (inherited from parent) + String query = splitProvider.buildSplitSelectionQuery("rta.logging.logs", "status = 200"); + assertTrue(query.contains("rta.logging.logs")); + assertTrue(query.contains("status = 200")); + assertTrue(query.contains("SELECT")); + assertTrue(query.contains("tpath")); + + // Test buildSplitMetadataQuery (inherited from parent) + String metaQuery = splitProvider.buildSplitMetadataQuery("rta.logging.events", "timestamp > 1000", "timestamp", "DESC"); + assertTrue(metaQuery.contains("rta.logging.events")); + assertTrue(metaQuery.contains("timestamp > 1000")); + assertTrue(metaQuery.contains("ORDER BY timestamp DESC")); + assertTrue(metaQuery.contains("creationtime")); + assertTrue(metaQuery.contains("lastmodifiedtime")); + assertTrue(metaQuery.contains("num_messages")); + } + + /** + * Test configuration with different split provider types. 
+ */ + @Test + public void testConfigurationTypes() + { + // Test that the configuration is set correctly + assertEquals(config.getSplitProviderType(), ClpConfig.SplitProviderType.PINOT_UBER); + + // Create a new instance with different config to ensure isolation + ClpConfig newConfig = new ClpConfig(); + newConfig.setMetadataDbUrl("https://other-neutrino.uber.com"); + newConfig.setSplitProviderType(ClpConfig.SplitProviderType.PINOT_UBER); + + ClpUberPinotSplitProvider newProvider = new ClpUberPinotSplitProvider(newConfig); + assertNotNull(newProvider); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpPinotSplitFilterProvider.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpPinotSplitFilterProvider.java new file mode 100644 index 0000000000000..33443b566cb37 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpPinotSplitFilterProvider.java @@ -0,0 +1,155 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.plugin.clp.ClpConfig; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; +import java.util.List; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +@Test(singleThreaded = true) +public class TestClpPinotSplitFilterProvider +{ + private String filterConfigPath; + private ClpPinotSplitFilterProvider filterProvider; + + @BeforeMethod + public void setUp() throws IOException, URISyntaxException + { + URL resource = getClass().getClassLoader().getResource("test-pinot-split-filter.json"); + if (resource == null) { + throw new FileNotFoundException("test-pinot-split-filter.json not found in resources"); + } + + filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + ClpConfig config = new ClpConfig(); + config.setSplitFilterConfig(filterConfigPath); + filterProvider = new ClpPinotSplitFilterProvider(config); + } + + /** + * Test that Pinot provider correctly inherits MySQL range mapping functionality. + * Verifies that range comparisons are transformed according to the configuration. 
+ */ + @Test + public void testRangeMappingInheritance() + { + // Test greater than or equal + String sql1 = "\"msg.timestamp\" >= 1234"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "end_timestamp >= 1234"); + + // Test less than or equal + String sql2 = "\"msg.timestamp\" <= 5678"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "begin_timestamp <= 5678"); + + // Test equality (transforms to range check) + String sql3 = "\"msg.timestamp\" = 4567"; + String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3); + assertEquals(result3, "(begin_timestamp <= 4567 AND end_timestamp >= 4567)"); + } + + /** + * Test that expressions without range mappings pass through unchanged. + */ + @Test + public void testNonRangeMappedColumns() + { + // Test that non-mapped columns are not transformed + String sql1 = "\"status_code\" = 200"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "\"status_code\" = 200"); + + String sql2 = "\"hostname\" = 'server1'"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "\"hostname\" = 'server1'"); + } + + /** + * Test complex expressions with multiple predicates. 
+ */ + @Test + public void testComplexExpressions() + { + // Test AND condition with range mapping + String sql1 = "(\"msg.timestamp\" >= 1000 AND \"msg.timestamp\" <= 2000)"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "(end_timestamp >= 1000 AND begin_timestamp <= 2000)"); + + // Test mixed conditions + String sql2 = "(\"msg.timestamp\" = 1500 AND \"status_code\" = 200)"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "((begin_timestamp <= 1500 AND end_timestamp >= 1500) AND \"status_code\" = 200)"); + } + + /** + * Test that remapColumnName correctly returns mapped column names. + */ + @Test + public void testRemapColumnName() + { + // Test range-mapped column + List mappedColumns = filterProvider.remapColumnName("clp.default.table_1", "msg.timestamp"); + assertEquals(mappedColumns, ImmutableList.of("begin_timestamp", "end_timestamp")); + + // Test non-mapped column + List unmappedColumns = filterProvider.remapColumnName("clp.default.table_1", "status_code"); + assertEquals(unmappedColumns, ImmutableList.of("status_code")); + } + + /** + * Test table-level configuration override. + */ + @Test + public void testTableLevelOverride() + { + // Test table_2 specific mapping + String sql = "\"table2_column\" >= 100"; + String result = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_2", sql); + assertEquals(result, "table2_upper >= 100"); + } + + /** + * Test schema-level configuration. + */ + @Test + public void testSchemaLevelMapping() + { + // Test schema-level mapping applies to tables + String sql = "\"schema_column\" <= 500"; + String result = filterProvider.remapSplitFilterPushDownExpression("clp.schema1.any_table", sql); + assertEquals(result, "schema_lower <= 500"); + } + + /** + * Test that configuration is correctly loaded. 
+ */ + @Test + public void testConfigurationLoaded() + { + // Simply verify that the provider was instantiated correctly with the config + assertTrue(filterConfigPath.endsWith("test-pinot-split-filter.json")); + } +} diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java index 7a4058f617d0c..c6fe1cee8bec2 100644 --- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java @@ -57,12 +57,8 @@ public void checkRequiredFilters() config.setSplitFilterConfig(filterConfigPath); ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config); Set testTableScopeSet = ImmutableSet.of(format("%s.%s", CONNECTOR_NAME, new SchemaTableName("default", "table_1"))); - assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters( - testTableScopeSet, - "(\"level\" >= 1 AND \"level\" <= 3)")); - filterProvider.checkContainsRequiredFilters( - testTableScopeSet, - "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)"); + assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters(testTableScopeSet, ImmutableSet.of("level"))); + filterProvider.checkContainsRequiredFilters(testTableScopeSet, ImmutableSet.of("msg.timestamp")); } @Test diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpUberPinotSplitFilterProvider.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpUberPinotSplitFilterProvider.java new file mode 100644 index 0000000000000..edbd3a4cbc774 --- /dev/null +++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpUberPinotSplitFilterProvider.java @@ -0,0 +1,244 @@ +/* + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.plugin.clp.split.filter; + +import com.facebook.presto.plugin.clp.ClpConfig; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +/** + * Unit tests for ClpUberPinotSplitFilterProvider. + * Tests Uber-specific TEXT_MATCH transformations in addition to inherited + * range mapping functionality. + */ +@Test(singleThreaded = true) +public class TestClpUberPinotSplitFilterProvider +{ + private String filterConfigPath; + private ClpUberPinotSplitFilterProvider filterProvider; + + @BeforeMethod + public void setUp() throws IOException, URISyntaxException + { + URL resource = getClass().getClassLoader().getResource("test-pinot-split-filter.json"); + if (resource == null) { + throw new FileNotFoundException("test-pinot-split-filter.json not found in resources"); + } + + filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString(); + ClpConfig config = new ClpConfig(); + config.setSplitFilterConfig(filterConfigPath); + filterProvider = new ClpUberPinotSplitFilterProvider(config); + } + + /** + * Test TEXT_MATCH transformation for simple equality predicates. 
+ * Verifies that Uber-specific TEXT_MATCH transformations are applied. + */ + @Test + public void testTextMatchTransformationSimpleEquality() + { + // Test single equality predicate with integer + String sql1 = "\"status_code\" = 200"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')"); + + // Test single equality predicate with negative integer + String sql2 = "\"level\" = -1"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "TEXT_MATCH(\"__mergedTextIndex\", '/-1:level/')"); + + // Test single equality predicate with decimal + String sql3 = "\"score\" = 3.14"; + String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3); + assertEquals(result3, "TEXT_MATCH(\"__mergedTextIndex\", '/3.14:score/')"); + + // Test single equality predicate with scientific notation + String sql4 = "\"value\" = 1.5e10"; + String result4 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql4); + assertEquals(result4, "TEXT_MATCH(\"__mergedTextIndex\", '/1.5e10:value/')"); + } + + /** + * Test TEXT_MATCH transformation for string literal equality predicates. 
+ */ + @Test + public void testTextMatchTransformationStringLiterals() + { + // Test single equality predicate with string literal + String sql1 = "\"hostname\" = 'uber-server1'"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "TEXT_MATCH(\"__mergedTextIndex\", '/uber-server1:hostname/')"); + + // Test string literal with special characters + String sql2 = "\"service\" = 'uber.logging.service'"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "TEXT_MATCH(\"__mergedTextIndex\", '/uber.logging.service:service/')"); + + // Test empty string literal + String sql3 = "\"tag\" = ''"; + String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3); + assertEquals(result3, "TEXT_MATCH(\"__mergedTextIndex\", '/:tag/')"); + + // Test string literal with spaces + String sql4 = "\"message\" = 'Hello Uber World'"; + String result4 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql4); + assertEquals(result4, "TEXT_MATCH(\"__mergedTextIndex\", '/Hello Uber World:message/')"); + } + + /** + * Test that range mappings are inherited and work correctly. + * Columns with range mappings should NOT be transformed to TEXT_MATCH. 
+ */ + @Test + public void testRangeMappingInheritance() + { + // Test that range-mapped columns don't get TEXT_MATCH transformation + // msg.timestamp has range mapping in test config + String sql1 = "\"msg.timestamp\" = 1234"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "(begin_timestamp <= 1234 AND end_timestamp >= 1234)"); + + // Test greater than or equal (range mapping) + String sql2 = "\"msg.timestamp\" >= 5000"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "end_timestamp >= 5000"); + + // Test less than or equal (range mapping) + String sql3 = "\"msg.timestamp\" <= 10000"; + String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3); + assertEquals(result3, "begin_timestamp <= 10000"); + } + + /** + * Test complex expressions with both TEXT_MATCH and range mappings. + */ + @Test + public void testMixedTransformations() + { + // Mix of range mapping and TEXT_MATCH + String sql1 = "(\"msg.timestamp\" >= 1000 AND \"status_code\" = 200)"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "(end_timestamp >= 1000 AND TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/'))"); + + // Multiple TEXT_MATCH transformations + String sql2 = "(\"hostname\" = 'uber1' AND \"service\" = 'logging')"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "(TEXT_MATCH(\"__mergedTextIndex\", '/uber1:hostname/') AND TEXT_MATCH(\"__mergedTextIndex\", '/logging:service/'))"); + + // Complex nested expression + String sql3 = "((\"msg.timestamp\" <= 2000 AND \"hostname\" = 'uber2') OR \"status_code\" = 404)"; + String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3); + assertEquals(result3, "((begin_timestamp <= 
2000 AND TEXT_MATCH(\"__mergedTextIndex\", '/uber2:hostname/')) OR TEXT_MATCH(\"__mergedTextIndex\", '/404:status_code/'))"); + } + + /** + * Test transformations at different scope levels. + */ + @Test + public void testDifferentScopes() + { + // Table-level scope + String sql1 = "\"status_code\" = 200"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')"); + + // Schema-level scope + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default", sql1); + assertEquals(result2, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')"); + + // Catalog-level scope + String result3 = filterProvider.remapSplitFilterPushDownExpression("clp", sql1); + assertEquals(result3, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')"); + } + + /** + * Test that the filter provider is correctly instantiated. + */ + @Test + public void testConstructor() + { + assertNotNull(filterProvider); + + // Verify it's an instance of the parent classes + assertTrue(filterProvider instanceof ClpPinotSplitFilterProvider); + assertTrue(filterProvider instanceof ClpMySqlSplitFilterProvider); + assertTrue(filterProvider instanceof ClpSplitFilterProvider); + } + + /** + * Test configuration is loaded correctly. + */ + @Test + public void testConfigurationLoaded() + { + // Simply verify that the provider was instantiated correctly with the config + assertTrue(filterConfigPath.endsWith("test-pinot-split-filter.json")); + assertNotNull(filterProvider); + } + + /** + * Test that non-equality expressions are not transformed to TEXT_MATCH. 
+ */ + @Test + public void testNonEqualityNotTransformed() + { + // Greater than should not be transformed + String sql1 = "\"status_code\" > 200"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "\"status_code\" > 200"); + + // Less than should not be transformed + String sql2 = "\"level\" < 5"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "\"level\" < 5"); + + // Not equal should not be transformed + String sql3 = "\"hostname\" != 'server1'"; + String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3); + assertEquals(result3, "\"hostname\" != 'server1'"); + } + + /** + * Test edge cases and special patterns. + */ + @Test + public void testEdgeCases() + { + // Test expression with no transformable parts + String sql1 = "1 = 1"; + String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1); + assertEquals(result1, "1 = 1"); + + // Test column names with special characters (should still work if quoted properly) + String sql2 = "\"column.with.dots\" = 'value'"; + String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2); + assertEquals(result2, "TEXT_MATCH(\"__mergedTextIndex\", '/value:column.with.dots/')"); + + // Test multiple spaces in expression + String sql3 = "\"status_code\" = 200"; + String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3); + assertEquals(result3, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')"); + } +} diff --git a/presto-clp/src/test/resources/test-cockroachdb-schema.yaml b/presto-clp/src/test/resources/test-cockroachdb-schema.yaml new file mode 100644 index 0000000000000..d9fa4c99b3e65 --- /dev/null +++ b/presto-clp/src/test/resources/test-cockroachdb-schema.yaml @@ -0,0 +1,18 @@ +# Test schema for cockroachdb table +# Type 
mappings: +# 0 = Integer (BIGINT) +# 1 = Float (DOUBLE) +# 3 = VarString (VARCHAR) +# 4 = Boolean (BOOLEAN) +# 6 = UnstructuredArray (ARRAY) + +a_bigint: 0 +a_varchar: 3 +b_double: 1 +b_varchar: 3 +c: + d: 4 + e: 3 +f: + g: + h: 6 diff --git a/presto-clp/src/test/resources/test-orders-schema1.yaml b/presto-clp/src/test/resources/test-orders-schema1.yaml new file mode 100644 index 0000000000000..a9f773b5bdd97 --- /dev/null +++ b/presto-clp/src/test/resources/test-orders-schema1.yaml @@ -0,0 +1,11 @@ +# Test schema for orders table in schema1 +# Type mappings: +# 0 = Integer (BIGINT) +# 1 = Float (DOUBLE) +# 3 = VarString (VARCHAR) + +order_id: 0 +customer_id: 0 +product_name: 3 +quantity: 0 +price: 1 diff --git a/presto-clp/src/test/resources/test-orders-schema2.yaml b/presto-clp/src/test/resources/test-orders-schema2.yaml new file mode 100644 index 0000000000000..71a1fbd1d8724 --- /dev/null +++ b/presto-clp/src/test/resources/test-orders-schema2.yaml @@ -0,0 +1,13 @@ +# Test schema for orders table in schema2 (different structure from schema1) +# Type mappings: +# 0 = Integer (BIGINT) +# 1 = Float (DOUBLE) +# 3 = VarString (VARCHAR) +# 4 = Boolean (BOOLEAN) + +order_id: 0 +vendor_id: 0 +item_description: 3 +total_amount: 1 +is_paid: 4 +shipping_address: 3 diff --git a/presto-clp/src/test/resources/test-pinot-split-filter.json b/presto-clp/src/test/resources/test-pinot-split-filter.json new file mode 100644 index 0000000000000..e0edd0814d03b --- /dev/null +++ b/presto-clp/src/test/resources/test-pinot-split-filter.json @@ -0,0 +1,58 @@ +{ + "clp": [ + { + "columnName": "level" + } + ], + "clp.default": [ + { + "columnName": "author" + } + ], + "clp.default.table_1": [ + { + "columnName": "msg.timestamp", + "customOptions": { + "rangeMapping": { + "lowerBound": "begin_timestamp", + "upperBound": "end_timestamp" + } + }, + "required": true + }, + { + "columnName": "file_name" + }, + { + "columnName": "status_code" + } + ], + "clp.default.table_2": [ + { + 
"columnName": "table2_column", + "customOptions": { + "rangeMapping": { + "lowerBound": "table2_lower", + "upperBound": "table2_upper" + } + } + }, + { + "columnName": "request_id" + }, + { + "columnName": "user_id" + } + ], + "clp.schema1": [ + { + "columnName": "schema_column", + "customOptions": { + "rangeMapping": { + "lowerBound": "schema_lower", + "upperBound": "schema_upper" + } + } + } + ] +} \ No newline at end of file diff --git a/presto-clp/src/test/resources/test-tables-schema.yaml b/presto-clp/src/test/resources/test-tables-schema.yaml new file mode 100644 index 0000000000000..90a58ea32aaf4 --- /dev/null +++ b/presto-clp/src/test/resources/test-tables-schema.yaml @@ -0,0 +1,12 @@ +# Test metadata file for ClpYamlMetadataProvider +# Maps tables to their schema definition files +# Tests multiple schemas with duplicate table names (orders appears in both schema1 and schema2) +# Relative paths are resolved relative to this file's directory at runtime +clp: + default: + cockroachdb: test-cockroachdb-schema.yaml + schema1: + orders: test-orders-schema1.yaml + users: test-users-schema1.yaml + schema2: + orders: test-orders-schema2.yaml diff --git a/presto-clp/src/test/resources/test-topn-split-filter.json b/presto-clp/src/test/resources/test-topn-split-filter.json new file mode 100644 index 0000000000000..53450716cb7b4 --- /dev/null +++ b/presto-clp/src/test/resources/test-topn-split-filter.json @@ -0,0 +1,14 @@ +{ + "clp.default.test": [ + { + "columnName": "msg.timestamp", + "customOptions": { + "rangeMapping": { + "lowerBound": "begin_timestamp", + "upperBound": "end_timestamp" + } + }, + "required": true + } + ] +} diff --git a/presto-clp/src/test/resources/test-users-schema1.yaml b/presto-clp/src/test/resources/test-users-schema1.yaml new file mode 100644 index 0000000000000..5e603f32d53aa --- /dev/null +++ b/presto-clp/src/test/resources/test-users-schema1.yaml @@ -0,0 +1,8 @@ +# Test schema for users table in schema1 +# Type mappings: +# 0 = Integer 
(BIGINT) +# 3 = VarString (VARCHAR) + +user_id: 0 +username: 3 +email: 3 diff --git a/presto-native-execution/pom.xml b/presto-native-execution/pom.xml index 200ffa6834afc..adbd01ab4b917 100644 --- a/presto-native-execution/pom.xml +++ b/presto-native-execution/pom.xml @@ -267,6 +267,16 @@ + + + + org.yaml + snakeyaml + 2.1 + + + +