diff --git a/.github/bin/download_nodejs b/.github/bin/download_nodejs
index 773e3f74f18e1..6fa4d62c332a4 100755
--- a/.github/bin/download_nodejs
+++ b/.github/bin/download_nodejs
@@ -1,4 +1,13 @@
#!/usr/bin/env bash
+# Download Node.js and Yarn to Maven cache for frontend-maven-plugin
+# ==================================================================
+# Presto's web UI (presto-ui module) uses frontend-maven-plugin which expects
+# Node.js and Yarn at specific paths in the Maven repository. This script
+# pre-downloads them to avoid flaky downloads during the build.
+#
+# The tarballs are stored at:
+# ${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-*-${OS}-${ARCH}.tar.gz
+# ${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-*.tar.gz
set -euo pipefail
@@ -47,27 +56,27 @@ get_arch() {
}
download_node() {
- if [[ -a "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" ]]; then
+ if [[ -a "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" ]]; then
echo "Node binary exists. Skipped download"
return 0
fi
-
- if ! wget_retry 3 10 "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" \
+
+ if ! wget_retry 3 10 "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" \
"https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" "node"; then
- rm "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz"
+ rm "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz"
return 1
fi
}
download_yarn() {
- if [[ -a "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" ]]; then
+ if [[ -a "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" ]]; then
echo "Yarn binary exists. Skipped download"
return 0
fi
- if ! wget_retry 3 10 "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" \
+ if ! wget_retry 3 10 "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" \
"https://github.com/yarnpkg/yarn/releases/download/v${YARN_VERSION}/yarn-v${YARN_VERSION}.tar.gz" "yarn"; then
- rm "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz"
+ rm "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz"
return 1
fi
}
@@ -75,8 +84,11 @@ download_yarn() {
NODE_OS=$(get_os)
NODE_ARCH=$(get_arch)
-mkdir -p "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}"
-mkdir -p "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}"
+# Use MAVEN_REPO if set, otherwise fall back to default .m2/repository
+MAVEN_REPO="${MAVEN_REPO:-${HOME}/.m2/repository}"
+
+mkdir -p "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}"
+mkdir -p "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}"
if download_node; then
echo "node-v${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz is ready for use"
diff --git a/.github/dockerfiles/yscope-presto-builder.dockerfile b/.github/dockerfiles/yscope-presto-builder.dockerfile
new file mode 100644
index 0000000000000..d57d0ab3b0f2f
--- /dev/null
+++ b/.github/dockerfiles/yscope-presto-builder.dockerfile
@@ -0,0 +1,142 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# YScope Presto Builder Image
+# ===========================
+# A unified builder for presto (Java) and prestocpp (C++).
+#
+# Adapted from upstream's ubuntu-22.04-dependency.dockerfile, with additions:
+# - Pre-warmed ccache for faster C++ builds
+# - Pre-downloaded Maven dependencies for faster Java builds
+# - Pre-downloaded Node.js/Yarn for frontend builds
+#
+# Tagged by hash of dependency files, rebuilt only when deps change.
+
+FROM ghcr.io/y-scope/docker-github-actions-runner:ubuntu-jammy
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# ============================================================================
+# Dependency Installation (from upstream ubuntu-22.04-dependency.dockerfile)
+# ============================================================================
+
+COPY ./presto-native-execution/scripts /presto/scripts/
+COPY ./presto-native-execution/velox/scripts /presto/velox/scripts/
+
+# Required to avoid tzdata prompting for region selection
+ARG DEBIAN_FRONTEND="noninteractive"
+ARG tz="Etc/UTC"
+ENV TZ=${tz}
+ENV PROMPT_ALWAYS_RESPOND=n
+ENV SUDO=" "
+
+# Build parallelism for 32-core self-hosted runners
+# See: https://github.com/y-scope/velox/pull/45
+ARG NUM_THREADS=16
+ARG MAX_HIGH_MEM_JOBS=16
+ARG MAX_LINK_JOBS=12
+ENV MAX_HIGH_MEM_JOBS=${MAX_HIGH_MEM_JOBS}
+ENV MAX_LINK_JOBS=${MAX_LINK_JOBS}
+
+# Install CMake 3.28.3 (required - setup script's pip cmake causes fastfloat issues)
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends wget && \
+ apt-get clean && rm -rf /var/lib/apt/lists/* && \
+ wget -q https://github.com/Kitware/CMake/releases/download/v3.28.3/cmake-3.28.3-linux-x86_64.tar.gz && \
+ tar -xzf cmake-3.28.3-linux-x86_64.tar.gz -C /opt && \
+ rm cmake-3.28.3-linux-x86_64.tar.gz && \
+ ln -sf /opt/cmake-3.28.3-linux-x86_64/bin/cmake /usr/local/bin/cmake && \
+ ln -sf /opt/cmake-3.28.3-linux-x86_64/bin/ctest /usr/local/bin/ctest && \
+ ln -sf /opt/cmake-3.28.3-linux-x86_64/bin/cpack /usr/local/bin/cpack
+
+# Run setup scripts - same pattern as upstream ubuntu-22.04-dependency.dockerfile
+# rpm is needed for MinIO installation (S3-compatible storage for tests)
+RUN mkdir -p /build && \
+ cd /build && \
+ /presto/scripts/setup-ubuntu.sh && \
+ apt install -y rpm && \
+ /presto/velox/scripts/setup-ubuntu.sh install_adapters && \
+ /presto/scripts/setup-adapters.sh && \
+ rm -rf /build
+
+ENV PATH="/presto/.venv/bin:${PATH}"
+ENV VIRTUAL_ENV="/presto/.venv"
+
+# ============================================================================
+# ccache Warmup (YScope addition for faster C++ builds)
+# See: https://github.com/y-scope/velox/pull/45
+# ============================================================================
+
+# ccache settings for portable cache (works across different checkout paths)
+# - CCACHE_DIR: Standard location in /var/cache for system caches
+# - CCACHE_BASEDIR: Set at runtime via GITHUB_WORKSPACE for portability
+# - CCACHE_COMPRESSLEVEL=0: Disabled for faster CI execution (disk space not a concern)
+# - CCACHE_NOHASHDIR: Ignore directory paths in hash for cache hits across checkouts
+ENV CCACHE_DIR=/var/cache/ccache
+ENV CCACHE_COMPRESSLEVEL=0
+ENV CCACHE_MAX_SIZE=5G
+ENV CCACHE_NOHASHDIR=true
+
+RUN mkdir -p ${CCACHE_DIR} && chmod 777 ${CCACHE_DIR}
+
+COPY . /workspace/
+WORKDIR /workspace
+
+# Build prestocpp once to populate ccache
+# Build flags must match CI builds exactly for cache hits (see prestocpp-linux-build-and-unit-test.yml)
+# CCACHE_BASEDIR set to /workspace for the warmup build
+RUN ccache -z && \
+ export CCACHE_BASEDIR=/workspace && \
+ cd presto-native-execution && \
+ cmake \
+ -B _build/release \
+ -GNinja \
+ -DTREAT_WARNINGS_AS_ERRORS=1 \
+ -DENABLE_ALL_WARNINGS=1 \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DPRESTO_ENABLE_PARQUET=ON \
+ -DPRESTO_ENABLE_REMOTE_FUNCTIONS=ON \
+ -DPRESTO_ENABLE_JWT=ON \
+ -DPRESTO_STATS_REPORTER_TYPE=PROMETHEUS \
+ -DPRESTO_MEMORY_CHECKER_TYPE=LINUX_MEMORY_CHECKER \
+ -DCMAKE_PREFIX_PATH=/usr/local \
+ -DThrift_ROOT=/usr/local \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DMAX_LINK_JOBS=${MAX_LINK_JOBS} && \
+ ninja -C _build/release -j ${NUM_THREADS} && \
+ ccache -svz
+
+# ============================================================================
+# Maven/Node.js Cache (YScope addition for faster Java builds)
+# ============================================================================
+
+ENV MAVEN_REPO=/opt/maven/repository
+RUN mkdir -p ${MAVEN_REPO}
+
+# Download dependencies using temporary Java installation
+RUN wget -q https://github.com/adoptium/temurin8-binaries/releases/download/jdk8u442-b06/OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz && \
+ tar -xzf OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz -C /tmp && \
+ rm OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz && \
+ export JAVA_HOME=/tmp/jdk8u442-b06 && \
+ export PATH=${JAVA_HOME}/bin:${PATH} && \
+ export RUNNER_OS=Linux && \
+ export RUNNER_ARCH=X64 && \
+ cd /workspace && \
+ .github/bin/download_nodejs && \
+ ./mvnw dependency:resolve-plugins dependency:resolve -B --no-transfer-progress \
+ -Dmaven.repo.local=${MAVEN_REPO} || true && \
+ rm -rf /tmp/jdk8u442-b06
+
+# Clean up source, keep only caches
+RUN rm -rf /workspace/*
+
+WORKDIR /workspace
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000000000..8b3e55e13e665
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,277 @@
+name: ci
+
+# ==============================================================================
+# CI Workflow Overview
+# ==============================================================================
+# Main orchestrator for all build and test jobs. Builds Java coordinator (presto)
+# and C++ native worker (prestissimo) in parallel, then runs integration tests.
+#
+# Terminology:
+# - presto: Java-based query coordinator (runs queries, manages workers)
+# - prestocpp: C++ worker implementation (source in presto-native-execution/)
+# - prestissimo: Runtime name for prestocpp (Docker image and binary)
+#
+# Job Dependency Graph:
+# config ─► create-builder-image ─┬─► prestocpp ────┬─► integration-tests
+# ├─► presto ───────┘
+# └─► presto-tests
+#
+# ==============================================================================
+# Key Design Decisions
+# ==============================================================================
+#
+# 1. CACHING STRATEGY: Bake caches into Docker image layers
+# --------------------------------------------------------
+# Problem: Downloading ccache (~2GB+) and Maven cache from remote storage on
+# every CI run causes bandwidth bottlenecks with parallel builds.
+#
+# Solution: Bake caches into Docker image layers.
+# - Docker layer caching: each host downloads cache layer ONCE, then reuses it
+# - Builder image based on GitHub runner image (always pre-cached on self-hosted)
+# - Parallel jobs on same host share cached layers with zero network traffic
+#
+# 2. IMAGE TAGGING: Dual tags + version streams
+# -------------------------------------------
+# Problem: Branch-only tags can't pin specific builds for production while also
+# providing a "latest" tag for development.
+#
+# Solution: Every build gets two tags:
+#   - Immutable: <version>-<TYPE>-<timestamp>-<hash> (e.g., 0.293-BETA-20250522140509-abc123)
+# Use for: Production deployments, reproducible environments
+#   - Mutable: <version>-<TYPE>-SNAPSHOT (e.g., 0.293-BETA-SNAPSHOT)
+# Use for: Always pulling latest without knowing exact version
+#
+# Version streams (RELEASE, BETA, DEV) let users choose stability level.
+#
+# 3. BUILDER IMAGE TAG: Auto-computed dependency hash
+# -------------------------------------------------
+# The unified-builder image tag is a hash of dependency files (setup scripts,
+# pom.xml, etc.). Image is only rebuilt when dependencies change.
+#
+# 4. CENTRALIZED CONFIG JOB: Single source of truth
+# -----------------------------------------------
+# Problem: GitHub Actions doesn't allow `${{ env.* }}` in `with:` blocks when
+# calling reusable workflows. Also, version tag computation was duplicated
+# across presto-build.yml and prestocpp workflows.
+#
+# Solution: A `config` job computes all shared configuration upfront:
+# - builder-image: Hash-based tag for the builder image
+# - runtime-version-tag: Immutable tag (e.g., 0.293-BETA-20250522140509-abc123)
+# - runtime-snapshot-tag: Mutable tag (e.g., 0.293-BETA-SNAPSHOT)
+#
+# All downstream jobs reference these outputs via `needs.config.outputs.*`.
+#
+# ==============================================================================
+# Comparison with Upstream (prestodb/presto)
+# ==============================================================================
+# |---------------------|----------------------------------------|-----------------------------------------------|
+# | Aspect | Upstream | This Fork (yscope) |
+# |---------------------|----------------------------------------|-----------------------------------------------|
+# | Runners | GitHub-hosted (ephemeral) | Self-hosted (ephemeral) |
+# | CI Structure | Separate independent workflows | Unified ci.yml orchestrator (parallel) |
+# | Builder Image | presto-native-dependency (C++ only) | unified-builder (Java + C++ + caches) |
+# | Builder Image Tag | Pinned version-timestamp-hash | Auto-computed dependency hash |
+# | Runtime Image Tag | Release version only (e.g., 0.292) | version-TYPE-timestamp-hash per build |
+# | ccache Strategy | Stash/restore via Apache Infra | Pre-warmed in builder image |
+# | Image Publishing | On release only | On every push (presto + prestissimo) |
+# |---------------------|----------------------------------------|-----------------------------------------------|
+#
+# ==============================================================================
+# Configuration
+# ==============================================================================
+# IMAGE_VERSION_TYPE (env variable below)
+# Controls version stream for Docker images. Values: RELEASE, BETA, DEV
+#
+# ARTIFACT_JAVA_VERSION (GitHub repo variable: Settings > Secrets and variables)
+# Controls which Java version uploads artifacts. Values: 8, 17. Default: 8
+#
+# ==============================================================================
+# Outputs
+# ==============================================================================
+# Artifacts (1-day retention, shared between jobs):
+# | Artifact | Contents |
+# |---------------------|---------------------------------------------------|
+# | presto-server | presto-server-*.tar.gz |
+# | presto-cli | presto-cli-*-executable.jar |
+# | presto-native-build | presto_server, velox_functions_remote_server_main |
+#
+# Docker Images (ghcr.io, pushed on push events only):
+# | Image | Description |
+# |-----------------|--------------------------------|
+# | unified-builder | Build environment with deps |
+# | presto | Java coordinator runtime |
+# | prestissimo | C++ native worker runtime |
+
+# ==============================================================================
+# Triggers: When does this workflow run?
+# ==============================================================================
+on:
+ workflow_dispatch: # Manual trigger from GitHub UI (Actions tab -> Run workflow)
+ pull_request: # On every pull request
+ push: # On every push to any branch
+ paths-ignore:
+ - 'presto-docs/**' # Skip CI for docs-only changes (docs have their own workflow)
+
+# ==============================================================================
+# Environment Variables
+# ==============================================================================
+env:
+ # Docker image version type for presto and prestissimo images
+ # See "Configuration" section above for details
+ IMAGE_VERSION_TYPE: 'BETA'
+
+ # Maven JVM settings (not inherited by called workflows)
+ MAVEN_OPTS: "-Xmx1024M -XX:+ExitOnOutOfMemoryError"
+ MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
+ RETRY: .github/bin/retry
+
+# ==============================================================================
+# Concurrency Control
+# ==============================================================================
+# Prevents multiple CI runs for the same branch from running simultaneously.
+# If you push twice quickly, the first run is cancelled and only the second runs.
+concurrency:
+ group: "${{github.workflow}}-${{github.ref}}"
+ cancel-in-progress: true
+
+# ==============================================================================
+# Jobs
+# ==============================================================================
+jobs:
+ # ----------------------------------------------------------------------------
+ # Step 1: Config - Compute Builder Tag and Runtime Version Tags
+ # ----------------------------------------------------------------------------
+ # Centralizes all configuration computation:
+ # - builder-image: Hash-based tag for the builder image (only rebuilds when dependencies change)
+ # - runtime-version-tag: Immutable tag for runtime images (version-TYPE-timestamp-hash)
+ # - runtime-snapshot-tag: Mutable SNAPSHOT tag for runtime images (version-TYPE-SNAPSHOT)
+ config:
+ runs-on: ubuntu-latest
+ outputs:
+ builder-image: ${{ steps.builder.outputs.image }}
+ runtime-version-tag: ${{ steps.version.outputs.runtime-version-tag }}
+ runtime-snapshot-tag: ${{ steps.version.outputs.runtime-snapshot-tag }}
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ show-progress: false
+
+ - name: Initialize submodules (needed for velox scripts hash)
+ run: |
+ # Try shallow clone first (fast), fall back to full clone if pinned commit not in shallow history
+ git submodule update --init --recursive --depth=1 || \
+ git submodule update --init --recursive
+
+ - name: Compute builder image tag
+ id: builder
+ run: |
+ # Compute hash of files that affect the builder image:
+ # - Builder dockerfile itself
+ # - C++ setup scripts (native worker dependencies)
+ # - pom.xml files (Maven/Java dependencies)
+ # - download_nodejs script (Node.js/Yarn for frontend)
+ DOCKERFILE_HASH=$(sha256sum .github/dockerfiles/yscope-presto-builder.dockerfile | cut -c1-8)
+ SCRIPTS_HASH=$(find presto-native-execution/scripts presto-native-execution/velox/scripts -type f -exec sha256sum {} \; | sort | sha256sum | cut -c1-8)
+ POM_HASH=$(find . -name "pom.xml" -exec sha256sum {} \; | sort | sha256sum | cut -c1-8)
+ NODEJS_HASH=$(sha256sum .github/bin/download_nodejs | cut -c1-8)
+ TAG="${DOCKERFILE_HASH}-${SCRIPTS_HASH}-${POM_HASH}-${NODEJS_HASH}"
+ echo "image=ghcr.io/${{ github.repository }}/unified-builder:${TAG}" >> $GITHUB_OUTPUT
+
+ - name: Compute runtime version tags
+ id: version
+ run: |
+ # Extract base version from pom.xml (e.g., "0.293" from "0.293-SNAPSHOT")
+          BASE_VERSION=$(grep '<version>' pom.xml | head -1 | sed 's/.*<version>\(.*\)<\/version>.*/\1/' | sed 's/-SNAPSHOT//')
+
+ # Get commit timestamp for immutable tag
+ TIMESTAMP=$(git show -s --format=%cd --date=format:'%Y%m%d%H%M%S' HEAD)
+ SHORT_SHA=$(git rev-parse --short HEAD)
+
+ # Output tags:
+ # - runtime-version-tag: e.g., 0.293-BETA-20250529140509-484b00e (immutable)
+ # - runtime-snapshot-tag: e.g., 0.293-BETA-SNAPSHOT (mutable, always latest)
+ echo "runtime-version-tag=${BASE_VERSION}-${{ env.IMAGE_VERSION_TYPE }}-${TIMESTAMP}-${SHORT_SHA}" >> $GITHUB_OUTPUT
+ echo "runtime-snapshot-tag=${BASE_VERSION}-${{ env.IMAGE_VERSION_TYPE }}-SNAPSHOT" >> $GITHUB_OUTPUT
+
+ # ----------------------------------------------------------------------------
+ # Step 2: Create Builder Image (if needed)
+ # ----------------------------------------------------------------------------
+ # Checks if a builder image with this tag exists. If not, builds and pushes it.
+ # The image contains all C++ and Java dependencies pre-installed.
+ create-builder-image:
+ needs: config
+ uses: ./.github/workflows/create-builder-image.yml
+ with:
+ builder-image: ${{ needs.config.outputs.builder-image }}
+
+ # ----------------------------------------------------------------------------
+ # Step 3a: Build Presto (Java)
+ # ----------------------------------------------------------------------------
+ # Builds the Java coordinator with both Java 8 and Java 17 in parallel.
+ # Only the version matching ARTIFACT_JAVA_VERSION (default: '8') will:
+ # - Upload artifacts (presto-server, presto-cli) for integration tests
+ # - Build and push the presto Docker image
+ presto:
+ name: presto${{ matrix.java-major }}
+ needs: [config, create-builder-image]
+ strategy:
+ matrix:
+ include:
+ - java-major: '8'
+ java-version: '8.0.442'
+ - java-major: '17'
+ java-version: '17.0.13'
+ uses: ./.github/workflows/presto-build.yml
+ with:
+ builder-image: ${{ needs.config.outputs.builder-image }}
+ java-version: ${{ matrix.java-version }}
+ should-upload-artifacts: ${{ matrix.java-major == (vars.ARTIFACT_JAVA_VERSION || '8') }}
+ should-build-image: ${{ matrix.java-major == (vars.ARTIFACT_JAVA_VERSION || '8') }}
+ runtime-version-tag: ${{ needs.config.outputs.runtime-version-tag }}
+ runtime-snapshot-tag: ${{ needs.config.outputs.runtime-snapshot-tag }}
+ secrets: inherit
+
+ # ----------------------------------------------------------------------------
+ # Step 3b: Presto Unit Tests (Java)
+ # ----------------------------------------------------------------------------
+ # Runs Java unit tests in parallel with builds. Uses matrix strategy to run
+ # multiple test modules in parallel across Java 8 and Java 17.
+ presto-tests:
+ name: presto-tests
+ needs: [config, create-builder-image]
+ uses: ./.github/workflows/tests.yml
+ with:
+ builder-image: ${{ needs.config.outputs.builder-image }}
+ secrets: inherit
+
+ # ----------------------------------------------------------------------------
+ # Step 3c: Build and Test Prestocpp (C++)
+ # ----------------------------------------------------------------------------
+ # Builds the C++ native worker (prestocpp), runs C++ unit tests, and builds
+ # the prestissimo runtime Docker image.
+ # Uses pre-warmed ccache from builder image for fast incremental builds.
+ # Uploads the compiled binary as an artifact for integration tests.
+ prestocpp:
+ name: prestocpp
+ needs: [config, create-builder-image]
+ uses: ./.github/workflows/prestocpp-linux-build-and-unit-test.yml
+ with:
+ builder-image: ${{ needs.config.outputs.builder-image }}
+ runtime-version-tag: ${{ needs.config.outputs.runtime-version-tag }}
+ runtime-snapshot-tag: ${{ needs.config.outputs.runtime-snapshot-tag }}
+ secrets: inherit
+
+ # ----------------------------------------------------------------------------
+ # Step 4: Integration Tests
+ # ----------------------------------------------------------------------------
+ # End-to-end tests that run the Java coordinator with the C++ native worker.
+ # Requires artifacts from both:
+ # - presto: presto-server tarball (Java coordinator)
+ # - prestocpp: presto_server binary (C++ native worker)
+ integration-tests:
+ name: integration-tests
+ needs: [config, create-builder-image, presto, prestocpp]
+ uses: ./.github/workflows/integration-tests.yml
+ with:
+ builder-image: ${{ needs.config.outputs.builder-image }}
+ secrets: inherit
diff --git a/.github/workflows/create-builder-image.yml b/.github/workflows/create-builder-image.yml
new file mode 100644
index 0000000000000..c91a2bcb2f257
--- /dev/null
+++ b/.github/workflows/create-builder-image.yml
@@ -0,0 +1,79 @@
+name: create-builder-image
+
+# ==============================================================================
+# Create Builder Image
+# ==============================================================================
+# This workflow creates the unified builder Docker image if it doesn't already exist.
+#
+# The builder image contains:
+# - All C++ dependencies (boost, folly, etc.) pre-compiled
+# - Pre-warmed ccache (compiler cache) with prestocpp already built once
+# - Pre-downloaded Maven dependencies
+# - Pre-downloaded Node.js/Yarn
+#
+# Building this image takes ~1 hour, but we only do it when dependencies change.
+# Most CI runs will skip this step because the image already exists.
+
+on:
+ workflow_call:
+ inputs:
+ builder-image:
+ description: 'Full builder image URI with tag'
+ required: true
+ type: string
+
+jobs:
+ create-builder-image:
+ name: "create-builder-image"
+ # Use self-hosted runner with 32 cores for faster builds
+ # The [self-hosted, cores=32] syntax is a label filter - it selects runners with both labels
+ runs-on: [self-hosted, cores=32]
+ timeout-minutes: 180
+ # Concurrency control: If two workflows try to build the same image simultaneously,
+ # only one will run. The second will wait (cancel-in-progress: false means don't cancel).
+ # This prevents duplicate builds of the same image.
+ concurrency:
+ group: ${{ inputs.builder-image }}
+ cancel-in-progress: false
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: recursive # Also checkout velox submodule (needed for build)
+ show-progress: false
+
+ - name: Login to GitHub Container Registry
+ uses: docker/login-action@v3
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Set up Docker Buildx
+ # Buildx is Docker's extended build tool with better caching and multi-platform support
+ uses: docker/setup-buildx-action@v3
+
+ - name: Check if builder image exists
+ id: check-image
+ run: |
+ # Check if the image already exists in the registry
+ # docker manifest inspect queries the registry without pulling the image
+ IMAGE_TAG="${{ inputs.builder-image }}"
+ if docker manifest inspect $IMAGE_TAG > /dev/null 2>&1; then
+ echo "exists=true" >> $GITHUB_OUTPUT
+ echo "Builder image already exists: $IMAGE_TAG"
+ else
+ echo "exists=false" >> $GITHUB_OUTPUT
+ echo "Builder image does not exist: $IMAGE_TAG"
+ fi
+
+ - name: Build and push unified builder image
+ # Only build if image doesn't exist (skip if cached)
+ if: steps.check-image.outputs.exists == 'false'
+ uses: docker/build-push-action@v6
+ with:
+ context: . # Build context is the entire repo (needed for setup scripts)
+ file: ./.github/dockerfiles/yscope-presto-builder.dockerfile
+ push: true # Push to ghcr.io after building
+ tags: ${{ inputs.builder-image }}
+ labels: |
+ org.opencontainers.image.source=https://github.com/${{ github.repository }}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 46565491c504b..13cc727dcbe3c 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -20,9 +20,9 @@ env:
concurrency:
group: "${{github.workflow}}-${{github.ref}}"
- # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so
- # that each commit to release-0.293-clp-connector is checked completely.
- cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}"
+ # Cancel in-progress jobs for efficiency. Exclude branches with `release-0.293-clp-connector-snapshot` prefix so
+ # that each commit to these branches is checked completely.
+ cancel-in-progress: "${{!startsWith(github.ref, 'refs/heads/release-0.293-clp-connector-snapshot')}}"
jobs:
test:
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
new file mode 100644
index 0000000000000..708341a64f803
--- /dev/null
+++ b/.github/workflows/integration-tests.yml
@@ -0,0 +1,459 @@
+name: integration-tests
+
+# ==============================================================================
+# Integration Tests
+# ==============================================================================
+# End-to-end tests that run the Java coordinator with the C++ native worker.
+#
+# These tests verify that the Java coordinator and C++ native worker work together
+# correctly. They require artifacts from both:
+# - presto-java8: presto-server tarball (Java coordinator)
+# - prestocpp: presto_server binary (C++ native worker)
+#
+# Test Categories:
+# ----------------
+# - integration-e2e-java: End-to-end tests (TestPrestoNative*.java)
+# - integration-storage-java: Storage format tests (PARQUET, DWRF)
+# - integration-sidecar-java: Sidecar plugin tests
+#
+# Adapted from upstream's prestocpp-linux-build-and-unit-test.yml jobs:
+# - prestocpp-linux-presto-e2e-tests -> integration-e2e-java
+# - prestocpp-linux-presto-native-tests -> integration-storage-java
+# - prestocpp-linux-presto-sidecar-tests -> integration-sidecar-java
+#
+# Key differences from upstream:
+# 1. Uses pre-built presto-server artifact instead of building from source
+# (upstream runs: ./mvnw install -pl 'presto-native-execution' -am)
+# 2. Uses unified builder image with pre-cached Maven dependencies
+# 3. Artifact download path handling differs due to actions/download-artifact@v4
+# stripping common path prefixes
+# 4. Adds PRESTO_SERVER_DIR and additionalClasspath for presto-server JARs
+# 5. Sets LD_LIBRARY_PATH explicitly for native library discovery
+
+on:
+ workflow_call:
+ inputs:
+ builder-image:
+ description: 'Full builder image URI with tag'
+ required: true
+ type: string
+
+jobs:
+ # Upstream: prestocpp-linux-presto-e2e-tests
+ # Tests: TestPrestoNative*.java in presto-native-execution module
+ integration-e2e-java:
+ name: "integration-e2e-java"
+ runs-on: self-hosted
+ timeout-minutes: 180
+ container:
+ image: ${{ inputs.builder-image }}
+ credentials:
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+ env:
+ MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError"
+ MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end"
+ # Use pre-cached Maven repository from builder image
+ MAVEN_REPO: /opt/maven/repository
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ show-progress: false
+
+ - name: Setup Java
+ uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin'
+ java-version: '8.0.442'
+
+ # setup-java may override MAVEN_REPO, so explicitly set it after
+ - name: Configure Maven repository
+ run: |
+ echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV
+
+ - name: Download artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: presto-native-build
+ path: .
+
+ # Permissions are lost when uploading. Details here: https://github.com/actions/upload-artifact/issues/38
+ - name: Restore execute permissions and library path
+ run: |
+ # actions/download-artifact@v4 strips common path prefix when downloading
+ # Uploaded: presto-native-execution/_build/release/presto_cpp/main/presto_server
+ # Downloaded to: presto_cpp/main/presto_server
+ echo "Setting up binary directory structure..."
+ mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/
+ mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/
+
+ # Move and restore execute permissions for presto_server
+ if [ -f "${GITHUB_WORKSPACE}/presto_cpp/main/presto_server" ]; then
+ echo "Found presto_server, moving to expected location..."
+ mv ${GITHUB_WORKSPACE}/presto_cpp/main/presto_server ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/
+ chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server
+ echo "presto_server moved and made executable"
+ else
+ echo "ERROR: presto_server not found at ${GITHUB_WORKSPACE}/presto_cpp/main/presto_server"
+ echo "Contents of workspace:"
+ ls -la ${GITHUB_WORKSPACE}/
+ echo "Searching for presto_server:"
+ find ${GITHUB_WORKSPACE} -name "presto_server" -type f 2>/dev/null
+ exit 1
+ fi
+
+ # Move and restore execute permissions for velox remote server if it exists
+ if [ -f "${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main" ]; then
+ echo "Found velox_functions_remote_server_main, moving to expected location..."
+ mv ${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/
+ chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main
+ echo "velox_functions_remote_server_main moved and made executable"
+ else
+ echo "Warning: velox_functions_remote_server_main not found (might not be built in this configuration)"
+ fi
+
+ # Note: All required libraries (boost, snappy, geos, etc.) are already installed
+ # in the builder container and available via system library paths. No additional
+ # library configuration needed.
+
+ # Download pre-built presto-server from presto-java8 job
+ # This replaces upstream's: ./mvnw install -pl 'presto-native-execution' -am
+ - name: Download presto-server artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: presto-server
+ path: .
+
+ - name: Extract presto-server
+ run: |
+ echo "Extracting presto-server..."
+ PRESTO_SERVER_TAR=$(ls presto-server-*.tar.gz)
+ tar -xzf ${PRESTO_SERVER_TAR}
+ PRESTO_SERVER_DIR=$(ls -d presto-server-*/ | head -1 | sed 's:/$::')
+ echo "Presto server extracted to ${PRESTO_SERVER_DIR}/"
+ echo "PRESTO_SERVER_DIR=${GITHUB_WORKSPACE}/${PRESTO_SERVER_DIR}" >> $GITHUB_ENV
+
+ # YScope-specific: presto-clp is not in upstream, required for CLP UDF tests
+ - name: Build required dependencies
+ env:
+ MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
+ run: |
+ ./mvnw install -B -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true \
+ --no-transfer-progress -Dmaven.repo.local=${{ env.MAVEN_REPO }} \
+ -pl 'presto-clp'
+
+ - name: Compile integration test classes
+ env:
+ MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
+ run: |
+ ./mvnw test-compile -Dmaven.repo.local=${{ env.MAVEN_REPO }} \
+ -pl 'presto-native-execution'
+
+ - name: Run presto-native e2e tests
+ run: |
+ # First verify the binary exists at expected location
+ export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server"
+ if [ ! -f "${PRESTO_SERVER_PATH}" ]; then
+ echo "ERROR: presto_server binary not found at: ${PRESTO_SERVER_PATH}"
+ echo "Current directory structure:"
+ ls -la ${GITHUB_WORKSPACE}/
+ echo "Checking presto-native-execution directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/ || echo "Directory not found"
+ echo "Checking _build directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/ || echo "Directory not found"
+ echo "Checking release directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/ || echo "Directory not found"
+ exit 1
+ fi
+ echo "Found presto_server binary at: ${PRESTO_SERVER_PATH}"
+
+ export TESTFILES=`find ./presto-native-execution/src/test -type f -name 'TestPrestoNative*.java'`
+ # Convert file paths to comma separated class names
+ export TESTCLASSES=
+ for test_file in $TESTFILES
+ do
+ tmp=${test_file##*/}
+ test_class=${tmp%%\.*}
+ export TESTCLASSES="${TESTCLASSES},$test_class"
+ done
+ export TESTCLASSES=${TESTCLASSES#,}
+ echo "TESTCLASSES = $TESTCLASSES"
+
+ # Add presto-server JARs to classpath for tests
+ export PRESTO_CLASSPATH="${PRESTO_SERVER_DIR}/plugin/*:${PRESTO_SERVER_DIR}/lib/*"
+
+ # Set LD_LIBRARY_PATH for native workers to find libraries
+ # This must be exported so child processes (native workers) inherit it
+ export LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
+
+ ./mvnw test \
+ ${MAVEN_TEST} \
+ -Dmaven.repo.local=${{ env.MAVEN_REPO }} \
+ -pl 'presto-native-execution' \
+ -Dtest="${TESTCLASSES}" \
+ -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \
+ -DPRESTO_SERVER_DIR=${PRESTO_SERVER_DIR} \
+ -DDATA_DIR=${RUNNER_TEMP} \
+ -Duser.timezone=America/Bahia_Banderas \
+ -DadditionalClasspath="${PRESTO_CLASSPATH}" \
+ -T1C
+
+ # Upstream: prestocpp-linux-presto-native-tests
+ # Tests: Test*.java in presto-native-tests module with storage format matrix
+ integration-storage-java:
+ name: "integration-storage-java"
+ runs-on: self-hosted
+ timeout-minutes: 180
+ container:
+ image: ${{ inputs.builder-image }}
+ credentials:
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+ strategy:
+ fail-fast: false
+ matrix:
+ storage-format: ["PARQUET", "DWRF"]
+ env:
+ MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError"
+ MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end"
+ MAVEN_REPO: /opt/maven/repository
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ show-progress: false
+
+ - name: Download artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: presto-native-build
+ path: .
+
+ # Restore artifact permissions and move to expected paths
+ # (same as integration-e2e-java, see comments there for details)
+ - name: Restore execute permissions and library path
+ run: |
+ mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/
+ mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/
+ mv ${GITHUB_WORKSPACE}/presto_cpp/main/presto_server ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/
+ chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server
+ if [ -f "${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main" ]; then
+ mv ${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/
+ chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main
+ fi
+
+ - name: Setup Java
+ uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin'
+ java-version: '8.0.442'
+
+ # setup-java may override MAVEN_REPO, so explicitly set it after
+ - name: Configure Maven repository
+ run: |
+ echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV
+
+ # Download pre-built presto-server from presto-java8 job
+ # This replaces upstream's: ./mvnw install -pl 'presto-native-tests' -am
+ - name: Download presto-server artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: presto-server
+ path: .
+
+ - name: Extract presto-server
+ run: |
+ echo "Extracting presto-server..."
+ PRESTO_SERVER_TAR=$(ls presto-server-*.tar.gz)
+ tar -xzf ${PRESTO_SERVER_TAR}
+ PRESTO_SERVER_DIR=$(ls -d presto-server-*/ | head -1 | sed 's:/$::')
+ echo "Presto server extracted to ${PRESTO_SERVER_DIR}/"
+ echo "PRESTO_SERVER_DIR=${GITHUB_WORKSPACE}/${PRESTO_SERVER_DIR}" >> $GITHUB_ENV
+
+ - name: Compile integration test classes only
+ env:
+ MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
+ run: |
+ # Presto JARs already available from presto-server artifact
+ ./mvnw test-compile -Dmaven.repo.local=${{ env.MAVEN_REPO }} -pl 'presto-native-tests'
+
+ - name: Run presto-native tests
+ run: |
+ # First verify the binary exists at expected location
+ export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server"
+ if [ ! -f "${PRESTO_SERVER_PATH}" ]; then
+ echo "ERROR: presto_server binary not found at: ${PRESTO_SERVER_PATH}"
+ echo "Current directory structure:"
+ ls -la ${GITHUB_WORKSPACE}/
+ echo "Checking presto-native-execution directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/ || echo "Directory not found"
+ echo "Checking _build directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/ || echo "Directory not found"
+ echo "Checking release directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/ || echo "Directory not found"
+ exit 1
+ fi
+ echo "Found presto_server binary at: ${PRESTO_SERVER_PATH}"
+
+ export TESTFILES=`find ./presto-native-tests/src/test -type f -name 'Test*.java'`
+ # Convert file paths to comma separated class names
+ export TESTCLASSES=
+ for test_file in $TESTFILES
+ do
+ tmp=${test_file##*/}
+ test_class=${tmp%%\.*}
+ export TESTCLASSES="${TESTCLASSES},$test_class"
+ done
+ export TESTCLASSES=${TESTCLASSES#,}
+ echo "TESTCLASSES = $TESTCLASSES"
+
+ # Add presto-server JARs to classpath for tests
+ export PRESTO_CLASSPATH="${PRESTO_SERVER_DIR}/plugin/*:${PRESTO_SERVER_DIR}/lib/*"
+
+ # Set LD_LIBRARY_PATH for native workers to find libraries
+ export LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
+
+ ./mvnw test \
+ ${MAVEN_TEST} \
+ -Dmaven.repo.local=${{ env.MAVEN_REPO }} \
+ -pl 'presto-native-tests' \
+ -DstorageFormat=${{ matrix.storage-format }} \
+ -Dtest="${TESTCLASSES}" \
+ -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \
+ -DPRESTO_SERVER_DIR=${PRESTO_SERVER_DIR} \
+ -DDATA_DIR=${RUNNER_TEMP} \
+ -Duser.timezone=America/Bahia_Banderas \
+ -DadditionalClasspath="${PRESTO_CLASSPATH}" \
+ -DLD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \
+ -T1C
+
+ # Upstream: prestocpp-linux-presto-sidecar-tests
+ # Tests: Test*.java in presto-native-sidecar-plugin module
+ integration-sidecar-java:
+ name: "integration-sidecar-java"
+ runs-on: self-hosted
+ timeout-minutes: 180
+ container:
+ image: ${{ inputs.builder-image }}
+ credentials:
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+ env:
+ MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError"
+ MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end"
+ MAVEN_REPO: /opt/maven/repository
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ show-progress: false
+
+ - name: Setup Java
+ uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin'
+ java-version: '8.0.442'
+
+ # setup-java may override MAVEN_REPO, so explicitly set it after
+ - name: Configure Maven repository
+ run: |
+ echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV
+
+ - name: Download artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: presto-native-build
+ path: .
+
+ # Restore artifact permissions and move to expected paths
+ # (same as integration-e2e-java, see comments there for details)
+ - name: Restore execute permissions and library path
+ run: |
+ mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/
+ mkdir -p ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/
+ mv ${GITHUB_WORKSPACE}/presto_cpp/main/presto_server ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/
+ chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server
+ if [ -f "${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main" ]; then
+ mv ${GITHUB_WORKSPACE}/velox/velox/functions/remote/server/velox_functions_remote_server_main ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/
+ chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main
+ fi
+
+ # Download pre-built presto-server from presto-java8 job
+ # This replaces upstream's: ./mvnw install -pl 'presto-native-sidecar-plugin' -am
+ - name: Download presto-server artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: presto-server
+ path: .
+
+ - name: Extract presto-server
+ run: |
+ echo "Extracting presto-server..."
+ PRESTO_SERVER_TAR=$(ls presto-server-*.tar.gz)
+ tar -xzf ${PRESTO_SERVER_TAR}
+ PRESTO_SERVER_DIR=$(ls -d presto-server-*/ | head -1 | sed 's:/$::')
+ echo "Presto server extracted to ${PRESTO_SERVER_DIR}/"
+ echo "PRESTO_SERVER_DIR=${GITHUB_WORKSPACE}/${PRESTO_SERVER_DIR}" >> $GITHUB_ENV
+
+ # YScope-specific: presto-clp is not in upstream, required for CLP UDF tests
+ - name: Build required dependencies
+ env:
+ MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
+ run: |
+ ./mvnw install -B -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true \
+ --no-transfer-progress -Dmaven.repo.local=${{ env.MAVEN_REPO }} \
+ -pl 'presto-clp'
+
+ - name: Compile integration test classes
+ env:
+ MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
+ run: |
+ ./mvnw test-compile -Dmaven.repo.local=${{ env.MAVEN_REPO }} \
+ -pl 'presto-native-sidecar-plugin'
+
+ - name: Run presto-native sidecar tests
+ run: |
+ # First verify the binary exists at expected location
+ export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server"
+ if [ ! -f "${PRESTO_SERVER_PATH}" ]; then
+ echo "ERROR: presto_server binary not found at: ${PRESTO_SERVER_PATH}"
+ echo "Current directory structure:"
+ ls -la ${GITHUB_WORKSPACE}/
+ echo "Checking presto-native-execution directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/ || echo "Directory not found"
+ echo "Checking _build directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/ || echo "Directory not found"
+ echo "Checking release directory:"
+ ls -la ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/ || echo "Directory not found"
+ exit 1
+ fi
+ echo "Found presto_server binary at: ${PRESTO_SERVER_PATH}"
+
+ export TESTFILES=`find ./presto-native-sidecar-plugin/src/test -type f -name 'Test*.java'`
+ # Convert file paths to comma separated class names
+ export TESTCLASSES=
+ for test_file in $TESTFILES
+ do
+ tmp=${test_file##*/}
+ test_class=${tmp%%\.*}
+ export TESTCLASSES="${TESTCLASSES},$test_class"
+ done
+ export TESTCLASSES=${TESTCLASSES#,}
+ echo "TESTCLASSES = $TESTCLASSES"
+
+ # Add presto-server JARs to classpath for tests
+ export PRESTO_CLASSPATH="${PRESTO_SERVER_DIR}/plugin/*:${PRESTO_SERVER_DIR}/lib/*"
+
+ # Set LD_LIBRARY_PATH for native workers to find libraries
+ export LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
+
+ ./mvnw test \
+ ${MAVEN_TEST} \
+ -Dmaven.repo.local=${{ env.MAVEN_REPO }} \
+ -pl 'presto-native-sidecar-plugin' \
+ -Dtest="${TESTCLASSES}" \
+ -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \
+ -DPRESTO_SERVER_DIR=${PRESTO_SERVER_DIR} \
+ -DDATA_DIR=${RUNNER_TEMP} \
+ -Duser.timezone=America/Bahia_Banderas \
+ -DadditionalClasspath="${PRESTO_CLASSPATH}" \
+ -DLD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \
+ -T1C
\ No newline at end of file
diff --git a/.github/workflows/maven-checks.yml b/.github/workflows/maven-checks.yml
deleted file mode 100644
index 2295c413bef7b..0000000000000
--- a/.github/workflows/maven-checks.yml
+++ /dev/null
@@ -1,115 +0,0 @@
-name: maven checks
-
-on:
- pull_request:
- push:
-
-env:
- # An envar that signals to tests we are executing in the CI environment
- CONTINUOUS_INTEGRATION: true
- MAVEN_OPTS: "-Xmx1024M -XX:+ExitOnOutOfMemoryError"
- MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
- RETRY: .github/bin/retry
-
-concurrency:
- group: "${{github.workflow}}-${{github.ref}}"
-
- # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so
- # that each commit to release-0.293-clp-connector is checked completely.
- cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}"
-
-jobs:
- maven-checks:
- strategy:
- fail-fast: false
- matrix:
- java: [ 8.0.442, 17.0.13 ]
- runs-on: ubuntu-latest
- timeout-minutes: 45
- steps:
- - name: Free Disk Space
- run: |
- df -h
- sudo apt-get clean
- df -h
- - uses: actions/checkout@v4
- with:
- show-progress: false
- - uses: actions/setup-java@v4
- with:
- distribution: 'temurin'
- java-version: ${{ matrix.java }}
- cache: 'maven'
- - name: Download nodejs to maven cache
- run: .github/bin/download_nodejs
- - name: Maven Checks
- run: |
- export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}"
- ./mvnw install -B -V -T 1C -DskipTests -Dmaven.javadoc.skip=true --no-transfer-progress -P ci -pl '!presto-test-coverage,!:presto-docs'
- - name: "Upload presto-server"
- if: matrix.java == '8.0.442'
- uses: "actions/upload-artifact@v4"
- with:
- name: "presto-server"
- path: "presto-server/target/presto-server-0.293.tar.gz"
- if-no-files-found: "error"
- retention-days: 1
- - name: "Upload presto-cli"
- if: matrix.java == '8.0.442'
- uses: "actions/upload-artifact@v4"
- with:
- name: "presto-cli"
- path: "presto-cli/target/presto-cli-0.293-executable.jar"
- if-no-files-found: "error"
- retention-days: 1
- - name: "Clean Maven output"
- run: "./mvnw clean -pl '!:presto-server,!:presto-cli,!presto-test-coverage'"
-
- presto-coordinator-image:
- name: "presto-coordinator-image"
- needs: "maven-checks"
- runs-on: "ubuntu-22.04"
- steps:
- - uses: "actions/checkout@v4"
- with:
- submodules: "recursive"
-
- - name: "Download presto-server"
- uses: "actions/download-artifact@v4"
- with:
- name: "presto-server"
- path: "./docker"
-
- - name: "Download presto-cli"
- uses: "actions/download-artifact@v4"
- with:
- name: "presto-cli"
- path: "./docker"
-
- - name: "Login to image registry"
- uses: "docker/login-action@v3"
- with:
- registry: "ghcr.io"
- username: "${{github.actor}}"
- password: "${{secrets.GITHUB_TOKEN}}"
-
- - name: "Set up container image metadata"
- id: "meta"
- uses: "docker/metadata-action@v5"
- with:
- images: "ghcr.io/${{github.repository}}/coordinator"
- tags: "type=raw,value=dev"
-
- - name: "Build and push"
- uses: "docker/build-push-action@v6"
- with:
- build-args: |-
- JMX_PROMETHEUS_JAVA_AGENT_VERSION=0.20.0
- PRESTO_VERSION=0.293
- context: "./docker"
- file: "./docker/Dockerfile"
- push: >-
- ${{github.event_name != 'pull_request'
- && github.ref == 'refs/heads/release-0.293-clp-connector'}}
- tags: "${{steps.meta.outputs.tags}}"
- labels: "${{steps.meta.outputs.labels}}"
diff --git a/.github/workflows/pr-title-checks.yaml b/.github/workflows/pr-title-checks.yaml
index 886249e6348c0..eaddf713902c6 100644
--- a/.github/workflows/pr-title-checks.yaml
+++ b/.github/workflows/pr-title-checks.yaml
@@ -8,7 +8,7 @@ on:
# pull request triggered by this event.
# - Each job has `permissions` set to only those necessary.
types: ["edited", "opened", "reopened"]
- branches: ["release-0.293-clp-connector"]
+ branches: ["release-0.293-clp-connector-snapshot*"]
permissions: {}
diff --git a/.github/workflows/prestissimo-worker-images-build.yml b/.github/workflows/prestissimo-worker-images-build.yml
deleted file mode 100644
index b36dcb71949be..0000000000000
--- a/.github/workflows/prestissimo-worker-images-build.yml
+++ /dev/null
@@ -1,70 +0,0 @@
-name: "prestissimo-worker-images-build"
-
-on:
- pull_request:
- push:
-
-jobs:
- prestissimo-worker-images-build:
- name: "prestissimo-worker-images-build"
- runs-on: "ubuntu-22.04"
- steps:
- - uses: "actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683"
- with:
- submodules: "recursive"
-
- - name: "Login to image registry"
- uses: "docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772"
- with:
- registry: "ghcr.io"
- username: "${{github.actor}}"
- password: "${{secrets.GITHUB_TOKEN}}"
-
- - name: "Set up metadata for dependency image"
- id: "metadata-deps-image"
- uses: "docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804"
- with:
- images: "ghcr.io/${{github.repository}}/prestissimo-worker-dev-env"
- tags: "type=raw,value=dev"
-
- - name: "Build and push dependency image"
- uses: "docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4"
- with:
- context: "./presto-native-execution"
- file: "./presto-native-execution/scripts/dockerfiles/ubuntu-22.04-dependency.dockerfile"
- push: >-
- ${{github.event_name != 'pull_request'
- && github.ref == 'refs/heads/release-0.293-clp-connector'}}
- tags: "${{steps.metadata-deps-image.outputs.tags}}"
- labels: "${{steps.metadata-deps-image.outputs.labels}}"
-
- - name: "Set up metadata for runtime image"
- id: "metadata-runtime-image"
- uses: "docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804"
- with:
- images: "ghcr.io/${{github.repository}}/prestissimo-worker"
- tags: "type=raw,value=dev"
-
- - name: "Get number of cores"
- id: "get-cores"
- run: |-
- echo "num_cores=$(nproc)" >> $GITHUB_OUTPUT
-
- - name: "Build and push runtime image"
- uses: "docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4"
- with:
- build-args: |-
- BASE_IMAGE=ubuntu:22.04
- DEPENDENCY_IMAGE=${{steps.metadata-deps-image.outputs.tags}}
- EXTRA_CMAKE_FLAGS=-DPRESTO_ENABLE_TESTING=OFF \
- -DPRESTO_ENABLE_PARQUET=ON \
- -DPRESTO_ENABLE_S3=ON
- NUM_THREADS=${{steps.get-cores.outputs.num_cores}}
- OSNAME=ubuntu
- context: "./presto-native-execution"
- file: "./presto-native-execution/scripts/dockerfiles/prestissimo-runtime.dockerfile"
- push: >-
- ${{github.event_name != 'pull_request'
- && github.ref == 'refs/heads/release-0.293-clp-connector'}}
- tags: "${{steps.metadata-runtime-image.outputs.tags}}"
- labels: "${{steps.metadata-runtime-image.outputs.labels}}"
diff --git a/.github/workflows/presto-build.yml b/.github/workflows/presto-build.yml
new file mode 100644
index 0000000000000..26e7d37335ec1
--- /dev/null
+++ b/.github/workflows/presto-build.yml
@@ -0,0 +1,219 @@
+name: presto-build
+
+# ==============================================================================
+# Presto Java Build Workflow
+# ==============================================================================
+# Builds the Java components of Presto (coordinator, CLI, plugins) and optionally
+# builds and pushes a Docker image.
+#
+# What gets built?
+# ----------------
+# - presto-server: The coordinator that accepts queries and manages workers
+# - presto-cli: Command-line interface for running queries
+# - Various plugins (connectors, functions, etc.)
+#
+# Build Strategy:
+# ---------------
+# Uses Maven with a pre-populated local repository from the builder image.
+# The builder image contains all Maven dependencies pre-downloaded, so builds
+# don't need to fetch from the internet (faster and more reliable).
+#
+# Artifacts:
+# ----------
+# When should-upload-artifacts is true, this workflow uploads:
+# - presto-server tarball: Used by integration tests and Docker image build
+# - presto-cli JAR: The CLI executable
+#
+# Docker Image:
+# -------------
+# When should-build-image is true, builds and pushes a Presto Docker image
+# tagged with version, type (RELEASE/BETA/DEV), timestamp, and commit hash.
+
+on:
+ workflow_call:
+ inputs:
+ builder-image:
+ description: 'Full builder image URI with tag'
+ required: true
+ type: string
+ java-version:
+ description: 'Java version to use for build (e.g., 8.0.442, 17.0.13)'
+ required: false
+ default: '8.0.442'
+ type: string
+ should-upload-artifacts:
+ description: 'Whether this build should upload artifacts'
+ required: false
+ default: true
+ type: boolean
+ runtime-version-tag:
+ description: 'Immutable runtime image tag (e.g., 0.293-BETA-20250529140509-484b00e)'
+ required: false
+ default: ''
+ type: string
+ runtime-snapshot-tag:
+ description: 'Mutable SNAPSHOT tag (e.g., 0.293-BETA-SNAPSHOT)'
+ required: false
+ default: ''
+ type: string
+ should-build-image:
+ description: 'Whether to build and push Docker image'
+ required: false
+ default: false
+ type: boolean
+ # Outputs allow the caller workflow to reference artifact names
+ outputs:
+ presto-server-artifact:
+ description: "Name of the presto-server artifact"
+ value: presto-server
+ presto-cli-artifact:
+ description: "Name of the presto-cli artifact"
+ value: presto-cli
+
+jobs:
+ # --------------------------------------------------------------------------
+ # Build Job: Compile all Java modules
+ # --------------------------------------------------------------------------
+ build:
+ name: "presto-build"
+ runs-on: self-hosted
+ timeout-minutes: 45
+ container:
+ image: ${{ inputs.builder-image }}
+ credentials:
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+ env:
+ # Use the Maven cache pre-populated in the builder image
+ # This contains all dependencies pre-downloaded, avoiding network fetches
+ MAVEN_REPO: /opt/maven/repository
+ # JVM options for Maven: 2GB heap, crash immediately on OOM (don't hang)
+ MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ show-progress: false
+
+ - name: Setup Java
+ uses: actions/setup-java@v4
+ with:
+ distribution: 'temurin'
+ java-version: ${{ inputs.java-version }}
+
+ # setup-java may override MAVEN_REPO, so explicitly set it after
+ - name: Configure Maven repository
+ run: |
+ echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV
+
+ # Maven build with parallel compilation (-T 1C = 1 thread per CPU core)
+ # Key flags:
+ # - -DskipTests: Only compile, don't run tests (separate workflow handles tests)
+ # - -P ci: Activate the 'ci' profile (may have CI-specific settings in pom.xml)
+ # - -pl '!...': Exclude modules we don't need (test coverage, docs)
+ - name: Maven Build
+ run: |
+ export MAVEN_OPTS="${MAVEN_INSTALL_OPTS} -Dmaven.repo.local=${{ env.MAVEN_REPO }}"
+ ./mvnw install -B -V -T 1C -DskipTests -Dmaven.javadoc.skip=true --no-transfer-progress -P ci -pl '!presto-test-coverage,!:presto-docs'
+
+ # Upload artifacts for other jobs to download
+ # Artifacts are stored by GitHub and available for the duration of the workflow run
+ # retention-days: 1 means they're deleted after 1 day to save storage
+ - name: "Upload presto-server"
+ if: inputs.should-upload-artifacts
+ uses: "actions/upload-artifact@v4"
+ with:
+ name: "presto-server"
+ path: "presto-server/target/presto-server-*.tar.gz"
+ if-no-files-found: "error"
+ retention-days: 1
+
+ - name: "Upload presto-cli"
+ if: inputs.should-upload-artifacts
+ uses: "actions/upload-artifact@v4"
+ with:
+ name: "presto-cli"
+ path: "presto-cli/target/presto-cli-*-executable.jar"
+ if-no-files-found: "error"
+ retention-days: 1
+
+ # --------------------------------------------------------------------------
+ # Docker Image Job: Build and push Presto Docker image
+ # --------------------------------------------------------------------------
+ # This job creates a Docker image containing the Presto coordinator.
+ # The image is pushed to GitHub Container Registry (ghcr.io).
+ #
+ # Image tagging strategy:
+ # - Full tag: 0.293-RELEASE-20250522140509-484b00e (version-type-timestamp-hash)
+ # - Snapshot tag: 0.293-RELEASE-SNAPSHOT (always points to latest build)
+ build-image:
+ name: "presto-image"
+ needs: build # Wait for build job to complete and upload artifacts
+ if: inputs.should-build-image && inputs.should-upload-artifacts
+ runs-on: self-hosted
+ steps:
+ # Sparse checkout: Only fetch the docker/ directory (not entire repo)
+ # This is faster when we only need specific files
+ - name: "Download Docker context files"
+ uses: actions/checkout@v4
+ with:
+ sparse-checkout: |
+ docker
+ sparse-checkout-cone-mode: false
+ show-progress: false
+
+ # Download artifacts from the build job
+ # These go into ./docker so they're available in the Docker build context
+ - name: "Download presto-server"
+ uses: actions/download-artifact@v4
+ with:
+ name: presto-server
+ path: ./docker
+
+ - name: "Download presto-cli"
+ uses: actions/download-artifact@v4
+ with:
+ name: presto-cli
+ path: ./docker
+
+ # Extract base version from artifact filename (needed for Docker build args)
+ - name: "Extract base version"
+ id: "extract-version"
+ run: |
+ VERSION=$(ls docker/presto-server-*.tar.gz | sed 's/.*presto-server-\(.*\)\.tar\.gz/\1/')
+ echo "base-version=${VERSION}" >> $GITHUB_OUTPUT
+
+ # Login to GitHub Container Registry (ghcr.io)
+ # GITHUB_TOKEN is automatically provided by GitHub Actions
+ - name: "Login to image registry"
+ uses: "docker/login-action@v3"
+ with:
+ registry: "ghcr.io"
+ username: "${{github.actor}}"
+ password: "${{secrets.GITHUB_TOKEN}}"
+
+ # docker/metadata-action generates Docker image tags and labels
+ - name: "Set up container image metadata"
+ id: "meta"
+ uses: "docker/metadata-action@v5"
+ with:
+ images: "ghcr.io/${{github.repository}}/presto"
+ # Two tags passed from config job:
+ # 1. runtime-version-tag: Immutable (e.g., 0.293-BETA-20250529140509-484b00e)
+ # 2. runtime-snapshot-tag: Mutable (e.g., 0.293-BETA-SNAPSHOT)
+ tags: |
+ type=raw,value=${{inputs.runtime-version-tag}}
+ type=raw,value=${{inputs.runtime-snapshot-tag}}
+
+ # Build the Docker image and push to registry
+ # Note: push is disabled for pull_request events (security: PRs shouldn't push images)
+ - name: "Build and push"
+ uses: "docker/build-push-action@v6"
+ with:
+ build-args: |-
+ JMX_PROMETHEUS_JAVA_AGENT_VERSION=0.20.0
+ PRESTO_VERSION=${{steps.extract-version.outputs.base-version}}
+ context: "./docker"
+ file: "./docker/Dockerfile"
+ push: ${{github.event_name != 'pull_request'}}
+ tags: "${{steps.meta.outputs.tags}}"
+ labels: "${{steps.meta.outputs.labels}}"
diff --git a/.github/workflows/prestocpp-format-and-header-check.yml b/.github/workflows/prestocpp-format-and-header-check.yml
index c554ee8785786..b89a7f25efd8d 100644
--- a/.github/workflows/prestocpp-format-and-header-check.yml
+++ b/.github/workflows/prestocpp-format-and-header-check.yml
@@ -14,9 +14,9 @@ on:
concurrency:
group: "${{github.workflow}}-${{github.ref}}"
- # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so
- # that each commit to release-0.293-clp-connector is checked completely.
- cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}"
+ # Cancel in-progress jobs for efficiency. Exclude branches with `release-0.293-clp-connector-snapshot` prefix so
+ # that each commit to these branches is checked completely.
+ cancel-in-progress: "${{!startsWith(github.ref, 'refs/heads/release-0.293-clp-connector-snapshot')}}"
jobs:
prestocpp-format-and-header-check:
diff --git a/.github/workflows/prestocpp-linux-build-and-unit-test.yml b/.github/workflows/prestocpp-linux-build-and-unit-test.yml
index e26e330403ec2..23db1ec5720bb 100644
--- a/.github/workflows/prestocpp-linux-build-and-unit-test.yml
+++ b/.github/workflows/prestocpp-linux-build-and-unit-test.yml
@@ -1,59 +1,78 @@
name: prestocpp-linux-build-and-unit-test
-on:
- workflow_dispatch:
- pull_request:
- paths:
- - 'presto-native-execution/**'
- - 'presto-native-sidecar-plugin/**'
- - '.github/workflows/prestocpp-linux-build-and-unit-test.yml'
- push:
- paths-ignore:
- - 'presto-docs/**'
-
-concurrency:
- group: "${{github.workflow}}-${{github.ref}}"
+# ==============================================================================
+# Prestocpp (C++ Native Worker) Build, Tests, and Image
+# ==============================================================================
+# This workflow builds the C++ native worker (prestocpp), runs unit tests, and
+# builds the prestissimo runtime Docker image.
+#
+# What is Prestocpp/Prestissimo?
+# ------------------------------
+# - Prestocpp: The C++ source code for the native Presto worker
+# - Prestissimo: The runtime/deployment name for the compiled C++ worker
+#
+# Job Dependency Graph:
+# ---------------------
+# prestocpp-linux-build-and-test ─► prestissimo-image
+#
+# Build Strategy:
+# ---------------
+# Uses a pre-built Docker image ("builder image") with:
+# - All C++ dependencies pre-installed (boost, folly, glog, etc.)
+# - Pre-warmed ccache containing compiled object files from a previous build
- # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so
- # that each commit to release-0.293-clp-connector is checked completely.
- cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}"
+on:
+ workflow_call:
+ inputs:
+ builder-image:
+ description: 'Full builder image URI with tag'
+ required: true
+ type: string
+ runtime-version-tag:
+ description: 'Immutable runtime image tag (e.g., 0.293-BETA-20250529140509-484b00e)'
+ required: false
+ default: ''
+ type: string
+ runtime-snapshot-tag:
+ description: 'Mutable SNAPSHOT tag (e.g., 0.293-BETA-SNAPSHOT)'
+ required: false
+ default: ''
+ type: string
+
+# Note: concurrency is handled by the parent ci.yml workflow
jobs:
- prestocpp-linux-build-for-test:
- runs-on: ubuntu-22.04
+ # ----------------------------------------------------------------------------
+ # Job 1: Build and Test Prestocpp
+ # ----------------------------------------------------------------------------
+ # Builds the prestocpp binary, runs unit tests, and uploads artifacts.
+ prestocpp-linux-build-and-test:
+ name: "prestocpp-linux-build-and-test"
+ runs-on: self-hosted
+ timeout-minutes: 180
container:
- image: prestodb/presto-native-dependency:0.293-20250522140509-484b00e
+ image: ${{ inputs.builder-image }}
+ credentials:
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
env:
- CCACHE_DIR: "${{ github.workspace }}/ccache"
+ CCACHE_DIR: /var/cache/ccache
+ CCACHE_BASEDIR: ${{ github.workspace }}
steps:
- uses: actions/checkout@v4
+ with:
+ show-progress: false
- name: Fix git permissions
- # Usually actions/checkout does this but as we run in a container
- # it doesn't work
run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
- - name: Update velox
+ - name: Update velox submodule
run: |
cd presto-native-execution
make velox-submodule
- - name: Install Github CLI for using apache/infrastructure-actions/stash
- run: |
- curl -L https://github.com/cli/cli/releases/download/v2.63.2/gh_2.63.2_linux_amd64.rpm > gh_2.63.2_linux_amd64.rpm
- rpm -iv gh_2.63.2_linux_amd64.rpm
-
- - uses: apache/infrastructure-actions/stash/restore@4ab8682fbd4623d2b4fc1c98db38aba5091924c3
- with:
- path: '${{ env.CCACHE_DIR }}'
- key: ccache-prestocpp-linux-build-for-test
-
- - name: Zero ccache statistics
- run: ccache -sz
-
- name: Build engine
run: |
- source /opt/rh/gcc-toolset-12/enable
cd presto-native-execution
cmake \
-B _build/release \
@@ -72,243 +91,75 @@ jobs:
-DMAX_LINK_JOBS=4
ninja -C _build/release -j $(getconf _NPROCESSORS_ONLN)
- - name: Ccache after
+ - name: Show ccache statistics
run: ccache -s
- - uses: apache/infrastructure-actions/stash/save@4ab8682fbd4623d2b4fc1c98db38aba5091924c3
- with:
- path: '${{ env.CCACHE_DIR }}'
- key: ccache-prestocpp-linux-build-for-test
-
- - name: Run Unit Tests
+ - name: Run unit tests
run: |
- # Ensure transitive dependency libboost-iostreams is found.
ldconfig /usr/local/lib
cd presto-native-execution/_build/release
ctest -j $(getconf _NPROCESSORS_ONLN) -VV --output-on-failure --exclude-regex velox.*
+ # Upload compiled binaries as artifacts for integration tests and image building
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: presto-native-build
+ retention-days: 1
path: |
presto-native-execution/_build/release/presto_cpp/main/presto_server
presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main
- prestocpp-linux-presto-e2e-tests:
- needs: prestocpp-linux-build-for-test
- runs-on: ubuntu-22.04
- container:
- image: prestodb/presto-native-dependency:0.293-20250522140509-484b00e
- env:
- MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError"
- MAVEN_FAST_INSTALL: "-B -V --quiet -T 1C -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true"
- MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end"
+ # ----------------------------------------------------------------------------
+ # Job 2: Build Prestissimo Runtime Image
+ # ----------------------------------------------------------------------------
+ # Downloads the pre-built binary and packages it into a minimal Docker image.
+ # The runtime image contains only the binary and runtime dependencies (no build tools).
+ prestissimo-image:
+ name: "prestissimo-image"
+ needs: prestocpp-linux-build-and-test
+ runs-on: self-hosted
+ timeout-minutes: 30
steps:
- uses: actions/checkout@v4
-
- - name: Fix git permissions
- # Usually actions/checkout does this but as we run in a container
- # it doesn't work
- run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
-
- - name: Download artifacts
- uses: actions/download-artifact@v4
with:
- name: presto-native-build
- path: presto-native-execution/_build/release
-
- # Permissions are lost when uploading. Details here: https://github.com/actions/upload-artifact/issues/38
- - name: Restore execute permissions and library path
- run: |
- chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server
- chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main
- # Ensure transitive dependency libboost-iostreams is found.
- ldconfig /usr/local/lib
-
- - name: Install OpenJDK8
- uses: actions/setup-java@v4
- with:
- distribution: 'temurin'
- java-version: 8.0.442
- cache: 'maven'
- - name: Download nodejs to maven cache
- run: .github/bin/download_nodejs
-
- - name: Maven install
- env:
- # Use different Maven options to install.
- MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
- run: |
- for i in $(seq 1 3); do ./mvnw clean install $MAVEN_FAST_INSTALL -pl 'presto-native-execution' -am && s=0 && break || s=$? && sleep 10; done; (exit $s)
-
- - name: Run presto-native e2e tests
- run: |
- export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server"
- export TESTFILES=`find ./presto-native-execution/src/test -type f -name 'TestPrestoNative*.java'`
- # Convert file paths to comma separated class names
- export TESTCLASSES=
- for test_file in $TESTFILES
- do
- tmp=${test_file##*/}
- test_class=${tmp%%\.*}
- export TESTCLASSES="${TESTCLASSES},$test_class"
- done
- export TESTCLASSES=${TESTCLASSES#,}
- echo "TESTCLASSES = $TESTCLASSES"
- # TODO: neeed to enable remote function tests with
- # "-Ppresto-native-execution-remote-functions" once
- # > https://github.com/facebookincubator/velox/discussions/6163
- # is fixed.
-
- mvn test \
- ${MAVEN_TEST} \
- -pl 'presto-native-execution' \
- -Dtest="${TESTCLASSES}" \
- -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \
- -DDATA_DIR=${RUNNER_TEMP} \
- -Duser.timezone=America/Bahia_Banderas \
- -T1C
+ show-progress: false
- prestocpp-linux-presto-native-tests:
- needs: prestocpp-linux-build-for-test
- runs-on: ubuntu-22.04
- strategy:
- fail-fast: false
- matrix:
- storage-format: [ "PARQUET", "DWRF" ]
- container:
- image: prestodb/presto-native-dependency:0.293-20250522140509-484b00e
- env:
- MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError"
- MAVEN_FAST_INSTALL: "-B -V --quiet -T 1C -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true"
- MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end"
- steps:
- - uses: actions/checkout@v4
-
- - name: Fix git permissions
- # Usually actions/checkout does this but as we run in a container
- # it doesn't work
- run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
-
- - name: Download artifacts
+ - name: Download build artifacts
uses: actions/download-artifact@v4
with:
name: presto-native-build
path: presto-native-execution/_build/release
- # Permissions are lost when uploading. Details here: https://github.com/actions/upload-artifact/issues/38
- - name: Restore execute permissions and library path
+ - name: Restore execute permissions
run: |
- chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server
- chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main
- # Ensure transitive dependency libboost-iostreams is found.
- ldconfig /usr/local/lib
+ chmod +x presto-native-execution/_build/release/presto_cpp/main/presto_server
+ chmod +x presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main || true
+ ls -lh presto-native-execution/_build/release/presto_cpp/main/presto_server
- - name: Install OpenJDK8
- uses: actions/setup-java@v4
+ - name: Login to GitHub Container Registry
+ uses: docker/login-action@v3
with:
- distribution: 'temurin'
- java-version: '8.0.442'
- cache: 'maven'
- - name: Download nodejs to maven cache
- run: .github/bin/download_nodejs
-
- - name: Maven install
- env:
- # Use different Maven options to install.
- MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
- run: |
- for i in $(seq 1 3); do ./mvnw clean install $MAVEN_FAST_INSTALL -pl 'presto-native-tests' -am && s=0 && break || s=$? && sleep 10; done; (exit $s)
-
- - name: Run presto-native tests
- run: |
- export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server"
- export TESTFILES=`find ./presto-native-tests/src/test -type f -name 'Test*.java'`
- # Convert file paths to comma separated class names
- export TESTCLASSES=
- for test_file in $TESTFILES
- do
- tmp=${test_file##*/}
- test_class=${tmp%%\.*}
- export TESTCLASSES="${TESTCLASSES},$test_class"
- done
- export TESTCLASSES=${TESTCLASSES#,}
- echo "TESTCLASSES = $TESTCLASSES"
-
- mvn test \
- ${MAVEN_TEST} \
- -pl 'presto-native-tests' \
- -DstorageFormat=${{ matrix.storage-format }} \
- -Dtest="${TESTCLASSES}" \
- -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \
- -DDATA_DIR=${RUNNER_TEMP} \
- -Duser.timezone=America/Bahia_Banderas \
- -T1C
-
- prestocpp-linux-presto-sidecar-tests:
- needs: prestocpp-linux-build-for-test
- runs-on: ubuntu-22.04
- container:
- image: prestodb/presto-native-dependency:0.293-20250522140509-484b00e
- env:
- MAVEN_OPTS: "-Xmx4G -XX:+ExitOnOutOfMemoryError"
- MAVEN_FAST_INSTALL: "-B -V --quiet -T 1C -DskipTests -Dair.check.skip-all -Dmaven.javadoc.skip=true"
- MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --fail-at-end"
- steps:
- - uses: actions/checkout@v4
- - name: Fix git permissions
- # Usually actions/checkout does this but as we run in a container
- # it doesn't work
- run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
- - name: Download artifacts
- uses: actions/download-artifact@v4
- with:
- name: presto-native-build
- path: presto-native-execution/_build/release
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
- # Permissions are lost when uploading. Details here: https://github.com/actions/upload-artifact/issues/38
- - name: Restore execute permissions and library path
- run: |
- chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server
- chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/velox/velox/functions/remote/server/velox_functions_remote_server_main
- # Ensure transitive dependency libboost-iostreams is found.
- ldconfig /usr/local/lib
- - name: Install OpenJDK8
- uses: actions/setup-java@v4
+ - name: Build and push prestissimo runtime image
+ uses: docker/build-push-action@v6
with:
- distribution: 'temurin'
- java-version: '8.0.442'
- cache: 'maven'
- - name: Download nodejs to maven cache
- run: .github/bin/download_nodejs
-
- - name: Maven install
- env:
- # Use different Maven options to install.
- MAVEN_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
- run: |
- for i in $(seq 1 3); do ./mvnw clean install $MAVEN_FAST_INSTALL -pl 'presto-native-execution' -am && s=0 && break || s=$? && sleep 10; done; (exit $s)
- - name: Run presto-native sidecar tests
- run: |
- export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server"
- export TESTFILES=`find ./presto-native-sidecar-plugin/src/test -type f -name 'Test*.java'`
- # Convert file paths to comma separated class names
- export TESTCLASSES=
- for test_file in $TESTFILES
- do
- tmp=${test_file##*/}
- test_class=${tmp%%\.*}
- export TESTCLASSES="${TESTCLASSES},$test_class"
- done
- export TESTCLASSES=${TESTCLASSES#,}
- echo "TESTCLASSES = $TESTCLASSES"
- mvn test \
- ${MAVEN_TEST} \
- -pl 'presto-native-sidecar-plugin' \
- -Dtest="${TESTCLASSES}" \
- -DPRESTO_SERVER=${PRESTO_SERVER_PATH} \
- -DDATA_DIR=${RUNNER_TEMP} \
- -Duser.timezone=America/Bahia_Banderas \
- -T1C
+ context: .
+ file: ./presto-native-execution/scripts/dockerfiles/prestissimo-runtime.dockerfile
+ build-args: |
+ BUILDER_IMAGE=${{ inputs.builder-image }}
+ push: ${{ github.event_name != 'pull_request' }}
+ # Two tags passed from config job:
+ # 1. runtime-version-tag: Immutable (e.g., 0.293-BETA-20250529140509-484b00e)
+ # 2. runtime-snapshot-tag: Mutable (e.g., 0.293-BETA-SNAPSHOT)
+ tags: |
+ ghcr.io/${{ github.repository }}/prestissimo:${{ inputs.runtime-version-tag }}
+ ghcr.io/${{ github.repository }}/prestissimo:${{ inputs.runtime-snapshot-tag }}
+ labels: |
+ org.opencontainers.image.source=https://github.com/${{ github.repository }}
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 6f829502fbbb8..ef33819365825 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,52 +1,46 @@
-name: test
+name: tests
-on:
- pull_request:
- push:
-
-env:
- # An envar that signals to tests we are executing in the CI environment
- CONTINUOUS_INTEGRATION: true
- MAVEN_OPTS: "-Xmx1024M -XX:+ExitOnOutOfMemoryError"
- MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
- MAVEN_FAST_INSTALL: "-B -V --quiet -T 1C -DskipTests -Dair.check.skip-all --no-transfer-progress -Dmaven.javadoc.skip=true"
- MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --no-transfer-progress --fail-at-end"
- RETRY: .github/bin/retry
-
-concurrency:
- group: "${{github.workflow}}-${{github.ref}}"
+# ==============================================================================
+# Presto Java Unit Tests
+# ==============================================================================
+# Runs unit tests for the Java components of Presto using a matrix strategy.
+#
+# Matrix Strategy Explained:
+# --------------------------
+# GitHub Actions "matrix" runs the same job multiple times with different parameters.
+# This workflow creates a matrix of:
+# - java: ['8.0.442', '17.0.13'] (2 versions)
+# - modules: 11 different test modules/profiles
+#
+# This results in 2 × 11 = 22 parallel test jobs! Each combination runs independently.
+#
+# Test Modules:
+# -------------
+# Tests are split by Maven profile (-P flag) to parallelize and categorize:
+# - presto-tests-execution-memory: Memory management tests
+# - presto-tests-general: General functionality tests
+# - ci-only-*: Test profiles that only run in CI (too slow/resource-intensive for local)
+# - presto-main-base, presto-main: Core module tests
+#
+# fail-fast: false means if one matrix job fails, others continue running.
+# This helps identify all failing tests rather than stopping at the first failure.
- # Cancel in-progress jobs for efficiency. Exclude the `release-0.293-clp-connector` branch so
- # that each commit to release-0.293-clp-connector is checked completely.
- cancel-in-progress: "${{github.ref != 'refs/heads/release-0.293-clp-connector'}}"
+on:
+ workflow_call:
+ inputs:
+ builder-image:
+ description: 'Full builder image URI with tag'
+ required: true
+ type: string
jobs:
- changes:
- runs-on: ubuntu-latest
- # Required permissions
- permissions:
- pull-requests: read
- # Set job outputs to values from filter step
- outputs:
- codechange: ${{ steps.filter.outputs.codechange }}
- steps:
- - uses: "actions/checkout@v4"
- with:
- submodules: "recursive"
- - uses: dorny/paths-filter@v2
- id: filter
- with:
- filters: |
- codechange:
- - '!presto-docs/**'
-
test:
- runs-on: ubuntu-latest
- needs: changes
+ name: "presto-tests"
+ runs-on: self-hosted
strategy:
- fail-fast: false
+ fail-fast: false # Continue other jobs even if one fails
matrix:
- java: [8.0.442, 17.0.13]
+ java: ['8.0.442', '17.0.13']
modules:
- ":presto-tests -P presto-tests-execution-memory"
- ":presto-tests -P presto-tests-general"
@@ -56,29 +50,50 @@ jobs:
- ":presto-tests -P ci-only-distributed-queries"
- ":presto-tests -P ci-only-aggregation-queries"
- ":presto-tests -P ci-only-plan-determinism"
- - ":presto-tests -P ci-only-resource-manager"
+ - ":presto-tests -P ci-only-resource-manager"
- ":presto-main-base"
- ":presto-main"
timeout-minutes: 80
+ container:
+ image: ${{ inputs.builder-image }}
+ credentials:
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+ env:
+ # Maven test flags:
+ # - -Dair.check.skip-all: Skip static analysis checks (already done in build)
+ # - -DLogTestDurationListener.enabled=true: Log how long each test takes
+ # - --fail-at-end: Run all tests even if some fail, report failures at end
+ MAVEN_TEST: "-B -Dair.check.skip-all -Dmaven.javadoc.skip=true -DLogTestDurationListener.enabled=true --no-transfer-progress --fail-at-end"
+ MAVEN_REPO: /opt/maven/repository
+ MAVEN_INSTALL_OPTS: "-Xmx2G -XX:+ExitOnOutOfMemoryError"
steps:
- uses: actions/checkout@v4
- if: needs.changes.outputs.codechange == 'true'
with:
show-progress: false
- - uses: actions/setup-java@v4
- if: needs.changes.outputs.codechange == 'true'
+
+ # Setup Java from the matrix (the matrix creates a separate job for Java 8 and for Java 17)
+ - name: Setup Java
+ uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
- cache: 'maven'
- - name: Download nodejs to maven cache
- if: needs.changes.outputs.codechange == 'true'
- run: .github/bin/download_nodejs
+
+ # setup-java may override MAVEN_REPO, so explicitly set it after
+ - name: Configure Maven repository
+ run: |
+ echo "MAVEN_REPO=/opt/maven/repository" >> $GITHUB_ENV
+
+ # First, compile the test module and its dependencies (-am = also make dependencies)
+ # The cut command extracts just the module name (before any -P profile flags)
+ # Example: ":presto-tests -P presto-tests-general" -> ":presto-tests"
- name: Maven Install
- if: needs.changes.outputs.codechange == 'true'
run: |
export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}"
- ./mvnw install ${MAVEN_FAST_INSTALL} -am -pl $(echo '${{ matrix.modules }}' | cut -d' ' -f1)
+ ./mvnw install -B -V --quiet -T 1C -DskipTests -Dair.check.skip-all --no-transfer-progress -Dmaven.javadoc.skip=true -Dmaven.repo.local=${{ env.MAVEN_REPO }} -am -pl $(echo '${{ matrix.modules }}' | cut -d' ' -f1)
+
+ # Run the actual tests for this matrix combination
+ # The matrix value expands to the module for -pl plus any profile flag (e.g., ":presto-tests -P ci-only-local-queries")
- name: Maven Tests
- if: needs.changes.outputs.codechange == 'true'
- run: ./mvnw test ${MAVEN_TEST} -pl ${{ matrix.modules }}
+ run: |
+ ./mvnw test ${MAVEN_TEST} -Dmaven.repo.local=${{ env.MAVEN_REPO }} -pl ${{ matrix.modules }}
diff --git a/pom.xml b/pom.xml
index 57dc0439a8401..3b1f4d7554789 100644
--- a/pom.xml
+++ b/pom.xml
@@ -55,7 +55,7 @@
1.43
- 2.12.7
+ 2.13.1
1.54
7.5
8.11.3
@@ -93,7 +93,7 @@
+
+
+
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java
index 121eb0d5ff17b..21366e90db156 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpConfig.java
@@ -33,6 +33,8 @@ public class ClpConfig
private long metadataRefreshInterval = 60;
private long metadataExpireInterval = 600;
+ private String metadataYamlPath;
+
private String splitFilterConfig;
private SplitFilterProviderType splitFilterProviderType = SplitFilterProviderType.MYSQL;
private SplitProviderType splitProviderType = SplitProviderType.MYSQL;
@@ -151,6 +153,18 @@ public ClpConfig setMetadataExpireInterval(long metadataExpireInterval)
return this;
}
+ public String getMetadataYamlPath()
+ {
+ return metadataYamlPath;
+ }
+
+ @Config("clp.metadata-yaml-path")
+ public ClpConfig setMetadataYamlPath(String metadataYamlPath)
+ {
+ this.metadataYamlPath = metadataYamlPath;
+ return this;
+ }
+
public String getSplitFilterConfig()
{
return splitFilterConfig;
@@ -189,16 +203,21 @@ public ClpConfig setSplitProviderType(SplitProviderType splitProviderType)
public enum MetadataProviderType
{
- MYSQL
+ MYSQL,
+ YAML
}
public enum SplitFilterProviderType
{
- MYSQL
+ MYSQL,
+ PINOT,
+ PINOT_UBER
}
public enum SplitProviderType
{
- MYSQL
+ MYSQL,
+ PINOT,
+ PINOT_UBER
}
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java
index 2530c013455cc..fb6626de25a61 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpErrorCode.java
@@ -28,6 +28,7 @@ public enum ClpErrorCode
CLP_UNSUPPORTED_SPLIT_SOURCE(2, EXTERNAL),
CLP_UNSUPPORTED_TYPE(3, EXTERNAL),
CLP_UNSUPPORTED_CONFIG_OPTION(4, EXTERNAL),
+ CLP_UNSUPPORTED_TABLE_SCHEMA_YAML(5, EXTERNAL),
CLP_SPLIT_FILTER_CONFIG_NOT_FOUND(10, USER_ERROR),
CLP_MANDATORY_SPLIT_FILTER_NOT_VALID(11, USER_ERROR),
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java
index 1f9962a3456d9..a63b7dc3f9770 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpMetadata.java
@@ -81,7 +81,7 @@ public ClpMetadata(ClpConfig clpConfig, ClpMetadataProvider clpMetadataProvider)
@Override
public List listSchemaNames(ConnectorSession session)
{
- return ImmutableList.of(DEFAULT_SCHEMA_NAME);
+ return clpMetadataProvider.listSchemaNames();
}
@Override
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java
index bf801d0d87242..0ed988d16a76c 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpModule.java
@@ -16,14 +16,22 @@
import com.facebook.airlift.configuration.AbstractConfigurationAwareModule;
import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider;
import com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider;
+import com.facebook.presto.plugin.clp.metadata.ClpYamlMetadataProvider;
import com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider;
+import com.facebook.presto.plugin.clp.split.ClpPinotSplitProvider;
import com.facebook.presto.plugin.clp.split.ClpSplitProvider;
+import com.facebook.presto.plugin.clp.split.ClpUberPinotSplitProvider;
import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider;
+import com.facebook.presto.plugin.clp.split.filter.ClpPinotSplitFilterProvider;
import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider;
+import com.facebook.presto.plugin.clp.split.filter.ClpUberPinotSplitFilterProvider;
import com.facebook.presto.spi.PrestoException;
+import com.google.common.collect.ImmutableMap;
import com.google.inject.Binder;
import com.google.inject.Scopes;
+import java.util.Map;
+
import static com.facebook.airlift.configuration.ConfigBinder.configBinder;
import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType;
import static com.facebook.presto.plugin.clp.ClpConfig.SplitFilterProviderType;
@@ -35,6 +43,27 @@
public class ClpModule
extends AbstractConfigurationAwareModule
{
+ // Provider mappings for cleaner configuration binding
+ private static final Map> SPLIT_FILTER_PROVIDER_MAPPINGS =
+ ImmutableMap.>builder()
+ .put(SplitFilterProviderType.MYSQL, ClpMySqlSplitFilterProvider.class)
+ .put(SplitFilterProviderType.PINOT, ClpPinotSplitFilterProvider.class)
+ .put(SplitFilterProviderType.PINOT_UBER, ClpUberPinotSplitFilterProvider.class)
+ .build();
+
+ private static final Map> METADATA_PROVIDER_MAPPINGS =
+ ImmutableMap.>builder()
+ .put(MetadataProviderType.MYSQL, ClpMySqlMetadataProvider.class)
+ .put(MetadataProviderType.YAML, ClpYamlMetadataProvider.class)
+ .build();
+
+ private static final Map> SPLIT_PROVIDER_MAPPINGS =
+ ImmutableMap.>builder()
+ .put(SplitProviderType.MYSQL, ClpMySqlSplitProvider.class)
+ .put(SplitProviderType.PINOT, ClpPinotSplitProvider.class)
+ .put(SplitProviderType.PINOT_UBER, ClpUberPinotSplitProvider.class)
+ .build();
+
@Override
protected void setup(Binder binder)
{
@@ -46,25 +75,31 @@ protected void setup(Binder binder)
ClpConfig config = buildConfigObject(ClpConfig.class);
- if (SplitFilterProviderType.MYSQL == config.getSplitFilterProviderType()) {
- binder.bind(ClpSplitFilterProvider.class).to(ClpMySqlSplitFilterProvider.class).in(Scopes.SINGLETON);
- }
- else {
- throw new PrestoException(CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE, "Unsupported split filter provider type: " + config.getSplitFilterProviderType());
+ // Bind split filter provider
+ Class extends ClpSplitFilterProvider> splitFilterProviderClass =
+ SPLIT_FILTER_PROVIDER_MAPPINGS.get(config.getSplitFilterProviderType());
+ if (splitFilterProviderClass == null) {
+ throw new PrestoException(CLP_UNSUPPORTED_SPLIT_FILTER_SOURCE,
+ "Unsupported split filter provider type: " + config.getSplitFilterProviderType());
}
+ binder.bind(ClpSplitFilterProvider.class).to(splitFilterProviderClass).in(Scopes.SINGLETON);
- if (config.getMetadataProviderType() == MetadataProviderType.MYSQL) {
- binder.bind(ClpMetadataProvider.class).to(ClpMySqlMetadataProvider.class).in(Scopes.SINGLETON);
- }
- else {
- throw new PrestoException(CLP_UNSUPPORTED_METADATA_SOURCE, "Unsupported metadata provider type: " + config.getMetadataProviderType());
+ // Bind metadata provider
+ Class extends ClpMetadataProvider> metadataProviderClass =
+ METADATA_PROVIDER_MAPPINGS.get(config.getMetadataProviderType());
+ if (metadataProviderClass == null) {
+ throw new PrestoException(CLP_UNSUPPORTED_METADATA_SOURCE,
+ "Unsupported metadata provider type: " + config.getMetadataProviderType());
}
+ binder.bind(ClpMetadataProvider.class).to(metadataProviderClass).in(Scopes.SINGLETON);
- if (config.getSplitProviderType() == SplitProviderType.MYSQL) {
- binder.bind(ClpSplitProvider.class).to(ClpMySqlSplitProvider.class).in(Scopes.SINGLETON);
- }
- else {
- throw new PrestoException(CLP_UNSUPPORTED_SPLIT_SOURCE, "Unsupported split provider type: " + config.getSplitProviderType());
+ // Bind split provider
+ Class extends ClpSplitProvider> splitProviderClass =
+ SPLIT_PROVIDER_MAPPINGS.get(config.getSplitProviderType());
+ if (splitProviderClass == null) {
+ throw new PrestoException(CLP_UNSUPPORTED_SPLIT_SOURCE,
+ "Unsupported split provider type: " + config.getSplitProviderType());
}
+ binder.bind(ClpSplitProvider.class).to(splitProviderClass).in(Scopes.SINGLETON);
}
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java
index 2e35840971c11..7b2d42bb0635d 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpSplit.java
@@ -24,6 +24,7 @@
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.Optional;
import static com.facebook.presto.spi.schedule.NodeSelectionStrategy.NO_PREFERENCE;
@@ -77,6 +78,25 @@ public List getPreferredNodes(NodeProvider nodeProvider)
return ImmutableList.of();
}
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(path, type, kqlQuery);
+ }
+
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ ClpSplit other = (ClpSplit) obj;
+ return this.type == other.type && this.path.equals(other.path) && this.kqlQuery.equals(other.kqlQuery);
+ }
+
@Override
public Map getInfo()
{
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java
index b82932f0c30fd..902c9dfe37176 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpTableLayoutHandle.java
@@ -13,6 +13,7 @@
*/
package com.facebook.presto.plugin.clp;
+import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec;
import com.facebook.presto.spi.ConnectorTableLayoutHandle;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
@@ -28,13 +29,34 @@ public class ClpTableLayoutHandle
private final ClpTableHandle table;
private final Optional kqlQuery;
private final Optional metadataSql;
+ private final boolean metadataQueryOnly;
+ private final Optional topN;
     @JsonCreator
-    public ClpTableLayoutHandle(@JsonProperty("table") ClpTableHandle table, @JsonProperty("kqlQuery") Optional<String> kqlQuery, @JsonProperty("metadataFilterQuery") Optional<String> metadataSql)
+    public ClpTableLayoutHandle(
+            @JsonProperty("table") ClpTableHandle table,
+            @JsonProperty("kqlQuery") Optional<String> kqlQuery,
+            @JsonProperty("metadataFilterQuery") Optional<String> metadataSql,
+            @JsonProperty("metadataQueryOnly") boolean metadataQueryOnly,
+            @JsonProperty("topN") Optional<ClpTopNSpec> topN)
{
this.table = table;
this.kqlQuery = kqlQuery;
this.metadataSql = metadataSql;
+ this.metadataQueryOnly = metadataQueryOnly;
+ this.topN = topN;
+ }
+
+    public ClpTableLayoutHandle(
+            @JsonProperty("table") ClpTableHandle table,
+            @JsonProperty("kqlQuery") Optional<String> kqlQuery,
+            @JsonProperty("metadataFilterQuery") Optional<String> metadataSql)
+ {
+ this.table = table;
+ this.kqlQuery = kqlQuery;
+ this.metadataSql = metadataSql;
+ this.metadataQueryOnly = false;
+ this.topN = Optional.empty();
}
@JsonProperty
@@ -55,6 +77,18 @@ public Optional getMetadataSql()
return metadataSql;
}
+ @JsonProperty
+ public boolean isMetadataQueryOnly()
+ {
+ return metadataQueryOnly;
+ }
+
+    @JsonProperty
+    public Optional<ClpTopNSpec> getTopN()
+    {
+        return topN;
+    }
+
@Override
public boolean equals(Object o)
{
@@ -67,13 +101,15 @@ public boolean equals(Object o)
ClpTableLayoutHandle that = (ClpTableLayoutHandle) o;
return Objects.equals(table, that.table) &&
Objects.equals(kqlQuery, that.kqlQuery) &&
- Objects.equals(metadataSql, that.metadataSql);
+ Objects.equals(metadataSql, that.metadataSql) &&
+ Objects.equals(metadataQueryOnly, that.metadataQueryOnly) &&
+ Objects.equals(topN, that.topN);
}
@Override
public int hashCode()
{
- return Objects.hash(table, kqlQuery, metadataSql);
+ return Objects.hash(table, kqlQuery, metadataSql, metadataQueryOnly, topN);
}
@Override
@@ -83,6 +119,8 @@ public String toString()
.add("table", table)
.add("kqlQuery", kqlQuery)
.add("metadataSql", metadataSql)
+ .add("metadataQueryOnly", metadataQueryOnly)
+ .add("topN", topN)
.toString();
}
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java
index 33e4b748a30d4..28f8494de4a7b 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpMetadataProvider.java
@@ -16,15 +16,32 @@
import com.facebook.presto.plugin.clp.ClpColumnHandle;
import com.facebook.presto.plugin.clp.ClpTableHandle;
import com.facebook.presto.spi.SchemaTableName;
+import com.google.common.collect.ImmutableList;
import java.util.List;
+import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME;
+
/**
* A provider for metadata that describes what tables exist in the CLP connector, and what columns
* exist in each of those tables.
*/
public interface ClpMetadataProvider
{
+ /**
+ * Returns the list of schema names available in this connector.
+ *
+ * The default implementation returns only the default schema. Implementations can override
+ * this method to support multiple schemas by querying their metadata source (e.g., YAML file
+ * or database) to discover available schemas.
+ *
+ * @return the list of schema names available in this connector
+ */
+    default List<String> listSchemaNames()
+    {
+        return ImmutableList.of(DEFAULT_SCHEMA_NAME);
+    }
+
/**
* @param schemaTableName the name of the schema and the table
* @return the list of column handles for the given table.
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpYamlMetadataProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpYamlMetadataProvider.java
new file mode 100644
index 0000000000000..2c455291d9aba
--- /dev/null
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/metadata/ClpYamlMetadataProvider.java
@@ -0,0 +1,277 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.metadata;
+
+import com.facebook.airlift.log.Logger;
+import com.facebook.presto.plugin.clp.ClpColumnHandle;
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.facebook.presto.plugin.clp.ClpTableHandle;
+import com.facebook.presto.spi.PrestoException;
+import com.facebook.presto.spi.SchemaTableName;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+
+import javax.inject.Inject;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME;
+import static com.facebook.presto.plugin.clp.ClpErrorCode.CLP_UNSUPPORTED_TABLE_SCHEMA_YAML;
+import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME;
+import static java.lang.String.format;
+
+public class ClpYamlMetadataProvider
+ implements ClpMetadataProvider
+{
+ private static final Logger log = Logger.get(ClpYamlMetadataProvider.class);
+ private final ClpConfig config;
+ private final ObjectMapper yamlMapper;
+
+    // Thread-safe cache for schema names to avoid repeated file parsing
+    private volatile List<String> cachedSchemaNames;
+
+    // Thread-safe cache for table schema mappings per schema
+    // Outer map: schema name -> inner map
+    // Inner map: table name -> YAML schema file path
+    private final Map<String, Map<String, String>> tableSchemaYamlMapPerSchema = new HashMap<>();
+
+ @Inject
+ public ClpYamlMetadataProvider(ClpConfig config)
+ {
+ this.config = config;
+ // Reuse ObjectMapper instance for better performance
+ this.yamlMapper = new ObjectMapper(new YAMLFactory());
+ }
+
+ @Override
+ public List listSchemaNames()
+ {
+ // Use cached result if available to improve performance
+        List<String> cached = cachedSchemaNames;
+ if (cached != null) {
+ return cached;
+ }
+
+ // Double-checked locking for thread-safe lazy initialization
+ synchronized (this) {
+ // Check again inside synchronized block
+ cached = cachedSchemaNames;
+ if (cached != null) {
+ return cached;
+ }
+
+ // Check if YAML path is configured
+ // If not configured, fall back to default schema for backward compatibility
+ if (config.getMetadataYamlPath() == null) {
+ log.warn("Metadata YAML path not configured, returning default schema only");
+ cachedSchemaNames = ImmutableList.of(DEFAULT_SCHEMA_NAME);
+ return cachedSchemaNames;
+ }
+
+ // Prepare to parse the YAML metadata file
+ Path tablesSchemaPath = Paths.get(config.getMetadataYamlPath());
+
+ try {
+ // Parse the YAML file into a nested Map structure
+ // Expected structure:
+ // clp:
+ // default:
+ // table1: /path/to/schema1.yaml
+ // dev:
+ // table2: /path/to/schema2.yaml
+                Map<String, Object> root = yamlMapper.readValue(
+                        new File(tablesSchemaPath.toString()),
+                        new TypeReference<Map<String, Object>>() {});
+
+ // Extract the catalog object (e.g., "clp")
+ // This contains all schemas as keys
+ Object catalogObj = root.get(CONNECTOR_NAME);
+ if (!(catalogObj instanceof Map)) {
+ // Log error and fall back to default schema for graceful degradation
+ log.error("The metadata YAML does not contain valid catalog field: %s, returning default schema only", CONNECTOR_NAME);
+                    List<String> defaultSchema = ImmutableList.of(DEFAULT_SCHEMA_NAME);
+ cachedSchemaNames = defaultSchema;
+ return defaultSchema;
+ }
+
+ // Extract schema names from the catalog Map
+ // Each key represents a schema name (e.g., "default", "dev", "prod")
+                Map<String, Object> catalogMap = (Map<String, Object>) catalogObj;
+                List<String> schemas = ImmutableList.copyOf(catalogMap.keySet());
+ log.info("Discovered %d schema(s) from YAML metadata: %s", schemas.size(), schemas);
+
+ // Cache the result for future calls
+ cachedSchemaNames = schemas;
+ return schemas;
+ }
+ catch (IOException e) {
+ // If YAML parsing fails (file not found, malformed, etc.), fall back to default schema
+ // This ensures the connector still works even with configuration errors
+ log.error(e, "Failed to parse metadata YAML file: %s, returning default schema only", config.getMetadataYamlPath());
+                List<String> defaultSchema = ImmutableList.of(DEFAULT_SCHEMA_NAME);
+ cachedSchemaNames = defaultSchema;
+ return defaultSchema;
+ }
+ }
+ }
+
+ @Override
+    public List<ClpColumnHandle> listColumnHandles(SchemaTableName schemaTableName)
+ {
+ String schemaName = schemaTableName.getSchemaName();
+ String tableName = schemaTableName.getTableName();
+
+ // Get the schema-specific map
+        Map<String, String> tablesInSchema;
+ synchronized (tableSchemaYamlMapPerSchema) {
+ tablesInSchema = tableSchemaYamlMapPerSchema.get(schemaName);
+ }
+
+ if (tablesInSchema == null) {
+ log.error("No tables loaded for schema: %s", schemaName);
+ return Collections.emptyList();
+ }
+
+ String schemaPath = tablesInSchema.get(tableName);
+ if (schemaPath == null) {
+ log.error("No schema path found for table: %s.%s", schemaName, tableName);
+ return Collections.emptyList();
+ }
+
+ Path tableSchemaPath = Paths.get(schemaPath);
+ ClpSchemaTree schemaTree = new ClpSchemaTree(config.isPolymorphicTypeEnabled());
+
+ try {
+ // Use the shared yamlMapper for better performance
+            Map<String, Object> root = yamlMapper.readValue(
+                    new File(tableSchemaPath.toString()),
+                    new TypeReference<Map<String, Object>>() {});
+            ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
+            ImmutableList.Builder<Byte> typesBuilder = ImmutableList.builder();
+            collectTypes(root, "", namesBuilder, typesBuilder);
+            ImmutableList<String> names = namesBuilder.build();
+            ImmutableList<Byte> types = typesBuilder.build();
+ // The names and types should have same sizes
+ for (int i = 0; i < names.size(); i++) {
+ schemaTree.addColumn(names.get(i), types.get(i));
+ }
+ return schemaTree.collectColumnHandles();
+ }
+ catch (IOException e) {
+ log.error(format("Failed to parse table schema file %s, error: %s", tableSchemaPath, e.getMessage()), e);
+ }
+ return Collections.emptyList();
+ }
+
+ @Override
+    public List<ClpTableHandle> listTableHandles(String schemaName)
+ {
+ // Check if YAML path is configured
+ if (config.getMetadataYamlPath() == null) {
+ log.warn("Metadata YAML path not configured");
+ return Collections.emptyList();
+ }
+
+ Path tablesSchemaPath = Paths.get(config.getMetadataYamlPath());
+
+ try {
+ // Use the shared yamlMapper for better performance
+            Map<String, Object> root = yamlMapper.readValue(new File(tablesSchemaPath.toString()),
+                    new TypeReference<Map<String, Object>>() {});
+
+ Object catalogObj = root.get(CONNECTOR_NAME);
+ if (!(catalogObj instanceof Map)) {
+ throw new PrestoException(CLP_UNSUPPORTED_TABLE_SCHEMA_YAML, format("The table schema does not contain field: %s", CONNECTOR_NAME));
+ }
+
+ Object schemaObj = ((Map) catalogObj).get(schemaName);
+ if (schemaObj == null) {
+ log.warn("Schema '%s' not found in metadata YAML", schemaName);
+ return Collections.emptyList();
+ }
+
+ if (!(schemaObj instanceof Map)) {
+ log.error("Schema '%s' is not a valid map structure", schemaName);
+ return Collections.emptyList();
+ }
+
+            ImmutableList.Builder<ClpTableHandle> tableHandlesBuilder = new ImmutableList.Builder<>();
+            ImmutableMap.Builder<String, String> tableToYamlPathBuilder = new ImmutableMap.Builder<>();
+
+            for (Map.Entry<String, Object> schemaEntry : ((Map<String, Object>) schemaObj).entrySet()) {
+ String tableName = schemaEntry.getKey();
+ String tableSchemaYamlPath = schemaEntry.getValue().toString();
+
+ // Resolve relative paths relative to the directory containing tables-schema.yaml
+ Path resolvedPath = Paths.get(tableSchemaYamlPath);
+ if (!resolvedPath.isAbsolute()) {
+ // If relative, resolve it relative to the parent directory of tables-schema.yaml
+ Path parentDir = tablesSchemaPath.getParent();
+ if (parentDir != null) {
+ resolvedPath = parentDir.resolve(tableSchemaYamlPath).normalize();
+ }
+ }
+
+                // Cache the resolved absolute YAML path so listColumnHandles can locate the table schema later
+ SchemaTableName schemaTableName = new SchemaTableName(schemaName, tableName);
+ tableHandlesBuilder.add(new ClpTableHandle(schemaTableName, ""));
+ tableToYamlPathBuilder.put(tableName, resolvedPath.toString());
+ }
+
+ // Thread-safe update of the schema-specific table map
+ synchronized (tableSchemaYamlMapPerSchema) {
+ tableSchemaYamlMapPerSchema.put(schemaName, tableToYamlPathBuilder.build());
+ }
+
+ return tableHandlesBuilder.build();
+ }
+ catch (IOException e) {
+ log.error(format("Failed to parse metadata file: %s, error: %s", config.getMetadataYamlPath(), e.getMessage()), e);
+ }
+ return Collections.emptyList();
+ }
+
+    private void collectTypes(Object node, String prefix, ImmutableList.Builder<String> namesBuilder, ImmutableList.Builder<Byte> typesBuilder)
+ {
+ if (node instanceof Number) {
+ namesBuilder.add(prefix);
+ typesBuilder.add(((Number) node).byteValue());
+ return;
+ }
+ if (node instanceof List) {
+            for (Number type : (List<Number>) node) {
+ namesBuilder.add(prefix);
+ typesBuilder.add(type.byteValue());
+ }
+ return;
+ }
+        for (Map.Entry<String, Object> entry : ((Map<String, Object>) node).entrySet()) {
+ if (!prefix.isEmpty()) {
+ collectTypes(entry.getValue(), format("%s.%s", prefix, entry.getKey()), namesBuilder, typesBuilder);
+ continue;
+ }
+ collectTypes(entry.getValue(), entry.getKey(), namesBuilder, typesBuilder);
+ }
+ }
+}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java
index 2c216614af10f..c86ea0dbe4d7e 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpComputePushDown.java
@@ -14,7 +14,10 @@
package com.facebook.presto.plugin.clp.optimization;
import com.facebook.airlift.log.Logger;
-import com.facebook.presto.plugin.clp.ClpExpression;
+import com.facebook.presto.common.block.SortOrder;
+import com.facebook.presto.common.type.RowType;
+import com.facebook.presto.plugin.clp.ClpColumnHandle;
+import com.facebook.presto.plugin.clp.ClpMetadata;
import com.facebook.presto.plugin.clp.ClpTableHandle;
import com.facebook.presto.plugin.clp.ClpTableLayoutHandle;
import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider;
@@ -22,25 +25,38 @@
import com.facebook.presto.spi.ConnectorPlanOptimizer;
import com.facebook.presto.spi.ConnectorPlanRewriter;
import com.facebook.presto.spi.ConnectorSession;
+import com.facebook.presto.spi.ConnectorTableLayoutHandle;
import com.facebook.presto.spi.TableHandle;
import com.facebook.presto.spi.VariableAllocator;
import com.facebook.presto.spi.function.FunctionMetadataManager;
import com.facebook.presto.spi.function.StandardFunctionResolution;
import com.facebook.presto.spi.plan.FilterNode;
+import com.facebook.presto.spi.plan.Ordering;
import com.facebook.presto.spi.plan.PlanNode;
import com.facebook.presto.spi.plan.PlanNodeIdAllocator;
+import com.facebook.presto.spi.plan.ProjectNode;
import com.facebook.presto.spi.plan.TableScanNode;
+import com.facebook.presto.spi.plan.TopNNode;
+import com.facebook.presto.spi.relation.ConstantExpression;
import com.facebook.presto.spi.relation.RowExpression;
+import com.facebook.presto.spi.relation.SpecialFormExpression;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Deque;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import static com.facebook.presto.plugin.clp.ClpConnectorFactory.CONNECTOR_NAME;
import static com.facebook.presto.spi.ConnectorPlanRewriter.rewriteWith;
+import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
@@ -67,7 +83,7 @@ public PlanNode optimize(PlanNode maxSubplan, ConnectorSession session, Variable
// Throw exception if any required split filters are missing
if (!rewriter.tableScopeSet.isEmpty() && !rewriter.hasVisitedFilter) {
- splitFilterProvider.checkContainsRequiredFilters(rewriter.tableScopeSet, "");
+ splitFilterProvider.checkContainsRequiredFilters(rewriter.tableScopeSet, ImmutableSet.of());
}
return optimizedPlanNode;
}
@@ -105,6 +121,156 @@ public PlanNode visitFilter(FilterNode node, RewriteContext context)
return processFilter(node, (TableScanNode) node.getSource());
}
+ @Override
+ public PlanNode visitTopN(TopNNode node, RewriteContext context)
+ {
+ PlanNode rewrittenSource = context.rewrite(node.getSource(), null);
+
+ ProjectNode project = null;
+ FilterNode filter = null;
+ PlanNode cursor = rewrittenSource;
+
+ if (cursor instanceof ProjectNode) {
+ project = (ProjectNode) cursor;
+ cursor = project.getSource();
+ }
+ if (cursor instanceof FilterNode) {
+ filter = (FilterNode) cursor;
+ cursor = filter.getSource();
+ }
+ if (!(cursor instanceof TableScanNode)) {
+ return node.replaceChildren(ImmutableList.of(rewrittenSource));
+ }
+
+ TableScanNode scan = (TableScanNode) cursor;
+ TableHandle tableHandle = scan.getTable();
+ if (!(tableHandle.getConnectorHandle() instanceof ClpTableHandle)) {
+ return node.replaceChildren(ImmutableList.of(rewrittenSource));
+ }
+
+ // only allow TopN pushdown when metadata-only is true
+ boolean metadataOnly = false;
+        Optional<ConnectorTableLayoutHandle> layout = tableHandle.getLayout();
+        Optional<String> kql = Optional.empty();
+        Optional<String> metadataSql = Optional.empty();
+        Optional<ClpTopNSpec> existingTopN = Optional.empty();
+ ClpTableHandle clpTableHandle = null;
+
+ if (layout.isPresent() && layout.get() instanceof ClpTableLayoutHandle) {
+ ClpTableLayoutHandle cl = (ClpTableLayoutHandle) layout.get();
+ metadataOnly = cl.isMetadataQueryOnly();
+ kql = cl.getKqlQuery();
+ metadataSql = cl.getMetadataSql();
+ existingTopN = cl.getTopN();
+ clpTableHandle = cl.getTable();
+ }
+
+ if (!metadataOnly) {
+ // Rule: skip TopN pushdown unless metadataQueryOnly is true
+ return node.replaceChildren(ImmutableList.of(rewrittenSource));
+ }
+
+ // Ensure ORDER BY items are plain variables (allow identity through Project)
+        List<Ordering> ords = node.getOrderingScheme().getOrderBy();
+ if (project != null && !areIdents(project, ords)) {
+ return node.replaceChildren(ImmutableList.of(rewrittenSource));
+ }
+
+        Map<VariableReferenceExpression, ColumnHandle> assignments = scan.getAssignments();
+        List<ClpTopNSpec.Ordering> newOrderings = new ArrayList<>(ords.size());
+ for (Ordering ord : ords) {
+ VariableReferenceExpression outVar = ord.getVariable();
+            Optional<String> columnNameOpt = buildOrderColumnName(project, outVar, assignments);
+ if (!columnNameOpt.isPresent()) {
+ return node.replaceChildren(ImmutableList.of(rewrittenSource));
+ }
+
+ String tableScope = CONNECTOR_NAME + "." + (clpTableHandle != null ?
+ clpTableHandle.getSchemaTableName().toString() : ClpMetadata.DEFAULT_SCHEMA_NAME);
+
+            List<String> remappedColumnName = splitFilterProvider.remapColumnName(tableScope, columnNameOpt.get());
+ newOrderings.add(new ClpTopNSpec.Ordering(remappedColumnName, toClpOrder(ord.getSortOrder())));
+ }
+
+ if (existingTopN.isPresent()) {
+ ClpTopNSpec ex = existingTopN.get();
+ if (!sameOrdering(ex.getOrderings(), newOrderings)) {
+ return node.replaceChildren(ImmutableList.of(rewrittenSource)); // leave existing as-is
+ }
+ long mergedLimit = Math.min(ex.getLimit(), node.getCount());
+ if (mergedLimit == ex.getLimit()) {
+ // No change needed; keep current layout/spec
+ return node.replaceChildren(ImmutableList.of(rewrittenSource));
+ }
+
+ // Tighten the limit on the layout
+ ClpTopNSpec tightened = new ClpTopNSpec(mergedLimit, ex.getOrderings());
+ ClpTableHandle clpHandle = (ClpTableHandle) tableHandle.getConnectorHandle();
+ ClpTableLayoutHandle newLayout =
+ new ClpTableLayoutHandle(clpHandle, kql, metadataSql, true, Optional.of(tightened));
+
+ TableScanNode newScan = new TableScanNode(
+ scan.getSourceLocation(),
+ idAllocator.getNextId(),
+ new TableHandle(
+ tableHandle.getConnectorId(),
+ clpHandle,
+ tableHandle.getTransaction(),
+ Optional.of(newLayout)),
+ scan.getOutputVariables(),
+ scan.getAssignments(),
+ scan.getTableConstraints(),
+ scan.getCurrentConstraint(),
+ scan.getEnforcedConstraint(),
+ scan.getCteMaterializationInfo());
+
+ PlanNode newSource = newScan;
+ if (filter != null) {
+ newSource = new FilterNode(filter.getSourceLocation(), idAllocator.getNextId(), newSource, filter.getPredicate());
+ }
+ if (project != null) {
+ newSource = new ProjectNode(
+ project.getSourceLocation(),
+ idAllocator.getNextId(),
+ newSource,
+ project.getAssignments(),
+ project.getLocality());
+ }
+
+ return new TopNNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, node.getCount(), node.getOrderingScheme(), node.getStep());
+ }
+
+ ClpTopNSpec spec = new ClpTopNSpec(node.getCount(), newOrderings);
+ ClpTableHandle clpHandle = (ClpTableHandle) tableHandle.getConnectorHandle();
+ ClpTableLayoutHandle newLayout =
+ new ClpTableLayoutHandle(clpHandle, kql, metadataSql, true, Optional.of(spec));
+
+ TableScanNode newScanNode = new TableScanNode(
+ scan.getSourceLocation(),
+ idAllocator.getNextId(),
+ new TableHandle(
+ tableHandle.getConnectorId(),
+ clpHandle,
+ tableHandle.getTransaction(),
+ Optional.of(newLayout)),
+ scan.getOutputVariables(),
+ scan.getAssignments(),
+ scan.getTableConstraints(),
+ scan.getCurrentConstraint(),
+ scan.getEnforcedConstraint(),
+ scan.getCteMaterializationInfo());
+
+ PlanNode newSource = newScanNode;
+ if (filter != null) {
+ newSource = new FilterNode(filter.getSourceLocation(), idAllocator.getNextId(), newSource, filter.getPredicate());
+ }
+ if (project != null) {
+ newSource = new ProjectNode(project.getSourceLocation(), idAllocator.getNextId(), newSource, project.getAssignments(), project.getLocality());
+ }
+
+ return new TopNNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, node.getCount(), node.getOrderingScheme(), node.getStep());
+ }
+
private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNode)
{
hasVisitedFilter = true;
@@ -114,21 +280,23 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod
String tableScope = CONNECTOR_NAME + "." + clpTableHandle.getSchemaTableName().toString();
Map assignments = tableScanNode.getAssignments();
+        Set<String> metadataColumnNames = splitFilterProvider.getColumnNames(tableScope);
ClpExpression clpExpression = filterNode.getPredicate().accept(
new ClpFilterToKqlConverter(
functionResolution,
functionManager,
assignments,
- splitFilterProvider.getColumnNames(tableScope)),
+ metadataColumnNames),
null);
+
Optional kqlQuery = clpExpression.getPushDownExpression();
Optional metadataSqlQuery = clpExpression.getMetadataSqlQuery();
Optional remainingPredicate = clpExpression.getRemainingExpression();
// Perform required metadata filter checks before handling the KQL query (if kqlQuery
// isn't present, we'll return early, skipping subsequent checks).
- splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), metadataSqlQuery.orElse(""));
+ splitFilterProvider.checkContainsRequiredFilters(ImmutableSet.of(tableScope), clpExpression.getPushDownVariables());
boolean hasMetadataFilter = metadataSqlQuery.isPresent() && !metadataSqlQuery.get().isEmpty();
if (hasMetadataFilter) {
metadataSqlQuery = Optional.of(splitFilterProvider.remapSplitFilterPushDownExpression(tableScope, metadataSqlQuery.get()));
@@ -140,7 +308,12 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod
log.debug("KQL query: %s", kqlQuery.get());
}
- ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(clpTableHandle, kqlQuery, metadataSqlQuery);
+ ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(
+ clpTableHandle,
+ kqlQuery,
+ metadataSqlQuery,
+ metadataColumnNames.equals(clpExpression.getPushDownVariables()),
+ Optional.empty());
TableHandle newTableHandle = new TableHandle(
tableHandle.getConnectorId(),
clpTableHandle,
@@ -171,5 +344,141 @@ private PlanNode processFilter(FilterNode filterNode, TableScanNode tableScanNod
return tableScanNode;
}
}
+
+    private boolean sameOrdering(List<ClpTopNSpec.Ordering> a, List<ClpTopNSpec.Ordering> b)
+ {
+ if (a.size() != b.size()) {
+ return false;
+ }
+ for (int i = 0; i < a.size(); i++) {
+ ClpTopNSpec.Ordering x = a.get(i);
+ ClpTopNSpec.Ordering y = b.get(i);
+ if (!Objects.equals(x.getColumns(), y.getColumns())) {
+ return false;
+ }
+ if (x.getOrder() != y.getOrder()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /** Accept plain var or dereference-of-var passthroughs. */
+    private boolean areIdents(ProjectNode project, List<Ordering> vars)
+ {
+ for (Ordering ord : vars) {
+ VariableReferenceExpression out = ord.getVariable();
+ RowExpression expr = project.getAssignments().get(out);
+
+ if (expr instanceof VariableReferenceExpression) {
+ continue;
+ }
+ if (isDereferenceChainOverVariable(expr)) {
+ continue;
+ }
+ return false;
+ }
+ return true;
+ }
+
+ /** Build final column name string for CLP (e.g., "msg.timestamp"), or empty if not pushdownable. */
+    private Optional<String> buildOrderColumnName(
+            ProjectNode project,
+            VariableReferenceExpression outVar,
+            Map<VariableReferenceExpression, ColumnHandle> assignments)
+ {
+ if (project == null) {
+ // ORDER BY directly on scan var
+ ColumnHandle ch = assignments.get(outVar);
+ if (!(ch instanceof ClpColumnHandle)) {
+ return Optional.empty();
+ }
+ return Optional.of(((ClpColumnHandle) ch).getOriginalColumnName());
+ }
+
+ RowExpression expr = project.getAssignments().get(outVar);
+ if (expr instanceof VariableReferenceExpression) {
+ ColumnHandle ch = assignments.get((VariableReferenceExpression) expr);
+ if (!(ch instanceof ClpColumnHandle)) {
+ return Optional.empty();
+ }
+ return Optional.of(((ClpColumnHandle) ch).getOriginalColumnName());
+ }
+
+ // Handle DEREFERENCE chain: baseVar.field1.field2...
+        Deque<String> path = new ArrayDeque<>();
+ RowExpression cur = expr;
+
+ while (cur instanceof SpecialFormExpression
+ && ((SpecialFormExpression) cur).getForm() == SpecialFormExpression.Form.DEREFERENCE) {
+ SpecialFormExpression s = (SpecialFormExpression) cur;
+ RowExpression base = s.getArguments().get(0);
+ RowExpression indexExpr = s.getArguments().get(1);
+
+ if (!(indexExpr instanceof ConstantExpression) || !(base.getType() instanceof RowType)) {
+ return Optional.empty();
+ }
+ int idx;
+ Object v = ((ConstantExpression) indexExpr).getValue();
+ if (v instanceof Long) {
+ idx = toIntExact((Long) v);
+ }
+ else if (v instanceof Integer) {
+ idx = (Integer) v;
+ }
+ else {
+ return Optional.empty();
+ }
+
+ RowType rowType = (RowType) base.getType();
+ if (idx < 0 || idx >= rowType.getFields().size()) {
+ return Optional.empty();
+ }
+ String fname = rowType.getFields().get(idx).getName().orElse(String.valueOf(idx));
+ // We traverse outer->inner; collect in deque and join later
+ path.addLast(fname);
+
+ cur = base; // move up the chain
+ }
+
+ if (!(cur instanceof VariableReferenceExpression)) {
+ return Optional.empty();
+ }
+
+ ColumnHandle baseCh = assignments.get((VariableReferenceExpression) cur);
+ if (!(baseCh instanceof ClpColumnHandle)) {
+ return Optional.empty();
+ }
+
+ String baseName = ((ClpColumnHandle) baseCh).getOriginalColumnName();
+ if (path.isEmpty()) {
+ return Optional.of(baseName);
+ }
+ return Optional.of(baseName + "." + String.join(".", path));
+ }
+
+ /** True if expr is DEREFERENCE(... DEREFERENCE(baseVar, i) ..., j) with baseVar a VariableReferenceExpression. */
+ private boolean isDereferenceChainOverVariable(RowExpression expr)
+ {
+ RowExpression cur = expr;
+ while (cur instanceof SpecialFormExpression
+ && ((SpecialFormExpression) cur).getForm() == SpecialFormExpression.Form.DEREFERENCE) {
+ cur = ((SpecialFormExpression) cur).getArguments().get(0);
+ }
+ return (cur instanceof VariableReferenceExpression);
+ }
+
+ private ClpTopNSpec.Order toClpOrder(SortOrder so)
+ {
+ switch (so) {
+ case ASC_NULLS_FIRST:
+ case ASC_NULLS_LAST:
+ return ClpTopNSpec.Order.ASC;
+ case DESC_NULLS_FIRST:
+ case DESC_NULLS_LAST:
+ return ClpTopNSpec.Order.DESC;
+ default: throw new IllegalArgumentException("Unknown sort order: " + so);
+ }
+ }
}
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java
similarity index 65%
rename from presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java
rename to presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java
index e970f9848a9cf..571ecb028dc0a 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/ClpExpression.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpExpression.java
@@ -11,11 +11,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package com.facebook.presto.plugin.clp;
+package com.facebook.presto.plugin.clp.optimization;
import com.facebook.presto.spi.relation.RowExpression;
+import com.google.common.collect.ImmutableSet;
import java.util.Optional;
+import java.util.Set;
/**
* Represents the result of:
@@ -38,11 +40,19 @@ public class ClpExpression
// The remaining (non-translatable) portion of the RowExpression, if any.
private final Optional remainingExpression;
- public ClpExpression(String pushDownExpression, String metadataSqlQuery, RowExpression remainingExpression)
+ // Variables used in pushDownExpression
+    private final Set<String> pushDownVariables;
+
+    public ClpExpression(
+            String pushDownExpression,
+            String metadataSqlQuery,
+            RowExpression remainingExpression,
+            Set<String> pushDownVariables)
{
this.pushDownExpression = Optional.ofNullable(pushDownExpression);
this.metadataSqlQuery = Optional.ofNullable(metadataSqlQuery);
this.remainingExpression = Optional.ofNullable(remainingExpression);
+ this.pushDownVariables = ImmutableSet.copyOf(pushDownVariables);
}
/**
@@ -50,7 +60,7 @@ public ClpExpression(String pushDownExpression, String metadataSqlQuery, RowExpr
*/
public ClpExpression()
{
- this(null, null, null);
+ this(null, null, null, ImmutableSet.of());
}
/**
@@ -60,7 +70,18 @@ public ClpExpression()
*/
public ClpExpression(String pushDownExpression)
{
- this(pushDownExpression, null, null);
+ this(pushDownExpression, null, null, ImmutableSet.of());
+ }
+
+    /**
+     * Creates a ClpExpression from a fully translatable KQL query or column name.
+     *
+     * @param pushDownExpression the fully translatable KQL query or column name
+     * @param pushDownVariables the variables referenced by pushDownExpression
+     */
+    public ClpExpression(String pushDownExpression, Set<String> pushDownVariables)
+ {
+ this(pushDownExpression, null, null, pushDownVariables);
}
/**
@@ -72,7 +93,20 @@ public ClpExpression(String pushDownExpression)
*/
public ClpExpression(String pushDownExpression, String metadataSqlQuery)
{
- this(pushDownExpression, metadataSqlQuery, null);
+ this(pushDownExpression, metadataSqlQuery, null, ImmutableSet.of());
+ }
+
+    /**
+     * Creates a ClpExpression from a fully translatable KQL string or column name, as well as a
+     * metadata SQL string.
+     *
+     * @param pushDownExpression the fully translatable KQL string or column name
+     * @param metadataSqlQuery the metadata SQL string
+     * @param pushDownVariables the variables referenced by pushDownExpression
+     */
+    public ClpExpression(String pushDownExpression, String metadataSqlQuery, Set<String> pushDownVariables)
+ {
+ this(pushDownExpression, metadataSqlQuery, null, pushDownVariables);
}
/**
@@ -82,7 +116,7 @@ public ClpExpression(String pushDownExpression, String metadataSqlQuery)
*/
public ClpExpression(RowExpression remainingExpression)
{
- this(null, null, remainingExpression);
+ this(null, null, remainingExpression, ImmutableSet.of());
}
public Optional getPushDownExpression()
@@ -99,4 +133,9 @@ public Optional getRemainingExpression()
{
return remainingExpression;
}
+
+ public Set getPushDownVariables()
+ {
+ return pushDownVariables;
+ }
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java
index b27a61ef0d65a..cca19114e8ed4 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpFilterToKqlConverter.java
@@ -16,10 +16,10 @@
import com.facebook.presto.common.function.OperatorType;
import com.facebook.presto.common.type.DecimalType;
import com.facebook.presto.common.type.RowType;
+import com.facebook.presto.common.type.TimestampType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.plugin.clp.ClpColumnHandle;
-import com.facebook.presto.plugin.clp.ClpExpression;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.function.FunctionHandle;
@@ -65,6 +65,7 @@
import static java.lang.Integer.parseInt;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
+import static java.util.concurrent.TimeUnit.SECONDS;
/**
* A translator to translate Presto {@link RowExpression}s into:
@@ -163,7 +164,8 @@ public ClpExpression visitConstant(ConstantExpression node, Void context)
@Override
public ClpExpression visitVariableReference(VariableReferenceExpression node, Void context)
{
- return new ClpExpression(getVariableName(node));
+ String variableName = getVariableName(node);
+ return new ClpExpression(variableName, ImmutableSet.of(variableName));
}
@Override
@@ -250,7 +252,8 @@ private ClpExpression handleBetween(CallExpression node)
return new ClpExpression(node);
}
- Optional variableOpt = first.accept(this, null).getPushDownExpression();
+ ClpExpression variableExpression = first.accept(this, null);
+ Optional variableOpt = variableExpression.getPushDownExpression();
if (!variableOpt.isPresent()
|| !(second instanceof ConstantExpression)
|| !(third instanceof ConstantExpression)) {
@@ -258,13 +261,15 @@ private ClpExpression handleBetween(CallExpression node)
}
String variable = variableOpt.get();
- String lowerBound = getLiteralString((ConstantExpression) second);
- String upperBound = getLiteralString((ConstantExpression) third);
+ Type lowerBoundType = second.getType();
+ String lowerBound = tryEnsureNanosecondTimestamp(lowerBoundType, getLiteralString((ConstantExpression) second));
+ Type upperBoundType = third.getType();
+ String upperBound = tryEnsureNanosecondTimestamp(upperBoundType, getLiteralString((ConstantExpression) third));
String kql = String.format("%s >= %s AND %s <= %s", variable, lowerBound, variable, upperBound);
String metadataSqlQuery = metadataFilterColumns.contains(variable)
? String.format("\"%s\" >= %s AND \"%s\" <= %s", variable, lowerBound, variable, upperBound)
: null;
- return new ClpExpression(kql, metadataSqlQuery);
+ return new ClpExpression(kql, metadataSqlQuery, variableExpression.getPushDownVariables());
}
/**
@@ -290,10 +295,10 @@ private ClpExpression handleNot(CallExpression node)
}
String notPushDownExpression = "NOT " + expression.getPushDownExpression().get();
if (expression.getMetadataSqlQuery().isPresent()) {
- return new ClpExpression(notPushDownExpression, "NOT " + expression.getMetadataSqlQuery());
+            return new ClpExpression(notPushDownExpression, "NOT " + expression.getMetadataSqlQuery().get(), expression.getPushDownVariables());
}
else {
- return new ClpExpression(notPushDownExpression);
+ return new ClpExpression(notPushDownExpression, expression.getPushDownVariables());
}
}
@@ -345,7 +350,7 @@ else if (argument instanceof CallExpression) {
return new ClpExpression(node);
}
pattern = pattern.replace("%", "*").replace("_", "?");
- return new ClpExpression(format("%s: \"%s\"", variableName, pattern));
+ return new ClpExpression(format("%s: \"%s\"", variableName, pattern), variable.getPushDownVariables());
}
/**
@@ -442,33 +447,45 @@ private ClpExpression buildClpExpression(
RowExpression originalNode)
{
String metadataSqlQuery = null;
+ literalString = tryEnsureNanosecondTimestamp(literalType, literalString);
if (operator.equals(EQUAL)) {
if (literalType instanceof VarcharType) {
- return new ClpExpression(format("%s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString)));
+ if (metadataFilterColumns.contains(variableName)) {
+                metadataSqlQuery = format("\"%s\" = '%s'", variableName, literalString.replace("'", "''"));
+ }
+ return new ClpExpression(
+ format("%s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString)), metadataSqlQuery,
+ ImmutableSet.of(variableName));
}
else {
if (metadataFilterColumns.contains(variableName)) {
metadataSqlQuery = format("\"%s\" = %s", variableName, literalString);
}
- return new ClpExpression(format("%s: %s", variableName, literalString), metadataSqlQuery);
+ return new ClpExpression(format("%s: %s", variableName, literalString), metadataSqlQuery, ImmutableSet.of(variableName));
}
}
else if (operator.equals(NOT_EQUAL)) {
if (literalType instanceof VarcharType) {
- return new ClpExpression(format("NOT %s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString)));
+ if (metadataFilterColumns.contains(variableName)) {
+                metadataSqlQuery = format("\"%s\" != '%s'", variableName, literalString.replace("'", "''"));
+ }
+ return new ClpExpression(
+ format("NOT %s: \"%s\"", variableName, escapeKqlSpecialCharsForStringValue(literalString)), metadataSqlQuery,
+ ImmutableSet.of(variableName));
}
else {
if (metadataFilterColumns.contains(variableName)) {
metadataSqlQuery = format("NOT \"%s\" = %s", variableName, literalString);
}
- return new ClpExpression(format("NOT %s: %s", variableName, literalString), metadataSqlQuery);
+ return new ClpExpression(format("NOT %s: %s", variableName, literalString), metadataSqlQuery, ImmutableSet.of(variableName));
}
}
else if (LOGICAL_BINARY_OPS_FILTER.contains(operator) && !(literalType instanceof VarcharType)) {
if (metadataFilterColumns.contains(variableName)) {
- metadataSqlQuery = format("\"%s\" %s %s", variableName, operator.getOperator(), literalString);
+                metadataSqlQuery = format("\"%s\" %s %s", variableName, operator.getOperator(), literalString);
}
- return new ClpExpression(format("%s %s %s", variableName, operator.getOperator(), literalString), metadataSqlQuery);
+ return new ClpExpression(
+ format("%s %s %s", variableName, operator.getOperator(), literalString), metadataSqlQuery, ImmutableSet.of(variableName));
}
return new ClpExpression(originalNode);
}
@@ -576,7 +593,7 @@ private Optional interpretSubstringEquality(SubstrInfo info, Stri
result.append("?");
}
result.append(targetString).append("*\"");
- return Optional.of(new ClpExpression(result.toString()));
+ return Optional.of(new ClpExpression(result.toString(), ImmutableSet.of(info.variableName)));
}
}
}
@@ -590,11 +607,11 @@ private Optional interpretSubstringEquality(SubstrInfo info, Stri
result.append("?");
}
result.append(targetString).append("\"");
- return Optional.of(new ClpExpression(result.toString()));
+ return Optional.of(new ClpExpression(result.toString(), ImmutableSet.of(info.variableName)));
}
if (start == -targetString.length()) {
result.append(format("%s: \"*%s\"", info.variableName, targetString));
- return Optional.of(new ClpExpression(result.toString()));
+ return Optional.of(new ClpExpression(result.toString(), ImmutableSet.of(info.variableName)));
}
}
}
@@ -678,10 +695,12 @@ private ClpExpression handleAnd(SpecialFormExpression node)
List remainingExpressions = new ArrayList<>();
boolean hasMetadataSql = false;
boolean hasPushDownExpression = false;
+ ImmutableSet.Builder pushDownVariables = new ImmutableSet.Builder<>();
for (RowExpression argument : node.getArguments()) {
ClpExpression expression = argument.accept(this, null);
if (expression.getPushDownExpression().isPresent()) {
hasPushDownExpression = true;
+ pushDownVariables.addAll(expression.getPushDownVariables());
queryBuilder.append(expression.getPushDownExpression().get());
queryBuilder.append(" AND ");
if (expression.getMetadataSqlQuery().isPresent()) {
@@ -702,18 +721,21 @@ else if (!remainingExpressions.isEmpty()) {
return new ClpExpression(
queryBuilder.substring(0, queryBuilder.length() - 5) + ")",
hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null,
- remainingExpressions.get(0));
+ remainingExpressions.get(0),
+ pushDownVariables.build());
}
else {
return new ClpExpression(
queryBuilder.substring(0, queryBuilder.length() - 5) + ")",
hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null,
- new SpecialFormExpression(node.getSourceLocation(), AND, BOOLEAN, remainingExpressions));
+ new SpecialFormExpression(node.getSourceLocation(), AND, BOOLEAN, remainingExpressions),
+ pushDownVariables.build());
}
}
// Remove the last " AND " from the query
return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 5) + ")",
- hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null);
+ hasMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 5) + ")" : null,
+ pushDownVariables.build());
}
/**
@@ -736,6 +758,7 @@ private ClpExpression handleOr(SpecialFormExpression node)
queryBuilder.append("(");
boolean allPushedDown = true;
boolean hasAllMetadataSql = true;
+ ImmutableSet.Builder pushDownVariables = new ImmutableSet.Builder<>();
for (RowExpression argument : node.getArguments()) {
ClpExpression expression = argument.accept(this, null);
// Note: It is possible in the future that an expression cannot be pushed down as a KQL query, but can be
@@ -746,6 +769,7 @@ private ClpExpression handleOr(SpecialFormExpression node)
}
queryBuilder.append(expression.getPushDownExpression().get());
queryBuilder.append(" OR ");
+ pushDownVariables.addAll(expression.getPushDownVariables());
if (hasAllMetadataSql && expression.getMetadataSqlQuery().isPresent()) {
metadataQueryBuilder.append(expression.getMetadataSqlQuery().get());
metadataQueryBuilder.append(" OR ");
@@ -758,7 +782,8 @@ private ClpExpression handleOr(SpecialFormExpression node)
// Remove the last " OR " from the query
return new ClpExpression(
queryBuilder.substring(0, queryBuilder.length() - 4) + ")",
- hasAllMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 4) + ")" : null);
+ hasAllMetadataSql ? metadataQueryBuilder.substring(0, metadataQueryBuilder.length() - 4) + ")" : null,
+ pushDownVariables.build());
}
return new ClpExpression(node);
}
@@ -798,7 +823,7 @@ private ClpExpression handleIn(SpecialFormExpression node)
}
// Remove the last " OR " from the query
- return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")");
+ return new ClpExpression(queryBuilder.substring(0, queryBuilder.length() - 4) + ")", variable.getPushDownVariables());
}
/**
@@ -823,7 +848,7 @@ private ClpExpression handleIsNull(SpecialFormExpression node)
}
String variableName = expression.getPushDownExpression().get();
- return new ClpExpression(format("NOT %s: *", variableName));
+ return new ClpExpression(format("NOT %s: *", variableName), expression.getPushDownVariables());
}
/**
@@ -885,7 +910,7 @@ private ClpExpression handleDereference(RowExpression expression)
if (!baseString.getPushDownExpression().isPresent()) {
return new ClpExpression(expression);
}
- return new ClpExpression(baseString.getPushDownExpression().get() + "." + fieldName);
+ return new ClpExpression(baseString.getPushDownExpression().get() + "." + fieldName, baseString.getPushDownVariables());
}
/**
@@ -925,6 +950,26 @@ public static boolean isClpCompatibleNumericType(Type type)
|| type instanceof DecimalType;
}
+ private static String tryEnsureNanosecondTimestamp(Type type, String literalString)
+ {
+ if (type == TIMESTAMP) {
+ return ensureNanosecondTimestamp(TIMESTAMP, literalString);
+ }
+ else if (type == TIMESTAMP_MICROSECONDS) {
+ return ensureNanosecondTimestamp(TIMESTAMP_MICROSECONDS, literalString);
+ }
+ return literalString;
+ }
+
+ private static String ensureNanosecondTimestamp(TimestampType type, String literalString)
+ {
+ long literalNumber = Long.parseLong(literalString);
+ long seconds = type.getEpochSecond(literalNumber);
+ long nanosecondFraction = type.getNanos(literalNumber);
+ long nanoseconds = SECONDS.toNanos(seconds) + nanosecondFraction;
+ return Long.toString(nanoseconds);
+ }
+
private static class SubstrInfo
{
String variableName;
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java
index b536c95ad216a..bdf50eb0fb709 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpPlanOptimizerProvider.java
@@ -32,7 +32,10 @@ public class ClpPlanOptimizerProvider
private final ClpSplitFilterProvider splitFilterProvider;
@Inject
- public ClpPlanOptimizerProvider(FunctionMetadataManager functionManager, StandardFunctionResolution functionResolution, ClpSplitFilterProvider splitFilterProvider)
+ public ClpPlanOptimizerProvider(
+ FunctionMetadataManager functionManager,
+ StandardFunctionResolution functionResolution,
+ ClpSplitFilterProvider splitFilterProvider)
{
this.functionManager = functionManager;
this.functionResolution = functionResolution;
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpTopNSpec.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpTopNSpec.java
new file mode 100644
index 0000000000000..de2f3ee2eab6c
--- /dev/null
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpTopNSpec.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.optimization;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.List;
+import java.util.Objects;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Represents a Top-N specification for a query, including the limit of rows to return
+ * and the ordering of columns.
+ */
+public class ClpTopNSpec
+{
+ /**
+ * Enum representing the order direction: ascending or descending.
+ */
+ public enum Order
+ {
+ ASC,
+ DESC
+ }
+
+ /**
+ * Represents the ordering of one or more columns with a specified order (ASC or DESC).
+ */
+ public static final class Ordering
+ {
+ private final List columns;
+ private final Order order;
+
+ @JsonCreator
+ public Ordering(
+ @JsonProperty("columns") List columns,
+ @JsonProperty("order") Order order)
+ {
+            this.columns = requireNonNull(columns, "columns is null");
+ this.order = requireNonNull(order, "order is null");
+ }
+
+ @JsonProperty("columns")
+ public List getColumns()
+ {
+ return columns;
+ }
+
+ @JsonProperty("order")
+ public Order getOrder()
+ {
+ return order;
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(columns, order);
+ }
+
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ Ordering other = (Ordering) obj;
+ return this.order == other.order && this.columns.equals(other.columns);
+ }
+
+ @Override
+ public String toString()
+ {
+ return columns + ":" + order;
+ }
+ }
+
+ private final long limit;
+ private final List orderings;
+
+ @JsonCreator
+ public ClpTopNSpec(
+ @JsonProperty("limit") long limit,
+ @JsonProperty("orderings") List orderings)
+ {
+ if (limit <= 0) {
+ throw new IllegalArgumentException("limit must be > 0");
+ }
+ if (orderings == null || orderings.isEmpty()) {
+ throw new IllegalArgumentException("orderings must be non-empty");
+ }
+ this.limit = limit;
+ this.orderings = orderings;
+ }
+
+ @JsonProperty("limit")
+ public long getLimit()
+ {
+ return limit;
+ }
+
+ @JsonProperty("orderings")
+ public List getOrderings()
+ {
+ return orderings;
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(limit, orderings);
+ }
+
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ ClpTopNSpec other = (ClpTopNSpec) obj;
+ return this.limit == other.limit && this.orderings.equals(other.orderings);
+ }
+
+ @Override
+ public String toString()
+ {
+ return "ClpTopNSpec (limit=" + limit + ", order=" + orderings + ")";
+ }
+}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java
index 75d0a66cc02c7..f691b8ecaa23c 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/optimization/ClpUdfRewriter.java
@@ -127,14 +127,40 @@ public PlanNode visitProject(ProjectNode node, RewriteContext context)
rewriteClpUdfs(entry.getValue(), functionManager, variableAllocator, true));
}
- PlanNode newSource = rewritePlanSubtree(node.getSource());
+ PlanNode newSource = node.getSource().accept(this, context);
return new ProjectNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, newAssignments.build(), node.getLocality());
}
@Override
public PlanNode visitFilter(FilterNode node, RewriteContext context)
{
- return buildNewFilterNode(node);
+ RowExpression newPredicate = rewriteClpUdfs(node.getPredicate(), functionManager, variableAllocator, false);
+ PlanNode newSource = node.getSource().accept(this, context);
+ return new FilterNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, newPredicate);
+ }
+
+ @Override
+ public PlanNode visitTableScan(TableScanNode node, RewriteContext context)
+ {
+ Set outputVars = new LinkedHashSet<>(node.getOutputVariables());
+ Map newAssignments = new HashMap<>(node.getAssignments());
+
+ // Add any missing variables for known handles
+ globalColumnVarMap.forEach((handle, var) -> {
+ outputVars.add(var);
+ newAssignments.put(var, handle);
+ });
+
+ return new TableScanNode(
+ node.getSourceLocation(),
+ idAllocator.getNextId(),
+ node.getTable(),
+ new ArrayList<>(outputVars),
+ newAssignments,
+ node.getTableConstraints(),
+ node.getCurrentConstraint(),
+ node.getEnforcedConstraint(),
+ node.getCteMaterializationInfo());
}
/**
@@ -220,29 +246,6 @@ else if (functionName.startsWith("CLP_GET_")) {
return expression;
}
- /**
- * Recursively rewrites the subtree of a plan node to include any new variables produced by
- * CLP UDF rewrites.
- *
- * @param node the plan node to rewrite
- * @return the rewritten plan node
- */
- private PlanNode rewritePlanSubtree(PlanNode node)
- {
- if (node instanceof TableScanNode) {
- return buildNewTableScanNode((TableScanNode) node);
- }
- else if (node instanceof FilterNode) {
- return buildNewFilterNode((FilterNode) node);
- }
-
- List rewrittenChildren = node.getSources().stream()
- .map(source -> rewritePlanSubtree(source))
- .collect(toImmutableList());
-
- return node.replaceChildren(rewrittenChildren);
- }
-
/**
* Encodes a JSON path into a valid variable name by replacing uppercase letters with
* "_ux", dots with "_dot_", and underscores with "_und_".
@@ -272,48 +275,5 @@ else if (c == '_') {
}
return sb.toString();
}
-
- /**
- * Builds a new {@link TableScanNode} that includes additional
- * {@link VariableReferenceExpression}s and {@link ColumnHandle}s for rewritten CLP UDFs.
- *
- * @param node the original table scan node
- * @return the updated table scan node
- */
- private TableScanNode buildNewTableScanNode(TableScanNode node)
- {
- Set outputVars = new LinkedHashSet<>(node.getOutputVariables());
- Map newAssignments = new HashMap<>(node.getAssignments());
-
- // Add any missing variables for known handles
- globalColumnVarMap.forEach((handle, var) -> {
- outputVars.add(var);
- newAssignments.put(var, handle);
- });
-
- return new TableScanNode(
- node.getSourceLocation(),
- idAllocator.getNextId(),
- node.getTable(),
- new ArrayList<>(outputVars),
- newAssignments,
- node.getTableConstraints(),
- node.getCurrentConstraint(),
- node.getEnforcedConstraint(),
- node.getCteMaterializationInfo());
- }
-
- /**
- * Builds a new {@link FilterNode} with its predicate rewritten to replace CLP UDF calls.
- *
- * @param node the original filter node
- * @return the updated filter node
- */
- private FilterNode buildNewFilterNode(FilterNode node)
- {
- RowExpression newPredicate = rewriteClpUdfs(node.getPredicate(), functionManager, variableAllocator, false);
- PlanNode newSource = rewritePlanSubtree(node.getSource());
- return new FilterNode(node.getSourceLocation(), idAllocator.getNextId(), newSource, newPredicate);
- }
}
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java
index 6b54218509c7f..13435e28a8b2c 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpMySqlSplitProvider.java
@@ -18,6 +18,7 @@
import com.facebook.presto.plugin.clp.ClpSplit;
import com.facebook.presto.plugin.clp.ClpTableHandle;
import com.facebook.presto.plugin.clp.ClpTableLayoutHandle;
+import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec;
import com.google.common.collect.ImmutableList;
import javax.inject.Inject;
@@ -27,22 +28,26 @@
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
+import java.util.ArrayList;
import java.util.List;
+import java.util.Optional;
import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE;
import static java.lang.String.format;
+import static java.util.Comparator.comparingLong;
public class ClpMySqlSplitProvider
implements ClpSplitProvider
{
// Column names
public static final String ARCHIVES_TABLE_COLUMN_ID = "id";
+ public static final String ARCHIVES_TABLE_NUM_MESSAGES = "num_messages";
// Table suffixes
public static final String ARCHIVES_TABLE_SUFFIX = "_archives";
// SQL templates
- private static final String SQL_SELECT_ARCHIVES_TEMPLATE = format("SELECT `%s` FROM `%%s%%s%s` WHERE 1 = 1", ARCHIVES_TABLE_COLUMN_ID, ARCHIVES_TABLE_SUFFIX);
+ private static final String SQL_SELECT_ARCHIVES_TEMPLATE = format("SELECT * FROM `%%s%%s%s` WHERE 1 = 1", ARCHIVES_TABLE_SUFFIX);
private static final Logger log = Logger.get(ClpMySqlSplitProvider.class);
@@ -66,6 +71,7 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle)
{
ImmutableList.Builder splits = new ImmutableList.Builder<>();
ClpTableHandle clpTableHandle = clpTableLayoutHandle.getTable();
+ Optional topNSpecOptional = clpTableLayoutHandle.getTopN();
String tablePath = clpTableHandle.getTablePath();
String tableName = clpTableHandle.getSchemaTableName().getTableName();
String archivePathQuery = format(SQL_SELECT_ARCHIVES_TEMPLATE, config.getMetadataTablePrefix(), tableName);
@@ -74,6 +80,25 @@ public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle)
String metadataFilterQuery = clpTableLayoutHandle.getMetadataSql().get();
archivePathQuery += " AND (" + metadataFilterQuery + ")";
}
+
+ if (topNSpecOptional.isPresent()) {
+ ClpTopNSpec topNSpec = topNSpecOptional.get();
+ // Only handles one range metadata column for now
+ ClpTopNSpec.Ordering ordering = topNSpec.getOrderings().get(0);
+ String col = ordering.getColumns().get(ordering.getColumns().size() - 1);
+ String dir = (ordering.getOrder() == ClpTopNSpec.Order.ASC) ? "ASC" : "DESC";
+ archivePathQuery += " ORDER BY " + "`" + col + "` " + dir;
+
+ List archiveMetaList = fetchArchiveMeta(archivePathQuery, ordering);
+ List selected = selectTopNArchives(archiveMetaList, topNSpec.getLimit(), ordering.getOrder());
+
+ for (ArchiveMeta a : selected) {
+ splits.add(new ClpSplit(tablePath + "/" + a.id, ARCHIVE, clpTableLayoutHandle.getKqlQuery()));
+ }
+ ImmutableList result = splits.build();
+ log.debug("Number of splits: %s", result.size());
+ return result;
+ }
log.debug("Query for archive: %s", archivePathQuery);
try (Connection connection = getConnection()) {
@@ -105,4 +130,171 @@ private Connection getConnection()
}
return connection;
}
+
+ /**
+ * Fetches archive metadata from the database.
+ *
+ * @param query SQL query string that selects the archives
+ * @param ordering The top-N ordering specifying which columns contain lowerBound/upperBound
+ * @return List of ArchiveMeta objects representing archive metadata
+ */
+ private List fetchArchiveMeta(String query, ClpTopNSpec.Ordering ordering)
+ {
+ List list = new ArrayList<>();
+ try (Connection connection = getConnection();
+ PreparedStatement stmt = connection.prepareStatement(query);
+ ResultSet rs = stmt.executeQuery()) {
+ while (rs.next()) {
+ list.add(new ArchiveMeta(
+ rs.getString(ARCHIVES_TABLE_COLUMN_ID),
+ rs.getLong(ordering.getColumns().get(0)),
+ rs.getLong(ordering.getColumns().get(1)),
+ rs.getLong(ARCHIVES_TABLE_NUM_MESSAGES)));
+ }
+ }
+ catch (SQLException e) {
+ log.warn("Database error while fetching archive metadata: %s", e);
+ }
+ return list;
+ }
+
+ /**
+ * Selects the set of archives that must be scanned to guarantee the top-N results by timestamp
+ * (ASC or DESC), given only archive ranges and message counts.
+ *
+ * - Merges overlapping archives into groups (union of time ranges).
+ * - For DESC: always include the newest group, then add older ones until their total
+ * message counts cover the limit.
+ * - For ASC: symmetric — start from the oldest, then add newer ones.
+ *
+     *
+ * @param archives list of archives with [lowerBound, upperBound, messageCount]
+ * @param limit number of messages requested
+ * @param order ASC (earliest first) or DESC (latest first)
+ * @return archives that must be scanned
+ */
+ private static List selectTopNArchives(List archives, long limit, ClpTopNSpec.Order order)
+ {
+ if (archives == null || archives.isEmpty() || limit <= 0) {
+ return ImmutableList.of();
+ }
+
+ // 1) Merge overlaps into groups
+ List groups = toArchiveGroups(archives);
+
+ // 2) Pick minimal set of groups per order, then return all member archives
+ List selected = new ArrayList<>();
+ if (order == ClpTopNSpec.Order.DESC) {
+ // newest group index
+ int k = groups.size() - 1;
+
+ // must include newest group
+ selected.addAll(groups.get(k).members);
+
+ // assume worst case: newest contributes 0 after filter; cover limit from older groups
+ long coveredByOlder = 0;
+ for (int i = k - 1; i >= 0 && coveredByOlder < limit; --i) {
+ selected.addAll(groups.get(i).members);
+ coveredByOlder += groups.get(i).count;
+ }
+ }
+ else {
+ // oldest group index
+ int k = 0;
+
+ // must include oldest group
+ selected.addAll(groups.get(k).members);
+
+ // assume worst case: oldest contributes 0; cover limit from newer groups
+ long coveredByNewer = 0;
+ for (int i = k + 1; i < groups.size() && coveredByNewer < limit; ++i) {
+ selected.addAll(groups.get(i).members);
+ coveredByNewer += groups.get(i).count;
+ }
+ }
+
+ return selected;
+ }
+
+ /**
+ * Groups overlapping archives into non-overlapping archive groups.
+ *
+ * @param archives archives sorted by lowerBound
+ * @return merged groups
+ */
+ private static List toArchiveGroups(List archives)
+ {
+ List sorted = new ArrayList<>(archives);
+ sorted.sort(comparingLong((ArchiveMeta a) -> a.lowerBound)
+ .thenComparingLong(a -> a.upperBound));
+
+ List groups = new ArrayList<>();
+ ArchiveGroup cur = null;
+
+ for (ArchiveMeta a : sorted) {
+ if (cur == null) {
+ cur = startArchiveGroup(a);
+ }
+ else if (overlaps(cur, a)) {
+ // extend current group
+ cur.end = Math.max(cur.end, a.upperBound);
+ cur.count += a.messageCount;
+ cur.members.add(a);
+ }
+ else {
+ // finalize current, start a new one
+ groups.add(cur);
+ cur = startArchiveGroup(a);
+ }
+ }
+ if (cur != null) {
+ groups.add(cur);
+ }
+ return groups;
+ }
+
+ private static ArchiveGroup startArchiveGroup(ArchiveMeta a)
+ {
+ ArchiveGroup group = new ArchiveGroup();
+ group.begin = a.lowerBound;
+ group.end = a.upperBound;
+ group.count = a.messageCount;
+ group.members.add(a);
+ return group;
+ }
+
+ private static boolean overlaps(ArchiveGroup cur, ArchiveMeta a)
+ {
+ return a.lowerBound <= cur.end && a.upperBound >= cur.begin;
+ }
+
+ /**
+ * Represents metadata of an archive, including its ID, timestamp bounds, and message count.
+ */
+ private static class ArchiveMeta
+ {
+ final String id;
+ final long lowerBound;
+ final long upperBound;
+ final long messageCount;
+
+ ArchiveMeta(String id, long lowerBound, long upperBound, long messageCount)
+ {
+ this.id = id;
+ this.lowerBound = lowerBound;
+ this.upperBound = upperBound;
+ this.messageCount = messageCount;
+ }
+ }
+
+ /**
+ * Represents a group of overlapping archives treated as one logical unit.
+ */
+ private static final class ArchiveGroup
+ {
+ long begin;
+ long end;
+ long count;
+ final List members = new ArrayList<>();
+ }
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpPinotSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpPinotSplitProvider.java
new file mode 100644
index 0000000000000..1eca9a93f6b29
--- /dev/null
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpPinotSplitProvider.java
@@ -0,0 +1,432 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.split;
+
+import com.facebook.airlift.log.Logger;
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.facebook.presto.plugin.clp.ClpSplit;
+import com.facebook.presto.plugin.clp.ClpTableHandle;
+import com.facebook.presto.plugin.clp.ClpTableLayoutHandle;
+import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec;
+import com.facebook.presto.spi.SchemaTableName;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
+
+import javax.inject.Inject;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Optional;
+
+import static com.facebook.presto.plugin.clp.ClpSplit.SplitType;
+import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE;
+import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.IR;
+import static java.lang.String.format;
+import static java.util.Comparator.comparingLong;
+import static java.util.Objects.requireNonNull;
+import static java.util.concurrent.TimeUnit.SECONDS;
+
+public class ClpPinotSplitProvider
+ implements ClpSplitProvider
+{
+ private static final Logger log = Logger.get(ClpPinotSplitProvider.class);
+ private static final String SQL_SELECT_SPLITS_TEMPLATE = "SELECT tpath FROM %s WHERE 1 = 1 AND (%s) LIMIT 999999";
+ private static final String SQL_SELECT_SPLIT_META_TEMPLATE = "SELECT tpath, creationtime, lastmodifiedtime, num_messages FROM %s WHERE 1 = 1 AND (%s) ORDER BY %s %s LIMIT 999999";
+ private final ClpConfig config;
+ private final URL pinotSqlQueryEndpointUrl;
+
+    @Inject
+    public ClpPinotSplitProvider(ClpConfig config)
+    {
+        this.config = requireNonNull(config, "config is null");
+        try {
+            // Resolve the query endpoint once at construction so a malformed metadata DB URL
+            // fails fast at injection time rather than on the first listSplits call.
+            this.pinotSqlQueryEndpointUrl = buildPinotSqlQueryEndpointUrl(config);
+        }
+        catch (MalformedURLException e) {
+            throw new IllegalArgumentException(
+                    format("Failed to build Pinot sql query endpoint URL using the provided database url: %s", config.getMetadataDbUrl()), e);
+        }
+    }
+
+    /**
+     * Lists the splits (archives / IR files) to scan for the given table layout.
+     *
+     * When a top-N spec is present, archive metadata (time bounds and message counts) is
+     * fetched first and only the archives needed to satisfy the limit are returned; otherwise
+     * every archive matching the pushed-down metadata filter becomes a split.
+     *
+     * @param clpTableLayoutHandle layout carrying the metadata filter, optional top-N spec,
+     *        and optional KQL query
+     * @return the splits to scan
+     * @throws RuntimeException if querying the metadata store fails
+     */
+    @Override
+    public List listSplits(ClpTableLayoutHandle clpTableLayoutHandle)
+    {
+        ClpTableHandle clpTableHandle = clpTableLayoutHandle.getTable();
+        Optional topNSpecOptional = clpTableLayoutHandle.getTopN();
+        String tableName = inferMetadataTableName(clpTableHandle);
+        try {
+            ImmutableList.Builder splits = new ImmutableList.Builder<>();
+            if (topNSpecOptional.isPresent()) {
+                ClpTopNSpec topNSpec = topNSpecOptional.get();
+                // Only handles one range metadata column for now (first ordering)
+                ClpTopNSpec.Ordering ordering = topNSpec.getOrderings().get(0);
+                // Get the last column in the ordering (the primary sort column for nested fields)
+                String col = ordering.getColumns().get(ordering.getColumns().size() - 1);
+                String dir = (ordering.getOrder() == ClpTopNSpec.Order.ASC) ? "ASC" : "DESC";
+                // "1 = 1" is a no-op filter used when no metadata SQL was pushed down.
+                String splitMetaQuery = buildSplitMetadataQuery(tableName, clpTableLayoutHandle.getMetadataSql().orElse("1 = 1"), col, dir);
+                List archiveMetaList = fetchArchiveMeta(splitMetaQuery, ordering);
+                List selected = selectTopNArchives(archiveMetaList, topNSpec.getLimit(), ordering.getOrder());
+
+                for (ArchiveMeta a : selected) {
+                    String splitPath = a.id;
+                    splits.add(new ClpSplit(splitPath, determineSplitType(splitPath), clpTableLayoutHandle.getKqlQuery()));
+                }
+
+                List filteredSplits = splits.build();
+                log.debug("Number of topN filtered splits: %s", filteredSplits.size());
+                return filteredSplits;
+            }
+
+            // No top-N: select every matching split path.
+            String splitQuery = buildSplitSelectionQuery(tableName, clpTableLayoutHandle.getMetadataSql().orElse("1 = 1"));
+            List splitRows = getQueryResult(pinotSqlQueryEndpointUrl, splitQuery);
+            for (JsonNode row : splitRows) {
+                // The selection query projects a single column (tpath), so the first element is the path.
+                String splitPath = row.elements().next().asText();
+                splits.add(new ClpSplit(splitPath, determineSplitType(splitPath), clpTableLayoutHandle.getKqlQuery()));
+            }
+
+            List filteredSplits = splits.build();
+            log.debug("Number of filtered splits: %s", filteredSplits.size());
+            return filteredSplits;
+        }
+        catch (Exception e) {
+            log.error(e, "Failed to list splits for table %s", tableName);
+            throw new RuntimeException(format("Failed to list splits for table %s: %s", tableName, e.getMessage()), e);
+        }
+    }
+
+    /**
+     * Infers the Pinot metadata table name for the given CLP table handle.
+     *
+     * In the current Pinot metadata layout, tables across different schemas share the same
+     * metadata table: the metadata table name is simply the logical table name, regardless of
+     * which schema is being queried. For example, table "logs" in schema "default" and table
+     * "logs" in schema "production" both resolve to metadata table "logs".
+     *
+     * @param tableHandle the CLP table handle containing schema and table information
+     * @return the Pinot metadata table name (table name only, no schema prefix)
+     * @throws NullPointerException if tableHandle is null
+     */
+    protected String inferMetadataTableName(ClpTableHandle tableHandle)
+    {
+        // The schema component is intentionally ignored: multiple schemas can reference
+        // the same underlying metadata table.
+        return requireNonNull(tableHandle, "tableHandle is null")
+                .getSchemaTableName()
+                .getTableName();
+    }
+
+    /**
+     * Constructs the Pinot SQL query endpoint URL from configuration.
+     * Can be overridden by subclasses to customize URL construction.
+     *
+     * @param config the CLP configuration
+     * @return the Pinot SQL query endpoint URL
+     * @throws MalformedURLException if the constructed URL is invalid
+     */
+    protected URL buildPinotSqlQueryEndpointUrl(ClpConfig config) throws MalformedURLException
+    {
+        // NOTE(review): assumes getMetadataDbUrl() has no trailing slash — a trailing slash
+        // would yield "...//query/sql". Confirm the config contract.
+        return new URL(config.getMetadataDbUrl() + "/query/sql");
+    }
+
+    /**
+     * Fetches archive metadata from the metadata database.
+     *
+     * Row layout follows SQL_SELECT_SPLIT_META_TEMPLATE: column 0 is the archive path/id
+     * (tpath), columns 1 and 2 are creationtime and lastmodifiedtime (used as lower/upper
+     * bounds), and column 3 is num_messages.
+     *
+     * @param query SQL query string that selects the archives
+     * @param ordering The top-N ordering specifying which columns contain lowerBound/upperBound.
+     *        NOTE(review): currently unused by this implementation — confirm whether the bound
+     *        columns should instead be chosen based on it.
+     * @return List of ArchiveMeta objects representing archive metadata
+     */
+    private List fetchArchiveMeta(String query, ClpTopNSpec.Ordering ordering)
+    {
+        ImmutableList.Builder archiveMetas = new ImmutableList.Builder<>();
+        List rows = getQueryResult(pinotSqlQueryEndpointUrl, query);
+        for (JsonNode row : rows) {
+            archiveMetas.add(new ArchiveMeta(
+                    row.get(0).asText(),
+                    row.get(1).asLong(),
+                    row.get(2).asLong(),
+                    row.get(3).asLong()));
+        }
+        return archiveMetas.build();
+    }
+
+    /**
+     * Selects the set of archives that must be scanned to guarantee the top-N results by
+     * timestamp (ASC or DESC), given only archive time ranges and message counts.
+     *
+     * Strategy:
+     * - Merges overlapping archives into groups (union of time ranges).
+     * - For DESC: always include the newest group, then add older ones until their total
+     *   message counts cover the limit.
+     * - For ASC: symmetric — start from the oldest, then add newer ones.
+     *
+     * @param archives list of archives with [lowerBound, upperBound, messageCount]
+     * @param limit number of messages requested
+     * @param order ASC (earliest first) or DESC (latest first)
+     * @return archives that must be scanned
+     */
+    private static List selectTopNArchives(List archives, long limit, ClpTopNSpec.Order order)
+    {
+        if (archives == null || archives.isEmpty() || limit <= 0) {
+            return ImmutableList.of();
+        }
+        requireNonNull(order, "order is null");
+
+        // 1) Merge overlaps into groups; toArchiveGroups returns them sorted oldest-to-newest
+        List groups = toArchiveGroups(archives);
+
+        if (groups.isEmpty()) {
+            return ImmutableList.of();
+        }
+
+        // 2) Pick minimal set of groups per order, then return all member archives
+        List selected = new ArrayList<>();
+        if (order == ClpTopNSpec.Order.DESC) {
+            // newest group index (groups are ordered oldest-to-newest)
+            int k = groups.size() - 1;
+
+            // must include newest group
+            selected.addAll(groups.get(k).members);
+
+            // assume worst case: newest contributes 0 after filter; cover limit from older groups
+            long coveredByOlder = 0;
+            for (int i = k - 1; i >= 0 && coveredByOlder < limit; --i) {
+                selected.addAll(groups.get(i).members);
+                coveredByOlder += groups.get(i).count;
+            }
+        }
+        else {
+            // oldest group index
+            int k = 0;
+
+            // must include oldest group
+            selected.addAll(groups.get(k).members);
+
+            // assume worst case: oldest contributes 0; cover limit from newer groups
+            long coveredByNewer = 0;
+            for (int i = k + 1; i < groups.size() && coveredByNewer < limit; ++i) {
+                selected.addAll(groups.get(i).members);
+                coveredByNewer += groups.get(i).count;
+            }
+        }
+
+        return selected;
+    }
+
+    /**
+     * Partitions the given archives into maximal runs of overlapping time ranges, each run
+     * merged into a single ArchiveGroup whose range is the union of its members' ranges.
+     * Groups are returned ordered oldest-to-newest.
+     *
+     * @param archives archives to group (copied and sorted internally by lowerBound)
+     * @return merged, non-overlapping archive groups
+     */
+    private static List toArchiveGroups(List archives)
+    {
+        List ordered = new ArrayList<>(archives);
+        ordered.sort(comparingLong((ArchiveMeta m) -> m.lowerBound)
+                .thenComparingLong(m -> m.upperBound));
+
+        List merged = new ArrayList<>();
+        ArchiveGroup open = null;
+
+        for (ArchiveMeta meta : ordered) {
+            if (open != null && overlaps(open, meta)) {
+                // Fold this archive into the currently-open group.
+                open.end = Math.max(open.end, meta.upperBound);
+                open.count += meta.messageCount;
+                open.members.add(meta);
+                continue;
+            }
+            if (open != null) {
+                // Disjoint from the open group: seal it and open a fresh one.
+                merged.add(open);
+            }
+            open = startArchiveGroup(meta);
+        }
+        if (open != null) {
+            merged.add(open);
+        }
+        return merged;
+    }
+
+    /**
+     * Creates a new single-member group seeded from one archive's bounds and message count.
+     */
+    private static ArchiveGroup startArchiveGroup(ArchiveMeta archive)
+    {
+        ArchiveGroup seeded = new ArchiveGroup();
+        seeded.begin = archive.lowerBound;
+        seeded.end = archive.upperBound;
+        seeded.count = archive.messageCount;
+        seeded.members.add(archive);
+        return seeded;
+    }
+
+    /**
+     * Closed-interval intersection test between a group's merged range and one archive.
+     */
+    private static boolean overlaps(ArchiveGroup group, ArchiveMeta archive)
+    {
+        // Equivalent to: archive.lowerBound <= group.end && archive.upperBound >= group.begin
+        boolean disjoint = archive.lowerBound > group.end || archive.upperBound < group.begin;
+        return !disjoint;
+    }
+
+    /**
+     * Determines the split type from the file name: CLP IR streams end in ".clp.zst",
+     * everything else is treated as an archive.
+     *
+     * @param splitPath the file path
+     * @return IR for .clp.zst files, ARCHIVE otherwise
+     */
+    private static SplitType determineSplitType(String splitPath)
+    {
+        if (splitPath.endsWith(".clp.zst")) {
+            return IR;
+        }
+        return ARCHIVE;
+    }
+
+    /**
+     * Factory method for building split selection SQL queries.
+     * Exposed for testing purposes.
+     *
+     * NOTE(review): tableName and filterSql are interpolated directly into the SQL string;
+     * both originate from planner/metadata code here, but this is not safe for untrusted input.
+     *
+     * @param tableName the Pinot table name
+     * @param filterSql the filter SQL expression
+     * @return the complete SQL query for selecting splits
+     */
+    @VisibleForTesting
+    protected String buildSplitSelectionQuery(String tableName, String filterSql)
+    {
+        return format(SQL_SELECT_SPLITS_TEMPLATE, tableName, filterSql);
+    }
+
+    /**
+     * Factory method for building split metadata SQL queries.
+     * Exposed for testing purposes.
+     *
+     * NOTE(review): all four arguments are interpolated directly into the SQL string;
+     * they originate from planner/metadata code here, but this is not safe for untrusted input.
+     *
+     * @param tableName the Pinot table name
+     * @param filterSql the filter SQL expression
+     * @param orderByColumn the column to order by
+     * @param orderDirection the order direction (ASC or DESC)
+     * @return the complete SQL query for selecting split metadata
+     */
+    @VisibleForTesting
+    protected String buildSplitMetadataQuery(String tableName, String filterSql, String orderByColumn, String orderDirection)
+    {
+        return format(SQL_SELECT_SPLIT_META_TEMPLATE, tableName, filterSql, orderByColumn, orderDirection);
+    }
+
+ private static List getQueryResult(URL url, String sql)
+ {
+ try {
+ HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+ conn.setRequestMethod("POST");
+ conn.setRequestProperty("Content-Type", "application/json");
+ conn.setRequestProperty("Accept", "application/json");
+ conn.setDoOutput(true);
+ conn.setConnectTimeout((int) SECONDS.toMillis(5));
+ conn.setReadTimeout((int) SECONDS.toMillis(30));
+
+ log.info("Executing Pinot query: %s", sql);
+ ObjectMapper mapper = new ObjectMapper();
+ String body = format("{\"sql\": %s }", mapper.writeValueAsString(sql));
+ try (OutputStream os = conn.getOutputStream()) {
+ os.write(body.getBytes(StandardCharsets.UTF_8));
+ }
+
+ int code = conn.getResponseCode();
+ InputStream is = (code >= 200 && code < 300) ? conn.getInputStream() : conn.getErrorStream();
+ if (is == null) {
+ throw new IOException("Pinot HTTP " + code + " with empty body");
+ }
+
+ JsonNode root;
+ try (InputStream in = is) {
+ root = mapper.readTree(in);
+ }
+ JsonNode resultTable = root.get("resultTable");
+ if (resultTable == null) {
+ throw new IllegalStateException("Pinot query response missing 'resultTable' field");
+ }
+ JsonNode rows = resultTable.get("rows");
+ if (rows == null) {
+ throw new IllegalStateException("Pinot query response missing 'rows' field in resultTable");
+ }
+ ImmutableList.Builder resultBuilder = ImmutableList.builder();
+ for (Iterator it = rows.elements(); it.hasNext(); ) {
+ JsonNode row = it.next();
+ resultBuilder.add(row);
+ }
+ List results = resultBuilder.build();
+ log.debug("Number of results: %s", results.size());
+ return results;
+ }
+ catch (IOException e) {
+ log.error(e, "IO error executing Pinot query: %s", sql);
+ return Collections.emptyList();
+ }
+ catch (Exception e) {
+ log.error(e, "Unexpected error executing Pinot query: %s", sql);
+ return Collections.emptyList();
+ }
+ }
+
+    /**
+     * Represents metadata of an archive, including its ID, timestamp bounds, and message count.
+     * Immutable; bounds and count are validated at construction time.
+     */
+    private static final class ArchiveMeta
+    {
+        // Archive path/id as stored in the metadata table (tpath column).
+        private final String id;
+        // Inclusive lower timestamp bound (creationtime column).
+        private final long lowerBound;
+        // Inclusive upper timestamp bound (lastmodifiedtime column).
+        private final long upperBound;
+        // Number of messages in the archive (num_messages column).
+        private final long messageCount;
+
+        ArchiveMeta(String id, long lowerBound, long upperBound, long messageCount)
+        {
+            this.id = requireNonNull(id, "id is null");
+            if (lowerBound > upperBound) {
+                throw new IllegalArgumentException(
+                        format("Invalid archive bounds: lowerBound (%d) > upperBound (%d)", lowerBound, upperBound));
+            }
+            if (messageCount < 0) {
+                throw new IllegalArgumentException(
+                        format("Invalid message count: %d (must be >= 0)", messageCount));
+            }
+            this.lowerBound = lowerBound;
+            this.upperBound = upperBound;
+            this.messageCount = messageCount;
+        }
+    }
+
+    /**
+     * Represents a group of overlapping archives treated as one logical unit.
+     */
+    private static final class ArchiveGroup
+    {
+        // Inclusive lower bound of the union of member time ranges.
+        long begin;
+        // Inclusive upper bound of the union of member time ranges.
+        long end;
+        // Sum of member archives' message counts.
+        long count;
+        // Member archives merged into this group. NOTE(review): the element type appears
+        // stripped by extraction — presumably List<ArchiveMeta>; confirm against the real file.
+        final List members = new ArrayList<>();
+    }
+}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpUberPinotSplitProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpUberPinotSplitProvider.java
new file mode 100644
index 0000000000000..7b2dd8bfb6bef
--- /dev/null
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/ClpUberPinotSplitProvider.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.split;
+
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.facebook.presto.plugin.clp.ClpTableHandle;
+import com.facebook.presto.spi.SchemaTableName;
+import com.google.common.annotations.VisibleForTesting;
+
+import javax.inject.Inject;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Uber-specific implementation of the CLP Pinot split provider.
+ *
+ * At Uber, Pinot is accessed through Neutrino, a cross-region routing and aggregation service
+ * that provides a unified interface for querying distributed Pinot clusters. This subclass
+ * customizes two extension points of ClpPinotSplitProvider: the SQL query endpoint URL
+ * (Neutrino's global statements API instead of Pinot's standard query endpoint) and the
+ * metadata table naming scheme.
+ */
+public class ClpUberPinotSplitProvider
+        extends ClpPinotSplitProvider
+{
+    /**
+     * Constructs an Uber CLP Pinot split provider with the given configuration.
+     *
+     * @param config the CLP configuration
+     */
+    @Inject
+    public ClpUberPinotSplitProvider(ClpConfig config)
+    {
+        super(config);
+    }
+
+    /**
+     * Builds the Neutrino SQL query endpoint URL for Uber's Pinot infrastructure.
+     *
+     * Instead of Pinot's standard {@code /query/sql} endpoint, queries go to Neutrino's
+     * {@code /v1/globalStatements} endpoint, which provides cross-region query routing and
+     * aggregation.
+     *
+     * @param config the CLP configuration containing the base Neutrino service URL
+     * @return the Neutrino global statements endpoint URL
+     * @throws MalformedURLException if the constructed URL is invalid
+     */
+    @Override
+    protected URL buildPinotSqlQueryEndpointUrl(ClpConfig config) throws MalformedURLException
+    {
+        return new URL(config.getMetadataDbUrl() + "/v1/globalStatements");
+    }
+
+    /**
+     * Infers the Uber-specific Pinot metadata table name for the given handle.
+     *
+     * All logging metadata tables live in a flat namespace prefixed with {@code "rta.logging."}
+     * ("rta" = Real-Time Analytics platform namespace, "logging" = its logging subsystem),
+     * regardless of which schema is being queried. For example, table {@code logs} in any
+     * schema maps to {@code rta.logging.logs}.
+     *
+     * @param tableHandle the CLP table handle containing schema and table information
+     * @return the fully-qualified Pinot metadata table name with Uber's namespace prefix
+     * @throws NullPointerException if tableHandle is null
+     */
+    @Override
+    protected String inferMetadataTableName(ClpTableHandle tableHandle)
+    {
+        requireNonNull(tableHandle, "tableHandle is null");
+        SchemaTableName schemaTableName = tableHandle.getSchemaTableName();
+        // The schema component is intentionally ignored: Uber's namespace is flat.
+        return buildUberTableName(schemaTableName.getTableName());
+    }
+
+    /**
+     * Factory method for building Uber-specific table names.
+     * Exposed for testing purposes.
+     *
+     * @param tableName the base table name
+     * @return the fully-qualified Uber Pinot table name
+     */
+    @VisibleForTesting
+    protected String buildUberTableName(String tableName)
+    {
+        return String.format("rta.logging.%s", tableName);
+    }
+}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java
index 31d24fd4df71c..4bec8a79c9eed 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpMySqlSplitFilterProvider.java
@@ -15,6 +15,7 @@
import com.facebook.presto.plugin.clp.ClpConfig;
import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
@@ -85,6 +86,30 @@ public String remapSplitFilterPushDownExpression(String scope, String pushDownEx
return remappedSql;
}
+    @Override
+    public List remapColumnName(String scope, String columnName)
+    {
+        // Scope format: catalog[.schema][.table]; mappings from broader scopes are collected
+        // first, then overridden/supplemented by more specific scopes.
+        String[] splitScope = scope.split("\\.");
+
+        // Catalog-level mappings.
+        Map mappings = new HashMap<>(getAllMappingsFromFilters(filterMap.get(splitScope[0])));
+
+        // Schema-level mappings (catalog.schema) override catalog-level ones.
+        if (1 < splitScope.length) {
+            mappings.putAll(getAllMappingsFromFilters(filterMap.get(splitScope[0] + "." + splitScope[1])));
+        }
+
+        // Table-level mappings (full catalog.schema.table scope) override the rest.
+        if (3 == splitScope.length) {
+            mappings.putAll(getAllMappingsFromFilters(filterMap.get(scope)));
+        }
+
+        // A range-mapped column expands to its two bound columns; anything else passes through.
+        if (mappings.containsKey(columnName)) {
+            ClpMySqlCustomSplitFilterOptions.RangeMapping value = mappings.get(columnName);
+            return ImmutableList.of(value.lowerBound, value.upperBound);
+        }
+        else {
+            return ImmutableList.of(columnName);
+        }
+    }
+
@Override
protected Class extends CustomSplitFilterOptions> getCustomSplitFilterOptionsClass()
{
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpPinotSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpPinotSplitFilterProvider.java
new file mode 100644
index 0000000000000..3ffc76b5ae59d
--- /dev/null
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpPinotSplitFilterProvider.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.split.filter;
+
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.google.inject.Inject;
+
+/**
+ * Split filter provider for metadata databases implemented with Pinot.
+ *
+ * Currently uses the same implementation as MySQL. This class exists to allow
+ * for future Pinot-specific customizations if needed.
+ */
+public class ClpPinotSplitFilterProvider
+        extends ClpMySqlSplitFilterProvider
+{
+    @Inject
+    public ClpPinotSplitFilterProvider(ClpConfig config)
+    {
+        super(config);
+    }
+}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java
index 0609843aaf22f..7f19a5296b801 100644
--- a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpSplitFilterProvider.java
@@ -91,20 +91,36 @@ public ClpSplitFilterProvider(ClpConfig config)
*/
public abstract String remapSplitFilterPushDownExpression(String scope, String pushDownExpression);
+    /**
+     * Remaps {@code columnName} according to the {@code "customOptions"} configured for the
+     * given scope.
+     *
+     * {@code scope} follows the format {@code catalog[.schema][.table]} and determines which
+     * column mappings apply: mappings from more specific scopes (e.g., table-level) override or
+     * supplement those from broader scopes (e.g., catalog-level). For each scope level present,
+     * implementations collect all mappings defined in {@code "customOptions"}.
+     *
+     * @param scope the scope of the column mapping
+     * @param columnName the column name to be remapped
+     * @return the remapped column names (may contain multiple entries, e.g. for range mappings)
+     */
+    public abstract List remapColumnName(String scope, String columnName);
+
/**
* Checks for the given table, if {@code splitFilterPushDownExpression} contains all required
* fields.
*
* @param tableScopeSet the set of scopes of the tables that are being queried
- * @param splitFilterPushDownExpression the expression to be checked
+ * @param pushDownVariables the set of variables being pushed down
*/
- public void checkContainsRequiredFilters(Set tableScopeSet, String splitFilterPushDownExpression)
+ public void checkContainsRequiredFilters(Set tableScopeSet, Set pushDownVariables)
{
boolean hasRequiredSplitFilterColumns = true;
ImmutableList.Builder notFoundListBuilder = ImmutableList.builder();
for (String tableScope : tableScopeSet) {
for (String columnName : getRequiredColumnNames(tableScope)) {
- if (!splitFilterPushDownExpression.contains(columnName)) {
+ if (!pushDownVariables.contains(columnName)) {
hasRequiredSplitFilterColumns = false;
notFoundListBuilder.add(columnName);
}
diff --git a/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpUberPinotSplitFilterProvider.java b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpUberPinotSplitFilterProvider.java
new file mode 100644
index 0000000000000..4be5585a3e18c
--- /dev/null
+++ b/presto-clp/src/main/java/com/facebook/presto/plugin/clp/split/filter/ClpUberPinotSplitFilterProvider.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.split.filter;
+
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.google.inject.Inject;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static java.lang.String.format;
+
+/**
+ * Uber-specific split filter provider for Pinot metadata databases.
+ *
+ * Extends the standard Pinot filter provider with TEXT_MATCH transformations for Uber's
+ * infrastructure: after the inherited range mappings are applied, remaining equality
+ * predicates are rewritten to TEXT_MATCH expressions against Uber's merged text index.
+ *
+ * Example transformations:
+ * - {@code "x" = 1} becomes {@code TEXT_MATCH("__mergedTextIndex", '/1:x/')}
+ * - {@code "x" = 'abc'} becomes {@code TEXT_MATCH("__mergedTextIndex", '/abc:x/')}
+ * - {@code "timestamp" >= 1234} is handled by the inherited range mapping
+ */
+public class ClpUberPinotSplitFilterProvider
+        extends ClpPinotSplitFilterProvider
+{
+    private static final String MERGED_TEXT_INDEX_COLUMN = "__mergedTextIndex";
+
+    // Matches `"column" = value` where value is either a signed numeric literal (with optional
+    // decimal/exponent part) or a single-quoted string. Pre-compiled for performance.
+    private static final Pattern EQUALITY_PATTERN = Pattern.compile(
+            "\"([^\"]+)\"\\s*=\\s*(?:(-?[0-9]+(?:\\.[0-9]+)?(?:[eE][+-]?[0-9]+)?)|'([^']*)')");
+
+    @Inject
+    public ClpUberPinotSplitFilterProvider(ClpConfig config)
+    {
+        super(config);
+    }
+
+    /**
+     * Transforms SQL predicates into Uber Pinot-compatible TEXT_MATCH expressions.
+     *
+     * Range mappings inherited from the parent run first; equality predicates that survive that
+     * pass are then rewritten to TEXT_MATCH form. Range-mapped columns no longer match the
+     * equality pattern, so the second pass leaves them untouched.
+     *
+     * @param scope the filter's scope (catalog.schema.table)
+     * @param pushDownExpression the SQL expression to be transformed
+     * @return the transformed Uber Pinot-compatible expression
+     */
+    @Override
+    public String remapSplitFilterPushDownExpression(String scope, String pushDownExpression)
+    {
+        String remappedSql = super.remapSplitFilterPushDownExpression(scope, pushDownExpression);
+        return transformToTextMatch(remappedSql);
+    }
+
+    /**
+     * Rewrites each {@code "columnName" = value} predicate as
+     * {@code TEXT_MATCH("__mergedTextIndex", '/value:columnName/')}, leaving all other text
+     * of the expression unchanged.
+     *
+     * @param expression the SQL expression to transform
+     * @return the expression with equality predicates transformed to TEXT_MATCH
+     */
+    private String transformToTextMatch(String expression)
+    {
+        Matcher matcher = EQUALITY_PATTERN.matcher(expression);
+        StringBuilder rewritten = new StringBuilder();
+        int copiedUpTo = 0;
+
+        while (matcher.find()) {
+            String column = matcher.group(1);
+            // Group 2 carries a numeric literal, group 3 a quoted string; exactly one is set.
+            String numeric = matcher.group(2);
+            String quoted = matcher.group(3);
+            String value = (numeric != null) ? numeric : quoted;
+
+            // Copy the untouched text preceding this predicate.
+            rewritten.append(expression, copiedUpTo, matcher.start());
+            // NOTE(review): 'value' is interpolated unescaped into the Lucene pattern; values
+            // containing '/' or ':' would corrupt the expression — confirm upstream sanitization.
+            rewritten.append(format(
+                    "TEXT_MATCH(\"%s\", '/%s:%s/')",
+                    MERGED_TEXT_INDEX_COLUMN,
+                    value,
+                    column));
+            copiedUpTo = matcher.end();
+        }
+
+        // Copy whatever follows the final predicate.
+        rewritten.append(expression, copiedUpTo, expression.length());
+        return rewritten.toString();
+    }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java
index d1d0ee6964c8e..ee207f9864004 100644
--- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/ClpMetadataDbSetUp.java
@@ -37,6 +37,7 @@
import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_COLUMN_NAME;
import static com.facebook.presto.plugin.clp.metadata.ClpMySqlMetadataProvider.DATASETS_TABLE_SUFFIX;
import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_COLUMN_ID;
+import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_NUM_MESSAGES;
import static com.facebook.presto.plugin.clp.split.ClpMySqlSplitProvider.ARCHIVES_TABLE_SUFFIX;
import static java.lang.String.format;
import static java.util.UUID.randomUUID;
@@ -139,26 +140,30 @@ public static ClpMySqlSplitProvider setupSplit(DbHandle dbHandle, Map schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 1);
+ assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME);
+ }
+
+ @Test
+ public void testMultiSchemaDiscovery() throws IOException
+ {
+ // Create a temporary YAML file with multiple schemas
+ File tempFile = File.createTempFile("clp-metadata-multi-", ".yaml");
+ tempFile.deleteOnExit();
+
+ try (FileWriter writer = new FileWriter(tempFile)) {
+ writer.write("clp:\n");
+ writer.write(" default:\n");
+ writer.write(" logs_table: /path/to/default/logs.yaml\n");
+ writer.write(" dev:\n");
+ writer.write(" test_logs: /path/to/dev/test.yaml\n");
+ writer.write(" prod:\n");
+ writer.write(" production_logs: /path/to/prod/logs.yaml\n");
+ }
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(tempFile.getAbsolutePath());
+
+ ClpMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 3);
+ Set schemaSet = ImmutableSet.copyOf(schemas);
+ assertTrue(schemaSet.contains("default"));
+ assertTrue(schemaSet.contains("dev"));
+ assertTrue(schemaSet.contains("prod"));
+ }
+
+ @Test
+ public void testMissingYamlPathReturnsDefault()
+ {
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML);
+ // Note: not setting metadataYamlPath
+
+ ClpMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 1);
+ assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME);
+ }
+
+ @Test
+ public void testInvalidYamlPathReturnsDefault()
+ {
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath("/nonexistent/path/to/metadata.yaml");
+
+ ClpMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 1);
+ assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME);
+ }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpTopN.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpTopN.java
new file mode 100644
index 0000000000000..f7e746a21f347
--- /dev/null
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpTopN.java
@@ -0,0 +1,440 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp;
+
+import com.facebook.airlift.log.Logger;
+import com.facebook.presto.Session;
+import com.facebook.presto.common.transaction.TransactionId;
+import com.facebook.presto.common.type.RowType;
+import com.facebook.presto.cost.PlanNodeStatsEstimate;
+import com.facebook.presto.cost.StatsAndCosts;
+import com.facebook.presto.cost.StatsProvider;
+import com.facebook.presto.metadata.FunctionAndTypeManager;
+import com.facebook.presto.metadata.Metadata;
+import com.facebook.presto.plugin.clp.optimization.ClpComputePushDown;
+import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec;
+import com.facebook.presto.plugin.clp.optimization.ClpTopNSpec.Order;
+import com.facebook.presto.plugin.clp.split.ClpSplitProvider;
+import com.facebook.presto.plugin.clp.split.filter.ClpMySqlSplitFilterProvider;
+import com.facebook.presto.plugin.clp.split.filter.ClpSplitFilterProvider;
+import com.facebook.presto.spi.ColumnHandle;
+import com.facebook.presto.spi.SchemaTableName;
+import com.facebook.presto.spi.VariableAllocator;
+import com.facebook.presto.spi.WarningCollector;
+import com.facebook.presto.spi.plan.FilterNode;
+import com.facebook.presto.spi.plan.OutputNode;
+import com.facebook.presto.spi.plan.PlanNode;
+import com.facebook.presto.spi.plan.PlanNodeIdAllocator;
+import com.facebook.presto.spi.plan.ProjectNode;
+import com.facebook.presto.spi.plan.TableScanNode;
+import com.facebook.presto.spi.plan.TopNNode;
+import com.facebook.presto.spi.relation.VariableReferenceExpression;
+import com.facebook.presto.sql.planner.Plan;
+import com.facebook.presto.sql.planner.assertions.MatchResult;
+import com.facebook.presto.sql.planner.assertions.Matcher;
+import com.facebook.presto.sql.planner.assertions.PlanAssert;
+import com.facebook.presto.sql.planner.assertions.PlanMatchPattern;
+import com.facebook.presto.sql.planner.assertions.SymbolAliases;
+import com.facebook.presto.sql.planner.plan.ExchangeNode;
+import com.facebook.presto.sql.planner.plan.SimplePlanRewriter;
+import com.facebook.presto.sql.relational.FunctionResolution;
+import com.facebook.presto.sql.tree.SymbolReference;
+import com.facebook.presto.testing.LocalQueryRunner;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import org.apache.commons.math3.util.Pair;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Paths;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.facebook.presto.common.Utils.checkState;
+import static com.facebook.presto.common.type.BigintType.BIGINT;
+import static com.facebook.presto.metadata.FunctionExtractor.extractFunctions;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.ARCHIVES_STORAGE_DIRECTORY_BASE;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_PASSWORD;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_TABLE_PREFIX;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_URL_TEMPLATE;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.METADATA_DB_USER;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.getDbHandle;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.setupMetadata;
+import static com.facebook.presto.plugin.clp.ClpMetadataDbSetUp.setupSplit;
+import static com.facebook.presto.plugin.clp.ClpSplit.SplitType.ARCHIVE;
+import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Boolean;
+import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.ClpString;
+import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Float;
+import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.Integer;
+import static com.facebook.presto.plugin.clp.metadata.ClpSchemaTreeNodeType.VarString;
+import static com.facebook.presto.plugin.clp.optimization.ClpTopNSpec.Order.ASC;
+import static com.facebook.presto.plugin.clp.optimization.ClpTopNSpec.Order.DESC;
+import static com.facebook.presto.sql.planner.assertions.MatchResult.NO_MATCH;
+import static com.facebook.presto.sql.planner.assertions.MatchResult.match;
+import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.anyTree;
+import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.node;
+import static com.facebook.presto.testing.TestingSession.testSessionBuilder;
+import static java.lang.String.format;
+import static org.testng.Assert.assertEquals;
+
+@Test(singleThreaded = true)
+public class TestClpTopN
+ extends TestClpQueryBase
+{
+ private final Session defaultSession = testSessionBuilder()
+ .setCatalog("clp")
+ .setSchema(ClpMetadata.DEFAULT_SCHEMA_NAME)
+ .build();
+
+ private ClpMetadataDbSetUp.DbHandle dbHandle;
+ ClpTableHandle table;
+
+ private static final Logger log = Logger.get(TestClpTopN.class);
+
+ private LocalQueryRunner localQueryRunner;
+ private FunctionAndTypeManager functionAndTypeManager;
+ private FunctionResolution functionResolution;
+ private ClpSplitProvider splitProvider;
+ private ClpSplitFilterProvider splitFilterProvider;
+ private PlanNodeIdAllocator planNodeIdAllocator;
+ private VariableAllocator variableAllocator;
+
+ @BeforeMethod
+ public void setUp()
+ {
+ dbHandle = getDbHandle("topn_query_testdb");
+ final String tableName = "test";
+ final String tablePath = ARCHIVES_STORAGE_DIRECTORY_BASE + tableName;
+ table = new ClpTableHandle(new SchemaTableName("default", tableName), tablePath);
+
+ setupMetadata(dbHandle,
+ ImmutableMap.of(
+ tableName,
+ ImmutableList.of(
+ new Pair<>("msg.timestamp", Integer),
+ new Pair<>("city.Name", ClpString),
+ new Pair<>("city.Region.Id", Integer),
+ new Pair<>("city.Region.Name", VarString),
+ new Pair<>("fare", Float),
+ new Pair<>("isHoliday", Boolean))));
+
+ splitProvider = setupSplit(dbHandle,
+ ImmutableMap.of(
+ tableName,
+ ImmutableList.of(
+ new ClpMetadataDbSetUp.ArchivesTableRow("0", 100, 0, 100),
+ new ClpMetadataDbSetUp.ArchivesTableRow("1", 100, 50, 150),
+ new ClpMetadataDbSetUp.ArchivesTableRow("2", 100, 100, 200),
+ new ClpMetadataDbSetUp.ArchivesTableRow("3", 100, 201, 300),
+ new ClpMetadataDbSetUp.ArchivesTableRow("4", 100, 301, 400))));
+
+ URL resource = getClass().getClassLoader().getResource("test-topn-split-filter.json");
+ if (resource == null) {
+ log.error("test-topn-split-filter.json not found in resources");
+ return;
+ }
+
+ String filterConfigPath;
+ try {
+ filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString();
+ }
+ catch (URISyntaxException e) {
+ log.error("test-topn-split-filter.json not found in resources");
+ return;
+ }
+
+ localQueryRunner = new LocalQueryRunner(defaultSession);
+ localQueryRunner.createCatalog("clp", new ClpConnectorFactory(), ImmutableMap.of(
+ "clp.metadata-db-url", format(METADATA_DB_URL_TEMPLATE, dbHandle.getDbPath()),
+ "clp.metadata-db-user", METADATA_DB_USER,
+ "clp.metadata-db-password", METADATA_DB_PASSWORD,
+ "clp.metadata-table-prefix", METADATA_DB_TABLE_PREFIX));
+ localQueryRunner.getMetadata().registerBuiltInFunctions(extractFunctions(new ClpPlugin().getFunctions()));
+ functionAndTypeManager = localQueryRunner.getMetadata().getFunctionAndTypeManager();
+ functionResolution = new FunctionResolution(functionAndTypeManager.getFunctionAndTypeResolver());
+ splitFilterProvider = new ClpMySqlSplitFilterProvider(new ClpConfig().setSplitFilterConfig(filterConfigPath));
+ planNodeIdAllocator = new PlanNodeIdAllocator();
+ variableAllocator = new VariableAllocator();
+ }
+
+ @AfterMethod
+ public void tearDown()
+ {
+ localQueryRunner.close();
+ ClpMetadataDbSetUp.tearDown(dbHandle);
+ }
+
+ @Test
+ public void test()
+ {
+ testTopNQueryPlanAndSplits(
+ "SELECT * FROM test WHERE msg.timestamp > 120 AND msg.timestamp < 240 ORDER BY msg.timestamp DESC LIMIT 100",
+ "(msg.timestamp > 120 AND msg.timestamp < 240)",
+ "(end_timestamp > 120 AND begin_timestamp < 240)",
+ 100,
+ DESC,
+ ImmutableSet.of("1", "2", "3"));
+
+ testTopNQueryPlanAndSplits(
+ "SELECT * FROM test WHERE msg.timestamp > 120 AND msg.timestamp < 240 ORDER BY msg.timestamp ASC LIMIT 50",
+ "(msg.timestamp > 120 AND msg.timestamp < 240)",
+ "(end_timestamp > 120 AND begin_timestamp < 240)",
+ 50,
+ ASC,
+ ImmutableSet.of("1", "2", "3"));
+
+ testTopNQueryPlanAndSplits(
+ "SELECT * FROM test WHERE msg.timestamp >= 180 AND msg.timestamp <= 260 ORDER BY msg.timestamp DESC LIMIT 100",
+ "(msg.timestamp >= 180 AND msg.timestamp <= 260)",
+ "(end_timestamp >= 180 AND begin_timestamp <= 260)",
+ 100,
+ DESC,
+ ImmutableSet.of("2", "3"));
+
+ testTopNQueryPlanAndSplits(
+ "SELECT * FROM test WHERE msg.timestamp > 250 AND msg.timestamp < 290 ORDER BY msg.timestamp DESC LIMIT 10",
+ "(msg.timestamp > 250 AND msg.timestamp < 290)",
+ "(end_timestamp > 250 AND begin_timestamp < 290)",
+ 10,
+ DESC,
+ ImmutableSet.of("3"));
+
+ testTopNQueryPlanAndSplits(
+ "SELECT * FROM test WHERE msg.timestamp > 1000 AND msg.timestamp < 1100 ORDER BY msg.timestamp DESC LIMIT 10",
+ "(msg.timestamp > 1000 AND msg.timestamp < 1100)",
+ "(end_timestamp > 1000 AND begin_timestamp < 1100)",
+ 10,
+ DESC,
+ ImmutableSet.of());
+
+ testTopNQueryPlanAndSplits(
+ "SELECT * FROM test WHERE msg.timestamp <= 300 ORDER BY msg.timestamp DESC LIMIT 1000",
+ "msg.timestamp <= 300",
+ "begin_timestamp <= 300",
+ 1000,
+ DESC,
+ ImmutableSet.of("0", "1", "2", "3"));
+
+ testTopNQueryPlanAndSplits(
+ "SELECT * FROM test WHERE msg.timestamp <= 400 ORDER BY msg.timestamp DESC LIMIT 100",
+ "msg.timestamp <= 400",
+ "begin_timestamp <= 400",
+ 100,
+ DESC,
+ ImmutableSet.of("3", "4"));
+ }
+
+ private void testTopNQueryPlanAndSplits(String sql, String kql, String metadataSql, long limit, Order order, Set splitIds)
+ {
+ TransactionId transactionId = localQueryRunner.getTransactionManager().beginTransaction(false);
+ Session session = testSessionBuilder().setCatalog("clp").setSchema("default").setTransactionId(transactionId).build();
+
+ Plan plan = localQueryRunner.createPlan(
+ session,
+ sql,
+ WarningCollector.NOOP);
+ ClpComputePushDown optimizer = new ClpComputePushDown(functionAndTypeManager, functionResolution, splitFilterProvider);
+ PlanNode optimizedPlan = optimizer.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator);
+ PlanNode optimizedPlanWithUniqueId = freshenIds(optimizedPlan, new PlanNodeIdAllocator());
+
+ ClpTableLayoutHandle clpTableLayoutHandle = new ClpTableLayoutHandle(
+ table,
+ Optional.of(kql),
+ Optional.of(metadataSql),
+ true,
+ Optional.of(new ClpTopNSpec(
+ limit,
+ ImmutableList.of(new ClpTopNSpec.Ordering(ImmutableList.of("begin_timestamp", "end_timestamp"), order)))));
+
+ PlanAssert.assertPlan(
+ session,
+ localQueryRunner.getMetadata(),
+ (node, sourceStats, lookup, s, types) -> PlanNodeStatsEstimate.unknown(),
+ new Plan(optimizedPlanWithUniqueId, plan.getTypes(), StatsAndCosts.empty()),
+ anyTree(
+ ClpTableScanMatcher.clpTableScanPattern(
+ clpTableLayoutHandle,
+ ImmutableSet.of(
+ city,
+ fare,
+ isHoliday,
+ new ClpColumnHandle(
+ "msg",
+ RowType.from(ImmutableList.of(new RowType.Field(Optional.of("timestamp"), BIGINT))))))));
+
+ assertEquals(
+ ImmutableSet.copyOf(splitProvider.listSplits(clpTableLayoutHandle)),
+ splitIds.stream()
+ .map(id -> new ClpSplit("/tmp/archives/test/" + id, ARCHIVE, Optional.of(kql)))
+ .collect(ImmutableSet.toImmutableSet()));
+ }
+
+ /**
+ * Recursively rebuilds a query plan tree so that every {@link PlanNode} has a fresh, unique ID.
+ *
+ * This utility is mainly for testing, to avoid ID collisions that can occur when
+ * localQueryRunner.createPlan() and a custom optimizer each use separate
+ * {@link PlanNodeIdAllocator}s that start at the same seed, producing duplicate IDs.
+ *
+ * @param root the root of the plan
+ * @param idAlloc the plan node ID allocator
+ * @return the plan with fresh, unique IDs
+ */
+ private static PlanNode freshenIds(PlanNode root, PlanNodeIdAllocator idAlloc)
+ {
+ return SimplePlanRewriter.rewriteWith(new SimplePlanRewriter() {
+ @Override
+ public PlanNode visitOutput(OutputNode node, RewriteContext ctx)
+ {
+ PlanNode src = ctx.rewrite(node.getSource(), null);
+ return new OutputNode(
+ node.getSourceLocation(),
+ idAlloc.getNextId(),
+ src,
+ node.getColumnNames(),
+ node.getOutputVariables());
+ }
+
+ @Override
+ public PlanNode visitExchange(ExchangeNode node, RewriteContext ctx)
+ {
+ List newSources = node.getSources().stream()
+ .map(s -> ctx.rewrite(s, null))
+ .collect(com.google.common.collect.ImmutableList.toImmutableList());
+
+ return new ExchangeNode(
+ node.getSourceLocation(),
+ idAlloc.getNextId(),
+ node.getType(),
+ node.getScope(),
+ node.getPartitioningScheme(),
+ newSources,
+ node.getInputs(),
+ node.isEnsureSourceOrdering(),
+ node.getOrderingScheme());
+ }
+
+ @Override
+ public PlanNode visitProject(ProjectNode node, RewriteContext ctx)
+ {
+ PlanNode src = ctx.rewrite(node.getSource(), null);
+ return new ProjectNode(idAlloc.getNextId(), src, node.getAssignments());
+ }
+
+ @Override
+ public PlanNode visitFilter(FilterNode node, RewriteContext ctx)
+ {
+ PlanNode src = ctx.rewrite(node.getSource(), null);
+ return new FilterNode(node.getSourceLocation(), idAlloc.getNextId(), src, node.getPredicate());
+ }
+
+ @Override
+ public PlanNode visitTopN(TopNNode node, RewriteContext ctx)
+ {
+ PlanNode src = ctx.rewrite(node.getSource(), null);
+ return new TopNNode(
+ node.getSourceLocation(),
+ idAlloc.getNextId(),
+ src,
+ node.getCount(),
+ node.getOrderingScheme(),
+ node.getStep());
+ }
+
+ @Override
+ public PlanNode visitTableScan(TableScanNode node, RewriteContext ctx)
+ {
+ return new TableScanNode(
+ node.getSourceLocation(),
+ idAlloc.getNextId(),
+ node.getTable(),
+ node.getOutputVariables(),
+ node.getAssignments());
+ }
+
+ @Override
+ public PlanNode visitPlan(PlanNode node, RewriteContext ctx)
+ {
+ List newChildren = node.getSources().stream()
+ .map(ch -> ctx.rewrite(ch, null))
+ .collect(com.google.common.collect.ImmutableList.toImmutableList());
+ return node.replaceChildren(newChildren);
+ }
+ }, root, null);
+ }
+
+ private static final class ClpTableScanMatcher
+ implements Matcher
+ {
+ private final ClpTableLayoutHandle expectedLayoutHandle;
+ private final Set expectedColumns;
+
+ private ClpTableScanMatcher(ClpTableLayoutHandle expectedLayoutHandle, Set expectedColumns)
+ {
+ this.expectedLayoutHandle = expectedLayoutHandle;
+ this.expectedColumns = expectedColumns;
+ }
+
+ static PlanMatchPattern clpTableScanPattern(ClpTableLayoutHandle layoutHandle, Set columns)
+ {
+ return node(TableScanNode.class).with(new ClpTableScanMatcher(layoutHandle, columns));
+ }
+
+ @Override
+ public boolean shapeMatches(PlanNode node)
+ {
+ return node instanceof TableScanNode;
+ }
+
+ @Override
+ public MatchResult detailMatches(
+ PlanNode node,
+ StatsProvider stats,
+ Session session,
+ Metadata metadata,
+ SymbolAliases symbolAliases)
+ {
+ checkState(shapeMatches(node), "Plan testing framework error: shapeMatches returned false");
+ TableScanNode tableScanNode = (TableScanNode) node;
+ ClpTableLayoutHandle actualLayoutHandle = (ClpTableLayoutHandle) tableScanNode.getTable().getLayout().get();
+
+ // Check layout handle
+ if (!expectedLayoutHandle.equals(actualLayoutHandle)) {
+ return NO_MATCH;
+ }
+
+ // Check assignments contain expected columns
+ Map actualAssignments = tableScanNode.getAssignments();
+ Set actualColumns = new HashSet<>(actualAssignments.values());
+
+ if (!expectedColumns.equals(actualColumns)) {
+ return NO_MATCH;
+ }
+
+ SymbolAliases.Builder aliasesBuilder = SymbolAliases.builder();
+ for (VariableReferenceExpression variable : tableScanNode.getOutputVariables()) {
+ aliasesBuilder.put(variable.getName(), new SymbolReference(variable.getName()));
+ }
+
+ return match(aliasesBuilder.build());
+ }
+ }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java
index a6b6bc118ffee..3b83d1c95d10b 100644
--- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpUdfRewriter.java
@@ -274,7 +274,7 @@ public void testClpGetJsonString()
Plan plan = localQueryRunner.createPlan(
session,
- "SELECT CLP_GET_JSON_STRING() from test WHERE CLP_GET_BIGINT('user_id') = 0",
+ "SELECT CLP_GET_JSON_STRING() from test WHERE CLP_GET_BIGINT('user_id') = 0 ORDER BY fare",
WarningCollector.NOOP);
ClpUdfRewriter udfRewriter = new ClpUdfRewriter(functionAndTypeManager);
PlanNode optimizedPlan = udfRewriter.optimize(plan.getRoot(), session.toConnectorSession(), variableAllocator, planNodeIdAllocator);
@@ -294,6 +294,7 @@ public void testClpGetJsonString()
ClpTableScanMatcher.clpTableScanPattern(
new ClpTableLayoutHandle(table, Optional.of("user_id: 0"), Optional.empty()),
ImmutableSet.of(
+ fare,
new ClpColumnHandle("user_id", BIGINT),
new ClpColumnHandle(JSON_STRING_PLACEHOLDER, VARCHAR))))));
}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpYamlMetadata.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpYamlMetadata.java
new file mode 100644
index 0000000000000..f91174851d7e0
--- /dev/null
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/TestClpYamlMetadata.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp;
+
+import com.facebook.presto.plugin.clp.metadata.ClpMetadataProvider;
+import com.facebook.presto.plugin.clp.metadata.ClpYamlMetadataProvider;
+import com.facebook.presto.plugin.clp.split.ClpPinotSplitProvider;
+import com.facebook.presto.plugin.clp.split.ClpSplitProvider;
+import com.facebook.presto.spi.ColumnMetadata;
+import com.facebook.presto.spi.ConnectorTableMetadata;
+import com.facebook.presto.spi.SchemaTableName;
+import com.google.common.collect.ImmutableSet;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+
+import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType.YAML;
+import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME;
+import static com.facebook.presto.testing.TestingConnectorSession.SESSION;
+import static org.testng.Assert.assertEquals;
+
+public class TestClpYamlMetadata
+{
+ private static final String PINOT_BROKER_URL = "http://localhost:8099";
+ private static final String TABLE_NAME = "cockroachdb";
+ private static final String SCHEMA1_NAME = "schema1";
+ private static final String SCHEMA2_NAME = "schema2";
+ private static final String ORDERS_TABLE_NAME = "orders";
+ private static final String USERS_TABLE_NAME = "users";
+ private ClpMetadata metadata;
+ private ClpSplitProvider clpSplitProvider;
+
+ @BeforeTest
+ public void setUp() throws Exception
+ {
+ // Load test resources from classpath
+ // ClpYamlMetadataProvider now supports relative paths, so we can use the resource file directly
+ java.net.URL tablesSchemaResource = getClass().getClassLoader().getResource("test-tables-schema.yaml");
+
+ if (tablesSchemaResource == null) {
+ throw new IllegalStateException("test-tables-schema.yaml not found in test resources");
+ }
+
+ // Get the absolute path to test-tables-schema.yaml
+ // Relative paths in the YAML will be resolved relative to this file's parent directory
+ String tablesSchemaPath = java.nio.file.Paths.get(tablesSchemaResource.toURI()).toString();
+
+ ClpConfig config = new ClpConfig()
+ .setPolymorphicTypeEnabled(true)
+ .setMetadataDbUrl(PINOT_BROKER_URL)
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(tablesSchemaPath);
+ ClpMetadataProvider metadataProvider = new ClpYamlMetadataProvider(config);
+ metadata = new ClpMetadata(config, metadataProvider);
+ clpSplitProvider = new ClpPinotSplitProvider(config);
+ }
+
+ @Test
+ public void testListSchemaNames()
+ {
+ List schemaNames = metadata.listSchemaNames(SESSION);
+ assertEquals(new HashSet<>(schemaNames), ImmutableSet.of(DEFAULT_SCHEMA_NAME, SCHEMA1_NAME, SCHEMA2_NAME));
+ }
+
+ @Test
+ public void testListTables()
+ {
+ // When no schema is specified, listTables defaults to DEFAULT_SCHEMA_NAME
+ ImmutableSet defaultTables = ImmutableSet.of(
+ new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME));
+ assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.empty())), defaultTables);
+ }
+
+ @Test
+ public void testListTablesForSpecificSchema()
+ {
+ // Test listing tables for schema1
+ ImmutableSet schema1Tables = ImmutableSet.of(
+ new SchemaTableName(SCHEMA1_NAME, ORDERS_TABLE_NAME),
+ new SchemaTableName(SCHEMA1_NAME, USERS_TABLE_NAME));
+ assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.of(SCHEMA1_NAME))), schema1Tables);
+
+ // Test listing tables for schema2
+ ImmutableSet schema2Tables = ImmutableSet.of(
+ new SchemaTableName(SCHEMA2_NAME, ORDERS_TABLE_NAME));
+ assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.of(SCHEMA2_NAME))), schema2Tables);
+
+ // Test listing tables for default schema
+ ImmutableSet defaultTables = ImmutableSet.of(
+ new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME));
+ assertEquals(new HashSet<>(metadata.listTables(SESSION, Optional.of(DEFAULT_SCHEMA_NAME))), defaultTables);
+ }
+
+ @Test
+ public void testListSplits()
+ {
+ ClpTableLayoutHandle layoutHandle = new ClpTableLayoutHandle(
+ new ClpTableHandle(new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME), ""),
+ Optional.empty(),
+ Optional.empty());
+ List result = clpSplitProvider.listSplits(layoutHandle);
+ System.out.println("Hello world");
+ }
+
+ @Test
+ public void testGetTableMetadata()
+ {
+ ClpTableHandle clpTableHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME));
+ ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, clpTableHandle);
+// ImmutableSet columnMetadata = ImmutableSet.builder()
+// .add(ColumnMetadata.builder()
+// .setName("a_bigint")
+// .setType(BIGINT)
+// .setNullable(true)
+// .build())
+// .add(ColumnMetadata.builder()
+// .setName("a_varchar")
+// .setType(VARCHAR)
+// .setNullable(true)
+// .build())
+// .add(ColumnMetadata.builder()
+// .setName("b_double")
+// .setType(DOUBLE)
+// .setNullable(true)
+// .build())
+// .add(ColumnMetadata.builder()
+// .setName("b_varchar")
+// .setType(VARCHAR)
+// .setNullable(true)
+// .build())
+// .add(ColumnMetadata.builder()
+// .setName("c")
+// .setType(RowType.from(ImmutableList.of(
+// RowType.field("d", BOOLEAN),
+// RowType.field("e", VARCHAR))))
+// .setNullable(true)
+// .build())
+// .add(ColumnMetadata.builder()
+// .setName("f")
+// .setType(RowType.from(ImmutableList.of(
+// RowType.field("g",
+// RowType.from(ImmutableList.of(
+// RowType.field("h", new ArrayType(VARCHAR))))))))
+// .setNullable(true)
+// .build())
+// .build();
+// assertEquals(columnMetadata, ImmutableSet.copyOf(tableMetadata.getColumns()));
+ ImmutableSet actual = ImmutableSet.copyOf(tableMetadata.getColumns());
+ System.out.println("Hello world");
+ }
+
+ @Test
+ public void testGetTableHandleForDuplicateTableNames()
+ {
+ // Test that we can get distinct table handles for tables with the same name in different schemas
+ ClpTableHandle schema1OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA1_NAME, ORDERS_TABLE_NAME));
+ ClpTableHandle schema2OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA2_NAME, ORDERS_TABLE_NAME));
+
+ // Verify both handles are not null
+ assertEquals(schema1OrdersHandle != null, true);
+ assertEquals(schema2OrdersHandle != null, true);
+
+ // Verify the schema names are correctly set
+ assertEquals(schema1OrdersHandle.getSchemaTableName().getSchemaName(), SCHEMA1_NAME);
+ assertEquals(schema2OrdersHandle.getSchemaTableName().getSchemaName(), SCHEMA2_NAME);
+
+ // Verify the table names are the same
+ assertEquals(schema1OrdersHandle.getSchemaTableName().getTableName(), ORDERS_TABLE_NAME);
+ assertEquals(schema2OrdersHandle.getSchemaTableName().getTableName(), ORDERS_TABLE_NAME);
+ }
+
+ @Test
+ public void testGetTableMetadataForDuplicateTableNames()
+ {
+ // Get table handles for orders tables in both schemas
+ ClpTableHandle schema1OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA1_NAME, ORDERS_TABLE_NAME));
+ ClpTableHandle schema2OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA2_NAME, ORDERS_TABLE_NAME));
+
+ // Get metadata for both tables
+ ConnectorTableMetadata schema1Metadata = metadata.getTableMetadata(SESSION, schema1OrdersHandle);
+ ConnectorTableMetadata schema2Metadata = metadata.getTableMetadata(SESSION, schema2OrdersHandle);
+
+ // Extract column names from both tables
+ ImmutableSet schema1Columns = schema1Metadata.getColumns().stream()
+ .map(ColumnMetadata::getName)
+ .collect(ImmutableSet.toImmutableSet());
+ ImmutableSet schema2Columns = schema2Metadata.getColumns().stream()
+ .map(ColumnMetadata::getName)
+ .collect(ImmutableSet.toImmutableSet());
+
+ // Verify schema1.orders has the expected columns (from test-orders-schema1.yaml)
+ ImmutableSet expectedSchema1Columns = ImmutableSet.of(
+ "order_id", "customer_id", "product_name", "quantity", "price");
+ assertEquals(schema1Columns, expectedSchema1Columns);
+
+ // Verify schema2.orders has the expected columns (from test-orders-schema2.yaml)
+ ImmutableSet expectedSchema2Columns = ImmutableSet.of(
+ "order_id", "vendor_id", "item_description", "total_amount", "is_paid", "shipping_address");
+ assertEquals(schema2Columns, expectedSchema2Columns);
+
+ // Verify that the two tables have different schemas (different columns)
+ assertEquals(schema1Columns.equals(schema2Columns), false);
+ }
+
+ @Test
+ public void testGetTableMetadataForAllSchemas()
+ {
+ // Test default.cockroachdb
+ ClpTableHandle defaultTableHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(DEFAULT_SCHEMA_NAME, TABLE_NAME));
+ ConnectorTableMetadata defaultMetadata = metadata.getTableMetadata(SESSION, defaultTableHandle);
+ assertEquals(defaultMetadata != null, true);
+ assertEquals(defaultMetadata.getTable().getSchemaName(), DEFAULT_SCHEMA_NAME);
+ assertEquals(defaultMetadata.getTable().getTableName(), TABLE_NAME);
+
+ // Test schema1.orders
+ ClpTableHandle schema1OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA1_NAME, ORDERS_TABLE_NAME));
+ ConnectorTableMetadata schema1OrdersMetadata = metadata.getTableMetadata(SESSION, schema1OrdersHandle);
+ assertEquals(schema1OrdersMetadata != null, true);
+ assertEquals(schema1OrdersMetadata.getTable().getSchemaName(), SCHEMA1_NAME);
+ assertEquals(schema1OrdersMetadata.getTable().getTableName(), ORDERS_TABLE_NAME);
+
+ // Test schema1.users
+ ClpTableHandle schema1UsersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA1_NAME, USERS_TABLE_NAME));
+ ConnectorTableMetadata schema1UsersMetadata = metadata.getTableMetadata(SESSION, schema1UsersHandle);
+ assertEquals(schema1UsersMetadata != null, true);
+ assertEquals(schema1UsersMetadata.getTable().getSchemaName(), SCHEMA1_NAME);
+ assertEquals(schema1UsersMetadata.getTable().getTableName(), USERS_TABLE_NAME);
+
+ // Test schema2.orders
+ ClpTableHandle schema2OrdersHandle = (ClpTableHandle) metadata.getTableHandle(SESSION, new SchemaTableName(SCHEMA2_NAME, ORDERS_TABLE_NAME));
+ ConnectorTableMetadata schema2OrdersMetadata = metadata.getTableMetadata(SESSION, schema2OrdersHandle);
+ assertEquals(schema2OrdersMetadata != null, true);
+ assertEquals(schema2OrdersMetadata.getTable().getSchemaName(), SCHEMA2_NAME);
+ assertEquals(schema2OrdersMetadata.getTable().getTableName(), ORDERS_TABLE_NAME);
+ }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/metadata/TestClpYamlMetadataProvider.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/metadata/TestClpYamlMetadataProvider.java
new file mode 100644
index 0000000000000..7005f7dcce06a
--- /dev/null
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/metadata/TestClpYamlMetadataProvider.java
@@ -0,0 +1,345 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.metadata;
+
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.facebook.presto.plugin.clp.ClpTableHandle;
+import com.google.common.collect.ImmutableSet;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import static com.facebook.presto.plugin.clp.ClpConfig.MetadataProviderType.YAML;
+import static com.facebook.presto.plugin.clp.ClpMetadata.DEFAULT_SCHEMA_NAME;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+@Test(singleThreaded = true)
+public class TestClpYamlMetadataProvider
+{
+ private final List<File> tempFiles = new ArrayList<>();
+
+ @AfterClass
+ public void cleanup()
+ {
+ // Clean up temporary files
+ for (File file : tempFiles) {
+ if (file.exists()) {
+ file.delete();
+ }
+ }
+ }
+
+ /**
+ * Test that listSchemaNames returns only the default schema when YAML has single schema
+ */
+ @Test
+ public void testListSchemaNamesSingleSchema() throws IOException
+ {
+ File metadataFile = createTempYamlFile(
+ "clp:\n" +
+ " default:\n" +
+ " table1: /path/to/table1.yaml\n" +
+ " table2: /path/to/table2.yaml\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<String> schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 1);
+ assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME);
+ }
+
+ /**
+ * Test that listSchemaNames discovers multiple schemas from YAML
+ */
+ @Test
+ public void testListSchemaNamesMultipleSchemas() throws IOException
+ {
+ File metadataFile = createTempYamlFile(
+ "clp:\n" +
+ " default:\n" +
+ " logs: /path/to/default/logs.yaml\n" +
+ " dev:\n" +
+ " test_logs: /path/to/dev/logs.yaml\n" +
+ " staging:\n" +
+ " staging_logs: /path/to/staging/logs.yaml\n" +
+ " prod:\n" +
+ " production_logs: /path/to/prod/logs.yaml\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<String> schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 4);
+ Set<String> schemaSet = ImmutableSet.copyOf(schemas);
+ assertTrue(schemaSet.contains("default"));
+ assertTrue(schemaSet.contains("dev"));
+ assertTrue(schemaSet.contains("staging"));
+ assertTrue(schemaSet.contains("prod"));
+ }
+
+ /**
+ * Test that listSchemaNames handles missing YAML path gracefully
+ */
+ @Test
+ public void testListSchemaNamesNullPath()
+ {
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML);
+ // Note: not setting metadataYamlPath
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<String> schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 1);
+ assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME);
+ }
+
+ /**
+ * Test that listSchemaNames handles nonexistent file gracefully
+ */
+ @Test
+ public void testListSchemaNamesNonexistentFile()
+ {
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath("/nonexistent/path/metadata.yaml");
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<String> schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 1);
+ assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME);
+ }
+
+ /**
+ * Test that listSchemaNames handles malformed YAML gracefully
+ */
+ @Test
+ public void testListSchemaNamesMalformedYaml() throws IOException
+ {
+ File metadataFile = createTempYamlFile(
+ "this is not\n" +
+ " valid: yaml: content\n" +
+ " - with random structure\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<String> schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 1);
+ assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME);
+ }
+
+ /**
+ * Test that listSchemaNames handles YAML without catalog field
+ */
+ @Test
+ public void testListSchemaNamesNoCatalogField() throws IOException
+ {
+ File metadataFile = createTempYamlFile(
+ "some_other_catalog:\n" +
+ " default:\n" +
+ " table1: /path/to/table1.yaml\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<String> schemas = provider.listSchemaNames();
+
+ // Should fall back to default schema on error
+ assertEquals(schemas.size(), 1);
+ assertEquals(schemas.get(0), DEFAULT_SCHEMA_NAME);
+ }
+
+ /**
+ * Test that listTableHandles returns correct tables for a schema
+ */
+ @Test
+ public void testListTableHandles() throws IOException
+ {
+ // Create schema YAML files
+ File table1Schema = createTempYamlFile("column1: 1\ncolumn2: 2\n");
+ File table2Schema = createTempYamlFile("field1: 3\nfield2: 4\n");
+
+ File metadataFile = createTempYamlFile(
+ "clp:\n" +
+ " default:\n" +
+ " table1: " + table1Schema.getAbsolutePath() + "\n" +
+ " table2: " + table2Schema.getAbsolutePath() + "\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<ClpTableHandle> tables = provider.listTableHandles(DEFAULT_SCHEMA_NAME);
+
+ assertEquals(tables.size(), 2);
+ Set<String> tableNames = ImmutableSet.of(
+ tables.get(0).getSchemaTableName().getTableName(),
+ tables.get(1).getSchemaTableName().getTableName());
+ assertTrue(tableNames.contains("table1"));
+ assertTrue(tableNames.contains("table2"));
+ }
+
+ /**
+ * Test that listTableHandles returns correct tables for multiple schemas
+ */
+ @Test
+ public void testListTableHandlesMultipleSchemas() throws IOException
+ {
+ File devTable = createTempYamlFile("col: 1\n");
+ File prodTable = createTempYamlFile("col: 2\n");
+
+ File metadataFile = createTempYamlFile(
+ "clp:\n" +
+ " dev:\n" +
+ " dev_logs: " + devTable.getAbsolutePath() + "\n" +
+ " prod:\n" +
+ " prod_logs: " + prodTable.getAbsolutePath() + "\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+
+ // Test dev schema
+ List<ClpTableHandle> devTables = provider.listTableHandles("dev");
+ assertEquals(devTables.size(), 1);
+ assertEquals(devTables.get(0).getSchemaTableName().getTableName(), "dev_logs");
+ assertEquals(devTables.get(0).getSchemaTableName().getSchemaName(), "dev");
+
+ // Test prod schema
+ List<ClpTableHandle> prodTables = provider.listTableHandles("prod");
+ assertEquals(prodTables.size(), 1);
+ assertEquals(prodTables.get(0).getSchemaTableName().getTableName(), "prod_logs");
+ assertEquals(prodTables.get(0).getSchemaTableName().getSchemaName(), "prod");
+ }
+
+ /**
+ * Test that schema names are returned in consistent order
+ */
+ @Test
+ public void testSchemaNameConsistency() throws IOException
+ {
+ File metadataFile = createTempYamlFile(
+ "clp:\n" +
+ " schema_a:\n" +
+ " table: /path/a.yaml\n" +
+ " schema_b:\n" +
+ " table: /path/b.yaml\n" +
+ " schema_c:\n" +
+ " table: /path/c.yaml\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+
+ // Call multiple times to verify consistency
+ List<String> schemas1 = provider.listSchemaNames();
+ List<String> schemas2 = provider.listSchemaNames();
+ List<String> schemas3 = provider.listSchemaNames();
+
+ assertEquals(schemas1, schemas2);
+ assertEquals(schemas2, schemas3);
+ }
+
+ /**
+ * Test empty schema (no tables)
+ */
+ @Test
+ public void testEmptySchema() throws IOException
+ {
+ File metadataFile = createTempYamlFile(
+ "clp:\n" +
+ " empty_schema:\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<String> schemas = provider.listSchemaNames();
+
+ assertTrue(schemas.contains("empty_schema"));
+
+ List<ClpTableHandle> tables = provider.listTableHandles("empty_schema");
+ assertTrue(tables.isEmpty());
+ }
+
+ /**
+ * Test that schemas with special characters in names are handled
+ */
+ @Test
+ public void testSchemaWithSpecialCharacters() throws IOException
+ {
+ File metadataFile = createTempYamlFile(
+ "clp:\n" +
+ " schema_with_underscores:\n" +
+ " table: /path/table.yaml\n" +
+ " schema-with-dashes:\n" +
+ " table: /path/table2.yaml\n");
+
+ ClpConfig config = new ClpConfig()
+ .setMetadataProviderType(YAML)
+ .setMetadataYamlPath(metadataFile.getAbsolutePath());
+
+ ClpYamlMetadataProvider provider = new ClpYamlMetadataProvider(config);
+ List<String> schemas = provider.listSchemaNames();
+
+ assertEquals(schemas.size(), 2);
+ Set<String> schemaSet = ImmutableSet.copyOf(schemas);
+ assertTrue(schemaSet.contains("schema_with_underscores"));
+ assertTrue(schemaSet.contains("schema-with-dashes"));
+ }
+
+ /**
+ * Helper method to create temporary YAML files for testing
+ */
+ private File createTempYamlFile(String content) throws IOException
+ {
+ File tempFile = Files.createTempFile("clp-test-", ".yaml").toFile();
+ tempFile.deleteOnExit();
+ tempFiles.add(tempFile);
+
+ try (FileWriter writer = new FileWriter(tempFile)) {
+ writer.write(content);
+ }
+
+ return tempFile;
+ }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/optimization/TestClpFilterToKqlConverter.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/optimization/TestClpFilterToKqlConverter.java
new file mode 100644
index 0000000000000..6eff28148ca29
--- /dev/null
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/optimization/TestClpFilterToKqlConverter.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.optimization;
+
+import com.facebook.presto.common.function.OperatorType;
+import com.facebook.presto.plugin.clp.ClpColumnHandle;
+import com.facebook.presto.plugin.clp.TestClpQueryBase;
+import com.facebook.presto.spi.ColumnHandle;
+import com.facebook.presto.spi.relation.CallExpression;
+import com.facebook.presto.spi.relation.ConstantExpression;
+import com.facebook.presto.spi.relation.VariableReferenceExpression;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import io.airlift.slice.Slices;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
+import static com.facebook.presto.common.type.IntegerType.INTEGER;
+import static com.facebook.presto.common.type.VarcharType.VARCHAR;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+/**
+ * Basic tests for ClpFilterToKqlConverter focusing on metadata SQL generation
+ * with string and numeric literals.
+ */
+@Test(singleThreaded = true)
+public class TestClpFilterToKqlConverter
+ extends TestClpQueryBase
+{
+ private ClpFilterToKqlConverter converter;
+ private Map<VariableReferenceExpression, ColumnHandle> assignments;
+ private Set<String> metadataFilterColumns;
+
+ @BeforeMethod
+ public void setUp()
+ {
+ assignments = new HashMap<>();
+ metadataFilterColumns = ImmutableSet.of("hostname", "status_code");
+ converter = new ClpFilterToKqlConverter(
+ standardFunctionResolution,
+ functionAndTypeManager,
+ assignments,
+ metadataFilterColumns);
+ }
+
+ /**
+ * Test string literal equality with metadata SQL generation.
+ * This is the main fix - ensuring string literals are handled correctly.
+ */
+ @Test
+ public void testStringLiteralWithMetadataSql()
+ {
+ // Setup
+ VariableReferenceExpression hostnameVar = new VariableReferenceExpression(
+ Optional.empty(), "hostname", VARCHAR);
+ ClpColumnHandle hostnameColumn = new ClpColumnHandle("hostname", "hostname", VARCHAR);
+ assignments.put(hostnameVar, hostnameColumn);
+
+ // Test: hostname = 'abc'
+ ConstantExpression stringLiteral = new ConstantExpression(Slices.utf8Slice("abc"), VARCHAR);
+ CallExpression equalCall = new CallExpression(
+ Optional.empty(),
+ "equal",
+ standardFunctionResolution.comparisonFunction(OperatorType.EQUAL, VARCHAR, VARCHAR),
+ BOOLEAN,
+ ImmutableList.of(hostnameVar, stringLiteral));
+
+ ClpExpression result = equalCall.accept(converter, null);
+
+ // Verify
+ assertTrue(result.getPushDownExpression().isPresent());
+ assertEquals(result.getPushDownExpression().get(), "hostname: \"abc\"");
+ assertTrue(result.getMetadataSqlQuery().isPresent());
+ assertEquals(result.getMetadataSqlQuery().get(), "\"hostname\" = 'abc'");
+ }
+
+ /**
+ * Test numeric literal equality with metadata SQL generation.
+ */
+ @Test
+ public void testNumericLiteralWithMetadataSql()
+ {
+ // Setup
+ VariableReferenceExpression statusCodeVar = new VariableReferenceExpression(
+ Optional.empty(), "status_code", INTEGER);
+ ClpColumnHandle statusCodeColumn = new ClpColumnHandle("status_code", "status_code", INTEGER);
+ assignments.put(statusCodeVar, statusCodeColumn);
+
+ // Test: status_code = 200
+ ConstantExpression numericLiteral = new ConstantExpression(200L, INTEGER);
+ CallExpression equalCall = new CallExpression(
+ Optional.empty(),
+ "equal",
+ standardFunctionResolution.comparisonFunction(OperatorType.EQUAL, INTEGER, INTEGER),
+ BOOLEAN,
+ ImmutableList.of(statusCodeVar, numericLiteral));
+
+ ClpExpression result = equalCall.accept(converter, null);
+
+ // Verify
+ assertTrue(result.getPushDownExpression().isPresent());
+ assertEquals(result.getPushDownExpression().get(), "status_code: 200");
+ assertTrue(result.getMetadataSqlQuery().isPresent());
+ assertEquals(result.getMetadataSqlQuery().get(), "\"status_code\" = 200");
+ }
+
+ /**
+ * Test escaping special characters in KQL string values.
+ */
+ @Test
+ public void testEscapeKqlSpecialChars()
+ {
+ assertEquals(
+ ClpFilterToKqlConverter.escapeKqlSpecialCharsForStringValue("path\\to\\file"),
+ "path\\\\to\\\\file");
+ assertEquals(
+ ClpFilterToKqlConverter.escapeKqlSpecialCharsForStringValue("file*.txt"),
+ "file\\*.txt");
+ assertEquals(
+ ClpFilterToKqlConverter.escapeKqlSpecialCharsForStringValue("normal_string"),
+ "normal_string");
+ }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/TestClpUberPinotSplitProvider.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/TestClpUberPinotSplitProvider.java
new file mode 100644
index 0000000000000..4646c8d5f0f6b
--- /dev/null
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/TestClpUberPinotSplitProvider.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.split;
+
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.facebook.presto.plugin.clp.ClpTableHandle;
+import com.facebook.presto.spi.SchemaTableName;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.lang.reflect.Method;
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+/**
+ * Unit tests for ClpUberPinotSplitProvider.
+ * Tests Uber-specific customizations including Neutrino endpoint URL construction
+ * and RTA table name prefixing.
+ */
+@Test(singleThreaded = true)
+public class TestClpUberPinotSplitProvider
+{
+ private ClpUberPinotSplitProvider splitProvider;
+ private ClpConfig config;
+
+ @BeforeMethod
+ public void setUp()
+ {
+ config = new ClpConfig();
+ config.setMetadataDbUrl("https://neutrino.uber.com");
+ config.setSplitProviderType(ClpConfig.SplitProviderType.PINOT_UBER);
+ splitProvider = new ClpUberPinotSplitProvider(config);
+ }
+
+ /**
+ * Test that the Neutrino endpoint URL is correctly constructed.
+ */
+ @Test
+ public void testBuildPinotSqlQueryEndpointUrl() throws Exception
+ {
+ // Use reflection to access the protected method
+ Method method = ClpUberPinotSplitProvider.class.getDeclaredMethod("buildPinotSqlQueryEndpointUrl", ClpConfig.class);
+ method.setAccessible(true);
+
+ URL result = (URL) method.invoke(splitProvider, config);
+
+ assertNotNull(result);
+ assertEquals(result.toString(), "https://neutrino.uber.com/v1/globalStatements");
+ assertEquals(result.getProtocol(), "https");
+ assertEquals(result.getHost(), "neutrino.uber.com");
+ assertEquals(result.getPath(), "/v1/globalStatements");
+ }
+
+ /**
+ * Test URL construction with different base URLs.
+ */
+ @Test
+ public void testBuildPinotSqlQueryEndpointUrlVariations() throws Exception
+ {
+ Method method = ClpUberPinotSplitProvider.class.getDeclaredMethod("buildPinotSqlQueryEndpointUrl", ClpConfig.class);
+ method.setAccessible(true);
+
+ // Test with trailing slash (note: current behavior yields a double slash in the path)
+ config.setMetadataDbUrl("https://neutrino.uber.com/");
+ URL result = (URL) method.invoke(splitProvider, config);
+ assertEquals(result.toString(), "https://neutrino.uber.com//v1/globalStatements");
+
+ // Test with the http protocol and a different host
+ config.setMetadataDbUrl("http://neutrino-dev.uber.com");
+ result = (URL) method.invoke(splitProvider, config);
+ assertEquals(result.toString(), "http://neutrino-dev.uber.com/v1/globalStatements");
+
+ // Test with port
+ config.setMetadataDbUrl("https://neutrino.uber.com:8080");
+ result = (URL) method.invoke(splitProvider, config);
+ assertEquals(result.toString(), "https://neutrino.uber.com:8080/v1/globalStatements");
+ }
+
+ /**
+ * Test that invalid URLs throw MalformedURLException.
+ */
+ @Test
+ public void testBuildPinotSqlQueryEndpointUrlInvalid() throws Exception
+ {
+ Method method = ClpUberPinotSplitProvider.class.getDeclaredMethod("buildPinotSqlQueryEndpointUrl", ClpConfig.class);
+ method.setAccessible(true);
+
+ config.setMetadataDbUrl("not a valid url");
+ try {
+ method.invoke(splitProvider, config);
+ fail("Expected MalformedURLException");
+ }
+ catch (Exception e) {
+ assertTrue(e.getCause() instanceof MalformedURLException);
+ }
+ }
+
+ /**
+ * Test that table names are correctly prefixed with "rta.logging."
+ */
+ @Test
+ public void testInferMetadataTableName()
+ {
+ SchemaTableName schemaTableName = new SchemaTableName("default", "logs");
+ ClpTableHandle tableHandle = new ClpTableHandle(schemaTableName, "test");
+
+ String result = splitProvider.inferMetadataTableName(tableHandle);
+
+ assertEquals(result, "rta.logging.logs");
+ }
+
+ /**
+ * Test table name inference with different schemas.
+ * Verifies that schema name doesn't affect the output (flat namespace).
+ */
+ @Test
+ public void testInferMetadataTableNameDifferentSchemas()
+ {
+ // Test with default schema
+ SchemaTableName schemaTableName1 = new SchemaTableName("default", "events");
+ ClpTableHandle tableHandle1 = new ClpTableHandle(schemaTableName1, "test");
+ assertEquals(splitProvider.inferMetadataTableName(tableHandle1), "rta.logging.events");
+
+ // Test with production schema - should produce same result
+ SchemaTableName schemaTableName2 = new SchemaTableName("production", "events");
+ ClpTableHandle tableHandle2 = new ClpTableHandle(schemaTableName2, "test");
+ assertEquals(splitProvider.inferMetadataTableName(tableHandle2), "rta.logging.events");
+
+ // Test with staging schema
+ SchemaTableName schemaTableName3 = new SchemaTableName("staging", "metrics");
+ ClpTableHandle tableHandle3 = new ClpTableHandle(schemaTableName3, "test");
+ assertEquals(splitProvider.inferMetadataTableName(tableHandle3), "rta.logging.metrics");
+ }
+
+ /**
+ * Test table name inference with special characters.
+ */
+ @Test
+ public void testInferMetadataTableNameSpecialCharacters()
+ {
+ // Test with underscore
+ SchemaTableName schemaTableName1 = new SchemaTableName("default", "user_logs");
+ ClpTableHandle tableHandle1 = new ClpTableHandle(schemaTableName1, "test");
+ assertEquals(splitProvider.inferMetadataTableName(tableHandle1), "rta.logging.user_logs");
+
+ // Test with hyphen
+ SchemaTableName schemaTableName2 = new SchemaTableName("default", "app-logs");
+ ClpTableHandle tableHandle2 = new ClpTableHandle(schemaTableName2, "test");
+ assertEquals(splitProvider.inferMetadataTableName(tableHandle2), "rta.logging.app-logs");
+
+ // Test with numbers
+ SchemaTableName schemaTableName3 = new SchemaTableName("default", "logs2024");
+ ClpTableHandle tableHandle3 = new ClpTableHandle(schemaTableName3, "test");
+ assertEquals(splitProvider.inferMetadataTableName(tableHandle3), "rta.logging.logs2024");
+ }
+
+ /**
+ * Test that null table handle throws NullPointerException.
+ */
+ @Test(expectedExceptions = NullPointerException.class,
+ expectedExceptionsMessageRegExp = "tableHandle is null")
+ public void testInferMetadataTableNameNull()
+ {
+ splitProvider.inferMetadataTableName(null);
+ }
+
+ /**
+ * Test the factory method for building Uber table names.
+ */
+ @Test
+ public void testBuildUberTableName()
+ {
+ assertEquals(splitProvider.buildUberTableName("logs"), "rta.logging.logs");
+ assertEquals(splitProvider.buildUberTableName("events"), "rta.logging.events");
+ assertEquals(splitProvider.buildUberTableName("metrics"), "rta.logging.metrics");
+ assertEquals(splitProvider.buildUberTableName("user_activity"), "rta.logging.user_activity");
+ assertEquals(splitProvider.buildUberTableName("app-logs"), "rta.logging.app-logs");
+ }
+
+ /**
+ * Test that the split provider is correctly instantiated with configuration.
+ */
+ @Test
+ public void testConstructor()
+ {
+ assertNotNull(splitProvider);
+
+ // Verify it's an instance of the parent class
+ assertTrue(splitProvider instanceof ClpPinotSplitProvider);
+ assertTrue(splitProvider instanceof ClpSplitProvider);
+ }
+
+ /**
+ * Test SQL query building methods inherited from parent.
+ */
+ @Test
+ public void testInheritedSqlQueryMethods()
+ {
+ // Test buildSplitSelectionQuery (inherited from parent)
+ String query = splitProvider.buildSplitSelectionQuery("rta.logging.logs", "status = 200");
+ assertTrue(query.contains("rta.logging.logs"));
+ assertTrue(query.contains("status = 200"));
+ assertTrue(query.contains("SELECT"));
+ assertTrue(query.contains("tpath"));
+
+ // Test buildSplitMetadataQuery (inherited from parent)
+ String metaQuery = splitProvider.buildSplitMetadataQuery("rta.logging.events", "timestamp > 1000", "timestamp", "DESC");
+ assertTrue(metaQuery.contains("rta.logging.events"));
+ assertTrue(metaQuery.contains("timestamp > 1000"));
+ assertTrue(metaQuery.contains("ORDER BY timestamp DESC"));
+ assertTrue(metaQuery.contains("creationtime"));
+ assertTrue(metaQuery.contains("lastmodifiedtime"));
+ assertTrue(metaQuery.contains("num_messages"));
+ }
+
+ /**
+ * Test configuration with different split provider types.
+ */
+ @Test
+ public void testConfigurationTypes()
+ {
+ // Test that the configuration is set correctly
+ assertEquals(config.getSplitProviderType(), ClpConfig.SplitProviderType.PINOT_UBER);
+
+ // Create a new instance with different config to ensure isolation
+ ClpConfig newConfig = new ClpConfig();
+ newConfig.setMetadataDbUrl("https://other-neutrino.uber.com");
+ newConfig.setSplitProviderType(ClpConfig.SplitProviderType.PINOT_UBER);
+
+ ClpUberPinotSplitProvider newProvider = new ClpUberPinotSplitProvider(newConfig);
+ assertNotNull(newProvider);
+ }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpPinotSplitFilterProvider.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpPinotSplitFilterProvider.java
new file mode 100644
index 0000000000000..33443b566cb37
--- /dev/null
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpPinotSplitFilterProvider.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.split.filter;
+
+import com.facebook.presto.plugin.clp.ClpConfig;
+import com.google.common.collect.ImmutableList;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Paths;
+import java.util.List;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+@Test(singleThreaded = true)
+public class TestClpPinotSplitFilterProvider
+{
+ private String filterConfigPath;
+ private ClpPinotSplitFilterProvider filterProvider;
+
+ @BeforeMethod
+ public void setUp() throws IOException, URISyntaxException
+ {
+ URL resource = getClass().getClassLoader().getResource("test-pinot-split-filter.json");
+ if (resource == null) {
+ throw new FileNotFoundException("test-pinot-split-filter.json not found in resources");
+ }
+
+ filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString();
+ ClpConfig config = new ClpConfig();
+ config.setSplitFilterConfig(filterConfigPath);
+ filterProvider = new ClpPinotSplitFilterProvider(config);
+ }
+
+ /**
+ * Test that Pinot provider correctly inherits MySQL range mapping functionality.
+ * Verifies that range comparisons are transformed according to the configuration.
+ */
+ @Test
+ public void testRangeMappingInheritance()
+ {
+ // Test greater than or equal
+ String sql1 = "\"msg.timestamp\" >= 1234";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "end_timestamp >= 1234");
+
+ // Test less than or equal
+ String sql2 = "\"msg.timestamp\" <= 5678";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "begin_timestamp <= 5678");
+
+ // Test equality (transforms to range check)
+ String sql3 = "\"msg.timestamp\" = 4567";
+ String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3);
+ assertEquals(result3, "(begin_timestamp <= 4567 AND end_timestamp >= 4567)");
+ }
+
+ /**
+ * Test that expressions without range mappings pass through unchanged.
+ */
+ @Test
+ public void testNonRangeMappedColumns()
+ {
+ // Test that non-mapped columns are not transformed
+ String sql1 = "\"status_code\" = 200";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "\"status_code\" = 200");
+
+ String sql2 = "\"hostname\" = 'server1'";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "\"hostname\" = 'server1'");
+ }
+
+ /**
+ * Test complex expressions with multiple predicates.
+ */
+ @Test
+ public void testComplexExpressions()
+ {
+ // Test AND condition with range mapping
+ String sql1 = "(\"msg.timestamp\" >= 1000 AND \"msg.timestamp\" <= 2000)";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "(end_timestamp >= 1000 AND begin_timestamp <= 2000)");
+
+ // Test mixed conditions
+ String sql2 = "(\"msg.timestamp\" = 1500 AND \"status_code\" = 200)";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "((begin_timestamp <= 1500 AND end_timestamp >= 1500) AND \"status_code\" = 200)");
+ }
+
+ /**
+ * Test that remapColumnName correctly returns mapped column names.
+ */
+ @Test
+ public void testRemapColumnName()
+ {
+ // Test range-mapped column
+ List<String> mappedColumns = filterProvider.remapColumnName("clp.default.table_1", "msg.timestamp");
+ assertEquals(mappedColumns, ImmutableList.of("begin_timestamp", "end_timestamp"));
+
+ // Test non-mapped column
+ List<String> unmappedColumns = filterProvider.remapColumnName("clp.default.table_1", "status_code");
+ assertEquals(unmappedColumns, ImmutableList.of("status_code"));
+ }
+
+ /**
+ * Test table-level configuration override.
+ */
+ @Test
+ public void testTableLevelOverride()
+ {
+ // Test table_2 specific mapping
+ String sql = "\"table2_column\" >= 100";
+ String result = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_2", sql);
+ assertEquals(result, "table2_upper >= 100");
+ }
+
+ /**
+ * Test schema-level configuration.
+ */
+ @Test
+ public void testSchemaLevelMapping()
+ {
+ // Test schema-level mapping applies to tables
+ String sql = "\"schema_column\" <= 500";
+ String result = filterProvider.remapSplitFilterPushDownExpression("clp.schema1.any_table", sql);
+ assertEquals(result, "schema_lower <= 500");
+ }
+
+ /**
+ * Test that configuration is correctly loaded.
+ */
+ @Test
+ public void testConfigurationLoaded()
+ {
+ // Simply verify that the provider was instantiated correctly with the config
+ assertTrue(filterConfigPath.endsWith("test-pinot-split-filter.json"));
+ }
+}
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java
index 7a4058f617d0c..c6fe1cee8bec2 100644
--- a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpSplitFilterConfigCommon.java
@@ -57,12 +57,8 @@ public void checkRequiredFilters()
config.setSplitFilterConfig(filterConfigPath);
ClpMySqlSplitFilterProvider filterProvider = new ClpMySqlSplitFilterProvider(config);
Set<String> testTableScopeSet = ImmutableSet.of(format("%s.%s", CONNECTOR_NAME, new SchemaTableName("default", "table_1")));
- assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters(
- testTableScopeSet,
- "(\"level\" >= 1 AND \"level\" <= 3)"));
- filterProvider.checkContainsRequiredFilters(
- testTableScopeSet,
- "(\"msg.timestamp\" > 1234 AND \"msg.timestamp\" < 5678)");
+ assertThrows(PrestoException.class, () -> filterProvider.checkContainsRequiredFilters(testTableScopeSet, ImmutableSet.of("level")));
+ filterProvider.checkContainsRequiredFilters(testTableScopeSet, ImmutableSet.of("msg.timestamp"));
}
@Test
diff --git a/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpUberPinotSplitFilterProvider.java b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpUberPinotSplitFilterProvider.java
new file mode 100644
index 0000000000000..edbd3a4cbc774
--- /dev/null
+++ b/presto-clp/src/test/java/com/facebook/presto/plugin/clp/split/filter/TestClpUberPinotSplitFilterProvider.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.plugin.clp.split.filter;
+
+import com.facebook.presto.plugin.clp.ClpConfig;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Paths;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertTrue;
+
+/**
+ * Unit tests for ClpUberPinotSplitFilterProvider.
+ * Tests Uber-specific TEXT_MATCH transformations in addition to inherited
+ * range mapping functionality.
+ */
+@Test(singleThreaded = true)
+public class TestClpUberPinotSplitFilterProvider
+{
+ private String filterConfigPath;
+ private ClpUberPinotSplitFilterProvider filterProvider;
+
+ @BeforeMethod
+ public void setUp() throws IOException, URISyntaxException
+ {
+ URL resource = getClass().getClassLoader().getResource("test-pinot-split-filter.json");
+ if (resource == null) {
+ throw new FileNotFoundException("test-pinot-split-filter.json not found in resources");
+ }
+
+ filterConfigPath = Paths.get(resource.toURI()).toAbsolutePath().toString();
+ ClpConfig config = new ClpConfig();
+ config.setSplitFilterConfig(filterConfigPath);
+ filterProvider = new ClpUberPinotSplitFilterProvider(config);
+ }
+
+ /**
+ * Test TEXT_MATCH transformation for simple equality predicates.
+ * Verifies that Uber-specific TEXT_MATCH transformations are applied.
+ */
+ @Test
+ public void testTextMatchTransformationSimpleEquality()
+ {
+ // Test single equality predicate with integer
+ String sql1 = "\"status_code\" = 200";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')");
+
+ // Test single equality predicate with negative integer
+ String sql2 = "\"level\" = -1";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "TEXT_MATCH(\"__mergedTextIndex\", '/-1:level/')");
+
+ // Test single equality predicate with decimal
+ String sql3 = "\"score\" = 3.14";
+ String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3);
+ assertEquals(result3, "TEXT_MATCH(\"__mergedTextIndex\", '/3.14:score/')");
+
+ // Test single equality predicate with scientific notation
+ String sql4 = "\"value\" = 1.5e10";
+ String result4 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql4);
+ assertEquals(result4, "TEXT_MATCH(\"__mergedTextIndex\", '/1.5e10:value/')");
+ }
+
+ /**
+ * Test TEXT_MATCH transformation for string literal equality predicates.
+ */
+ @Test
+ public void testTextMatchTransformationStringLiterals()
+ {
+ // Test single equality predicate with string literal
+ String sql1 = "\"hostname\" = 'uber-server1'";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "TEXT_MATCH(\"__mergedTextIndex\", '/uber-server1:hostname/')");
+
+ // Test string literal with special characters
+ String sql2 = "\"service\" = 'uber.logging.service'";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "TEXT_MATCH(\"__mergedTextIndex\", '/uber.logging.service:service/')");
+
+ // Test empty string literal
+ String sql3 = "\"tag\" = ''";
+ String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3);
+ assertEquals(result3, "TEXT_MATCH(\"__mergedTextIndex\", '/:tag/')");
+
+ // Test string literal with spaces
+ String sql4 = "\"message\" = 'Hello Uber World'";
+ String result4 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql4);
+ assertEquals(result4, "TEXT_MATCH(\"__mergedTextIndex\", '/Hello Uber World:message/')");
+ }
+
+ /**
+ * Test that range mappings are inherited and work correctly.
+ * Columns with range mappings should NOT be transformed to TEXT_MATCH.
+ */
+ @Test
+ public void testRangeMappingInheritance()
+ {
+ // Test that range-mapped columns don't get TEXT_MATCH transformation
+ // msg.timestamp has range mapping in test config
+ String sql1 = "\"msg.timestamp\" = 1234";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "(begin_timestamp <= 1234 AND end_timestamp >= 1234)");
+
+ // Test greater than or equal (range mapping)
+ String sql2 = "\"msg.timestamp\" >= 5000";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "end_timestamp >= 5000");
+
+ // Test less than or equal (range mapping)
+ String sql3 = "\"msg.timestamp\" <= 10000";
+ String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3);
+ assertEquals(result3, "begin_timestamp <= 10000");
+ }
+
+ /**
+ * Test complex expressions with both TEXT_MATCH and range mappings.
+ */
+ @Test
+ public void testMixedTransformations()
+ {
+ // Mix of range mapping and TEXT_MATCH
+ String sql1 = "(\"msg.timestamp\" >= 1000 AND \"status_code\" = 200)";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "(end_timestamp >= 1000 AND TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/'))");
+
+ // Multiple TEXT_MATCH transformations
+ String sql2 = "(\"hostname\" = 'uber1' AND \"service\" = 'logging')";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "(TEXT_MATCH(\"__mergedTextIndex\", '/uber1:hostname/') AND TEXT_MATCH(\"__mergedTextIndex\", '/logging:service/'))");
+
+ // Complex nested expression
+ String sql3 = "((\"msg.timestamp\" <= 2000 AND \"hostname\" = 'uber2') OR \"status_code\" = 404)";
+ String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3);
+ assertEquals(result3, "((begin_timestamp <= 2000 AND TEXT_MATCH(\"__mergedTextIndex\", '/uber2:hostname/')) OR TEXT_MATCH(\"__mergedTextIndex\", '/404:status_code/'))");
+ }
+
+ /**
+ * Test transformations at different scope levels.
+ */
+ @Test
+ public void testDifferentScopes()
+ {
+ // Table-level scope
+ String sql1 = "\"status_code\" = 200";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')");
+
+ // Schema-level scope
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default", sql1);
+ assertEquals(result2, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')");
+
+ // Catalog-level scope
+ String result3 = filterProvider.remapSplitFilterPushDownExpression("clp", sql1);
+ assertEquals(result3, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')");
+ }
+
+ /**
+ * Test that the filter provider is correctly instantiated.
+ */
+ @Test
+ public void testConstructor()
+ {
+ assertNotNull(filterProvider);
+
+ // Verify it's an instance of the parent classes
+ assertTrue(filterProvider instanceof ClpPinotSplitFilterProvider);
+ assertTrue(filterProvider instanceof ClpMySqlSplitFilterProvider);
+ assertTrue(filterProvider instanceof ClpSplitFilterProvider);
+ }
+
+ /**
+ * Test configuration is loaded correctly.
+ */
+ @Test
+ public void testConfigurationLoaded()
+ {
+ // Simply verify that the provider was instantiated correctly with the config
+ assertTrue(filterConfigPath.endsWith("test-pinot-split-filter.json"));
+ assertNotNull(filterProvider);
+ }
+
+ /**
+ * Test that non-equality expressions are not transformed to TEXT_MATCH.
+ */
+ @Test
+ public void testNonEqualityNotTransformed()
+ {
+ // Greater than should not be transformed
+ String sql1 = "\"status_code\" > 200";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "\"status_code\" > 200");
+
+ // Less than should not be transformed
+ String sql2 = "\"level\" < 5";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "\"level\" < 5");
+
+ // Not equal should not be transformed
+ String sql3 = "\"hostname\" != 'server1'";
+ String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3);
+ assertEquals(result3, "\"hostname\" != 'server1'");
+ }
+
+ /**
+ * Test edge cases and special patterns.
+ */
+ @Test
+ public void testEdgeCases()
+ {
+ // Test expression with no transformable parts
+ String sql1 = "1 = 1";
+ String result1 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql1);
+ assertEquals(result1, "1 = 1");
+
+ // Test column names with special characters (should still work if quoted properly)
+ String sql2 = "\"column.with.dots\" = 'value'";
+ String result2 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql2);
+ assertEquals(result2, "TEXT_MATCH(\"__mergedTextIndex\", '/value:column.with.dots/')");
+
+ // Test multiple spaces in expression
+ String sql3 = "\"status_code\"  =  200";
+ String result3 = filterProvider.remapSplitFilterPushDownExpression("clp.default.table_1", sql3);
+ assertEquals(result3, "TEXT_MATCH(\"__mergedTextIndex\", '/200:status_code/')");
+ }
+}
diff --git a/presto-clp/src/test/resources/test-cockroachdb-schema.yaml b/presto-clp/src/test/resources/test-cockroachdb-schema.yaml
new file mode 100644
index 0000000000000..d9fa4c99b3e65
--- /dev/null
+++ b/presto-clp/src/test/resources/test-cockroachdb-schema.yaml
@@ -0,0 +1,18 @@
+# Test schema for cockroachdb table
+# Type mappings:
+# 0 = Integer (BIGINT)
+# 1 = Float (DOUBLE)
+# 3 = VarString (VARCHAR)
+# 4 = Boolean (BOOLEAN)
+# 6 = UnstructuredArray (ARRAY)
+
+a_bigint: 0
+a_varchar: 3
+b_double: 1
+b_varchar: 3
+c:
+ d: 4
+ e: 3
+f:
+ g:
+ h: 6
diff --git a/presto-clp/src/test/resources/test-orders-schema1.yaml b/presto-clp/src/test/resources/test-orders-schema1.yaml
new file mode 100644
index 0000000000000..a9f773b5bdd97
--- /dev/null
+++ b/presto-clp/src/test/resources/test-orders-schema1.yaml
@@ -0,0 +1,11 @@
+# Test schema for orders table in schema1
+# Type mappings:
+# 0 = Integer (BIGINT)
+# 1 = Float (DOUBLE)
+# 3 = VarString (VARCHAR)
+
+order_id: 0
+customer_id: 0
+product_name: 3
+quantity: 0
+price: 1
diff --git a/presto-clp/src/test/resources/test-orders-schema2.yaml b/presto-clp/src/test/resources/test-orders-schema2.yaml
new file mode 100644
index 0000000000000..71a1fbd1d8724
--- /dev/null
+++ b/presto-clp/src/test/resources/test-orders-schema2.yaml
@@ -0,0 +1,13 @@
+# Test schema for orders table in schema2 (different structure from schema1)
+# Type mappings:
+# 0 = Integer (BIGINT)
+# 1 = Float (DOUBLE)
+# 3 = VarString (VARCHAR)
+# 4 = Boolean (BOOLEAN)
+
+order_id: 0
+vendor_id: 0
+item_description: 3
+total_amount: 1
+is_paid: 4
+shipping_address: 3
diff --git a/presto-clp/src/test/resources/test-pinot-split-filter.json b/presto-clp/src/test/resources/test-pinot-split-filter.json
new file mode 100644
index 0000000000000..e0edd0814d03b
--- /dev/null
+++ b/presto-clp/src/test/resources/test-pinot-split-filter.json
@@ -0,0 +1,58 @@
+{
+ "clp": [
+ {
+ "columnName": "level"
+ }
+ ],
+ "clp.default": [
+ {
+ "columnName": "author"
+ }
+ ],
+ "clp.default.table_1": [
+ {
+ "columnName": "msg.timestamp",
+ "customOptions": {
+ "rangeMapping": {
+ "lowerBound": "begin_timestamp",
+ "upperBound": "end_timestamp"
+ }
+ },
+ "required": true
+ },
+ {
+ "columnName": "file_name"
+ },
+ {
+ "columnName": "status_code"
+ }
+ ],
+ "clp.default.table_2": [
+ {
+ "columnName": "table2_column",
+ "customOptions": {
+ "rangeMapping": {
+ "lowerBound": "table2_lower",
+ "upperBound": "table2_upper"
+ }
+ }
+ },
+ {
+ "columnName": "request_id"
+ },
+ {
+ "columnName": "user_id"
+ }
+ ],
+ "clp.schema1": [
+ {
+ "columnName": "schema_column",
+ "customOptions": {
+ "rangeMapping": {
+ "lowerBound": "schema_lower",
+ "upperBound": "schema_upper"
+ }
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/presto-clp/src/test/resources/test-tables-schema.yaml b/presto-clp/src/test/resources/test-tables-schema.yaml
new file mode 100644
index 0000000000000..90a58ea32aaf4
--- /dev/null
+++ b/presto-clp/src/test/resources/test-tables-schema.yaml
@@ -0,0 +1,12 @@
+# Test metadata file for ClpYamlMetadataProvider
+# Maps tables to their schema definition files
+# Tests multiple schemas with duplicate table names (orders appears in both schema1 and schema2)
+# Relative paths are resolved relative to this file's directory at runtime
+clp:
+ default:
+ cockroachdb: test-cockroachdb-schema.yaml
+ schema1:
+ orders: test-orders-schema1.yaml
+ users: test-users-schema1.yaml
+ schema2:
+ orders: test-orders-schema2.yaml
diff --git a/presto-clp/src/test/resources/test-topn-split-filter.json b/presto-clp/src/test/resources/test-topn-split-filter.json
new file mode 100644
index 0000000000000..53450716cb7b4
--- /dev/null
+++ b/presto-clp/src/test/resources/test-topn-split-filter.json
@@ -0,0 +1,14 @@
+{
+ "clp.default.test": [
+ {
+ "columnName": "msg.timestamp",
+ "customOptions": {
+ "rangeMapping": {
+ "lowerBound": "begin_timestamp",
+ "upperBound": "end_timestamp"
+ }
+ },
+ "required": true
+ }
+ ]
+}
diff --git a/presto-clp/src/test/resources/test-users-schema1.yaml b/presto-clp/src/test/resources/test-users-schema1.yaml
new file mode 100644
index 0000000000000..5e603f32d53aa
--- /dev/null
+++ b/presto-clp/src/test/resources/test-users-schema1.yaml
@@ -0,0 +1,8 @@
+# Test schema for users table in schema1
+# Type mappings:
+# 0 = Integer (BIGINT)
+# 3 = VarString (VARCHAR)
+
+user_id: 0
+username: 3
+email: 3
diff --git a/presto-native-execution/pom.xml b/presto-native-execution/pom.xml
index 200ffa6834afc..adbd01ab4b917 100644
--- a/presto-native-execution/pom.xml
+++ b/presto-native-execution/pom.xml
@@ -267,6 +267,16 @@
+
+
+
+ <groupId>org.yaml</groupId>
+ <artifactId>snakeyaml</artifactId>
+ <version>2.1</version>
+
+
+
+