Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
3e3cf83
ci: Use dynamic branch-based tags and prefix matching for snapshot bu…
jackluo923 Oct 28, 2025
48d43c8
WIP feat: Add YAML metadata provider and Pinot split provider
jackluo923 Sep 19, 2025
217b91d
fix: Normalize timestamp literals to nanoseconds in filter pushdown
jackluo923 Oct 3, 2025
a659d45
WIP feat: Add TopN optimizer for timestamp-based queries
wraymo Sep 22, 2025
5eb1746
WIP feat: Add TopN optimization for Pinot split provider
jackluo923 Oct 27, 2025
5a861d1
feat: Upgrade velox submodule to y-scope/velox@52bb2f (#85)
wraymo Oct 28, 2025
c8aad93
fix: Rewrite CLP UDFs across full plan tree (#86)
wraymo Oct 31, 2025
e3082e5
refactor: Make Pinot query endpoint URL construction overridable (#88)
jackluo923 Nov 5, 2025
a3deb6d
Refactor: Auto-correct code style violations for consistency and read…
jackluo923 Nov 7, 2025
da2787e
feat: Add partial dynamic multi-schema support for CLP connector
jackluo923 Nov 7, 2025
3105485
fix: Add metadata SQL generation for string literals in ClpFilterToKq…
jackluo923 Nov 7, 2025
519a803
feat: Add Pinot split filter providers with TEXT_MATCH support (#101)
jackluo923 Nov 10, 2025
dd4dda1
refactor: Improve ClpYamlMetadataProvider cache organization and fix …
jackluo923 Nov 12, 2025
c4e719a
fix(flakytest): TestDistributedSpilledQueries testQueriesWithSpill (#…
nishithakbhaskaran Oct 9, 2025
221dfe5
[native] Disable TestPrestoNativeAsyncDataCacheCleanupAPI
amitkdutta Jun 24, 2025
d9b5238
Support JVM tzdata 2025a
jackluo923 Nov 27, 2025
5fbe4f4
ci: restructure workflows to use unified builder image on self-hosted…
jackluo923 Nov 28, 2025
8c8abef
test: temporarily disable TestPrestoNativeClpGeneralQueries.test
jackluo923 Nov 28, 2025
4ae9eb7
docs(ci): improve ci.yml header comments with design rationale
jackluo923 Nov 29, 2025
fefa99e
ci: centralize config computation in ci.yml
jackluo923 Nov 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions .github/bin/download_nodejs
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
#!/usr/bin/env bash
# Download Node.js and Yarn to Maven cache for frontend-maven-plugin
# ==================================================================
# Presto's web UI (presto-ui module) uses frontend-maven-plugin which expects
# Node.js and Yarn at specific paths in the Maven repository. This script
# pre-downloads them to avoid flaky downloads during the build.
#
# The tarballs are stored at:
# ${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-*-${OS}-${ARCH}.tar.gz
# ${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-*.tar.gz

set -euo pipefail

Expand Down Expand Up @@ -47,36 +56,39 @@ get_arch() {
}

# Ensure the Node.js tarball is present in the Maven repository cache.
#
# NOTE: this is a diff view without +/- markers. Near-identical adjacent lines
# appear to be the pre-change line (rooted at ${HOME}/.m2/repository) followed
# by its replacement (rooted at ${MAVEN_REPO}).
#
# Globals:  NODE_VERSION, NODE_OS, NODE_ARCH, MAVEN_REPO (all read)
# Outputs:  prints a "Skipped download" message when the tarball already exists
# Returns:  0 immediately if the tarball already exists;
#           1 if wget_retry reports failure (the target file is removed first)
# wget_retry is defined outside this hunk; the leading "3 10" arguments are
# presumably a retry count and a delay -- confirm against its definition.
download_node() {
if [[ -a "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" ]]; then
if [[ -a "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" ]]; then
echo "Node binary exists. Skipped download"
return 0
fi
if ! wget_retry 3 10 "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" \

if ! wget_retry 3 10 "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" \
"https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz" "node"; then
rm "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz"
rm "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}/node-${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz"
return 1
fi
}

# Ensure the Yarn tarball is present in the Maven repository cache.
#
# NOTE: this is a diff view without +/- markers. Near-identical adjacent lines
# appear to be the pre-change line (rooted at ${HOME}/.m2/repository) followed
# by its replacement (rooted at ${MAVEN_REPO}).
#
# Globals:  YARN_VERSION, MAVEN_REPO (read)
# Outputs:  prints a "Skipped download" message when the tarball already exists
# Returns:  0 immediately if the tarball already exists;
#           1 if wget_retry reports failure (the target file is removed first)
# The file is stored as yarn-${YARN_VERSION}.tar.gz although the upstream
# release asset is named yarn-v${YARN_VERSION}.tar.gz.
download_yarn() {
if [[ -a "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" ]]; then
if [[ -a "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" ]]; then
echo "Yarn binary exists. Skipped download"
return 0
fi

if ! wget_retry 3 10 "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" \
if ! wget_retry 3 10 "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz" \
"https://github.com/yarnpkg/yarn/releases/download/v${YARN_VERSION}/yarn-v${YARN_VERSION}.tar.gz" "yarn"; then
rm "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz"
rm "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}/yarn-${YARN_VERSION}.tar.gz"
return 1
fi
}

# Resolve the OS and architecture components that appear in the Node.js
# tarball file name and download URL.
NODE_OS=$(get_os)
NODE_ARCH=$(get_arch)

# (Diff view: the two mkdir lines directly below appear to be the pre-change
# version; the MAVEN_REPO-based lines further down replace them.)
mkdir -p "${HOME}/.m2/repository/com/github/eirslett/node/${NODE_VERSION}"
mkdir -p "${HOME}/.m2/repository/com/github/eirslett/yarn/${YARN_VERSION}"
# Use MAVEN_REPO if set, otherwise fall back to default .m2/repository
# The ":-" default also keeps this safe under "set -u" when MAVEN_REPO is unset.
MAVEN_REPO="${MAVEN_REPO:-${HOME}/.m2/repository}"

# Create the target directories up front so the downloads have somewhere to land.
mkdir -p "${MAVEN_REPO}/com/github/eirslett/node/${NODE_VERSION}"
mkdir -p "${MAVEN_REPO}/com/github/eirslett/yarn/${YARN_VERSION}"

if download_node; then
echo "node-v${NODE_VERSION}-${NODE_OS}-${NODE_ARCH}.tar.gz is ready for use"
Expand Down
142 changes: 142 additions & 0 deletions .github/dockerfiles/yscope-presto-builder.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# YScope Presto Builder Image
# ===========================
# A unified builder for presto (Java) and prestocpp (C++).
#
# Adapted from upstream's ubuntu-22.04-dependency.dockerfile, with additions:
# - Pre-warmed ccache for faster C++ builds
# - Pre-downloaded Maven dependencies for faster Java builds
# - Pre-downloaded Node.js/Yarn for frontend builds
#
# Tagged by hash of dependency files, rebuilt only when deps change.

# Base image: YScope's GitHub Actions runner image on Ubuntu 22.04 (jammy).
FROM ghcr.io/y-scope/docker-github-actions-runner:ubuntu-jammy

# Run every RUN instruction under bash with pipefail so that a failure in any
# stage of a pipeline fails the instruction.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# ============================================================================
# Dependency Installation (from upstream ubuntu-22.04-dependency.dockerfile)
# ============================================================================

# Only the setup scripts are copied at this point; the full source tree is
# copied later, just before the ccache warmup build.
COPY ./presto-native-execution/scripts /presto/scripts/
COPY ./presto-native-execution/velox/scripts /presto/velox/scripts/

# Required to avoid tzdata prompting for region selection
ARG DEBIAN_FRONTEND="noninteractive"
ARG tz="Etc/UTC"
ENV TZ=${tz}
# NOTE(review): PROMPT_ALWAYS_RESPOND and SUDO are presumably read by the
# setup scripts copied above (not visible here) -- confirm their meaning there.
ENV PROMPT_ALWAYS_RESPOND=n
ENV SUDO=" "

# Build parallelism for 32-core self-hosted runners
# See: https://github.com/y-scope/velox/pull/45
# NUM_THREADS is an ARG only (build-time); the two MAX_* values are also
# exported as ENV so they persist into containers started from this image.
ARG NUM_THREADS=16
ARG MAX_HIGH_MEM_JOBS=16
ARG MAX_LINK_JOBS=12
ENV MAX_HIGH_MEM_JOBS=${MAX_HIGH_MEM_JOBS}
ENV MAX_LINK_JOBS=${MAX_LINK_JOBS}

# Install a pinned CMake release from the official binary tarball
# (required - setup script's pip cmake causes fastfloat issues).
#
# CMAKE_VERSION is a build arg so the pin lives in exactly one place; the
# default keeps 3.28.3. Override with: --build-arg CMAKE_VERSION=x.y.z
# The tarball is unpacked under /opt and cmake/ctest/cpack are symlinked into
# /usr/local/bin so they take precedence on PATH.
ARG CMAKE_VERSION=3.28.3
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget && \
    apt-get clean && rm -rf /var/lib/apt/lists/* && \
    cmake_pkg="cmake-${CMAKE_VERSION}-linux-x86_64" && \
    wget -q "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${cmake_pkg}.tar.gz" && \
    tar -xzf "${cmake_pkg}.tar.gz" -C /opt && \
    rm "${cmake_pkg}.tar.gz" && \
    for tool in cmake ctest cpack; do \
        ln -sf "/opt/${cmake_pkg}/bin/${tool}" "/usr/local/bin/${tool}" || exit 1; \
    done

# Run setup scripts - same pattern as upstream ubuntu-22.04-dependency.dockerfile
# rpm is needed for MinIO installation (S3-compatible storage for tests)
#
# apt-get is used instead of apt: apt is the interactive front end and warns
# that its CLI is not stable for use in scripts.
# /build is only a scratch working directory for the setup scripts and is
# removed in the same layer.
RUN mkdir -p /build && \
    cd /build && \
    /presto/scripts/setup-ubuntu.sh && \
    apt-get install -y rpm && \
    /presto/velox/scripts/setup-ubuntu.sh install_adapters && \
    /presto/scripts/setup-adapters.sh && \
    rm -rf /build

# Put the Python virtual environment at /presto/.venv first on PATH.
ENV PATH="/presto/.venv/bin:${PATH}"
ENV VIRTUAL_ENV="/presto/.venv"

# ============================================================================
# ccache Warmup (YScope addition for faster C++ builds)
# See: https://github.com/y-scope/velox/pull/45
# ============================================================================

# ccache settings for portable cache (works across different checkout paths)
# - CCACHE_DIR: Standard location in /var/cache for system caches
# - CCACHE_BASEDIR: Set at runtime via GITHUB_WORKSPACE for portability
# - CCACHE_NOCOMPRESS: Compression disabled for faster CI execution (disk space
#   not a concern). CCACHE_COMPRESSLEVEL=0 does NOT do this on ccache 4.x:
#   level 0 means "use the default compression level".
# - CCACHE_MAXSIZE: Cache size limit. The variable name has no underscore
#   between MAX and SIZE; CCACHE_MAX_SIZE is not read by ccache.
# - CCACHE_NOHASHDIR: Ignore directory paths in hash for cache hits across checkouts
ENV CCACHE_DIR=/var/cache/ccache
ENV CCACHE_NOCOMPRESS=true
ENV CCACHE_MAXSIZE=5G
ENV CCACHE_NOHASHDIR=true

# World-writable so the cache is usable regardless of which user runs the build.
RUN mkdir -p "${CCACHE_DIR}" && chmod 777 "${CCACHE_DIR}"

# Copy the entire build context into the image; it is used both for the
# warmup build below and for the Maven/Node.js dependency download later on.
COPY . /workspace/
WORKDIR /workspace

# Build prestocpp once to populate ccache
# Build flags must match CI builds exactly for cache hits (see prestocpp-linux-build-and-unit-test.yml)
# CCACHE_BASEDIR set to /workspace for the warmup build
# ccache statistics are zeroed before the build (-z); afterwards they are
# printed verbosely and zeroed again (-svz) so the image ships with clean counters.
# NUM_THREADS and MAX_LINK_JOBS come from the build args declared earlier in this file.
RUN ccache -z && \
export CCACHE_BASEDIR=/workspace && \
cd presto-native-execution && \
cmake \
-B _build/release \
-GNinja \
-DTREAT_WARNINGS_AS_ERRORS=1 \
-DENABLE_ALL_WARNINGS=1 \
-DCMAKE_BUILD_TYPE=Release \
-DPRESTO_ENABLE_PARQUET=ON \
-DPRESTO_ENABLE_REMOTE_FUNCTIONS=ON \
-DPRESTO_ENABLE_JWT=ON \
-DPRESTO_STATS_REPORTER_TYPE=PROMETHEUS \
-DPRESTO_MEMORY_CHECKER_TYPE=LINUX_MEMORY_CHECKER \
-DCMAKE_PREFIX_PATH=/usr/local \
-DThrift_ROOT=/usr/local \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DMAX_LINK_JOBS=${MAX_LINK_JOBS} && \
ninja -C _build/release -j ${NUM_THREADS} && \
ccache -svz

# ============================================================================
# Maven/Node.js Cache (YScope addition for faster Java builds)
# ============================================================================

# Location of the pre-populated Maven repository. The same variable is read by
# .github/bin/download_nodejs to decide where the Node.js/Yarn tarballs go.
ENV MAVEN_REPO=/opt/maven/repository
RUN mkdir -p "${MAVEN_REPO}"

# Download dependencies using temporary Java installation
# - A Temurin JDK 8 is unpacked to /tmp only for this step and removed afterwards.
# - RUNNER_OS / RUNNER_ARCH mimic the GitHub Actions runner environment for
#   download_nodejs.
# - Maven dependency resolution is best-effort: the "|| true" is scoped to the
#   mvnw command alone via { ...; }. Without the grouping, "a && b || true"
#   would also swallow failures of the JDK download, the tar extraction and
#   download_nodejs, producing an image with a silently empty cache.
RUN wget -q https://github.com/adoptium/temurin8-binaries/releases/download/jdk8u442-b06/OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz && \
    tar -xzf OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz -C /tmp && \
    rm OpenJDK8U-jdk_x64_linux_hotspot_8u442b06.tar.gz && \
    export JAVA_HOME=/tmp/jdk8u442-b06 && \
    export PATH="${JAVA_HOME}/bin:${PATH}" && \
    export RUNNER_OS=Linux && \
    export RUNNER_ARCH=X64 && \
    cd /workspace && \
    .github/bin/download_nodejs && \
    { ./mvnw dependency:resolve-plugins dependency:resolve -B --no-transfer-progress \
        -Dmaven.repo.local="${MAVEN_REPO}" || true; } && \
    rm -rf /tmp/jdk8u442-b06

# Clean up source, keep only caches
# NOTE(review): the "*" glob does not match dot-entries, so hidden files and
# directories copied by "COPY . /workspace/" (e.g. .github, .mvn) remain.
# NOTE(review): because this runs in its own layer, the removed files still
# occupy space in the earlier COPY layer; this tidies the final filesystem but
# does not shrink the image.
RUN rm -rf /workspace/*

# Leave /workspace as the default working directory for containers.
WORKDIR /workspace
Loading
Loading