From 1f25c363143a6b3e2eb4ac46b0ba413aee06429a Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Wed, 22 Apr 2026 12:25:51 -0400 Subject: [PATCH 1/8] use core_nightly postgres extension --- Dockerfile | 10 +++++++--- server/server.go | 6 +++++- server/server_test.go | 14 ++++++++++++++ tests/integration/harness.go | 2 +- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4db4a46..3c37dff 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,14 +12,18 @@ ARG BUILD_TAGS="" ARG TARGETARCH ARG DUCKDB_EXTENSION_VERSION=1.5.2 ARG HTTPFS_EXTENSION_TAG=v1.5.2-stoi-fix +ARG DUCKDB_EXTENSION_REPOSITORY=https://extensions.duckdb.org +ARG DUCKDB_NIGHTLY_EXTENSION_REPOSITORY=http://nightly-extensions.duckdb.org RUN CGO_ENABLED=1 go build -tags "${BUILD_TAGS}" -ldflags "-X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" -o duckgres . RUN mkdir -p "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}" \ && curl -fsSL "https://github.com/benben/duckdb-httpfs/releases/download/${HTTPFS_EXTENSION_TAG}/httpfs-linux-${TARGETARCH}.duckdb_extension" \ -o "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/httpfs.duckdb_extension" \ - && for ext in ducklake postgres_scanner json; do \ - curl -fsSL "https://extensions.duckdb.org/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension.gz" \ + && for ext in ducklake json; do \ + curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension.gz" \ | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/${ext}.duckdb_extension"; \ - done + done \ + && curl -fsSL "${DUCKDB_NIGHTLY_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \ + | gunzip > "/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension" FROM debian:bookworm-slim diff --git a/server/server.go b/server/server.go index 91553b5..dd96752 100644 --- a/server/server.go +++ b/server/server.go @@ -1146,6 +1146,10 @@ func parseExtensionName(ext string) (name, installCmd string) { return ext, ext } +// PostgresCoreNightlyExtension installs the DuckDB Postgres extension from the +// core_nightly repository while still loading it under its canonical name. +const PostgresCoreNightlyExtension = "postgres FROM core_nightly" + // LoadExtensions installs and loads DuckDB extensions. // This is a standalone function so it can be reused by control plane workers. // Extension strings can include a source, e.g. "cache_httpfs FROM community". @@ -1242,7 +1246,7 @@ func applyDuckLakePreAttachSettingsWith(db duckLakeSQLExecer, loadPostgresScanne func applyDuckLakePreAttachSettings(db *sql.DB, dlCfg DuckLakeConfig) error { return applyDuckLakePreAttachSettingsWith(db, func() error { - return LoadExtensions(db, []string{"postgres_scanner"}) + return LoadExtensions(db, []string{PostgresCoreNightlyExtension}) }, dlCfg) } diff --git a/server/server_test.go b/server/server_test.go index 357a849..0810c45 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -112,6 +112,20 @@ func TestParseExtensionName(t *testing.T) { } } +func TestPostgresCoreNightlyExtension(t *testing.T) { + if PostgresCoreNightlyExtension != "postgres FROM core_nightly" { + t.Fatalf("PostgresCoreNightlyExtension = %q, want %q", PostgresCoreNightlyExtension, "postgres FROM core_nightly") + } + + name, installCmd := parseExtensionName(PostgresCoreNightlyExtension) + if name != "postgres" { + t.Fatalf("parseExtensionName(%q) name = %q, want %q", PostgresCoreNightlyExtension, name, "postgres") + } + if installCmd != PostgresCoreNightlyExtension { + t.Fatalf("parseExtensionName(%q) installCmd = %q, want %q", PostgresCoreNightlyExtension, installCmd, PostgresCoreNightlyExtension) + } +} + func TestNeedsCredentialRefresh(t *testing.T) { tests := []struct { name string diff --git a/tests/integration/harness.go b/tests/integration/harness.go index 2e7565d..7839071 100644 --- a/tests/integration/harness.go +++ b/tests/integration/harness.go @@ -150,7 +150,7 @@ func (h *TestHarness) startDuckgres(harnessCfg HarnessConfig) error { if harnessCfg.UseDuckLake { // Try the upstream postgres_scanner fix from core_nightly in the // DuckLake-backed integration harness without changing global defaults. - cfg.Extensions = []string{"postgres FROM core_nightly", "ducklake"} + cfg.Extensions = []string{server.PostgresCoreNightlyExtension, "ducklake"} metadataPort := harnessCfg.DuckLakeMetadataPort From 35ab81847969c87714d6f5e6232661641ac1f675 Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Wed, 22 Apr 2026 12:33:18 -0400 Subject: [PATCH 2/8] force refresh nightly postgres extension --- server/bundled_extensions_test.go | 39 +++++++++- server/server.go | 114 ++++++++++++++++++++++++------ server/server_test.go | 27 +++++++ tests/integration/harness.go | 2 +- 4 files changed, 158 insertions(+), 24 deletions(-) diff --git a/server/bundled_extensions_test.go b/server/bundled_extensions_test.go index 8827161..3aed082 100644 --- a/server/bundled_extensions_test.go +++ b/server/bundled_extensions_test.go @@ -41,7 +41,7 @@ func TestSeedBundledExtensionsCopiesMissingFiles(t *testing.T) { } } -func TestSeedBundledExtensionsPreservesExistingFiles(t *testing.T) { +func TestSeedBundledExtensionsPreservesExistingFilesWithMatchingContents(t *testing.T) { srcRoot := t.TempDir() dstRoot := t.TempDir() @@ -49,7 +49,7 @@ func TestSeedBundledExtensionsPreservesExistingFiles(t *testing.T) { if err := os.MkdirAll(srcDir, 0o755); err != nil { t.Fatalf("mkdir src: %v", err) } - if err := os.WriteFile(filepath.Join(srcDir, "httpfs.duckdb_extension"), []byte("new"), 0o644); err != nil { + if err := os.WriteFile(filepath.Join(srcDir, "httpfs.duckdb_extension"), []byte("existing"), 0o644); err != nil { t.Fatalf("write src extension: %v", err) } @@ -74,3 +74,38 @@ func TestSeedBundledExtensionsPreservesExistingFiles(t *testing.T) { t.Fatalf("expected existing extension to be preserved, got %q", string(got)) } } + +func TestSeedBundledExtensionsReplacesExistingFilesWithUpdatedContents(t *testing.T) { + srcRoot := t.TempDir() + dstRoot := t.TempDir() + + srcDir := filepath.Join(srcRoot, "v1.5.2", "linux_arm64") + if err := os.MkdirAll(srcDir, 0o755); err != nil { + t.Fatalf("mkdir src: %v", err) + } + srcExt := filepath.Join(srcDir, "postgres_scanner.duckdb_extension") + if err := os.WriteFile(srcExt, []byte("nightly"), 0o644); err != nil { + t.Fatalf("write src extension: %v", err) + } + + dstDir := filepath.Join(dstRoot, "v1.5.2", "linux_arm64") + if err := os.MkdirAll(dstDir, 0o755); err != nil { + t.Fatalf("mkdir dst: %v", err) + } + dstExt := filepath.Join(dstDir, "postgres_scanner.duckdb_extension") + if err := os.WriteFile(dstExt, []byte("stable"), 0o644); err != nil { + t.Fatalf("write dst extension: %v", err) + } + + if err := seedBundledExtensions(srcRoot, dstRoot); err != nil { + t.Fatalf("seedBundledExtensions: %v", err) + } + + got, err := os.ReadFile(dstExt) + if err != nil { + t.Fatalf("read dst extension: %v", err) + } + if string(got) != "nightly" { + t.Fatalf("expected existing extension to be replaced, got %q", string(got)) + } +} diff --git a/server/server.go b/server/server.go index dd96752..9238242 100644 --- a/server/server.go +++ b/server/server.go @@ -1,6 +1,7 @@ package server import ( + "bytes" "context" "crypto/tls" "database/sql" @@ -964,36 +965,100 @@ func seedBundledExtensions(srcRoot, dstRoot string) error { if info.Mode()&os.ModeSymlink != 0 { return nil } + if err := os.MkdirAll(filepath.Dir(dstPath), 0o750); err != nil { + return err + } if _, err := os.Stat(dstPath); err == nil { - return nil + same, err := filesEqual(path, dstPath) + if err != nil { + return err + } + if same { + return nil + } } else if !errors.Is(err, os.ErrNotExist) { return err } - srcFile, err := os.Open(path) - if err != nil { - return err - } + return copyFile(path, dstPath, info.Mode().Perm()) + }) +} - dstFile, err := os.OpenFile(dstPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o640) - if err != nil { - _ = srcFile.Close() - return err +func filesEqual(srcPath, dstPath string) (bool, error) { + srcInfo, err := os.Stat(srcPath) + if err != nil { + return false, err + } + dstInfo, err := os.Stat(dstPath) + if err != nil { + return false, err + } + if srcInfo.Size() != dstInfo.Size() { + return false, nil + } + + srcFile, err := os.Open(srcPath) + if err != nil { + return false, err + } + defer func() { _ = srcFile.Close() }() + + dstFile, err := os.Open(dstPath) + if err != nil { + return false, err + } + defer func() { _ = dstFile.Close() }() + + srcBuf := make([]byte, 32*1024) + dstBuf := make([]byte, 32*1024) + for { + srcN, srcErr := srcFile.Read(srcBuf) + dstN, dstErr := dstFile.Read(dstBuf) + if srcN != dstN { + return false, nil } - if _, err := io.Copy(dstFile, srcFile); err != nil { - _ = srcFile.Close() - _ = dstFile.Close() - return err + if srcN > 0 && !bytes.Equal(srcBuf[:srcN], dstBuf[:dstN]) { + return false, nil } - if err := srcFile.Close(); err != nil { - _ = dstFile.Close() - return err + if errors.Is(srcErr, io.EOF) && errors.Is(dstErr, io.EOF) { + return true, nil } - if err := dstFile.Close(); err != nil { - return err + if srcErr != nil && !errors.Is(srcErr, io.EOF) { + return false, srcErr } - return nil - }) + if dstErr != nil && !errors.Is(dstErr, io.EOF) { + return false, dstErr + } + } +} + +func copyFile(srcPath, dstPath string, mode os.FileMode) error { + srcFile, err := os.Open(srcPath) + if err != nil { + return err + } + defer func() { _ = srcFile.Close() }() + + tmpFile, err := os.CreateTemp(filepath.Dir(dstPath), ".bundled-extension-*") + if err != nil { + return err + } + tmpPath := tmpFile.Name() + defer func() { _ = os.Remove(tmpPath) }() + + if err := tmpFile.Chmod(mode); err != nil { + _ = tmpFile.Close() + return err + } + if _, err := io.Copy(tmpFile, srcFile); err != nil { + _ = tmpFile.Close() + return err + } + if err := tmpFile.Close(); err != nil { + return err + } + + return os.Rename(tmpPath, dstPath) } // CreateDBConnection creates a DuckDB connection for a client session. @@ -1146,6 +1211,13 @@ func parseExtensionName(ext string) (name, installCmd string) { return ext, ext } +func installExtensionStatement(installCmd string) string { + if strings.Contains(strings.ToUpper(installCmd), " FROM ") { + return "FORCE INSTALL " + installCmd + } + return "INSTALL " + installCmd +} + // PostgresCoreNightlyExtension installs the DuckDB Postgres extension from the // core_nightly repository while still loading it under its canonical name. const PostgresCoreNightlyExtension = "postgres FROM core_nightly" @@ -1166,7 +1238,7 @@ func LoadExtensions(db *sql.DB, extensions []string) error { name, installCmd := parseExtensionName(ext) // First install the extension (downloads if needed) - if _, err := db.Exec("INSTALL " + installCmd); err != nil { + if _, err := db.Exec(installExtensionStatement(installCmd)); err != nil { slog.Warn("Failed to install extension.", "extension", installCmd, "error", err) lastErr = err continue diff --git a/server/server_test.go b/server/server_test.go index 0810c45..c626e5f 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -126,6 +126,33 @@ func TestPostgresCoreNightlyExtension(t *testing.T) { } } +func TestInstallExtensionStatement(t *testing.T) { + tests := []struct { + name string + installCmd string + want string + }{ + { + name: "default repository uses install", + installCmd: "ducklake", + want: "INSTALL ducklake", + }, + { + name: "explicit repository uses force install", + installCmd: "postgres FROM core_nightly", + want: "FORCE INSTALL postgres FROM core_nightly", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := installExtensionStatement(tt.installCmd); got != tt.want { + t.Fatalf("installExtensionStatement(%q) = %q, want %q", tt.installCmd, got, tt.want) + } + }) + } +} + func TestNeedsCredentialRefresh(t *testing.T) { tests := []struct { name string diff --git a/tests/integration/harness.go b/tests/integration/harness.go index 7839071..d46f8be 100644 --- a/tests/integration/harness.go +++ b/tests/integration/harness.go @@ -14,8 +14,8 @@ import ( "sync" "time" - "github.com/posthog/duckgres/server" _ "github.com/lib/pq" + "github.com/posthog/duckgres/server" ) var duckLakeInfraServices = []string{"ducklake-metadata", "minio", "minio-init"} From 61aec876d284ae804b147e850fbb5cce17618035 Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Wed, 22 Apr 2026 13:36:16 -0400 Subject: [PATCH 3/8] scope nightly refresh to postgres extension --- server/bundled_extensions_test.go | 35 +++++++++++++++++++ server/server.go | 57 +++---------------------------- server/server_test.go | 7 +++- 3 files changed, 45 insertions(+), 54 deletions(-) diff --git a/server/bundled_extensions_test.go b/server/bundled_extensions_test.go index 3aed082..7052405 100644 --- a/server/bundled_extensions_test.go +++ b/server/bundled_extensions_test.go @@ -109,3 +109,38 @@ func TestSeedBundledExtensionsReplacesExistingFilesWithUpdatedContents(t *testin t.Fatalf("expected existing extension to be replaced, got %q", string(got)) } } + +func TestSeedBundledExtensionsPreservesNonTargetedChangedFiles(t *testing.T) { + srcRoot := t.TempDir() + dstRoot := t.TempDir() + + srcDir := filepath.Join(srcRoot, "v1.5.2", "linux_arm64") + if err := os.MkdirAll(srcDir, 0o755); err != nil { + t.Fatalf("mkdir src: %v", err) + } + srcExt := filepath.Join(srcDir, "httpfs.duckdb_extension") + if err := os.WriteFile(srcExt, []byte("new-httpfs"), 0o644); err != nil { + t.Fatalf("write src extension: %v", err) + } + + dstDir := filepath.Join(dstRoot, "v1.5.2", "linux_arm64") + if err := os.MkdirAll(dstDir, 0o755); err != nil { + t.Fatalf("mkdir dst: %v", err) + } + dstExt := filepath.Join(dstDir, "httpfs.duckdb_extension") + if err := os.WriteFile(dstExt, []byte("existing-httpfs"), 0o644); err != nil { + t.Fatalf("write dst extension: %v", err) + } + + if err := seedBundledExtensions(srcRoot, dstRoot); err != nil { + t.Fatalf("seedBundledExtensions: %v", err) + } + + got, err := os.ReadFile(dstExt) + if err != nil { + t.Fatalf("read dst extension: %v", err) + } + if string(got) != "existing-httpfs" { + t.Fatalf("expected non-targeted extension to be preserved, got %q", string(got)) + } +} diff --git a/server/server.go b/server/server.go index 9238242..ad9cffb 100644 --- a/server/server.go +++ b/server/server.go @@ -1,7 +1,6 @@ package server import ( - "bytes" "context" "crypto/tls" "database/sql" @@ -969,11 +968,7 @@ func seedBundledExtensions(srcRoot, dstRoot string) error { return err } if _, err := os.Stat(dstPath); err == nil { - same, err := filesEqual(path, dstPath) - if err != nil { - return err - } - if same { + if !shouldRefreshBundledExtension(path) { return nil } } else if !errors.Is(err, os.ErrNotExist) { @@ -984,52 +979,8 @@ func seedBundledExtensions(srcRoot, dstRoot string) error { }) } -func filesEqual(srcPath, dstPath string) (bool, error) { - srcInfo, err := os.Stat(srcPath) - if err != nil { - return false, err - } - dstInfo, err := os.Stat(dstPath) - if err != nil { - return false, err - } - if srcInfo.Size() != dstInfo.Size() { - return false, nil - } - - srcFile, err := os.Open(srcPath) - if err != nil { - return false, err - } - defer func() { _ = srcFile.Close() }() - - dstFile, err := os.Open(dstPath) - if err != nil { - return false, err - } - defer func() { _ = dstFile.Close() }() - - srcBuf := make([]byte, 32*1024) - dstBuf := make([]byte, 32*1024) - for { - srcN, srcErr := srcFile.Read(srcBuf) - dstN, dstErr := dstFile.Read(dstBuf) - if srcN != dstN { - return false, nil - } - if srcN > 0 && !bytes.Equal(srcBuf[:srcN], dstBuf[:dstN]) { - return false, nil - } - if errors.Is(srcErr, io.EOF) && errors.Is(dstErr, io.EOF) { - return true, nil - } - if srcErr != nil && !errors.Is(srcErr, io.EOF) { - return false, srcErr - } - if dstErr != nil && !errors.Is(dstErr, io.EOF) { - return false, dstErr - } - } +func shouldRefreshBundledExtension(srcPath string) bool { + return filepath.Base(srcPath) == "postgres_scanner.duckdb_extension" } func copyFile(srcPath, dstPath string, mode os.FileMode) error { @@ -1212,7 +1163,7 @@ func parseExtensionName(ext string) (name, installCmd string) { } func installExtensionStatement(installCmd string) string { - if strings.Contains(strings.ToUpper(installCmd), " FROM ") { + if strings.EqualFold(strings.TrimSpace(installCmd), PostgresCoreNightlyExtension) { return "FORCE INSTALL " + installCmd } return "INSTALL " + installCmd diff --git a/server/server_test.go b/server/server_test.go index c626e5f..354a3fd 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -138,10 +138,15 @@ func TestInstallExtensionStatement(t *testing.T) { want: "INSTALL ducklake", }, { - name: "explicit repository uses force install", + name: "nightly postgres uses force install", installCmd: "postgres FROM core_nightly", want: "FORCE INSTALL postgres FROM core_nightly", }, + { + name: "other explicit repositories still use install", + installCmd: "cache_httpfs FROM community", + want: "INSTALL cache_httpfs FROM community", + }, } for _, tt := range tests { From ae14f54ebaaf8cff1de585a94c2191b369e661d5 Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Wed, 22 Apr 2026 16:00:41 -0400 Subject: [PATCH 4/8] use bundled postgres_scanner for nightly rollout --- server/server.go | 9 +-------- server/server_test.go | 18 ++---------------- tests/integration/harness.go | 4 +--- 3 files changed, 4 insertions(+), 27 deletions(-) diff --git a/server/server.go b/server/server.go index ad9cffb..1e30ccd 100644 --- a/server/server.go +++ b/server/server.go @@ -1163,16 +1163,9 @@ func parseExtensionName(ext string) (name, installCmd string) { } func installExtensionStatement(installCmd string) string { - if strings.EqualFold(strings.TrimSpace(installCmd), PostgresCoreNightlyExtension) { - return "FORCE INSTALL " + installCmd - } return "INSTALL " + installCmd } -// PostgresCoreNightlyExtension installs the DuckDB Postgres extension from the -// core_nightly repository while still loading it under its canonical name. -const PostgresCoreNightlyExtension = "postgres FROM core_nightly" - // LoadExtensions installs and loads DuckDB extensions. // This is a standalone function so it can be reused by control plane workers. // Extension strings can include a source, e.g. "cache_httpfs FROM community". @@ -1269,7 +1262,7 @@ func applyDuckLakePreAttachSettingsWith(db duckLakeSQLExecer, loadPostgresScanne func applyDuckLakePreAttachSettings(db *sql.DB, dlCfg DuckLakeConfig) error { return applyDuckLakePreAttachSettingsWith(db, func() error { - return LoadExtensions(db, []string{PostgresCoreNightlyExtension}) + return LoadExtensions(db, []string{"postgres_scanner"}) }, dlCfg) } diff --git a/server/server_test.go b/server/server_test.go index 354a3fd..8ba6176 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -112,20 +112,6 @@ func TestParseExtensionName(t *testing.T) { } } -func TestPostgresCoreNightlyExtension(t *testing.T) { - if PostgresCoreNightlyExtension != "postgres FROM core_nightly" { - t.Fatalf("PostgresCoreNightlyExtension = %q, want %q", PostgresCoreNightlyExtension, "postgres FROM core_nightly") - } - - name, installCmd := parseExtensionName(PostgresCoreNightlyExtension) - if name != "postgres" { - t.Fatalf("parseExtensionName(%q) name = %q, want %q", PostgresCoreNightlyExtension, name, "postgres") - } - if installCmd != PostgresCoreNightlyExtension { - t.Fatalf("parseExtensionName(%q) installCmd = %q, want %q", PostgresCoreNightlyExtension, installCmd, PostgresCoreNightlyExtension) - } -} - func TestInstallExtensionStatement(t *testing.T) { tests := []struct { name string @@ -138,9 +124,9 @@ func TestInstallExtensionStatement(t *testing.T) { want: "INSTALL ducklake", }, { - name: "nightly postgres uses force install", + name: "nightly postgres still uses plain install", installCmd: "postgres FROM core_nightly", - want: "FORCE INSTALL postgres FROM core_nightly", + want: "INSTALL postgres FROM core_nightly", }, { name: "other explicit repositories still use install", diff --git a/tests/integration/harness.go b/tests/integration/harness.go index d46f8be..54064cc 100644 --- a/tests/integration/harness.go +++ b/tests/integration/harness.go @@ -148,9 +148,7 @@ func (h *TestHarness) startDuckgres(harnessCfg HarnessConfig) error { // Configure DuckLake if enabled if harnessCfg.UseDuckLake { - // Try the upstream postgres_scanner fix from core_nightly in the - // DuckLake-backed integration harness without changing global defaults. - cfg.Extensions = []string{server.PostgresCoreNightlyExtension, "ducklake"} + cfg.Extensions = []string{"ducklake"} metadataPort := harnessCfg.DuckLakeMetadataPort From 582bcfaa42ef62c1b21d176adecad64fd061c656 Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Wed, 22 Apr 2026 16:22:53 -0400 Subject: [PATCH 5/8] remove unused install helper --- server/server.go | 6 +----- server/server_test.go | 32 -------------------------------- 2 files changed, 1 insertion(+), 37 deletions(-) diff --git a/server/server.go b/server/server.go index 1e30ccd..831ce62 100644 --- a/server/server.go +++ b/server/server.go @@ -1162,10 +1162,6 @@ func parseExtensionName(ext string) (name, installCmd string) { return ext, ext } -func installExtensionStatement(installCmd string) string { - return "INSTALL " + installCmd -} - // LoadExtensions installs and loads DuckDB extensions. // This is a standalone function so it can be reused by control plane workers. // Extension strings can include a source, e.g. "cache_httpfs FROM community". @@ -1182,7 +1178,7 @@ func LoadExtensions(db *sql.DB, extensions []string) error { name, installCmd := parseExtensionName(ext) // First install the extension (downloads if needed) - if _, err := db.Exec(installExtensionStatement(installCmd)); err != nil { + if _, err := db.Exec("INSTALL " + installCmd); err != nil { slog.Warn("Failed to install extension.", "extension", installCmd, "error", err) lastErr = err continue diff --git a/server/server_test.go b/server/server_test.go index 8ba6176..357a849 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -112,38 +112,6 @@ func TestParseExtensionName(t *testing.T) { } } -func TestInstallExtensionStatement(t *testing.T) { - tests := []struct { - name string - installCmd string - want string - }{ - { - name: "default repository uses install", - installCmd: "ducklake", - want: "INSTALL ducklake", - }, - { - name: "nightly postgres still uses plain install", - installCmd: "postgres FROM core_nightly", - want: "INSTALL postgres FROM core_nightly", - }, - { - name: "other explicit repositories still use install", - installCmd: "cache_httpfs FROM community", - want: "INSTALL cache_httpfs FROM community", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if got := installExtensionStatement(tt.installCmd); got != tt.want { - t.Fatalf("installExtensionStatement(%q) = %q, want %q", tt.installCmd, got, tt.want) - } - }) - } -} - func TestNeedsCredentialRefresh(t *testing.T) { tests := []struct { name string From 4c9e7bd2c2b03714b3f54a47a106375954d80eb6 Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Wed, 22 Apr 2026 16:41:00 -0400 Subject: [PATCH 6/8] bootstrap bundled extension cache once --- server/bundled_extensions_test.go | 60 +++++++++++++++++++++++++++++++ server/checkpoint.go | 4 +-- server/querylog.go | 4 +-- server/server.go | 17 ++++++--- 4 files changed, 76 insertions(+), 9 deletions(-) diff --git a/server/bundled_extensions_test.go b/server/bundled_extensions_test.go index 7052405..6c9897f 100644 --- a/server/bundled_extensions_test.go +++ b/server/bundled_extensions_test.go @@ -1,9 +1,12 @@ package server import ( + "database/sql" "os" "path/filepath" "testing" + + _ "github.com/duckdb/duckdb-go/v2" ) func TestSeedBundledExtensionsCopiesMissingFiles(t *testing.T) { @@ -144,3 +147,60 @@ func TestSeedBundledExtensionsPreservesNonTargetedChangedFiles(t *testing.T) { t.Fatalf("expected non-targeted extension to be preserved, got %q", string(got)) } } + +func TestConfigureExtensionDirectorySeedsBundledExtensions(t *testing.T) { + bundledRoot := t.TempDir() + extDir := t.TempDir() + + srcDir := filepath.Join(bundledRoot, "v1.5.2", "linux_arm64") + if err := os.MkdirAll(srcDir, 0o755); err != nil { + t.Fatalf("mkdir src: %v", err) + } + srcExt := filepath.Join(srcDir, "postgres_scanner.duckdb_extension") + if err := os.WriteFile(srcExt, []byte("nightly"), 0o644); err != nil { + t.Fatalf("write src extension: %v", err) + } + + db, err := sql.Open("duckdb", ":memory:") + if err != nil { + t.Fatalf("open duckdb: %v", err) + } + defer func() { _ = db.Close() }() + + if err := configureExtensionDirectory(db, bundledRoot, extDir, "test"); err != nil { + t.Fatalf("configureExtensionDirectory: %v", err) + } + + dstExt := filepath.Join(extDir, "v1.5.2", "linux_arm64", "postgres_scanner.duckdb_extension") + got, err := os.ReadFile(dstExt) + if err != nil { + t.Fatalf("read dst extension: %v", err) + } + if string(got) != "nightly" { + t.Fatalf("expected seeded extension to match bundled contents, got %q", string(got)) + } +} + +func TestConfigureExtensionDirectorySetsDuckDBExtensionDirectory(t *testing.T) { + bundledRoot := t.TempDir() + extDir := t.TempDir() + + db, err := sql.Open("duckdb", ":memory:") + if err != nil { + t.Fatalf("open duckdb: %v", err) + } + defer func() { _ = db.Close() }() + + if err := configureExtensionDirectory(db, bundledRoot, extDir, "test"); err != nil { + t.Fatalf("configureExtensionDirectory: %v", err) + } + + var got string + row := db.QueryRow("SELECT current_setting('extension_directory')") + if err := row.Scan(&got); err != nil { + t.Fatalf("scan extension_directory: %v", err) + } + if got != extDir { + t.Fatalf("extension_directory = %q, want %q", got, extDir) + } +} diff --git a/server/checkpoint.go b/server/checkpoint.go index 90386f5..584d4fb 100644 --- a/server/checkpoint.go +++ b/server/checkpoint.go @@ -33,9 +33,9 @@ func NewDuckLakeCheckpointer(cfg Config) (*DuckLakeCheckpointer, error) { } extDir := filepath.Join(cfg.DataDir, "extensions") - if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { + if err := configureExtensionDirectory(db, bundledDuckDBExtensionsDir, extDir, "checkpoint"); err != nil { _ = db.Close() - return nil, fmt.Errorf("checkpoint: set extension_directory: %w", err) + return nil, err } if _, err := db.Exec("INSTALL ducklake; LOAD ducklake"); err != nil { diff --git a/server/querylog.go b/server/querylog.go index 0d3633c..b75cb6d 100644 --- a/server/querylog.go +++ b/server/querylog.go @@ -71,9 +71,9 @@ func NewQueryLogger(cfg Config) (*QueryLogger, error) { // Set extension directory under DataDir so DuckDB doesn't rely on $HOME/.duckdb extDir := filepath.Join(cfg.DataDir, "extensions") - if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { + if err := configureExtensionDirectory(db, bundledDuckDBExtensionsDir, extDir, "querylog"); err != nil { _ = db.Close() - return nil, fmt.Errorf("querylog: set extension_directory: %w", err) + return nil, err } // Load ducklake extension diff --git a/server/server.go b/server/server.go index 831ce62..0fe9648 100644 --- a/server/server.go +++ b/server/server.go @@ -873,11 +873,8 @@ func openBaseDB(cfg Config, username string) (*sql.DB, error) { // Set extension directory under DataDir so DuckDB doesn't rely on $HOME/.duckdb // for autoloading/installing extensions. extDir := filepath.Join(cfg.DataDir, "extensions") - if err := seedBundledExtensions(bundledDuckDBExtensionsDir, extDir); err != nil { - slog.Warn("Failed to seed bundled DuckDB extensions.", "source", bundledDuckDBExtensionsDir, "extension_directory", extDir, "error", err) - } - if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { - slog.Warn("Failed to set DuckDB extension_directory.", "extension_directory", extDir, "error", err) + if err := configureExtensionDirectory(db, bundledDuckDBExtensionsDir, extDir, "openBaseDB"); err != nil { + slog.Warn("Failed to configure DuckDB extension_directory.", "extension_directory", extDir, "error", err) } else { slog.Debug("Set DuckDB extension_directory.", "extension_directory", extDir) } @@ -979,6 +976,16 @@ func seedBundledExtensions(srcRoot, dstRoot string) error { }) } +func configureExtensionDirectory(db *sql.DB, bundledSrcRoot, extDir, scope string) error { + if err := seedBundledExtensions(bundledSrcRoot, extDir); err != nil { + return fmt.Errorf("%s: seed bundled extensions: %w", scope, err) + } + if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { + return fmt.Errorf("%s: set extension_directory: %w", scope, err) + } + return nil +} + func shouldRefreshBundledExtension(srcPath string) bool { return filepath.Base(srcPath) == "postgres_scanner.duckdb_extension" } From 4227ca2cb085956c57b822364fe2eede43f2e977 Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Wed, 22 Apr 2026 16:56:37 -0400 Subject: [PATCH 7/8] move extension seeding to process bootstrap --- server/bundled_extensions_test.go | 73 ++++++++++++++++++++++++++----- server/checkpoint.go | 4 +- server/querylog.go | 4 +- server/server.go | 42 ++++++++++++------ server/shell.go | 4 ++ server/worker.go | 6 ++- 6 files changed, 103 insertions(+), 30 deletions(-) diff --git a/server/bundled_extensions_test.go b/server/bundled_extensions_test.go index 6c9897f..567611b 100644 --- a/server/bundled_extensions_test.go +++ b/server/bundled_extensions_test.go @@ -2,8 +2,10 @@ package server import ( "database/sql" + "fmt" "os" "path/filepath" + "sync" "testing" _ "github.com/duckdb/duckdb-go/v2" @@ -148,9 +150,9 @@ func TestSeedBundledExtensionsPreservesNonTargetedChangedFiles(t *testing.T) { } } -func TestConfigureExtensionDirectorySeedsBundledExtensions(t *testing.T) { +func TestBootstrapBundledExtensionsSeedsBundledExtensions(t *testing.T) { bundledRoot := t.TempDir() - extDir := t.TempDir() + dataDir := t.TempDir() srcDir := filepath.Join(bundledRoot, "v1.5.2", "linux_arm64") if err := os.MkdirAll(srcDir, 0o755); err != nil { @@ -161,16 +163,20 @@ func TestConfigureExtensionDirectorySeedsBundledExtensions(t *testing.T) { t.Fatalf("write src extension: %v", err) } - db, err := sql.Open("duckdb", ":memory:") - if err != nil { - t.Fatalf("open duckdb: %v", err) - } - defer func() { _ = db.Close() }() + prevBundledRoot := bundledDuckDBExtensionsDir + bundledDuckDBExtensionsDir = bundledRoot + defer func() { bundledDuckDBExtensionsDir = prevBundledRoot }() + + bundledExtensionBootstrap = struct { + mu sync.Mutex + byPath map[string]error + }{} - if err := configureExtensionDirectory(db, bundledRoot, extDir, "test"); err != nil { - t.Fatalf("configureExtensionDirectory: %v", err) + if err := bootstrapBundledExtensions(dataDir); err != nil { + t.Fatalf("bootstrapBundledExtensions: %v", err) } + extDir := filepath.Join(dataDir, "extensions") dstExt := filepath.Join(extDir, "v1.5.2", "linux_arm64", "postgres_scanner.duckdb_extension") got, err := os.ReadFile(dstExt) if err != nil { @@ -181,8 +187,51 @@ func TestConfigureExtensionDirectorySeedsBundledExtensions(t *testing.T) { } } -func TestConfigureExtensionDirectorySetsDuckDBExtensionDirectory(t *testing.T) { +func TestBootstrapBundledExtensionsRunsOncePerExtensionDirectory(t *testing.T) { bundledRoot := t.TempDir() + dataDir := t.TempDir() + extDir := filepath.Join(dataDir, "extensions") + + srcDir := filepath.Join(bundledRoot, "v1.5.2", "linux_arm64") + if err := os.MkdirAll(srcDir, 0o755); err != nil { + t.Fatalf("mkdir src: %v", err) + } + srcExt := filepath.Join(srcDir, "postgres_scanner.duckdb_extension") + if err := os.WriteFile(srcExt, []byte("nightly"), 0o644); err != nil { + t.Fatalf("write src extension: %v", err) + } + + prevBundledRoot := bundledDuckDBExtensionsDir + bundledDuckDBExtensionsDir = bundledRoot + defer func() { bundledDuckDBExtensionsDir = prevBundledRoot }() + + bundledExtensionBootstrap = struct { + mu sync.Mutex + byPath map[string]error + }{} + + if err := bootstrapBundledExtensions(dataDir); err != nil { + t.Fatalf("bootstrapBundledExtensions: %v", err) + } + + if err := os.WriteFile(srcExt, []byte("newer-nightly"), 0o644); err != nil { + t.Fatalf("rewrite src extension: %v", err) + } + if err := bootstrapBundledExtensions(dataDir); err != nil { + t.Fatalf("second bootstrapBundledExtensions: %v", err) + } + + dstExt := filepath.Join(extDir, "v1.5.2", "linux_arm64", "postgres_scanner.duckdb_extension") + got, err := os.ReadFile(dstExt) + if err != nil { + t.Fatalf("read dst extension: %v", err) + } + if string(got) != "nightly" { + t.Fatalf("expected bootstrap to run once, got %q", string(got)) + } +} + +func TestSetExtensionDirectoryUsesPreparedCache(t *testing.T) { extDir := t.TempDir() db, err := sql.Open("duckdb", ":memory:") @@ -191,8 +240,8 @@ func TestConfigureExtensionDirectorySetsDuckDBExtensionDirectory(t *testing.T) { } defer func() { _ = db.Close() }() - if err := configureExtensionDirectory(db, bundledRoot, extDir, "test"); err != nil { - t.Fatalf("configureExtensionDirectory: %v", err) + if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { + t.Fatalf("set extension_directory: %v", err) } var got string diff --git a/server/checkpoint.go b/server/checkpoint.go index 584d4fb..90386f5 100644 --- a/server/checkpoint.go +++ b/server/checkpoint.go @@ -33,9 +33,9 @@ func NewDuckLakeCheckpointer(cfg Config) (*DuckLakeCheckpointer, error) { } extDir := filepath.Join(cfg.DataDir, "extensions") - if err := configureExtensionDirectory(db, bundledDuckDBExtensionsDir, extDir, "checkpoint"); err != nil { + if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { _ = db.Close() - return nil, err + return nil, fmt.Errorf("checkpoint: set extension_directory: %w", err) } if _, err := db.Exec("INSTALL ducklake; LOAD ducklake"); err != nil { diff --git a/server/querylog.go b/server/querylog.go index b75cb6d..0d3633c 100644 --- a/server/querylog.go +++ b/server/querylog.go @@ -71,9 +71,9 @@ func NewQueryLogger(cfg Config) (*QueryLogger, error) { // Set extension directory under DataDir so DuckDB doesn't rely on $HOME/.duckdb extDir := filepath.Join(cfg.DataDir, "extensions") - if err := configureExtensionDirectory(db, bundledDuckDBExtensionsDir, extDir, "querylog"); err != nil { + if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { _ = db.Close() - return nil, err + return nil, fmt.Errorf("querylog: set extension_directory: %w", err) } // Load ducklake extension diff --git a/server/server.go b/server/server.go index 0fe9648..d062869 100644 --- a/server/server.go +++ b/server/server.go @@ -37,7 +37,12 @@ var processVersion = "dev" // clients pinning connection goroutines indefinitely. var startupReadTimeout = 30 * time.Second -const bundledDuckDBExtensionsDir = "/app/extensions" +var bundledDuckDBExtensionsDir = "/app/extensions" + +var bundledExtensionBootstrap struct { + mu sync.Mutex + byPath map[string]error +} // SetProcessVersion sets the version string for this process. Called from main(). func SetProcessVersion(v string) { processVersion = v } @@ -45,6 +50,23 @@ func SetProcessVersion(v string) { processVersion = v } // ProcessVersion returns the version string for this process. func ProcessVersion() string { return processVersion } +func bootstrapBundledExtensions(dataDir string) error { + extDir := filepath.Join(dataDir, "extensions") + + bundledExtensionBootstrap.mu.Lock() + defer bundledExtensionBootstrap.mu.Unlock() + if bundledExtensionBootstrap.byPath == nil { + bundledExtensionBootstrap.byPath = make(map[string]error) + } + if err, ok := bundledExtensionBootstrap.byPath[extDir]; ok { + return err + } + + err := seedBundledExtensions(bundledDuckDBExtensionsDir, extDir) + bundledExtensionBootstrap.byPath[extDir] = err + return err +} + // passwordPattern matches password= or password: with quoted or unquoted values. var passwordPattern = regexp.MustCompile(`(?i)(password\s*[=:]\s*)("[^"]*"|[^\s"]+)`) @@ -477,6 +499,10 @@ func New(cfg Config) (*Server, error) { ensureDuckLakeMigrationCheck(cfg.DuckLake, cfg.DataDir) } + if err := bootstrapBundledExtensions(cfg.DataDir); err != nil { + slog.Warn("Failed to bootstrap bundled DuckDB extensions.", "source", bundledDuckDBExtensionsDir, "extension_directory", filepath.Join(cfg.DataDir, "extensions"), "error", err) + } + // Initialize query logger (non-fatal on error) if ql, err := NewQueryLogger(cfg); err != nil { slog.Warn("Failed to initialize query log, continuing without it.", "error", err) @@ -873,8 +899,8 @@ func openBaseDB(cfg Config, username string) (*sql.DB, error) { // Set extension directory under DataDir so DuckDB doesn't rely on $HOME/.duckdb // for autoloading/installing extensions. extDir := filepath.Join(cfg.DataDir, "extensions") - if err := configureExtensionDirectory(db, bundledDuckDBExtensionsDir, extDir, "openBaseDB"); err != nil { - slog.Warn("Failed to configure DuckDB extension_directory.", "extension_directory", extDir, "error", err) + if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { + slog.Warn("Failed to set DuckDB extension_directory.", "extension_directory", extDir, "error", err) } else { slog.Debug("Set DuckDB extension_directory.", "extension_directory", extDir) } @@ -976,16 +1002,6 @@ func seedBundledExtensions(srcRoot, dstRoot string) error { }) } -func configureExtensionDirectory(db *sql.DB, bundledSrcRoot, extDir, scope string) error { - if err := seedBundledExtensions(bundledSrcRoot, extDir); err != nil { - return fmt.Errorf("%s: seed bundled extensions: %w", scope, err) - } - if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { - return fmt.Errorf("%s: set extension_directory: %w", scope, err) - } - return nil -} - func shouldRefreshBundledExtension(srcPath string) bool { return filepath.Base(srcPath) == "postgres_scanner.duckdb_extension" } diff --git a/server/shell.go b/server/shell.go index 0707bf2..9619b9b 100644 --- a/server/shell.go +++ b/server/shell.go @@ -8,6 +8,7 @@ import ( "log/slog" "os" "os/signal" + "path/filepath" "strings" "sync" "syscall" @@ -18,6 +19,9 @@ import ( // DuckLake, and pg_catalog views are all available. func RunShell(cfg Config) { sem := make(chan struct{}, 1) + if err := bootstrapBundledExtensions(cfg.DataDir); err != nil { + slog.Warn("Failed to bootstrap bundled DuckDB extensions.", "source", bundledDuckDBExtensionsDir, "extension_directory", filepath.Join(cfg.DataDir, "extensions"), "error", err) + } db, err := CreateDBConnection(cfg, sem, "shell", processStartTime, processVersion) if err != nil { diff --git a/server/worker.go b/server/worker.go index f6467c2..624d4d0 100644 --- a/server/worker.go +++ b/server/worker.go @@ -13,6 +13,7 @@ import ( "net" "os" "os/signal" + "path/filepath" "sync" "syscall" "time" @@ -293,6 +294,10 @@ func runChildWorker(tcpConn *net.TCPConn, cfg *ChildConfig) int { parentVersion = cfg.ServerVersion } + if err := bootstrapBundledExtensions(serverCfg.DataDir); err != nil { + slog.Warn("Failed to bootstrap bundled DuckDB extensions.", "source", bundledDuckDBExtensionsDir, "extension_directory", filepath.Join(serverCfg.DataDir, "extensions"), "error", err) + } + // Create DuckDB connection db, err := CreateDBConnection(serverCfg, make(chan struct{}, 1), username, parentStartTime, parentVersion) if err != nil { @@ -382,4 +387,3 @@ func runChildWorker(tcpConn *net.TCPConn, cfg *ChildConfig) int { return ExitSuccess } } - From 8280d31f595669e5f8ab6c5128834efbc5785966 Mon Sep 17 00:00:00 2001 From: Bill Guowei Yang Date: Wed, 22 Apr 2026 17:14:02 -0400 Subject: [PATCH 8/8] remove stale nightly rollout leftovers --- server/bundled_extensions_test.go | 27 --------------------------- tests/integration/harness.go | 2 -- 2 files changed, 29 deletions(-) diff --git a/server/bundled_extensions_test.go b/server/bundled_extensions_test.go index 567611b..a2c18f3 100644 --- a/server/bundled_extensions_test.go +++ b/server/bundled_extensions_test.go @@ -1,14 +1,10 @@ package server import ( - "database/sql" - "fmt" "os" "path/filepath" "sync" "testing" - - _ "github.com/duckdb/duckdb-go/v2" ) func TestSeedBundledExtensionsCopiesMissingFiles(t *testing.T) { @@ -230,26 +226,3 @@ func TestBootstrapBundledExtensionsRunsOncePerExtensionDirectory(t *testing.T) { t.Fatalf("expected bootstrap to run once, got %q", string(got)) } } - -func TestSetExtensionDirectoryUsesPreparedCache(t *testing.T) { - extDir := t.TempDir() - - db, err := sql.Open("duckdb", ":memory:") - if err != nil { - t.Fatalf("open duckdb: %v", err) - } - defer func() { _ = db.Close() }() - - if _, err := db.Exec(fmt.Sprintf("SET extension_directory = '%s'", extDir)); err != nil { - t.Fatalf("set extension_directory: %v", err) - } - - var got string - row := db.QueryRow("SELECT current_setting('extension_directory')") - if err := row.Scan(&got); err != nil { - t.Fatalf("scan extension_directory: %v", err) - } - if got != extDir { - t.Fatalf("extension_directory = %q, want %q", got, extDir) - } -} diff --git a/tests/integration/harness.go b/tests/integration/harness.go index 54064cc..0454d01 100644 --- a/tests/integration/harness.go +++ b/tests/integration/harness.go @@ -148,8 +148,6 @@ func (h *TestHarness) startDuckgres(harnessCfg HarnessConfig) error { // Configure DuckLake if enabled if harnessCfg.UseDuckLake { - cfg.Extensions = []string{"ducklake"} - metadataPort := harnessCfg.DuckLakeMetadataPort // If latency injection is requested, start a TCP proxy in front of the