diff --git a/e2e-tests/run-pr.csv b/e2e-tests/run-pr.csv index 110dff48e..08617ee7e 100644 --- a/e2e-tests/run-pr.csv +++ b/e2e-tests/run-pr.csv @@ -33,3 +33,6 @@ telemetry-transfer upgrade-consistency upgrade-minor users +migration-from-crunchy-standby +migration-from-crunchy-pv +migration-from-crunchy-backup-restore diff --git a/e2e-tests/tests/migration-from-crunchy-backup-restore/10-assert.yaml b/e2e-tests/tests/migration-from-crunchy-backup-restore/10-assert.yaml new file mode 100644 index 000000000..4e59a300d --- /dev/null +++ b/e2e-tests/tests/migration-from-crunchy-backup-restore/10-assert.yaml @@ -0,0 +1,7 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +commands: + - script: |- + set -o errexit + kubectl get configmap -n "${NAMESPACE}" 10-second-batch-written diff --git a/e2e-tests/tests/migration-from-crunchy-backup-restore/10-write-more-data.yaml b/e2e-tests/tests/migration-from-crunchy-backup-restore/10-write-more-data.yaml new file mode 100644 index 000000000..4cc4ec597 --- /dev/null +++ b/e2e-tests/tests/migration-from-crunchy-backup-restore/10-write-more-data.yaml @@ -0,0 +1,38 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + primary=$(kubectl get pod -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=percona-restored,postgres-operator.crunchydata.com/role=primary \ + -o jsonpath='{.items[0].metadata.name}') + + if [ -z "${primary}" ]; then + echo "ERROR: primary pod not found" + exit 1 + fi + + # Insert the second batch of rows that must survive the upcoming PITR. + # The PITR target is captured in step 12 AFTER the step 11 backup + # completes, so the backup's start time is guaranteed to be before the + # target and pgBackRest can use it for the restore. 
+ kubectl exec -n "${NAMESPACE}" "${primary}" -c database -- \ + psql -d migrationtest -c " + INSERT INTO migration_data VALUES + (5, 'second-batch-one'), + (6, 'second-batch-two'), + (7, 'second-batch-three'); + " + + # Force a WAL switch so the inserted rows reach the archive before the + # step 11 backup starts. + kubectl exec -n "${NAMESPACE}" "${primary}" -c database -- \ + psql -q -c "SELECT pg_switch_wal();" + + kubectl create configmap -n "${NAMESPACE}" 10-second-batch-written \ + --from-literal=rows="5,6,7" + timeout: 120 diff --git a/e2e-tests/tests/migration-from-crunchy-backup-restore/11-assert.yaml b/e2e-tests/tests/migration-from-crunchy-backup-restore/11-assert.yaml new file mode 100644 index 000000000..d0a763fdd --- /dev/null +++ b/e2e-tests/tests/migration-from-crunchy-backup-restore/11-assert.yaml @@ -0,0 +1,29 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 560 +--- +kind: Job +apiVersion: batch/v1 +metadata: + annotations: + postgres-operator.crunchydata.com/pgbackrest-backup: second-post-restore-backup + labels: + postgres-operator.crunchydata.com/pgbackrest-backup: manual + postgres-operator.crunchydata.com/pgbackrest-repo: repo1 + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGBackup + controller: true + blockOwnerDeletion: true +status: + succeeded: 1 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGBackup +metadata: + name: second-post-restore-backup +spec: + pgCluster: percona-restored + repoName: repo1 +status: + state: Succeeded diff --git a/e2e-tests/tests/migration-from-crunchy-backup-restore/11-backup.yaml b/e2e-tests/tests/migration-from-crunchy-backup-restore/11-backup.yaml new file mode 100644 index 000000000..04c35a2c2 --- /dev/null +++ b/e2e-tests/tests/migration-from-crunchy-backup-restore/11-backup.yaml @@ -0,0 +1,9 @@ +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGBackup +metadata: + name: second-post-restore-backup +spec: + pgCluster: percona-restored + repoName: repo1 + options: + 
- --type=full diff --git a/e2e-tests/tests/migration-from-crunchy-backup-restore/12-assert.yaml b/e2e-tests/tests/migration-from-crunchy-backup-restore/12-assert.yaml new file mode 100644 index 000000000..e6679fb7e --- /dev/null +++ b/e2e-tests/tests/migration-from-crunchy-backup-restore/12-assert.yaml @@ -0,0 +1,60 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGRestore +metadata: + name: second-pitr-restore +status: + state: Succeeded +--- +# One StatefulSet per pod; readyReplicas=1 each. Aggregate validated below. +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + postgres-operator.crunchydata.com/cluster: percona-restored + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/instance-set: instance1 + ownerReferences: + - apiVersion: upstream.pgv2.percona.com/v1beta1 + kind: PostgresCluster + name: percona-restored + controller: true + blockOwnerDeletion: true +status: + availableReplicas: 1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 +--- +apiVersion: upstream.pgv2.percona.com/v1beta1 +kind: PostgresCluster +metadata: + name: percona-restored +status: + instances: + - name: instance1 + readyReplicas: 3 + replicas: 3 + updatedReplicas: 3 + pgbackrest: + restore: + finished: true + id: second-pitr-restore + succeeded: 1 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: percona-restored +status: + postgres: + instances: + - name: instance1 + ready: 3 + size: 3 + ready: 3 + size: 3 + state: ready diff --git a/e2e-tests/tests/migration-from-crunchy-backup-restore/12-restore.yaml b/e2e-tests/tests/migration-from-crunchy-backup-restore/12-restore.yaml new file mode 100644 index 000000000..821898cf7 --- /dev/null +++ b/e2e-tests/tests/migration-from-crunchy-backup-restore/12-restore.yaml @@ -0,0 +1,73 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + 
source ../../functions + + # No scale-down needed: the cluster is on TL3 and the stanza existed + # before this timeline. When postgres promotes TL3→TL4 after the PITR, + # 00000004.history is pushed by the async archiver immediately (stanza + # exists), so pg_rewind on the two replicas can trace the ancestry and + # rejoin without the scale-to-1 workaround required for step 08. + + primary=$(kubectl get pod -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=percona-restored,postgres-operator.crunchydata.com/role=primary \ + -o jsonpath='{.items[0].metadata.name}') + + if [ -z "${primary}" ]; then + echo "ERROR: primary pod not found" + exit 1 + fi + + # Capture the PITR target NOW — after the step 11 backup has completed. + # This guarantees the backup start time < target < after-target rows, + # which is the only valid ordering for a time-based pgBackRest restore. + pitr_target=$(kubectl exec -n "${NAMESPACE}" "${primary}" -c database -- \ + psql -q -t -c "SELECT to_char(clock_timestamp(), 'YYYY-MM-DD HH24:MI:SS')" \ + | xargs) + echo "PITR target: ${pitr_target}" + + # Force a WAL switch so the LSN at pitr_target is flushed to the archive + # before we write the rows that must be absent after restore. + kubectl exec -n "${NAMESPACE}" "${primary}" -c database -- \ + psql -q -c "SELECT pg_switch_wal();" + + # Write rows that must be absent after the PITR restore. + kubectl exec -n "${NAMESPACE}" "${primary}" -c database -- \ + psql -d migrationtest -c " + INSERT INTO migration_data VALUES + (8, 'after-pitr-target-one'), + (9, 'after-pitr-target-two'); + " + + # Resolve the latest full backup label (step 11's second-post-restore- + # backup). Because that backup completed before pitr_target was + # captured above, --set is safe: backup-start < pitr_target. 
+ backup_label=$(kubectl -n "${NAMESPACE}" exec "${primary}" -- \ + pgbackrest info --output json --log-level-console=info --stanza=db \ + | jq -r '[.[] | .backup[] | select(.type == "full") | select(.database.["repo-key"] == 1)][-1].label') + + if [ -z "${backup_label}" ] || [ "${backup_label}" = "null" ]; then + echo "ERROR: could not determine latest full backup label" + exit 1 + fi + echo "Restoring from backup: ${backup_label}" + + cat </dev/null || true + kubectl -n ${NAMESPACE} delete postgrescluster upgrade-minor --ignore-not-found remove_all_finalizers check_operator_panic destroy_operator diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index 44913866c..f45be198a 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -1100,7 +1100,7 @@ func (r *Reconciler) scaleUpInstances( next := naming.GenerateInstance(cluster, set) // if there are any available instance names (as determined by observing any PVCs for the // instance set that are not currently associated with an instance, e.g. in the event the - // instance STS was deleted), then reuse them instead of generating a new name + // instance STS was deleted), then reuse them instead of generating a new name. 
if len(availableInstanceNames) > 0 { next.Name = availableInstanceNames[0] availableInstanceNames = availableInstanceNames[1:] diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index a83804d62..4a51d9470 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -3037,6 +3037,21 @@ func (r *Reconciler) reconcileStanzaCreate(ctx context.Context, r.Recorder.Event(postgresCluster, corev1.EventTypeNormal, EventStanzasCreated, "pgBackRest stanza creation completed successfully") + // Re-push any timeline history files stranded by the async-archiver race: + // postgres archives 00000002.history during bootstrap promotion before the + // stanza exists; pgBackRest drops it silently (error 103) and postgres + // never retries. Without it pg_rewind fails on replicas after PITR. + log := logging.FromContext(ctx) + historyOut, historyErr := pgbackrest.Executor(exec).ArchivePushHistoryFiles(ctx) + if historyErr != nil { + r.Recorder.Event(postgresCluster, corev1.EventTypeWarning, + "ArchivePushHistoryFilesFailed", historyErr.Error()) + log.Error(historyErr, "timeline history file recovery failed", + "pod", writableInstanceName, "output", historyOut) + } else if historyOut != "" { + log.Info("timeline history file recovery", "output", historyOut) + } + // if no errors then stanza(s) created successfully for i := range postgresCluster.Status.PGBackRest.Repos { postgresCluster.Status.PGBackRest.Repos[i].StanzaCreated = true diff --git a/internal/pgbackrest/pgbackrest.go b/internal/pgbackrest/pgbackrest.go index afed28958..95a8cc8cc 100644 --- a/internal/pgbackrest/pgbackrest.go +++ b/internal/pgbackrest/pgbackrest.go @@ -107,3 +107,62 @@ fi return false, nil } + +// ArchivePushHistoryFiles pushes any timeline history files (*.history) found +// in pg_wal to the pgBackRest archive synchronously. 
+//
+// During a dataSource bootstrap restore, postgres promotes from TL1 to TL2 and
+// immediately hands 00000002.history to archive_command. pgBackRest's async
+// archiver silently drops the push when archive.info does not yet exist (error
+// 103), and postgres never retries. Without 00000002.history in the archive,
+// pg_rewind on replicas fails after any subsequent PITR restore.
+//
+// Calling this method right after stanza-create recovers any such stranded
+// files. The combined stdout and stderr are returned even when err is non-nil.
+func (exec Executor) ArchivePushHistoryFiles(ctx context.Context) (string, error) {
+	var stdout, stderr bytes.Buffer
+
+	const script = `
+set -eu
+
+if [ -z "${PGDATA:-}" ]; then
+  echo "PGDATA is not set; skipping history file recovery" >&2
+  exit 0
+fi
+
+# Use -L so find follows the pg_wal symlink ($PGDATA/pg_wal -> /pgdata/_wal).
+# Stage the list in a temp file: a piped while-loop runs in a subshell and would lose push_failed.
+tmplist=$(mktemp)
+# Remove the temp file on every exit path, including aborts triggered by set -e.
+trap 'rm -f "${tmplist}"' EXIT
+find -L "${PGDATA}/pg_wal" -maxdepth 1 -name '*.history' 2>/dev/null | sort > "${tmplist}"
+
+if [ ! -s "${tmplist}" ]; then
+  exit 0
+fi
+
+echo "history files to push:"
+cat "${tmplist}"
+
+push_failed=0
+while IFS= read -r f; do
+  if pgbackrest --stanza=db --log-level-console=info archive-push --no-archive-async "${f}"; then
+    echo "pushed ${f}"
+  else
+    echo "FAILED to push ${f}" >&2
+    push_failed=1
+  fi
+done < "${tmplist}"
+
+[ "${push_failed}" -eq 0 ] || exit 1
+`
+	err := exec(ctx, nil, &stdout, &stderr, "bash", "-ceu", "--", script)
+	combined := stdout.String()
+	if s := stderr.String(); s != "" {
+		combined += "\nstderr: " + s
+	}
+	if err != nil {
+		return combined, errors.Wrap(err, combined)
+	}
+	return combined, nil
+}