diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b41015a5..be735903 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -301,8 +301,8 @@ jobs: make test-e2e " - e2e-install-smoke: - name: E2E Install Smoke (${{ matrix.scenario }}) + e2e-install-quickstart: + name: E2E Install Quickstart (${{ matrix.scenario }}) runs-on: ubuntu-latest needs: [build-ci-container, docker-build, lint-helm] strategy: @@ -350,9 +350,9 @@ jobs: run: | echo "${{ secrets.GITHUB_TOKEN }}" | docker login ${{ env.REGISTRY }} -u ${{ github.actor }} --password-stdin - - name: Run install smoke test in CI container + - name: Run install quickstart smoke test in CI container run: | - TARGET="test-e2e-install-${{ matrix.scenario }}" + TARGET="test-e2e-quickstart-${{ matrix.scenario }}" docker run --rm \ --network host \ -v "${GITHUB_WORKSPACE}:${{ env.CI_WORKDIR }}" \ @@ -372,7 +372,7 @@ jobs: name: Release Please runs-on: ubuntu-latest if: github.event_name == 'push' && github.ref == 'refs/heads/main' - needs: [lint-helm, lint, test, e2e-test, e2e-install-smoke, validate-devcontainer] + needs: [lint-helm, lint, test, e2e-test, e2e-install-quickstart, validate-devcontainer] outputs: release_created: ${{ steps.release.outputs.release_created }} tag_name: ${{ steps.release.outputs.tag_name }} @@ -513,7 +513,7 @@ jobs: publish-helm: name: Publish Helm Chart runs-on: ubuntu-latest - needs: [build-ci-container, e2e-install-smoke, release-please] + needs: [build-ci-container, e2e-install-quickstart, release-please] if: needs.release-please.outputs.release_created == 'true' container: image: ${{ needs.build-ci-container.outputs.image }} diff --git a/.kilocode/rules/implementation-rules.md b/AGENTS.md similarity index 100% rename from .kilocode/rules/implementation-rules.md rename to AGENTS.md diff --git a/Makefile b/Makefile index 50d8a192..fade3a53 100644 --- a/Makefile +++ b/Makefile @@ -306,9 +306,9 @@ wait-cert-manager: ## Wait for cert-manager pods to become ready check-cert-manager: wait-cert-manager ## Explicit readiness check for cert-manager @echo "✅ cert-manager check passed" -## Smoke test: install from local Helm chart and verify rollout -.PHONY: test-e2e-install -test-e2e-install: ## Smoke test install with E2E_INSTALL_MODE=helm|manifest +## Smoke test: install from local Helm chart and validate first quickstart flow +.PHONY: test-e2e-install-quickstart +test-e2e-install-quickstart: ## Install + quickstart smoke with E2E_INSTALL_MODE=helm|manifest @MODE="$(E2E_INSTALL_MODE)"; \ if [ "$$MODE" != "helm" ] && [ "$$MODE" != "manifest" ]; then \ echo "❌ Invalid E2E_INSTALL_MODE='$$MODE' (expected: helm|manifest)"; \ @@ -326,21 +326,34 @@ test-e2e-install: ## Smoke test install with E2E_INSTALL_MODE=helm|manifest echo "ℹ️ Entry point selected local fallback image: $$PROJECT_IMAGE_VALUE"; \ KIND_CLUSTER=$(KIND_CLUSTER) PROJECT_IMAGE="$$PROJECT_IMAGE_VALUE" $(MAKE) setup-cluster setup-e2e check-cert-manager e2e-build-load-image; \ fi; \ - echo "ℹ️ Running install smoke mode: $$MODE"; \ - PROJECT_IMAGE="$$PROJECT_IMAGE_VALUE" bash test/e2e/scripts/install-smoke.sh "$$MODE"; \ + echo "ℹ️ Running install quickstart smoke mode: $$MODE"; \ + PROJECT_IMAGE="$$PROJECT_IMAGE_VALUE" bash test/e2e/scripts/install-smoke-quickstart.sh "$$MODE"; \ -## Smoke test: install from local Helm chart and verify rollout -.PHONY: test-e2e-install-helm -test-e2e-install-helm: - @$(MAKE) test-e2e-install E2E_INSTALL_MODE=helm PROJECT_IMAGE="$(PROJECT_IMAGE)" KIND_CLUSTER="$(KIND_CLUSTER)" +## Smoke test: install from local Helm chart and validate first quickstart flow +.PHONY: test-e2e-install-quickstart-helm +test-e2e-install-quickstart-helm: + @$(MAKE) test-e2e-install-quickstart E2E_INSTALL_MODE=helm PROJECT_IMAGE="$(PROJECT_IMAGE)" KIND_CLUSTER="$(KIND_CLUSTER)" -## Smoke test: install from generated dist/install.yaml and verify rollout -.PHONY: test-e2e-install-manifest -test-e2e-install-manifest: +## Smoke test: install from generated dist/install.yaml and validate first quickstart flow +.PHONY: test-e2e-install-quickstart-manifest +test-e2e-install-quickstart-manifest: @if [ -n "$(PROJECT_IMAGE)" ]; then \ - echo "ℹ️ test-e2e-install-manifest using existing artifact (PROJECT_IMAGE set, CI/pre-built path)"; \ + echo "ℹ️ test-e2e-install-quickstart-manifest using existing artifact (PROJECT_IMAGE set, CI/pre-built path)"; \ else \ - echo "ℹ️ test-e2e-install-manifest local path: regenerating dist/install.yaml via build-installer"; \ + echo "ℹ️ test-e2e-install-quickstart-manifest local path: regenerating dist/install.yaml via build-installer"; \ $(MAKE) build-installer; \ fi - @$(MAKE) test-e2e-install E2E_INSTALL_MODE=manifest PROJECT_IMAGE="$(PROJECT_IMAGE)" KIND_CLUSTER="$(KIND_CLUSTER)" + @$(MAKE) test-e2e-install-quickstart E2E_INSTALL_MODE=manifest PROJECT_IMAGE="$(PROJECT_IMAGE)" KIND_CLUSTER="$(KIND_CLUSTER)" + +## Backward-compatible aliases (kept for one release cycle) +.PHONY: test-e2e-install +test-e2e-install: ## Alias for test-e2e-install-quickstart + @$(MAKE) test-e2e-install-quickstart E2E_INSTALL_MODE="$(E2E_INSTALL_MODE)" PROJECT_IMAGE="$(PROJECT_IMAGE)" KIND_CLUSTER="$(KIND_CLUSTER)" + +.PHONY: test-e2e-install-helm +test-e2e-install-helm: ## Alias for test-e2e-install-quickstart-helm + @$(MAKE) test-e2e-install-quickstart-helm PROJECT_IMAGE="$(PROJECT_IMAGE)" KIND_CLUSTER="$(KIND_CLUSTER)" + +.PHONY: test-e2e-install-manifest +test-e2e-install-manifest: ## Alias for test-e2e-install-quickstart-manifest + @$(MAKE) test-e2e-install-quickstart-manifest PROJECT_IMAGE="$(PROJECT_IMAGE)" KIND_CLUSTER="$(KIND_CLUSTER)" diff --git a/README.md b/README.md index 08de47a6..d90604b9 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,12 @@ spec: providerRef: name: your-repo branch: test-gitops-reverser - baseFolder: live-cluster + path: live-cluster + encryption: + provider: sops + secretRef: + name: sops-age-key + generateWhenMissing: true --- apiVersion: configbutler.ai/v1alpha1 kind: WatchRule @@ -140,6 +145,12 @@ spec: EOF ``` +When `generateWhenMissing: true` is enabled, GitOps Reverser can create the encryption key Secret automatically. +Back up the generated `SOPS_AGE_KEY` immediately and securely. +If you lose that private key, existing encrypted `*.sops.yaml` files are unrecoverable. +After confirming backup, remove the warning annotation: +`kubectl annotate secret sops-age-key -n default configbutler.ai/backup-warning-` + **4. Test it:** ```bash # Create a test ConfigMap @@ -148,7 +159,8 @@ kubectl create configmap test-config --from-literal=key=value -n default # Check your Git repository - you should see a new commit with the ConfigMap YAML ``` -For cluster-wide resources (nodes, CRDs, etc.) or watching multiple namespaces, use [`ClusterWatchRule`](config/samples/clusterwatchrule.yaml). More examples in [`config/samples/`](config/samples/). +For cluster-wide resources (nodes, CRDs, etc.) or watching multiple namespaces, use +[`ClusterWatchRule`](config/samples/clusterwatchrule.yaml). More examples in [`config/samples/`](config/samples/). ## Usage guidance @@ -166,6 +178,14 @@ Avoid infinite loops: Do not point GitOps (Argo CD/Flux) and GitOps Reverser at - The container image ships with `/usr/local/bin/sops`. - Per-path `.sops.yaml` files are bootstrapped in the target repo for SOPS-based secret encryption. - If Secret encryption fails, Secret writes are rejected (no plaintext fallback). + - `GitTarget.spec.encryption.generateWhenMissing: true` can auto-generate the referenced encryption Secret when it does not exist. + - Generated Secret data contains one `SOPS_AGE_KEY` (`AGE-SECRET-KEY-...`). + - Generated Secret annotation `configbutler.ai/age-recipient` stores the public age recipient. + - Generated Secret annotation `configbutler.ai/backup-warning: REMOVE_AFTER_BACKUP` is set by default. + - While `configbutler.ai/backup-warning` remains, gitops-reverser logs a recurring high-visibility backup warning during periodic reconciliation. + - WARNING: backup generated private keys immediately and securely. Losing the key means existing encrypted `*.sops.yaml` files cannot be decrypted. + - After backup, remove the warning annotation: + - `kubectl annotate secret -n configbutler.ai/backup-warning-` - Avoid multiple GitProvider configurations pointing at the same repo to prevent queue collisions. - Queue collisions are possible when multiple configs target the same repository (so don't do that). diff --git a/api/v1alpha1/gitprovider_types.go b/api/v1alpha1/gitprovider_types.go index 504bb859..0adb11cd 100644 --- a/api/v1alpha1/gitprovider_types.go +++ b/api/v1alpha1/gitprovider_types.go @@ -66,6 +66,12 @@ type EncryptionSpec struct { // SecretRef references namespace-local Secret data used by the encryption provider. SecretRef LocalSecretReference `json:"secretRef"` + + // GenerateWhenMissing auto-creates the referenced Secret when it does not exist. + // The generated Secret contains one age private key in SOPS_AGE_KEY. + // +optional + // +kubebuilder:default=false + GenerateWhenMissing bool `json:"generateWhenMissing,omitempty"` } // GitProviderStatus defines the observed state of GitProvider. diff --git a/api/v1alpha1/gittarget_types.go b/api/v1alpha1/gittarget_types.go index ed741b74..83391959 100644 --- a/api/v1alpha1/gittarget_types.go +++ b/api/v1alpha1/gittarget_types.go @@ -63,12 +63,20 @@ type GitTargetSpec struct { // GitTargetStatus defines the observed state of GitTarget. type GitTargetStatus struct { + // ObservedGeneration is the latest generation observed by the controller. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + // Conditions represent the latest available observations of an object's state // +optional // +patchMergeKey=type // +patchStrategy=merge Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` + // LastReconcileTime is the timestamp of the most recent reconcile attempt. + // +optional + LastReconcileTime metav1.Time `json:"lastReconcileTime,omitempty"` + // LastCommit is the SHA of the last commit processed. // +optional LastCommit string `json:"lastCommit,omitempty"` @@ -76,6 +84,36 @@ type GitTargetStatus struct { // LastPushTime is the timestamp of the last successful push. // +optional LastPushTime *metav1.Time `json:"lastPushTime,omitempty"` + + // Snapshot captures the latest initial snapshot reconciliation details. + // +optional + Snapshot *GitTargetSnapshotStatus `json:"snapshot,omitempty"` +} + +// GitTargetSnapshotStatus captures initial snapshot progress details. +type GitTargetSnapshotStatus struct { + // LastCompletedTime is the timestamp of the latest completed snapshot reconciliation. + // +optional + LastCompletedTime *metav1.Time `json:"lastCompletedTime,omitempty"` + + // Stats records counts from the latest snapshot reconciliation diff. + // +optional + Stats GitTargetSnapshotStats `json:"stats,omitempty"` +} + +// GitTargetSnapshotStats records create/update/delete counts for snapshot sync. +type GitTargetSnapshotStats struct { + // Created is the number of resources created in Git during snapshot sync. + // +optional + Created int32 `json:"created,omitempty"` + + // Updated is the number of existing resources reconciled during snapshot sync. + // +optional + Updated int32 `json:"updated,omitempty"` + + // Deleted is the number of stale resources deleted from Git during snapshot sync. + // +optional + Deleted int32 `json:"deleted,omitempty"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index a2c45247..2251c4e7 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -367,6 +367,41 @@ func (in *GitTargetList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GitTargetSnapshotStats) DeepCopyInto(out *GitTargetSnapshotStats) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitTargetSnapshotStats. +func (in *GitTargetSnapshotStats) DeepCopy() *GitTargetSnapshotStats { + if in == nil { + return nil + } + out := new(GitTargetSnapshotStats) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GitTargetSnapshotStatus) DeepCopyInto(out *GitTargetSnapshotStatus) { + *out = *in + if in.LastCompletedTime != nil { + in, out := &in.LastCompletedTime, &out.LastCompletedTime + *out = (*in).DeepCopy() + } + out.Stats = in.Stats +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitTargetSnapshotStatus. +func (in *GitTargetSnapshotStatus) DeepCopy() *GitTargetSnapshotStatus { + if in == nil { + return nil + } + out := new(GitTargetSnapshotStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GitTargetSpec) DeepCopyInto(out *GitTargetSpec) { *out = *in @@ -398,10 +433,16 @@ func (in *GitTargetStatus) DeepCopyInto(out *GitTargetStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + in.LastReconcileTime.DeepCopyInto(&out.LastReconcileTime) if in.LastPushTime != nil { in, out := &in.LastPushTime, &out.LastPushTime *out = (*in).DeepCopy() } + if in.Snapshot != nil { + in, out := &in.Snapshot, &out.Snapshot + *out = new(GitTargetSnapshotStatus) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GitTargetStatus. diff --git a/charts/gitops-reverser/README.md b/charts/gitops-reverser/README.md index d3cd37a9..21023ceb 100644 --- a/charts/gitops-reverser/README.md +++ b/charts/gitops-reverser/README.md @@ -1,6 +1,7 @@ # GitOps Reverser Helm Chart -GitOps Reverser enables synchronization from Kubernetes to one or more Git repositories. This Helm chart provides a production-ready deployment with High Availability (HA) by default. +GitOps Reverser enables synchronization from Kubernetes to one or more Git repositories. +This Helm chart provides a production-ready single-pod deployment. ## Quick Start @@ -21,13 +22,32 @@ helm install gitops-reverser \ kubectl get pods -n gitops-reverser-system ``` -That's it! The controller is now running and ready to synchronize your Kubernetes resources with Git. +Controller install is complete. Next, create a minimal provider/target/rule to validate first commit flow: + +```bash +kubectl apply -f config/samples/quickstart-gitprovider.yaml +kubectl apply -f config/samples/quickstart-gittarget.yaml +kubectl apply -f config/samples/quickstart-watchrule.yaml +``` + +The quickstart target uses: +- `spec.path` (not `baseFolder`) +- `spec.encryption.provider: sops` +- `spec.encryption.generateWhenMissing: true` + +When the encryption Secret is auto-generated, back up `SOPS_AGE_KEY` immediately. +If you lose it, existing encrypted `*.sops.yaml` files are unrecoverable. +After backup verification, remove warning annotation: + +```bash +kubectl annotate secret sops-age-key -n default configbutler.ai/backup-warning- +``` ## Features - ✅ **Two-way Git synchronization**: Push Kubernetes changes back to Git repositories -- ✅ **High Availability**: 2 replicas with leader election by default -- ✅ **Automatic CRD installation**: GitRepoConfig and WatchRule CRDs installed automatically +- ✅ **Single-pod stability**: 1 replica by default while multi-pod support is in progress +- ✅ **Automatic CRD installation**: GitProvider, GitTarget, WatchRule, and ClusterWatchRule CRDs installed automatically - ✅ **Webhook support**: Watch all Kubernetes resources for changes - ✅ **Production-ready**: Pod disruption budgets, anti-affinity, and resource limits - ✅ **Certificate management**: Automatic TLS via cert-manager @@ -97,7 +117,7 @@ The chart deploys 1 replica by default: ``` **Key Features:** -- **Single-pod operation**: Minimal moving parts while HA work is deferred +- **Single-pod operation**: minimal moving parts while HA work is deferred - **Single Service topology**: admission, audit, and metrics on one Service ## Configuration @@ -111,7 +131,6 @@ Single replica: ```yaml # minimal-values.yaml replicaCount: 1 -controllerManager: podDisruptionBudget: enabled: false affinity: {} @@ -215,8 +234,10 @@ The bare path `/audit-webhook` is rejected. Use a non-empty cluster ID segment. This chart automatically manages the following CRDs: -- **`gitrepoconfigs.configbutler.ai`** - Git repository configurations for synchronization -- **`watchrules.configbutler.ai`** - Rules for watching Kubernetes resources +- **`gitproviders.configbutler.ai`** - Git repository connectivity and credentials +- **`gittargets.configbutler.ai`** - Branch/path and optional encryption configuration +- **`watchrules.configbutler.ai`** - Namespaced watch rules +- **`clusterwatchrules.configbutler.ai`** - Cluster-scoped watch rules ### CRD Lifecycle @@ -229,7 +250,7 @@ This chart automatically manages the following CRDs: To manually remove CRDs after uninstallation: ```bash -kubectl delete crd gitrepoconfigs.configbutler.ai watchrules.configbutler.ai +kubectl delete crd gitproviders.configbutler.ai gittargets.configbutler.ai watchrules.configbutler.ai clusterwatchrules.configbutler.ai ``` > ⚠️ **Warning**: Deleting CRDs will also delete all custom resources of those types! @@ -291,7 +312,7 @@ helm upgrade gitops-reverser \ If upgrading from earlier chart versions: - Single-replica is the default during the current simplified topology phase -- Leader election now enabled by default (required for HA) +- Leader election remains enabled for safe future multi-pod evolution - Health probe port changed to 8081 - Certificate secret names are auto-generated @@ -305,7 +326,7 @@ helm uninstall gitops-reverser --namespace gitops-reverser-system kubectl delete namespace gitops-reverser-system # Delete CRDs (optional, but removes all custom resources) -kubectl delete crd gitrepoconfigs.configbutler.ai watchrules.configbutler.ai +kubectl delete crd gitproviders.configbutler.ai gittargets.configbutler.ai watchrules.configbutler.ai clusterwatchrules.configbutler.ai ``` ## Troubleshooting diff --git a/cmd/main.go b/cmd/main.go index 880189be..3f147bf8 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -161,6 +161,7 @@ func main() { ctrl.Log.WithName("event-router"), ) setupLog.Info("EventRouter initialized") + reconcilerManager.SetEventRouter(eventRouter) // Set EventRouter reference in WatchManager watchMgr.EventRouter = eventRouter diff --git a/config/crd/bases/configbutler.ai_gittargets.yaml b/config/crd/bases/configbutler.ai_gittargets.yaml index aa159c5a..91139858 100644 --- a/config/crd/bases/configbutler.ai_gittargets.yaml +++ b/config/crd/bases/configbutler.ai_gittargets.yaml @@ -48,6 +48,12 @@ spec: description: Encryption defines encryption settings for Secret resource writes. properties: + generateWhenMissing: + default: false + description: |- + GenerateWhenMissing auto-creates the referenced Secret when it does not exist. + The generated Secret contains one age private key in SOPS_AGE_KEY. + type: boolean provider: default: sops description: Provider selects the encryption provider. @@ -174,6 +180,46 @@ spec: push. format: date-time type: string + lastReconcileTime: + description: LastReconcileTime is the timestamp of the most recent + reconcile attempt. + format: date-time + type: string + observedGeneration: + description: ObservedGeneration is the latest generation observed + by the controller. + format: int64 + type: integer + snapshot: + description: Snapshot captures the latest initial snapshot reconciliation + details. + properties: + lastCompletedTime: + description: LastCompletedTime is the timestamp of the latest + completed snapshot reconciliation. + format: date-time + type: string + stats: + description: Stats records counts from the latest snapshot reconciliation + diff. + properties: + created: + description: Created is the number of resources created in + Git during snapshot sync. + format: int32 + type: integer + deleted: + description: Deleted is the number of stale resources deleted + from Git during snapshot sync. + format: int32 + type: integer + updated: + description: Updated is the number of existing resources reconciled + during snapshot sync. + format: int32 + type: integer + type: object + type: object type: object required: - spec diff --git a/config/rbac/gitops-reverser-manager-role.yaml b/config/rbac/gitops-reverser-manager-role.yaml index ca36f738..04b78f14 100644 --- a/config/rbac/gitops-reverser-manager-role.yaml +++ b/config/rbac/gitops-reverser-manager-role.yaml @@ -7,8 +7,16 @@ rules: - "" resources: - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: - secrets verbs: + - create - get - list - watch diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index c4e3993e..512be9a1 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -8,8 +8,16 @@ rules: - "" resources: - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: - secrets verbs: + - create - get - list - watch diff --git a/config/samples/README.md b/config/samples/README.md new file mode 100644 index 00000000..b2171c70 --- /dev/null +++ b/config/samples/README.md @@ -0,0 +1,8 @@ +# Samples + +These samples are quick starting points for common GitOps Reverser setups. + +- `quickstart-gitprovider.yaml`: Minimal `GitProvider` with credentials. +- `quickstart-gittarget.yaml`: Minimal `GitTarget` using `spec.path` and SOPS encryption auto-generation. +- `quickstart-watchrule.yaml`: Minimal `WatchRule` for ConfigMaps. +- `clusterwatchrule.yaml`: Minimal `ClusterWatchRule` for cluster-scoped resources. diff --git a/config/samples/clusterwatchrule.yaml b/config/samples/clusterwatchrule.yaml new file mode 100644 index 00000000..8e3a2536 --- /dev/null +++ b/config/samples/clusterwatchrule.yaml @@ -0,0 +1,14 @@ +apiVersion: configbutler.ai/v1alpha1 +kind: ClusterWatchRule +metadata: + name: example-clusterwatchrule +spec: + targetRef: + name: example-target + namespace: default + rules: + - scope: Cluster + operations: [CREATE, UPDATE, DELETE] + apiGroups: ["apiextensions.k8s.io"] + apiVersions: ["v1"] + resources: ["customresourcedefinitions"] diff --git a/config/samples/quickstart-gitprovider.yaml b/config/samples/quickstart-gitprovider.yaml new file mode 100644 index 00000000..34834f98 --- /dev/null +++ b/config/samples/quickstart-gitprovider.yaml @@ -0,0 +1,14 @@ +apiVersion: configbutler.ai/v1alpha1 +kind: GitProvider +metadata: + name: example-provider + namespace: default +spec: + # Replace with your own repository URL + url: "git@github.com:YOUR_USERNAME/my-k8s-audit.git" + allowedBranches: ["*"] + secretRef: + name: git-creds + push: + interval: "5s" + maxCommits: 10 diff --git a/config/samples/quickstart-gittarget.yaml b/config/samples/quickstart-gittarget.yaml new file mode 100644 index 00000000..16eba113 --- /dev/null +++ b/config/samples/quickstart-gittarget.yaml @@ -0,0 +1,15 @@ +apiVersion: configbutler.ai/v1alpha1 +kind: GitTarget +metadata: + name: example-target + namespace: default +spec: + providerRef: + name: example-provider + branch: main + path: live-cluster + encryption: + provider: sops + secretRef: + name: sops-age-key + generateWhenMissing: true diff --git a/config/samples/quickstart-watchrule.yaml b/config/samples/quickstart-watchrule.yaml new file mode 100644 index 00000000..e6cdfc93 --- /dev/null +++ b/config/samples/quickstart-watchrule.yaml @@ -0,0 +1,13 @@ +apiVersion: configbutler.ai/v1alpha1 +kind: WatchRule +metadata: + name: example-watchrule + namespace: default +spec: + targetRef: + name: example-target + rules: + - operations: [CREATE, UPDATE, DELETE] + apiGroups: [""] + apiVersions: ["v1"] + resources: ["configmaps"] diff --git a/docs/SOPS_AGE_GUIDE.md b/docs/SOPS_AGE_GUIDE.md index d507fef3..50b2b183 100644 --- a/docs/SOPS_AGE_GUIDE.md +++ b/docs/SOPS_AGE_GUIDE.md @@ -103,6 +103,29 @@ kubectl -n sut create secret generic sops-age-key \ Then reference it from `GitTarget.spec.encryption.secretRef.name`. +Optional bootstrap mode: + +```yaml +spec: + encryption: + provider: sops + secretRef: + name: sops-age-key + generateWhenMissing: true +``` + +When enabled and the secret is missing, gitops-reverser generates one age key and +creates the secret. Generated secrets include: +- `configbutler.ai/age-recipient: age1...` +- `configbutler.ai/backup-warning: REMOVE_AFTER_BACKUP` + +Important: back up the generated private key immediately. Remove the warning +annotation after backup: + +```bash +kubectl annotate secret sops-age-key -n configbutler.ai/backup-warning- +``` + ## Rotation (new recipient/key) 1. Generate new keypair. @@ -126,3 +149,8 @@ The bootstrap template currently contains a static recipient in - This is a public recipient, not a private key. - It is not auto-replaced by the controller. - If you need your own keys, update committed `.sops.yaml` in the repo path and re-wrap files with `sops updatekeys`. + +## Design plan: `generateWhenMissing` + +Moved to a dedicated document: [`docs/SOPS_GENERATE_WHEN_MISSING_PLAN.md`](docs/SOPS_GENERATE_WHEN_MISSING_PLAN.md) + diff --git a/docs/SOPS_GENERATE_WHEN_MISSING_PLAN.md b/docs/SOPS_GENERATE_WHEN_MISSING_PLAN.md new file mode 100644 index 00000000..d05ccbb6 --- /dev/null +++ b/docs/SOPS_GENERATE_WHEN_MISSING_PLAN.md @@ -0,0 +1,124 @@ +# Design plan: `generateWhenMissing` + +Goal: make first-time SOPS setup easier by generating an age private key when +the configured encryption Secret does not exist, while keeping failure behavior +safe and explicit. + +## Scope and non-goals + +In scope: +- SOPS provider with age key in `SOPS_AGE_KEY`. +- Automatic creation of the runtime encryption Secret when missing. +- Status/conditions so operators can see whether key bootstrap succeeded. + +Out of scope: +- Automatic recipient rotation and mass re-wrap of existing `*.sops.yaml` files. +- Backup escrow implementation (we can integrate with it later). + +## API changes + +Add fields to `EncryptionSpec`: + +```yaml +spec: + encryption: + provider: sops + secretRef: + name: sops-age-key + generateWhenMissing: false +``` + +Optional extension for safety policy: + +```yaml +spec: + encryption: + generateWhenMissing: true +``` + +Rules: +- Default is `false` (BYOK remains default). +- If `true`, controller may create `secretRef.name` only when not found. +- Existing valid Secret is never overwritten. + +## Reconcile flow + +1. GitTarget reconcile validates provider/branch as today. +2. If encryption is configured and `generateWhenMissing=true`: + - Fetch `secretRef.name` in GitTarget namespace. + - If missing, generate one age identity and create Secret with `SOPS_AGE_KEY`. + - If already exists, use as-is. +3. Continue normal worker registration/bootstrap. Existing logic already derives + the age recipient from `SOPS_AGE_KEY` and renders `.sops.yaml`. + +Key design choice: +- Do key generation in the GitTarget controller reconcile path, not in worker + write path. This keeps lifecycle/status visible and avoids hidden side effects + during event processing. + +## Secret contract + +Generated Secret shape: +- `type: Opaque` +- `data["SOPS_AGE_KEY"]`: exactly one `AGE-SECRET-KEY-...` identity + +Recommended metadata: +- `configbutler.ai/age-recipient: age1...` (public metadata, useful for audit) +- `configbutler.ai/backup-warning: REMOVE_AFTER_BACKUP` + - As long as this annotation exists, controller prints a recurring high-visibility + warning during reconciliation (`RequeueLongInterval`, currently 10 minutes). + +## Status and observability + +Add or reuse GitTarget conditions: +- `EncryptionReady=True` when usable key is present. +- `EncryptionReady=False` with reason: + - `EncryptionSecretMissing` (and auto-generate disabled) + - `EncryptionSecretInvalid` + - `EncryptionSecretCreateFailed` + +Metrics/logging: +- Counter: generated keys total. +- Counter: generation failures total. +- Structured log on key generation with target and secret names (never log key). + +## Concurrency and idempotency + +Multiple reconciles may race. Use create-then-handle-AlreadyExists semantics: +- Attempt `Create(secret)`. +- If `AlreadyExists`, fetch and validate instead of failing. +- Never rotate or replace key during this workflow. + +This guarantees eventual success without duplicate key churn. + +## Security and recovery requirements + +Hard requirements: +- Never print key material in logs/events/status. +- RBAC should allow create/get/list/watch Secrets in target namespace. +- Document clearly: losing this Secret means old encrypted files cannot be + decrypted. + +Recommended safeguard: +- Emit a Warning event once after first generation: backup required. + +## Rotation plan (separate feature) + +Treat rotation as a dedicated workflow/tool: +1. Add new recipient to `.sops.yaml`. +2. Re-wrap (`sops updatekeys`) all encrypted files. +3. Roll out new private key. +4. Remove old recipient and re-wrap again. + +Do not couple rotation into `generateWhenMissing`; that should stay bootstrap-only. + +## Implementation phase + +Single phase (MVP + hardening): +- Add `generateWhenMissing` to API. +- Implement Secret generation + validation in GitTarget reconcile. +- Add explicit recipient annotation on generated secret. +- Add backup warning annotation (`configbutler.ai/backup-warning: REMOVE_AFTER_BACKUP`) + and recurring controller warning while the annotation remains. +- Add condition updates and tests (unit + e2e): + - missing secret + enabled flag -> generated key + successful SOPS write. diff --git a/docs/ci/E2E_FIRST_TIME_USER_EXPERIENCE_ANALYSIS.md b/docs/ci/E2E_FIRST_TIME_USER_EXPERIENCE_ANALYSIS.md new file mode 100644 index 00000000..f12753db --- /dev/null +++ b/docs/ci/E2E_FIRST_TIME_USER_EXPERIENCE_ANALYSIS.md @@ -0,0 +1,301 @@ +# E2E Pipeline Analysis and First-Time User Improvement Plan + +## Scope + +This document summarizes the current end-to-end (e2e) CI coverage and proposes improvements focused on first-time user success. + +## Current CI e2e setup + +The current CI workflow runs three e2e executions: + +1. Full e2e behavior suite (`e2e-test`, runs `make test-e2e`) +2. Install smoke via Helm (`e2e-install-smoke` matrix: `helm`) +3. Install smoke via manifest (`e2e-install-smoke` matrix: `manifest`) + +References: + +- `.github/workflows/ci.yml:249` +- `.github/workflows/ci.yml:304` + +## What each existing e2e flow validates + +### 1) Full e2e (`make test-e2e`) + +Validates broad runtime behavior including: + +- Controller/webhook readiness +- Metrics and audit webhook behavior +- GitProvider/GitTarget/WatchRule behavior +- Commit/write/update/delete paths to Git +- Secret encryption flows +- CRD/ClusterWatchRule scenarios + +Reference: + +- `test/e2e/e2e_test.go` + +Important note: + +- This suite installs via `make install` + `make deploy` inside tests, not via the README quickstart `install.yaml` path. +- Reference: `test/e2e/e2e_test.go:98` + +### 2/3) Install smoke matrix (`helm`, `manifest`) + +Validates installation and rollout health only: + +- Deployment rollout and readiness +- Pod readiness +- CRDs present +- ValidatingWebhookConfiguration present + +Reference: + +- `test/e2e/scripts/install-smoke.sh:103` + +Important note: + +- It does not validate the first-time functional journey: + - create credentials + - apply minimal GitProvider/GitTarget/WatchRule + - create resource + - verify commit in Git + +## Gaps for first-time users + +### Gap 1: Quickstart parity gap + +- README quickstart install path is release manifest: + - `kubectl apply -f .../releases/latest/download/install.yaml` + - `README.md:69` +- Full behavior e2e primarily validates kustomize deploy path: + - `test/e2e/e2e_test.go:98` + +Risk: + +- A user can follow quickstart install successfully, but still hit an untested first functional path. + +### Gap 2: First-success path not covered by install smoke + +- Install smoke stops at installation health checks. +- No assertion that a brand-new user can get first commit into Git with minimal objects. + +Reference: + +- `test/e2e/scripts/install-smoke.sh:103` + +### Gap 3: Documentation/API drift risk (`baseFolder` vs `path`) + +- README quickstart example uses `baseFolder`: + - `README.md:126` +- API currently defines `spec.path`: + - `api/v1alpha1/gittarget_types.go:55` +- e2e templates use `path`: + - `test/e2e/templates/gittarget.tmpl:11` + +Risk: + +- First-time users copying README may fail or get confusing validation behavior depending on compatibility handling. + +### Gap 4: README points to missing samples path + +- README references `config/samples/`: + - `README.md:151` +- Repository currently has no `config/samples` directory. + +Risk: + +- New users hit dead links and lose confidence early. + +### Gap 5: Existing strategy doc is only partially implemented + +- Existing design already recommends a 3-layer approach: + - Layer 1 install smoke + - Layer 2 quickstart flow with Gitea + - Layer 3 scheduled external provider check +- Current CI implements Layer 1 and full e2e, but no dedicated quickstart-flow job. + +Reference: + +- `docs/design/quickstart-flow-e2e-strategy.md` + +## Recommendation + +Replace the current install-only smoke model with an install-plus-first-success model: + +- Rename and evolve `install-smoke` into `install-smoke-quickstart`. +- Keep `helm` and `manifest` matrix modes. +- Keep existing rollout checks. +- Add minimal quickstart functional assertions in the same job. + +This keeps CI cost close to current while directly validating the first-time user path. + +## Target state + +After migration, e2e CI has: + +1. `e2e-install-quickstart` matrix (`helm`, `manifest`) as required PR gate. +2. `e2e-test` full suite as broad behavior coverage. +3. Optional scheduled external-provider quickstart check. + +## `install-smoke-quickstart` test contract + +Each matrix scenario (`helm`, `manifest`) should validate: + +1. Install and rollout: + - deployment rollout success + - pod ready + - required CRDs present + - validating webhook present +2. First-time quickstart functionality: + - create git credentials secret + - apply minimal `GitProvider` + - wait for `GitProvider` Ready=True + - apply minimal `GitTarget` (`spec.path`, not `baseFolder`) with encryption enabled by default + - set `spec.encryption.provider: sops` + - set `spec.encryption.generateWhenMissing: true` by default for quickstart-first UX + - wait for `GitTarget` Ready=True + - verify encryption secret is generated automatically when missing + - verify generated secret includes backup warning annotation + - apply minimal `WatchRule` + - wait for `WatchRule` Ready=True + - create ConfigMap + - verify corresponding YAML file exists in repo path + - verify commit count increased by at least one +3. Basic regression path: + - update ConfigMap and verify file/commit changed + - delete ConfigMap and verify file removed +4. Failure UX path: + - apply invalid git credentials scenario + - verify status condition reason/message is actionable +5. Safety messaging path: + - assert quickstart output and docs mention key backup is mandatory + - assert warning text explains consequence: encrypted files are unrecoverable without the private key + +## Detailed implementation plan + +## Phase 0: Docs and schema parity (fast, low risk) + +1. Update README quickstart example from `baseFolder` to `path`. +2. Fix `config/samples` references or add valid samples. +3. Update quickstart examples to enable encryption by default: + - include `spec.encryption.provider: sops` + - include `spec.encryption.generateWhenMissing: true` +4. Add explicit backup guidance in quickstart docs: + - backup generated `SOPS_AGE_KEY` immediately and securely + - remove backup-warning annotation only after verified backup +5. Add a docs-contract check job that validates quickstart snippets with server-side dry-run in Kind. + +Deliverables: + +- Updated `README.md`. +- Valid sample links or sample files. +- CI check for quickstart snippets. + +Exit criteria: + +- New user can copy quickstart YAML without field-name mismatch. +- Quickstart defaults to encrypted secret handling. +- Backup requirement is clearly visible in docs and quickstart guidance. + +## Phase 1: Refactor install smoke into quickstart smoke + +1. Replace `test/e2e/scripts/install-smoke.sh` with `install-smoke-quickstart.sh`. +2. Keep existing install verification logic intact. +3. Add a minimal quickstart scenario runner. +4. Reuse existing e2e templates where possible, but ensure quickstart parity. +5. Add deterministic assertions with explicit timeouts and retries. +6. Add assertion that encryption default is active in the quickstart path. +7. Add assertion that backup warning annotation appears on generated key secret. + +Deliverables: + +- New script and helper functions. +- Updated Make targets: + - `test-e2e-quickstart` + - `test-e2e-quickstart-helm` + - `test-e2e-quickstart-manifest` +- Backward-compatible alias targets for one release cycle (optional). + +Exit criteria: + +- Both matrix scenarios pass with install and first commit assertions. + +## Phase 2: CI migration and gating + +1. Rename workflow job from `e2e-install-smoke` to `e2e-install-quickstart`. +2. Keep matrix on `helm` and `manifest`. +3. Update downstream `needs` references in release jobs. +4. Publish artifacts/logs for fast triage: + - controller logs + - namespace events + - GitProvider/GitTarget/WatchRule YAML status dumps + +Deliverables: + +- Updated `.github/workflows/ci.yml`. +- Updated release job dependencies. + +Exit criteria: + +- Required PR gate covers install + first success path. + +## Phase 3: Stability hardening + +1. Run as required on PRs for 1-2 weeks. +2. Track flake rate and median duration. +3. Harden waits/assertions where failures are timing-related. +4. Keep full e2e unchanged until quickstart lane is stable. + +Exit criteria: + +- Flake rate below agreed threshold. +- No increase in false negatives. + +## Phase 4: External reality check (optional but recommended) + +1. Add nightly `quickstart-github` job against dedicated repo. +2. Use restricted bot credentials. +3. Create temporary branch per run and clean up. + +Exit criteria: + +- Periodic hosted-provider validation without blocking PRs. + +## CI/runtime impact estimate + +- Runtime increase versus current install smoke: moderate. +- Maintenance impact: low-to-moderate if scenario stays minimal. +- Confidence gain: high for first-time user success. + +## Risk register and mitigations + +1. Risk: Flakiness from async controller readiness. + Mitigation: explicit condition polling and bounded retries. +2. Risk: Drift between quickstart docs and test fixtures. + Mitigation: derive smoke fixtures from quickstart snippets/templates. +3. Risk: Increased CI time. + Mitigation: keep scenario minimal, avoid duplicate heavy validations. +4. Risk: Secret-handling issues in CI. + Mitigation: scope secrets per job and avoid exposing in logs. + +## Acceptance criteria + +1. `helm` quickstart smoke passes install + first commit + update + delete checks. +2. `manifest` quickstart smoke passes the same checks. +3. README quickstart YAML is API-valid against current CRDs. +4. Quickstart examples enable encryption by default. +5. Backup warning and key-backup requirement are explicitly documented. +6. Clear status messaging is asserted for at least one invalid-credentials path. +7. Release workflow dependencies point to new quickstart-smoke job name. + +## Suggested execution order + +1. Land docs/schema parity fixes (Phase 0). +2. Implement script/Make target refactor (Phase 1). +3. Update CI job and release dependencies (Phase 2). +4. Observe stability window and harden (Phase 3). +5. Add optional nightly hosted-provider check (Phase 4). + +## Summary + +Replacing install-only smoke with `install-smoke-quickstart` is the right direction. It preserves the strong install signal already in CI and adds the missing first-time-user success proof: a clean install that produces a real first commit with minimal configuration. diff --git a/docs/design/gittarget-state-machine-bootstrap-assessment.md b/docs/design/gittarget-state-machine-bootstrap-assessment.md new file mode 100644 index 00000000..0b038abc --- /dev/null +++ b/docs/design/gittarget-state-machine-bootstrap-assessment.md @@ -0,0 +1,441 @@ +# GitTarget Startup State Machine and Status Contract + +## Purpose + +Define a concrete implementation plan for `GitTarget` startup lifecycle and status design, integrating: + +- explicit startup gates (validation, encryption, bootstrap, initial snapshot, stream-live), +- Kubernetes condition best practices, +- current `gitops-reverser` CRD/controller patterns. + +This document supersedes the earlier bootstrap-only assessment and aligns with `docs/design/status-design-git-target.md`. + +## Current Repo Landscape (What Exists Today) + +## Existing CRD status patterns + +- `GitTarget.status` currently has: + - `conditions []metav1.Condition` + - `lastCommit string` + - `lastPushTime *metav1.Time` +- `GitProvider`, `WatchRule`, `ClusterWatchRule` status currently use condition-driven readiness and do not expose `phase`. +- Across controllers, the dominant pattern is a single `Ready` condition with reason/message updates. + +## Existing startup behavior + +Current `GitTarget` reconcile path (`internal/controller/gittarget_controller.go`) effectively does: + +1. Validate provider/branch/conflict. +2. Ensure encryption secret (optional auto-create). +3. Register worker and event stream. +4. Signal stream transition via `OnReconciliationComplete()`. + +Bootstrap and startup synchronization exist, but they are implicit side effects across controller, worker manager, branch worker, and event stream. + +## Key Gaps + +1. Startup gates are not represented as first-class conditions. +2. Initial snapshot sync (including stale-file deletion) is not an explicit lifecycle gate in status. +3. Worker manager lock scope includes slow bootstrap work. +4. Status today is not explicit enough for operators to answer "what gate is blocking readiness". + +## Naming Alignment With Current CRDs + +Use current schema names from `api/v1alpha1`: + +- `spec.providerRef` (not `destinationRef`, not `gitRepoConfigRef`) +- `spec.encryption.provider` +- `spec.encryption.secretRef.name` +- `spec.encryption.generateWhenMissing` + +Status additions should extend `GitTargetStatus` while preserving existing fields (`lastCommit`, `lastPushTime`) for compatibility. + +## Proposed GitTarget Status Contract + +## Required top-level fields + +- `observedGeneration int64` +- `conditions []metav1.Condition` +- `lastReconcileTime metav1.Time` + +Keep existing: + +- `lastCommit string` +- `lastPushTime *metav1.Time` + +## Optional detail blocks (recommended) + +- `workerRef {name, uid}` +- `encryption {provider, secretRefName, recipientsHash, lastValidatedTime}` +- `bootstrap {targetPath, commit, lastAppliedTime}` +- `snapshot {lastCompletedTime, stats{created,updated,deleted}}` +- `stream {registered bool, registeredTime, lastEventTime}` + +These are observational only; they must never become desired-state inputs. + +## Condition Types and Ready Semantics + +## Condition types + +Required gate conditions: + +- `Ready` (summary) +- `Validated` +- `EncryptionConfigured` +- `Bootstrapped` +- `SnapshotSynced` +- `EventStreamLive` + +Optional UX condition: + +- `Reconciling` + +## Ready semantics + +`Ready=True` iff all required gates are true: + +- `Validated=True` +- `EncryptionConfigured=True` (or `True` with `NotRequired`) +- `Bootstrapped=True` +- `SnapshotSynced=True` +- `EventStreamLive=True` + +## Canonical reason set + +- `Validated`: `OK`, `ProviderNotFound`, `BranchNotAllowed`, `TargetConflict` +- `EncryptionConfigured`: `OK`, `NotRequired`, `MissingSecret`, `InvalidConfig`, `SecretCreateDisabled` +- `Bootstrapped`: `BootstrapApplied`, `BootstrapNotNeeded`, `WorkerNotFound`, `BootstrapFailed` +- `SnapshotSynced`: `Running`, `Completed`, `SnapshotFailed` +- `EventStreamLive`: `Registered`, `RegistrationFailed`, `Disconnected` +- `Ready`: `OK`, `ValidationFailed`, `EncryptionNotConfigured`, `BootstrapNotComplete`, `InitialSyncInProgress`, `StreamNotLive` + +## Deterministic condition mechanics + +Implement one shared upsert helper (recommended: shared utility reusable by all controllers): + +- condition set behaves as map keyed by `Type`, +- no duplicate types, +- `LastTransitionTime` changes only when `Status` changes, +- `Reason` and `Message` always refreshed, +- condition-level `ObservedGeneration` set on update, +- unevaluated gates start as `Unknown` with explicit reason (`NotChecked`/`NotStarted`/`Blocked`). + +## Proposed Startup Lifecycle (Gate Pipeline) + +Each reconcile evaluates gates in order and writes status before returning on block: + +1. `Validated` +- Check provider exists, branch allowed, no target conflict. +- If false: `Ready=False`, stop. + +2. `EncryptionConfigured` +- If encryption disabled: set `True/NotRequired`. +- If enabled: resolve config, ensure secret, validate recipient derivation input. +- If false: `Ready=False`, stop. + +3. `Bootstrapped` +- Ensure worker exists and target path bootstrap is applied/idempotent. +- Capture bootstrap commit when created. +- If false: `Ready=False`, stop. + +4. `SnapshotSynced` +- Trigger initial full snapshot reconciliation. +- Must include create/update/delete reconciliation for target path. +- Keep `SnapshotSynced=False, reason=Running` until complete. +- If failed: `SnapshotSynced=False, reason=SnapshotFailed`, stop. + +5. `EventStreamLive` +- Register/verify stream only after initial snapshot success. +- Call `OnReconciliationComplete()` only at this point. +- If false: `Ready=False`, stop. + +6. `Ready` +- Compute from gate conditions only. + +## Status YAML Examples + +These are illustrative snapshots of `GitTarget.status` for key situations. + +## 1. Validation blocked (missing provider) + +```yaml +status: + observedGeneration: 7 + lastReconcileTime: "2026-02-19T10:10:11Z" + conditions: + - type: Validated + status: "False" + reason: ProviderNotFound + message: "Referenced GitProvider 'sut/main-provider' not found" + observedGeneration: 7 + lastTransitionTime: "2026-02-19T10:10:11Z" + - type: EncryptionConfigured + status: Unknown + reason: Blocked + message: "Blocked by Validated=False" + observedGeneration: 7 + lastTransitionTime: "2026-02-19T10:10:11Z" + - type: Bootstrapped + status: Unknown + reason: Blocked + message: "Blocked by Validated=False" + observedGeneration: 7 + lastTransitionTime: "2026-02-19T10:10:11Z" + - type: SnapshotSynced + status: Unknown + reason: NotStarted + message: "Initial snapshot sync has not started" + observedGeneration: 7 + lastTransitionTime: "2026-02-19T10:10:11Z" + - type: EventStreamLive + status: Unknown + reason: NotStarted + message: "Event stream activation has not started" + observedGeneration: 7 + lastTransitionTime: "2026-02-19T10:10:11Z" + - type: Ready + status: "False" + reason: ValidationFailed + message: "Validated gate failed: ProviderNotFound" + observedGeneration: 7 + lastTransitionTime: "2026-02-19T10:10:11Z" +``` + +## 2. Encryption blocked (secret missing + autocreate disabled) + +```yaml +status: + observedGeneration: 8 + lastReconcileTime: "2026-02-19T10:13:45Z" + conditions: + - type: Validated + status: "True" + reason: OK + message: "Provider and branch validation passed" + observedGeneration: 8 + lastTransitionTime: "2026-02-19T10:13:40Z" + - type: EncryptionConfigured + status: "False" + reason: SecretCreateDisabled + message: "Encryption secret sut/sops-age-key is missing and generateWhenMissing is false" + observedGeneration: 8 + lastTransitionTime: "2026-02-19T10:13:45Z" + - type: Bootstrapped + status: Unknown + reason: Blocked + message: "Blocked by EncryptionConfigured=False" + observedGeneration: 8 + lastTransitionTime: "2026-02-19T10:13:45Z" + - type: Ready + status: "False" + reason: EncryptionNotConfigured + message: "EncryptionConfigured gate failed: SecretCreateDisabled" + observedGeneration: 8 + lastTransitionTime: "2026-02-19T10:13:45Z" +``` + +## 3. Snapshot running (initial create/delete reconciliation in progress) + +```yaml +status: + observedGeneration: 9 + lastReconcileTime: "2026-02-19T10:18:02Z" + lastCommit: "3f95c85f53e7862d9d124d67f22e8de2919759b8" + conditions: + - type: Validated + status: "True" + reason: OK + message: "Provider and branch validation passed" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:17:54Z" + - type: EncryptionConfigured + status: "True" + reason: OK + message: "Encryption configuration is valid" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:17:55Z" + - type: Bootstrapped + status: "True" + reason: BootstrapApplied + message: "Bootstrap commit applied for path clusters/prod" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:17:57Z" + - type: SnapshotSynced + status: "False" + reason: Running + message: "Initial snapshot reconciliation in progress" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:18:02Z" + - type: EventStreamLive + status: Unknown + reason: Blocked + message: "Blocked until SnapshotSynced=True" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:18:02Z" + - type: Ready + status: "False" + reason: InitialSyncInProgress + message: "SnapshotSynced gate is still running" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:18:02Z" + snapshot: + stats: + created: 12 + updated: 3 + deleted: 2 +``` + +## 4. Stream registration failed after snapshot success + +```yaml +status: + observedGeneration: 9 + lastReconcileTime: "2026-02-19T10:18:17Z" + conditions: + - type: Validated + status: "True" + reason: OK + message: "Provider and branch validation passed" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:17:54Z" + - type: EncryptionConfigured + status: "True" + reason: OK + message: "Encryption configuration is valid" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:17:55Z" + - type: Bootstrapped + status: "True" + reason: BootstrapNotNeeded + message: "Bootstrap files already present" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:17:57Z" + - type: SnapshotSynced + status: "True" + reason: Completed + message: "Initial snapshot reconciliation completed" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:18:14Z" + - type: EventStreamLive + status: "False" + reason: RegistrationFailed + message: "Failed to register GitTargetEventStream for sut/prod-target" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:18:17Z" + - type: Ready + status: "False" + reason: StreamNotLive + message: "EventStreamLive gate failed: RegistrationFailed" + observedGeneration: 9 + lastTransitionTime: "2026-02-19T10:18:17Z" +``` + +## 5. Fully ready + +```yaml +status: + observedGeneration: 10 + lastReconcileTime: "2026-02-19T10:22:01Z" + lastCommit: "a12f67b7c744eeb5ee0a6d7b0d5ea32b5159cf3f" + lastPushTime: "2026-02-19T10:21:58Z" + conditions: + - type: Validated + status: "True" + reason: OK + message: "Provider and branch validation passed" + observedGeneration: 10 + lastTransitionTime: "2026-02-19T10:21:40Z" + - type: EncryptionConfigured + status: "True" + reason: NotRequired + message: "Encryption is not configured for this GitTarget" + observedGeneration: 10 + lastTransitionTime: "2026-02-19T10:21:40Z" + - type: Bootstrapped + status: "True" + reason: BootstrapNotNeeded + message: "Bootstrap files already present" + observedGeneration: 10 + lastTransitionTime: "2026-02-19T10:21:41Z" + - type: SnapshotSynced + status: "True" + reason: Completed + message: "Initial snapshot reconciliation completed" + observedGeneration: 10 + lastTransitionTime: "2026-02-19T10:21:47Z" + - type: EventStreamLive + status: "True" + reason: Registered + message: "GitTarget event stream is live" + observedGeneration: 10 + lastTransitionTime: "2026-02-19T10:21:48Z" + - type: Ready + status: "True" + reason: OK + message: "All lifecycle gates satisfied" + observedGeneration: 10 + lastTransitionTime: "2026-02-19T10:21:48Z" +``` + +## Phase Field Analysis (Repo-Wide) + +## What the repo currently does + +- No current CRD status type includes `phase`. +- Existing resources (`GitProvider`, `WatchRule`, `ClusterWatchRule`, `GitTarget`) expose readiness through conditions. +- Automation expectations are already condition-centric (`Ready`). + +## Recommendation + +Do not add `status.phase` in the first implementation step. + +Rationale: + +- It is not used elsewhere in the project today. +- It adds one more state surface to maintain and test. +- Conditions already provide machine-readable and human-readable gate state. + +If added later for UX, keep it strictly derived from conditions and informational only. Suggested derived values: + +- `PENDING`, `BOOTSTRAPPING`, `SYNCING`, `RUNNING` + +But phase must never be used as an automation contract. + +## Refactor Strategy + +1. Split worker registration APIs: +- `EnsureWorker(...)` under short lock. +- `EnsureTargetBootstrapped(...)` outside manager global lock. + +2. Introduce GitTarget gate-condition pipeline in reconciler. + +3. Add initial snapshot completion barrier before stream live transition. + +4. Add shared condition helper and migrate GitTarget to it first. + +5. Optionally follow-up by migrating `GitProvider`/`WatchRule`/`ClusterWatchRule` to same helper for consistency. + +## Testing Plan + +## Unit + +- Condition helper upsert behavior (no duplicates, transition-time semantics). +- Gate evaluation table tests for each block point. +- Ready aggregation tests. + +## Integration + +- Startup progression assertions on conditions: + - validate -> encryption -> bootstrap -> snapshot -> stream -> ready. +- Snapshot gate explicitly verifies stale-file deletion path. +- `kubectl wait --for=condition=Ready=true gittarget/` compatibility. +- Optional: `kubectl wait --for=condition=SnapshotSynced=true` if exposed. + +## Minimal First Delivery + +1. Add gate conditions on `GitTarget` (without adding `phase`). +2. Move bootstrap out of `WorkerManager` global lock scope. +3. Add initial snapshot sync gate and block stream-live transition until complete. +4. Keep `Ready` as summary contract. + +This delivers the main operational clarity and race/ordering improvements with minimal API churn. diff --git a/docs/design/status-design-git-target.md b/docs/design/status-design-git-target.md new file mode 100644 index 00000000..59c5792d --- /dev/null +++ b/docs/design/status-design-git-target.md @@ -0,0 +1,333 @@ +# Plan: GitTarget Status, Conditions, and Lifecycle Implementation + +## Outcomes + +- `GitTarget.status` is a reliable, observation-only view of reality. +- A small `status.phase` provides quick human scanning. +- `status.conditions` is the automation contract (`kubectl wait --for=condition=Ready=true`). +- Every gate in startup lifecycle is represented as a stateful condition. +- Condition updates are deterministic (no duplicates, no flapping). + +## 1. Define the public API contract + +### 1.1 Status fields (what you expose) + +Define these in CRD Go types (or equivalent). + +Top-level: + +- `observedGeneration: int64` +- `phase: string` (small stable enum; informational) +- `conditions: []metav1.Condition` (or compatible schema) +- `lastReconcileTime: metav1.Time` (optional but helpful) + +Pointers / detail sub-objects (optional but recommended): + +- `workerRef: {name, uid}` +- `encryption: {mode, secretRef, recipientsHash, lastValidatedTime}` +- `bootstrap: {targetPath, commit, lastAppliedTime}` +- `snapshot: {lastCompletedTime, lastCommit, stats{created,updated,deleted}}` +- `stream: {status, registeredTime, lastEventTime}` + +### 1.2 Phase values (small and stable) + +Keep phase values minimal and derived: + +- `PENDING` +- `BOOTSTRAPPING` +- `SYNCING` +- `RUNNING` +- Optional: `DEGRADED`, `ERROR` if explicit "can’t proceed" vs "limping" is needed. + +Rule: automation should never depend on phase; only on conditions. + +### 1.3 Condition types (real gates) + +Define these condition types (string constants, CamelCase): + +- `Ready` (summary condition) +- `Validated` +- `EncryptionConfigured` +- `Bootstrapped` +- `SnapshotSynced` +- `EventStreamLive` + +Optional (UX only, not a gate): + +- `Reconciling` (`True` while reconciliation is actively running) + +### 1.4 Ready semantics (document it) + +Define `Ready` in docs and code as: + +`Ready=True` iff: + +- `Validated=True` +- `Bootstrapped=True` +- `SnapshotSynced=True` +- `EventStreamLive=True` +- `EncryptionConfigured=True` (or `True` with reason `NotRequired` when encryption is disabled) + +## 2. CRD and RBAC wiring + +### 2.1 CRD subresource + +Enable `/status` subresource: + +```yaml +spec: + versions: + - subresources: + status: {} +``` + +### 2.2 RBAC for controller + +Grant controller: + +- `get/list/watch` on `gittargets` +- `update/patch` on `gittargets/status` +- Plus whatever it needs to manage worker objects, secrets, etc. + +## 3. Implement condition mechanics once (shared utilities) + +### 3.1 Condition upsert helper + +Implement a single `SetCondition(status *GitTargetStatus, cond metav1.Condition)` that: + +- treats conditions as a map keyed by type, +- updates existing entry if present, otherwise inserts, +- updates `LastTransitionTime` only if `Status` changed, +- always sets/updates `Reason` and `Message`, +- sets `ObservedGeneration` inside the condition (recommended). + +### 3.2 Standardize reason enums + +Create reason constants per condition. Examples: + +- `Validated`: `OK`, `ProviderNotFound`, `BranchNotAllowed`, `TargetConflict` +- `EncryptionConfigured`: `OK`, `NotRequired`, `MissingSecret`, `InvalidRecipients`, `SecretCreateDisabled` +- `Bootstrapped`: `BootstrapApplied`, `BootstrapNotNeeded`, `WorkerNotFound`, `BootstrapFailed` +- `SnapshotSynced`: `Completed`, `Running`, `SnapshotFailed` +- `EventStreamLive`: `Registered`, `RegistrationFailed`, `Disconnected` +- `Ready`: `OK`, `ValidationFailed`, `EncryptionNotConfigured`, `BootstrapNotComplete`, `InitialSyncInProgress`, `StreamNotLive` + +### 3.3 Avoid flapping rules + +Enforce in helper layer: + +- no duplicate types, +- no `LastTransitionTime` churn unless status changes, +- if a gate hasn’t been evaluated yet, set to `Unknown` with clear reason (`NotChecked`, `NotStarted`, `Blocked`). + +## 4. Reconcile algorithm: gate-by-gate lifecycle + +Implement reconciliation as a deterministic pipeline. Each step: + +- computes gate result from observed state, +- writes its condition, +- if blocking, stops further progress (but still updates `Ready` and `Phase`). + +### 4.1 Step 0: start-of-reconcile bookkeeping + +At beginning of each reconcile: + +- set `status.lastReconcileTime = now` +- set `status.observedGeneration` only when reconciling current spec successfully (or after all gates are evaluated; pick one strategy and keep it consistent) + +Optionally: + +- set `Reconciling=True` at start and `False` at end + +### 4.2 Step 1: `Validated` + +Check: + +- Provider exists +- Branch allowed +- No target conflict (uniqueness constraints) + +Set: + +- `Validated=True/False` + +If false: + +- set `Ready=False` reason `ValidationFailed` (or more specific reason), +- set phase accordingly, +- return. + +### 4.3 Step 2: `EncryptionConfigured` + +Resolve encryption mode: + +- If encryption disabled: set `EncryptionConfigured=True` reason `NotRequired` +- If enabled: + - resolve encryption config, + - ensure secret exists (or create if allowed), + - validate recipients/key derivation inputs, + - store `recipientsHash` and `secretRef` in status. + +Set: + +- `EncryptionConfigured=True/False` + +If false: block and return. + +### 4.4 Step 3: `Bootstrapped` + +Ensure: + +- worker exists/bound for `(provider, branch)` and store `workerRef`, +- target path bootstrap applied/idempotently verified, +- if bootstrap commit is needed, ensure it happened and store `bootstrap.commit`. + +Set: + +- `Bootstrapped=True/False` + +If false: block and return. + +### 4.5 Step 4: `SnapshotSynced` (initial snapshot) + +Critical “clean slate” step: + +- trigger snapshot reconciliation for whole folder (creates/updates/deletes), +- wait for completion indication (e.g. reconciliation ID or observed commit), +- record results in `status.snapshot.*` including stats and timestamps. + +Set: + +- while running: `SnapshotSynced=False` reason `Running` +- on success: `SnapshotSynced=True` reason `Completed` +- on failure: `SnapshotSynced=False` reason `SnapshotFailed` + +Block progression until `SnapshotSynced=True`. + +### 4.6 Step 5: `EventStreamLive` + +Once initial snapshot sync is done: + +- register event stream, +- confirm it is live (at least “registered”; optionally track last heartbeat/event timestamp). + +Set: + +- `EventStreamLive=True/False` + +If false: keep `Ready=False` until live. + +### 4.7 Step 6: `Ready` (summary) + +Compute `Ready` from gate conditions: + +- if all required gates are true: `Ready=True` reason `OK` +- else: `Ready=False` with reason/message pointing to first failing gate + +Finally set: + +- `status.phase` (derived; see next section) + +## 5. Phase derivation rules + +Make phase a pure function of conditions (not an independent state machine). + +Example derivation: + +- If `Ready=True` -> `RUNNING` +- Else if `SnapshotSynced` is `False/Unknown` and `Bootstrapped=True` -> `SYNCING` +- Else if any of `Validated`, `EncryptionConfigured`, `Bootstrapped` is `False/Unknown` -> `BOOTSTRAPPING` +- Else -> `PENDING` + +Optional: + +- If any gate is false with non-retryable reason (e.g. policy denied) -> `ERROR` +- If `Ready=False` but gates mostly true and stream intermittently down -> `DEGRADED` + +Keep it minimal unless extra phases are truly needed. + +## 6. Example status progression (golden path) + +New `GitTarget`: + +- `phase=PENDING` +- most gates `Unknown`, `Ready=False` + +After validation: + +- `Validated=True` +- `phase=BOOTSTRAPPING` + +After encryption: + +- `EncryptionConfigured=True` (or `True/NotRequired`) + +After bootstrap: + +- `Bootstrapped=True` + +During initial snapshot: + +- `phase=SYNCING` +- `SnapshotSynced=False` reason `Running` + +After snapshot complete: + +- `SnapshotSynced=True` + +Stream registered: + +- `EventStreamLive=True` + +Fully ready: + +- `Ready=True` +- `phase=RUNNING` + +## 7. Testing plan + +### 7.1 Unit tests for condition helper + +- upsert inserts new types +- upsert updates existing type without duplication +- `LastTransitionTime` changes only when `Status` changes +- ordering does not matter + +### 7.2 Reconcile gate tests (table-driven) + +Write scenarios and assert conditions + phase: + +- provider missing -> `Validated=False`, `Ready=False`, `phase=BOOTSTRAPPING` +- encryption secret missing and `autocreate=false` -> `EncryptionConfigured=False` +- worker missing -> `Bootstrapped=False` +- snapshot running -> `SnapshotSynced=False` reason `Running`, `phase=SYNCING` +- stream registration fails -> `EventStreamLive=False`, `Ready=False` +- happy path -> all true, `Ready=True`, `phase=RUNNING` + +### 7.3 Integration tests for `kubectl wait` compatibility + +- `kubectl wait --for=condition=Ready=true gittarget/` +- `kubectl wait --for=condition=SnapshotSynced=true gittarget/` (if automation should wait for initial sync separately) + +## 8. Operational conventions (docs to ship) + +Document: + +- condition types and what `True/False/Unknown` means for each, +- reason enums emitted (for alerting), +- ready semantics as summary gate, +- phase as informational only, +- “conditions are a map keyed by type” behavior. + +## 9. Implementation checklist + +- [ ] Add status subresource to CRD +- [ ] Add controller RBAC for `/status` +- [ ] Implement `SetCondition` helper and reason constants +- [ ] Add gate constants/types +- [ ] Implement reconcile pipeline steps 1–6 +- [ ] Implement phase derivation function +- [ ] Add unit tests for condition helper +- [ ] Add reconcile tests (blocking and happy path) +- [ ] Add integration tests for `kubectl wait` +- [ ] Document status/conditions contract diff --git a/internal/controller/condition_helper.go b/internal/controller/condition_helper.go new file mode 100644 index 00000000..1e98be4d --- /dev/null +++ b/internal/controller/condition_helper.go @@ -0,0 +1,61 @@ +/* +SPDX-License-Identifier: Apache-2.0 + +Copyright 2025 ConfigButler + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func upsertCondition( + conditions []metav1.Condition, + conditionType string, + status metav1.ConditionStatus, + reason, message string, + observedGeneration int64, +) []metav1.Condition { + now := metav1.Now() + next := metav1.Condition{ + Type: conditionType, + Status: status, + Reason: reason, + Message: message, + ObservedGeneration: observedGeneration, + LastTransitionTime: now, + } + + var existing *metav1.Condition + result := make([]metav1.Condition, 0, len(conditions)) + for i := range conditions { + cond := conditions[i] + if cond.Type == conditionType { + if existing == nil { + existing = &cond + } + continue + } + result = append(result, cond) + } + + if existing != nil && existing.Status == next.Status && !existing.LastTransitionTime.IsZero() { + next.LastTransitionTime = existing.LastTransitionTime + } + + result = append(result, next) + return result +} diff --git a/internal/controller/condition_helper_test.go b/internal/controller/condition_helper_test.go new file mode 100644 index 00000000..fc46080c --- /dev/null +++ b/internal/controller/condition_helper_test.go @@ -0,0 +1,94 @@ +/* +SPDX-License-Identifier: Apache-2.0 + +Copyright 2025 ConfigButler + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestUpsertCondition_DeduplicatesAndUpdatesFields(t *testing.T) { + oldTransition := metav1.NewTime(time.Now().Add(-5 * time.Minute)) + conditions := []metav1.Condition{ + { + Type: GitTargetConditionReady, + Status: metav1.ConditionTrue, + Reason: "OldReason", + Message: "OldMessage", + ObservedGeneration: 1, + LastTransitionTime: oldTransition, + }, + { + Type: GitTargetConditionReady, + Status: metav1.ConditionTrue, + Reason: "Duplicate", + Message: "Duplicate", + ObservedGeneration: 1, + LastTransitionTime: oldTransition, + }, + } + + conditions = upsertCondition( + conditions, + GitTargetConditionReady, + metav1.ConditionTrue, + GitTargetReasonOK, + "Updated message", + 9, + ) + + require.Len(t, conditions, 1) + require.Equal(t, GitTargetConditionReady, conditions[0].Type) + require.Equal(t, metav1.ConditionTrue, conditions[0].Status) + require.Equal(t, GitTargetReasonOK, conditions[0].Reason) + require.Equal(t, "Updated message", conditions[0].Message) + require.Equal(t, int64(9), conditions[0].ObservedGeneration) + require.Equal(t, oldTransition, conditions[0].LastTransitionTime) +} + +func TestUpsertCondition_ChangesTransitionTimeWhenStatusChanges(t *testing.T) { + oldTransition := metav1.NewTime(time.Now().Add(-10 * time.Minute)) + conditions := []metav1.Condition{ + { + Type: GitTargetConditionValidated, + Status: metav1.ConditionFalse, + Reason: GitTargetReasonProviderNotFound, + Message: "missing provider", + ObservedGeneration: 2, + LastTransitionTime: oldTransition, + }, + } + + conditions = upsertCondition( + conditions, + GitTargetConditionValidated, + metav1.ConditionTrue, + GitTargetReasonOK, + "Provider and branch validation passed", + 3, + ) + + require.Len(t, conditions, 1) + require.Equal(t, metav1.ConditionTrue, conditions[0].Status) + require.NotEqual(t, oldTransition, conditions[0].LastTransitionTime) + require.Equal(t, int64(3), conditions[0].ObservedGeneration) +} diff --git a/internal/controller/gittarget_controller.go b/internal/controller/gittarget_controller.go index 4843787f..766eb6df 100644 --- a/internal/controller/gittarget_controller.go +++ b/internal/controller/gittarget_controller.go @@ -20,11 +20,15 @@ package controller import ( "context" + "errors" "fmt" "path/filepath" + "strings" "time" + "filippo.io/age" "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -41,14 +45,53 @@ import ( "github.com/ConfigButler/gitops-reverser/internal/watch" ) -// GitTarget status condition reasons. const ( - GitTargetReasonValidating = "Validating" - GitTargetReasonGitProviderNotFound = "GitProviderNotFound" - GitTargetReasonBranchNotAllowed = "BranchNotAllowed" - GitTargetReasonRepositoryUnavailable = "RepositoryUnavailable" - GitTargetReasonConflict = "Conflict" - GitTargetReasonReady = "Ready" + GitTargetConditionReady = ConditionTypeReady + GitTargetConditionValidated = "Validated" + GitTargetConditionEncryptionConfigured = "EncryptionConfigured" + GitTargetConditionBootstrapped = "Bootstrapped" + GitTargetConditionSnapshotSynced = "SnapshotSynced" + GitTargetConditionEventStreamLive = "EventStreamLive" +) + +// GitTargetReasonReady is a backward-compatible alias used by existing tests. +const GitTargetReasonReady = GitTargetConditionReady +const GitTargetReasonConflict = GitTargetReasonTargetConflict + +const ( + GitTargetReasonOK = "OK" + GitTargetReasonProviderNotFound = "ProviderNotFound" + GitTargetReasonBranchNotAllowed = "BranchNotAllowed" + GitTargetReasonTargetConflict = "TargetConflict" + GitTargetReasonNotChecked = "NotChecked" + GitTargetReasonBlocked = "Blocked" + GitTargetReasonNotStarted = "NotStarted" + GitTargetReasonNotRequired = "NotRequired" + GitTargetReasonMissingSecret = "MissingSecret" + GitTargetReasonInvalidConfig = "InvalidConfig" + GitTargetReasonSecretCreateDisabled = "SecretCreateDisabled" + GitTargetReasonBootstrapApplied = "BootstrapApplied" + GitTargetReasonWorkerNotFound = "WorkerNotFound" + GitTargetReasonBootstrapFailed = "BootstrapFailed" + GitTargetReasonRunning = "Running" + GitTargetReasonCompleted = "Completed" + GitTargetReasonSnapshotFailed = "SnapshotFailed" + GitTargetReasonRegistered = "Registered" + GitTargetReasonRegistrationFailed = "RegistrationFailed" + GitTargetReasonDisconnected = "Disconnected" + + GitTargetReadyReasonValidationFailed = "ValidationFailed" + GitTargetReadyReasonEncryptionNotConfigured = "EncryptionNotConfigured" + GitTargetReadyReasonBootstrapNotComplete = "BootstrapNotComplete" + GitTargetReadyReasonInitialSyncInProgress = "InitialSyncInProgress" + GitTargetReadyReasonStreamNotLive = "StreamNotLive" +) + +const ( + sopsAgeKeySecretKey = "SOPS" + "_AGE_KEY" + encryptionSecretRecipientAnnoKey = "configbutler.ai/age-recipient" + encryptionSecretBackupWarningAnno = "configbutler.ai/backup-warning" + encryptionSecretBackupWarningValue = "REMOVE_AFTER_BACKUP" ) // GitTargetReconciler reconciles a GitTarget object. @@ -63,455 +106,753 @@ type GitTargetReconciler struct { // +kubebuilder:rbac:groups=configbutler.ai,resources=gittargets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=configbutler.ai,resources=gittargets/status,verbs=get;update;patch // +kubebuilder:rbac:groups=configbutler.ai,resources=gitproviders,verbs=get;list;watch +// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create -// Reconcile validates GitTarget references and updates status conditions. -// Cleanup of workers and event streams is handled by ReconcileWorkers, not finalizers. +// Reconcile validates GitTarget references and drives startup lifecycle gates. +// +//nolint:gocognit,cyclop,funlen // Gate pipeline is intentionally explicit to keep status transitions obvious. func (r *GitTargetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := logf.FromContext(ctx).WithName("GitTargetReconciler") - log.Info("Starting reconciliation", "namespacedName", req.NamespacedName) - // Fetch the GitTarget instance var target configbutleraiv1alpha1.GitTarget if err := r.Get(ctx, req.NamespacedName, &target); err != nil { return r.handleFetchError(err, log, req.NamespacedName) } - log.Info("Validating GitTarget", - "name", target.Name, - "namespace", target.Namespace, - "provider", target.Spec.ProviderRef, - "branch", target.Spec.Branch, - "path", target.Spec.Path, - "generation", target.Generation, - "resourceVersion", target.ResourceVersion) + target.Status.ObservedGeneration = target.Generation + target.Status.LastReconcileTime = metav1.Now() - // Set initial validating status - r.setCondition(&target, metav1.ConditionUnknown, - GitTargetReasonValidating, "Validating GitTarget configuration...") - - // Validate GitProvider and branch - // GitProvider must be in the same namespace as GitTarget (enforced by API structure lacking namespace field) providerNS := target.Namespace - - validationResult, validationErr := r.validateGitProvider(ctx, &target, providerNS, log) + validated, validationMsg, validationResult, validationErr := r.evaluateValidatedGate(ctx, &target, providerNS) if validationErr != nil { return ctrl.Result{}, validationErr } - if validationResult != nil { - return *validationResult, nil - } - - // Get GitProvider for conflict checking and repository status - var gp configbutleraiv1alpha1.GitProvider - gpKey := k8stypes.NamespacedName{Name: target.Spec.ProviderRef.Name, Namespace: providerNS} - if err := r.Get(ctx, gpKey, &gp); err != nil { - log.Error(err, "Failed to get GitProvider for status checking") + if !validated { + r.setCondition( + &target, + GitTargetConditionEncryptionConfigured, + metav1.ConditionUnknown, + GitTargetReasonBlocked, + "Blocked by Validated=False", + ) + r.setCondition( + &target, + GitTargetConditionBootstrapped, + metav1.ConditionUnknown, + GitTargetReasonBlocked, + "Blocked by Validated=False", + ) + r.setCondition( + &target, + GitTargetConditionSnapshotSynced, + metav1.ConditionUnknown, + GitTargetReasonNotStarted, + "Initial snapshot sync has not started", + ) + r.setCondition( + &target, + GitTargetConditionEventStreamLive, + metav1.ConditionUnknown, + GitTargetReasonNotStarted, + "Event stream activation has not started", + ) + r.setReadyCondition( + &target, + metav1.ConditionFalse, + GitTargetReadyReasonValidationFailed, + validationMsg, + ) + if validationResult != nil { + if err := r.updateStatusWithRetry(ctx, &target); err != nil { + return ctrl.Result{}, err + } + return *validationResult, nil + } + if err := r.updateStatusWithRetry(ctx, &target); err != nil { + return ctrl.Result{}, err + } return ctrl.Result{RequeueAfter: RequeueShortInterval}, nil } - // Check for conflicts with other GitTargets (defense-in-depth) - if conflictResult := r.checkForConflicts(ctx, &target, providerNS, log); conflictResult != nil { - return *conflictResult, nil + encryptionReady, encryptionMessage, encryptionRequeueAfter := r.evaluateEncryptionGate(ctx, &target, log) + if !encryptionReady { + r.setCondition( + &target, + GitTargetConditionBootstrapped, + metav1.ConditionUnknown, + GitTargetReasonBlocked, + "Blocked by EncryptionConfigured=False", + ) + r.setCondition( + &target, + GitTargetConditionSnapshotSynced, + metav1.ConditionUnknown, + GitTargetReasonNotStarted, + "Initial snapshot sync has not started", + ) + r.setCondition( + &target, + GitTargetConditionEventStreamLive, + metav1.ConditionUnknown, + GitTargetReasonNotStarted, + "Event stream activation has not started", + ) + r.setReadyCondition( + &target, + metav1.ConditionFalse, + GitTargetReadyReasonEncryptionNotConfigured, + encryptionMessage, + ) + if err := r.updateStatusWithRetry(ctx, &target); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: encryptionRequeueAfter}, nil } - // Update repository status (branch existence, SHA tracking) - r.updateRepositoryStatus(ctx, &target, &gp, log) + bootstrapReady, bootstrapMsg, bootstrapRequeueAfter := r.evaluateBootstrapGate(ctx, &target, providerNS, log) + if !bootstrapReady { + r.setCondition( + &target, + GitTargetConditionSnapshotSynced, + metav1.ConditionUnknown, + GitTargetReasonNotStarted, + "Initial snapshot sync has not started", + ) + r.setCondition( + &target, + GitTargetConditionEventStreamLive, + metav1.ConditionUnknown, + GitTargetReasonNotStarted, + "Event stream activation has not started", + ) + r.setReadyCondition( + &target, + metav1.ConditionFalse, + GitTargetReadyReasonBootstrapNotComplete, + bootstrapMsg, + ) + if err := r.updateStatusWithRetry(ctx, &target); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: bootstrapRequeueAfter}, nil + } - // Register with worker and event stream - r.registerWithWorkerAndEventStream(ctx, &target, providerNS, log) + stream, snapshotState, snapshotMessage, snapshotRequeueAfter, snapshotErr := r.evaluateSnapshotGate( + ctx, + &target, + providerNS, + log, + ) + if snapshotErr != nil { + r.setCondition( + &target, + GitTargetConditionSnapshotSynced, + metav1.ConditionFalse, + GitTargetReasonSnapshotFailed, + snapshotErr.Error(), + ) + r.setCondition( + &target, + GitTargetConditionEventStreamLive, + metav1.ConditionUnknown, + GitTargetReasonBlocked, + "Blocked until SnapshotSynced=True", + ) + r.setReadyCondition( + &target, + metav1.ConditionFalse, + GitTargetReadyReasonInitialSyncInProgress, + "SnapshotSynced gate failed: SnapshotFailed", + ) + if err := r.updateStatusWithRetry(ctx, &target); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: RequeueShortInterval}, nil + } + if snapshotState == metav1.ConditionFalse { + r.setCondition( + &target, + GitTargetConditionSnapshotSynced, + metav1.ConditionFalse, + GitTargetReasonRunning, + snapshotMessage, + ) + r.setCondition( + &target, + GitTargetConditionEventStreamLive, + metav1.ConditionUnknown, + GitTargetReasonBlocked, + "Blocked until SnapshotSynced=True", + ) + r.setReadyCondition( + &target, + metav1.ConditionFalse, + GitTargetReadyReasonInitialSyncInProgress, + "SnapshotSynced gate is still running", + ) + if err := r.updateStatusWithRetry(ctx, &target); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: snapshotRequeueAfter}, nil + } + if snapshotState == metav1.ConditionTrue { + r.setCondition( + &target, + GitTargetConditionSnapshotSynced, + metav1.ConditionTrue, + GitTargetReasonCompleted, + snapshotMessage, + ) + } - // Signal reconciliation complete to enable event processing - if r.EventRouter != nil { - gitDest := types.NewResourceReference(target.Name, target.Namespace) - if stream := r.EventRouter.GetGitTargetEventStream(gitDest); stream != nil { - stream.OnReconciliationComplete() + streamReady, streamMessage := r.evaluateEventStreamGate(&target, stream, providerNS) + if !streamReady { + r.setReadyCondition( + &target, + metav1.ConditionFalse, + GitTargetReadyReasonStreamNotLive, + streamMessage, + ) + if err := r.updateStatusWithRetry(ctx, &target); err != nil { + return ctrl.Result{}, err } + return ctrl.Result{RequeueAfter: RequeueShortInterval}, nil } - log.Info("Updating status with success condition") + r.setReadyCondition( + &target, + metav1.ConditionTrue, + GitTargetReasonOK, + "All lifecycle gates satisfied", + ) if err := r.updateStatusWithRetry(ctx, &target); err != nil { - log.Error(err, "Failed to update GitTarget status") return ctrl.Result{}, err } - log.Info("Reconciliation successful", "name", target.Name) return ctrl.Result{RequeueAfter: RequeueLongInterval}, nil } -// handleFetchError handles errors from fetching GitTarget. -func (r *GitTargetReconciler) handleFetchError( - err error, +func (r *GitTargetReconciler) evaluateValidatedGate( + ctx context.Context, + target *configbutleraiv1alpha1.GitTarget, + providerNS string, +) (bool, string, *ctrl.Result, error) { + validated, message, reason, result, err := r.validateProviderAndBranch(ctx, target, providerNS) + if err != nil { + return false, "", nil, err + } + if !validated { + r.setCondition(target, GitTargetConditionValidated, metav1.ConditionFalse, reason, message) + return false, fmt.Sprintf("Validated gate failed: %s", reason), result, nil + } + + if conflict, conflictMsg, conflictReason, conflictResult := r.checkForConflicts(ctx, target, providerNS); conflict { + r.setCondition(target, GitTargetConditionValidated, metav1.ConditionFalse, conflictReason, conflictMsg) + return false, fmt.Sprintf("Validated gate failed: %s", conflictReason), &conflictResult, nil + } + + r.setCondition( + target, + GitTargetConditionValidated, + metav1.ConditionTrue, + GitTargetReasonOK, + "Provider and branch validation passed", + ) + return true, "", nil, nil +} + +func (r *GitTargetReconciler) evaluateEncryptionGate( + ctx context.Context, + target *configbutleraiv1alpha1.GitTarget, log logr.Logger, - namespacedName k8stypes.NamespacedName, -) (ctrl.Result, error) { - if client.IgnoreNotFound(err) == nil { - log.Info("GitTarget not found, was likely deleted", "namespacedName", namespacedName) - return ctrl.Result{}, nil +) (bool, string, time.Duration) { + if target.Spec.Encryption == nil { + r.setCondition( + target, + GitTargetConditionEncryptionConfigured, + metav1.ConditionTrue, + GitTargetReasonNotRequired, + "Encryption is not configured for this GitTarget", + ) + return true, "", 0 } - log.Error(err, "unable to fetch GitTarget", "namespacedName", namespacedName) - return ctrl.Result{}, err + + if err := r.ensureEncryptionSecret(ctx, target, log); err != nil { + reason := GitTargetReasonInvalidConfig + if strings.Contains(err.Error(), "missing and generateWhenMissing is disabled") { + reason = GitTargetReasonSecretCreateDisabled + } + if strings.Contains(err.Error(), "failed to fetch encryption secret") { + reason = GitTargetReasonMissingSecret + } + r.setCondition(target, GitTargetConditionEncryptionConfigured, metav1.ConditionFalse, reason, err.Error()) + return false, fmt.Sprintf("EncryptionConfigured gate failed: %s", reason), RequeueMediumInterval + } + + r.setCondition( + target, + GitTargetConditionEncryptionConfigured, + metav1.ConditionTrue, + GitTargetReasonOK, + "Encryption configuration is valid", + ) + return true, "", 0 } -// validateGitProvider validates the GitProvider reference and branch. -// Returns a result pointer if validation failed (caller should return it), nil if validation passed. -func (r *GitTargetReconciler) validateGitProvider( +func (r *GitTargetReconciler) evaluateBootstrapGate( ctx context.Context, target *configbutleraiv1alpha1.GitTarget, providerNS string, log logr.Logger, -) (*ctrl.Result, error) { - // TODO: Handle Flux GitRepository support - if target.Spec.ProviderRef.Kind != "" && target.Spec.ProviderRef.Kind != "GitProvider" { - // For now, we only support GitProvider. - // In future, we would fetch GitRepository here. - // But since we are porting existing logic, we assume GitProvider. - // If user provides GitRepository, it will fail here or we should handle it. - // Given the task is to fix e2e tests which use GitProvider, we focus on that. - log.Info("Unsupported provider kind", "kind", target.Spec.ProviderRef.Kind) +) (bool, string, time.Duration) { + if r.WorkerManager == nil { + r.setCondition( + target, + GitTargetConditionBootstrapped, + metav1.ConditionFalse, + GitTargetReasonWorkerNotFound, + "Worker manager is not configured", + ) + return false, "Bootstrapped gate failed: WorkerNotFound", RequeueShortInterval } - var gp configbutleraiv1alpha1.GitProvider - gpKey := k8stypes.NamespacedName{Name: target.Spec.ProviderRef.Name, Namespace: providerNS} - if err := r.Get(ctx, gpKey, &gp); err != nil { - if apierrors.IsNotFound(err) { - msg := fmt.Sprintf("Referenced GitProvider '%s/%s' not found", providerNS, target.Spec.ProviderRef.Name) - log.Info("GitProvider not found", "message", msg) - r.setCondition(target, metav1.ConditionFalse, GitTargetReasonGitProviderNotFound, msg) - result, updateErr := r.updateStatusAndRequeue(ctx, target, RequeueShortInterval) - return &result, updateErr - } - log.Error(err, "Failed to get referenced GitProvider", "gitProvider", gpKey.String()) - result := ctrl.Result{} - return &result, err + if err := r.WorkerManager.EnsureWorker( + ctx, + target.Spec.ProviderRef.Name, + providerNS, + target.Spec.Branch, + ); err != nil { + log.Error(err, "Failed to ensure worker") + r.setCondition( + target, + GitTargetConditionBootstrapped, + metav1.ConditionFalse, + GitTargetReasonWorkerNotFound, + err.Error(), + ) + return false, "Bootstrapped gate failed: WorkerNotFound", RequeueShortInterval } - // Validate branch - if result := r.validateBranch(ctx, target, &gp, providerNS, log); result != nil { - return result, nil + if err := r.WorkerManager.EnsureTargetBootstrapped( + ctx, + target.Name, + target.Namespace, + target.Spec.ProviderRef.Name, + providerNS, + target.Spec.Branch, + target.Spec.Path, + ); err != nil { + log.Error(err, "Failed to bootstrap target path") + r.setCondition( + target, + GitTargetConditionBootstrapped, + metav1.ConditionFalse, + GitTargetReasonBootstrapFailed, + err.Error(), + ) + return false, "Bootstrapped gate failed: BootstrapFailed", RequeueShortInterval } - // All validations passed - msg := fmt.Sprintf("GitTarget is ready. Provider='%s/%s', Branch='%s', Path='%s'", - providerNS, target.Spec.ProviderRef.Name, target.Spec.Branch, target.Spec.Path) - r.setCondition(target, metav1.ConditionTrue, GitTargetReasonReady, msg) - // target.Status.ObservedGeneration = target.Generation // Not in struct + r.setCondition( + target, + GitTargetConditionBootstrapped, + metav1.ConditionTrue, + GitTargetReasonBootstrapApplied, + fmt.Sprintf("Bootstrap ensured for path %s", target.Spec.Path), + ) - return nil, nil //nolint:nilnil // nil result means validation passed + r.updateRepositoryStatus(ctx, target, log) + return true, "", 0 } -// validateBranch validates that the branch matches at least one pattern in allowedBranches. -// Supports glob patterns like "main", "feature/*", "release/v*". -func (r *GitTargetReconciler) validateBranch( +func (r *GitTargetReconciler) evaluateSnapshotGate( ctx context.Context, target *configbutleraiv1alpha1.GitTarget, - gp *configbutleraiv1alpha1.GitProvider, providerNS string, log logr.Logger, -) *ctrl.Result { +) (*reconcile.GitTargetEventStream, metav1.ConditionStatus, string, time.Duration, error) { + if r.EventRouter == nil || r.EventRouter.ReconcilerManager == nil { + now := metav1.Now() + if target.Status.Snapshot == nil { + target.Status.Snapshot = &configbutleraiv1alpha1.GitTargetSnapshotStatus{} + } + target.Status.Snapshot.LastCompletedTime = &now + return nil, metav1.ConditionTrue, "Initial snapshot reconciliation completed", 0, nil + } + + stream, err := r.ensureEventStream(target, providerNS, log) + if err != nil { + return nil, metav1.ConditionFalse, "", 0, err + } + + gitDest := types.NewResourceReference(target.Name, target.Namespace) + reconciler := r.EventRouter.ReconcilerManager.CreateReconciler(gitDest, stream) + if err := reconciler.StartReconciliation(ctx); err != nil { + return stream, metav1.ConditionFalse, "", 0, fmt.Errorf( + "failed to start initial snapshot reconciliation: %w", + err, + ) + } + + if !reconciler.HasBothStates() { + return stream, metav1.ConditionFalse, "Initial snapshot reconciliation in progress", RequeueShortInterval, nil + } + + stats := reconciler.GetLastSnapshotStats() + now := metav1.Now() + if target.Status.Snapshot == nil { + target.Status.Snapshot = &configbutleraiv1alpha1.GitTargetSnapshotStatus{} + } + target.Status.Snapshot.LastCompletedTime = &now + target.Status.Snapshot.Stats = configbutleraiv1alpha1.GitTargetSnapshotStats{ + Created: clampIntToInt32(stats.Created), + Updated: clampIntToInt32(stats.Updated), + Deleted: clampIntToInt32(stats.Deleted), + } + + return stream, metav1.ConditionTrue, "Initial snapshot reconciliation completed", 0, nil +} + +func (r *GitTargetReconciler) evaluateEventStreamGate( + target *configbutleraiv1alpha1.GitTarget, + stream *reconcile.GitTargetEventStream, + providerNS string, +) (bool, string) { + if r.EventRouter == nil { + r.setCondition( + target, + GitTargetConditionEventStreamLive, + metav1.ConditionTrue, + GitTargetReasonRegistered, + "GitTarget event stream is live", + ) + return true, "" + } + + if stream == nil { + r.setCondition( + target, + GitTargetConditionEventStreamLive, + metav1.ConditionFalse, + GitTargetReasonRegistrationFailed, + fmt.Sprintf( + "Failed to register GitTargetEventStream for %s/%s", + target.Namespace, + target.Name, + ), + ) + return false, "EventStreamLive gate failed: RegistrationFailed" + } + + if stream.GetState() != reconcile.LiveProcessing { + stream.OnReconciliationComplete() + } + + if stream.GetState() != reconcile.LiveProcessing { + r.setCondition( + target, + GitTargetConditionEventStreamLive, + metav1.ConditionFalse, + GitTargetReasonDisconnected, + "GitTarget event stream failed to transition to live processing", + ) + return false, "EventStreamLive gate failed: Disconnected" + } + + _ = providerNS // kept for function parity and future diagnostics + r.setCondition( + target, + GitTargetConditionEventStreamLive, + metav1.ConditionTrue, + GitTargetReasonRegistered, + "GitTarget event stream is live", + ) + return true, "" +} + +func (r *GitTargetReconciler) ensureEventStream( + target *configbutleraiv1alpha1.GitTarget, + providerNS string, + log logr.Logger, +) (*reconcile.GitTargetEventStream, error) { + if r.WorkerManager == nil { + return nil, errors.New("worker manager is not configured") + } + + worker, exists := r.WorkerManager.GetWorkerForTarget(target.Spec.ProviderRef.Name, providerNS, target.Spec.Branch) + if !exists { + return nil, fmt.Errorf( + "branch worker not found for provider=%s/%s branch=%s", + providerNS, + target.Spec.ProviderRef.Name, + target.Spec.Branch, + ) + } + + gitDest := types.NewResourceReference(target.Name, target.Namespace) + if existingStream := r.EventRouter.GetGitTargetEventStream(gitDest); existingStream != nil { + return existingStream, nil + } + + stream := reconcile.NewGitTargetEventStream(target.Name, target.Namespace, worker, log) + r.EventRouter.RegisterGitTargetEventStream(gitDest, stream) + return stream, nil +} + +func (r *GitTargetReconciler) setReadyCondition( + target *configbutleraiv1alpha1.GitTarget, + status metav1.ConditionStatus, + reason, message string, +) { + r.setCondition(target, GitTargetConditionReady, status, reason, message) +} + +func (r *GitTargetReconciler) setCondition( + target *configbutleraiv1alpha1.GitTarget, + conditionType string, + status metav1.ConditionStatus, + reason, message string, +) { + target.Status.Conditions = upsertCondition( + target.Status.Conditions, + conditionType, + status, + reason, + message, + target.Generation, + ) +} + +func (r *GitTargetReconciler) validateProviderAndBranch( + ctx context.Context, + target *configbutleraiv1alpha1.GitTarget, + providerNS string, +) (bool, string, string, *ctrl.Result, error) { + var gp configbutleraiv1alpha1.GitProvider + gpKey := k8stypes.NamespacedName{Name: target.Spec.ProviderRef.Name, Namespace: providerNS} + if err := r.Get(ctx, gpKey, &gp); err != nil { + if apierrors.IsNotFound(err) { + msg := fmt.Sprintf("Referenced GitProvider '%s/%s' not found", providerNS, target.Spec.ProviderRef.Name) + result := ctrl.Result{RequeueAfter: RequeueShortInterval} + return false, msg, GitTargetReasonProviderNotFound, &result, nil + } + return false, "", "", nil, err + } + branchAllowed := false for _, pattern := range gp.Spec.AllowedBranches { - if match, err := filepath.Match(pattern, target.Spec.Branch); match { + match, matchErr := filepath.Match(pattern, target.Spec.Branch) + if matchErr != nil { + continue + } + if match { branchAllowed = true break - } else if err != nil { - // Log malformed pattern but continue checking other patterns - log.Info("Invalid glob pattern in allowedBranches", "pattern", pattern, "error", err) } } - if !branchAllowed { - msg := fmt.Sprintf("Branch '%s' does not match any pattern in allowedBranches list %v of GitProvider '%s/%s'", - target.Spec.Branch, gp.Spec.AllowedBranches, providerNS, target.Spec.ProviderRef.Name) - log.Info("Branch validation failed", "branch", target.Spec.Branch, "allowedBranches", gp.Spec.AllowedBranches) - r.setCondition(target, metav1.ConditionFalse, GitTargetReasonBranchNotAllowed, msg) - // Security requirement: Clear LastCommit when branch not allowed target.Status.LastCommit = "" target.Status.LastPushTime = nil - result, _ := r.updateStatusAndRequeue(ctx, target, RequeueShortInterval) - return &result + msg := fmt.Sprintf( + "Branch '%s' does not match any pattern in allowedBranches list %v of GitProvider '%s/%s'", + target.Spec.Branch, + gp.Spec.AllowedBranches, + providerNS, + target.Spec.ProviderRef.Name, + ) + result := ctrl.Result{RequeueAfter: RequeueShortInterval} + return false, msg, GitTargetReasonBranchNotAllowed, &result, nil } - return nil + return true, "", "", nil, nil } -// checkForConflicts checks if this GitTarget conflicts with other GitTargets. -// This provides defense-in-depth alongside webhook validation. -// Returns a result pointer if conflict detected, nil if no conflict. func (r *GitTargetReconciler) checkForConflicts( ctx context.Context, target *configbutleraiv1alpha1.GitTarget, providerNS string, - log logr.Logger, -) *ctrl.Result { - // List all GitTargets in the cluster +) (bool, string, string, ctrl.Result) { var allTargets configbutleraiv1alpha1.GitTargetList if err := r.List(ctx, &allTargets); err != nil { - log.Error(err, "Failed to list GitTargets for conflict checking") - // Don't fail reconciliation due to listing error, just continue - return nil + return false, "", "", ctrl.Result{} } - // Check each target for conflicts for i := range allTargets.Items { existing := &allTargets.Items[i] - - // Skip self (same namespace and name) if existing.Namespace == target.Namespace && existing.Name == target.Name { continue } - - // Skip if not referencing the same GitProvider - // GitProvider is always in the same namespace as GitTarget if existing.Namespace != providerNS || existing.Spec.ProviderRef.Name != target.Spec.ProviderRef.Name { continue } - - // Check if branch and path match (conflict condition) if existing.Spec.Branch == target.Spec.Branch && existing.Spec.Path == target.Spec.Path { - // Conflict detected! Elect winner by creationTimestamp if target.CreationTimestamp.After(existing.CreationTimestamp.Time) { - // Current target is the loser msg := fmt.Sprintf( - "Conflict detected. Another GitTarget '%s/%s' (created at %s) "+ - "is already using GitProvider '%s/%s', branch '%s', path '%s'. "+ - "This GitTarget was created later and will not be processed.", - existing.Namespace, existing.Name, + "Conflict detected. Another GitTarget '%s/%s' (created at %s) is already using GitProvider '%s/%s', branch '%s', path '%s'. This GitTarget was created later and will not be processed.", + existing.Namespace, + existing.Name, existing.CreationTimestamp.Format(time.RFC3339), - providerNS, target.Spec.ProviderRef.Name, - target.Spec.Branch, target.Spec.Path, + providerNS, + target.Spec.ProviderRef.Name, + target.Spec.Branch, + target.Spec.Path, ) - log.Info("Conflict detected, this GitTarget is the loser", - "winner", fmt.Sprintf("%s/%s", existing.Namespace, existing.Name), - "winnerCreated", existing.CreationTimestamp.Format(time.RFC3339), - "loserCreated", target.CreationTimestamp.Format(time.RFC3339)) - - r.setCondition(target, metav1.ConditionFalse, GitTargetReasonConflict, msg) - result, _ := r.updateStatusAndRequeue(ctx, target, RequeueShortInterval) - return &result + return true, msg, GitTargetReasonTargetConflict, ctrl.Result{RequeueAfter: RequeueShortInterval} } - // Current target is the winner or equal timestamp - continue } } - // No conflicts detected - return nil -} - -// registerWithWorkerAndEventStream registers the GitTarget with worker and event stream. -func (r *GitTargetReconciler) registerWithWorkerAndEventStream( - ctx context.Context, - target *configbutleraiv1alpha1.GitTarget, - providerNS string, - log logr.Logger, -) { - // Register with branch worker - r.registerWithWorker(ctx, target, providerNS, log) - - // Register event stream - r.registerEventStream(target, providerNS, log) + return false, "", "", ctrl.Result{} } -// registerWithWorker registers the target with branch worker. -func (r *GitTargetReconciler) registerWithWorker( +func (r *GitTargetReconciler) ensureEncryptionSecret( ctx context.Context, target *configbutleraiv1alpha1.GitTarget, - providerNS string, log logr.Logger, -) { - if r.WorkerManager == nil { - return +) error { + if target.Spec.Encryption == nil { + return nil } - if err := r.WorkerManager.RegisterTarget( - ctx, - target.Name, target.Namespace, - target.Spec.ProviderRef.Name, providerNS, - target.Spec.Branch, - target.Spec.Path, - ); err != nil { - log.Error(err, "Failed to register target with worker") - } else { - log.Info("Registered target with branch worker", - "provider", target.Spec.ProviderRef.Name, - "branch", target.Spec.Branch, - "path", target.Spec.Path) + secretName := strings.TrimSpace(target.Spec.Encryption.SecretRef.Name) + if secretName == "" { + return errors.New("encryption.secretRef.name must be set when encryption is configured") } -} -// registerEventStream registers the GitTargetEventStream with EventRouter. -func (r *GitTargetReconciler) registerEventStream( - target *configbutleraiv1alpha1.GitTarget, - providerNS string, - log logr.Logger, -) { - if r.EventRouter == nil { - return + secretKey := k8stypes.NamespacedName{Name: secretName, Namespace: target.Namespace} + var existing corev1.Secret + if err := r.Get(ctx, secretKey, &existing); err == nil { + if existing.Annotations[encryptionSecretBackupWarningAnno] == encryptionSecretBackupWarningValue { + log.Info("ENCRYPTION KEY BACKUP REQUIRED: remove annotation after backup is completed", + "secret", secretKey.String(), + "annotation", encryptionSecretBackupWarningAnno) + } + return nil + } else if !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to fetch encryption secret %s: %w", secretKey.String(), err) } - branchWorker, exists := r.WorkerManager.GetWorkerForTarget( - target.Spec.ProviderRef.Name, providerNS, target.Spec.Branch, - ) - if !exists { - log.Error(nil, "BranchWorker not found for GitTargetEventStream registration", - "provider", target.Spec.ProviderRef.Name, - "namespace", providerNS, - "branch", target.Spec.Branch) - return + if !target.Spec.Encryption.GenerateWhenMissing { + return fmt.Errorf("encryption secret %s is missing and generateWhenMissing is disabled", secretKey.String()) } - gitDest := types.NewResourceReference(target.Name, target.Namespace) + identity, err := age.GenerateX25519Identity() + if err != nil { + return fmt.Errorf("failed to generate age identity for encryption secret %s: %w", secretKey.String(), err) + } + recipient := identity.Recipient().String() + + secret := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: target.Namespace, + Annotations: map[string]string{ + encryptionSecretRecipientAnnoKey: recipient, + encryptionSecretBackupWarningAnno: encryptionSecretBackupWarningValue, + }, + }, + Type: corev1.SecretTypeOpaque, + StringData: map[string]string{ + sopsAgeKeySecretKey: identity.String(), + }, + } - // Check if already registered - if existingStream := r.EventRouter.GetGitTargetEventStream(gitDest); existingStream != nil { - return + if err := r.Create(ctx, &secret); err != nil { + if apierrors.IsAlreadyExists(err) { + return nil + } + return fmt.Errorf("failed to create encryption secret %s: %w", secretKey.String(), err) } - stream := reconcile.NewGitTargetEventStream( - target.Name, target.Namespace, - branchWorker, - log, + log.Info( + "Generated missing encryption secret with age key. Back up the private key and remove warning annotation.", + "secret", secretKey.String(), + "recipient", recipient, + "warningAnnotation", encryptionSecretBackupWarningAnno, ) - r.EventRouter.RegisterGitTargetEventStream(gitDest, stream) - log.Info("Registered GitTargetEventStream with EventRouter", - "gitDest", gitDest.String(), - "provider", target.Spec.ProviderRef.Name, - "branch", target.Spec.Branch, - "path", target.Spec.Path) + + return nil } -// updateRepositoryStatus synchronously fetches and updates repository status. func (r *GitTargetReconciler) updateRepositoryStatus( ctx context.Context, target *configbutleraiv1alpha1.GitTarget, - _ *configbutleraiv1alpha1.GitProvider, log logr.Logger, ) { - log.Info("Syncing repository status from remote") - - // Get the branch worker for this target - providerNS := target.Namespace - if r.WorkerManager == nil { - log.Error(nil, "WorkerManager is nil, cannot sync repository status") return } - worker, exists := r.WorkerManager.GetWorkerForTarget( - target.Spec.ProviderRef.Name, providerNS, target.Spec.Branch, - ) - + providerNS := target.Namespace + worker, exists := r.WorkerManager.GetWorkerForTarget(target.Spec.ProviderRef.Name, providerNS, target.Spec.Branch) if !exists { - // Worker not yet created - this is normal during initial reconciliation - log.V(1).Info("Worker not yet available, will update status on next reconcile") return } - // SYNCHRONOUS: Block and fetch fresh metadata (or use 30s cache) report, err := worker.SyncAndGetMetadata(ctx) if err != nil { log.Error(err, "Failed to sync repository metadata") - // Don't fail reconcile, just skip status update return } - - // Update status with FRESH data from PullReport - // target.Status.BranchExists = report.ExistsOnRemote // Not in struct target.Status.LastCommit = report.HEAD.Sha - // target.Status.LastSyncTime = &metav1.Time{Time: time.Now()} // Not in struct - - log.Info("Repository status updated from remote", - "branchExists", report.ExistsOnRemote, - "lastCommit", report.HEAD.Sha, - "incomingChanges", report.IncomingChanges) } -// setCondition sets or updates the Ready condition. -func (r *GitTargetReconciler) setCondition(target *configbutleraiv1alpha1.GitTarget, - status metav1.ConditionStatus, reason, message string, -) { - condition := metav1.Condition{ - Type: GitTargetReasonReady, - Status: status, - Reason: reason, - Message: message, - LastTransitionTime: metav1.Now(), - } - - // Update existing condition or add new one - for i, existingCondition := range target.Status.Conditions { - if existingCondition.Type == GitTargetReasonReady { - target.Status.Conditions[i] = condition - return - } +// handleFetchError handles errors from fetching GitTarget. +func (r *GitTargetReconciler) handleFetchError( + err error, + log logr.Logger, + namespacedName k8stypes.NamespacedName, +) (ctrl.Result, error) { + if client.IgnoreNotFound(err) == nil { + log.Info("GitTarget not found, was likely deleted", "namespacedName", namespacedName) + return ctrl.Result{}, nil } - - target.Status.Conditions = append(target.Status.Conditions, condition) + log.Error(err, "unable to fetch GitTarget", "namespacedName", namespacedName) + return ctrl.Result{}, err } -// updateStatusAndRequeue updates the status and returns requeue result. -func (r *GitTargetReconciler) updateStatusAndRequeue( - ctx context.Context, target *configbutleraiv1alpha1.GitTarget, requeueAfter time.Duration, -) (ctrl.Result, error) { - if err := r.updateStatusWithRetry(ctx, target); err != nil { - return ctrl.Result{}, err +func clampIntToInt32(value int) int32 { + if value < 0 { + return 0 + } + maxInt32 := int(^uint32(0) >> 1) + if value > maxInt32 { + return int32(maxInt32) } - return ctrl.Result{RequeueAfter: requeueAfter}, nil + return int32(value) } // updateStatusWithRetry updates the status with retry logic to handle race conditions. -// -//nolint:dupl // Similar retry logic pattern used across controllers func (r *GitTargetReconciler) updateStatusWithRetry( - ctx context.Context, target *configbutleraiv1alpha1.GitTarget, + ctx context.Context, + target *configbutleraiv1alpha1.GitTarget, ) error { log := logf.FromContext(ctx).WithName("updateStatusWithRetry") - log.Info("Starting status update with retry", - "name", target.Name, - "namespace", target.Namespace, - "conditionsCount", len(target.Status.Conditions)) - return wait.ExponentialBackoff(wait.Backoff{ Duration: RetryInitialDuration, Factor: RetryBackoffFactor, Jitter: RetryBackoffJitter, Steps: RetryMaxSteps, }, func() (bool, error) { - log.Info("Attempting status update") - - // Get the latest version of the resource latest := &configbutleraiv1alpha1.GitTarget{} key := client.ObjectKeyFromObject(target) if err := r.Get(ctx, key, latest); err != nil { if apierrors.IsNotFound(err) { - log.Info("Resource was deleted, nothing to update") return true, nil } - log.Error(err, "Failed to get latest resource version") return false, err } - log.Info("Got latest resource version", - "generation", latest.Generation, - "resourceVersion", latest.ResourceVersion) - - // Copy our status to the latest version latest.Status = target.Status - - log.Info("Attempting to update status", - "conditionsCount", len(latest.Status.Conditions)) - - // Attempt to update if err := r.Status().Update(ctx, latest); err != nil { if apierrors.IsConflict(err) { - log.Info("Resource version conflict, retrying") + log.V(1).Info("Status conflict, retrying") return false, nil } - log.Error(err, "Failed to update status") return false, err } - log.Info("Status update successful") return true, nil }) } diff --git a/internal/controller/gittarget_controller_test.go b/internal/controller/gittarget_controller_test.go index 92daad51..4a2a2006 100644 --- a/internal/controller/gittarget_controller_test.go +++ b/internal/controller/gittarget_controller_test.go @@ -24,6 +24,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -108,8 +109,16 @@ var _ = Describe("GitTarget Controller Security", func() { } Expect(readyCondition).NotTo(BeNil(), "Ready condition should exist") Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse), "Ready should be False") - Expect(readyCondition.Reason).To(Equal(GitTargetReasonBranchNotAllowed), - "Reason should be BranchNotAllowed") + Expect(readyCondition.Reason).To(Equal(GitTargetReadyReasonValidationFailed)) + var validatedCondition *metav1.Condition + for i, condition := range createdGitTarget.Status.Conditions { + if condition.Type == GitTargetConditionValidated { + validatedCondition = &createdGitTarget.Status.Conditions[i] + break + } + } + Expect(validatedCondition).NotTo(BeNil(), "Validated condition should exist") + Expect(validatedCondition.Reason).To(Equal(GitTargetReasonBranchNotAllowed)) // SECURITY TEST: Verify sensitive fields are cleared // This prevents unauthorized users from discovering SHA information @@ -302,7 +311,16 @@ var _ = Describe("GitTarget Controller Security", func() { if !tc.shouldBeAllowed { Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) - Expect(readyCondition.Reason).To(Equal(GitTargetReasonBranchNotAllowed)) + Expect(readyCondition.Reason).To(Equal(GitTargetReadyReasonValidationFailed)) + var validatedCondition *metav1.Condition + for i, condition := range createdGitTarget.Status.Conditions { + if condition.Type == GitTargetConditionValidated { + validatedCondition = &createdGitTarget.Status.Conditions[i] + break + } + } + Expect(validatedCondition).NotTo(BeNil()) + Expect(validatedCondition.Reason).To(Equal(GitTargetReasonBranchNotAllowed)) // Security: verify fields are cleared Expect(createdGitTarget.Status.LastCommit).To(BeEmpty()) } else { @@ -400,7 +418,8 @@ var _ = Describe("GitTarget Controller Security", func() { return false } for _, condition := range target.Status.Conditions { - if condition.Type == GitTargetReasonReady && condition.Reason == GitTargetReasonConflict { + if condition.Type == GitTargetConditionValidated && + condition.Reason == GitTargetReasonTargetConflict { return true } } @@ -421,9 +440,18 @@ var _ = Describe("GitTarget Controller Security", func() { Expect(readyCondition).NotTo(BeNil()) Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) - Expect(readyCondition.Reason).To(Equal(GitTargetReasonConflict)) - Expect(readyCondition.Message).To(ContainSubstring("first-target-conflict")) - Expect(readyCondition.Message).To(ContainSubstring("created later")) + Expect(readyCondition.Reason).To(Equal(GitTargetReadyReasonValidationFailed)) + var validatedCondition *metav1.Condition + for i, condition := range secondReconciledTarget.Status.Conditions { + if condition.Type == GitTargetConditionValidated { + validatedCondition = &secondReconciledTarget.Status.Conditions[i] + break + } + } + Expect(validatedCondition).NotTo(BeNil()) + Expect(validatedCondition.Reason).To(Equal(GitTargetReasonTargetConflict)) + Expect(validatedCondition.Message).To(ContainSubstring("first-target-conflict")) + Expect(validatedCondition.Message).To(ContainSubstring("created later")) // Cleanup Expect(k8sClient.Delete(ctx, secondTarget)).Should(Succeed()) @@ -512,7 +540,7 @@ var _ = Describe("GitTarget Controller Security", func() { } Expect(readyCondition).NotTo(BeNil()) - Expect(readyCondition.Reason).NotTo(Equal(GitTargetReasonConflict), + Expect(readyCondition.Reason).NotTo(Equal(GitTargetReadyReasonValidationFailed), "Should not have conflict when path is different") // Cleanup @@ -521,4 +549,133 @@ var _ = Describe("GitTarget Controller Security", func() { Expect(k8sClient.Delete(ctx, gitProvider)).Should(Succeed()) }) }) + + Context("When encryption secret auto-generation is configured", func() { + It("Should create missing encryption secret with recipient annotation and warning annotation", func() { + ctx := context.Background() + + gitProvider := &configbutleraiv1alpha1.GitProvider{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-provider-generate-enc-secret", + Namespace: "default", + }, + Spec: configbutleraiv1alpha1.GitProviderSpec{ + URL: "https://github.com/test-org/test-repo.git", + AllowedBranches: []string{"main"}, + }, + } + Expect(k8sClient.Create(ctx, gitProvider)).Should(Succeed()) + + target := &configbutleraiv1alpha1.GitTarget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-target-generate-enc-secret", + Namespace: "default", + }, + Spec: configbutleraiv1alpha1.GitTargetSpec{ + ProviderRef: configbutleraiv1alpha1.GitProviderReference{ + Name: "test-provider-generate-enc-secret", + Kind: "GitProvider", + }, + Branch: "main", + Path: "test-path", + Encryption: &configbutleraiv1alpha1.EncryptionSpec{ + Provider: "sops", + SecretRef: configbutleraiv1alpha1.LocalSecretReference{ + Name: "generated-sops-age-key", + }, + GenerateWhenMissing: true, + }, + }, + } + Expect(k8sClient.Create(ctx, target)).Should(Succeed()) + + secretKey := types.NamespacedName{Name: "generated-sops-age-key", Namespace: "default"} + Eventually(func(g Gomega) { + var secret corev1.Secret + err := k8sClient.Get(ctx, secretKey, &secret) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(secret.Type).To(Equal(corev1.SecretTypeOpaque)) + g.Expect(secret.Data).To(HaveKey("SOPS_AGE_KEY")) + g.Expect(string(secret.Data["SOPS_AGE_KEY"])).To(ContainSubstring("AGE-SECRET-KEY-")) + g.Expect(secret.Annotations).To(HaveKey(encryptionSecretRecipientAnnoKey)) + g.Expect(secret.Annotations[encryptionSecretRecipientAnnoKey]).To(HavePrefix("age1")) + g.Expect(secret.Annotations).To(HaveKeyWithValue( + encryptionSecretBackupWarningAnno, + encryptionSecretBackupWarningValue, + )) + }, timeout, interval).Should(Succeed()) + + Expect(k8sClient.Delete(ctx, target)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, gitProvider)).Should(Succeed()) + }) + + It("Should report invalid encryption config when secret is missing and generation is disabled", func() { + ctx := context.Background() + + gitProvider := &configbutleraiv1alpha1.GitProvider{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-provider-no-generate-enc-secret", + Namespace: "default", + }, + Spec: configbutleraiv1alpha1.GitProviderSpec{ + URL: "https://github.com/test-org/test-repo.git", + AllowedBranches: []string{"main"}, + }, + } + Expect(k8sClient.Create(ctx, gitProvider)).Should(Succeed()) + + target := &configbutleraiv1alpha1.GitTarget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-target-no-generate-enc-secret", + Namespace: "default", + }, + Spec: configbutleraiv1alpha1.GitTargetSpec{ + ProviderRef: configbutleraiv1alpha1.GitProviderReference{ + Name: "test-provider-no-generate-enc-secret", + Kind: "GitProvider", + }, + Branch: "main", + Path: "test-path", + Encryption: &configbutleraiv1alpha1.EncryptionSpec{ + Provider: "sops", + SecretRef: configbutleraiv1alpha1.LocalSecretReference{ + Name: "missing-sops-age-key", + }, + }, + }, + } + Expect(k8sClient.Create(ctx, target)).Should(Succeed()) + + targetKey := types.NamespacedName{Name: "test-target-no-generate-enc-secret", Namespace: "default"} + Eventually(func(g Gomega) { + var got configbutleraiv1alpha1.GitTarget + err := k8sClient.Get(ctx, targetKey, &got) + g.Expect(err).NotTo(HaveOccurred()) + + var readyCondition *metav1.Condition + for i, condition := range got.Status.Conditions { + if condition.Type == GitTargetReasonReady { + readyCondition = &got.Status.Conditions[i] + break + } + } + g.Expect(readyCondition).NotTo(BeNil()) + g.Expect(readyCondition.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(readyCondition.Reason).To(Equal(GitTargetReadyReasonEncryptionNotConfigured)) + var encryptionCondition *metav1.Condition + for i, condition := range got.Status.Conditions { + if condition.Type == GitTargetConditionEncryptionConfigured { + encryptionCondition = &got.Status.Conditions[i] + break + } + } + g.Expect(encryptionCondition).NotTo(BeNil()) + g.Expect(encryptionCondition.Reason).To(Equal(GitTargetReasonSecretCreateDisabled)) + g.Expect(encryptionCondition.Message).To(ContainSubstring("generateWhenMissing is disabled")) + }, timeout, interval).Should(Succeed()) + + Expect(k8sClient.Delete(ctx, target)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, gitProvider)).Should(Succeed()) + }) + }) }) diff --git a/internal/git/branch_worker.go b/internal/git/branch_worker.go index 821b515b..ddae734e 100644 --- a/internal/git/branch_worker.go +++ b/internal/git/branch_worker.go @@ -78,6 +78,9 @@ type BranchWorker struct { branchExists bool lastCommitSHA string lastFetchTime time.Time + + // repoMu serializes repository/worktree operations within this worker. + repoMu sync.Mutex } // NewBranchWorker creates a worker for a (provider, branch) combination. @@ -160,6 +163,9 @@ func (w *BranchWorker) Enqueue(event Event) { // ListResourcesInPath returns resource identifiers found in a Git folder. // This is a synchronous service method called by EventRouter. func (w *BranchWorker) ListResourcesInPath(path string) ([]itypes.ResourceIdentifier, error) { + w.repoMu.Lock() + defer w.repoMu.Unlock() + // Ensure repository is initialized and up-to-date if err := w.ensureRepositoryInitialized(w.ctx); err != nil { return nil, fmt.Errorf("failed to initialize repository: %w", err) @@ -175,6 +181,9 @@ func (w *BranchWorker) ListResourcesInPath(path string) ([]itypes.ResourceIdenti // EnsurePathBootstrapped applies bootstrap templates to a path. // Existing files are preserved, and only missing template files are added. func (w *BranchWorker) EnsurePathBootstrapped(path, targetName, targetNamespace string) error { + w.repoMu.Lock() + defer w.repoMu.Unlock() + ctx := w.ctx if ctx == nil { ctx = context.Background() @@ -425,6 +434,9 @@ func (w *BranchWorker) commitAndPush( provider *configv1alpha1.GitProvider, events []Event, ) { + w.repoMu.Lock() + defer w.repoMu.Unlock() + log := w.Log.WithValues("eventCount", len(events)) log.Info("Starting git commit and push", diff --git a/internal/git/worker_manager.go b/internal/git/worker_manager.go index 5b2b98d0..ead4ea81 100644 --- a/internal/git/worker_manager.go +++ b/internal/git/worker_manager.go @@ -36,17 +36,19 @@ type WorkerManager struct { Client client.Client Log logr.Logger - mu sync.RWMutex - workers map[BranchKey]*BranchWorker - ctx context.Context + mu sync.RWMutex + workers map[BranchKey]*BranchWorker + bootstrapLocks map[BranchKey]*sync.Mutex + ctx context.Context } // NewWorkerManager creates a new worker manager. func NewWorkerManager(client client.Client, log logr.Logger) *WorkerManager { return &WorkerManager{ - Client: client, - Log: log, - workers: make(map[BranchKey]*BranchWorker), + Client: client, + Log: log, + workers: make(map[BranchKey]*BranchWorker), + bootstrapLocks: make(map[BranchKey]*sync.Mutex), } } @@ -54,10 +56,45 @@ func NewWorkerManager(client client.Client, log logr.Logger) *WorkerManager { // and registers the target with that worker. // This is called by GitTarget controller when a target becomes Ready. func (m *WorkerManager) RegisterTarget( - _ context.Context, + ctx context.Context, targetName, targetNamespace string, providerName, providerNamespace string, branch, path string, +) error { + if err := m.EnsureWorker(ctx, providerName, providerNamespace, branch); err != nil { + return err + } + + if err := m.EnsureTargetBootstrapped( + ctx, + targetName, + targetNamespace, + providerName, + providerNamespace, + branch, + path, + ); err != nil { + return err + } + + m.Log.Info("GitTarget registered with branch worker", + "target", fmt.Sprintf("%s/%s", targetNamespace, targetName), + "workerKey", BranchKey{ + RepoNamespace: providerNamespace, + RepoName: providerName, + Branch: branch, + }.String(), + "path", path) + + return nil +} + +// EnsureWorker ensures a worker exists for the given (provider, branch). +// Worker creation/start is protected by the manager lock. +func (m *WorkerManager) EnsureWorker( + _ context.Context, + providerName, providerNamespace string, + branch string, ) error { m.mu.Lock() defer m.mu.Unlock() @@ -68,7 +105,6 @@ func (m *WorkerManager) RegisterTarget( Branch: branch, } - // Get or create worker for this (provider, branch) if _, exists := m.workers[key]; !exists { m.Log.Info("Creating new branch worker", "key", key.String()) worker := NewBranchWorker( @@ -87,19 +123,54 @@ func (m *WorkerManager) RegisterTarget( m.workers[key] = worker } - worker := m.workers[key] + return nil +} + +// EnsureTargetBootstrapped ensures the target path bootstrap files are present. +// This intentionally runs outside the manager-wide lock. +func (m *WorkerManager) EnsureTargetBootstrapped( + _ context.Context, + targetName, targetNamespace string, + providerName, providerNamespace string, + branch, path string, +) error { + key := BranchKey{ + RepoNamespace: providerNamespace, + RepoName: providerName, + Branch: branch, + } + + m.mu.RLock() + worker, exists := m.workers[key] + m.mu.RUnlock() + if !exists { + return fmt.Errorf("worker not found for %s", key.String()) + } + + bootstrapLock := m.getBootstrapLock(key) + bootstrapLock.Lock() + defer bootstrapLock.Unlock() + if err := worker.EnsurePathBootstrapped(path, targetName, targetNamespace); err != nil { return fmt.Errorf("failed to ensure path bootstrap for %s/%s: %w", targetNamespace, targetName, err) } - m.Log.Info("GitTarget registered with branch worker", - "target", fmt.Sprintf("%s/%s", targetNamespace, targetName), - "workerKey", key.String(), - "path", path) - return nil } +func (m *WorkerManager) getBootstrapLock(key BranchKey) *sync.Mutex { + m.mu.Lock() + defer m.mu.Unlock() + + lock, exists := m.bootstrapLocks[key] + if !exists { + lock = &sync.Mutex{} + m.bootstrapLocks[key] = lock + } + + return lock +} + // UnregisterTarget removes a GitTarget from its worker. // Destroys the worker if it was the last target using it. // This is called by GitTarget controller when a target is deleted. @@ -127,6 +198,7 @@ func (m *WorkerManager) UnregisterTarget( m.Log.Info("Unregistering target, destroying worker", "key", key.String()) worker.Stop() delete(m.workers, key) + delete(m.bootstrapLocks, key) return nil } @@ -188,6 +260,7 @@ func (m *WorkerManager) ReconcileWorkers(ctx context.Context) error { m.Log.Info("Cleaning up orphaned worker", "key", key.String()) worker.Stop() delete(m.workers, key) + delete(m.bootstrapLocks, key) } } @@ -217,6 +290,7 @@ func (m *WorkerManager) Start(ctx context.Context) error { } m.workers = make(map[BranchKey]*BranchWorker) + m.bootstrapLocks = make(map[BranchKey]*sync.Mutex) m.Log.Info("WorkerManager stopped") return nil } diff --git a/internal/reconcile/folder_reconciler.go b/internal/reconcile/folder_reconciler.go index be4c4828..3cc674fe 100644 --- a/internal/reconcile/folder_reconciler.go +++ b/internal/reconcile/folder_reconciler.go @@ -38,11 +38,22 @@ type FolderReconciler struct { // Current state snapshots clusterResources []types.ResourceIdentifier gitResources []types.ResourceIdentifier + clusterStateSeen bool + gitStateSeen bool // Dependencies for event emission eventEmitter EventEmitter controlEmitter events.ControlEventEmitter logger logr.Logger + + lastSnapshotStats SnapshotStats +} + +// SnapshotStats captures the latest create/update/delete counts from reconciliation. +type SnapshotStats struct { + Created int + Updated int + Deleted int } // NewFolderReconciler creates a new FolderReconciler. @@ -88,6 +99,7 @@ func (r *FolderReconciler) OnClusterState(event events.ClusterStateEvent) { return } r.clusterResources = event.Resources + r.clusterStateSeen = true r.logger.V(1).Info("Received cluster state", "resourceCount", len(event.Resources)) r.reconcile() } @@ -98,6 +110,7 @@ func (r *FolderReconciler) OnRepoState(event events.RepoStateEvent) { return } r.gitResources = event.Resources + r.gitStateSeen = true r.logger.V(1).Info("Received repo state", "resourceCount", len(event.Resources)) r.reconcile() } @@ -105,12 +118,17 @@ func (r *FolderReconciler) OnRepoState(event events.RepoStateEvent) { // reconcile performs the reconciliation logic when both states are available. func (r *FolderReconciler) reconcile() { // Only reconcile when we have both cluster and Git state - if r.clusterResources == nil || r.gitResources == nil { + if !r.clusterStateSeen || !r.gitStateSeen { return } // Compute reconciliation actions (pure logic, no time concerns) toCreate, toDelete, existingInBoth := r.findDifferences(r.clusterResources, r.gitResources) + r.lastSnapshotStats = SnapshotStats{ + Created: len(toCreate), + Updated: len(existingInBoth), + Deleted: len(toDelete), + } r.logger.V(1).Info("Reconciliation computed", "toCreate", len(toCreate), @@ -138,6 +156,11 @@ func (r *FolderReconciler) reconcile() { } } +// GetLastSnapshotStats returns stats from the latest completed reconciliation diff. +func (r *FolderReconciler) GetLastSnapshotStats() SnapshotStats { + return r.lastSnapshotStats +} + // findDifferences computes what needs to be created, deleted, and resources that exist in both. func (r *FolderReconciler) findDifferences( clusterResources, gitResources []types.ResourceIdentifier, @@ -183,7 +206,7 @@ func (r *FolderReconciler) findDifferences( // HasBothStates returns true if the reconciler has received both cluster and Git state. func (r *FolderReconciler) HasBothStates() bool { - return r.clusterResources != nil && r.gitResources != nil + return r.clusterStateSeen && r.gitStateSeen } // GetGitDest returns the GitDestination reference this reconciler is responsible for. diff --git a/internal/reconcile/folder_reconciler_test.go b/internal/reconcile/folder_reconciler_test.go index b3e9b82e..774f3ac9 100644 --- a/internal/reconcile/folder_reconciler_test.go +++ b/internal/reconcile/folder_reconciler_test.go @@ -241,6 +241,7 @@ func TestFolderReconciler_HasBothStates(t *testing.T) { reconciler.clusterResources = []types.ResourceIdentifier{ {Group: "", Version: "v1", Resource: "pods", Name: "app-pod"}, } + reconciler.clusterStateSeen = true // Should still not have both states assert.False(t, reconciler.HasBothStates(), "Should not have both states with only cluster state") @@ -249,11 +250,31 @@ func TestFolderReconciler_HasBothStates(t *testing.T) { reconciler.gitResources = []types.ResourceIdentifier{ {Group: "", Version: "v1", Resource: "pods", Name: "app-pod"}, } + reconciler.gitStateSeen = true // Should now have both states assert.True(t, reconciler.HasBothStates(), "Should have both states when both are set") } +func TestFolderReconciler_HasBothStates_WithEmptyStates(t *testing.T) { + mockEmitter := &MockEventEmitter{} + mockControlEmitter := &MockControlEventEmitter{} + gitDest := types.NewResourceReference("test-gitdest", "default") + reconciler := NewFolderReconciler(gitDest, mockEmitter, mockControlEmitter, log.Log) + + reconciler.OnClusterState(events.ClusterStateEvent{ + GitDest: gitDest, + Resources: nil, + }) + assert.False(t, reconciler.HasBothStates(), "Should not have both states with only cluster state event") + + reconciler.OnRepoState(events.RepoStateEvent{ + GitDest: gitDest, + Resources: nil, + }) + assert.True(t, reconciler.HasBothStates(), "Should have both states when both events are received, even if empty") +} + func TestFolderReconciler_GetGitDest(t *testing.T) { mockEmitter := &MockEventEmitter{} mockControlEmitter := &MockControlEventEmitter{} diff --git a/internal/reconcile/git_target_event_stream.go b/internal/reconcile/git_target_event_stream.go index 88ede507..9e4f4a5b 100644 --- a/internal/reconcile/git_target_event_stream.go +++ b/internal/reconcile/git_target_event_stream.go @@ -21,6 +21,7 @@ package reconcile import ( "crypto/sha256" "fmt" + "sync" "github.com/go-logr/logr" @@ -59,6 +60,7 @@ type GitTargetEventStream struct { // Dependencies branchWorker EventEnqueuer logger logr.Logger + mu sync.RWMutex } // EventEnqueuer interface for enqueuing events (allows mocking). @@ -92,12 +94,15 @@ func NewGitTargetEventStream( // OnWatchEvent processes incoming watch events from the cluster. func (s *GitTargetEventStream) OnWatchEvent(event git.Event) { + s.mu.Lock() switch s.state { case StartupReconcile: // Buffer all events during reconciliation (no deduplication) s.bufferedEvents = append(s.bufferedEvents, event) + bufferSize := len(s.bufferedEvents) + s.mu.Unlock() s.logger.V(1). - Info("Buffered event during reconciliation", "resource", event.Identifier.String(), "bufferSize", len(s.bufferedEvents)) + Info("Buffered event during reconciliation", "resource", event.Identifier.String(), "bufferSize", bufferSize) case LiveProcessing: // Check for duplicates using event hash @@ -105,45 +110,63 @@ func (s *GitTargetEventStream) OnWatchEvent(event git.Event) { resourceKey := event.Identifier.String() if lastHash, exists := s.processedEventHashes[resourceKey]; exists && lastHash == eventHash { + s.mu.Unlock() s.logger.V(1).Info("Skipping duplicate event", "resource", resourceKey, "hash", eventHash) return } + s.mu.Unlock() // Process immediately s.processEvent(event, eventHash, resourceKey) + + default: + s.mu.Unlock() } } // OnReconciliationComplete signals that initial reconciliation has finished. func (s *GitTargetEventStream) OnReconciliationComplete() { + s.mu.Lock() if s.state != StartupReconcile { + currentState := s.state + s.mu.Unlock() s.logger.Info( "Reconciliation complete signal received but not in STARTUP_RECONCILE state", "currentState", - s.state, + currentState, ) return } - s.logger.Info("Reconciliation completed, transitioning to LIVE_PROCESSING", "bufferedEvents", len(s.bufferedEvents)) + bufferedEvents := append([]git.Event(nil), s.bufferedEvents...) + s.logger.Info("Reconciliation completed, transitioning to LIVE_PROCESSING", "bufferedEvents", len(bufferedEvents)) // Transition to live processing s.state = LiveProcessing + s.bufferedEvents = nil + s.mu.Unlock() // Process all buffered events - for _, event := range s.bufferedEvents { + for _, event := range bufferedEvents { eventHash := s.computeEventHash(event) resourceKey := event.Identifier.String() s.processEvent(event, eventHash, resourceKey) } - // Clear buffer - s.bufferedEvents = nil s.logger.Info("Finished processing buffered events") } // processEvent forwards the event to BranchWorker and updates deduplication state. func (s *GitTargetEventStream) processEvent(event git.Event, eventHash, resourceKey string) { + if event.Object == nil && event.Operation != "DELETE" { + s.logger.V(1).Info( + "Skipping event with no object payload", + "resource", resourceKey, + "operation", event.Operation, + ) + return + } + event.GitTargetName = s.gitTargetName event.GitTargetNamespace = s.gitTargetNamespace @@ -151,7 +174,9 @@ func (s *GitTargetEventStream) processEvent(event git.Event, eventHash, resource s.branchWorker.Enqueue(event) // Update deduplication state + s.mu.Lock() s.processedEventHashes[resourceKey] = eventHash + s.mu.Unlock() s.logger.V(1).Info("Processed event", "resource", resourceKey, "hash", eventHash) } @@ -181,21 +206,29 @@ func (s *GitTargetEventStream) computeEventHash(event git.Event) string { // GetState returns the current state of the event stream. func (s *GitTargetEventStream) GetState() EventStreamState { + s.mu.RLock() + defer s.mu.RUnlock() return s.state } // GetBufferedEventCount returns the number of events currently buffered. func (s *GitTargetEventStream) GetBufferedEventCount() int { + s.mu.RLock() + defer s.mu.RUnlock() return len(s.bufferedEvents) } // GetProcessedEventCount returns the number of unique events processed. func (s *GitTargetEventStream) GetProcessedEventCount() int { + s.mu.RLock() + defer s.mu.RUnlock() return len(s.processedEventHashes) } // String returns a string representation for debugging. func (s *GitTargetEventStream) String() string { + s.mu.RLock() + defer s.mu.RUnlock() return fmt.Sprintf("GitTargetEventStream(gitTarget=%s/%s, state=%s, buffered=%d, processed=%d)", s.gitTargetNamespace, s.gitTargetName, s.state, len(s.bufferedEvents), len(s.processedEventHashes)) } diff --git a/internal/reconcile/reconciler_manager.go b/internal/reconcile/reconciler_manager.go index 4988ae08..e8972ace 100644 --- a/internal/reconcile/reconciler_manager.go +++ b/internal/reconcile/reconciler_manager.go @@ -21,6 +21,7 @@ package reconcile import ( "context" "errors" + "sync" "github.com/go-logr/logr" @@ -30,6 +31,7 @@ import ( // ReconcilerManager manages the lifecycle of FolderReconciler instances. type ReconcilerManager struct { + mu sync.RWMutex reconcilers map[string]*FolderReconciler // key = gitDest.Key() = "namespace/name" eventRouter interface { ProcessControlEvent(ctx context.Context, event events.ControlEvent) error @@ -51,6 +53,15 @@ func NewReconcilerManager( } } +// SetEventRouter sets the control-event processor dependency after construction. +func (m *ReconcilerManager) SetEventRouter( + eventRouter interface { + ProcessControlEvent(ctx context.Context, event events.ControlEvent) error + }, +) { + m.eventRouter = eventRouter +} + // CreateReconciler creates or retrieves a FolderReconciler for the given GitDestination. func (m *ReconcilerManager) CreateReconciler( gitDest types.ResourceReference, @@ -58,6 +69,9 @@ func (m *ReconcilerManager) CreateReconciler( ) *FolderReconciler { key := gitDest.Key() + m.mu.Lock() + defer m.mu.Unlock() + if reconciler, exists := m.reconcilers[key]; exists { m.logger.V(1).Info("Reconciler already exists", "gitDest", gitDest.String()) return reconciler @@ -71,12 +85,17 @@ func (m *ReconcilerManager) CreateReconciler( // GetReconciler retrieves a FolderReconciler for the given GitDestination. func (m *ReconcilerManager) GetReconciler(gitDest types.ResourceReference) (*FolderReconciler, bool) { + m.mu.RLock() + defer m.mu.RUnlock() reconciler, exists := m.reconcilers[gitDest.Key()] return reconciler, exists } // DeleteReconciler removes a FolderReconciler from management. func (m *ReconcilerManager) DeleteReconciler(gitDest types.ResourceReference) bool { + m.mu.Lock() + defer m.mu.Unlock() + key := gitDest.Key() if _, exists := m.reconcilers[key]; !exists { m.logger.V(1).Info("Reconciler not found", "gitDest", gitDest.String()) @@ -97,6 +116,9 @@ func (m *ReconcilerManager) EmitControlEvent(event events.ControlEvent) error { // ListReconcilers returns all managed reconcilers. func (m *ReconcilerManager) ListReconcilers() []*FolderReconciler { + m.mu.RLock() + defer m.mu.RUnlock() + var reconcilers []*FolderReconciler for _, reconciler := range m.reconcilers { reconcilers = append(reconcilers, reconciler) @@ -106,5 +128,7 @@ func (m *ReconcilerManager) ListReconcilers() []*FolderReconciler { // CountReconcilers returns the number of managed reconcilers. func (m *ReconcilerManager) CountReconcilers() int { + m.mu.RLock() + defer m.mu.RUnlock() return len(m.reconcilers) } diff --git a/internal/sanitize/marshal.go b/internal/sanitize/marshal.go index d53f1064..f816409e 100644 --- a/internal/sanitize/marshal.go +++ b/internal/sanitize/marshal.go @@ -20,6 +20,7 @@ package sanitize import ( "bytes" + "errors" "fmt" "sort" @@ -30,6 +31,10 @@ import ( // MarshalToOrderedYAML converts an unstructured object to YAML with guaranteed field order. // Field order: apiVersion, kind, metadata, then payload (spec, data, rules, etc.) func MarshalToOrderedYAML(obj *unstructured.Unstructured) ([]byte, error) { + if obj == nil { + return nil, errors.New("object is nil") + } + var buf bytes.Buffer // Header: apiVersion, kind, metadata diff --git a/internal/sanitize/marshal_test.go b/internal/sanitize/marshal_test.go index cf00472d..30d3276d 100644 --- a/internal/sanitize/marshal_test.go +++ b/internal/sanitize/marshal_test.go @@ -59,6 +59,13 @@ func TestMarshalToOrderedYAML_FieldOrder(t *testing.T) { assert.Less(t, metadataPos, specPos, "metadata should come before spec") } +func TestMarshalToOrderedYAML_NilObject(t *testing.T) { + yamlBytes, err := MarshalToOrderedYAML(nil) + require.Error(t, err) + assert.Nil(t, yamlBytes) + assert.Contains(t, err.Error(), "object is nil") +} + func TestMarshalToOrderedYAML_Pod(t *testing.T) { obj := &unstructured.Unstructured{ Object: map[string]interface{}{ diff --git a/internal/watch/event_router.go b/internal/watch/event_router.go index b75f54a1..1c06721c 100644 --- a/internal/watch/event_router.go +++ b/internal/watch/event_router.go @@ -21,6 +21,7 @@ package watch import ( "context" "fmt" + "sync" "github.com/go-logr/logr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -44,6 +45,7 @@ type EventRouter struct { // Registry of GitTargetEventStreams by gitDest key gitTargetStreams map[string]*reconcile.GitTargetEventStream + streamsMu sync.RWMutex } // NewEventRouter creates a new event router. @@ -195,6 +197,8 @@ func (r *EventRouter) RegisterGitTargetEventStream( stream *reconcile.GitTargetEventStream, ) { key := gitDest.Key() + r.streamsMu.Lock() + defer r.streamsMu.Unlock() r.gitTargetStreams[key] = stream r.Log.Info("Registered GitTargetEventStream", "gitDest", gitDest.String(), @@ -204,6 +208,8 @@ func (r *EventRouter) RegisterGitTargetEventStream( // GetGitTargetEventStream returns the registered GitTargetEventStream for a GitTarget. func (r *EventRouter) GetGitTargetEventStream(gitDest types.ResourceReference) *reconcile.GitTargetEventStream { key := gitDest.Key() + r.streamsMu.RLock() + defer r.streamsMu.RUnlock() return r.gitTargetStreams[key] } @@ -211,6 +217,8 @@ func (r *EventRouter) GetGitTargetEventStream(gitDest types.ResourceReference) * // This is called during GitTarget deletion cleanup. func (r *EventRouter) UnregisterGitTargetEventStream(gitDest types.ResourceReference) { key := gitDest.Key() + r.streamsMu.Lock() + defer r.streamsMu.Unlock() if _, exists := r.gitTargetStreams[key]; exists { delete(r.gitTargetStreams, key) r.Log.Info("Unregistered GitTargetEventStream", "gitDest", gitDest.String()) @@ -224,7 +232,9 @@ func (r *EventRouter) RouteToGitTargetEventStream( gitDest types.ResourceReference, ) error { key := gitDest.Key() + r.streamsMu.RLock() stream, exists := r.gitTargetStreams[key] + r.streamsMu.RUnlock() if !exists { return fmt.Errorf("no GitTargetEventStream registered for %s", key) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 3f6647f1..b3ca2502 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -20,6 +20,7 @@ package e2e import ( "bytes" + "encoding/base64" "encoding/json" "errors" "fmt" @@ -453,7 +454,7 @@ var _ = Describe("Manager", Ordered, func() { createGitTarget(destName, namespace, gitProviderName, "test/invalid", "different-branch") By("verifying GitTarget fails branch validation") - verifyResourceStatus("gittarget", destName, namespace, "False", "BranchNotAllowed", "") + verifyResourceStatus("gittarget", destName, namespace, "False", "ValidationFailed", "BranchNotAllowed") cleanupGitTarget(destName, namespace) cleanupGitProvider(gitProviderName) @@ -548,6 +549,7 @@ var _ = Describe("Manager", Ordered, func() { err := applyFromTemplate("test/e2e/templates/watchrule.tmpl", data, namespace) Expect(err).NotTo(HaveOccurred(), "Failed to apply WatchRule") + verifyResourceStatus("gittarget", destName, namespace, "True", "Ready", "") verifyResourceStatus("watchrule", watchRuleName, namespace, "True", "Ready", "") verifyResourceStatus("gittarget", destName, namespace, "True", "Ready", "") @@ -611,6 +613,145 @@ var _ = Describe("Manager", Ordered, func() { cleanupGitTarget(destName, namespace) }) + It("should generate missing SOPS age secret when generateWhenMissing is enabled", func() { + gitProviderName := "gitprovider-normal" + watchRuleName := "watchrule-secret-autogen-test" + secretName := "test-secret-autogen" + generatedSecretName := "sops-age-key-autogen" + + By("ensuring generated encryption secret does not exist before test") + _, _ = utils.Run(exec.Command("kubectl", "delete", "secret", generatedSecretName, + "-n", namespace, "--ignore-not-found=true")) + + By("creating GitTarget with generateWhenMissing enabled") + destName := watchRuleName + "-dest" + createGitTargetWithEncryptionOptions( + destName, + namespace, + gitProviderName, + "e2e/secret-autogen-test", + "main", + generatedSecretName, + true, + ) + + data := struct { + Name string + Namespace string + DestinationName string + }{ + Name: watchRuleName, + Namespace: namespace, + DestinationName: destName, + } + + err := applyFromTemplate("test/e2e/templates/watchrule.tmpl", data, namespace) + Expect(err).NotTo(HaveOccurred(), "Failed to apply WatchRule") + verifyResourceStatus("watchrule", watchRuleName, namespace, "True", "Ready", "") + + By("validating generated encryption secret has recipient and warning annotations") + var generatedAgeKey string + var generatedRecipient string + Eventually(func(g Gomega) { + cmd := exec.Command("kubectl", "get", "secret", generatedSecretName, "-n", namespace, "-o", "json") + output, getErr := utils.Run(cmd) + g.Expect(getErr).NotTo(HaveOccurred()) + + var secretObj map[string]interface{} + unmarshalErr := json.Unmarshal([]byte(output), &secretObj) + g.Expect(unmarshalErr).NotTo(HaveOccurred()) + + annotations, _, annoErr := unstructured.NestedStringMap(secretObj, "metadata", "annotations") + g.Expect(annoErr).NotTo(HaveOccurred()) + g.Expect(annotations).To(HaveKey("configbutler.ai/age-recipient")) + g.Expect(annotations["configbutler.ai/age-recipient"]).To(HavePrefix("age1")) + g.Expect(annotations).To(HaveKeyWithValue("configbutler.ai/backup-warning", "REMOVE_AFTER_BACKUP")) + generatedRecipient = annotations["configbutler.ai/age-recipient"] + + sopsAgeKeyB64, found, keyErr := unstructured.NestedString(secretObj, "data", "SOPS_AGE_KEY") + g.Expect(keyErr).NotTo(HaveOccurred()) + g.Expect(found).To(BeTrue()) + + keyBytes, decodeErr := base64.StdEncoding.DecodeString(sopsAgeKeyB64) + g.Expect(decodeErr).NotTo(HaveOccurred()) + generatedAgeKey = strings.TrimSpace(string(keyBytes)) + g.Expect(generatedAgeKey).To(HavePrefix("AGE-SECRET-KEY-")) + }, "30s", "2s").Should(Succeed()) + + By("waiting for auto-generated target bootstrap file to be present") + Eventually(func(g Gomega) { + pullCmd := exec.Command("git", "pull") + pullCmd.Dir = checkoutDir + pullOutput, pullErr := pullCmd.CombinedOutput() + if pullErr != nil { + g.Expect(pullErr).NotTo(HaveOccurred(), + fmt.Sprintf("Should successfully pull latest changes. Output: %s", string(pullOutput))) + } + + bootstrapSOPSFile := filepath.Join(checkoutDir, "e2e/secret-autogen-test", ".sops.yaml") + bootstrapContent, bootstrapErr := os.ReadFile(bootstrapSOPSFile) + g.Expect(bootstrapErr).NotTo(HaveOccurred(), + fmt.Sprintf(".sops.yaml must exist at %s", bootstrapSOPSFile)) + g.Expect(string(bootstrapContent)).To(ContainSubstring(generatedRecipient)) + }, "30s", "2s").Should(Succeed()) + + By("creating Secret in watched namespace") + _, _ = utils.Run(exec.Command("kubectl", "delete", "secret", secretName, + "-n", namespace, "--ignore-not-found=true")) + cmd := exec.Command("kubectl", "create", "secret", "generic", secretName, + "--from-literal=password=do-not-commit", "-n", namespace) + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Secret creation should succeed") + + By("patching Secret once to avoid informer start race and force an update event") + patchCmd := exec.Command( + "kubectl", "patch", "secret", secretName, "-n", namespace, + "--type=merge", "--patch", `{"stringData":{"password":"autogen-never-commit-this"}}`, + ) + _, err = utils.Run(patchCmd) + Expect(err).NotTo(HaveOccurred(), "Secret patch should succeed") + + By("verifying committed secret is encrypted and decryptable with generated key") + verifyEncryptedSecretCommitted := func(g Gomega) { + pullCmd := exec.Command("git", "pull") + pullCmd.Dir = checkoutDir + pullOutput, pullErr := pullCmd.CombinedOutput() + if pullErr != nil { + g.Expect(pullErr).NotTo(HaveOccurred(), + fmt.Sprintf("Should successfully pull latest changes. Output: %s", string(pullOutput))) + } + + expectedFile := filepath.Join(checkoutDir, + "e2e/secret-autogen-test", + fmt.Sprintf("v1/secrets/%s/%s.sops.yaml", namespace, secretName)) + content, readErr := os.ReadFile(expectedFile) + g.Expect(readErr).NotTo(HaveOccurred(), fmt.Sprintf("Secret file must exist at %s", expectedFile)) + g.Expect(string(content)).To(ContainSubstring("sops:")) + g.Expect(string(content)).NotTo(ContainSubstring("autogen-never-commit-this")) + + bootstrapSOPSFile := filepath.Join(checkoutDir, "e2e/secret-autogen-test", ".sops.yaml") + bootstrapContent, bootstrapErr := os.ReadFile(bootstrapSOPSFile) + g.Expect(bootstrapErr).NotTo( + HaveOccurred(), + fmt.Sprintf(".sops.yaml must exist at %s", bootstrapSOPSFile), + ) + g.Expect(string(bootstrapContent)).To(ContainSubstring(generatedRecipient)) + + decryptedOutput, decryptErr := decryptWithControllerSOPS(content, generatedAgeKey) + g.Expect(decryptErr).NotTo(HaveOccurred(), "Should decrypt committed secret via generated age key") + g.Expect(decryptedOutput).To(ContainSubstring("YXV0b2dlbi1uZXZlci1jb21taXQtdGhpcw==")) + } + Eventually(verifyEncryptedSecretCommitted, "30s", "2s").Should(Succeed()) + + By("cleaning up test resources") + _, _ = utils.Run(exec.Command("kubectl", "delete", "secret", secretName, + "-n", namespace, "--ignore-not-found=true")) + _, _ = utils.Run(exec.Command("kubectl", "delete", "secret", generatedSecretName, + "-n", namespace, "--ignore-not-found=true")) + cleanupWatchRule(watchRuleName, namespace) + cleanupGitTarget(destName, namespace) + }) + It("should create Git commit when ConfigMap is added via WatchRule", func() { gitProviderName := "gitprovider-normal" watchRuleName := "watchrule-configmap-test" @@ -1566,7 +1707,11 @@ func verifyResourceStatus(resourceType, name, ns, expectedStatus, expectedReason } g.Expect(readyStatus).To(Equal(expectedStatus)) - g.Expect(readyReason).To(Equal(expectedReason)) + if resourceType == "gittarget" && expectedReason == "Ready" { + g.Expect([]string{"Ready", "OK"}).To(ContainElement(readyReason)) + } else { + g.Expect(readyReason).To(Equal(expectedReason)) + } if expectedMessageContains != "" { g.Expect(readyMessage).To(ContainSubstring(expectedMessageContains)) } diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 0682c506..d0f726ac 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -204,6 +204,26 @@ func getBaseFolder() string { // //nolint:unparam // in e2e helpers we accept constant namespace ("sut"); keep signature for clarity in template calls func createGitTarget(name, namespace, providerName, path, branch string) { + createGitTargetWithEncryptionOptions( + name, + namespace, + providerName, + path, + branch, + e2eEncryptionRefName, + false, + ) +} + +func createGitTargetWithEncryptionOptions( + name, + namespace, + providerName, + path, + branch, + encryptionSecretName string, + generateWhenMissing bool, +) { By(fmt.Sprintf("creating GitTarget '%s' in ns '%s' for GitProvider '%s' with path '%s'", name, namespace, providerName, path)) @@ -214,13 +234,15 @@ func createGitTarget(name, namespace, providerName, path, branch string) { Branch string Path string EncryptionSecretName string + GenerateWhenMissing bool }{ Name: name, Namespace: namespace, ProviderName: providerName, Branch: branch, Path: path, - EncryptionSecretName: e2eEncryptionRefName, + EncryptionSecretName: encryptionSecretName, + GenerateWhenMissing: generateWhenMissing, } err := applyFromTemplate("test/e2e/templates/gittarget.tmpl", data, namespace) diff --git a/test/e2e/scripts/install-smoke.sh b/test/e2e/scripts/install-smoke.sh deleted file mode 100755 index c6a8c87c..00000000 --- a/test/e2e/scripts/install-smoke.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -MODE="${1:-}" -NAMESPACE="gitops-reverser" -HELM_CHART_SOURCE="${HELM_CHART_SOURCE:-charts/gitops-reverser}" -WAIT_TIMEOUT="${WAIT_TIMEOUT:-60s}" -PROJECT_IMAGE="${PROJECT_IMAGE:-}" - -if [[ -z "${MODE}" ]]; then - echo "usage: $0 " - exit 1 -fi - -get_controller_pod_selector() { - local selector - selector="$(kubectl -n "${NAMESPACE}" get deployment gitops-reverser \ - -o jsonpath='{range $k,$v := .spec.selector.matchLabels}{$k}={$v},{end}' 2>/dev/null || true)" - selector="${selector%,}" - - if [[ -z "${selector}" ]]; then - # Fallback selector used by chart/manifests if deployment query is not available yet. - selector="app.kubernetes.io/name=gitops-reverser" - fi - - printf '%s' "${selector}" -} - -install_helm() { - local helm_image_args=() - - if [[ -n "${PROJECT_IMAGE}" ]]; then - # Helm chart image is repository + tag. For smoke tests, parse PROJECT_IMAGE and override both. - local image_no_digest image_repo image_tag - image_no_digest="${PROJECT_IMAGE%%@*}" - if [[ "${image_no_digest##*/}" == *:* ]]; then - image_repo="${image_no_digest%:*}" - image_tag="${image_no_digest##*:}" - else - image_repo="${image_no_digest}" - image_tag="latest" - fi - helm_image_args+=(--set "image.repository=${image_repo}" --set "image.tag=${image_tag}") - echo "Overriding chart image from PROJECT_IMAGE=${PROJECT_IMAGE}" - fi - - echo "Installing from Helm chart (mode=helm, source=${HELM_CHART_SOURCE})" - helm upgrade --install "name-is-cool-but-not-relevant" "${HELM_CHART_SOURCE}" \ - --namespace "${NAMESPACE}" \ - --create-namespace \ - --set fullnameOverride=gitops-reverser \ - "${helm_image_args[@]}" -} - -install_manifest() { - echo "Installing from generated dist/install.yaml (mode=manifest)" - kubectl apply -f dist/install.yaml - - if [[ -n "${PROJECT_IMAGE}" ]]; then - echo "Overriding manifest deployment image from PROJECT_IMAGE=${PROJECT_IMAGE}" - kubectl -n "${NAMESPACE}" set image deployment/gitops-reverser manager="${PROJECT_IMAGE}" - fi -} - -print_debug_info() { - local pod_selector - pod_selector="$(get_controller_pod_selector)" - - echo - echo "Install smoke test diagnostics (${MODE})" - echo "Namespace: ${NAMESPACE}" - echo "Pod selector: ${pod_selector}" - echo "Deployment status:" - kubectl -n "${NAMESPACE}" get deployment gitops-reverser -o wide || true - echo - echo "Deployment describe:" - kubectl -n "${NAMESPACE}" describe deployment gitops-reverser || true - echo - echo "Pods:" - kubectl -n "${NAMESPACE}" get pods -o wide || true - echo - echo "Controller-manager pod describe:" - kubectl -n "${NAMESPACE}" describe pod -l "${pod_selector}" || true - echo - echo "Controller-manager logs (last 200 lines):" - kubectl -n "${NAMESPACE}" logs -l "${pod_selector}" --tail=200 --all-containers=true || true - echo - echo "Recent namespace events:" - kubectl -n "${NAMESPACE}" get events --sort-by=.metadata.creationTimestamp | tail -n 50 || true -} - -run_or_debug() { - local description="$1" - shift - echo "${description}" - if ! "$@"; then - echo "FAILED: ${description}" >&2 - print_debug_info - return 1 - fi -} - -verify_installation() { - local pod_selector - pod_selector="$(get_controller_pod_selector)" - - run_or_debug \ - "Waiting for deployment rollout (timeout=${WAIT_TIMEOUT})" \ - kubectl -n "${NAMESPACE}" rollout status deployment/gitops-reverser --timeout="${WAIT_TIMEOUT}" - - run_or_debug \ - "Checking deployment availability (timeout=${WAIT_TIMEOUT})" \ - kubectl -n "${NAMESPACE}" wait --for=condition=available deployment/gitops-reverser --timeout="${WAIT_TIMEOUT}" - - run_or_debug \ - "Checking pod readiness (selector=${pod_selector}, timeout=${WAIT_TIMEOUT})" \ - kubectl -n "${NAMESPACE}" wait --for=condition=ready pod -l "${pod_selector}" --timeout="${WAIT_TIMEOUT}" - - echo "Checking CRDs" - kubectl get crd \ - gitproviders.configbutler.ai \ - gittargets.configbutler.ai \ - watchrules.configbutler.ai \ - clusterwatchrules.configbutler.ai >/dev/null - - echo "Checking validating webhook configuration" - kubectl get validatingwebhookconfiguration gitops-reverser-validating-webhook-configuration >/dev/null -} - -case "${MODE}" in - helm) - install_helm - ;; - manifest) - install_manifest - ;; - *) - echo "unsupported mode: ${MODE} (expected helm or manifest)" - exit 1 - ;; -esac - -verify_installation -echo "Install smoke test passed (${MODE})" diff --git a/test/e2e/scripts/run-quickstart.sh b/test/e2e/scripts/run-quickstart.sh new file mode 100755 index 00000000..a20c9064 --- /dev/null +++ b/test/e2e/scripts/run-quickstart.sh @@ -0,0 +1,585 @@ +#!/usr/bin/env bash +set -euo pipefail + +MODE="${1:-}" +NAMESPACE="gitops-reverser" +QUICKSTART_NAMESPACE="sut" +HELM_CHART_SOURCE="${HELM_CHART_SOURCE:-charts/gitops-reverser}" +WAIT_TIMEOUT="${WAIT_TIMEOUT:-60s}" +QUICKSTART_TIMEOUT_SECONDS="${QUICKSTART_TIMEOUT_SECONDS:-180}" +PROJECT_IMAGE="${PROJECT_IMAGE:-}" + +TEST_ID="$(date +%s)-$RANDOM" +REPO_NAME="quickstart-smoke-${MODE}-${TEST_ID}" +CHECKOUT_DIR="/tmp/gitops-reverser/${REPO_NAME}" + +GIT_PROVIDER_NAME="quickstart-provider-${TEST_ID}" +GIT_TARGET_NAME="quickstart-target-${TEST_ID}" +WATCHRULE_NAME="quickstart-watchrule-${TEST_ID}" +INVALID_PROVIDER_NAME="quickstart-invalid-provider-${TEST_ID}" +ENCRYPTION_SECRET_NAME="quickstart-sops-age-key-${TEST_ID}" +CONFIGMAP_NAME="quickstart-config-${TEST_ID}" +EXPECTED_CONFIGMAP_FILE="${CHECKOUT_DIR}/live-cluster/v1/configmaps/${QUICKSTART_NAMESPACE}/${CONFIGMAP_NAME}.yaml" +SECRET_NAME="quickstart-secret-${TEST_ID}" +SECRET_VALUE_ONE="quickstart-plaintext-one-${TEST_ID}" +SECRET_VALUE_TWO="quickstart-plaintext-two-${TEST_ID}" +EXPECTED_SECRET_FILE="${CHECKOUT_DIR}/live-cluster/v1/secrets/${QUICKSTART_NAMESPACE}/${SECRET_NAME}.sops.yaml" +GITEA_PORT_FORWARD_PID="" + +if [[ -z "${MODE}" ]]; then + echo "usage: $0 " + exit 1 +fi + +get_controller_pod_selector() { + local selector + selector="$(kubectl -n "${NAMESPACE}" get deployment gitops-reverser \ + -o jsonpath='{range $k,$v := .spec.selector.matchLabels}{$k}={$v},{end}' 2>/dev/null || true)" + selector="${selector%,}" + + if [[ -z "${selector}" ]]; then + selector="app.kubernetes.io/name=gitops-reverser" + fi + + printf '%s' "${selector}" +} + +install_helm() { + local helm_image_args=() + local helm_crd_args=() + + if [[ -n "${PROJECT_IMAGE}" ]]; then + local image_no_digest image_repo image_tag + image_no_digest="${PROJECT_IMAGE%%@*}" + if [[ "${image_no_digest##*/}" == *:* ]]; then + image_repo="${image_no_digest%:*}" + image_tag="${image_no_digest##*:}" + else + image_repo="${image_no_digest}" + image_tag="latest" + fi + helm_image_args+=(--set "image.repository=${image_repo}" --set "image.tag=${image_tag}") + echo "Overriding chart image from PROJECT_IMAGE=${PROJECT_IMAGE}" + fi + + if kubectl get crd gitproviders.configbutler.ai gittargets.configbutler.ai \ + watchrules.configbutler.ai clusterwatchrules.configbutler.ai >/dev/null 2>&1; then + helm_crd_args+=(--skip-crds) + echo "Detected existing CRDs in cluster; installing Helm release with --skip-crds" + fi + + echo "Installing from Helm chart (mode=helm, source=${HELM_CHART_SOURCE})" + helm upgrade --install "name-is-cool-but-not-relevant" "${HELM_CHART_SOURCE}" \ + --namespace "${NAMESPACE}" \ + --create-namespace \ + --set fullnameOverride=gitops-reverser \ + "${helm_crd_args[@]}" \ + "${helm_image_args[@]}" +} + +install_manifest() { + echo "Installing from generated dist/install.yaml (mode=manifest)" + kubectl apply -f dist/install.yaml + + if [[ -n "${PROJECT_IMAGE}" ]]; then + echo "Overriding manifest deployment image from PROJECT_IMAGE=${PROJECT_IMAGE}" + kubectl -n "${NAMESPACE}" set image deployment/gitops-reverser manager="${PROJECT_IMAGE}" + fi +} + +print_debug_info() { + local pod_selector + pod_selector="$(get_controller_pod_selector)" + + echo + echo "Install quickstart smoke diagnostics (${MODE})" + echo "Namespace: ${NAMESPACE}" + echo "Quickstart namespace: ${QUICKSTART_NAMESPACE}" + echo "Pod selector: ${pod_selector}" + echo + echo "Deployment status:" + kubectl -n "${NAMESPACE}" get deployment gitops-reverser -o wide || true + echo + echo "Deployment describe:" + kubectl -n "${NAMESPACE}" describe deployment gitops-reverser || true + echo + echo "Pods:" + kubectl -n "${NAMESPACE}" get pods -o wide || true + echo + echo "Controller-manager pod describe:" + kubectl -n "${NAMESPACE}" describe pod -l "${pod_selector}" || true + echo + echo "Controller-manager logs (last 200 lines):" + kubectl -n "${NAMESPACE}" logs -l "${pod_selector}" --tail=200 --all-containers=true || true + echo + echo "Quickstart resources:" + kubectl -n "${QUICKSTART_NAMESPACE}" get gitprovider,gittarget,watchrule \ + "${GIT_PROVIDER_NAME}" "${GIT_TARGET_NAME}" "${WATCHRULE_NAME}" "${INVALID_PROVIDER_NAME}" 2>/dev/null || true + echo + echo "Quickstart resource status dumps:" + kubectl -n "${QUICKSTART_NAMESPACE}" get gitprovider "${GIT_PROVIDER_NAME}" -o yaml 2>/dev/null || true + kubectl -n "${QUICKSTART_NAMESPACE}" get gittarget "${GIT_TARGET_NAME}" -o yaml 2>/dev/null || true + kubectl -n "${QUICKSTART_NAMESPACE}" get watchrule "${WATCHRULE_NAME}" -o yaml 2>/dev/null || true + kubectl -n "${QUICKSTART_NAMESPACE}" get gitprovider "${INVALID_PROVIDER_NAME}" -o yaml 2>/dev/null || true + echo + echo "Recent namespace events (${NAMESPACE}):" + kubectl -n "${NAMESPACE}" get events --sort-by=.metadata.creationTimestamp | tail -n 50 || true + echo + echo "Recent namespace events (${QUICKSTART_NAMESPACE}):" + kubectl -n "${QUICKSTART_NAMESPACE}" get events --sort-by=.metadata.creationTimestamp | tail -n 50 || true + echo + echo "Git checkout state (${CHECKOUT_DIR}):" + git -C "${CHECKOUT_DIR}" status --short 2>/dev/null || true + git -C "${CHECKOUT_DIR}" --no-pager log --oneline -n 10 2>/dev/null || true +} + +run_or_debug() { + local description="$1" + shift + echo "${description}" + if ! "$@"; then + echo "FAILED: ${description}" >&2 + print_debug_info + return 1 + fi +} + +cleanup_gitea_port_forward() { + if [[ -n "${GITEA_PORT_FORWARD_PID}" ]]; then + kill "${GITEA_PORT_FORWARD_PID}" >/dev/null 2>&1 || true + wait "${GITEA_PORT_FORWARD_PID}" >/dev/null 2>&1 || true + GITEA_PORT_FORWARD_PID="" + fi +} + +ensure_gitea_api_port_forward() { + cleanup_gitea_port_forward + pkill -f "kubectl.*port-forward.*13000:13000" >/dev/null 2>&1 || true + + kubectl -n gitea-e2e port-forward svc/gitea-http 13000:13000 >/tmp/gitea-port-forward.log 2>&1 & + GITEA_PORT_FORWARD_PID="$!" + trap cleanup_gitea_port_forward EXIT + + for _ in {1..30}; do + if curl -s -f "http://localhost:13000/api/v1/version" >/dev/null 2>&1; then + echo "Gitea API port-forward is ready on localhost:13000" + return 0 + fi + sleep 2 + done + + echo "Timed out waiting for Gitea API port-forward readiness" + return 1 +} + +reset_install_state() { + echo "Resetting install state for clean first-time install validation" + kubectl delete clusterrole \ + gitops-reverser-manager-role \ + gitops-reverser-metrics-reader \ + gitops-reverser-proxy-role \ + --ignore-not-found=true >/dev/null || true + kubectl delete clusterrolebinding \ + gitops-reverser-manager-rolebinding \ + gitops-reverser-proxy-rolebinding \ + --ignore-not-found=true >/dev/null || true + kubectl delete validatingwebhookconfiguration gitops-reverser-validating-webhook-configuration \ + --ignore-not-found=true >/dev/null || true + kubectl delete namespace "${NAMESPACE}" --wait=true --ignore-not-found=true >/dev/null || true +} + +verify_installation() { + local pod_selector + pod_selector="$(get_controller_pod_selector)" + + run_or_debug \ + "Waiting for deployment rollout (timeout=${WAIT_TIMEOUT})" \ + kubectl -n "${NAMESPACE}" rollout status deployment/gitops-reverser --timeout="${WAIT_TIMEOUT}" + + run_or_debug \ + "Checking deployment availability (timeout=${WAIT_TIMEOUT})" \ + kubectl -n "${NAMESPACE}" wait --for=condition=available deployment/gitops-reverser --timeout="${WAIT_TIMEOUT}" + + run_or_debug \ + "Checking pod readiness (selector=${pod_selector}, timeout=${WAIT_TIMEOUT})" \ + kubectl -n "${NAMESPACE}" wait --for=condition=ready pod -l "${pod_selector}" --timeout="${WAIT_TIMEOUT}" + + echo "Checking CRDs" + kubectl get crd \ + gitproviders.configbutler.ai \ + gittargets.configbutler.ai \ + watchrules.configbutler.ai \ + clusterwatchrules.configbutler.ai >/dev/null + + echo "Checking validating webhook configuration" + kubectl get validatingwebhookconfiguration gitops-reverser-validating-webhook-configuration >/dev/null +} + +wait_for_ready() { + local kind="$1" + local name="$2" + local namespace="$3" + local timeout_seconds="${4}" + local start_epoch now status reason message + start_epoch="$(date +%s)" + + while true; do + status="$(kubectl -n "${namespace}" get "${kind}" "${name}" \ + -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)" + reason="$(kubectl -n "${namespace}" get "${kind}" "${name}" \ + -o jsonpath='{.status.conditions[?(@.type=="Ready")].reason}' 2>/dev/null || true)" + message="$(kubectl -n "${namespace}" get "${kind}" "${name}" \ + -o jsonpath='{.status.conditions[?(@.type=="Ready")].message}' 2>/dev/null || true)" + + if [[ "${status}" == "True" ]]; then + echo "${kind}/${name} Ready=True (${reason})" + return 0 + fi + + now="$(date +%s)" + if (( now - start_epoch >= timeout_seconds )); then + echo "Timed out waiting for ${kind}/${name} Ready=True (status=${status} reason=${reason})" + echo "Last message: ${message}" + kubectl -n "${namespace}" get "${kind}" "${name}" -o yaml || true + return 1 + fi + sleep 2 + done +} + +wait_for_connection_failed_actionable_message() { + local name="$1" + local timeout_seconds="${2}" + local start_epoch now status reason message + start_epoch="$(date +%s)" + + while true; do + status="$(kubectl -n "${QUICKSTART_NAMESPACE}" get gitprovider "${name}" \ + -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || true)" + reason="$(kubectl -n "${QUICKSTART_NAMESPACE}" get gitprovider "${name}" \ + -o jsonpath='{.status.conditions[?(@.type=="Ready")].reason}' 2>/dev/null || true)" + message="$(kubectl -n "${QUICKSTART_NAMESPACE}" get gitprovider "${name}" \ + -o jsonpath='{.status.conditions[?(@.type=="Ready")].message}' 2>/dev/null || true)" + + if [[ "${status}" == "False" && "${reason}" == "ConnectionFailed" && -n "${message}" ]]; then + shopt -s nocasematch + if [[ "${message}" =~ auth|credential|connect|repository|secret ]]; then + shopt -u nocasematch + echo "gitprovider/${name} exposes actionable failure message" + return 0 + fi + shopt -u nocasematch + fi + + now="$(date +%s)" + if (( now - start_epoch >= timeout_seconds )); then + echo "Timed out waiting for actionable ConnectionFailed message on gitprovider/${name}" + echo "Last seen status=${status} reason=${reason} message=${message}" + kubectl -n "${QUICKSTART_NAMESPACE}" get gitprovider "${name}" -o yaml || true + return 1 + fi + sleep 2 + done +} + +git_commit_count() { + git -C "${CHECKOUT_DIR}" rev-list --count --all 2>/dev/null || echo 0 +} + +wait_for_file_exists() { + local file_path="$1" + local timeout_seconds="$2" + local start_epoch now + start_epoch="$(date +%s)" + + while true; do + git -C "${CHECKOUT_DIR}" pull --ff-only >/dev/null 2>&1 || true + if [[ -f "${file_path}" ]]; then + return 0 + fi + now="$(date +%s)" + if (( now - start_epoch >= timeout_seconds )); then + echo "Timed out waiting for file to exist: ${file_path}" + return 1 + fi + sleep 2 + done +} + +wait_for_file_contains() { + local file_path="$1" + local expected_text="$2" + local timeout_seconds="$3" + local start_epoch now + start_epoch="$(date +%s)" + + while true; do + git -C "${CHECKOUT_DIR}" pull --ff-only >/dev/null 2>&1 || true + if [[ -f "${file_path}" ]] && grep -Fq "${expected_text}" "${file_path}"; then + return 0 + fi + now="$(date +%s)" + if (( now - start_epoch >= timeout_seconds )); then + echo "Timed out waiting for file content: ${file_path} to contain '${expected_text}'" + return 1 + fi + sleep 2 + done +} + +wait_for_file_absent() { + local file_path="$1" + local timeout_seconds="$2" + local start_epoch now + start_epoch="$(date +%s)" + + while true; do + git -C "${CHECKOUT_DIR}" pull --ff-only >/dev/null 2>&1 || true + if [[ ! -f "${file_path}" ]]; then + return 0 + fi + now="$(date +%s)" + if (( now - start_epoch >= timeout_seconds )); then + echo "Timed out waiting for file to be deleted: ${file_path}" + return 1 + fi + sleep 2 + done +} + +wait_for_file_not_contains() { + local file_path="$1" + local unexpected_text="$2" + local timeout_seconds="$3" + local start_epoch now + start_epoch="$(date +%s)" + + while true; do + git -C "${CHECKOUT_DIR}" pull --ff-only >/dev/null 2>&1 || true + if [[ -f "${file_path}" ]] && ! grep -Fq "${unexpected_text}" "${file_path}"; then + return 0 + fi + now="$(date +%s)" + if (( now - start_epoch >= timeout_seconds )); then + echo "Timed out waiting for file content: ${file_path} to NOT contain '${unexpected_text}'" + return 1 + fi + sleep 2 + done +} + +extract_generated_age_key() { + local secret_name="$1" + local key_data age_key + + key_data="$( + kubectl -n "${QUICKSTART_NAMESPACE}" get secret "${secret_name}" -o jsonpath='{.data.SOPS_AGE_KEY}' 2>/dev/null || true + )" + if [[ -z "${key_data}" ]]; then + echo "SOPS_AGE_KEY is missing from secret ${QUICKSTART_NAMESPACE}/${secret_name}" + return 1 + fi + + age_key="$(printf '%s' "${key_data}" | base64 -d 2>/dev/null | awk '/AGE-SECRET-KEY-/{print; exit}')" + if [[ -z "${age_key}" ]]; then + echo "Failed to decode AGE-SECRET-KEY from secret ${QUICKSTART_NAMESPACE}/${secret_name}" + return 1 + fi + + printf '%s' "${age_key}" +} + +decrypt_file_with_controller_sops() { + local file_path="$1" + local age_key="$2" + local pod_selector pod_name + + pod_selector="$(get_controller_pod_selector)" + pod_name="$( + kubectl -n "${NAMESPACE}" get pod -l "${pod_selector}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true + )" + if [[ -z "${pod_name}" ]]; then + echo "Failed to find controller pod in namespace ${NAMESPACE}" + return 1 + fi + + kubectl -n "${NAMESPACE}" exec -i "${pod_name}" -- \ + env "SOPS_AGE_KEY=${age_key}" \ + /usr/local/bin/sops --decrypt --input-type yaml --output-type yaml /dev/stdin < "${file_path}" +} + +run_quickstart_flow() { + ensure_gitea_api_port_forward + + echo "Setting up quickstart repo and credentials" + run_or_debug \ + "Creating dedicated Gitea repo for quickstart smoke (${REPO_NAME})" \ + bash test/e2e/scripts/setup-gitea.sh "${REPO_NAME}" "${CHECKOUT_DIR}" + + echo "Applying minimal GitProvider/GitTarget/WatchRule quickstart resources" + cat </dev/null + local warning_anno recipient_anno + warning_anno="$(kubectl -n "${QUICKSTART_NAMESPACE}" get secret "${ENCRYPTION_SECRET_NAME}" \ + -o jsonpath='{.metadata.annotations.configbutler\.ai/backup-warning}')" + recipient_anno="$(kubectl -n "${QUICKSTART_NAMESPACE}" get secret "${ENCRYPTION_SECRET_NAME}" \ + -o jsonpath='{.metadata.annotations.configbutler\.ai/age-recipient}')" + if [[ "${warning_anno}" != "REMOVE_AFTER_BACKUP" ]]; then + echo "Expected backup warning annotation REMOVE_AFTER_BACKUP, got '${warning_anno}'" + return 1 + fi + if [[ "${recipient_anno}" != age1* ]]; then + echo "Expected age recipient annotation to start with age1, got '${recipient_anno}'" + return 1 + fi + + echo "Running create/update/delete quickstart functional checks" + local commits_before_create commits_after_create commits_after_update commits_after_delete + commits_before_create="$(git_commit_count)" + + kubectl -n "${QUICKSTART_NAMESPACE}" delete configmap "${CONFIGMAP_NAME}" --ignore-not-found=true >/dev/null + kubectl -n "${QUICKSTART_NAMESPACE}" create configmap "${CONFIGMAP_NAME}" --from-literal=value=one >/dev/null + wait_for_file_exists "${EXPECTED_CONFIGMAP_FILE}" "${QUICKSTART_TIMEOUT_SECONDS}" + wait_for_file_contains "${EXPECTED_CONFIGMAP_FILE}" "value: one" "${QUICKSTART_TIMEOUT_SECONDS}" + commits_after_create="$(git_commit_count)" + if (( commits_after_create <= commits_before_create )); then + echo "Expected commit count to increase after create (${commits_before_create} -> ${commits_after_create})" + return 1 + fi + + kubectl -n "${QUICKSTART_NAMESPACE}" patch configmap "${CONFIGMAP_NAME}" --type merge --patch '{"data":{"value":"two"}}' >/dev/null + wait_for_file_contains "${EXPECTED_CONFIGMAP_FILE}" "value: two" "${QUICKSTART_TIMEOUT_SECONDS}" + commits_after_update="$(git_commit_count)" + if (( commits_after_update <= commits_after_create )); then + echo "Expected commit count to increase after update (${commits_after_create} -> ${commits_after_update})" + return 1 + fi + + kubectl -n "${QUICKSTART_NAMESPACE}" delete configmap "${CONFIGMAP_NAME}" --ignore-not-found=true >/dev/null + wait_for_file_absent "${EXPECTED_CONFIGMAP_FILE}" "${QUICKSTART_TIMEOUT_SECONDS}" + commits_after_delete="$(git_commit_count)" + if (( commits_after_delete <= commits_after_update )); then + echo "Expected commit count to increase after delete (${commits_after_update} -> ${commits_after_delete})" + return 1 + fi + + echo "Running encrypted Secret commit check" + local commits_before_secret commits_after_secret secret_value_one_b64 secret_value_two_b64 + commits_before_secret="$(git_commit_count)" + secret_value_one_b64="$(printf '%s' "${SECRET_VALUE_ONE}" | base64 | tr -d '\n')" + secret_value_two_b64="$(printf '%s' "${SECRET_VALUE_TWO}" | base64 | tr -d '\n')" + + kubectl -n "${QUICKSTART_NAMESPACE}" delete secret "${SECRET_NAME}" --ignore-not-found=true >/dev/null + kubectl -n "${QUICKSTART_NAMESPACE}" create secret generic "${SECRET_NAME}" \ + --from-literal=password="${SECRET_VALUE_ONE}" >/dev/null + kubectl -n "${QUICKSTART_NAMESPACE}" patch secret "${SECRET_NAME}" --type merge \ + --patch "{\"stringData\":{\"password\":\"${SECRET_VALUE_TWO}\"}}" >/dev/null + + wait_for_file_exists "${EXPECTED_SECRET_FILE}" "${QUICKSTART_TIMEOUT_SECONDS}" + wait_for_file_contains "${EXPECTED_SECRET_FILE}" "sops:" "${QUICKSTART_TIMEOUT_SECONDS}" + wait_for_file_not_contains "${EXPECTED_SECRET_FILE}" "${SECRET_VALUE_ONE}" "${QUICKSTART_TIMEOUT_SECONDS}" + wait_for_file_not_contains "${EXPECTED_SECRET_FILE}" "${SECRET_VALUE_TWO}" "${QUICKSTART_TIMEOUT_SECONDS}" + wait_for_file_not_contains "${EXPECTED_SECRET_FILE}" "${secret_value_one_b64}" "${QUICKSTART_TIMEOUT_SECONDS}" + wait_for_file_not_contains "${EXPECTED_SECRET_FILE}" "${secret_value_two_b64}" "${QUICKSTART_TIMEOUT_SECONDS}" + + echo "Validating encrypted Secret is decryptable with generated key" + local generated_age_key decrypted_secret + generated_age_key="$(extract_generated_age_key "${ENCRYPTION_SECRET_NAME}")" + decrypted_secret="$(decrypt_file_with_controller_sops "${EXPECTED_SECRET_FILE}" "${generated_age_key}")" + if ! grep -Fq "${secret_value_two_b64}" <<<"${decrypted_secret}"; then + echo "Expected decrypted secret payload to contain updated value (base64) after patch" + return 1 + fi + + commits_after_secret="$(git_commit_count)" + if (( commits_after_secret <= commits_before_secret )); then + echo "Expected commit count to increase after secret write (${commits_before_secret} -> ${commits_after_secret})" + return 1 + fi + + kubectl -n "${QUICKSTART_NAMESPACE}" delete secret "${SECRET_NAME}" --ignore-not-found=true >/dev/null + + echo "Running invalid-credentials quickstart UX check" + cat <