diff --git a/.github/workflows/e2e-suite.yaml b/.github/workflows/e2e-suite.yaml
new file mode 100644
index 0000000..dce5b8a
--- /dev/null
+++ b/.github/workflows/e2e-suite.yaml
@@ -0,0 +1,109 @@
+# Reusable e2e workflow (workflow_call): shared setup (build image, kind, deploy
+# fluence base), then run ONE test suite — a directory under test/e2e/. The
+# suite's tests are DISCOVERED (every NN-*.sh, run in sorted order); adding a test
+# is just dropping a file in the directory, no workflow edit. If the suite needs
+# special preparation it provides a setup.sh in its directory, which is run before
+# the tests (the gang suite has none; the quantum suite installs the qpu add-on).
+name: e2e-suite
+on:
+  workflow_call:
+    inputs:
+      suite:
+        description: "test suite directory name under test/e2e/ (e.g. gang, quantum)"
+        required: true
+        type: string
+
+env:
+  IMAGE: vanessa/fluence:test
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build fluence image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          push: false
+          load: true
+          tags: ${{ env.IMAGE }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Create k8s Kind Cluster
+        uses: helm/kind-action@v1.10.0
+        with:
+          version: v0.32.0              # required for gang
+          node_image: kindest/node:v1.36.1
+          config: ./deploy/kind-config.yaml
+
+      - name: Free Disk Space (Ubuntu)
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc \
+                      /opt/hostedtoolcache/CodeQL
+          sudo apt-get clean
+          df -h
+
+      - name: Load docker images
+        run: |
+          cluster=$(kind get clusters)
+          kind load --name "$cluster" docker-image ${{ env.IMAGE }}
+
+      - name: Deploy fluence (base)
+        run: |
+          kubectl apply -f deploy/fluence-test.yaml
+          kubectl rollout status -n kube-system deployment/fluence --timeout=180s  # may return while the old ReplicaSet's pod is still terminating, hence the poll below
+          POD=""  # will hold the name of a fluence pod that is Ready and has no deletionTimestamp
+          for i in $(seq 1 60); do  # up to 60 attempts, 2s apart
+            POD=$(kubectl -n kube-system get pods -l app=fluence \
+              -o go-template='{{range .items}}{{if not .metadata.deletionTimestamp}}{{$name := .metadata.name}}{{range .status.conditions}}{{if and (eq .type "Ready") (eq .status "True")}}{{$name}}{{"\n"}}{{end}}{{end}}{{end}}{{end}}' 2>/dev/null | head -1 || true)
+            [ -n "$POD" ] && break
+            sleep 2
+          done
+          [ -n "$POD" ] || { echo "ERROR: no Ready non-terminating fluence pod"; kubectl -n kube-system get pods -l app=fluence -o wide; exit 1; }
+          echo "Using pod: $POD"
+          sleep 5  # brief pause to let the container runtime stabilize before exec
+          kubectl -n kube-system exec "$POD" -- /bin/bash -c "cat /tmp/fluence-graph-*.json" || true  # best-effort: a missing graph dump does not fail the step
+          kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{": cpu="}{.status.allocatable.cpu}{" mem="}{.status.allocatable.memory}{"\n"}{end}'  # print each node's allocatable cpu/memory
+
+      # Per-suite special setup, if the suite directory provides one.
+      - name: Suite setup (${{ inputs.suite }})
+        env:
+          SUITE: ${{ inputs.suite }}   # handed over via env so the value is never parsed as shell
+        run: |
+          s="test/e2e/$SUITE/setup.sh"
+          [ -f "$s" ] || { echo "no setup.sh for suite '$SUITE' — skipping"; exit 0; }
+          echo "running $s"
+          bash "$s"
+
+      # Discover and run every NN-*.sh in the suite directory, in sorted order.
+      - name: Run suite (${{ inputs.suite }})
+        env:
+          SUITE: ${{ inputs.suite }}   # handed over via env so the value is never parsed as shell
+        run: |
+          dir="test/e2e/$SUITE"
+          [ -d "$dir" ] || { echo "ERROR: no such suite dir: $dir"; exit 1; }
+          shopt -s nullglob   # an unmatched pattern expands to nothing instead of itself
+          tests=("$dir"/[0-9]*.sh)   # pathname expansion already returns a sorted list; no re-split/re-glob via $(sort)
+          [ ${#tests[@]} -gt 0 ] || { echo "ERROR: no NN-*.sh tests in $dir"; exit 1; }
+          echo "discovered ${#tests[@]} test(s) in $dir:"
+          printf '  %s\n' "${tests[@]}"
+          for t in "${tests[@]}"; do
+            echo "::group::$t"
+            bash "$t"
+            echo "::endgroup::"
+          done
+
+      - name: Dump diagnostics on failure
+        if: failure()   # runs only when an earlier step in this job failed
+        run: |
+          kubectl get pods -A -o wide || true
+          kubectl logs -n kube-system deployment/fluence || true
+          kubectl logs -n kube-system deployment/fluence-webhook || true
diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml
index a6c1266..4b405f6 100644
--- a/.github/workflows/e2e-tests.yaml
+++ b/.github/workflows/e2e-tests.yaml
@@ -8,140 +8,15 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
-env:
-  KIND_VERSION: v0.32.0
-  IMAGE: vanessa/fluence:test
-
 jobs:
+  # Fan out the suites as parallel jobs, each a call into the reusable workflow.
+  # The shared setup (build, kind, deploy) lives once in e2e-suite.yaml; the
+  # matrix runs gang and quantum concurrently.
   e2e:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Build fluence image
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          file: ./Dockerfile
-          push: false
-          load: true
-          tags: ${{ env.IMAGE }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          
-      - name: Create k8s Kind Cluster
-        uses: helm/kind-action@v1.10.0
-        with:
-          version: v0.32.0              # required for gang
-          node_image: kindest/node:v1.36.1
-          config: ./deploy/kind-config.yaml
-          
-      - name: Free Disk Space (Ubuntu)
-        run: |
-          echo "=== Disk space before cleanup ==="
-          df -h
-          
-          # Remove large software runtimes and tools
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache/CodeQL
-          
-          # Clean package caches
-          sudo apt-get clean          
-          echo "=== Disk space after cleanup ==="
-          df -h
-
-      - name: Load docker images
-        run: |
-          kind get clusters
-          cluster=$(kind get clusters)
-          kind load --name $cluster docker-image vanessa/fluence:test
-
-      - name: Deploy fluence (base)
-        run: |
-          kubectl apply -f deploy/fluence-test.yaml
-          kubectl rollout status -n kube-system deployment/fluence --timeout=180s
-          # rollout status can return while the OLD ReplicaSet's pod is still
-          # Running (terminating). Selecting by phase=Running alone can grab that
-          # stale pod, which then 404s on exec/logs. Wait until exactly one
-          # fluence pod remains, and require it to be Ready and not terminating.
-          POD=""
-          for i in $(seq 1 60); do
-            # names of pods that are Ready AND have no deletionTimestamp (not terminating)
-            POD=$(kubectl -n kube-system get pods -l app=fluence \
-              -o go-template='{{range .items}}{{if not .metadata.deletionTimestamp}}{{$name := .metadata.name}}{{range .status.conditions}}{{if and (eq .type "Ready") (eq .status "True")}}{{$name}}{{"\n"}}{{end}}{{end}}{{end}}{{end}}' 2>/dev/null | head -1 || true)
-            [ -n "$POD" ] && break
-            sleep 2
-          done
-          [ -n "$POD" ] || { echo "ERROR: no Ready non-terminating fluence pod found"; kubectl -n kube-system get pods -l app=fluence -o wide; exit 1; }
-          echo "Using pod: $POD"
-          # Brief sleep to let the container runtime stabilize before exec
-          sleep 5
-          kubectl -n kube-system exec "$POD" -- ls /tmp/
-          kubectl -n kube-system logs "$POD"
-          kubectl -n kube-system exec "$POD" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
-          kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{": cpu="}{.status.allocatable.cpu}{" mem="}{.status.allocatable.memory}{"\n"}{end}'
-         
-      - name: E2E - classical gang
-        run: bash test/e2e/01-classical-gang.sh
-
-      - name: Deploy quantum add-on
-        run: |
-          # Includes the device plugin and oriented to testing container
-          kubectl apply -f deploy/fluence-resources-test.yaml
-          kubectl rollout restart -n kube-system deployment/fluence
-          kubectl rollout status  -n kube-system deployment/fluence --timeout=60s
-          for i in $(seq 1 60); do
-            kubectl get nodes -o jsonpath='{range .items[*]}{.status.allocatable}{"\n"}{end}'
-            kubectl get nodes -o jsonpath='{range .items[*]}{.status.allocatable}{"\n"}{end}' | grep -q 'fluxion.flux-framework.org/qpu' && break
-            sleep 1
-          done
-          # After a rollout restart BOTH the old and new pods are briefly Running.
-          # Select only a Ready pod with no deletionTimestamp (i.e. the new one,
-          # not the terminating old one) so exec/logs don't 404.
-          POD=""
-          for i in $(seq 1 60); do
-            POD=$(kubectl -n kube-system get pods -l app=fluence \
-              -o go-template='{{range .items}}{{if not .metadata.deletionTimestamp}}{{$name := .metadata.name}}{{range .status.conditions}}{{if and (eq .type "Ready") (eq .status "True")}}{{$name}}{{"\n"}}{{end}}{{end}}{{end}}{{end}}' 2>/dev/null | head -1 || true)
-            [ -n "$POD" ] && break
-            sleep 2
-          done
-          [ -n "$POD" ] || { echo "ERROR: no Ready non-terminating fluence pod found after restart"; kubectl -n kube-system get pods -l app=fluence -o wide; exit 1; }
-          echo "Using pod: $POD"
-          # Brief sleep to let the container runtime stabilize before exec
-          sleep 5
-          kubectl -n kube-system exec "$POD" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
-
-      - name: Wait for webhook
-        run: |
-
-          # wait for the deployment AND for the caBundle to be populated on the webhook config
-          kubectl -n kube-system rollout status deployment/fluence-webhook --timeout=120s
-          for i in $(seq 1 30); do
-            cab=$(kubectl get mutatingwebhookconfiguration fluence-webhook \
-                  -o jsonpath='{.webhooks[0].clientConfig.caBundle}' 2>/dev/null)
-            [ -n "$cab" ] && break
-            sleep 2
-          done
-          # let TLS serving settle after caBundle patch
-          sleep 3 
-
-      - name: E2E - quantum placement
-        run: bash test/e2e/02-quantum-placement.sh
-
-      #- name: E2E - restart recovery (no double-book)
-      #  run: bash test/e2e/03-restart-recovery.sh
-
-      - name: E2E - sidecar ungate
-        run: bash test/e2e/04-sidecar-ungate.sh
-
-      - name: Dump diagnostics on failure
-        if: failure()
-        run: |
-          kubectl get pods -A -o wide
-          kubectl logs -n kube-system deployment/fluence
+    strategy:
+      fail-fast: false        # one suite failing should not cancel the other
+      matrix:
+        suite: [gang, quantum]
+    uses: ./.github/workflows/e2e-suite.yaml
+    with:
+      suite: ${{ matrix.suite }}
diff --git a/Makefile b/Makefile
index 1160cb4..5e2c050 100644
--- a/Makefile
+++ b/Makefile
@@ -27,8 +27,8 @@ build: ## Build all binaries (scheduler needs flux-sched; helpers are pure Go)
 
 .PHONY: python
 python:
-	docker build -f python/Dockerfile -t ghcr.io/converged-computing/fluence-sidecar:latest ./python
-	docker push ghcr.io/converged-computing/fluence-sidecar:latest
+	docker build -f python/Dockerfile -t vanessa/fluence-sidecar:latest ./python
+	docker push vanessa/fluence-sidecar:latest
 	# kind load docker-image ghcr.io/converged-computing/fluence-sidecar:latest
 
 .PHONY: test
@@ -55,13 +55,16 @@ test-image-deploy: test-image
 	kubectl patch podgroup training -n default --type=merge -p '{"metadata":{"finalizers":null}}' || true
 	kubectl delete deployments --all
 	kubectl delete pods --all
-	kubectl delete -f deploy/fluence-test.yaml
+	kubectl delete -f deploy/fluence-test.yaml || true
 	kubectl delete pods --all
 
+.PHONY: test-deploy-recreate
+test-deploy-recreate: test-image-deploy ## Run test-image-deploy, then apply deploy/fluence-pull-test.yaml
+	kubectl apply -f deploy/fluence-pull-test.yaml
 
 .PHONY: deploy
 deploy: ## Install RBAC + scheduler into kube-system
-	kubectl apply -f deploy/fluence.yaml
+	kubectl apply -f deploy/fluence.yaml
 
 .PHONY: help
 help:
diff --git a/README.md b/README.md
index 3ee668f..d299757 100644
--- a/README.md
+++ b/README.md
@@ -194,10 +194,10 @@ ceiling. Types come from the same config as the graph, so they can't drift.
 
 ### `sidecars/` — quantum coordination sidecars
 
-Vendor-specific sidecar containers injected by the webhook into leader pods
-of quantum workflow groups. Each sidecar discovers the QPU task submitted by
-the leader, polls the vendor queue, and ungates worker pods when the task
-reaches position==1.
+Vendor-specific sidecar containers injected by the webhook into the producer pod
+of a shared quantum workflow group. Each sidecar discovers the QPU task submitted
+by the producer, polls the vendor queue, and ungates the consumer pods when the
+task reaches position==1.
 
 ```console
 sidecars/
@@ -221,7 +221,7 @@ spec:
 ```
 
 Fluence creates the PodGroup, injects the sidecar, creates per-namespace
-RBAC, and gates all non-leader pods. See `sidecars/braket/design.md` for
+RBAC, and gates the consumer pods. See `sidecars/braket/design.md` for
 the full design including the SDK interceptor, queue position polling, and
 the two-queue problem motivation.
 
@@ -369,88 +369,90 @@ Submission is **not** done by the scheduler — the workload container holds the
 user's credentials and submits via qrmi-go. Fluence only schedules and hands off
 the backend. (When we control local quantum devices this will change.)
 
-### 3. Quantum workflow groups (leader + workers)
+### 3. Quantum workflow groups (producer + consumers)
 
-A quantum workflow group is one pod that **submits** quantum work (the leader)
-plus N pods that **wait** for the result (the workers). All pods share a group
-label; Fluence co-schedules them, gives the leader a sidecar that watches the
-vendor queue, and gates the workers so they consume no node resources during the
-(long, variable) QPU queue wait — releasing them only when the task reaches
-`queue_position == 1`.
+A quantum workflow group is a gang whose members share **one** quantum task:
+one pod **submits** the work (the producer) and N−1 pods **wait** for the result
+(the consumers). All pods share a group label and run the *same* image; Fluence
+co-schedules them, gives the producer a sidecar that watches the vendor queue, and
+gates the consumers so they consume no node resources during the (long, variable)
+QPU queue wait — releasing them only when the task reaches `queue_position == 1`.
 
 ```yaml
-# Every pod in the group carries the same group label + schedulerName: fluence
+# Every pod in the group carries the same group label + schedulerName: fluence,
+# and opts into shared coordination.
 metadata:
   labels:
     fluence.flux-framework.org/group: my-qaoa-workflow
+  annotations:
+    fluence.flux-framework.org/coordination: shared
 spec:
   schedulerName: fluence
 ```
 
-#### How the leader is chosen — two mechanisms
+#### Coordination modes
 
-There are two ways Fluence decides which pod is the leader. They are mutually
-exclusive per group; pick the one that matches how your workload is built.
+`fluence.flux-framework.org/coordination` selects how the gang is coordinated; it
+defaults to `independent`.
 
-**(a) Explicit role (recommended for leader/worker workflows).** Each pod
-declares its role with an annotation. This is **authoritative**: admission order
-is never consulted, and the same value is injected into the container as
-`FLUENCE_ROLE` so your application reads the exact role Fluence used — the two
-can never disagree.
+- **`shared`** — the gang shares ONE quantum task. Fluence promotes one member to
+  producer and gates the rest as consumers (see below). Use this for a coordinated
+  workflow where the classical post-processing should start together as the single
+  result lands.
+- **`independent`** (default) — every member does its own quantum work: its own
+  real submit, its own queue wait, no gating. N members run N tasks. This is the
+  honest default; Fluence never invents coordination you did not ask for, and
+  never dedups tasks meant to be distinct.
 
-```yaml
-metadata:
-  labels:
-    fluence.flux-framework.org/group: my-qaoa-workflow
-  annotations:
-    fluence.flux-framework.org/role: leader     # or: worker
-```
+#### How the producer is chosen
 
-Use this when the leader and workers are **different** (the leader submits the
-quantum task and runs the sidecar; workers process results). The leader gets the
-interceptor + sidecar; workers are gated. Because the decision is declared, it is
-race-free regardless of which pod the API server admits first. Your container can
-branch on `$FLUENCE_ROLE` (e.g. `leader` → submit; `worker` → wait).
+In `shared` mode the producer is the member the Job controller stamps with
+`batch.kubernetes.io/job-completion-index: "0"` — so an **indexed Job** gives
+deterministic, race-free election from a single identical template (every pod has
+the same image and group label; only the index differs). This serves two contracts
+with no extra configuration:
 
-**(b) Admission order (default when no role annotation is present).** If pods
-carry the group label but **no** role annotation, the **first pod admitted**
-becomes the leader and every subsequent pod is a worker. This suits a
-*homogeneous* pod-template gang (Deployment/Job/StatefulSet) where every replica
-is byte-identical — any one of them can lead, so "first admitted" is a fine
-tiebreaker. It is **not** suitable for a heterogeneous leader/worker workflow:
-since admission order is nondeterministic, a worker pod could be admitted first
-and wrongly elected leader. Use mechanism (a) for that case.
+- an **explicit-role script** that branches on the completion index (index 0
+  submits; others wait and consume the result), and
+- an **identical script** where every pod calls submit — the producer's submit is
+  real, and each consumer's submit is transparently returned the producer's task
+  (the shared-result dedup), so the code need not branch at all.
 
-> Rule of thumb: identical replicas → admission order is fine. Distinct
-> leader/worker pods → use the explicit `role` annotation.
+For loose pods with no completion index, the first pod admitted claims the producer
+slot; an indexed Job is recommended when you need determinism.
 
 #### What Fluence does
 
-Regardless of mechanism, the leader gets the sidecar and a PodGroup is created
-(`minCount: 1`); workers get a `quantum.braket/ready` scheduling gate and consume
-no node resources during the QPU queue wait. When the sidecar observes
-`queue_position == 1`, it patches the task ARN onto each worker's annotations and
-removes their gates atomically with setting the `fluence-quantum-classical`
-priority class so they reschedule promptly.
+In `shared` mode the producer gets the interceptor (real mode) + sidecar and its
+own group-of-one PodGroup `<group>-producer` (`minCount: 1`), so it schedules
+alone and runs the single real submit; it is never gated. The consumers join the
+`<group>` gang (`minCount: N−1`), get a `quantum.braket/ready` scheduling gate, and
+consume no node resources during the QPU queue wait. When the sidecar observes
+`queue_position == 1`, it stamps the producer's task id onto each consumer
+(surfaced as `FLUENCE_QUANTUM_JOB_ID`) and removes their gates atomically with
+setting the `fluence-quantum-classical` priority class so they reschedule promptly.
+The producer is one of the N members, so the application runs exactly N times —
+never N+1, and there is no separate submitter pod.
 
 Per-namespace RBAC (`fluence-sidecar` ServiceAccount/Role/RoleBinding) and the
-interceptor ConfigMap are created automatically by the webhook on first use — no
+interceptor staging are created automatically by the webhook on first use — no
 manual setup required.
 
 ```bash
-# Just apply your pods with the group label (+ optional role annotation) and
+# Apply your pods with the group label + coordination annotation +
 # schedulerName: fluence. RBAC is created for you.
 kubectl apply -f my-quantum-workflow.yaml
 ```
 
-#### A note on the homogeneous "all submit" case
+#### A note on the independent "all submit" case
 
-A group where *every* pod submits its own quantum task (no leader/worker split)
-is possible but rarely what you want: N independent submissions land in the
-vendor queue and run at uncoordinated times, so there is no coordination benefit
-from grouping them — you would just have N standalone quantum pods. For a single
-quantum submission, use a standalone pod (no group label, see §2). For a
-coordinated workflow, use the leader/worker form above with an explicit role.
+`coordination: independent` (the default) means *every* pod submits its own
+quantum task: N independent submissions land in the vendor queue and run at
+uncoordinated times. That is correct and sometimes exactly what you want (N
+distinct circuits), but it offers no coordination benefit from grouping — it is
+equivalent to N standalone quantum pods. For a single quantum submission, use a
+standalone pod (no group label, see §2). For a coordinated workflow that shares
+one result, use `coordination: shared` above.
 
 
 ### Notes
diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go
index ea2669a..1a6709d 100644
--- a/cmd/webhook/main.go
+++ b/cmd/webhook/main.go
@@ -12,9 +12,11 @@ package main
 import (
 	"context"
 	"crypto/tls"
+	"flag"
 	"log"
 	"net/http"
 	"os"
+	"strings"
 	"time"
 
 	"github.com/converged-computing/fluence/pkg/cluster"
@@ -38,6 +40,29 @@ func main() {
 	cfgName := env("WEBHOOK_CONFIG", "fluence-webhook")
 	addr := env("WEBHOOK_ADDR", ":8443")
 
+	// Handler selection. By default ALL registered handlers are enabled. The
+	// operator may restrict the active set with --handlers (comma-separated) or
+	// the FLUENCE_HANDLERS env var, e.g. --handlers=fluxion,gang to run without
+	// quantum. An empty value means all enabled. Unknown names are warned about
+	// but not fatal (so config survives a handler being renamed/removed).
+	handlersFlag := flag.String("handlers", env("FLUENCE_HANDLERS", ""),
+		"comma-separated handlers in dispatch order (default: fluxion,quantum,gang). e.g. fluxion,gang disables quantum")
+	flag.Parse()
+
+	var requested []string
+	if *handlersFlag != "" {
+		for _, n := range strings.Split(*handlersFlag, ",") {
+			if n = strings.TrimSpace(n); n != "" {
+				requested = append(requested, n)
+			}
+		}
+	}
+	active, unknown := webhook.SetActiveHandlers(requested)
+	for _, n := range unknown {
+		log.Printf("[fluence-webhook] WARNING: unknown handler %q — ignoring", n)
+	}
+	log.Printf("[fluence-webhook] active handlers (in dispatch order): %v", active)
+
 	dnsNames := []string{
 		svc + "." + ns + ".svc",
 		svc + "." + ns + ".svc.cluster.local",
@@ -87,7 +112,6 @@ func main() {
 	mutator := &webhook.Mutator{
 		AttributeKeys: attrKeys,
 		Clientset:     client,
-		SidecarImage:  env("FLUENCE_SIDECAR_IMAGE", ""),
 	}
 	log.Printf("[fluence-webhook] env contract injected into fluxion pods: %v", mutator.EnvVarNames())
 
diff --git a/deploy/fluence-pull-test.yaml b/deploy/fluence-pull-test.yaml
new file mode 100644
index 0000000..8e42158
--- /dev/null
+++ b/deploy/fluence-pull-test.yaml
@@ -0,0 +1,287 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: fluence
+  namespace: kube-system
+---
+# Bind the built-in scheduler roles so fluence (a full kube-scheduler build) has
+# every list/watch the scheduling framework needs (nodes, pods, PV/PVC, CSI,
+# storageclasses, resourceclaims/slices, volumeattachments, events, etc.).
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-as-kube-scheduler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:kube-scheduler
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-as-volume-scheduler
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:volume-scheduler
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+# Delegated authentication: read the auth configmap in kube-system. This is the
+# fix for the "extension-apiserver-authentication ... forbidden" errors.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: fluence-extension-apiserver-authentication-reader
+  namespace: kube-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: extension-apiserver-authentication-reader
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+# Extras the built-in scheduler role does not grant: the alpha PodGroup/Workload
+# API (gang), and leader-election leases under our scheduler name.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: fluence-extra
+rules:
+  - apiGroups: ["scheduling.k8s.io"]
+    resources: ["podgroups", "workloads", "podgroups/status", "workloads/status"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]
+    verbs: ["create", "get", "update", "list", "watch"]
+  # PreBind stamps the allocated backend onto the pod as an annotation; the
+  # built-in system:kube-scheduler role only allows patching pods/status, not
+  # the pod object, so grant it here.
+  - apiGroups: [""]
+    resources: ["pods"]
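+    # create/delete: the webhook creates the one-off quantum submitter pod
+    # (ensureSubmitterPod) and the scheduler reaps it during gang cleanup. NOTE(review): README section 3 states there is no separate submitter pod — confirm create/delete are still required.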
+    verbs: ["get", "list", "watch", "create", "patch", "update", "delete"]
+  # The webhook self-manages its TLS by patching its own config's caBundle.
+  - apiGroups: ["admissionregistration.k8s.io"]
+    resources: ["mutatingwebhookconfigurations"]
+    verbs: ["get", "list", "watch", "patch"]
+  # The webhook creates per-namespace sidecar RBAC on demand when a leader
+  # pod is admitted, so users do not need to apply RBAC manually.
+  - apiGroups: [""]
+    resources: ["serviceaccounts"]
+    verbs: ["get", "create"]
+  - apiGroups: [""]
+    resources: ["configmaps"]
+    verbs: ["get", "create"]
+  - apiGroups: ["rbac.authorization.k8s.io"]
+    resources: ["roles", "rolebindings"]
+    verbs: ["get", "create"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: fluence-extra
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: fluence-extra
+subjects:
+  - kind: ServiceAccount
+    name: fluence
+    namespace: kube-system
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fluence-scheduler-config
+  namespace: kube-system
+data:
+  scheduler-config.yaml: |
+    apiVersion: kubescheduler.config.k8s.io/v1
+    kind: KubeSchedulerConfiguration
+    leaderElection:
+      leaderElect: false
+    profiles:
+      - schedulerName: fluence
+        plugins:
+          # multiPoint wires Fluence into every extension point its Go type
+          # implements: PreFilter, Filter, and PreBind (which stamps the backend
+          # annotation). Listing points individually risks omitting one — that is
+          # exactly what left PreBind unwired and the backend annotation unset.
+          multiPoint:
+            enabled: [{name: Fluence}]
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fluence
+  namespace: kube-system
+  labels: {app: fluence}
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: fluence}
+  template:
+    metadata:
+      labels: {app: fluence}
+    spec:
+      serviceAccountName: fluence
+      containers:
+        - name: fluence
+          image: vanessa/fluence:test
+          imagePullPolicy: Always
+          command:
+            - /bin/fluence
+            - --config=/etc/fluence/scheduler-config.yaml
+            # fluence is its own scheduler binary, so it needs the gang gates set
+            # here (the cluster-level kube-scheduler gates don't apply to it).
+            # Without these its PodGroup/GangScheduling plugin is inactive, pods
+            # schedule with no gang semantics, and PodGroup status stays Pending.
+            - --feature-gates=GenericWorkload=true,GangScheduling=true
+            - --v=4
+          env:
+            # Path to the resources config (e.g. quantum backends). Unset/empty
+            # file -> classical-only graph. Supplied by the quantum add-on.
+            - name: FLUENCE_RESOURCES
+              value: /etc/fluence/resources.yaml
+          volumeMounts:
+            - name: config
+              mountPath: /etc/fluence
+      volumes:
+        - name: config
+          projected:
+            sources:
+              - configMap: {name: fluence-scheduler-config}
+              - configMap: {name: fluence-resources, optional: true}
+---
+# Mutating webhook: injects scheduler-chosen values into pods at creation time
+# (currently a downward-API QRMI_BACKEND env for quantum pods). It self-manages
+# TLS — generates a CA + serving cert at startup and patches the caBundle below —
+# so no cert-manager and no committed keys. failurePolicy Ignore keeps a webhook
+# outage from blocking pod creation cluster-wide.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fluence-webhook
+  namespace: kube-system
+  labels: {app: fluence-webhook}
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: fluence-webhook}
+  template:
+    metadata:
+      labels: {app: fluence-webhook}
+    spec:
+      serviceAccountName: fluence
+      containers:
+        - name: webhook
+          image: vanessa/fluence:test
+          imagePullPolicy: Always
+          command: ["/bin/fluence-webhook"]
+          # The webhook derives the FLUXION_* env contract (FLUXION_VENDOR,
+          # FLUXION_QRMI_TYPE, ...) from the resource graph's attribute keys, so
+          # it needs the same graph the scheduler and device plugin read. Without
+          # this it injects only FLUXION_BACKEND, and the sidecar can't route to
+          # a provider (which keys on qrmi_type).
+          env:
+            - name: FLUENCE_RESOURCES
+              value: /etc/fluence/resources.yaml
+          ports:
+            - containerPort: 8443
+          readinessProbe:
+            httpGet: {path: /healthz, port: 8443, scheme: HTTPS}
+            initialDelaySeconds: 2
+          volumeMounts:
+            - name: config
+              mountPath: /etc/fluence
+      volumes:
+        - name: config
+          projected:
+            sources:
+              - configMap: {name: fluence-resources, optional: true}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: fluence-webhook
+  namespace: kube-system
+spec:
+  selector: {app: fluence-webhook}
+  ports:
+    - port: 443
+      targetPort: 8443
+---
+apiVersion: admissionregistration.k8s.io/v1
+kind: MutatingWebhookConfiguration
+metadata:
+  name: fluence-webhook
+webhooks:
+  - name: pods.fluence.flux-framework.org
+    admissionReviewVersions: ["v1"]
+    sideEffects: None
+    failurePolicy: Ignore        # never block pod creation if the webhook is down
+    # caBundle is filled in at runtime by the webhook patching this object.
+    clientConfig:
+      service:
+        name: fluence-webhook
+        namespace: kube-system
+        path: /mutate
+        port: 443
+    rules:
+      - apiGroups: [""]
+        apiVersions: ["v1"]
+        operations: ["CREATE"]
+        resources: ["pods"]
+        scope: Namespaced
+    # Don't intercept system pods (and avoid bootstrap coupling).
+    namespaceSelector:
+      matchExpressions:
+        - key: kubernetes.io/metadata.name
+          operator: NotIn
+          values: ["kube-system"]
+# Quantum sidecar support
+#
+# Supporting resources for the Fluence quantum sidecar. The per-namespace sidecar
+# RBAC is created on demand by the webhook (see the fluence-extra rules above).
+#
+# The sidecar runs inside the producer pod and needs:
+#   - patch/annotate on pods in its own namespace (to ungate consumers and
+#     propagate the task ARN annotation)
+#
+# The sidecar ServiceAccount is namespace-scoped — it only has permissions
+# in the namespace where the workflow runs. The webhook sets
+# spec.serviceAccountName on the producer pod to fluence-sidecar.
+#
+# The fluence Python package is staged into user containers by an init
+# container (Model C): the webhook injects an init container from the
+# sidecar image that copies the package + sitecustomize into a shared
+# volume on the user container's PYTHONPATH. No ConfigMap, no user install.
+#
+# Applied as part of this manifest: kubectl apply -f deploy/fluence-pull-test.yaml
+
+
+---
+# PriorityClass for classical pods paired with quantum work.
+# Applied to worker pods by the webhook when they are gated.
+# When ungated, high priority triggers preemption of lower-priority work
+# so workers get nodes immediately as the QPU result arrives.
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: fluence-quantum-classical
+  labels:
+    app: fluence
+value: 1000000
+globalDefault: false
+preemptionPolicy: PreemptLowerPriority
+description: "High priority for classical pods paired with quantum work. Set by Fluence webhook."
diff --git a/deploy/fluence-test.yaml b/deploy/fluence-test.yaml
index 6d1dace..ab61a91 100644
--- a/deploy/fluence-test.yaml
+++ b/deploy/fluence-test.yaml
@@ -67,7 +67,9 @@ rules:
   # the pod object, so grant it here.
   - apiGroups: [""]
     resources: ["pods"]
-    verbs: ["get", "list", "watch", "patch", "update"]
+    # create/delete: added for the one-off quantum submitter pod (ensureSubmitterPod).
+    # NOTE(review): the producer/consumer split removes that pod — confirm still needed.
+    verbs: ["get", "list", "watch", "create", "patch", "update", "delete"]
   # The webhook self-manages its TLS by patching its own config's caBundle.
   - apiGroups: ["admissionregistration.k8s.io"]
     resources: ["mutatingwebhookconfigurations"]
@@ -146,6 +148,13 @@ spec:
             # Without these its PodGroup/GangScheduling plugin is inactive, pods
             # schedule with no gang semantics, and PodGroup status stays Pending.
             - --feature-gates=GenericWorkload=true,GangScheduling=true
+            # Re-attempt unschedulable pods more often than the 5m default. In the
+            # contention experiment a gang that loses the initial race for nodes is
+            # marked Unschedulable; this is how soon it is re-tried after capacity
+            # frees (the event-driven QueueingHint is best-effort; this is the
+            # backstop that bounds worst-case requeue latency). 30s keeps contended
+            # gangs draining promptly without thrashing the queue.
+            - --pod-max-in-unschedulable-pods-duration=30s
             - --v=4
           env:
             # Path to the resources config (e.g. quantum backends). Unset/empty
diff --git a/deploy/fluence.yaml b/deploy/fluence.yaml
index b856268..7d71386 100644
--- a/deploy/fluence.yaml
+++ b/deploy/fluence.yaml
@@ -67,7 +67,9 @@ rules:
   # the pod object, so grant it here.
   - apiGroups: [""]
     resources: ["pods"]
-    verbs: ["get", "list", "watch", "patch", "update"]
+    # create/delete: added for the one-off quantum submitter pod (ensureSubmitterPod).
+    # NOTE(review): the producer/consumer split removes that pod — confirm still needed.
+    verbs: ["get", "list", "watch", "create", "patch", "update", "delete"]
   # The webhook self-manages its TLS by patching its own config's caBundle.
   - apiGroups: ["admissionregistration.k8s.io"]
     resources: ["mutatingwebhookconfigurations"]
diff --git a/deploy/kind-config.yaml b/deploy/kind-config.yaml
index c94e070..ec310bc 100644
--- a/deploy/kind-config.yaml
+++ b/deploy/kind-config.yaml
@@ -32,4 +32,4 @@ nodes:
             - name: feature-gates
               value: "GenericWorkload=true"
   - role: worker
-  - role: worker
+  - role: worker
\ No newline at end of file
diff --git a/docs/coordination-handler-design.md b/docs/coordination-handler-design.md
new file mode 100644
index 0000000..cdcfd38
--- /dev/null
+++ b/docs/coordination-handler-design.md
@@ -0,0 +1,387 @@
+# Coordination handlers: producer/consumer gang split (no separate submitter)
+
+> **Status: implemented.** This design is live in `pkg/webhook/handlers/quantum.go`
+> (the coordination router + `mutateProducer`/`mutateConsumer`/`coordinationMode`/
+> `isProducer`), `pkg/webhook/handlers/gang.go` (classical gangs defer quantum
+> pods to the quantum handler), and `pkg/fluence/fluence.go` (reconcile reaps the
+> `<group>-producer` PodGroup, never the producer pod — it is a real member).
+> Unit tests are in `pkg/webhook/handlers/quantum_test.go`; structural e2e in
+> `test/e2e/quantum/02–04`. Coordination is **role-aware**: the webhook stamps
+> `FLUENCE_COORDINATION_ROLE` (producer/consumer) and hands consumers the
+> producer's task id (`FLUENCE_QUANTUM_JOB_ID`); the workload branches on the role
+> (producer submits, consumer fetches the shared result by id). No submit
+> interception, no faux flag — that earlier mechanism has been removed.
+
+## Why this replaces the submitter-pod model
+
+The `add-sidecar-interface` branch coordinates a quantum gang by creating a
+*separate* one-off submitter pod (`<group>-submitter`) that runs the user's
+application image to do the real submit, then ungates a gang of N faux-submitting
+members. That works, but it runs the user's application **N+1 times** for an
+N-gang: once in the submitter (a full run whose post-processing nobody consumes)
+plus once in each of the N members. The redundant run is not an implementation
+wart — it is a symptom of modeling quantum work as a producer/consumer split
+while pretending one image plays both roles, selected at runtime by a faux flag.
+
+This design keeps the split (it is correct) but removes the separate pod: the
+**producer is one of the N members**, promoted at admission, so the application
+runs exactly **N times** — the needed number — with exactly **one real submit**.
+
+The core thesis is unchanged: Fluence is a generic gang scheduler (native gangs
+since k8s 1.36), and per-resource nuance lives in handlers. This is entirely a
+change to the `quantum` handler plus a one-line deferral in the `gang` handler.
+
+## The fundamental constraint
+
+A quantum task's content (the circuit) comes from user code, so **the pod that
+defines a task must run to submit it**. Therefore, per pod, *submit* and *gate*
+are mutually exclusive — a pod either runs (and can submit) or is gated (and
+cannot). Gating only ever buys resource savings for pods that **do not submit**:
+pods that consume a result someone else produced.
+
+That partitions a quantum gang into two kinds, decided per pod:
+
+- **producer** — runs its code, submits its own task, holds a node through the
+  queue wait. Not gateable, ever.
+- **consumer** — never submits; reads the producer's result. Fully gateable until
+  that result is ready.
+
+## Coordination modes (user-facing contract)
+
+Identical pod templates (a Job/Deployment) are genuinely ambiguous between "one
+shared task, fan the result out to N pods" and "N independent tasks." Fluence
+cannot infer this; the user declares it with one annotation on the pod template:
+
+```yaml
+metadata:
+  annotations:
+    fluence.flux-framework.org/coordination: shared      # or: independent
+```
+
+| mode | meaning | who submits | gating | app runs | real submits |
+|------|---------|-------------|--------|----------|--------------|
+| `independent` (default) | N pods each do their own quantum work | every pod | none possible (all are producers) | N | N |
+| `shared` | one task; N pods consume the result | producer only | consumers gated until task ready | N | 1 |
+
+`coordination` is an open enum so future designs (e.g. `scatter` — index-paired
+task↔pod, §6.2 of the quantum doc) slot in as new modes without changing the
+mechanism. Default is `independent`: never invent coordination the user did not
+ask for, and never dedup tasks that were meant to be distinct.
+
+### What each mode does to resources, honestly
+
+- **shared**: the producer (1 node) holds its node through the queue wait;
+  consumers (N−1) consume **zero** node resources while gated, then start at
+  position==1. Idle cost during the wait ≈ 1 node, vs N for a traditional gang.
+- **independent**: every pod is a producer, so every pod holds its node through
+  its own queue wait — N nodes idle. There is nothing to coordinate (no shared
+  result), so this is not a Fluence deficiency; it is the physics of "N
+  independent tasks," and it is the user's explicit choice. The only way to
+  reclaim even the producer's node in either mode is a resumable `.result()`
+  (replay), which is deliberately **out of scope for v1** (one idle node is
+  cheap; replay imposes a replay-safe-code contract).
+
+## Producer election
+
+Exactly one member must be the producer. Election is deterministic for the
+recommended workload and best-effort otherwise:
+
+- **Indexed Job (recommended):** the pod carries
+  `batch.kubernetes.io/job-completion-index`. **Index `0` is the producer**;
+  every other index is a consumer. Deterministic, race-free, no recorded state —
+  the controller already stamped the index, and identical templates yield
+  differentiated behavior purely from it. This is why an indexed Job is the right
+  shape and is what the experiments use.
+- **Non-indexed gang (Deployment / raw grouped pods):** first arrival claims the
+  producer slot by creating the producer PodGroup (create-if-absent); later pods
+  find it present and become consumers. Best-effort (racy under simultaneous
+  admission); documented, with indexed Job recommended for determinism.
+
+## The two-group split
+
+| | producer (index 0) | consumers (indices 1..N−1) |
+|---|---|---|
+| PodGroup | `<group>-producer`, `minCount=1` | `<group>`, `minCount=N−1` |
+| schedules | immediately, alone | atomically as a gang, **after ungate** |
+| gate | none | `quantum.braket/ready` + preempting priority |
+| interceptor | staged (tags the real submit) | **not staged** (a consumer never submits) |
+| sidecar | yes — polls the task, ungates `<group>` at position==1 | no |
+| app run | full; submits the one real task | full; reads role=consumer and fetches the shared result by id (no submit) |
+
+`minCount=1` on the producer group is what removes the deadlock that forced a
+separate submitter: a single-member group schedules alone, so the producer runs
+during the wait while the `minCount=N−1` consumer group sits gated. The two
+groups have independent minCounts; neither blocks the other. The consumer group
+keeps a real gang `minCount` (N−1), so **gang scheduling is preserved and
+demonstrable** (experiment requirement 1).
+
+Coordination is role-aware rather than interception-based: the consumer is told
+`FLUENCE_COORDINATION_ROLE=consumer` and handed the producer's task id
+(`FLUENCE_QUANTUM_JOB_ID`, stamped by the sidecar at ungate), and the workload
+fetches the shared result by that id instead of submitting. One real task, N
+consumers, each app run once, in full — and no SDK submit-interception.
+
+## Gate / ungate flow (shared mode)
+
+```
+1. Producer (index 0) admitted -> own group-of-one, ungated, sidecar attached
+   (FLUENCE_GANG_GROUP=<group>), interceptor staged (tags the real submit).
+   Consumers (1..N-1) admitted -> group <group> (minCount N-1), GATED,
+   role=consumer, depends-on producer=<group>-producer.
+
+2. Scheduler places the producer immediately (minCount=1). It runs the user app,
+   .run() submits the ONE real task (tagged fluence-pod-uid).
+
+3. Producer sidecar discovers the task by tag, polls queue position.
+
+4. At position==1 (or RUNNING): for each gated pod in <group>:
+     annotate fluence.flux-framework.org/quantum-job-id=<task id>
+     remove the quantum.braket/ready gate (priority already set at admission)
+
+5. Consumer group (now ungated, minCount N-1) gang-schedules atomically and
+   starts as the quantum result arrives. Each consumer reads role=consumer and
+   fetches the producer's task by FLUENCE_QUANTUM_JOB_ID (.result() returns the
+   shared result); app post-processes. No consumer submits.
+```
+
+`independent` mode skips all of this: each pod is its own group-of-one, ungated,
+real submit, optional observe-only sidecar — i.e. today's standalone path applied
+per pod.
+
+---
+
+## Patch
+
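+All webhook changes are in `pkg/webhook/handlers/` (the scheduler-side reaping of
+the `<group>-producer` PodGroup is in `pkg/fluence/fluence.go`). The webhook core, the
+`fluxion` handler, `dependency.go`, `sidecar.go`, and the Python interceptor/sidecar are **unchanged**.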
+
+### `gang.go` — defer on quantum pods (removes the ordering dependency)
+
+The gang handler currently calls `EnsurePodGroup` unconditionally and relies on
+idempotency to coexist with the quantum handler. With the two-group split the
+quantum handler owns *both* quantum PodGroups (and the producer's group differs
+from its admission-time label), so the gang handler must not also gang quantum
+pods. Make it skip them:
+
+```go
+func (h *gangHandler) Applies(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) bool {
+	// Classical gangs only. A pod that requests the quantum resource is gang-
+	// scheduled by the quantum handler (which owns the producer/consumer split);
+	// handling it here too would create a second, conflicting PodGroup.
+	if spec.PodRequestsResource(pod, QuantumResource) {
+		return false
+	}
+	return webhook.GroupName(pod) != ""
+}
+```
+
+### `quantum.go` — replace `Mutate` and the submitter machinery
+
+**Add** these constants (near the existing const block):
+
+```go
+const (
+	// CoordinationAnnotation selects how a quantum gang is coordinated. Open enum
+	// so new designs (e.g. "scatter") add a mode without changing the mechanism.
+	CoordinationAnnotation = "fluence.flux-framework.org/coordination"
+	// CoordinationShared: one real task; the producer (index 0) submits, the
+	// other members are gated consumers that fetch the producer's result by task id.
+	CoordinationShared = "shared"
+	// CoordinationIndependent (default): every member does its own quantum work;
+	// no coordination, no gating, each holds its node through its own queue wait.
+	CoordinationIndependent = "independent"
+
+	// ProducerGroupSuffix names the producer's own group-of-one: <group>-producer
+	// (minCount 1) so it schedules alone and never deadlocks against the gated
+	// consumer gang.
+	ProducerGroupSuffix = "-producer"
+
+	// CompletionIndexAnnotation is the indexed-Job completion index the Job
+	// controller stamps on each pod; index "0" is the producer (deterministic
+	// election, no recorded state).
+	CompletionIndexAnnotation = "batch.kubernetes.io/job-completion-index"
+	// ProducerIndex is the completion index promoted to producer.
+	ProducerIndex = "0"
+)
+```
+
+Keep `GangGroupEnv` (`FLUENCE_GANG_GROUP`) — it now tells the **producer's**
+sidecar which consumer group to ungate. **Delete** the separate-submitter
+constants and helpers: `SubmitterAnnotation`, `GangGroupAnnotation`,
+`SubmitterGroupSuffix`, `SubmitterPodSuffix`, and the functions
+`mutateSubmitter` and `ensureSubmitterPod`. Everything else in the file
+(`resolveGroup`, `resolveGangSize`, `ownerReplicaSetN`, `countGroupPods`,
+`linkGroupOps`, the role/job-id env section, the sidecar section) is reused unchanged.
+
+**Replace** `Mutate` with the coordination router plus two small role functions:
+
+```go
+func (h *quantumHandler) Mutate(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) []spec.Op {
+	g := resolveGroup(pod)
+	n := resolveGangSize(ctx, m, pod, g)
+	mode := coordinationMode(pod)
+	observe := spec.Label(pod, ObserveLabel) == "true"
+
+	// No coordination: a standalone quantum pod, or an explicitly independent
+	// member. The REAL submit happens in THIS pod; sidecar only for observe-only
+	// telemetry. (independent mode routes every member here -> N standalone
+	// producers, each owning its task and its own queue wait.)
+	if mode != CoordinationShared || g == "" || n <= 1 {
+		ops := interceptorOps(pod)
+		if observe {
+			sc := sidecarFor(m)
+			sc.EnsureRBAC(ctx, pod.Namespace)
+			ops = append(ops, sc.ContainerOps(pod, true, nil)...)
+		}
+		log.Printf("[fluence-webhook] quantum %s/%s mode=%s (standalone/independent, observe=%v)",
+			pod.Namespace, pod.Name, mode, observe)
+		return ops
+	}
+
+	// shared mode: promote one member to producer; the rest are gated consumers.
+	if isProducer(ctx, m, pod, g) {
+		return h.mutateProducer(ctx, m, pod, g)
+	}
+	return h.mutateConsumer(ctx, m, pod, g, n)
+}
+
+// mutateProducer: index-0 member. Its own group-of-one (minCount 1) so it
+// schedules alone and runs the REAL submit; sidecar polls the task and ungates
+// the consumer group. NOT gated, no faux. The producer is one of the N members,
+// so the application is NOT run an extra time.
+func (h *quantumHandler) mutateProducer(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod, group string) []spec.Op {
+	pg := group + ProducerGroupSuffix
+	m.EnsurePodGroup(ctx, pod.Namespace, pg, pod.Name, 1)
+	ops := linkGroupOps(pod, pg)
+	ops = append(ops, interceptorOps(pod)...)           // tags the real submit
+	ops = append(ops, roleEnvOps(pod, RoleProducer)...) // FLUENCE_COORDINATION_ROLE=producer
+	sc := sidecarFor(m)
+	sc.EnsureRBAC(ctx, pod.Namespace)
+	// Tell the sidecar which consumer group (the base group) to list + ungate.
+	ops = append(ops, sc.ContainerOps(pod, false, []corev1.EnvVar{{Name: GangGroupEnv, Value: group}})...)
+	log.Printf("[fluence-webhook] quantum producer %s/%s — group %s (ungates %q)",
+		pod.Namespace, pod.Name, pg, group)
+	return ops
+}
+
+// mutateConsumer: a non-producer member. Joins the <group> consumer gang
+// (minCount N-1), is gated until the producer's task is ready, and is told its
+// role (FLUENCE_COORDINATION_ROLE=consumer) + the producer's task id
+// (FLUENCE_QUANTUM_JOB_ID). A consumer fetches the shared result by id; it never
+// submits, so it gets neither the interceptor nor a faux flag.
+func (h *quantumHandler) mutateConsumer(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod, group string, n int32) []spec.Op {
+	m.EnsurePodGroup(ctx, pod.Namespace, group, pod.Name, n-1)
+	ops := linkGroupOps(pod, group)
+	dep := Dependency{Kind: DependencyKindQuantumSubmit, Producer: group + ProducerGroupSuffix, Gate: QuantumGate}
+	ops = append(ops, dep.applyOps(pod)...) // gate + preempting priority + depends-on
+	ops = append(ops, consumerEnvOps(pod)...) // role=consumer + FLUENCE_QUANTUM_JOB_ID
+	log.Printf("[fluence-webhook] quantum consumer %s/%s — group %s minCount=%d, gated (role=consumer)",
+		pod.Namespace, pod.Name, group, n-1)
+	return ops
+}
+
+// coordinationMode reads the coordination annotation; default independent.
+func coordinationMode(pod *corev1.Pod) string {
+	if v := spec.Annotation(pod, CoordinationAnnotation); v != "" {
+		return v
+	}
+	return CoordinationIndependent
+}
+
+// isProducer decides whether THIS pod is the gang's single producer. Indexed Job
+// (recommended): completion index 0 is the producer — deterministic, race-free.
+// Otherwise: first arrival claims the producer slot by the absence of the
+// producer PodGroup (best-effort; prefer an indexed Job).
+func isProducer(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod, group string) bool {
+	if idx, ok := pod.Annotations[CompletionIndexAnnotation]; ok {
+		return idx == ProducerIndex
+	}
+	c := m.Client()
+	if c == nil {
+		return true // tests / no client: treat as producer
+	}
+	pg := group + ProducerGroupSuffix
+	if _, err := c.SchedulingV1alpha2().PodGroups(pod.Namespace).Get(ctx, pg, metav1.GetOptions{}); err == nil {
+		return false // already claimed by an earlier arrival
+	}
+	return true
+}
+```
+
+Note: `pod.Annotations` may be nil; the `idx, ok := pod.Annotations[...]` form is
+nil-safe in Go (indexing a nil map yields the zero value, `ok=false`).
+
+### Sidecar (Python) — no change
+
+The producer's sidecar already resolves the vendor at runtime from
+`FLUXION_BACKEND`, discovers the task by the `fluence-pod-uid` tag, polls queue
+position, and ungates the group named by `FLUENCE_GANG_GROUP` (now the consumer
+group) at position==1, stamping `quantum-job-id` on each consumer before removing
+its gate. That is exactly the existing flow with the producer in place of the
+submitter pod.
+
+---
+
+## Experiments
+
+Two requirements, both demonstrable on a kind cluster with the mock path and
+on a real cluster with Braket.
+
+### Requirement 1 — Fluence still gang-schedules
+
+Unchanged classical-gang coverage plus a shared-mode assertion:
+
+- **Classical gang (regression):** keep `test/e2e/gang/*`. A `minCount=N` classical
+  PodGroup schedules all-or-nothing. This proves the generic gang machinery is
+  intact (the change only adds a quantum-pod deferral to `gang.Applies`).
+- **Shared consumer gang (new assertion):** submit a `coordination: shared`
+  indexed Job of N. Assert: exactly one `<job>-producer` PodGroup (minCount 1) and
+  one `<job>` PodGroup (minCount N−1); the producer runs while the N−1 consumers
+  are `SchedulingGated`; after ungate the N−1 schedule **together** (gang), not
+  one-by-one. This proves gang scheduling still holds for the consumer group.
+
+### Requirement 2 — Both modes work and shared beats a traditional gang
+
+The metric that isolates the win is **classical node-seconds consumed during the
+quantum queue wait** (lower is better), alongside correctness checks.
+
+Three arms, same N, same workload (the QAOA sampler), same backend:
+
+| arm | how | expected node-seconds during queue wait | correctness |
+|-----|-----|------------------------------------------|-------------|
+| **traditional gang** (baseline) | N pods all running, each waits the full queue (no Fluence coordination — e.g. a plain native gang, or `independent` mode at the same N) | ≈ **N × T_queue** | N pods each run; if they each submit, N real tasks |
+| **shared** (new) | `coordination: shared` indexed Job, N pods | ≈ **1 × T_queue** (producer only; consumers gated) | **1** real task; all N pods produce the **same** result; app runs N times, never N+1 |
+| **independent** (new) | `coordination: independent` indexed Job, N pods | ≈ **N × T_queue** (no coordination possible) | N distinct tasks/results; correct and the user's explicit choice (reported as the honest baseline, **not** claimed as an improvement) |
+
+Headline comparison is **shared vs traditional**: same observable result to the N
+pods, but shared idles ~1 node through the queue wait instead of N, saving
+≈ (N−1) × T_queue node-seconds, and runs the application N times rather than N+1
+(the submitter-pod model's extra run is gone).
+
+Instrumentation (reuse the Experiment 2 harness):
+- per-pod `TIMING` lines → derive each pod's gated interval vs running interval;
+  sum running-but-pre-result node-seconds per arm.
+- producer's sidecar logs queue position over time → T_queue.
+- assert real-submit count: shared = 1 (one tagged task on the backend),
+  independent/traditional = N (count tagged tasks).
+- assert shared correctness: all N pods log the **same** task id / result hash.
+
+Suggested location: a new `experiments/4-coordination/` modeled on
+`experiments/2-gang/` (it already measures idle reclamation), parameterized by the
+`coordination` annotation and N, emitting node-seconds-during-wait, real-submit
+count, and result-agreement per arm. Plot node-seconds vs N for the three arms:
+traditional and independent rise ~linearly in N; shared stays ~flat at one node.
+
+### Build/run notes
+
+- The producer/consumer split needs no new image: producers and consumers run the
+  same role-aware sampler; the branch is `FLUENCE_COORDINATION_ROLE`
+  (producer submits; consumer fetches the shared result by `FLUENCE_QUANTUM_JOB_ID`).
+- Use an **indexed** Job (`completionMode: Indexed`, `parallelism == completions == N`)
+  so producer election is deterministic (index 0) and `resolveGangSize` reads N
+  from the owner. Stamp `fluence.flux-framework.org/coordination` in the pod
+  template's annotations.
+- kind/mock runs exercise the structural assertions (groups, gating, ungate
+  ordering) without a backend; real-Braket runs add the node-seconds and
+  real-submit-count measurements.
diff --git a/docs/handlers.md b/docs/handlers.md
new file mode 100644
index 0000000..ee70519
--- /dev/null
+++ b/docs/handlers.md
@@ -0,0 +1,83 @@
+# Webhook handlers & sidecar architecture
+
+Fluence's value is not creating gangs (Kubernetes 1.36 native gang scheduling
+already does that). It is **customizing the gang on the fly based on the
+resources a pod requests** — e.g. a shared quantum gang becomes a size-1
+producer gang plus a size-(N-1) consumer gang, with the producer running a
+sidecar that ungates its consumers when the quantum task is ready.
+
+## Handlers
+
+Each handler is an interface implementation (`pkg/webhook/handler.go`):
+
+```go
+type Handler interface {
+    Name() string
+    Applies(ctx, m MutatorAPI, pod) bool
+    Mutate(ctx, m MutatorAPI, pod) []spec.Op
+}
+```
+
+Handlers self-register by name (`init()` -> `webhook.Register`); a blank import
+of the handlers package makes them AVAILABLE. The core never names a handler.
+
+**Ordering = the active list.** There is no per-handler priority. The active
+handler list is BOTH the selection and the dispatch order:
+
+```go
+var DefaultHandlerOrder = []string{"fluxion", "quantum", "gang"}
+```
+
+Dispatch walks this list in order. `gang` is last because it is last in the
+list — the fallback that applies common defaults (honor `group-size`, else
+owner-derived N) only if no earlier handler already shaped the gang. A
+custom-resource handler is inserted into the list before `gang` to shape its own
+gang first. To change the order, or disable a handler, pass a different list.
+
+## Enabling/disabling handlers
+
+By default ALL registered handlers are enabled. Restrict the active set on the
+webhook command:
+
+```
+fluence-webhook --handlers=fluxion,gang        # run without quantum
+FLUENCE_HANDLERS=fluxion,quantum,gang fluence-webhook
+```
+
+Empty = the default list. The list is the order: `--handlers=gang,fluxion` runs
+gang first; omitting a name disables it. Unknown names are warned and dropped.
+
+(The handler set lives in the WEBHOOK, which mutates pods. `cmd/fluence` is the
+scheduler plugin and runs no handlers.)
+
+## Sidecar interface
+
+The coordination sidecar is a handler-owned capability, not a core one. Handlers
+that need a sidecar use `handlers.Sidecar`:
+
+```go
+type Sidecar interface {
+    EnsureRBAC(ctx, namespace)
+    InterceptorOps(pod) []spec.Op
+    ContainerOps(pod, observe bool, extraEnv []corev1.EnvVar) []spec.Op
+}
+```
+
+The default `coreSidecar` delegates to the core's staging primitives. The quantum
+handler uses it today; a custom handler can supply its own implementation
+(different image, env, gating) without touching the core or other handlers. The
+core's `MutatorAPI` keeps the staging primitives only so the default
+implementation can delegate — handlers do not call them directly.
+
+## Group size resolution (the default gang handler)
+
+`minCount` (the atomic-schedule count) resolves as:
+
+1. explicit `fluence.flux-framework.org/group-size` annotation — honored verbatim
+   (the override; e.g. a quantum split sets it directly);
+2. else the owning indexed Job's `parallelism` (== MiniCluster size N);
+3. else 1, logged.
+
+This is a common default available to every gang; handler-specific annotations
+(quantum coordination mode, completion index, etc.) live in their handlers and are not
+required by the core.
diff --git a/docs/quantum-scheduling.md b/docs/quantum-scheduling.md
index a6967ba..de32220 100644
--- a/docs/quantum-scheduling.md
+++ b/docs/quantum-scheduling.md
@@ -5,15 +5,15 @@
 Hybrid quantum-classical workflows submit work to two independent queues:
 the Kubernetes scheduler (classical compute) and a QPU vendor API (quantum
 execution). Classical pods waste node resources while waiting for QPU queue
-results. Fluence's coordination system thus gates classical worker pods until 
+results. Fluence's coordination system thus gates classical consumer pods until 
 the QPU task is one position from executing, then releases them with high 
 priority so they preempt lower-priority work and start immediately as the 
 QPU result arrives. Yes, it could be the case the one task in the queue before
-it takes a long time, but I think this is an improved approach than having worker
+it takes a long time, but I think this is a better approach than having consumer
 pods running (and waiting) for a much longer queue. This only is important
-given that you have gangs, or leader worker designs where some leader is launching
-the quantum work and otherwise the workers would be waiting and doing nothing
-(and wasting resources).
+given that you have gangs, or producer/consumer designs where one member is
+launching the quantum work and otherwise the other members would be waiting and
+doing nothing (and wasting resources).
 
 ## 1. The Two-Queue Problem
 
@@ -67,11 +67,13 @@ queue wait — which is worse than the original problem.
 
 The design combines four mechanisms:
 
-1. **SDK interceptor** — tags every QPU task with the pod UID
-2. **Fluence webhook** — gates worker pods, injects sidecar into quantum pods
+1. **SDK interceptor** — tags every submitted QPU task with the pod UID so the
+   sidecar can find it (staged only on pods that submit)
+2. **Fluence webhook** — splits a shared quantum gang into one producer and N-1
+   gated consumers; injects the sidecar into the producer
 3. **Sidecar controller** — discovers the QPU task, polls queue position,
-   ungates workers when position==1
-4. **High-priority ungating** — workers preempt lower-priority work at the
+   ungates the consumers when position==1
+4. **High-priority ungating** — consumers preempt lower-priority work at the
    last responsible moment
 
 ### 3.0 When Fluence acts: the decision matrix
@@ -84,19 +86,26 @@ determine what Fluence does:
   work and there is a vendor backend behind it.
 - **G (gang?)** — does the pod carry `fluence.flux-framework.org/group`?
 
-|              | not quantum            | quantum                                                        |
-|--------------|------------------------|----------------------------------------------------------------|
-| **not gang** | group of 1 (nothing)   | inject provider interceptor + env; **sidecar only in observe-only mode if telemetry requested** (no workers to ungate) |
-| **gang**     | gang-schedule only     | leader: interceptor + env + sidecar (gates + ungates workers); workers: gate only |
+A third property applies only to quantum gangs: the **coordination mode**
+(`fluence.flux-framework.org/coordination`, default `independent`). In `shared`
+mode the gang produces ONE quantum task that all members share; in `independent`
+mode every member does its own quantum work.
+
+|              | not quantum            | quantum                                                                        |
+|--------------|------------------------|--------------------------------------------------------------------------------|
+| **not gang** | group of 1 (nothing)   | inject provider interceptor + env; **sidecar only in observe-only mode if telemetry requested** (nothing to ungate) |
+| **gang** (independent) | gang-schedule only | every member is a standalone producer: interceptor + env, real submit, no gate |
+| **gang** (shared)      | —              | producer (index 0): interceptor + env + sidecar, real submit, not gated, group-of-one `<group>-producer`, role=producer; consumers: gate + role=consumer + producer's task id, gang `<group>` (minCount N-1) |
 
 The crucial rule: **sidecar/interceptor injection is triggered by the quantum
 resource request, not the group label.** The group label only controls gang
-scheduling and worker gating. A group leader that requests no quantum resource
-(e.g. a classical pod that happens to set `BRAKET_DEVICE` itself) is just
-gang-scheduled — Fluence injects no sidecar, because there is no quantum work
-for it to coordinate. `BRAKET_DEVICE` (or any direct device selection by the
-user) is the signal that Fluence is *not* scheduling the quantum resource;
-`fluxion.flux-framework.org/qpu` is the signal that it is.
+scheduling and (in shared mode) the producer/consumer split. A grouped pod that
+requests no quantum resource (e.g. a classical pod that happens to set
+`BRAKET_DEVICE` itself) is just gang-scheduled — Fluence injects no sidecar,
+because there is no quantum work for it to coordinate. `BRAKET_DEVICE` (or any
+direct device selection by the user) is the signal that Fluence is *not*
+scheduling the quantum resource; `fluxion.flux-framework.org/qpu` is the signal
+that it is.
 
 ### 3.1 User interface
 
@@ -106,10 +115,19 @@ The user labels all pods in a workflow group with:
 metadata:
   labels:
     fluence.flux-framework.org/group: my-workflow
+  annotations:
+    # only for a quantum gang that shares ONE task across members:
+    fluence.flux-framework.org/coordination: shared
 spec:
   schedulerName: fluence
 ```
 
+`coordination` defaults to `independent` (every member does its own quantum
+work). Set it to `shared` when the members should share a single quantum task —
+then Fluence promotes one member (the indexed-Job completion index 0) to producer
+and gates the rest as consumers. The user authors no roles and no submitter pod;
+the split is derived from the completion index the Job controller already stamps.
+
 I initially started with having the user create a PodGroup object, and I found
 that annoying. I do not want to require a PodGroup object when an annotation is easier,
 and then I have fine-grained control of what the groups looks like. Fluence can handle
@@ -117,7 +135,7 @@ everything else automatically.
 
 The namespace distinction:
 - `fluence.flux-framework.org/*` — Fluence scheduler-plugin concerns
-  (group label, leader annotation, gate name)
+  (group label, coordination mode, gate name)
 - `fluxion.flux-framework.org/*` — Fluxion resource-graph concerns
   (extended resource types, backend attribute env vars)
 
@@ -140,31 +158,57 @@ The three handlers (`pkg/webhook/handlers/`):
 (backend + attributes) sourced from the annotations the scheduler writes in
 PreBind. Generic to all Fluxion resources.
 
-**`gang` (`gang.go`)** — applies when the pod carries the group label. Creates a
-Fluence-owned PodGroup (`minCount: 1`) on first admission, records that first
-pod as the admission-order leader, and stamps `spec.schedulingGroup.podGroupName`
-on every pod in the group so the scheduler gangs them. The user only ever sets
-the LABEL; the webhook translates it into the native field, so the user never
-creates a PodGroup or knows it exists. Knows nothing about quantum — a purely
-classical gang is fully handled here, with no sidecar.
+**`gang` (`gang.go`)** — applies when the pod carries the group label **and does
+not request the quantum resource** (a quantum pod is gang-scheduled by the quantum
+handler instead, which owns the producer/consumer split). Creates a Fluence-owned
+PodGroup on first admission and stamps `spec.schedulingGroup.podGroupName` on
+every pod in the group so the scheduler gangs them. The user only ever sets the
+LABEL; the webhook translates it into the native field, so the user never creates
+a PodGroup or knows it exists. Knows nothing about quantum — a purely classical
+gang is fully handled here, with no sidecar.
 
 **`quantum` (`quantum.go`)** — the only handler that knows about quantum
-resources, gates, and observe semantics. Applies to a pod in either role:
-- **submitter** (requests `fluxion.flux-framework.org/qpu`): a group leader, or
-  a standalone quantum pod. Always gets the interceptor staged (so its task is
-  tagged). Gets the **sidecar** only when there is coordination to do — it is a
-  group leader (workers to ungate) or observe-only telemetry is requested.
-- **worker** (a non-leader member of a group whose recorded leader is a quantum
-  pod): gets the `quantum.braket/ready` scheduling gate, entering
-  `SchedulingGated` state — invisible to Fluxion, consuming no resources — until
-  the leader's sidecar ungates it.
-
-Role is decided by **admission order**, not resource request. In a pod-template
-gang (Deployment/Job/StatefulSet) every pod is identical — same group label,
-every pod requests the quantum resource — so the leader is simply the first pod
-admitted (recorded on the PodGroup); every other pod is a worker, regardless of
-its own request. The gate holds workers at PreEnqueue, so the scheduler does not
-run PreFilter for them (and `groupPods` excludes gated pods) until ungated.
+resources, gates, coordination, and observe semantics. A quantum task's circuit
+comes from user code, so the pod that defines a task must RUN to submit it: submit
+and gate are mutually exclusive per pod, and gating only helps pods that do not
+submit. The handler therefore routes each quantum pod to one of three roles:
+- **standalone / independent** (a lone quantum pod, or any member of a gang in
+  the default `independent` mode): gets the interceptor staged (real mode) so its
+  own task is tagged, performs its own real submit, is never gated, and gets the
+  sidecar only when observe-only telemetry is requested. Independent mode means N
+  members run N tasks and hold N node-waits — honest physics, the user's explicit
+  default.
+- **producer** (in `shared` mode, the completion index 0 member): is placed in
+  its own group-of-one `<group>-producer` (minCount 1) so it schedules alone and
+  runs the SINGLE real submit; interceptor in real mode; gets the **sidecar**,
+  which is told which consumer group to ungate (`FLUENCE_GANG_GROUP`); never
+  gated. The producer is one of the N members, so the application runs exactly N times — never N+1.
+- **consumer** (in `shared` mode, the other N-1 members): joins the `<group>`
+  gang (minCount N-1), gets the `quantum.braket/ready` scheduling gate (entering
+  `SchedulingGated` — invisible to Fluxion, consuming no resources — until the
+  producer's sidecar ungates it), and is told its role
+  (`FLUENCE_COORDINATION_ROLE=consumer`) and the producer's task id
+  (`FLUENCE_QUANTUM_JOB_ID`, stamped at ungate). A consumer does **not** submit —
+  it fetches the shared result by that id — so it gets neither the interceptor nor
+  any faux flag.
+
+Role is decided by the **completion index**, not resource request or admission
+order. In an indexed Job every pod is identical — same group label, same image,
+every pod requests the quantum resource — so the producer is simply the pod the
+Job controller stamps with `batch.kubernetes.io/job-completion-index: "0"`; every
+other index is a consumer. (For loose pods with no completion index, the first
+arrival claims the producer slot by the absence of the `<group>-producer`
+PodGroup; an indexed Job is recommended for deterministic election.) The two
+groups carry independent minCounts (producer=1, consumers=N-1), which is what lets
+the producer schedule and submit while the consumers stay gated — no deadlock, and
+no separate submitter pod.
+
+The workload is **role-aware**: every shared-mode pod is told its role positively
+via `FLUENCE_COORDINATION_ROLE` (the webhook's election is the single source of
+truth), and the application branches on it — the producer submits, a consumer
+fetches the shared result by `FLUENCE_QUANTUM_JOB_ID`. The same image plays both
+roles with one cheap branch; there is no submit-interception magic and no faux
+flag.
 
 ### 3.3 Interceptor and Model C delivery
 
@@ -207,7 +251,11 @@ def patched_run(self, task_specification, *args, **kwargs):
 This is completely transparent to the user application — no code changes, no
 package install, no vendor SDK added to the user image (the hook patches
 whatever SDK the user already has).
-leader pod, sharing its AWS credentials and network namespace.
+
+### 3.4 Sidecar controller
+
+The sidecar runs as a container inside the producer pod, alongside the
+application container, sharing the pod's AWS credentials and network namespace.
 
 ```console
 1. READ  FLUXION_ARN, FLUENCE_POD_UID from env
@@ -221,7 +269,7 @@ leader pod, sharing its AWS credentials and network namespace.
    On timeout: fall back to time-window heuristic (tasks submitted
    after pod start time on the same device).
 
-3. DISCOVER worker pods:
+3. DISCOVER consumer pods:
    List pods in namespace with fluence.flux-framework.org/group label
    matching this pod's group, having quantum.braket/ready gate present.
 
@@ -229,7 +277,7 @@ leader pod, sharing its AWS credentials and network namespace.
    Log position for experiment instrumentation.
 
 5. WHEN  is_ready_to_ungate(task)  (position == 1 OR state == RUNNING):
-   For each worker pod:
+   For each consumer pod:
      kubectl annotate pod <name> fluence.flux-framework.org/quantum-job-id=<job_id>
      kubectl patch pod <name> --type=json \
        -p='[{"op":"add","path":"/spec/priorityClassName",
@@ -240,7 +288,7 @@ leader pod, sharing its AWS credentials and network namespace.
 ```
 
 The priority class and gate removal are applied atomically in one patch.
-This ensures workers enter the scheduling queue with high priority
+This ensures consumers enter the scheduling queue with high priority
 immediately, without a window where they are ungated but low-priority.
 
 ### 3.5 Priority and preemption
@@ -250,10 +298,10 @@ by the sidecar at ungate time, not by the webhook at pod creation. Setting
 it at creation time causes an admission controller conflict (priority integer
 already defaulted to 0).
 
-When workers are ungated with high priority, Kubernetes preemption evicts
+When consumers are ungated with high priority, Kubernetes preemption evicts
 lower-priority pods to make room. Fluence's pod deletion informer catches
 these evictions, calls `Cancel(jobid)` in Fluxion, and frees the graph
-vertices so Fluxion can allocate them to the incoming high-priority workers.
+vertices so Fluxion can allocate them to the incoming high-priority consumers.
 
 ### 3.6 Classical allocation follows quantum execution order
 
@@ -298,7 +346,7 @@ Provider:
     find_my_task(pod_uid, ...)    # search by the fluence-pod-uid tag → opaque Task
     is_ready_to_ungate(task)      # decision primitive: position==1 OR running
     queue_position(task)          # optional richer telemetry; None if unavailable
-    job_id(task)                  # cross-vendor id handed to workers (NOT the ARN)
+    job_id(task)                  # cross-vendor id handed to consumers (NOT the ARN)
 ```
 
 Vendor-specific identifiers (a Braket task ARN, an IBM job id, a GCP operation
@@ -320,7 +368,7 @@ matching provider). Nothing else changes — no build script, no concatenation.
 
 #### Observe-only (telemetry) mode
 
-A quantum pod that is *not* a gang (a single quantum pod, no workers to ungate)
+A quantum pod that is *not* a gang (a single quantum pod, no consumers to ungate)
 gets the interceptor and env only — no sidecar — by default, so no surprise
 machinery is injected. Telemetry is opt-in via the label
 `fluence.flux-framework.org/observe: "true"`, surfaced to the sidecar as
@@ -345,7 +393,7 @@ singleton and gang runs.
 
 ### 5.1 Preemption disrupts lower-priority work
 
-At position==1, workers preempt running lower-priority pods. This work is
+At position==1, consumers preempt running lower-priority pods. This work is
 re-queued and eventually runs, but there is a disruption cost. A future
 design using a `MatchReserveAt(time_at, spec)` Fluxion primitive — where
 `time_at` is supplied by the QPU vendor via an ETA or task-start event —
@@ -364,7 +412,7 @@ heuristic (e.g. a time window) rather than the tag mechanism.
 
 ### 5.3 Single task per workflow
 
-The sidecar tracks one QPU task ARN per leader pod. Parameter-shift gradient
+The sidecar tracks one QPU task ARN per producer pod. Parameter-shift gradient
 estimation and other multi-circuit workflows require tracking a set of ARNs.
 See the scatter design issue for the proposed extension.
 
@@ -388,7 +436,7 @@ function to be exposed through the Go bindings with a `starttime` parameter.
 
 For workflows with N independent QPU tasks each paired with one classical
 pod, an index-based pairing mechanism (`fluence.flux-framework.org/index`)
-would allow the sidecar to ungate specific worker pods when their specific
+would allow the sidecar to ungate specific consumer pods when their specific
 task reaches position==1. See the open scatter design issue.
 
 ### 6.3 Vendor task-start events
@@ -401,7 +449,7 @@ precise ungating.
 ### 6.4 PostFilter topology-aware preemption
 
 A custom Fluence `PostFilter` plugin would ask Fluxion which graph vertices
-are blocking a high-priority worker pod, then target preemption at exactly
+are blocking a high-priority consumer pod, then target preemption at exactly
 those pods — rather than the default Kubernetes preemption which picks
 lowest-priority pods regardless of graph topology. This ensures preemption
 always produces a valid Fluxion allocation.
diff --git a/examples/quantum-pod.yaml b/examples/quantum-pod.yaml
index a619df9..b5dfbc9 100644
--- a/examples/quantum-pod.yaml
+++ b/examples/quantum-pod.yaml
@@ -2,7 +2,7 @@
 # via resources (the fluence device plugin advertises fluxion.flux-framework.org/qpu
 # on every node, so NodeResourcesFit is satisfied). Fluence's PreFilter matches
 # the request against the resource graph and picks a backend, the webhook injects
-# QRMI_BACKEND (the allocated backend) automatically, and note we can add other
+# FLUXION_BACKEND (the allocated backend) automatically, and note we can add other
 # envars here in the future. I chose a webhook because I think this is going to
 # be a requirement, and the pod is immutable after creation. 
 # Then the container submits via qrmi-go (the separate qrmi-sampler image).
@@ -27,4 +27,4 @@ spec:
         requests:
           fluxion.flux-framework.org/qpu: "1"
         limits:
-          fluxion.flux-framework.org/qpu: "1"
\ No newline at end of file
+          fluxion.flux-framework.org/qpu: "1"
diff --git a/examples/test/e2e/gang/multi-gang-contention.yaml b/examples/test/e2e/gang/multi-gang-contention.yaml
new file mode 100644
index 0000000..14b0fd8
--- /dev/null
+++ b/examples/test/e2e/gang/multi-gang-contention.yaml
@@ -0,0 +1,40 @@
+# Two gangs that cannot both place: fluxion allocates one core per slot, so two
+# 2-pod gangs need 4 cores, but the cluster graphs ~3 (3 workers, ~1 core each).
+# One gang places entirely; the loser stays FULLY pending (all-or-nothing), never partial.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gang-a
+spec:
+  replicas: 2
+  selector: {matchLabels: {app: gang-a}}
+  template:
+    metadata:
+      labels: {app: gang-a, fluence.flux-framework.org/group: gang-a}
+      annotations: {fluence.flux-framework.org/group-size: "2"}
+    spec:
+      schedulerName: fluence
+      containers:
+        - name: w
+          image: busybox
+          command: ["sleep", "3600"]
+          resources: {requests: {cpu: "1"}}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gang-b
+spec:
+  replicas: 2
+  selector: {matchLabels: {app: gang-b}}
+  template:
+    metadata:
+      labels: {app: gang-b, fluence.flux-framework.org/group: gang-b}
+      annotations: {fluence.flux-framework.org/group-size: "2"}
+    spec:
+      schedulerName: fluence
+      containers:
+        - name: w
+          image: busybox
+          command: ["sleep", "3600"]
+          resources: {requests: {cpu: "1"}}
diff --git a/examples/test/e2e/gang/multi-gang-requeue.yaml b/examples/test/e2e/gang/multi-gang-requeue.yaml
new file mode 100644
index 0000000..a8e8636
--- /dev/null
+++ b/examples/test/e2e/gang/multi-gang-requeue.yaml
@@ -0,0 +1,48 @@
+# Requeue-on-capacity + gang-atomicity test (test/e2e/gang/09).
+# gang-win: a 2-pod gang that runs a SHORT job and COMPLETES (pods -> Succeeded),
+#           freeing its nodes.
+# gang-wait: a 2-pod gang needing the same nodes; loses the initial race and sits
+#            Unschedulable. When gang-win completes, gang-wait must be re-attempted
+#            (via the shortened unschedulable-recheck timeout) and place atomically.
+# On a 3-worker (~3-core) cluster the two 2-pod gangs (4 cores) cannot co-run.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: gang-win
+spec:
+  completions: 2
+  parallelism: 2
+  completionMode: Indexed
+  template:
+    metadata:
+      labels: {fluence.flux-framework.org/group: gang-win}
+      annotations: {fluence.flux-framework.org/group-size: "2"}
+    spec:
+      schedulerName: fluence
+      restartPolicy: Never
+      containers:
+        - name: w
+          image: busybox
+          command: ["sh","-c","sleep 30"]   # completes, frees nodes
+          resources: {requests: {cpu: "1"}}
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: gang-wait
+spec:
+  completions: 2
+  parallelism: 2
+  completionMode: Indexed
+  template:
+    metadata:
+      labels: {fluence.flux-framework.org/group: gang-wait}
+      annotations: {fluence.flux-framework.org/group-size: "2"}
+    spec:
+      schedulerName: fluence
+      restartPolicy: Never
+      containers:
+        - name: w
+          image: busybox
+          command: ["sh","-c","sleep 10"]
+          resources: {requests: {cpu: "1"}}
diff --git a/examples/test/e2e/gang/multi-gang.yaml b/examples/test/e2e/gang/multi-gang.yaml
new file mode 100644
index 0000000..9bfa67c
--- /dev/null
+++ b/examples/test/e2e/gang/multi-gang.yaml
@@ -0,0 +1,25 @@
+# Multi-pod gang via the WEBHOOK path (the path the experiments use).
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gang3
+spec:
+  replicas: 2
+  selector:
+    matchLabels: {app: gang3}
+  template:
+    metadata:
+      labels:
+        app: gang3
+        fluence.flux-framework.org/group: gang3
+      annotations:
+        fluence.flux-framework.org/group-size: "2"
+    spec:
+      schedulerName: fluence
+      containers:
+        - name: worker
+          image: busybox
+          command: ["sleep", "3600"]
+          resources:
+            requests:
+              cpu: "1"
diff --git a/examples/single-podgroup.yaml b/examples/test/e2e/gang/single-podgroup.yaml
similarity index 100%
rename from examples/single-podgroup.yaml
rename to examples/test/e2e/gang/single-podgroup.yaml
diff --git a/examples/test/e2e/quantum/quantum-gang-pods.yaml b/examples/test/e2e/quantum/quantum-gang-pods.yaml
new file mode 100644
index 0000000..aacce44
--- /dev/null
+++ b/examples/test/e2e/quantum/quantum-gang-pods.yaml
@@ -0,0 +1,62 @@
+# Shared-coordination quantum gang for the e2e (producer/consumer, no submitter).
+#
+# Two identical pods, both requesting the quantum resource, in group "qgang" with
+# coordination=shared. The user authors NO roles and NO submitter pod. The webhook
+# splits the gang by completion index:
+#   qgang-0 (index 0)  -> PRODUCER: its own group-of-one "qgang-producer"
+#                         (minCount 1), real submit, sidecar, NOT gated. It is a
+#                         real member, so the app runs N times, never N+1.
+#   qgang-1 (index 1+)  -> CONSUMER: the "qgang" gang (minCount N-1=1), gated on
+#                         quantum.braket/ready + preempting priority, told
+#                         role=consumer (no interceptor); it fetches the producer's result by id.
+#
+# These are raw pods (not a Job) so the e2e can reference stable names; the
+# completion-index annotation is set manually to make producer election
+# deterministic (a real workload uses an indexed Job, which the controller stamps
+# with batch.kubernetes.io/job-completion-index automatically). group-size makes N
+# deterministic for raw pods, which have no owning Job to derive it from. busybox
+# stands in for the quantum app; the interceptor staging fails soft (no python),
+# which is fine for the structural assertions in 02/03/04.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: qgang-0
+  labels:
+    app: qgang
+    fluence.flux-framework.org/group: qgang
+  annotations:
+    fluence.flux-framework.org/group-size: "2"
+    fluence.flux-framework.org/coordination: shared
+    batch.kubernetes.io/job-completion-index: "0"   # -> producer
+spec:
+  schedulerName: fluence
+  restartPolicy: Never
+  containers:
+    - name: app
+      image: busybox
+      command: ["sh", "-c", "echo gang member; sleep 600"]
+      resources:
+        requests: {fluxion.flux-framework.org/qpu: "1"}
+        limits:   {fluxion.flux-framework.org/qpu: "1"}
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: qgang-1
+  labels:
+    app: qgang
+    fluence.flux-framework.org/group: qgang
+  annotations:
+    fluence.flux-framework.org/group-size: "2"
+    fluence.flux-framework.org/coordination: shared
+    batch.kubernetes.io/job-completion-index: "1"   # -> consumer
+spec:
+  schedulerName: fluence
+  restartPolicy: Never
+  containers:
+    - name: app
+      image: busybox
+      command: ["sh", "-c", "echo gang member; sleep 600"]
+      resources:
+        requests: {fluxion.flux-framework.org/qpu: "1"}
+        limits:   {fluxion.flux-framework.org/qpu: "1"}
diff --git a/examples/test/e2e/quantum-pod-mock.yaml b/examples/test/e2e/quantum/quantum-pod-mock.yaml
similarity index 100%
rename from examples/test/e2e/quantum-pod-mock.yaml
rename to examples/test/e2e/quantum/quantum-pod-mock.yaml
diff --git a/examples/test/e2e/sidecar-mock-pods.yaml b/examples/test/e2e/sidecar-mock-pods.yaml
deleted file mode 100644
index fb223a7..0000000
--- a/examples/test/e2e/sidecar-mock-pods.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
----
-# Leader pod — first admitted, webhook creates PodGroup, injects sidecar, creates RBAC
-# User only needs schedulerName: fluence and the quantum-group label.
-# No PodGroup object needed — Fluence creates it.
-apiVersion: v1
-kind: Pod
-metadata:
-  name: sidecar-test-leader
-  labels:
-    app: fluence-sidecar-test
-    fluence.flux-framework.org/group: sidecar-test-group
-spec:
-  schedulerName: fluence
-  restartPolicy: Never
-  containers:
-    - name: mock-quantum-app
-      image: busybox
-      command:
-        - sh
-        - -c
-        - |
-          echo "mock-quantum-app: running"
-          echo "arn:aws:braket:us-east-1:123456:quantum-task/mock-abc123" \
-            > /tmp/task-arn
-          echo "mock-quantum-app: task ARN written"
-          sleep 3600
-      resources:
-        requests:
-          fluxion.flux-framework.org/qpu: "1"
-        limits:
-          fluxion.flux-framework.org/qpu: "1"
-
----
-# Worker pod — classical (no QPU). Gated by the webhook because it is a
-# non-leader member of a group whose leader is a quantum pod.
-apiVersion: v1
-kind: Pod
-metadata:
-  name: sidecar-test-worker
-  labels:
-    app: fluence-sidecar-test
-    fluence.flux-framework.org/group: sidecar-test-group
-spec:
-  schedulerName: fluence
-  restartPolicy: Never
-  containers:
-    - name: classical-worker
-      image: busybox
-      command:
-        - sh
-        - -c
-        - |
-          echo "classical-worker: started"
-          echo "TASK_ARN=$BRAKET_TASK_ARN"
-          sleep 10
-      env:
-        - name: FLUENCE_QUANTUM_JOB_ID
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.annotations['fluence.flux-framework.org/quantum-job-id']
-      resources:
-        requests:
-          cpu: "100m"
-          memory: "128Mi"
diff --git a/pkg/fluence/fluence.go b/pkg/fluence/fluence.go
index a1a10e1..6c3dc13 100644
--- a/pkg/fluence/fluence.go
+++ b/pkg/fluence/fluence.go
@@ -77,14 +77,61 @@ type Fluence struct {
 	mu sync.Mutex
 	// placement maps a group key to its allocation (nodes, backend, jobids).
 	placement map[string]groupAlloc
+	// excludedNodes maps a group key to the set of nodes that are GENUINELY
+	// INCOMPATIBLE with that group (PostFilter saw UnschedulableAndUnresolvable
+	// from another plugin: a taint, affinity, or constraint Fluxion's graph does
+	// not model). PreFilter feeds them back as an RFC 31 negated-hostlist
+	// constraint so the re-match is steered onto other nodes. Nodes that were
+	// merely BUSY are deliberately NOT recorded here (excluding them would turn
+	// transient contention into permanent group failure). The set only grows for a
+	// group, so the exclusion-driven re-match is finite, and it is cleared on
+	// teardown. Guarded by mu.
+	excludedNodes map[string]map[string]bool
 }
 
 var (
-	_ fwk.PreFilterPlugin = (*Fluence)(nil)
-	_ fwk.FilterPlugin    = (*Fluence)(nil)
-	_ fwk.PreBindPlugin   = (*Fluence)(nil)
+	_ fwk.PreFilterPlugin  = (*Fluence)(nil)
+	_ fwk.FilterPlugin     = (*Fluence)(nil)
+	_ fwk.PostFilterPlugin = (*Fluence)(nil)
+	_ fwk.ReservePlugin    = (*Fluence)(nil)
+	_ fwk.PreBindPlugin    = (*Fluence)(nil)
 )
 
+// schedulableNodes returns only the nodes a normal pod could actually be placed
+// on, so the Fluxion graph never offers a node that Kubernetes will then reject
+// in Filter. Two kinds are dropped:
+//
+//   - cordoned nodes (spec.unschedulable), and
+//   - nodes carrying a NoSchedule/NoExecute taint (e.g. the control-plane's
+//     node-role.kubernetes.io/control-plane:NoSchedule).
+//
+// Without this, Fluxion can place a gang slot on the control-plane (it looks like
+// a valid virtual=false compute node to the graph), the pod is then rejected by
+// TaintToleration with UnschedulableAndUnresolvable, and PostFilter abandons the
+// whole allocation — on a small cluster that strands the gang permanently. We do
+// not attempt to honor specific tolerations here: gang workloads in this setup do
+// not tolerate node taints, so any NoSchedule/NoExecute taint means "not for us".
+func schedulableNodes(nodes []corev1.Node) []corev1.Node {
+	kept := make([]corev1.Node, 0, len(nodes))
+nextNode:
+	for i := range nodes {
+		node := nodes[i]
+		// Cordoned nodes never accept new pods.
+		if node.Spec.Unschedulable {
+			continue
+		}
+		// Any hard taint disqualifies the node; tolerations are not consulted.
+		for _, taint := range node.Spec.Taints {
+			switch taint.Effect {
+			case corev1.TaintEffectNoSchedule, corev1.TaintEffectNoExecute:
+				continue nextNode
+			}
+		}
+		kept = append(kept, node)
+	}
+	return kept
+}
+
 // New builds the plugin: discover cluster nodes, optionally inject quantum
 // resources, write the JGF graph, initialize the Fluxion matcher, and register
 // the delete handlers that cancel allocations when their owning object is gone.
@@ -129,7 +176,7 @@ func New(ctx context.Context, _ runtime.Object, h fwk.Handle) (fwk.Plugin, error
 		}
 	}
 
-	jgfBytes, err := cluster.BuildGraph(nodeList.Items, opts)
+	jgfBytes, err := cluster.BuildGraph(schedulableNodes(nodeList.Items), opts)
 	if err != nil {
 		return nil, fmt.Errorf("build resource graph: %w", err)
 	}
@@ -161,10 +208,11 @@ func New(ctx context.Context, _ runtime.Object, h fwk.Handle) (fwk.Plugin, error
 	fluxion.Init(tmp.Name(), os.Getenv("FLUENCE_MATCH_POLICY"), "")
 
 	f := &Fluence{
-		handle:       h,
-		matcher:      fluxion,
-		knownDevices: knownDevices,
-		placement:    map[string]groupAlloc{},
+		handle:        h,
+		matcher:       fluxion,
+		knownDevices:  knownDevices,
+		placement:     map[string]groupAlloc{},
+		excludedNodes: map[string]map[string]bool{},
 	}
 	f.registerCancelHandlers()
 	// Periodic + startup reconcile of completed Fluence-created PodGroups, so a
@@ -251,7 +299,15 @@ func (f *Fluence) PreFilter(
 		return nil, fwk.AsStatus(err)
 	}
 
-	specs, err := placement.JobspecsForGroup(group, pods, f.knownDevices)
+	f.mu.Lock()
+	excluded := make([]string, 0, len(f.excludedNodes[group]))
+	for n := range f.excludedNodes[group] {
+		excluded = append(excluded, n)
+	}
+	f.mu.Unlock()
+	sort.Strings(excluded) // deterministic constraint for stable matching/logs
+
+	specs, err := placement.JobspecsForGroup(group, pods, f.knownDevices, excluded)
 	if err != nil {
 		return nil, fwk.AsStatus(err)
 	}
@@ -390,6 +446,103 @@ func (f *Fluence) Filter(
 	return fwk.NewStatus(fwk.Unschedulable, "node not in fluxion allocation for this group")
 }
 
+// PostFilter runs when a pod could not be scheduled after Filter — for a Fluence
+// group, this means the cached Fluxion allocation's nodes did not all survive the
+// other scheduler plugins' Filter checks. Without intervention the group would
+// retry forever against the same cached allocation while the Fluxion reservation
+// leaked, because PreFilter short-circuits on the cache and nothing else releases
+// it on a scheduling failure.
+//
+// We always abandon the failed allocation here (cancel the Fluxion jobids, drop
+// the cached placement) so the next PreFilter re-matches fresh. The careful part
+// is WHICH nodes we then permanently exclude from the group's future matches,
+// because a group reaches PostFilter for two very different reasons and they must
+// be handled oppositely (see fwk.Code docs):
+//
+//   - UnschedulableAndUnresolvable: the node genuinely cannot host this pod and
+//     re-trying it is pointless (a taint the pod does not tolerate, node affinity
+//     mismatch, a constraint Fluxion's graph does not model). EXCLUDE it; the
+//     next PreFilter feeds the exclusion set back as an RFC 31 negated-hostlist
+//     constraint so Fluxion is steered onto other nodes.
+//
+//   - Unschedulable (plain): the node could host the pod, just not at this
+//     instant (it is momentarily full). This is TRANSIENT. Do NOT exclude it —
+//     excluding a merely-busy node converts ordinary contention into permanent
+//     group failure, and in a saturated cluster (a gang that needs the whole node
+//     set) it strands the gang forever even though it would fit once a node frees.
+//
+// So contention excludes nothing and the group recovers by waiting/retrying;
+// only durable incompatibility accumulates in excludedNodes (cleared on group
+// teardown), which keeps the exclusion-driven re-match finite and correct.
+func (f *Fluence) PostFilter(
+	ctx context.Context,
+	state fwk.CycleState,
+	pod *corev1.Pod,
+	filteredNodeStatusMap fwk.NodeToStatusReader,
+) (*fwk.PostFilterResult, *fwk.Status) {
+	group := groupKey(pod)
+
+	f.mu.Lock()
+	alloc, ok := f.placement[group]
+	if !ok {
+		// No cached allocation for this group — nothing of ours to reconcile.
+		// (Another plugin's PostFilter, or a non-group pod.)
+		f.mu.Unlock()
+		return nil, fwk.NewStatus(fwk.Unschedulable)
+	}
+	// Classify each node of the abandoned allocation using the per-node status
+	// the framework hands us, still holding mu (it guards both placement and
+	// excludedNodes and is released only after the cached placement is deleted):
+	//
+	//   - UnschedulableAndUnresolvable: the node cannot host the pod and
+	//     re-trying it is pointless (an untolerated taint, node affinity
+	//     mismatch, a constraint Fluxion's graph does not model) -> record it in
+	//     excludedNodes so the re-match is steered elsewhere.
+	//   - anything else (plain Unschedulable, Success, or no status at all): the
+	//     node is only busy right now -> keep it eligible so the group can land
+	//     there once capacity frees.
+	//
+	// "The cluster is just full at this instant" is the common reason a group
+	// reaches PostFilter, so contention must exclude nothing: the group abandons
+	// this cycle's reservation and retries the same nodes when they free.
+	if f.excludedNodes[group] == nil {
+		f.excludedNodes[group] = map[string]bool{}
+	}
+	var incompatible, busy []string
+	for _, n := range alloc.place.Nodes {
+		var code fwk.Code
+		if filteredNodeStatusMap != nil {
+			if st := filteredNodeStatusMap.Get(n); st != nil {
+				code = st.Code()
+			}
+		}
+		if code == fwk.UnschedulableAndUnresolvable {
+			f.excludedNodes[group][n] = true
+			incompatible = append(incompatible, n)
+		} else {
+			// plain Unschedulable, Success, or unknown/nil -> transient, keep.
+			busy = append(busy, n)
+		}
+	}
+	excludedCount := len(f.excludedNodes[group])
+	jobids := alloc.jobids
+	delete(f.placement, group)
+	f.mu.Unlock()
+
+	// Cancel the abandoned allocation's jobids only after mu is released, so the
+	// Fluxion graph does not leak the reservation while the group retries.
+	f.cancelJobids(jobids)
+
+	log.Printf("[fluence] group %s unschedulable: abandoning allocation (jobids %v); "+
+		"incompatible(excluded)=%v busy(retryable, NOT excluded)=%v; %d node(s) excluded total",
+		group, jobids, incompatible, busy, excludedCount)
+
+	// Returning Unschedulable (no nominated node) lets the pod be requeued; the
+	// next PreFilter re-matches (with any incompatible nodes excluded, but busy
+	// nodes still in play). Fluxion, not PostFilter preemption, chooses placement.
+	return nil, fwk.NewStatus(fwk.Unschedulable)
+}
+
 // PreBindPreFlight runs before PreBind. It returns Success when we have a cached
 // allocation for the pod's group (so PreBind can record the jobid, and stamp the
 // backend for a quantum pod), and Skip otherwise.
@@ -408,12 +561,59 @@ func (f *Fluence) PreBindPreFlight(
 	return nil, fwk.NewStatus(fwk.Success)
 }
 
+// Reserve writes the allocated backend (and its matched attributes) onto the pod
+// during the scheduling cycle instead of waiting for PreBind. The webhook exposes
+// these annotations to the container as FLUXION_BACKEND / FLUXION_<ATTR> through
+// downward-API env, which the kubelet resolves once at container start and never
+// refreshes — so the annotation has to exist well before the container starts.
+// PreBind belongs to the asynchronous binding cycle, milliseconds ahead of Bind,
+// and therefore races the kubelet; Reserve is earlier and synchronous, leaving
+// time for the annotation to propagate. A stamping error becomes an error status.
+func (f *Fluence) Reserve(
+	ctx context.Context,
+	state fwk.CycleState,
+	pod *corev1.Pod,
+	nodeName string,
+) *fwk.Status {
+	stampErr := f.stampBackend(ctx, pod)
+	if stampErr == nil {
+		return fwk.NewStatus(fwk.Success)
+	}
+	return fwk.AsStatus(fmt.Errorf("stamp backend annotations: %w", stampErr))
+}
+
+// Unreserve is a no-op that exists to satisfy fwk.ReservePlugin. A backend
+// annotation left by a reservation that was later rejected is not cleared, which
+// saves an API call; stampBackend rewrites it on a later attempt only when that
+// allocation has a backend. NOTE(review): confirm stale keys are harmless.
+func (f *Fluence) Unreserve(ctx context.Context, state fwk.CycleState, pod *corev1.Pod, nodeName string) {
+}
+
+// stampBackend patches the group's allocated backend and matched attributes onto
+// the pod (idempotent merge patch); no-op without a cached allocation or backend.
+func (f *Fluence) stampBackend(ctx context.Context, pod *corev1.Pod) error {
+	key := groupKey(pod)
+	f.mu.Lock()
+	cached, found := f.placement[key]
+	f.mu.Unlock()
+	if !found || cached.place.Backend == "" {
+		return nil
+	}
+	annotations := map[string]string{placement.BackendAnnotation: cached.place.Backend}
+	for attr, value := range cached.place.BackendAttributes {
+		annotations[placement.AttributeAnnotationPrefix+attr] = value
+	}
+	log.Printf("[fluence] group %s -> backend %q attrs %v (reserve-stamped, nodes %v)",
+		key, cached.place.Backend, cached.place.BackendAttributes, cached.place.Nodes)
+	return f.patchPodAnnotations(ctx, pod.Namespace, pod.Name, annotations)
+}
+
 // PreBind records, in the commit phase, the durable state for this group:
-//   - the Fluxion jobid onto the owning object (the PodGroup for a gang, else the
-//     pod) so the allocation can be cancelled when that object is deleted;
-//   - for a quantum group, the allocated backend onto the pod, which the webhook-
-//     injected downward-API env surfaces as QRMI_BACKEND (container env is
-//     immutable post-creation, so the value must travel via an annotation).
+// the Fluxion jobid onto the owning object (the PodGroup for a gang, else the
+// pod) so the allocation can be cancelled when that object is deleted. The
+// backend annotation is stamped earlier, in Reserve (see stampBackend), because
+// the webhook-injected downward-API env (FLUXION_BACKEND) must be present before
+// the container starts; PreBind is too late and races the kubelet.
 func (f *Fluence) PreBind(
 	ctx context.Context,
 	state fwk.CycleState,
@@ -430,20 +630,10 @@ func (f *Fluence) PreBind(
 	if err := f.recordJobIDs(ctx, pod, alloc.jobids); err != nil {
 		return fwk.AsStatus(fmt.Errorf("record jobids: %w", err))
 	}
-	if alloc.place.Backend != "" {
-		// Stamp the backend name and all matched attributes in one patch. The
-		// webhook injects a normalized env per annotation so the workload reads
-		// exactly what it matched (backend + region/qubits/...).
-		ann := map[string]string{placement.BackendAnnotation: alloc.place.Backend}
-		for k, v := range alloc.place.BackendAttributes {
-			ann[placement.AttributeAnnotationPrefix+k] = v
-		}
-		log.Printf("[fluence] group %s -> backend %q attrs %v (nodes %v, jobids %v)",
-			groupKey(pod), alloc.place.Backend, alloc.place.BackendAttributes,
-			alloc.place.Nodes, alloc.jobids)
-		if err := f.patchPodAnnotations(ctx, pod.Namespace, pod.Name, ann); err != nil {
-			return fwk.AsStatus(fmt.Errorf("stamp backend annotations: %w", err))
-		}
+	// Backstop: if Reserve was skipped for any reason, ensure the backend is
+	// stamped before bind anyway (idempotent).
+	if err := f.stampBackend(ctx, pod); err != nil {
+		return fwk.AsStatus(fmt.Errorf("stamp backend annotations: %w", err))
 	}
 	return fwk.NewStatus(fwk.Success)
 }
@@ -637,6 +827,20 @@ func (f *Fluence) reconcileGroup(ctx context.Context, namespace, group string) {
 	}
 	log.Printf("fluence: reconciled completed gang %s/%s — deleted Fluence-created PodGroup, allocation freed",
 		namespace, group)
+
+	// Producer-group cleanup: in shared coordination the gang is split into the
+	// consumer group <group> (this group) and the producer's group-of-one
+	// <group>-producer (a Fluence-created PodGroup, minCount 1). The producer POD
+	// is a real workload member (indexed-Job index 0) and must NOT be deleted —
+	// only its PodGroup, as a backstop to free its allocation (its own reconcile
+	// pass also reaps it). Producer groups are skipped to avoid recursing on
+	// <group>-producer-producer. Delete errors if absent, so log only on success.
+	if !strings.HasSuffix(group, producerGroupSuffix) {
+		pg := group + producerGroupSuffix
+		if err := f.handle.ClientSet().SchedulingV1alpha2().PodGroups(namespace).Delete(ctx, pg, metav1.DeleteOptions{}); err == nil {
+			log.Printf("fluence: reaped producer PodGroup %s/%s for gang %s", namespace, pg, group)
+		}
+	}
 }
 
 // reconcileGraceForEmpty is how long a Fluence-created PodGroup with no live
@@ -648,6 +852,12 @@ const reconcileGraceForEmpty = 2 * time.Minute
 // package (the scheduler must not depend on the webhook). Kept in sync with it.
 const webhookGroupLabel = "fluence.flux-framework.org/group"
 
+// producerGroupSuffix mirrors handlers.ProducerGroupSuffix: in shared
+// coordination the producer (indexed-Job index 0) is its own group-of-one named
+// <g>-producer. Duplicated here to avoid importing the webhook handlers package
+// into the scheduler plugin; keep the two in sync.
+const producerGroupSuffix = "-producer"
+
 // onPodGroupDeleted frees the gang's allocation when its PodGroup is deleted.
 func (f *Fluence) onPodGroupDeleted(obj interface{}) {
 	pg, ok := obj.(*schedv1a2.PodGroup)
@@ -718,6 +928,7 @@ func (f *Fluence) cancelGroup(key string, ann map[string]string) {
 
 	f.mu.Lock()
 	delete(f.placement, key)
+	delete(f.excludedNodes, key) // drop accumulated exclusions so a future group reusing the name starts clean
 	f.mu.Unlock()
 }
 
diff --git a/pkg/fluence/fluence_test.go b/pkg/fluence/fluence_test.go
index 998e1a7..5228f97 100644
--- a/pkg/fluence/fluence_test.go
+++ b/pkg/fluence/fluence_test.go
@@ -1,6 +1,7 @@
 package fluence
 
 import (
+	"context"
 	"errors"
 	"testing"
 
@@ -12,6 +13,7 @@ import (
 	schedv1a2 "k8s.io/api/scheduling/v1alpha2"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/tools/cache"
+	fwk "k8s.io/kube-scheduler/framework"
 )
 
 // fakeMatcher records Cancel calls so cancel behavior can be asserted without
@@ -46,7 +48,11 @@ func (m *fakeMatcher) Cancel(jobid uint64) error {
 }
 
 func newTestFluence(m matcher) *Fluence {
-	return &Fluence{matcher: m, placement: map[string]groupAlloc{}}
+	return &Fluence{
+		matcher:       m,
+		placement:     map[string]groupAlloc{},
+		excludedNodes: map[string]map[string]bool{},
+	}
 }
 
 func ann(jobid string) map[string]string {
@@ -345,3 +351,205 @@ func twoSpecs() []*jobspec.Jobspec {
 		{Version: 9999},
 	}
 }
+
+// --- PostFilter allocation reconciliation -----------------------------------
+
+// fakeNodeStatus is a bare-bones fwk.NodeToStatusReader for the PostFilter tests,
+// keyed by node name: UnschedulableAndUnresolvable = incompatible, Unschedulable = busy.
+type fakeNodeStatus map[string]fwk.Code
+
+func (s fakeNodeStatus) Get(node string) *fwk.Status {
+	code, known := s[node]
+	if !known {
+		return nil
+	}
+	return fwk.NewStatus(code)
+}
+func (s fakeNodeStatus) NodesForStatusCode(fwk.NodeInfoLister, fwk.Code) ([]fwk.NodeInfo, error) {
+	return nil, nil
+}
+
+// PostFilter must drop the failed allocation (jobids cancelled, cache entry
+// removed) while excluding nothing but UnschedulableAndUnresolvable nodes.
+// A node that was only busy (plain Unschedulable) has to remain eligible.
+func TestPostFilterExcludesOnlyIncompatibleNodes(t *testing.T) {
+	const gk = "default/training"
+	fm := &fakeMatcher{}
+	sched := newTestFluence(fm)
+	sched.placement[gk] = groupAlloc{
+		place:  placement.Placement{Nodes: []string{"node-a", "node-b", "node-c"}},
+		jobids: []uint64{11, 12},
+	}
+	member := groupedPod("default", "training-0", "training", nil)
+
+	// node-a is incompatible (taint), node-b is busy, node-c passed Filter.
+	codes := fakeNodeStatus{
+		"node-a": fwk.UnschedulableAndUnresolvable,
+		"node-b": fwk.Unschedulable,
+		"node-c": fwk.Success,
+	}
+
+	_, got := sched.PostFilter(context.Background(), nil, member, codes)
+	if got == nil || got.Code() != fwk.Unschedulable {
+		t.Fatalf("expected Unschedulable status, got %v", got)
+	}
+	if _, cached := sched.placement[gk]; cached {
+		t.Fatal("placement cache should be deleted after PostFilter")
+	}
+	if n := len(fm.cancelled); n != 2 {
+		t.Fatalf("expected both jobids cancelled, got %v", fm.cancelled)
+	}
+	banned := sched.excludedNodes[gk]
+	if !banned["node-a"] {
+		t.Fatalf("incompatible node-a should be excluded, set=%v", banned)
+	}
+	if banned["node-b"] || banned["node-c"] {
+		t.Fatalf("busy/ok nodes must NOT be excluded (would strand a saturated gang), set=%v", banned)
+	}
+	if len(banned) != 1 {
+		t.Fatalf("expected exactly 1 excluded node, got %v", banned)
+	}
+}
+
+// When every node of the allocation is merely busy, PostFilter must exclude no
+// node at all, so the group can retry those nodes once a saturated cluster frees up.
+func TestPostFilterContentionExcludesNothing(t *testing.T) {
+	const gk = "default/training"
+	fm := &fakeMatcher{}
+	sched := newTestFluence(fm)
+	sched.placement[gk] = groupAlloc{
+		place:  placement.Placement{Nodes: []string{"node-a", "node-b"}},
+		jobids: []uint64{1},
+	}
+	member := groupedPod("default", "training-0", "training", nil)
+	codes := fakeNodeStatus{"node-a": fwk.Unschedulable, "node-b": fwk.Unschedulable}
+
+	sched.PostFilter(context.Background(), nil, member, codes)
+
+	if banned := sched.excludedNodes[gk]; len(banned) != 0 {
+		t.Fatalf("a purely-busy group must exclude no nodes, got %v", banned)
+	}
+	if _, cached := sched.placement[gk]; cached {
+		t.Fatal("placement cache should be deleted even when nothing is excluded")
+	}
+	if n := len(fm.cancelled); n != 1 {
+		t.Fatalf("expected the jobid cancelled, got %v", fm.cancelled)
+	}
+}
+
+// PostFilter must tolerate a nil status map (e.g. everything filtered upstream):
+// no panic, and no node excluded — certainly not the whole allocation.
+func TestPostFilterNilStatusMapExcludesNothing(t *testing.T) {
+	const gk = "default/training"
+	fm := &fakeMatcher{}
+	sched := newTestFluence(fm)
+	sched.placement[gk] = groupAlloc{place: placement.Placement{Nodes: []string{"node-a", "node-b"}}, jobids: []uint64{7}}
+	member := groupedPod("default", "training-0", "training", nil)
+
+	_, got := sched.PostFilter(context.Background(), nil, member, nil)
+	if got == nil || got.Code() != fwk.Unschedulable {
+		t.Fatalf("expected Unschedulable, got %v", got)
+	}
+	if banned := sched.excludedNodes[gk]; len(banned) != 0 {
+		t.Fatalf("nil status map must exclude nothing, got %v", banned)
+	}
+}
+
+// Exclusions for incompatible nodes build up over attempts; busy nodes are never added.
+func TestPostFilterAccumulatesIncompatibleAcrossAttempts(t *testing.T) {
+	const gk = "default/training"
+	fm := &fakeMatcher{}
+	sched := newTestFluence(fm)
+	member := groupedPod("default", "training-0", "training", nil)
+
+	sched.placement[gk] = groupAlloc{place: placement.Placement{Nodes: []string{"node-a", "node-b"}}, jobids: []uint64{1}}
+	sched.PostFilter(context.Background(), nil, member, fakeNodeStatus{"node-a": fwk.UnschedulableAndUnresolvable, "node-b": fwk.Unschedulable})
+	sched.placement[gk] = groupAlloc{place: placement.Placement{Nodes: []string{"node-c", "node-d"}}, jobids: []uint64{2}}
+	sched.PostFilter(context.Background(), nil, member, fakeNodeStatus{"node-c": fwk.UnschedulableAndUnresolvable, "node-d": fwk.Unschedulable})
+
+	banned := sched.excludedNodes[gk]
+	for _, node := range []string{"node-a", "node-c"} {
+		if !banned[node] {
+			t.Fatalf("incompatible %s should accumulate, got %v", node, banned)
+		}
+	}
+	if banned["node-b"] || banned["node-d"] {
+		t.Fatalf("busy nodes must never accumulate, got %v", banned)
+	}
+	if len(banned) != 2 {
+		t.Fatalf("exclusion set should be the 2 incompatible nodes, got %v", banned)
+	}
+}
+
+// For a group with no cached allocation (foreign, or already cleared) PostFilter
+// does nothing harmful: it neither panics nor cancels, and reports Unschedulable.
+func TestPostFilterUnknownGroupNoop(t *testing.T) {
+	fm := &fakeMatcher{}
+	sched := newTestFluence(fm)
+	outsider := groupedPod("default", "stranger-0", "stranger", nil)
+
+	_, got := sched.PostFilter(context.Background(), nil, outsider, nil)
+	if got == nil || got.Code() != fwk.Unschedulable {
+		t.Fatalf("expected Unschedulable, got %v", got)
+	}
+	if n := len(fm.cancelled); n != 0 {
+		t.Fatalf("nothing should be cancelled for an unknown group, got %v", fm.cancelled)
+	}
+	if n := len(sched.excludedNodes); n != 0 {
+		t.Fatalf("no exclusion set should be created for an unknown group, got %v", sched.excludedNodes)
+	}
+}
+
+// cancelGroup (teardown) has to wipe the group's exclusion set; otherwise a later
+// group that reuses the same key would start out with stale exclusions.
+func TestCancelGroupClearsExclusions(t *testing.T) {
+	const gk = "default/training"
+	fm := &fakeMatcher{}
+	sched := newTestFluence(fm)
+	sched.placement[gk] = groupAlloc{jobids: []uint64{9}}
+	sched.excludedNodes[gk] = map[string]bool{"node-a": true}
+
+	sched.cancelGroup(gk, ann("9"))
+
+	if _, lingering := sched.excludedNodes[gk]; lingering {
+		t.Fatal("exclusion set should be cleared on teardown")
+	}
+}
+
+// schedulableNodes must drop control-plane (NoSchedule taint), NoExecute-tainted,
+// and cordoned nodes, keeping only nodes a normal gang pod can actually land on.
+// This keeps the Fluxion graph from offering nodes Kubernetes will reject in
+// Filter (which forces PostFilter to abandon the allocation and re-match).
+func TestSchedulableNodesDropsTaintedAndCordoned(t *testing.T) {
+	node := func(name string, unsched bool, effects ...corev1.TaintEffect) corev1.Node {
+		n := corev1.Node{}
+		n.Name = name
+		n.Spec.Unschedulable = unsched
+		for _, e := range effects {
+			n.Spec.Taints = append(n.Spec.Taints, corev1.Taint{Key: "k", Effect: e})
+		}
+		return n
+	}
+	in := []corev1.Node{
+		node("worker-1", false),
+		node("worker-2", false),
+		node("control-plane", false, corev1.TaintEffectNoSchedule),
+		node("draining", false, corev1.TaintEffectNoExecute),
+		node("cordoned", true),
+		node("prefer-only", false, corev1.TaintEffectPreferNoSchedule), // soft taint: keep
+	}
+	got := schedulableNodes(in)
+	gotNames := map[string]bool{}
+	for _, n := range got {
+		gotNames[n.Name] = true
+	}
+	want := []string{"worker-1", "worker-2", "prefer-only"}
+	if len(got) != len(want) {
+		t.Fatalf("expected %d schedulable nodes %v, got %d %v", len(want), want, len(got), gotNames)
+	}
+	for _, w := range want {
+		if !gotNames[w] {
+			t.Fatalf("expected %s kept, got set %v", w, gotNames)
+		}
+	}
+}
diff --git a/pkg/placement/placement.go b/pkg/placement/placement.go
index 554f319..c7f76de 100644
--- a/pkg/placement/placement.go
+++ b/pkg/placement/placement.go
@@ -214,14 +214,36 @@ func withEntries(counts map[string]int) []jobspec.Resource {
 // allocation (duration 0 runs to graph end) plus an RFC 31 property constraint
 // selecting the eligible node set. properties is the AND-set of composed
 // key=value property strings a matched node must carry.
-func systemAttributes(properties []string) map[string]interface{} {
+func systemAttributes(properties []string, excludeNodes []string) map[string]interface{} {
+	// Base property constraint (the eligible-node property AND-set).
+	constraints := map[string]interface{}{
+		"properties": properties,
+	}
+	// When a group has had a placement rejected by other scheduler plugins
+	// (taints, affinity, volume topology that Fluxion's graph does not model),
+	// PostFilter accumulates the incompatible hostnames and we AND in an RFC 31
+	// negated hostlist so the re-match avoids exactly those nodes. RFC 31 is
+	// JsonLogic-style ({operator:[values]}, one operator per object), so to AND
+	// two operators we nest them under an explicit `and`. We only do this when
+	// there is something to exclude, so the no-exclusion jobspec is byte-for-byte
+	// what it was before (and existing tests/behavior are unchanged).
+	if len(excludeNodes) > 0 {
+		constraints = map[string]interface{}{
+			"and": []interface{}{
+				map[string]interface{}{"properties": properties},
+				map[string]interface{}{
+					"not": []interface{}{
+						map[string]interface{}{"hostlist": excludeNodes},
+					},
+				},
+			},
+		}
+	}
 	return map[string]interface{}{
 		"system": map[string]interface{}{
 			// duration 0 => hold the allocation until we explicitly Cancel.
-			"duration": 0,
-			"constraints": map[string]interface{}{
-				"properties": properties,
-			},
+			"duration":    0,
+			"constraints": constraints,
 		},
 	}
 }
@@ -229,7 +251,7 @@ func systemAttributes(properties []string) map[string]interface{} {
 // computeJobspec builds the physical-compute jobspec for a group: one slot per
 // pod holding the compute resources, constrained to virtual=false nodes. This is
 // the only jobspec for a group that requests no virtual devices.
-func computeJobspec(groupName string, slots int, compute map[string]int) *jobspec.Jobspec {
+func computeJobspec(groupName string, slots int, compute map[string]int, excludeNodes []string) *jobspec.Jobspec {
 	return &jobspec.Jobspec{
 		Version: 9999,
 		Resources: []jobspec.Resource{{
@@ -238,7 +260,7 @@ func computeJobspec(groupName string, slots int, compute map[string]int) *jobspe
 			Label: "default",
 			With:  withEntries(compute),
 		}},
-		Attributes: systemAttributes([]string{VirtualPropertyFalse}),
+		Attributes: systemAttributes([]string{VirtualPropertyFalse}, excludeNodes),
 		Tasks: []jobspec.Task{{
 			Command: []string{groupName},
 			Slot:    "default",
@@ -272,7 +294,7 @@ func deviceJobspec(groupName, deviceType string, count int, extraProps []string)
 			Label: "device",
 			With:  []jobspec.Resource{{Type: "node", Count: count}},
 		}},
-		Attributes: systemAttributes(props),
+		Attributes: systemAttributes(props, nil),
 		Tasks: []jobspec.Task{{
 			Command: []string{groupName},
 			Slot:    "device",
@@ -299,6 +321,7 @@ func JobspecsForGroup(
 	groupName string,
 	pods []corev1.Pod,
 	knownDevices map[string]bool,
+	excludeNodes []string,
 ) ([]*jobspec.Jobspec, error) {
 	if len(pods) == 0 {
 		return nil, fmt.Errorf("pod group %q has no pods", groupName)
@@ -321,7 +344,7 @@ func JobspecsForGroup(
 		}
 	}
 
-	specs := []*jobspec.Jobspec{computeJobspec(groupName, len(pods), compute)}
+	specs := []*jobspec.Jobspec{computeJobspec(groupName, len(pods), compute, excludeNodes)}
 
 	// Deterministic device order for stable output.
 	deviceTypes := make([]string, 0, len(devices))
diff --git a/pkg/placement/placement_test.go b/pkg/placement/placement_test.go
index 33786c8..fe68917 100644
--- a/pkg/placement/placement_test.go
+++ b/pkg/placement/placement_test.go
@@ -64,7 +64,7 @@ func TestClassicalSingleMatch(t *testing.T) {
 		podWith("p0", corev1.ResourceList{corev1.ResourceCPU: qty(4), "nvidia.com/gpu": qty(1)}),
 		podWith("p1", corev1.ResourceList{corev1.ResourceCPU: qty(4), "nvidia.com/gpu": qty(1)}),
 	}
-	specs, err := JobspecsForGroup("grp", pods, nil)
+	specs, err := JobspecsForGroup("grp", pods, nil, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -101,7 +101,7 @@ func TestGroupDeviceMatchWhenLeaderNotFirst(t *testing.T) {
 	})
 	// Leader deliberately placed last.
 	pods := []corev1.Pod{worker, worker, leader}
-	specs, err := JobspecsForGroup("qgrp", pods, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("qgrp", pods, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -132,7 +132,7 @@ func qpuPodWithRequires(name string, requires map[string]string) corev1.Pod {
 // constraints, nothing extra (over-constraining would break unconstrained runs).
 func TestNoRequireAnnotationsAddsNoConstraints(t *testing.T) {
 	p := qpuPodWithRequires("q", nil)
-	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -145,7 +145,7 @@ func TestNoRequireAnnotationsAddsNoConstraints(t *testing.T) {
 // Exactly one require- constraint.
 func TestSingleRequireConstraint(t *testing.T) {
 	p := qpuPodWithRequires("q", map[string]string{"qrmi_type": "braket-gate"})
-	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -169,7 +169,7 @@ func TestMultipleRequireConstraintsAreDeduped(t *testing.T) {
 	// a worker that happens to repeat one of the same require- annotations
 	worker := qpuPodWithRequires("w0", map[string]string{"vendor": "amazon"})
 	specs, err := JobspecsForGroup("g", []corev1.Pod{leader, worker},
-		map[string]bool{"qpu": true})
+		map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -211,7 +211,7 @@ func TestRequireAnnotationConstrainsDevice(t *testing.T) {
 	leader.Annotations[RequireAnnotationPrefix+"vendor"] = "amazon"
 
 	specs, err := JobspecsForGroup("qgrp", []corev1.Pod{leader},
-		map[string]bool{"qpu": true})
+		map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -232,7 +232,7 @@ func TestDeviceProducesSecondMatch(t *testing.T) {
 		FluxionResourcePrefix + "qpu": qty(1),
 	})
 	known := map[string]bool{"qpu": true}
-	specs, err := JobspecsForGroup("qgrp", []corev1.Pod{p}, known)
+	specs, err := JobspecsForGroup("qgrp", []corev1.Pod{p}, known, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -274,7 +274,7 @@ func TestDeviceProducesSecondMatch(t *testing.T) {
 // node), so there are two matches: compute (core=1, virtual=false) and device.
 func TestDeviceOnlyStillForcesCompute(t *testing.T) {
 	p := podWith("q", corev1.ResourceList{FluxionResourcePrefix + "qpu": qty(1)})
-	specs, err := JobspecsForGroup("qonly", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("qonly", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -289,7 +289,7 @@ func TestDeviceOnlyStillForcesCompute(t *testing.T) {
 // Requesting a device type the graph does not model is a hard error.
 func TestUnknownDeviceErrors(t *testing.T) {
 	p := podWith("q", corev1.ResourceList{FluxionResourcePrefix + "fpga": qty(1)})
-	_, err := JobspecsForGroup("grp", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	_, err := JobspecsForGroup("grp", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err == nil {
 		t.Fatal("expected an error for an unmodeled device type")
 	}
@@ -301,7 +301,7 @@ func TestHoldDurationZero(t *testing.T) {
 		corev1.ResourceCPU:            qty(1),
 		FluxionResourcePrefix + "qpu": qty(1),
 	})
-	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true})
+	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, map[string]bool{"qpu": true}, nil)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -366,3 +366,76 @@ func TestPlacementUnmarkedNodeIsCompute(t *testing.T) {
 		t.Fatalf("unmarked node should not be a backend, got %q", p.Backend)
 	}
 }
+
+// When excludeNodes is non-empty, the compute jobspec's constraint must AND the
+// base properties with an RFC 31 negated hostlist naming exactly the excluded
+// nodes, so a re-match avoids them. When empty, the constraint must be the plain
+// properties form (byte-for-byte the pre-exclusion behavior).
+func TestExcludeNodesAddsNegatedHostlist(t *testing.T) {
+	p := podWith("p", corev1.ResourceList{corev1.ResourceCPU: qty(1)})
+
+	// no exclusion -> plain properties, no `and`/`not`
+	specs, err := JobspecsForGroup("g", []corev1.Pod{p}, nil, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	cons := computeConstraints(t, specs[0])
+	if _, hasAnd := cons["and"]; hasAnd {
+		t.Fatalf("no-exclusion constraint must not use `and`: %#v", cons)
+	}
+	if _, hasProps := cons["properties"]; !hasProps {
+		t.Fatalf("no-exclusion constraint must have plain properties: %#v", cons)
+	}
+
+	// with exclusion -> and[ properties, not[ hostlist ] ]
+	specs, err = JobspecsForGroup("g", []corev1.Pod{p}, nil, []string{"node-b", "node-c"})
+	if err != nil {
+		t.Fatal(err)
+	}
+	cons = computeConstraints(t, specs[0])
+	andTerms, ok := cons["and"].([]interface{})
+	if !ok || len(andTerms) != 2 {
+		t.Fatalf("exclusion constraint must be `and` of 2 terms: %#v", cons)
+	}
+	// find the not/hostlist term and check it names exactly the excluded nodes
+	foundHostlist := false
+	for _, term := range andTerms {
+		tm, _ := term.(map[string]interface{})
+		notTerm, ok := tm["not"].([]interface{})
+		if !ok || len(notTerm) == 0 {
+			continue
+		}
+		inner, _ := notTerm[0].(map[string]interface{})
+		hl, ok := inner["hostlist"].([]string)
+		if !ok {
+			// json round-trip may make it []interface{}; accept both
+			hlAny, _ := inner["hostlist"].([]interface{})
+			for _, h := range hlAny {
+				name, _ := h.(string)
+				hl = append(hl, name)
+			}
+		}
+		if len(hl) != 2 || hl[0] != "node-b" || hl[1] != "node-c" {
+			t.Fatalf("hostlist must be exactly [node-b node-c], got %v", hl)
+		}
+		foundHostlist = true
+	}
+	if !foundHostlist {
+		t.Fatalf("exclusion constraint must contain not[hostlist[2 nodes]]: %#v", cons)
+	}
+}
+
+// computeConstraints returns attributes.system.constraints of the given jobspec,
+// failing the test when either level is missing or is not a map.
+func computeConstraints(t *testing.T, spec *jobspec.Jobspec) map[string]interface{} {
+	t.Helper()
+	system, isMap := spec.Attributes["system"].(map[string]interface{})
+	if !isMap {
+		t.Fatalf("no system attributes: %#v", spec.Attributes)
+	}
+	constraints, isMap := system["constraints"].(map[string]interface{})
+	if !isMap {
+		t.Fatalf("no constraints: %#v", system)
+	}
+	return constraints
+}
diff --git a/pkg/webhook/handler.go b/pkg/webhook/handler.go
index 82a1227..61b97b1 100644
--- a/pkg/webhook/handler.go
+++ b/pkg/webhook/handler.go
@@ -25,34 +25,32 @@ type MutatorAPI interface {
 	// InjectedEnv is the FLUXION_* env contract the scheduler/webhook supplies.
 	InjectedEnv() []corev1.EnvVar
 
-	// PodGroup operations (gang scheduling). Group identity is the value of the
-	// group label, which the core treats as an opaque string.
-	PodGroupLeader(ctx context.Context, namespace, group string) string
-	EnsurePodGroup(ctx context.Context, namespace, group, leaderPod string)
-	RecordLeader(ctx context.Context, namespace, group, leaderPod string)
-
-	// EnsureSidecarRBAC provisions the per-namespace ServiceAccount/Role/Binding
-	// the sidecar needs.
-	EnsureSidecarRBAC(ctx context.Context, namespace string)
-
-	// InterceptorOps stages the fluence package into the quantum container via an
-	// init container + shared volume on PYTHONPATH (Model C). SidecarContainerOps
-	// adds the sidecar container (observe=true => observe-only telemetry mode).
-	InterceptorOps(pod *corev1.Pod) []spec.Op
-	SidecarContainerOps(pod *corev1.Pod, observe bool) []spec.Op
+	// EnsurePodGroup creates the group's PodGroup with the given gang minCount if
+	// it does not already exist (idempotent). Group identity is the opaque value
+	// of the group label. creatorPod is recorded only as the PodGroup's creator
+	// reference; the core ascribes no role semantics to it.
+	EnsurePodGroup(ctx context.Context, namespace, group, creatorPod string, minCount int32)
 }
 
 // Handler inspects a pod and, when it applies, contributes JSON patch ops. A pod
 // flows through every registered handler whose Applies returns true; their ops
 // are concatenated. Applies is fully general — it receives the pod and the
-// MutatorAPI, so a handler may consult cluster state (e.g. resolve a group's
-// leader) in deciding whether it applies.
+// MutatorAPI, so a handler may consult cluster state in deciding whether it
+// applies.
 type Handler interface {
 	Name() string
 	Applies(ctx context.Context, m MutatorAPI, pod *corev1.Pod) bool
 	Mutate(ctx context.Context, m MutatorAPI, pod *corev1.Pod) []spec.Op
 }
 
+// DefaultHandlerOrder is the active set AND the dispatch order when the operator
+// passes no --handlers flag. Order matters: specific handlers run before the
+// generic gang fallback, so "gang" is LAST — it applies default gang sizing
+// (group-size annotation or owner-derived N) only if no earlier handler already
+// shaped the gang. To change the order or disable a handler, pass a different
+// list (e.g. --handlers=fluxion,gang drops quantum).
+var DefaultHandlerOrder = []string{"fluxion", "quantum", "gang"}
+
 // ── registration ────────────────────────────────────────────────────────────────
 //
 // Handlers self-register via Register() from their package's init(). The core
@@ -60,15 +58,57 @@ type Handler interface {
 // webhook server wiring) is what populates the registry. This keeps the core
 // domain-agnostic: adding or removing a handler does not touch core code.
 
-var registry []Handler
+// available maps a handler's Name() to the handler. Populated by Register() from
+// each handler package's init(). This is the set of handlers that EXIST; which
+// ones actually run, and in what order, is decided by activeOrder.
+var available = map[string]Handler{}
+
+// activeOrder is the ordered list of handler names to dispatch. It is BOTH the
+// selection (names not present are disabled) and the order (dispatch follows the
+// slice). Defaults to DefaultHandlerOrder; overridden by SetActiveHandlers.
+var activeOrder = append([]string(nil), DefaultHandlerOrder...)
 
-// Register adds a handler to the global set. Called from handler packages'
-// init(). Order of registration is the order handlers run.
+// Register adds a handler to the available set under its Name(). Called from
+// handler packages' init().
 func Register(h Handler) {
-	registry = append(registry, h)
+	available[h.Name()] = h
+}
+
+// SetActiveHandlers sets the active, ordered handler list (the --handlers value).
+// Empty/nil restores DefaultHandlerOrder. Names with no registered handler are
+// dropped and returned as `unknown` so the caller can warn. Order is preserved
+// exactly as given; `active` is a copy, so callers cannot mutate dispatch state.
+func SetActiveHandlers(names []string) (active, unknown []string) {
+	if len(names) == 0 {
+		activeOrder = append([]string(nil), DefaultHandlerOrder...)
+		return append([]string(nil), activeOrder...), nil
+	}
+	var ordered []string
+	for _, n := range names {
+		if _, ok := available[n]; ok {
+			ordered = append(ordered, n)
+		} else {
+			unknown = append(unknown, n)
+		}
+	}
+	activeOrder = ordered
+	return append([]string(nil), activeOrder...), unknown
+}
+
+// ActiveHandlerNames returns the active dispatch order (for logging at startup).
+func ActiveHandlerNames() []string {
+	return append([]string(nil), activeOrder...)
 }
 
-// registered returns the registered handlers (the live registry).
+// registered returns the active handlers, resolved from activeOrder, in order.
+// Names in the order with no registered handler are skipped (already warned at
+// SetActiveHandlers time).
 func registered() []Handler {
-	return registry
+	out := make([]Handler, 0, len(activeOrder))
+	for _, n := range activeOrder {
+		if h, ok := available[n]; ok {
+			out = append(out, h)
+		}
+	}
+	return out
 }
diff --git a/pkg/webhook/handlers/dependency.go b/pkg/webhook/handlers/dependency.go
new file mode 100644
index 0000000..d25d598
--- /dev/null
+++ b/pkg/webhook/handlers/dependency.go
@@ -0,0 +1,131 @@
+package handlers
+
+import (
+	"github.com/converged-computing/fluence/pkg/webhook/spec"
+
+	corev1 "k8s.io/api/core/v1"
+)
+
+// Dependency is Fluence's GENERAL "this set of pods must wait for a producer to
+// be ready" primitive. It is deliberately NOT quantum-specific: quantum is the
+// first resource type to use it (a gang waits for a quantum submission to reach
+// the device queue), but the same primitive applies to any resource type whose
+// readiness is produced out-of-band — a license server, a data stage-in job, a
+// warmed cache, another gang, etc.
+//
+// A Dependency has three parts, each carried as a pod annotation so the
+// relationship lives at the GROUP level (not duplicated as bespoke per-resource
+// fields) and is readable by both the webhook (at admission) and the scheduler
+// (in its reconcile loop):
+//
+//   - Kind:     what KIND of readiness this is (the resource type's name). The
+//     producer side knows how to satisfy this kind; the consumer side
+//     only knows it must wait. Quantum's kind is "quantum-submit".
+//   - Producer: the identity of the thing that will signal ready. For quantum it
+//     is the submitter's (base) group; generally it is whatever the
+//     kind's handler records as the satisfier.
+//   - Gate:     the scheduling gate held on the dependent (consumer) pods until
+//     the producer signals ready. Removing the gate is the "ungate"
+//     and is performed by whatever observes the producer's readiness
+//     (the quantum sidecar for kind=quantum-submit; the scheduler's
+//     reconcile loop for kinds whose readiness is in-cluster, e.g.
+//     "another gang is Running").
+//
+// The webhook PRODUCES a Dependency (gates the consumers, stamps the
+// annotations); REMOVING the gate is owned by the observer best placed to see
+// the producer's readiness. That split — declare here, observe elsewhere — is
+// what keeps the primitive general: a new resource type adds a Kind and an
+// observer and reuses the gating/annotation machinery unchanged.
+type Dependency struct {
+	Kind     string // readiness kind, e.g. "quantum-submit"; carried in DependsOnKindAnnotation
+	Producer string // identity of the readiness producer; carried in DependsOnProducerAnnotation
+	Gate     string // scheduling gate held on dependents; carried in DependsOnGateAnnotation
+}
+
+// Dependency annotation keys, stamped on the dependent pods by applyOps and read
+// back by DependencyOf. Generic — no quantum in the names, so any resource type reuses them.
+const (
+	// DependsOnKindAnnotation names the readiness kind the dependent waits for; DependencyOf reports no dependency when it is empty.
+	DependsOnKindAnnotation = "fluence.flux-framework.org/depends-on-kind"
+	// DependsOnProducerAnnotation names the producer expected to signal ready.
+	DependsOnProducerAnnotation = "fluence.flux-framework.org/depends-on-producer"
+	// DependsOnGateAnnotation records which scheduling gate encodes the wait, so
+	// an observer knows exactly which gate to remove when the producer is ready.
+	DependsOnGateAnnotation = "fluence.flux-framework.org/depends-on-gate"
+)
+
+// applyOps gates the dependent pod via gateWithName and stamps the dependency
+// annotations (empty values are skipped) so the relationship is readable on the pod.
+func (d Dependency) applyOps(pod *corev1.Pod) []spec.Op {
+	ops, view := gateWithName(pod, d.Gate), *pod // view is a shallow copy; the caller's pod is never modified
+	for _, kv := range [][2]string{{DependsOnKindAnnotation, d.Kind}, {DependsOnProducerAnnotation, d.Producer}, {DependsOnGateAnnotation, d.Gate}} {
+		ops = append(ops, annotateOp(&view, kv[0], kv[1])...)
+		if kv[1] != "" && view.Annotations == nil {
+			view.Annotations = map[string]string{} // the op just emitted creates the map; later stamps must be per-key adds or they would replace it
+		}
+	}
+	return ops
+}
+
+// DependencyOf reads a dependent pod's declared Dependency; ok=false when the kind
+// annotation is empty. Producer and Gate are returned unvalidated (may be empty).
+// Lets the scheduler's reconcile loop and the sidecar avoid hardcoding a kind.
+func DependencyOf(pod *corev1.Pod) (Dependency, bool) {
+	kind := spec.Annotation(pod, DependsOnKindAnnotation)
+	if kind == "" {
+		return Dependency{}, false
+	}
+	return Dependency{
+		Kind:     kind,
+		Producer: spec.Annotation(pod, DependsOnProducerAnnotation),
+		Gate:     spec.Annotation(pod, DependsOnGateAnnotation),
+	}, true
+}
+
+// annotateOp returns the patch op that adds one metadata annotation, or nil for an
+// empty value. The key is JSON-Pointer-escaped so slashes are handled.
+func annotateOp(pod *corev1.Pod, key, value string) []spec.Op {
+	if value == "" {
+		return nil // empty values are never stamped
+	}
+	if pod.Annotations == nil {
+		return []spec.Op{{ // no annotations yet: add the whole map; a second whole-map add would replace this one
+			Op:    "add",
+			Path:  "/metadata/annotations",
+			Value: map[string]string{key: value},
+		}}
+	}
+	return []spec.Op{{ // map already exists: add (or overwrite) just this key
+		Op:    "add",
+		Path:  "/metadata/annotations/" + escapeJSONPointer(key),
+		Value: value,
+	}}
+}
+
+// gateWithName adds the scheduling gate gateName for any dependency kind and, if
+// the pod names no priority class, sets QuantumClassicalPriorityClass. A pod that
+// already carries the gate gets no ops at all (idempotent), priority included.
+func gateWithName(pod *corev1.Pod, gateName string) []spec.Op {
+	for _, g := range pod.Spec.SchedulingGates {
+		if g.Name == gateName {
+			return nil // already gated: nothing to emit
+		}
+	}
+	var ops []spec.Op
+	gate := corev1.PodSchedulingGate{Name: gateName}
+	if len(pod.Spec.SchedulingGates) == 0 { // no gate list yet: create it with this gate
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/schedulingGates", Value: []corev1.PodSchedulingGate{gate}})
+	} else { // append to the existing list ("-" is the JSON Pointer end-of-array token)
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/schedulingGates/-", Value: gate})
+	}
+	// Gated dependents schedule reliably once ungated only if they outrank other
+	// pending work; priorityClassName is immutable post-creation so it must be
+	// set now. Don't override a user's explicit class. spec.priority is cleared
+	// to null so the priority admission controller recomputes it from the class
+	// (add-null is valid whether the field is absent, 0, or set).
+	if pod.Spec.PriorityClassName == "" { // NOTE(review): the class is quantum-named although this helper is generic
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/priorityClassName", Value: QuantumClassicalPriorityClass})
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/priority", EmitNull: true})
+	}
+	return ops
+}
diff --git a/pkg/webhook/handlers/gang.go b/pkg/webhook/handlers/gang.go
index a6c6126..8ba83f3 100644
--- a/pkg/webhook/handlers/gang.go
+++ b/pkg/webhook/handlers/gang.go
@@ -2,11 +2,14 @@ package handlers
 
 import (
 	"context"
+	"log"
+	"strconv"
 
 	"github.com/converged-computing/fluence/pkg/webhook"
 	"github.com/converged-computing/fluence/pkg/webhook/spec"
 
 	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 func init() {
@@ -14,7 +17,7 @@ func init() {
 }
 
 // gangHandler gang-schedules pods that carry the group label: it creates a
-// Fluence-owned PodGroup (first pod admitted becomes the recorded leader) and
+// Fluence-owned PodGroup and
 // links every pod to it via spec.schedulingGroup.podGroupName, which is the
 // field the scheduler gangs by. It knows nothing about quantum — a purely
 // classical gang is fully handled here, with no sidecar.
@@ -23,20 +26,88 @@ type gangHandler struct{}
 func (h *gangHandler) Name() string { return "gang" }
 
 func (h *gangHandler) Applies(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) bool {
+	// Classical gangs only: false for any pod that requests QuantumResource, else
+	// true when the pod carries a group name. Quantum pods are gang-scheduled by the
+	// quantum handler, which creates both the <group>-producer and <group> PodGroups
+	// itself; handling them here too would create a second, conflicting PodGroup.
+	if spec.PodRequestsResource(pod, QuantumResource) {
+		return false
+	}
 	return webhook.GroupName(pod) != ""
 }
 
 func (h *gangHandler) Mutate(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) []spec.Op {
 	g := webhook.GroupName(pod)
-	// First pod admitted in the group creates the PodGroup and is recorded as
-	// the admission-order leader. All pods are linked to the group.
-	if m.PodGroupLeader(ctx, pod.Namespace, g) == "" {
-		m.EnsurePodGroup(ctx, pod.Namespace, g, pod.Name)
-		m.RecordLeader(ctx, pod.Namespace, g, pod.Name)
-	}
+	// Ensure the group's PodGroup exists with the resolved gang size, and link
+	// this pod to it. EnsurePodGroup is called unconditionally on every admission;
+	// it is expected to be idempotent (a no-op when the PodGroup already exists,
+	// e.g. created by an earlier handler). NOTE(review): if so, the minCount
+	// resolved for the first admitted pod is the one that sticks — confirm.
+	// minCount = full gang size N (group-size annotation, else owner-derived,
+	// else 1); see resolveMinCount.
+	m.EnsurePodGroup(ctx, pod.Namespace, g, pod.Name, resolveMinCount(ctx, m, pod))
 	return schedulingGroupOps(pod, g)
 }
 
+// resolveMinCount determines the gang's atomic-schedule size N:
+//  1. explicit group-size annotation -> honor it verbatim. This is the override
+//     for when minCount must differ from the parent's replica count (e.g. the
+//     quantum leader/worker split, where the gang's N is expressed directly).
+//     A value that is not a positive int32 is logged and ignored.
+//  2. otherwise derive from the OWNING object: a Flux Operator MiniCluster pod
+//     is owned by an indexed Job whose parallelism == completions == size == N.
+//     (The operator sets Parallelism = Completions = MiniCluster.Spec.Size.)
+//  3. otherwise default to 1, logged — never silently size a multi-pod gang to 1.
+//
+// The leader/worker (quantum) split is orthogonal and unchanged: it is driven by
+// QuantumResource in the quantum handler. minCount is always the FULL gang N.
+func resolveMinCount(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) int32 {
+	// 1. explicit override. Indexing a nil map is safe, so no nil guard is needed;
+	// ParseInt with bitSize 32 rejects values that would wrap when narrowed to int32.
+	if n := pod.Annotations[webhook.GroupSizeAnnotation]; n != "" {
+		if v, err := strconv.ParseInt(n, 10, 32); err == nil && v > 0 {
+			return int32(v)
+		}
+		log.Printf("[fluence-webhook] group %s: ignoring invalid group-size annotation %q", webhook.GroupName(pod), n)
+	}
+	// 2. derive from the owning Job's parallelism
+	if n := ownerJobN(ctx, m, pod); n > 0 {
+		return n
+	}
+	// 3. no signal: a single-pod gang. Log so a missing size on a multi-pod
+	// workload is visible rather than a silent gang-of-1.
+	log.Printf("[fluence-webhook] group %s: no group-size annotation and no owning Job parallelism; defaulting minCount=1", webhook.GroupName(pod))
+	return 1
+}
+
+// ownerJobN returns the size N of the Job that owns the pod: its Parallelism when
+// set and positive, else its Completions, or 0 if there is no usable Job owner.
+// (The Flux Operator sets a MiniCluster's Job Parallelism == Completions == size.)
+// A failed Job lookup is logged and reported as 0 so the caller can fall back.
+// Shared by the gang handler (minCount = N) and the quantum handler (split sizes).
+func ownerJobN(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) int32 {
+	c := m.Client()
+	if c == nil {
+		return 0
+	}
+	for _, ref := range pod.OwnerReferences {
+		if ref.Kind != "Job" {
+			continue
+		}
+		job, err := c.BatchV1().Jobs(pod.Namespace).Get(ctx, ref.Name, metav1.GetOptions{})
+		if err != nil {
+			log.Printf("[fluence-webhook] pod %s/%s: cannot read owning Job %s: %v", pod.Namespace, pod.Name, ref.Name, err)
+			return 0
+		}
+		for _, n := range []*int32{job.Spec.Parallelism, job.Spec.Completions} { // Parallelism wins over Completions
+			if n != nil && *n > 0 {
+				return *n
+			}
+		}
+	}
+	return 0
+}
+
 // schedulingGroupOps links a pod to its PodGroup via the native 1.36 field
 // spec.schedulingGroup.podGroupName. Idempotent if already linked.
 func schedulingGroupOps(pod *corev1.Pod, group string) []spec.Op {
diff --git a/pkg/webhook/handlers/gang_test.go b/pkg/webhook/handlers/gang_test.go
new file mode 100644
index 0000000..ac027f8
--- /dev/null
+++ b/pkg/webhook/handlers/gang_test.go
@@ -0,0 +1,153 @@
+/*
+Copyright 2024 Lawrence Livermore National Security, LLC
+ (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+SPDX-License-Identifier: Apache-2.0
+*/
+
+// Tests for gang PodGroup minCount: the whole gang (full N) must schedule
+// atomically. Regression guard for the bug where every PodGroup was created
+// with minCount=1, so a multi-pod gang was "satisfied" by a single pod and the
+// rest were stranded (partial placement).
+package handlers
+
+import (
+	"context"
+	"testing"
+
+	"strconv"
+
+	"github.com/converged-computing/fluence/pkg/webhook"
+
+	corev1 "k8s.io/api/core/v1"
+
+	batchv1 "k8s.io/api/batch/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+// minCountOf runs the webhook mutation for one pod of a group against an empty
+// fake cluster (no pre-existing objects, so no owning Job can be found) and
+// returns the minCount of the PodGroup the webhook created.
+func minCountOf(t *testing.T, pod *corev1.Pod) int32 {
+	t.Helper()
+	return minCountWithClient(t, pod)
+}
+
+// minCountWithClient runs the webhook mutation for pod with a fake clientset
+// pre-seeded with objs (so an owning Job can exist) and returns the minCount of
+// the PodGroup named after the pod's group. It fails the test, rather than
+// panicking on a nil dereference, when the PodGroup is missing or carries no
+// gang policy.
+func minCountWithClient(t *testing.T, pod *corev1.Pod, objs ...interface{}) int32 {
+	t.Helper()
+	cs := fake.NewSimpleClientset(toRuntime(objs)...)
+	m := &webhook.Mutator{Clientset: cs}
+	// Admit the pod; the PodGroup is created as a side effect of the mutation.
+	m.Mutate(context.Background(), pod)
+	// Read the PodGroup back from the same fake clientset the mutator wrote to.
+	pg, err := cs.SchedulingV1alpha2().PodGroups(pod.Namespace).
+		Get(context.Background(), webhook.GroupName(pod), metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("PodGroup not created: %v", err)
+	}
+	// Gang is a pointer: guard it so a missing policy is a test failure, not a panic.
+	if pg.Spec.SchedulingPolicy.Gang == nil {
+		t.Fatal("PodGroup has no gang scheduling policy")
+	}
+	return pg.Spec.SchedulingPolicy.Gang.MinCount
+}
+
+func jobWithParallelism(ns, name string, n int32) *batchv1.Job { // Job fixture with Parallelism == Completions == n
+	return &batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &n, Completions: &n}, // both fields point at the same n
+	}
+}
+
+func ownedBy(pod *corev1.Pod, kind, name string) { // appends an owner reference (Kind and Name only) to pod
+	pod.OwnerReferences = append(pod.OwnerReferences,
+		metav1.OwnerReference{Kind: kind, Name: name})
+}
+
+// No group-size annotation, but the pod is owned by a Job seeded with
+// parallelism 4 (the Flux Operator MiniCluster case: Parallelism == Completions
+// == size == N). minCount must come from that Job.
+func TestGangMinCountDerivedFromOwningJob(t *testing.T) {
+	pod := cpuPod("fluence")
+	pod.Namespace = "default"
+	pod.Labels = map[string]string{webhook.GroupLabel: "mc-gang"}
+	ownedBy(pod, "Job", "mc-gang-job")
+	got := minCountWithClient(t, pod, jobWithParallelism("default", "mc-gang-job", 4))
+	if got != 4 {
+		t.Errorf("owner-derived: minCount=%d, want 4 (from Job parallelism)", got)
+	}
+}
+
+// The explicit annotation (2) OVERRIDES the owning Job's parallelism (8): the
+// override exists precisely because minCount may differ from the parent replica count.
+func TestGangMinCountAnnotationOverridesOwner(t *testing.T) {
+	pod := cpuPod("fluence")
+	pod.Namespace = "default"
+	pod.Labels = map[string]string{webhook.GroupLabel: "ovr-gang"}
+	pod.Annotations = map[string]string{webhook.GroupSizeAnnotation: "2"}
+	ownedBy(pod, "Job", "ovr-gang-job")
+	got := minCountWithClient(t, pod, jobWithParallelism("default", "ovr-gang-job", 8))
+	if got != 2 {
+		t.Errorf("annotation override: minCount=%d, want 2 (annotation wins over Job=8)", got)
+	}
+}
+
+func atoi32(s string) int32 { v, _ := strconv.Atoi(s); return int32(v) } // test literal -> int32; a parse error yields 0
+
+func toRuntime(objs []interface{}) []runtime.Object { // keeps only the values that are runtime.Objects
+	out := make([]runtime.Object, 0, len(objs))
+	for _, o := range objs {
+		if ro, ok := o.(runtime.Object); ok {
+			out = append(out, ro)
+		}
+	}
+	return out
+}
+
+// A classical gang of size N must get minCount = N so the whole group schedules
+// atomically (this is the core multi-gang fix).
+func TestGangMinCountEqualsGroupSize(t *testing.T) {
+	for _, n := range []string{"2", "4", "8"} {
+		pod := cpuPod("fluence")
+		pod.Namespace = "default"
+		pod.Labels = map[string]string{webhook.GroupLabel: "g-" + n}
+		pod.Annotations = map[string]string{webhook.GroupSizeAnnotation: n}
+		got := minCountOf(t, pod)
+		want := atoi32(n)
+		if got != want {
+			t.Errorf("group-size=%s: minCount=%d, want %d", n, got, want)
+		}
+	}
+}
+
+// No group-size annotation (and no Job seeded in the fake cluster) -> minCount falls back to 1.
+func TestGangMinCountDefaultsToOne(t *testing.T) {
+	pod := cpuPod("fluence")
+	pod.Namespace = "default"
+	pod.Labels = map[string]string{webhook.GroupLabel: "g-default"}
+	if got := minCountOf(t, pod); got != 1 {
+		t.Errorf("absent group-size: minCount=%d, want 1", got)
+	}
+}
+
+// group-size is the authoritative gang minCount for the gang handler: a pod that
+// sets it to N yields a PodGroup with minCount=N (the whole gang schedules
+// atomically). No owner is attached here, so only the annotation path runs;
+// precedence over an owning Job is covered by TestGangMinCountAnnotationOverridesOwner.
+func TestGangMinCountHonorsGroupSize(t *testing.T) {
+	pod := cpuPod("fluence")
+	pod.Namespace = "default"
+	pod.Labels = map[string]string{webhook.GroupLabel: "q-gang"}
+	pod.Annotations = map[string]string{
+		webhook.GroupSizeAnnotation: "4", // full gang size
+	}
+	if got := minCountOf(t, pod); got != 4 {
+		t.Errorf("group-size gang: minCount=%d, want 4 (full N)", got)
+	}
+}
diff --git a/pkg/webhook/handlers/handlers_test.go b/pkg/webhook/handlers/handlers_test.go
index 04d0e02..dee0746 100644
--- a/pkg/webhook/handlers/handlers_test.go
+++ b/pkg/webhook/handlers/handlers_test.go
@@ -2,6 +2,7 @@ package handlers
 
 import (
 	"context"
+	"strings"
 	"testing"
 
 	"github.com/converged-computing/fluence/pkg/placement"
@@ -9,10 +10,7 @@ import (
 	"github.com/converged-computing/fluence/pkg/webhook/spec"
 
 	corev1 "k8s.io/api/core/v1"
-	schedulingv1alpha2 "k8s.io/api/scheduling/v1alpha2"
 	"k8s.io/apimachinery/pkg/api/resource"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/kubernetes/fake"
 )
 
 // ── fixtures ────────────────────────────────────────────────────────────────────
@@ -83,16 +81,29 @@ func hasGateOp(ops []spec.Op) bool {
 	return false
 }
 
+// hasDropQuantumResourceOp reports whether ops contain a "remove" whose path ends
+// in "qpu" under a resources/requests/ or resources/limits/ segment (the consumer qpu strip).
+func hasDropQuantumResourceOp(ops []spec.Op) bool {
+	for _, op := range ops {
+		if op.Op == "remove" && strings.HasSuffix(op.Path, "qpu") &&
+			(strings.Contains(op.Path, "/resources/requests/") ||
+				strings.Contains(op.Path, "/resources/limits/")) {
+			return true
+		}
+	}
+	return false
+}
+
 func hasSidecarOp(ops []spec.Op) bool {
 	for _, op := range ops {
 		switch v := op.Value.(type) {
 		case corev1.Container:
-			if v.Name == "fluence-sidecar" {
+			if v.Name == SidecarContainerName {
 				return true
 			}
 		case []corev1.Container:
 			for _, c := range v {
-				if c.Name == "fluence-sidecar" {
+				if c.Name == SidecarContainerName {
 					return true
 				}
 			}
@@ -127,238 +138,6 @@ func TestMutateSkipsNonFluxion(t *testing.T) {
 	}
 }
 
-// ── quantum handler: submitter ──────────────────────────────────────────────────
-
-func TestSingleQuantumGetsInterceptorNoSidecar(t *testing.T) {
-	m := &webhook.Mutator{AttributeKeys: []string{"region"}}
-	ops := m.Mutate(context.Background(), qpuPod("fluence"))
-	names := opEnvNames(ops)
-	if !contains(names, "FLUXION_BACKEND") {
-		t.Errorf("want FLUXION_BACKEND, got %v", names)
-	}
-	if !contains(names, "PYTHONPATH") || !contains(names, "FLUENCE_POD_UID") {
-		t.Errorf("want interceptor env (PYTHONPATH, FLUENCE_POD_UID), got %v", names)
-	}
-	if hasSidecarOp(ops) {
-		t.Error("standalone quantum pod should not get a sidecar")
-	}
-	if hasGateOp(ops) {
-		t.Error("standalone quantum pod should not be gated")
-	}
-}
-
-func TestObserveLabelInjectsSidecar(t *testing.T) {
-	m := &webhook.Mutator{}
-	pod := qpuPod("fluence")
-	pod.Labels = map[string]string{ObserveLabel: "true"}
-	ops := m.Mutate(context.Background(), pod)
-	if !hasSidecarOp(ops) {
-		t.Error("observe-labeled quantum pod should get the sidecar")
-	}
-	if hasGateOp(ops) {
-		t.Error("observe-only pod should not be gated")
-	}
-}
-
-// ── quantum handler: worker gating ──────────────────────────────────────────────
-
-func quantumGroupFixture(ns, group, leaderName string) *fake.Clientset {
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: group, Namespace: ns,
-			Annotations: map[string]string{webhook.LeaderAnnotation: leaderName},
-		},
-	}
-	leaderPod := qpuPod("fluence")
-	leaderPod.Name = leaderName
-	leaderPod.Namespace = ns
-	leaderPod.Labels = map[string]string{webhook.GroupLabel: group}
-	return fake.NewSimpleClientset(pg, leaderPod)
-}
-
-func TestClassicalWorkerInQuantumGroupIsGated(t *testing.T) {
-	ns, group, leader := "default", "qaoa", "qaoa-leader"
-	m := &webhook.Mutator{Clientset: quantumGroupFixture(ns, group, leader)}
-
-	worker := cpuPod("fluence")
-	worker.Name = "qaoa-worker-0"
-	worker.Namespace = ns
-	worker.Labels = map[string]string{webhook.GroupLabel: group}
-
-	ops := m.Mutate(context.Background(), worker)
-	if !hasGateOp(ops) {
-		t.Errorf("classical worker in a quantum group should be gated; ops=%v", ops)
-	}
-	if hasSidecarOp(ops) {
-		t.Error("worker should not get a sidecar")
-	}
-}
-
-func TestClassicalGangWorkerNotGated(t *testing.T) {
-	ns, group, leader := "default", "classical", "classical-leader"
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns,
-			Annotations: map[string]string{webhook.LeaderAnnotation: leader}},
-	}
-	leaderPod := cpuPod("fluence")
-	leaderPod.Name = leader
-	leaderPod.Namespace = ns
-	leaderPod.Labels = map[string]string{webhook.GroupLabel: group}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg, leaderPod)}
-
-	worker := cpuPod("fluence")
-	worker.Name = "classical-worker-0"
-	worker.Namespace = ns
-	worker.Labels = map[string]string{webhook.GroupLabel: group}
-
-	if hasGateOp(m.Mutate(context.Background(), worker)) {
-		t.Error("worker in a classical gang must NOT be gated (would deadlock)")
-	}
-}
-
-// Pod-template gang: every pod requests QPU; only the recorded leader gets the
-// sidecar, the rest are gated workers (role by admission order, not request).
-func TestPodTemplateGangSecondPodIsWorker(t *testing.T) {
-	ns, group, leader := "default", "qaoa", "qaoa-abc123"
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns,
-			Annotations: map[string]string{webhook.LeaderAnnotation: leader}},
-	}
-	leaderPod := qpuPod("fluence")
-	leaderPod.Name = leader
-	leaderPod.Namespace = ns
-	leaderPod.Labels = map[string]string{webhook.GroupLabel: group}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg, leaderPod)}
-
-	second := qpuPod("fluence") // identical spec, requests QPU
-	second.Name = "qaoa-def456"
-	second.Namespace = ns
-	second.Labels = map[string]string{webhook.GroupLabel: group}
-
-	ops := m.Mutate(context.Background(), second)
-	if !hasGateOp(ops) {
-		t.Error("second pod in a pod-template gang must be gated as a worker")
-	}
-	if hasSidecarOp(ops) {
-		t.Error("second pod must NOT get a sidecar (it is a worker)")
-	}
-}
-
-// ── quantum handler: explicit role annotation ──────────────────────────────────
-//
-// These cover the fluence.flux-framework.org/role annotation, which makes the
-// leader/worker split EXPLICIT rather than inferred by admission order. When the
-// annotation is present it is authoritative; the same value is echoed to the
-// container as FLUENCE_ROLE so the app reads the role Fluence used.
-
-// roledQPUPod is a QPU-requesting pod in a group with an explicit role.
-func roledQPUPod(ns, group, name, role string) *corev1.Pod {
-	p := qpuPod("fluence")
-	p.Name = name
-	p.Namespace = ns
-	p.Labels = map[string]string{webhook.GroupLabel: group}
-	p.Annotations = map[string]string{webhook.RoleAnnotation: role}
-	return p
-}
-
-// An explicitly-declared leader gets the sidecar and is NOT gated — even though
-// no leader is recorded on the PodGroup (admission order never consulted).
-func TestExplicitLeaderGetsSidecarNotGated(t *testing.T) {
-	ns, group := "default", "qaoa"
-	// fixture with NO LeaderAnnotation recorded — proves we don't rely on it.
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns},
-	}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg)}
-
-	leader := roledQPUPod(ns, group, "qaoa-leader", RoleLeader)
-	ops := m.Mutate(context.Background(), leader)
-	if hasGateOp(ops) {
-		t.Error("explicit leader must NOT be gated")
-	}
-	if !hasSidecarOp(ops) {
-		t.Error("explicit leader must get the sidecar")
-	}
-	if !contains(opEnvNames(ops), "FLUENCE_ROLE") {
-		t.Error("leader must get FLUENCE_ROLE injected for the app to read")
-	}
-}
-
-// An explicitly-declared worker is gated and gets no sidecar — even if it
-// requests the QPU resource itself and even if it (wrongly) appears as the
-// recorded leader. The annotation overrides both.
-func TestExplicitWorkerIsGatedRegardlessOfAdmission(t *testing.T) {
-	ns, group := "default", "qaoa"
-	// Adversarial fixture: record THIS worker's own name as the admission-order
-	// leader. The explicit role:worker must still win and gate it.
-	worker := roledQPUPod(ns, group, "qaoa-worker-0", RoleWorker)
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns,
-			Annotations: map[string]string{webhook.LeaderAnnotation: worker.Name}},
-	}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg)}
-
-	ops := m.Mutate(context.Background(), worker)
-	if !hasGateOp(ops) {
-		t.Error("explicit worker must be gated even if mis-recorded as the admission-order leader")
-	}
-	if hasSidecarOp(ops) {
-		t.Error("explicit worker must NOT get a sidecar")
-	}
-	if !contains(opEnvNames(ops), "FLUENCE_ROLE") {
-		t.Error("worker must get FLUENCE_ROLE injected so the app knows it is a worker")
-	}
-}
-
-// A heterogeneous gang declared with explicit roles resolves to exactly one
-// leader (sidecar, ungated) and the rest workers (gated) — independent of the
-// order in which the webhook admits the pods. This is the property a
-// leader/worker quantum gang needs and that admission order cannot guarantee.
-func TestExplicitRolesResolveRegardlessOfOrder(t *testing.T) {
-	ns, group := "default", "qaoa"
-	pg := &schedulingv1alpha2.PodGroup{
-		ObjectMeta: metav1.ObjectMeta{Name: group, Namespace: ns}, // no recorded leader
-	}
-	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset(pg)}
-
-	pods := []*corev1.Pod{
-		roledQPUPod(ns, group, "w0", RoleWorker),
-		roledQPUPod(ns, group, "leader", RoleLeader),
-		roledQPUPod(ns, group, "w1", RoleWorker),
-	}
-	var leaders, workers int
-	for _, p := range pods { // any admission order
-		ops := m.Mutate(context.Background(), p)
-		switch {
-		case hasSidecarOp(ops) && !hasGateOp(ops):
-			leaders++
-		case hasGateOp(ops) && !hasSidecarOp(ops):
-			workers++
-		default:
-			t.Fatalf("pod %s resolved to neither a clean leader nor worker", p.Name)
-		}
-	}
-	if leaders != 1 || workers != 2 {
-		t.Fatalf("want 1 leader + 2 workers, got %d leaders / %d workers", leaders, workers)
-	}
-}
-
-// Backwards compatibility: with NO role annotation, the leader is still chosen
-// by admission order (the recorded PodGroup leader), exactly as before.
-func TestNoRoleAnnotationFallsBackToAdmissionOrder(t *testing.T) {
-	ns, group, leader := "default", "qaoa", "qaoa-leader"
-	m := &webhook.Mutator{Clientset: quantumGroupFixture(ns, group, leader)}
-
-	// a second pod with no role annotation, not the recorded leader -> worker
-	second := qpuPod("fluence")
-	second.Name = "qaoa-second"
-	second.Namespace = ns
-	second.Labels = map[string]string{webhook.GroupLabel: group}
-	if !hasGateOp(m.Mutate(context.Background(), second)) {
-		t.Error("without a role annotation, a non-leader group member must be gated by admission order")
-	}
-}
-
 // ── gang handler: scheduling group linkage ──────────────────────────────────────
 
 func TestGangStampsSchedulingGroup(t *testing.T) {
diff --git a/pkg/webhook/handlers/quantum.go b/pkg/webhook/handlers/quantum.go
index 97fbfa6..26a09b0 100644
--- a/pkg/webhook/handlers/quantum.go
+++ b/pkg/webhook/handlers/quantum.go
@@ -4,11 +4,16 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"os"
+	"strconv"
+	"strings"
 
 	"github.com/converged-computing/fluence/pkg/webhook"
 	"github.com/converged-computing/fluence/pkg/webhook/spec"
 
 	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -16,188 +21,604 @@ func init() {
 	webhook.Register(&quantumHandler{})
 }
 
-// Quantum-specific policy. The webhook core knows NONE of these — they live
-// only here, in the quantum handler.
+// Quantum-specific policy. The webhook core knows NONE of these — they live only
+// here, in the quantum handler.
+//
+// Model (producer/consumer split, no separate submitter pod). A quantum task's
+// circuit comes from user code, so the pod that defines a task must RUN to submit
+// it — submit and gate are mutually exclusive per pod. Gating therefore only
+// helps pods that do NOT submit. A quantum gang in CoordinationShared mode is
+// split, per pod, into two roles decided at admission:
+//
+//   - PRODUCER (one member, the indexed-Job completion index 0): its own
+//     group-of-one <group>-producer (minCount 1) so it schedules alone and runs
+//     the SINGLE real submit; staged with the interceptor in REAL (tag) mode and
+//     given the sidecar, which polls the task and ungates the consumers at
+//     position==1. NOT gated. The producer is one of the N members, so the
+//     application is run exactly N times — never N+1.
+//   - CONSUMERS (the other N-1 members): the <group> gang (minCount N-1), each
+//     gated on quantum.braket/ready + preempting priority, told its role via
+//     FLUENCE_COORDINATION_ROLE=consumer and handed the producer's task id via
+//     FLUENCE_QUANTUM_JOB_ID. A consumer does NOT submit; it fetches the shared
+//     result by that id. Ungated together when the producer's task is ready.
+//
+// In CoordinationIndependent mode (the default) there is no shared result to
+// coordinate: every member is its own standalone producer (real submit, no gate),
+// each owning its task and its own queue wait. A lone quantum pod (no group) is
+// always standalone.
 const (
-	// QuantumResource is the Fluxion resource a pod requests when it wants
-	// Fluence to schedule quantum work. Requesting it is the trigger for sidecar
-	// + interceptor injection.
+	// QuantumResource is the Fluxion resource a pod requests to ask Fluence to
+	// schedule quantum work. Requesting it is the sole trigger for this handler.
 	QuantumResource = "fluxion.flux-framework.org/qpu"
 
-	// QuantumGate holds a classical worker until the leader's quantum task is
-	// ready (the sidecar removes it).
+	// QuantumGate holds a consumer pod unscheduled until the producer's task is
+	// ready (the producer's sidecar removes it).
 	QuantumGate = "quantum.braket/ready"
 
-	// ObserveLabel opts a standalone quantum pod into observe-only telemetry:
-	// the sidecar is injected and polls queue position but ungates nothing.
+	// ObserveLabel opts a STANDALONE quantum pod (a group of one) into
+	// observe-only telemetry: the sidecar is injected and polls queue position
+	// but ungates nothing.
 	ObserveLabel = "fluence.flux-framework.org/observe"
 
-	// Role values for webhook.RoleAnnotation.
-	RoleLeader = "leader"
-	RoleWorker = "worker"
+	// DependencyKindQuantumSubmit is the readiness Kind for the quantum resource
+	// type: consumer pods wait for a quantum submission to reach the device queue.
+	// First concrete instance of the general Dependency primitive (dependency.go).
+	DependencyKindQuantumSubmit = "quantum-submit"
+
+	// CoordinationAnnotation selects how a quantum gang is coordinated. It is an
+	// open enum so future designs (e.g. index-paired "scatter") add a mode
+	// without changing the mechanism.
+	CoordinationAnnotation = "fluence.flux-framework.org/coordination"
+
+	// CoordinationShared: one real task; the producer (index 0) submits and the
+	// other members are gated consumers that fetch the producer's result. Each
+	// member is told its role via FLUENCE_COORDINATION_ROLE; a role-aware workload
+	// branches on it (producer submits, consumer fetches by FLUENCE_QUANTUM_JOB_ID).
+	CoordinationShared = "shared"
+
+	// CoordinationIndependent (default): every member does its own quantum work;
+	// no coordination, no gating. Never invent coordination the user did not ask
+	// for, and never dedup tasks meant to be distinct.
+	CoordinationIndependent = "independent"
+
+	// ProducerGroupSuffix names the producer's own group-of-one: <group>-producer
+	// (minCount 1) so it schedules alone and never deadlocks against the gated
+	// consumer gang.
+	ProducerGroupSuffix = "-producer"
+
+	// CompletionIndexAnnotation is the indexed-Job completion index the Job
+	// controller stamps on each pod; index "0" is the producer (deterministic
+	// election with no recorded state).
+	CompletionIndexAnnotation = "batch.kubernetes.io/job-completion-index"
+
+	// ProducerIndex is the completion index promoted to producer.
+	ProducerIndex = "0"
+
+	// GangGroupEnv tells the producer's sidecar which consumer group label to list
+	// and ungate when the task is ready.
+	GangGroupEnv = "FLUENCE_GANG_GROUP"
 )
 
-// quantumHandler coordinates quantum-classical workflows. It applies to a pod
-// in either role:
-//   - the quantum submitter (requests QuantumResource): inject the interceptor,
-//     plus the sidecar when there is coordination to do (group leader, or
-//     observe-only telemetry requested);
-//   - a classical worker (a non-leader member of a group whose leader is a
-//     quantum pod): gate it until the leader's task is ready.
-//
-// This is the only place in the webhook that knows about quantum resources,
-// gates, or observe semantics.
+// quantumHandler splits a shared quantum gang into a single producer (real
+// submit + sidecar) and N-1 gated, role-aware consumers, or runs every member
+// standalone in independent mode (see the package-level model comment). It is the
+// only place in the webhook that knows about quantum resources, gates,
+// coordination, or observe semantics.
 type quantumHandler struct{}
 
 func (h *quantumHandler) Name() string { return "quantum" }
 
+// Applies to any pod requesting the quantum resource. Producers, consumers, and
+// standalone quantum pods all request it; nothing without the resource needs
+// quantum handling, so this is the single, unambiguous trigger.
 func (h *quantumHandler) Applies(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) bool {
-	if spec.PodRequestsResource(pod, QuantumResource) {
-		return true
+	return spec.PodRequestsResource(pod, QuantumResource)
+}
+
+func (h *quantumHandler) Mutate(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) []spec.Op {
+	g := resolveGroup(pod)
+	n := resolveGangSize(ctx, m, pod, g)
+	mode := coordinationMode(pod)
+	observe := spec.Label(pod, ObserveLabel) == "true"
+
+	// No coordination: a standalone quantum pod, or an explicitly independent
+	// member. The REAL submit happens in THIS pod; the sidecar is added only for
+	// observe-only telemetry. (independent mode routes every member here -> N
+	// standalone producers, each owning its task and its own queue wait.)
+	if mode != CoordinationShared || g == "" || n <= 1 {
+		ops := interceptorOps(pod)
+		if observe {
+			sc := sidecarFor(m)
+			sc.EnsureRBAC(ctx, pod.Namespace)
+			ops = append(ops, sc.ContainerOps(pod, true, nil)...)
+		}
+		log.Printf("[fluence-webhook] quantum %s/%s mode=%s (standalone/independent, observe=%v)",
+			pod.Namespace, pod.Name, mode, observe)
+		return ops
+	}
+
+	// shared mode: promote one member to producer; the rest are gated consumers.
+	if isProducer(ctx, m, pod, g) {
+		return h.mutateProducer(ctx, m, pod, g)
+	}
+	return h.mutateConsumer(ctx, m, pod, g, n)
+}
+
+// mutateProducer wires the gang's single submitting member. It ensures the
+// group-of-one PodGroup <group>-producer (minCount 1), links the pod to it, and
+// adds the interceptor, FLUENCE_COORDINATION_ROLE=producer, the sidecar RBAC and
+// the sidecar container, which is told via FLUENCE_GANG_GROUP which consumer
+// group to ungate. No gate is added to the producer itself.
+func (h *quantumHandler) mutateProducer(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod, group string) []spec.Op {
+	producerGroup := group + ProducerGroupSuffix
+	m.EnsurePodGroup(ctx, pod.Namespace, producerGroup, pod.Name, 1)
+	ops := linkGroupOps(pod, producerGroup)
+	ops = append(ops, interceptorOps(pod)...)           // the producer performs the real submit
+	ops = append(ops, roleEnvOps(pod, RoleProducer)...) // role env for the workload containers
+	sidecar := sidecarFor(m)
+	sidecar.EnsureRBAC(ctx, pod.Namespace)
+	gangEnv := corev1.EnvVar{Name: GangGroupEnv, Value: group}
+	ops = append(ops, sidecar.ContainerOps(pod, false, []corev1.EnvVar{gangEnv})...)
+	log.Printf("[fluence-webhook] quantum producer %s/%s — group %s (ungates consumers %q)",
+		pod.Namespace, pod.Name, producerGroup, group)
+	return ops
+}
+
+// mutateConsumer wires a non-producer member of a shared gang. The pod joins the
+// <group> PodGroup (minCount N-1, since the producer schedules in its own
+// group), waits behind the quantum gate via the Dependency primitive, and gets
+// its role plus FLUENCE_QUANTUM_JOB_ID through consumerEnvOps. It receives no
+// interceptor because it never submits. Its Fluxion quantum request is removed:
+// left in place, every consumer would be allocated a qpu, capping the gang at the
+// backend's qpu count and leaving consumers unschedulable on a single-slot QPU.
+func (h *quantumHandler) mutateConsumer(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod, group string, n int32) []spec.Op {
+	minCount := n - 1
+	m.EnsurePodGroup(ctx, pod.Namespace, group, pod.Name, minCount)
+	ops := linkGroupOps(pod, group)
+	// The wait is the general Dependency primitive: this pod depends on the
+	// quantum submission of <group>-producer and is held by the quantum gate.
+	wait := Dependency{
+		Kind:     DependencyKindQuantumSubmit,
+		Producer: group + ProducerGroupSuffix,
+		Gate:     QuantumGate,
+	}
+	ops = append(ops, wait.applyOps(pod)...)
+	ops = append(ops, consumerEnvOps(pod)...)
+	// Applies() already matched on the quantum request, so dropping it now does
+	// not change routing; it only stops Fluxion from allocating a qpu here.
+	ops = append(ops, dropQuantumResourceOps(pod)...)
+	log.Printf("[fluence-webhook] quantum consumer %s/%s — group %s minCount=%d, gated (role=consumer, qpu stripped)",
+		pod.Namespace, pod.Name, group, minCount)
+	return ops
+}
+
+// dropQuantumResourceOps removes the Fluxion quantum resource from a consumer's
+// containers (requests and limits), returning the patch ops and mutating pod in
+// place. Only entries that are present are removed (a JSON-patch remove on a
+// missing path would fail). The container named SidecarContainerName is skipped.
+func dropQuantumResourceOps(pod *corev1.Pod) []spec.Op {
+	rn := corev1.ResourceName(QuantumResource)
+	// QuantumResource contains '/', so escape it (shared helper) for use as a path segment.
+	key := escapeJSONPointer(QuantumResource)
+	var ops []spec.Op
+	for i, c := range pod.Spec.Containers {
+		if c.Name == SidecarContainerName {
+			continue
+		}
+		if _, ok := c.Resources.Requests[rn]; ok {
+			ops = append(ops, spec.Op{Op: "remove",
+				Path: fmt.Sprintf("/spec/containers/%d/resources/requests/%s", i, key)})
+			delete(pod.Spec.Containers[i].Resources.Requests, rn)
+		}
+		if _, ok := c.Resources.Limits[rn]; ok {
+			ops = append(ops, spec.Op{Op: "remove",
+				Path: fmt.Sprintf("/spec/containers/%d/resources/limits/%s", i, key)})
+			delete(pod.Spec.Containers[i].Resources.Limits, rn)
+		}
 	}
-	// An explicitly-declared worker applies (so it gets gated) even if it
-	// doesn't request the quantum resource and the leader isn't recorded yet —
-	// this removes the admission-order race for explicitly-roled gangs.
-	if webhook.Role(pod) == RoleWorker && webhook.GroupName(pod) != "" {
-		return true
+	return ops
+}
+
+// coordinationMode returns the coordination annotation verbatim, or CoordinationIndependent when it reads as "".
+func coordinationMode(pod *corev1.Pod) string {
+	if v := spec.Annotation(pod, CoordinationAnnotation); v != "" {
+		return v
 	}
-	return h.isWorkerOfQuantumGroup(ctx, m, pod)
+	return CoordinationIndependent
 }
 
-// isWorkerOfQuantumGroup reports whether pod is a non-leader member of a group
-// whose recorded leader is a quantum (QuantumResource-requesting) pod. Workers
-// are classical and do not request the resource themselves, so their role is a
-// property of group membership, resolved against cluster state.
-func (h *quantumHandler) isWorkerOfQuantumGroup(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) bool {
-	g := webhook.GroupName(pod)
-	if g == "" || m.Client() == nil {
-		return false
+// isProducer decides whether THIS pod is the gang's single producer. When the
+// pod carries the indexed-Job completion-index annotation, only index "0" is the
+// producer (no API call, no recorded state). Otherwise the first arrival claims
+// the slot: the pod is a consumer only if the <group>-producer PodGroup can be
+// fetched. That check-then-act is best-effort under concurrent admission, so an
+// indexed Job is preferred. Reading a nil annotations map yields ok=false.
+func isProducer(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod, group string) bool {
+	if idx, ok := pod.Annotations[CompletionIndexAnnotation]; ok {
+		return idx == ProducerIndex
 	}
-	leader := m.PodGroupLeader(ctx, pod.Namespace, g)
-	if leader == "" || leader == pod.Name {
-		return false
+	c := m.Client()
+	if c == nil {
+		return true // tests / no client: treat as producer
 	}
-	lp, err := m.Client().CoreV1().Pods(pod.Namespace).Get(ctx, leader, metav1.GetOptions{})
-	if err != nil {
-		return false
+	pg := group + ProducerGroupSuffix
+	if _, err := c.SchedulingV1alpha2().PodGroups(pod.Namespace).Get(ctx, pg, metav1.GetOptions{}); err == nil {
+		return false // already claimed by an earlier arrival
 	}
-	return spec.PodRequestsResource(lp, QuantumResource)
+	return true // NOTE(review): reached on ANY Get error, not only NotFound — a transient API failure also makes this pod a producer
 }
 
-func (h *quantumHandler) Mutate(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) []spec.Op {
-	g := webhook.GroupName(pod)
-
-	// Determine role. An explicit role annotation is AUTHORITATIVE: the workload
-	// declares which pod leads and which wait, and Fluence honors it directly —
-	// no admission-order race, and the same value is echoed to the app as
-	// FLUENCE_ROLE so the webhook's notion of leader and the application's notion
-	// cannot disagree. When the annotation is absent, fall back to the legacy
-	// behavior: role is decided by admission order (the first pod admitted in the
-	// group, recorded on the PodGroup by the gang handler). The admission-order
-	// path suits a homogeneous pod-template gang where every pod is identical;
-	// the explicit annotation suits a heterogeneous leader/worker gang.
-	role := webhook.Role(pod)
-	var isWorker bool
-	switch role {
-	case RoleWorker:
-		isWorker = true
-	case RoleLeader:
-		isWorker = false
-	default:
-		if g != "" {
-			leader := m.PodGroupLeader(ctx, pod.Namespace, g)
-			isWorker = leader != "" && leader != pod.Name
-		}
-	}
-
-	if g != "" && isWorker {
-		log.Printf("[fluence-webhook] quantum worker %s/%s (role=%q) — gating",
-			pod.Namespace, pod.Name, role)
-		ops := gateOps(pod)
-		ops = append(ops, roleEnvOps(pod, RoleWorker)...)
-		return ops
+// resolveGroup picks the identity of the gang this pod belongs to. Precedence:
+// the group name webhook.GroupName reports, then the name of the first owner
+// reference whose kind is Job, ReplicaSet or StatefulSet. An empty string means
+// the pod has no group and is handled as a standalone group of one.
+func resolveGroup(pod *corev1.Pod) string {
+	if explicit := webhook.GroupName(pod); explicit != "" {
+		return explicit
+	}
+	for _, owner := range pod.OwnerReferences {
+		kind := owner.Kind
+		if kind == "Job" || kind == "ReplicaSet" || kind == "StatefulSet" {
+			return owner.Name
+		}
 	}
+	return ""
+}
 
-	// Submitter/leader role: recorded or declared group leader, or a standalone
-	// quantum pod. Always gets the interceptor (so its task is tagged). It gets
-	// the SIDECAR only when there is coordination to do: it is a group leader
-	// (workers to ungate), or observe-only telemetry is requested.
-	isLeader := g != ""
-	observe := spec.Label(pod, ObserveLabel) == "true"
+// resolveGangSize returns the full gang size N. Sources, in order: the explicit
+// group-size annotation (must parse as a positive base-10 integer that fits in
+// int32), the owning Job, the owning ReplicaSet, then a count of pods already
+// carrying the group label (best-effort and admission-order dependent, which is
+// why the annotation is preferred). Falls back to 1 when none yields a size.
+func resolveGangSize(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod, group string) int32 {
+	// bitSize 32: out-of-range sizes are rejected rather than wrapped by int32().
+	raw := pod.Annotations[webhook.GroupSizeAnnotation] // a nil map reads as ""
+	if v, err := strconv.ParseInt(raw, 10, 32); err == nil && v > 0 {
+		return int32(v)
+	}
+	if n := ownerJobN(ctx, m, pod); n > 0 {
+		return n
+	}
+	if n := ownerReplicaSetN(ctx, m, pod); n > 0 {
+		return n
+	}
+	if group != "" {
+		if n := countGroupPods(ctx, m, pod.Namespace, group); n > 0 {
+			return n
+		}
+	}
+	return 1
+}
 
-	log.Printf("[fluence-webhook] quantum pod %s/%s — interceptor (leader=%v role=%q observe=%v)",
-		pod.Namespace, pod.Name, isLeader, role, observe)
+// ownerReplicaSetN looks up the ReplicaSet named in the pod's owner references
+// and reports its positive spec.replicas; 0 means no size could be determined.
+func ownerReplicaSetN(ctx context.Context, m webhook.MutatorAPI, pod *corev1.Pod) int32 {
+	client := m.Client()
+	if client == nil {
+		return 0
+	}
+	for _, owner := range pod.OwnerReferences {
+		if owner.Kind == "ReplicaSet" {
+			// Only ReplicaSet owners are consulted; a failed lookup ends the search.
+			rs, err := client.AppsV1().ReplicaSets(pod.Namespace).Get(ctx, owner.Name, metav1.GetOptions{})
+			if err != nil {
+				return 0
+			}
+			if want := rs.Spec.Replicas; want != nil && *want > 0 {
+				return *want
+			}
+		}
+	}
+	return 0
+}
 
-	ops := m.InterceptorOps(pod)
-	ops = append(ops, roleEnvOps(pod, RoleLeader)...)
-	if isLeader || observe {
-		m.EnsureSidecarRBAC(ctx, pod.Namespace)
-		ops = append(ops, m.SidecarContainerOps(pod, observe)...)
+// countGroupPods lists the pods in namespace that carry the group label with the
+// given value and returns how many there are. It is the last-resort gang size
+// for loose grouped pods, and depends on admission order; the group-size
+// annotation is the reliable source. Returns 0 without a client or on list error.
+func countGroupPods(ctx context.Context, m webhook.MutatorAPI, namespace, group string) int32 {
+	client := m.Client()
+	if client == nil {
+		return 0
+	}
-	return ops
+	opts := metav1.ListOptions{LabelSelector: webhook.GroupLabel + "=" + group}
+	pods, err := client.CoreV1().Pods(namespace).List(ctx, opts)
+	if err != nil {
+		return 0
+	}
+	members := len(pods.Items)
+	return int32(members)
 }
 
-// roleEnvOps injects FLUENCE_ROLE into every (non-sidecar) container so the
-// application reads its gang role from the same source of truth the webhook
-// used. effectiveRole is what the webhook decided (leader/worker), used only
-// when the pod carries no explicit role annotation; when the annotation is
-// present we source the value from it via the downward API so the two always
-// agree. Unlike InterceptorOps, this is NOT limited to Fluxion-resource
-// containers — worker containers do not request the quantum resource but still
-// need to know they are workers.
-func roleEnvOps(pod *corev1.Pod, effectiveRole string) []spec.Op {
-	var value corev1.EnvVar
-	if webhook.Role(pod) != "" {
-		value = spec.AnnotationEnv("FLUENCE_ROLE", webhook.RoleAnnotation)
-	} else {
-		value = corev1.EnvVar{Name: "FLUENCE_ROLE", Value: effectiveRole}
+// linkGroupOps returns the ops that put the group label on the pod and point
+// spec.schedulingGroup.podGroupName at the group. Each op is emitted only when
+// the pod does not already carry that value, so repeated calls add nothing new.
+func linkGroupOps(pod *corev1.Pod, group string) []spec.Op {
+	var patch []spec.Op
+	if webhook.GroupName(pod) != group {
+		// Default: set the one label key on an existing labels map.
+		label := spec.Op{Op: "add", Value: group,
+			Path: "/metadata/labels/" + escapeJSONPointer(webhook.GroupLabel)}
+		if pod.Labels == nil {
+			// No labels map yet: the patch has to create the whole map.
+			label.Path, label.Value = "/metadata/labels", map[string]string{webhook.GroupLabel: group}
+		}
+		patch = append(patch, label)
 	}
+	sg := pod.Spec.SchedulingGroup
+	if linked := sg != nil && sg.PodGroupName != nil && *sg.PodGroupName == group; !linked {
+		patch = append(patch, spec.Op{Op: "add", Path: "/spec/schedulingGroup",
+			Value: map[string]string{"podGroupName": group}})
+	}
+	return patch
+}
+
+// escapeJSONPointer makes s usable as a single JSON Pointer path segment by
+// rewriting "~" as "~0" and "/" as "~1" in one pass over the string.
+func escapeJSONPointer(s string) string {
+	escaper := strings.NewReplacer("~", "~0", "/", "~1")
+	return escaper.Replace(s)
+}
+
+const QuantumClassicalPriorityClass = "fluence-quantum-classical"
+
+// ── coordination role (producer / consumer) ─────────────────────────────────────
+//
+// In a shared gang each member is told its role positively, so the application
+// branches on it instead of relying on any submit-interception magic:
+//   producer  submits the one real task (and is tagged so the sidecar finds it);
+//   consumer  does NOT submit — it reads the producer's task id and fetches the
+//             shared result (e.g. via the vendor's S3-backed result API).
+// The role is decided at admission by isProducer (completion index 0, else the
+// producer-group claim) and surfaced as FLUENCE_COORDINATION_ROLE. Because the
+// election is the webhook's, this env is the single source of truth — the
+// container never re-derives its role from the Job index (which loose, non-Job
+// pods don't even have).
+
+const (
+	// CoordinationRoleEnv carries the pod's role in a shared gang. A role-aware
+	// workload branches on it: RoleProducer submits, RoleConsumer fetches the
+	// shared result by id. Unset for standalone/independent pods (they all submit).
+	CoordinationRoleEnv = "FLUENCE_COORDINATION_ROLE"
+	RoleProducer        = "producer"
+	RoleConsumer        = "consumer"
+
+	// QuantumJobIDAnnotation is the vendor-neutral task id the ungating sidecar
+	// stamps on each consumer (mirrors python/fluence/ungate.py JOB_ID_ANNOTATION),
+	// BEFORE removing the gate. Surfaced into FLUENCE_QUANTUM_JOB_ID via the
+	// downward API so a consumer can fetch the producer's result by id.
+	QuantumJobIDAnnotation = "fluence.flux-framework.org/quantum-job-id"
+
+	// QuantumJobIDEnv is the env a consumer reads for the producer's task id.
+	QuantumJobIDEnv = "FLUENCE_QUANTUM_JOB_ID"
+)
+
+// roleEnvOps sets FLUENCE_COORDINATION_ROLE=<role> on each non-sidecar container that does not already define it.
+func roleEnvOps(pod *corev1.Pod, role string) []spec.Op {
+	return setContainerEnvOps(pod, corev1.EnvVar{Name: CoordinationRoleEnv, Value: role})
+}
+
+// consumerEnvOps builds a consumer's env ops: first FLUENCE_COORDINATION_ROLE set
+// to consumer, then FLUENCE_QUANTUM_JOB_ID sourced from the quantum-job-id
+// annotation through spec.AnnotationEnv. Consumers do not submit, so nothing
+// interceptor-related is added here.
+func consumerEnvOps(pod *corev1.Pod) []spec.Op {
+	roleOps := roleEnvOps(pod, RoleConsumer)
+	jobIDOps := setContainerEnvOps(pod, spec.AnnotationEnv(QuantumJobIDEnv, QuantumJobIDAnnotation))
+	return append(roleOps, jobIDOps...)
+}
+
+// setContainerEnvOps emits one "add" op per non-sidecar container that does not
+// already define e.Name, and applies the same change to pod in place.
+func setContainerEnvOps(pod *corev1.Pod, e corev1.EnvVar) []spec.Op {
 	var ops []spec.Op
 	for i, c := range pod.Spec.Containers {
-		if c.Name == "fluence-sidecar" || spec.HasEnv(c, "FLUENCE_ROLE") {
+		if c.Name == SidecarContainerName || spec.HasEnv(c, e.Name) { // skip the sidecar and containers that already set it
 			continue
 		}
 		if len(c.Env) == 0 {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env", i), Value: []corev1.EnvVar{value}})
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env", i), Value: []corev1.EnvVar{e}}) // no env list yet: the op creates it
+			pod.Spec.Containers[i].Env = []corev1.EnvVar{e} // mirror the patch on the in-memory pod
 		} else {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: value})
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: e}) // "/-" appends to the existing list
+			pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, e) // mirror the patch on the in-memory pod
 		}
-		pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, value)
 	}
 	return ops
 }
 
-// gateOps adds the quantum scheduling gate (idempotent).
-const QuantumClassicalPriorityClass = "fluence-quantum-classical"
+// Sidecar implementation — quantum-owned, NOT core.
+//
+// The fluence coordination sidecar (its container, name, RBAC, image, and the
+// Python interceptor staging) is specific to the quantum integration: it polls a
+// vendor queue and ungates consumers. None of this belongs on the webhook core,
+// which stays domain-agnostic and only exposes generic primitives (Client,
+// InjectedEnv, EnsurePodGroup). The core invokes each handler's generic Mutate;
+// a handler does its own create/edit side-effects (here: RBAC, ConfigMaps,
+// container injection) through the generic client.
+//
+// These are package-level functions (not methods on the core *Mutator) operating
+// on the generic webhook.MutatorAPI. coreSidecar (see sidecar.go) delegates to
+// them; a future non-quantum handler that needs a different sidecar supplies its
+// own Sidecar implementation and its own container name/image.
 
-func gateOps(pod *corev1.Pod) []spec.Op {
-	for _, g := range pod.Spec.SchedulingGates {
-		if g.Name == QuantumGate {
-			return nil
+const (
+	// SidecarContainerName is the injected sidecar container's name. Owned here
+	// (not a global core const) because the container is quantum-specific.
+	SidecarContainerName = "fluence-sidecar"
+
+	// SidecarServiceAccount is the ServiceAccount (and Role/RoleBinding) name the
+	// sidecar uses to patch pods and read PodGroups.
+	SidecarServiceAccount = "fluence-sidecar"
+
+	// defaultSidecarImage is used when FLUENCE_SIDECAR_IMAGE is not set. Owned by
+	// the quantum integration; the deployment may override it via the env var.
+	defaultSidecarImage = "vanessa/fluence-sidecar:latest"
+
+	// StageVolumeName / StageMountPath: the shared emptyDir the init container
+	// stages the fluence Python package into, mounted into workload containers
+	// and prepended to PYTHONPATH (Model C delivery).
+	StageVolumeName = "fluence-pkg"
+	StageMountPath  = "/opt/fluence-staged"
+)
+
+// sidecarImage picks the sidecar image: a non-empty FLUENCE_SIDECAR_IMAGE wins,
+// otherwise defaultSidecarImage. The env var is read on every call.
+func sidecarImage() string {
+	override, set := os.LookupEnv("FLUENCE_SIDECAR_IMAGE")
+	if set && override != "" {
+		return override
+	}
+	return defaultSidecarImage
+}
+
+// ensureSidecarRBAC makes sure the namespace has the sidecar's ServiceAccount, Role and RoleBinding.
+// Each object is created whenever its Get returns an error; create failures are logged, not returned.
+func ensureSidecarRBAC(ctx context.Context, m webhook.MutatorAPI, namespace string) {
+	c := m.Client()
+	if c == nil {
+		return
+	}
+	lbl := map[string]string{"app": SidecarServiceAccount}
+
+	if _, err := c.CoreV1().ServiceAccounts(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
+		sa := &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl}}
+		if _, err := c.CoreV1().ServiceAccounts(namespace).Create(ctx, sa, metav1.CreateOptions{}); err != nil {
+			log.Printf("[fluence-webhook] could not create ServiceAccount %s/%s: %v", namespace, SidecarServiceAccount, err)
 		}
 	}
+	if _, err := c.RbacV1().Roles(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
+		role := &rbacv1.Role{
+			ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl},
+			Rules: []rbacv1.PolicyRule{
+				{APIGroups: []string{""}, Resources: []string{"pods"}, Verbs: []string{"get", "list", "patch", "update"}}, // read and modify pods
+				{APIGroups: []string{"scheduling.k8s.io"}, Resources: []string{"podgroups"}, Verbs: []string{"get", "list"}}, // read-only on PodGroups
+			},
+		}
+		if _, err := c.RbacV1().Roles(namespace).Create(ctx, role, metav1.CreateOptions{}); err != nil {
+			log.Printf("[fluence-webhook] could not create Role %s/%s: %v", namespace, SidecarServiceAccount, err)
+		}
+	}
+	if _, err := c.RbacV1().RoleBindings(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
+		rb := &rbacv1.RoleBinding{
+			ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl},
+			Subjects:   []rbacv1.Subject{{Kind: "ServiceAccount", Name: SidecarServiceAccount, Namespace: namespace}},
+			RoleRef:    rbacv1.RoleRef{APIGroup: "rbac.authorization.k8s.io", Kind: "Role", Name: SidecarServiceAccount}, // Role and ServiceAccount share one name
+		}
+		if _, err := c.RbacV1().RoleBindings(namespace).Create(ctx, rb, metav1.CreateOptions{}); err != nil {
+			log.Printf("[fluence-webhook] could not create RoleBinding %s/%s: %v", namespace, SidecarServiceAccount, err)
+		}
+	}
+}
+
+// interceptorOps stages the fluence Python package (Model C): an init container
+// copies it into a shared emptyDir, mounted into every workload container
+// (skipping the sidecar) with PYTHONPATH + FLUENCE_POD_UID, so Python auto-imports
+// the interceptor via sitecustomize, which tags the vendor submit so the sidecar
+// can find the task. Added to producers and standalone/independent pods (the ones
+// that actually submit); consumers don't submit, so they don't get it.
+func interceptorOps(pod *corev1.Pod) []spec.Op {
 	var ops []spec.Op
-	gate := corev1.PodSchedulingGate{Name: QuantumGate}
-	if len(pod.Spec.SchedulingGates) == 0 {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/schedulingGates", Value: []corev1.PodSchedulingGate{gate}})
+
+	vol := corev1.Volume{Name: StageVolumeName, VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}}
+	if len(pod.Spec.Volumes) == 0 { // NOTE(review): volume/initContainer/mount ops below are not mirrored into pod (only env is)
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/volumes", Value: []corev1.Volume{vol}})
 	} else {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/schedulingGates/-", Value: gate})
-	}
-	// Give gated classical workers a raised priority so they schedule reliably
-	// once ungated. priorityClassName is immutable post-creation, so it MUST be
-	// set here at admission, not at ungate time. Only set it if the pod doesn't
-	// already declare one (don't overwrite a user's class).
-	if pod.Spec.PriorityClassName == "" {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/priorityClassName", Value: QuantumClassicalPriorityClass})
-		// Clear spec.priority so the priority admission controller recomputes it
-		// from the class. The controller errors only when spec.priority is
-		// non-nil AND differs from the class value; setting it to null avoids
-		// that in every case. We use add-with-null (not remove): a JSON Patch
-		// "remove" of an absent path is a hard error, and whether the API has
-		// already defaulted spec.priority differs across clusters/k8s versions
-		// (it broke in CI but not on GKE, or vice versa). add-null is valid
-		// whether the field is absent, 0, or set.
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/priority", EmitNull: true})
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/volumes/-", Value: vol})
+	}
+
+	initc := corev1.Container{
+		Name:            "fluence-stage",
+		Image:           sidecarImage(),
+		ImagePullPolicy: corev1.PullAlways,
+		Command: []string{"sh", "-c",
+			fmt.Sprintf("python3 -m fluence.stage %s || echo '[fluence] staging skipped (interceptor unavailable)'", StageMountPath)}, // "|| echo": a staging failure does not fail the init container
+		VolumeMounts: []corev1.VolumeMount{{Name: StageVolumeName, MountPath: StageMountPath}},
+	}
+	if len(pod.Spec.InitContainers) == 0 {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/initContainers", Value: []corev1.Container{initc}})
+	} else {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/initContainers/-", Value: initc})
+	}
+
+	mount := corev1.VolumeMount{Name: StageVolumeName, MountPath: StageMountPath, ReadOnly: true} // workload containers see the staged package read-only
+	pythonpath := corev1.EnvVar{Name: "PYTHONPATH", Value: StageMountPath}
+	uid := spec.FieldEnv("FLUENCE_POD_UID", "metadata.uid")
+	for i, c := range pod.Spec.Containers {
+		if c.Name == SidecarContainerName {
+			continue
+		}
+		if len(c.VolumeMounts) == 0 {
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/volumeMounts", i), Value: []corev1.VolumeMount{mount}})
+		} else {
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/volumeMounts/-", i), Value: mount})
+		}
+		if !spec.HasEnv(c, "PYTHONPATH") { // NOTE(review): an existing PYTHONPATH is left untouched, so the staged path is not added to it — confirm intended
+			if len(c.Env) == 0 {
+				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env", i), Value: []corev1.EnvVar{pythonpath}})
+				pod.Spec.Containers[i].Env = []corev1.EnvVar{pythonpath}
+			} else {
+				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: pythonpath})
+				pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, pythonpath)
+			}
+		}
+		if !spec.HasEnv(c, "FLUENCE_POD_UID") { // "/env/-" is safe: env either already held PYTHONPATH or was just created above
+			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: uid})
+			pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, uid)
+		}
+	}
+	return ops
+}
+
+// sidecarContainerOps builds the patch ops that append the fluence sidecar
+// container and, when the pod uses no ServiceAccount of its own, point it at the
+// sidecar ServiceAccount. Sidecar env, in order: pod identity, m.InjectedEnv(),
+// FLUENCE_OBSERVE (observe-only telemetry mode), extraEnv, workload credentials.
+func sidecarContainerOps(m webhook.MutatorAPI, pod *corev1.Pod, observe bool, extraEnv []corev1.EnvVar) []spec.Op {
+	var ops []spec.Op
+	env := []corev1.EnvVar{
+		spec.FieldEnv("FLUENCE_POD_UID", "metadata.uid"),
+		spec.FieldEnv("FLUENCE_POD_NAME", "metadata.name"),
+		spec.FieldEnv("FLUENCE_NAMESPACE", "metadata.namespace"),
+		spec.FieldEnv("FLUENCE_GROUP", "metadata.labels['"+webhook.GroupLabel+"']"),
+	}
+	env = append(env, m.InjectedEnv()...)
+	if observe {
+		env = append(env, corev1.EnvVar{Name: "FLUENCE_OBSERVE", Value: "true"})
+	}
+	env = append(env, extraEnv...)
+	// Copy the FIRST workload container's secret/configMap-sourced env onto the
+	// sidecar so it can talk to the same backend (domain-agnostic: we propagate
+	// whatever that container pulls from a secret/configMap). Names already set
+	// above are skipped, and plain-value or fieldRef env is never copied.
+	if len(pod.Spec.Containers) > 0 {
+		have := map[string]bool{}
+		for _, e := range env {
+			have[e.Name] = true
+		}
+		for _, e := range pod.Spec.Containers[0].Env {
+			if have[e.Name] || e.ValueFrom == nil {
+				continue
+			}
+			if e.ValueFrom.SecretKeyRef != nil || e.ValueFrom.ConfigMapKeyRef != nil {
+				env = append(env, e)
+			}
+		}
+	}
+	sidecar := corev1.Container{
+		Name: SidecarContainerName, Image: sidecarImage(), ImagePullPolicy: corev1.PullAlways,
+		Env: env,
+		Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{
+			corev1.ResourceCPU: resource.MustParse("100m"), corev1.ResourceMemory: resource.MustParse("256Mi"),
+		}},
+	}
+	if len(pod.Spec.Containers) == 0 { // the "/-" append form needs an existing array
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/containers", Value: []corev1.Container{sidecar}})
+	} else {
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/containers/-", Value: sidecar})
+	}
+	if pod.Spec.ServiceAccountName == "" || pod.Spec.ServiceAccountName == "default" { // keep a user-chosen account
+		ops = append(ops, spec.Op{Op: "add", Path: "/spec/serviceAccountName", Value: SidecarServiceAccount})
 	}
 	return ops
 }
diff --git a/pkg/webhook/handlers/quantum_test.go b/pkg/webhook/handlers/quantum_test.go
new file mode 100644
index 0000000..10a000d
--- /dev/null
+++ b/pkg/webhook/handlers/quantum_test.go
@@ -0,0 +1,521 @@
+/*
+Copyright 2024 Lawrence Livermore National Security, LLC
+ (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+SPDX-License-Identifier: Apache-2.0
+*/
+
+// quantum_test.go — all tests for the quantum handler: the producer/consumer
+// shared-coordination split (no separate submitter pod), independent mode,
+// the coordination role + job-id handoff, the sidecar wiring, the Dependency primitive, and the
+// standalone/observe paths. Shared fixtures (qpuPod, cpuPod, op helpers) live in
+// handlers_test.go.
+package handlers
+
+import (
+	"context"
+	"testing"
+
+	"github.com/converged-computing/fluence/pkg/webhook"
+	"github.com/converged-computing/fluence/pkg/webhook/spec"
+
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+// ── standalone / observe ────────────────────────────────────────────────────────
+
+func TestSingleQuantumGetsInterceptorNoSidecar(t *testing.T) {
+	m := &webhook.Mutator{AttributeKeys: []string{"region"}}
+	ops := m.Mutate(context.Background(), qpuPod("fluence")) // qpuPod: shared fixture in handlers_test.go
+	names := opEnvNames(ops)
+	if !contains(names, "FLUXION_BACKEND") {
+		t.Errorf("want FLUXION_BACKEND, got %v", names)
+	}
+	if !contains(names, "PYTHONPATH") || !contains(names, "FLUENCE_POD_UID") {
+		t.Errorf("want interceptor env (PYTHONPATH, FLUENCE_POD_UID), got %v", names)
+	}
+	if hasSidecarOp(ops) { // no group and no observe label on this pod
+		t.Error("standalone quantum pod should not get a sidecar")
+	}
+	if hasGateOp(ops) {
+		t.Error("standalone quantum pod should not be gated")
+	}
+}
+
+func TestObserveLabelInjectsSidecar(t *testing.T) {
+	m := &webhook.Mutator{}
+	pod := qpuPod("fluence")
+	pod.Labels = map[string]string{ObserveLabel: "true"} // the only label: observe, no group
+	ops := m.Mutate(context.Background(), pod)
+	if !hasSidecarOp(ops) {
+		t.Error("observe-labeled quantum pod should get the sidecar")
+	}
+	if hasGateOp(ops) { // observing must not hold the pod back
+		t.Error("observe-only pod should not be gated")
+	}
+}
+
+// ── shared coordination: producer / consumer split ──────────────────────────────
+
+// sharedQPUPod builds a quantum workload pod (from the qpuPod fixture) named
+// name in namespace ns, labeled into group, owned by Job job, and annotated with
+// coordination=shared plus the given completion index. Index "0" is the
+// producer; any other index is a consumer. This is the real shape: an indexed Job
+// whose identical template yields differentiated roles from the completion index.
+func sharedQPUPod(ns, group, name, job, index string) *corev1.Pod {
+	p := qpuPod("fluence")
+	p.Name = name
+	p.Namespace = ns
+	p.Labels = map[string]string{webhook.GroupLabel: group}
+	p.Annotations = map[string]string{
+		CoordinationAnnotation:    CoordinationShared,
+		CompletionIndexAnnotation: index,
+	}
+	p.OwnerReferences = []metav1.OwnerReference{{Kind: "Job", Name: job}}
+	return p
+}
+
+// gangQPUPod is sharedQPUPod without the annotations: a quantum pod labeled into
+// group and owned by Job job, i.e. the default (independent) coordination mode.
+func gangQPUPod(ns, group, name, job string) *corev1.Pod {
+	p := qpuPod("fluence")
+	p.Name = name
+	p.Namespace = ns
+	p.Labels = map[string]string{webhook.GroupLabel: group}
+	p.OwnerReferences = []metav1.OwnerReference{{Kind: "Job", Name: job}}
+	return p
+}
+
+// mincount returns the named PodGroup's gang minCount; ok=false if the Get fails or it has no gang policy.
+func mincount(t *testing.T, cs *fake.Clientset, ns, group string) (int32, bool) {
+	t.Helper()
+	pg, err := cs.SchedulingV1alpha2().PodGroups(ns).Get(context.Background(), group, metav1.GetOptions{})
+	if err != nil || pg.Spec.SchedulingPolicy.Gang == nil {
+		return 0, false
+	}
+	return pg.Spec.SchedulingPolicy.Gang.MinCount, true
+}
+
+// A shared-mode CONSUMER (completion index != 0, owned by Job parallelism=3) is
+// gated, told its role (FLUENCE_COORDINATION_ROLE=consumer), joins the <group>
+// consumer gang at minCount N-1 (the split), and gets NO sidecar (it is gated).
+// No separate submitter pod is ever created — the producer is one of the N members.
+func TestSharedConsumerGatedRoleAndSplit(t *testing.T) {
+	ns, group, job := "default", "qg", "qg-job"
+	par := int32(3) // N = 3, so the consumer gang below is expected at N-1 = 2
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), sharedQPUPod(ns, group, "qg-1", job, "1"))
+
+	if !hasGateOp(ops) {
+		t.Error("consumer must be gated")
+	}
+	if hasSidecarOp(ops) {
+		t.Error("consumer (gated) must NOT get a sidecar")
+	}
+	if !hasDropQuantumResourceOp(ops) {
+		t.Error("consumer (gated, never runs the QPU) must have its qpu resource stripped")
+	}
+	if e, ok := envOp(ops, CoordinationRoleEnv); !ok || e.Value != RoleConsumer {
+		t.Errorf("consumer must get %s=%s", CoordinationRoleEnv, RoleConsumer)
+	}
+	// Consumer gang is minCount N-1 (the producer/consumer split).
+	if mc, ok := mincount(t, cs, ns, group); !ok || mc != 2 {
+		t.Errorf("consumer PodGroup minCount=%d (ok=%v), want 2 (N-1 split)", mc, ok)
+	}
+	// No separate submitter pod is created.
+	pods, _ := cs.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{})
+	if len(pods.Items) != 0 {
+		t.Errorf("shared mode must NOT spawn a separate submitter pod; found %d pods", len(pods.Items))
+	}
+}
+
+// The shared-mode PRODUCER (completion index 0) is wired as the real coordinator:
+// its own group-of-one <group>-producer at minCount 1, the real sidecar, not
+// gated, role=producer, and told which consumer group to ungate via
+// FLUENCE_GANG_GROUP. It is one of the N members — no extra pod is created.
+func TestSharedProducerWiredAsRealSidecar(t *testing.T) {
+	ns, group, job := "default", "qg2", "qg2-job"
+	par := int32(2) // N = 2: this producer plus one consumer
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), sharedQPUPod(ns, group, "qg2-0", job, "0"))
+
+	if !hasSidecarOp(ops) {
+		t.Error("producer must get the real sidecar")
+	}
+	if hasGateOp(ops) {
+		t.Error("producer must NOT be gated")
+	}
+	if hasDropQuantumResourceOp(ops) {
+		t.Error("producer must KEEP its qpu resource (it runs the real submit)")
+	}
+	if e, ok := envOp(ops, CoordinationRoleEnv); !ok || e.Value != RoleProducer {
+		t.Errorf("producer must get %s=%s", CoordinationRoleEnv, RoleProducer)
+	}
+	if _, ok := envOp(ops, QuantumJobIDEnv); ok {
+		t.Error("producer must NOT get FLUENCE_QUANTUM_JOB_ID (it submits its own task)")
+	}
+	// FLUENCE_GANG_GROUP (the consumer group to ungate) is on the sidecar.
+	var sidecar *corev1.Container
+	for _, op := range ops {
+		if c, ok := op.Value.(corev1.Container); ok && c.Name == SidecarContainerName {
+			cc := c // point at a copy rather than at the if-scoped c
+			sidecar = &cc
+		}
+	}
+	if sidecar == nil {
+		t.Fatal("no sidecar container on producer")
+	}
+	var gotGang bool
+	for _, e := range sidecar.Env {
+		if e.Name == GangGroupEnv && e.Value == group {
+			gotGang = true
+		}
+	}
+	if !gotGang {
+		t.Errorf("producer sidecar must get %s=%q", GangGroupEnv, group)
+	}
+	// Producer is its own group-of-one (minCount 1).
+	if mc, ok := mincount(t, cs, ns, group+ProducerGroupSuffix); !ok || mc != 1 {
+		t.Errorf("producer PodGroup %s minCount=%d (ok=%v), want 1", group+ProducerGroupSuffix, mc, ok)
+	}
+	// No separate submitter pod.
+	pods, _ := cs.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{})
+	if len(pods.Items) != 0 {
+		t.Errorf("producer is a member, not a spawned pod; found %d pods", len(pods.Items))
+	}
+}
+
+// Shared mode never creates an extra pod: a full gang (producer index 0 +
+// consumers) is N members, so the application runs exactly N times (not N+1 as
+// the old submitter-pod model did).
+func TestSharedGangNoSeparateSubmitterPod(t *testing.T) {
+	ns, group, job := "default", "qauto", "qauto-job"
+	par := int32(2) // N = 2: one producer + one consumer, both admitted below
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	m.Mutate(context.Background(), sharedQPUPod(ns, group, "qauto-0", job, "0")) // producer
+	m.Mutate(context.Background(), sharedQPUPod(ns, group, "qauto-1", job, "1")) // consumer
+
+	pods, _ := cs.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{})
+	if len(pods.Items) != 0 {
+		t.Errorf("shared mode must not create any pods (no submitter); found %d", len(pods.Items))
+	}
+	// Both groups exist with the right minCounts.
+	if mc, ok := mincount(t, cs, ns, group+ProducerGroupSuffix); !ok || mc != 1 {
+		t.Errorf("producer group minCount=%d (ok=%v), want 1", mc, ok)
+	}
+	if mc, ok := mincount(t, cs, ns, group); !ok || mc != 1 {
+		t.Errorf("consumer group minCount=%d (ok=%v), want N-1=1", mc, ok)
+	}
+}
+
+// ── independent mode (default) ──────────────────────────────────────────────────
+
+// A grouped quantum pod with no coordination annotation is INDEPENDENT (default):
+// it does its own real submit, is not gated, carries no coordination role, and
+// triggers no group split and no submitter pod.
+func TestIndependentGroupedQuantumIsStandalone(t *testing.T) {
+	ns, group, job := "default", "indep", "indep-job"
+	par := int32(3)
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), gangQPUPod(ns, group, "indep-0", job)) // grouped, but unannotated
+
+	if hasGateOp(ops) {
+		t.Error("independent member must not be gated")
+	}
+	if _, ok := envOp(ops, CoordinationRoleEnv); ok {
+		t.Error("independent member must not get a coordination role env")
+	}
+	if _, ok := mincount(t, cs, ns, group+ProducerGroupSuffix); ok {
+		t.Error("independent mode must not create a producer group")
+	}
+	pods, _ := cs.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{})
+	if len(pods.Items) != 0 {
+		t.Error("independent mode must not spawn a submitter pod")
+	}
+}
+
+// A standalone quantum pod (no group, no owner → group of one) does its own real
+// submit: interceptor staged, but no gating, no coordination role, no submitter.
+func TestStandaloneQuantumIsReal(t *testing.T) {
+	ns := "default"
+	cs := fake.NewSimpleClientset() // starts empty: no Job and no PodGroup objects
+	m := &webhook.Mutator{Clientset: cs}
+
+	pod := qpuPod("fluence")
+	pod.Name = "solo"
+	pod.Namespace = ns
+
+	ops := m.Mutate(context.Background(), pod)
+	if hasGateOp(ops) {
+		t.Error("standalone quantum pod must not be gated")
+	}
+	if _, ok := envOp(ops, CoordinationRoleEnv); ok {
+		t.Error("standalone quantum pod must not get a coordination role env")
+	}
+	pods, _ := cs.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{})
+	if len(pods.Items) != 0 {
+		t.Error("standalone quantum pod must not spawn a submitter")
+	}
+}
+
+// Even with coordination=shared, a group of one (Job parallelism 1) has no
+// consumers to coordinate, so it falls through to the standalone real-submit path.
+func TestSharedGroupOfOneIsStandalone(t *testing.T) {
+	ns, group, job := "default", "one", "one-job"
+	par := int32(1) // N = 1: the single member has nobody to coordinate with
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), sharedQPUPod(ns, group, "one-0", job, "0"))
+	if hasGateOp(ops) {
+		t.Error("shared group-of-one must not be gated")
+	}
+	if _, ok := mincount(t, cs, ns, group+ProducerGroupSuffix); ok {
+		t.Error("shared group-of-one must not create a producer group")
+	}
+}
+
+// ── role + dependency ────────────────────────────────────────────────────
+
+// envOp returns the first env var named name found in any op's Value, whether
+// the op carries a single corev1.EnvVar or a []corev1.EnvVar; ok=false if none.
+func envOp(ops []spec.Op, name string) (corev1.EnvVar, bool) {
+	for _, op := range ops {
+		switch v := op.Value.(type) {
+		case corev1.EnvVar:
+			if v.Name == name {
+				return v, true
+			}
+		case []corev1.EnvVar:
+			for _, e := range v {
+				if e.Name == name {
+					return e, true
+				}
+			}
+		}
+	}
+	return corev1.EnvVar{}, false
+}
+
+// annotationOps collects every annotation key=value the ops would stamp (later ops win on duplicate keys).
+func annotationOps(ops []spec.Op) map[string]string {
+	out := map[string]string{}
+	for _, op := range ops {
+		// whole-map add: /metadata/annotations
+		if op.Path == "/metadata/annotations" {
+			if m, ok := op.Value.(map[string]string); ok {
+				for k, v := range m {
+					out[k] = v
+				}
+			}
+			continue
+		}
+		// single-key add: /metadata/annotations/<escaped-key> -> string value
+		const pfx = "/metadata/annotations/"
+		if len(op.Path) > len(pfx) && op.Path[:len(pfx)] == pfx {
+			if s, ok := op.Value.(string); ok { // non-string values are ignored
+				key := unescapeJSONPointer(op.Path[len(pfx):])
+				out[key] = s
+			}
+		}
+	}
+	return out
+}
+
+// unescapeJSONPointer reverses escapeJSONPointer for assertion readability:
+// "~1" -> "/" and "~0" -> "~", decoded in one pass so "~01" yields "~1".
+func unescapeJSONPointer(s string) string {
+	// Build into a byte slice: string(s[i]) would re-encode each byte >= 0x80 as
+	// a rune and corrupt multi-byte UTF-8 keys, and += is quadratic.
+	out := make([]byte, 0, len(s))
+	for i := 0; i < len(s); i++ {
+		ch := s[i]
+		if ch == '~' && i+1 < len(s) {
+			switch s[i+1] {
+			case '1':
+				ch = '/'
+				i++
+			case '0':
+				i++ // ch is already '~'
+			}
+		}
+		out = append(out, ch)
+	}
+	return string(out)
+}
+
+// A shared-mode consumer is expressed as a general Dependency: gated, stamped
+// with depends-on-{kind,producer,gate}, and the producer is the <group>-producer
+// group.
+func TestQuantumConsumerIsGeneralDependency(t *testing.T) {
+	ns, group, job := "default", "depq", "depq-job"
+	par := int32(3)
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), sharedQPUPod(ns, group, "depq-1", job, "1"))
+
+	if !hasGateOp(ops) {
+		t.Errorf("consumer not gated by the dependency (ops: %+v)", ops)
+	}
+	ann := annotationOps(ops) // flatten the annotation ops into key -> value
+	if ann[DependsOnKindAnnotation] != DependencyKindQuantumSubmit {
+		t.Errorf("depends-on-kind=%q, want %q", ann[DependsOnKindAnnotation], DependencyKindQuantumSubmit)
+	}
+	if ann[DependsOnProducerAnnotation] != group+ProducerGroupSuffix {
+		t.Errorf("depends-on-producer=%q, want %q (the producer group)", ann[DependsOnProducerAnnotation], group+ProducerGroupSuffix)
+	}
+	if ann[DependsOnGateAnnotation] != QuantumGate {
+		t.Errorf("depends-on-gate=%q, want %q", ann[DependsOnGateAnnotation], QuantumGate)
+	}
+}
+
+// DependencyOf round-trips the stamped annotations back into a Dependency, so a
+// scheduler/sidecar observer can read what a gated pod waits for.
+func TestDependencyOfRoundTrip(t *testing.T) {
+	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{
+		DependsOnKindAnnotation:     DependencyKindQuantumSubmit,
+		DependsOnProducerAnnotation: "grp",
+		DependsOnGateAnnotation:     QuantumGate,
+	}}}
+	d, ok := DependencyOf(pod)
+	if !ok || d.Kind != DependencyKindQuantumSubmit || d.Producer != "grp" || d.Gate != QuantumGate {
+		t.Errorf("DependencyOf=%+v ok=%v, want quantum-submit/grp/%s", d, ok, QuantumGate)
+	}
+	if _, ok := DependencyOf(&corev1.Pod{}); ok { // zero-value pod: no annotations at all
+		t.Errorf("DependencyOf on a pod with no dependency should be ok=false")
+	}
+}
+
+// The consumer is role-aware: it gets FLUENCE_COORDINATION_ROLE=consumer and the
+// producer's task id via the FLUENCE_QUANTUM_JOB_ID downward-API env. Despite the
+// test name, it is NOT staged with the interceptor (a consumer never submits, so
+// it needs neither the interceptor nor any faux flag); the script branches on role.
+func TestQuantumConsumerStagedWithRole(t *testing.T) {
+	ns, group, job := "default", "roleq", "roleq-job"
+	par := int32(2)
+	cs := fake.NewSimpleClientset(&batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{Name: job, Namespace: ns},
+		Spec:       batchv1.JobSpec{Parallelism: &par, Completions: &par}})
+	m := &webhook.Mutator{Clientset: cs}
+
+	ops := m.Mutate(context.Background(), sharedQPUPod(ns, group, "roleq-1", job, "1"))
+
+	// Role surfaced to the container.
+	if e, ok := envOp(ops, CoordinationRoleEnv); !ok || e.Value != RoleConsumer {
+		t.Errorf("consumer missing %s=%s (got %+v, ok=%v)", CoordinationRoleEnv, RoleConsumer, e, ok)
+	}
+
+	// A consumer never submits, so it is NOT staged with the interceptor.
+	if _, ok := envOp(ops, "PYTHONPATH"); ok {
+		t.Error("consumer must NOT be staged with the interceptor (it does not submit)")
+	}
+
+	// Producer's task id sourced from the annotation the ungating sidecar stamps.
+	e, ok := envOp(ops, QuantumJobIDEnv)
+	if !ok {
+		t.Fatalf("consumer missing %s env", QuantumJobIDEnv)
+	}
+	if e.ValueFrom == nil || e.ValueFrom.FieldRef == nil ||
+		e.ValueFrom.FieldRef.FieldPath != "metadata.annotations['"+QuantumJobIDAnnotation+"']" {
+		t.Errorf("%s should be a downward-API ref to %s, got %+v", QuantumJobIDEnv, QuantumJobIDAnnotation, e)
+	}
+}
+
+// Classical override below the replica count: group-size=2 on a gang owned by a
+// Job(parallelism=5) must yield minCount=2 (the override), not 5. With a cluster
+// sized to 2, the gang reaches quorum and runs; if the override were dropped the
+// gang would wait forever for 5 (the e2e hang that fails CI).
+func TestClassicalOverrideBelowReplicaCount(t *testing.T) {
+	ns, group, job := "default", "ovr2", "ovr2-job"
+	pod := cpuPod("fluence")
+	pod.Namespace = ns
+	pod.Labels = map[string]string{webhook.GroupLabel: group}
+	pod.Annotations = map[string]string{webhook.GroupSizeAnnotation: "2"} // the override under test
+	ownedBy(pod, "Job", job)
+
+	got := minCountWithClient(t, pod, jobWithParallelism(ns, job, 5)) // owning Job says 5
+	if got != 2 {
+		t.Errorf("override below replicas: minCount=%d, want 2 (override wins over Job=5)", got)
+	}
+}
+
+// ── sidecar wiring ──────────────────────────────────────────────────────────────
+
+// The sidecar inherits the workload's secret/configMap-sourced credentials so it
+// can talk to the same backend, but NOT plain-value env. (Moved from the core
+// webhook package: sidecar construction is now quantum-owned.)
+func TestSidecarInheritsWorkloadSecretEnv(t *testing.T) {
+	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset()}
+	pod := &corev1.Pod{
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{{
+				Name: "app",
+				Env: []corev1.EnvVar{
+					{Name: "PLAIN_VALUE", Value: "x"}, // plain value: NOT copied
+					{Name: "AWS_ACCESS_KEY_ID", ValueFrom: &corev1.EnvVarSource{
+						SecretKeyRef: &corev1.SecretKeySelector{
+							LocalObjectReference: corev1.LocalObjectReference{Name: "aws-braket-credentials"},
+							Key:                  "AWS_ACCESS_KEY_ID",
+						}}},
+				},
+			}},
+		},
+	}
+	ops := sidecarContainerOps(m, pod, false, nil) // observe=false, no handler extraEnv
+	var sidecar *corev1.Container
+	for _, op := range ops {
+		if c, ok := op.Value.(corev1.Container); ok && c.Name == SidecarContainerName {
+			sidecar = &c
+		}
+	}
+	if sidecar == nil {
+		t.Fatal("no sidecar container added")
+	}
+	var gotSecret, gotPlain bool
+	for _, e := range sidecar.Env {
+		if e.Name == "AWS_ACCESS_KEY_ID" && e.ValueFrom != nil && e.ValueFrom.SecretKeyRef != nil {
+			gotSecret = true
+		}
+		if e.Name == "PLAIN_VALUE" {
+			gotPlain = true
+		}
+	}
+	if !gotSecret {
+		t.Error("sidecar should inherit the workload's secret-sourced AWS creds")
+	}
+	if gotPlain {
+		t.Error("sidecar should NOT copy plain-value workload env")
+	}
+}
+
+// The producer member of a shared gang requests the quantum resource (it runs the
+// real submit). Sanity check that the sharedQPUPod helper builds a quantum pod.
+func TestSharedProducerRequestsQuantumResource(t *testing.T) {
+	p := sharedQPUPod("default", "g", "g-0", "g-job", "0") // completion index "0" = producer
+	if !spec.PodRequestsResource(p, QuantumResource) {
+		t.Error("producer must request the quantum resource (it runs the real submit)")
+	}
+}
diff --git a/pkg/webhook/handlers/registry_test.go b/pkg/webhook/handlers/registry_test.go
new file mode 100644
index 0000000..346d786
--- /dev/null
+++ b/pkg/webhook/handlers/registry_test.go
@@ -0,0 +1,82 @@
+/*
+Copyright 2024 Lawrence Livermore National Security, LLC
+ (c.f. AUTHORS, NOTICE.LLNS, COPYING)
+SPDX-License-Identifier: Apache-2.0
+*/
+
+// Registry behavior: dispatch order comes from the active handler list (not a
+// per-handler Order), and the list both selects and orders handlers.
+package handlers
+
+import (
+	"context"
+	"testing"
+
+	"github.com/converged-computing/fluence/pkg/webhook"
+	"github.com/converged-computing/fluence/pkg/webhook/spec"
+
+	"k8s.io/client-go/kubernetes/fake"
+)
+
+// The default active order ships gang LAST so it only applies default gang
+// sizing when no earlier handler shaped the gang.
+func TestDefaultOrderGangLast(t *testing.T) {
+	defer webhook.SetActiveHandlers(nil) // leave the default list in place on exit
+	active, _ := webhook.SetActiveHandlers(nil) // restore + read default
+	if len(active) == 0 {
+		t.Fatal("no active handlers")
+	}
+	if active[len(active)-1] != "gang" {
+		t.Errorf("gang must be last in default order; got %v", active)
+	}
+	// default order is exactly fluxion, quantum, gang
+	want := []string{"fluxion", "quantum", "gang"}
+	if len(active) != len(want) {
+		t.Fatalf("default order = %v, want %v", active, want)
+	}
+	for i := range want {
+		if active[i] != want[i] {
+			t.Errorf("default order = %v, want %v", active, want)
+			break
+		}
+	}
+}
+
+// The active list IS the order: passing a custom order reorders dispatch, and
+// unknown names are reported, not silently kept.
+func TestActiveListSetsOrderAndReportsUnknown(t *testing.T) {
+	defer webhook.SetActiveHandlers(nil) // leave the default list in place on exit
+	active, unknown := webhook.SetActiveHandlers([]string{"gang", "fluxion", "bogus"})
+	if len(active) != 2 || active[0] != "gang" || active[1] != "fluxion" {
+		t.Errorf("active = %v, want [gang fluxion] in that order", active)
+	}
+	if len(unknown) != 1 || unknown[0] != "bogus" {
+		t.Errorf("unknown = %v, want [bogus]", unknown)
+	}
+}
+
+// Dropping a handler from the list disables it: a quantum pod with quantum
+// omitted gets no interceptor ops (only fluxion/gang act).
+func TestOmittedHandlerDoesNotDispatch(t *testing.T) {
+	defer webhook.SetActiveHandlers(nil) // leave the default list in place on exit
+	m := &webhook.Mutator{Clientset: fake.NewSimpleClientset()}
+
+	webhook.SetActiveHandlers(nil) // default: quantum present
+	if !hasInterceptor(m.Mutate(context.Background(), qpuPod("fluence"))) {
+		t.Fatal("with quantum active, expected interceptor (init container) ops")
+	}
+
+	webhook.SetActiveHandlers([]string{"fluxion", "gang"}) // quantum omitted
+	if hasInterceptor(m.Mutate(context.Background(), qpuPod("fluence"))) {
+		t.Error("with quantum omitted, interceptor ops must NOT be present")
+	}
+}
+
+func hasInterceptor(ops []spec.Op) bool { // reports whether any op targets /spec/initContainers
+	for _, op := range ops {
+		if op.Path == "/spec/initContainers" || op.Path == "/spec/initContainers/-" { // whole-array add or append
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/webhook/handlers/sidecar.go b/pkg/webhook/handlers/sidecar.go
new file mode 100644
index 0000000..d105a7c
--- /dev/null
+++ b/pkg/webhook/handlers/sidecar.go
@@ -0,0 +1,57 @@
+package handlers
+
+import (
+	"context"
+
+	"github.com/converged-computing/fluence/pkg/webhook"
+	"github.com/converged-computing/fluence/pkg/webhook/spec"
+
+	corev1 "k8s.io/api/core/v1"
+)
+
+// Sidecar is the capability a handler uses to attach a coordination sidecar to a
+// pod. It is NOT part of the webhook core's MutatorAPI: only handlers that need
+// a sidecar (today, quantum) depend on it, and a handler may supply its own
+// implementation to customize delivery. The default implementation
+// (coreSidecar) delegates to this package's own RBAC/interceptor/sidecar ops,
+// which remain the staging mechanism shared by any sidecar-using handler.
+//
+// This is the seam the design calls for: "a general sidecar interface that can
+// be used across handlers and customized by the quantum [handler]". A future
+// custom-resource handler can implement Sidecar differently (different image,
+// env, gating) without touching the core or other handlers.
+type Sidecar interface {
+	// EnsureRBAC provisions the per-namespace ServiceAccount/Role/Binding the
+	// sidecar needs to read/patch pods and podgroups.
+	EnsureRBAC(ctx context.Context, namespace string)
+	// InterceptorOps stages the in-pod interceptor (Model C) into the workload
+	// containers (init container + shared volume on PYTHONPATH).
+	InterceptorOps(pod *corev1.Pod) []spec.Op
+	// ContainerOps adds the sidecar container. observe=true selects observe-only
+	// telemetry mode (no ungating). extraEnv carries handler-computed,
+	// domain-specific env (e.g. the quantum handler's FLUENCE_EXPECTED_WORKERS =
+	// N-1 and FLUENCE_WORKER_GROUP_BASE) so the core never has to know about
+	// leader/worker concepts — the handler that owns the split owns those values.
+	ContainerOps(pod *corev1.Pod, observe bool, extraEnv []corev1.EnvVar) []spec.Op
+}
+
+// coreSidecar is the default Sidecar. Despite its name it carries no webhook-core
+// logic: it forwards to the quantum-owned implementation (see sidecar_impl.go),
+// which uses only the generic MutatorAPI (Client, InjectedEnv). A custom handler
+// could supply its own Sidecar with a different container/image.
+type coreSidecar struct{ m webhook.MutatorAPI }
+
+func (s coreSidecar) EnsureRBAC(ctx context.Context, namespace string) {
+	ensureSidecarRBAC(ctx, s.m, namespace)
+}
+func (s coreSidecar) InterceptorOps(pod *corev1.Pod) []spec.Op {
+	return interceptorOps(pod) // derived from the pod alone; s.m is not used
+}
+func (s coreSidecar) ContainerOps(pod *corev1.Pod, observe bool, extraEnv []corev1.EnvVar) []spec.Op {
+	return sidecarContainerOps(s.m, pod, observe, extraEnv)
+}
+
+// sidecarFor returns the Sidecar a handler should use. Centralized so the choice
+// of implementation (and any future per-handler customization) lives in one
+// place. Today every sidecar-using handler gets the coreSidecar default.
+func sidecarFor(m webhook.MutatorAPI) Sidecar { return coreSidecar{m: m} }
diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go
index 20a7288..b39bec1 100644
--- a/pkg/webhook/webhook.go
+++ b/pkg/webhook/webhook.go
@@ -1,11 +1,11 @@
 // Package webhook is fluence's mutating admission webhook.
 //
 // The core here is domain-agnostic plumbing: it owns the Mutator, the handler
-// dispatcher, per-namespace PodGroup/RBAC provisioning, the Model C package
-// staging (init container + shared volume on PYTHONPATH), the HTTP entrypoint,
-// and self-managed TLS. It knows nothing about quantum, Braket, gate names, or
-// observe labels — that policy lives entirely in the handlers (pkg/webhook/
-// handlers), which self-register via Register().
+// dispatcher, per-namespace PodGroup provisioning, the HTTP entrypoint, and
+// self-managed TLS. It knows nothing about quantum, Braket, gate names, sidecars,
+// RBAC, or interceptor staging — that policy and machinery lives entirely in the
+// handlers (pkg/webhook/handlers), which self-register via Register() and perform
+// their own create/edit side-effects through the generic MutatorAPI.
 //
 // The webhook self-manages TLS via a self-signed CA patched into the
 // MutatingWebhookConfiguration caBundle at startup.
@@ -32,9 +32,7 @@ import (
 
 	admissionv1 "k8s.io/api/admission/v1"
 	corev1 "k8s.io/api/core/v1"
-	rbacv1 "k8s.io/api/rbac/v1"
 	schedulingv1alpha2 "k8s.io/api/scheduling/v1alpha2"
-	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/client-go/kubernetes"
@@ -52,34 +50,12 @@ const (
 	// meaning to it (a handler decides what a group means).
 	GroupLabel = "fluence.flux-framework.org/group"
 
-	// LeaderAnnotation records the admission-order leader on a PodGroup.
-	LeaderAnnotation = "fluence.flux-framework.org/leader"
-
-	// RoleAnnotation, set by the workload on each pod, explicitly declares the
-	// pod's gang role ("leader" or "worker"). When present it is AUTHORITATIVE:
-	// the quantum handler gates workers and gives the leader the sidecar based
-	// on this value, instead of inferring the leader by admission order. The
-	// same value is injected into the container env as FLUENCE_ROLE so the
-	// application reads its role from the same source of truth Fluence used.
-	// When absent, role falls back to admission order (backwards compatible).
-	RoleAnnotation = "fluence.flux-framework.org/role"
-
-	// ExpectedWorkersAnnotation, set by the workload on the leader pod, tells the
-	// sidecar how many gated workers to wait for before ungating. The count is
-	// known at admission (the workload declares it) even though worker names are
-	// not, so it travels as a static sidecar env var. The core treats it as an
-	// opaque string and ascribes no meaning to it beyond propagation.
-	ExpectedWorkersAnnotation = "fluence.flux-framework.org/expected-workers"
-
-	// Sidecar/staging infrastructure (generic — not quantum-specific).
-	SidecarImage          = "ghcr.io/converged-computing/fluence-sidecar:latest"
-	SidecarServiceAccount = "fluence-sidecar"
-
-	// StageVolumeName / StageMountPath: the shared emptyDir the init container
-	// stages the fluence Python package into, mounted into the user container and
-	// prepended to PYTHONPATH (Model C delivery).
-	StageVolumeName = "fluence-pkg"
-	StageMountPath  = "/opt/fluence-staged"
+	// GroupSizeAnnotation is the gang member count N, set by the workload on each
+	// pod. It is the authoritative override for the PodGroup gang minCount when
+	// the size cannot (or should not) be derived from the owning controller — and
+	// for loose grouped pods where counting at admission is unreliable. The core
+	// treats it as an opaque integer string.
+	GroupSizeAnnotation = "fluence.flux-framework.org/group-size"
 )
 
 // ── Mutator ─────────────────────────────────────────────────────────────────────
@@ -87,31 +63,14 @@ const (
 type Mutator struct {
 	AttributeKeys []string
 	Clientset     kubernetes.Interface
-	SidecarImage  string
 }
 
 // compile-time check that *Mutator satisfies the handler capability interface.
 var _ MutatorAPI = (*Mutator)(nil)
 
-func (m *Mutator) sidecarImage() string {
-	if m.SidecarImage != "" {
-		return m.SidecarImage
-	}
-	return SidecarImage
-}
-
 // GroupName returns the value of GroupLabel on the pod, or "".
 func GroupName(pod *corev1.Pod) string { return spec.Label(pod, GroupLabel) }
 
-// Role returns the explicit gang role declared on the pod via RoleAnnotation
-// ("leader"/"worker"), or "" if unset (caller falls back to admission order).
-func Role(pod *corev1.Pod) string { return spec.Annotation(pod, RoleAnnotation) }
-
-func resourceQuantity(s string) *resource.Quantity {
-	q := resource.MustParse(s)
-	return &q
-}
-
 // ── MutatorAPI: capabilities exposed to handlers ────────────────────────────────
 
 // Client implements MutatorAPI: returns the Kubernetes client (nil in tests).
@@ -138,29 +97,13 @@ func (m *Mutator) EnvVarNames() []string {
 	return names
 }
 
-// PodGroupLeader returns the recorded admission-order leader for the group, or
-// "". Retries briefly to absorb the concurrent leader/worker admission race.
-func (m *Mutator) PodGroupLeader(ctx context.Context, namespace, group string) string {
-	if m.Clientset == nil || group == "" {
-		return ""
-	}
-	for i := 0; i < 3; i++ {
-		pg, err := m.Clientset.SchedulingV1alpha2().PodGroups(namespace).Get(ctx, group, metav1.GetOptions{})
-		if err != nil {
-			return ""
-		}
-		if pg.Annotations != nil && pg.Annotations[LeaderAnnotation] != "" {
-			return pg.Annotations[LeaderAnnotation]
-		}
-		if i < 2 {
-			time.Sleep(100 * time.Millisecond)
-		}
+// EnsurePodGroup creates a Fluence-owned PodGroup with gang minCount = the full
+// gang size N (the whole group schedules atomically) if absent. minCount<=0
+// falls back to 1.
+func (m *Mutator) EnsurePodGroup(ctx context.Context, namespace, group, leaderPod string, minCount int32) {
+	if minCount <= 0 {
+		minCount = 1
 	}
-	return ""
-}
-
-// EnsurePodGroup creates a Fluence-owned PodGroup (minCount:1) if absent.
-func (m *Mutator) EnsurePodGroup(ctx context.Context, namespace, group, leaderPod string) {
 	if m.Clientset == nil {
 		return
 	}
@@ -179,205 +122,17 @@ func (m *Mutator) EnsurePodGroup(ctx context.Context, namespace, group, leaderPo
 		},
 		Spec: schedulingv1alpha2.PodGroupSpec{
 			SchedulingPolicy: schedulingv1alpha2.PodGroupSchedulingPolicy{
-				Gang: &schedulingv1alpha2.GangSchedulingPolicy{MinCount: 1},
+				Gang: &schedulingv1alpha2.GangSchedulingPolicy{MinCount: minCount},
 			},
 		},
 	}
 	if _, err := m.Clientset.SchedulingV1alpha2().PodGroups(namespace).Create(ctx, pg, metav1.CreateOptions{}); err != nil {
 		log.Printf("[fluence-webhook] could not create PodGroup %s/%s: %v", namespace, group, err)
 	} else {
-		log.Printf("[fluence-webhook] created PodGroup %s/%s (minCount=1)", namespace, group)
-	}
-}
-
-// RecordLeader records leaderPod as the group's admission-order leader.
-func (m *Mutator) RecordLeader(ctx context.Context, namespace, group, leaderPod string) {
-	if m.Clientset == nil || group == "" {
-		return
-	}
-	patch := fmt.Sprintf(`{"metadata":{"annotations":{%q:%q}}}`, LeaderAnnotation, leaderPod)
-	if _, err := m.Clientset.SchedulingV1alpha2().PodGroups(namespace).Patch(
-		ctx, group, types.MergePatchType, []byte(patch), metav1.PatchOptions{}); err != nil {
-		log.Printf("[fluence-webhook] could not record leader on PodGroup %s/%s: %v", namespace, group, err)
-	}
-}
-
-// EnsureSidecarRBAC provisions the per-namespace ServiceAccount/Role/RoleBinding
-// the sidecar uses to patch pods and read PodGroups.
-func (m *Mutator) EnsureSidecarRBAC(ctx context.Context, namespace string) {
-	if m.Clientset == nil {
-		return
-	}
-	lbl := map[string]string{"app": "fluence-sidecar"}
-
-	if _, err := m.Clientset.CoreV1().ServiceAccounts(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
-		sa := &corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl}}
-		if _, err := m.Clientset.CoreV1().ServiceAccounts(namespace).Create(ctx, sa, metav1.CreateOptions{}); err != nil {
-			log.Printf("[fluence-webhook] could not create ServiceAccount %s/%s: %v", namespace, SidecarServiceAccount, err)
-		}
-	}
-	if _, err := m.Clientset.RbacV1().Roles(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
-		role := &rbacv1.Role{
-			ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl},
-			Rules: []rbacv1.PolicyRule{
-				{APIGroups: []string{""}, Resources: []string{"pods"}, Verbs: []string{"get", "list", "patch", "update"}},
-				{APIGroups: []string{"scheduling.k8s.io"}, Resources: []string{"podgroups"}, Verbs: []string{"get", "list"}},
-			},
-		}
-		if _, err := m.Clientset.RbacV1().Roles(namespace).Create(ctx, role, metav1.CreateOptions{}); err != nil {
-			log.Printf("[fluence-webhook] could not create Role %s/%s: %v", namespace, SidecarServiceAccount, err)
-		}
-	}
-	if _, err := m.Clientset.RbacV1().RoleBindings(namespace).Get(ctx, SidecarServiceAccount, metav1.GetOptions{}); err != nil {
-		rb := &rbacv1.RoleBinding{
-			ObjectMeta: metav1.ObjectMeta{Name: SidecarServiceAccount, Namespace: namespace, Labels: lbl},
-			Subjects:   []rbacv1.Subject{{Kind: "ServiceAccount", Name: SidecarServiceAccount, Namespace: namespace}},
-			RoleRef:    rbacv1.RoleRef{APIGroup: "rbac.authorization.k8s.io", Kind: "Role", Name: SidecarServiceAccount},
-		}
-		if _, err := m.Clientset.RbacV1().RoleBindings(namespace).Create(ctx, rb, metav1.CreateOptions{}); err != nil {
-			log.Printf("[fluence-webhook] could not create RoleBinding %s/%s: %v", namespace, SidecarServiceAccount, err)
-		}
+		log.Printf("[fluence-webhook] created PodGroup %s/%s (minCount=%d)", namespace, group, minCount)
 	}
 }
 
-// InterceptorOps implements Model C delivery. It injects an init container (the
-// sidecar image) that stages the fluence Python package into a shared emptyDir,
-// mounts that volume into every Fluxion-resource container, and prepends it to
-// PYTHONPATH plus sets FLUENCE_POD_UID. Python auto-imports the staged
-// sitecustomize on startup, which runs the interceptor — no user code changes,
-// no PYTHONSTARTUP (which only fires interactively), no vendor SDK on our side.
-func (m *Mutator) InterceptorOps(pod *corev1.Pod) []spec.Op {
-	var ops []spec.Op
-
-	// Shared volume.
-	vol := corev1.Volume{Name: StageVolumeName, VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}}
-	if len(pod.Spec.Volumes) == 0 {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/volumes", Value: []corev1.Volume{vol}})
-	} else {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/volumes/-", Value: vol})
-	}
-
-	// Init container that stages the package into the shared volume.
-	//
-	// Fail-soft: the interceptor is best-effort, so its delivery must be too. We
-	// wrap the stage command so a failure (bad image, missing python, package
-	// problem) leaves the shared volume empty and exits 0 rather than blocking
-	// the user's pod with Init:Error. An empty staged dir simply means the
-	// interceptor does not run — the user application is unaffected. (This also
-	// lets CI use a minimal placeholder sidecar image for placement-only tests.)
-	initc := corev1.Container{
-		Name:            "fluence-stage",
-		Image:           m.sidecarImage(),
-		ImagePullPolicy: corev1.PullAlways,
-		Command: []string{"sh", "-c",
-			fmt.Sprintf("python -m fluence.stage %s || echo '[fluence] staging skipped (interceptor unavailable)'", StageMountPath)},
-		VolumeMounts: []corev1.VolumeMount{{Name: StageVolumeName, MountPath: StageMountPath}},
-	}
-	if len(pod.Spec.InitContainers) == 0 {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/initContainers", Value: []corev1.Container{initc}})
-	} else {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/initContainers/-", Value: initc})
-	}
-
-	// Mount the staged volume + set PYTHONPATH and FLUENCE_POD_UID on each
-	// Fluxion-resource container.
-	mount := corev1.VolumeMount{Name: StageVolumeName, MountPath: StageMountPath, ReadOnly: true}
-	pythonpath := corev1.EnvVar{Name: "PYTHONPATH", Value: StageMountPath}
-	uid := spec.FieldEnv("FLUENCE_POD_UID", "metadata.uid")
-	for i, c := range pod.Spec.Containers {
-		if !spec.RequestsFluxionResource(c) {
-			continue
-		}
-		if len(c.VolumeMounts) == 0 {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/volumeMounts", i), Value: []corev1.VolumeMount{mount}})
-		} else {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/volumeMounts/-", i), Value: mount})
-		}
-		if !spec.HasEnv(c, "PYTHONPATH") {
-			if len(c.Env) == 0 {
-				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env", i), Value: []corev1.EnvVar{pythonpath}})
-				pod.Spec.Containers[i].Env = []corev1.EnvVar{pythonpath}
-			} else {
-				ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: pythonpath})
-				pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, pythonpath)
-			}
-		}
-		if !spec.HasEnv(c, "FLUENCE_POD_UID") {
-			ops = append(ops, spec.Op{Op: "add", Path: fmt.Sprintf("/spec/containers/%d/env/-", i), Value: uid})
-			pod.Spec.Containers[i].Env = append(pod.Spec.Containers[i].Env, uid)
-		}
-	}
-	return ops
-}
-
-// SidecarContainerOps adds the fluence-sidecar container and sets its
-// ServiceAccount. observe=true selects observe-only telemetry mode.
-func (m *Mutator) SidecarContainerOps(pod *corev1.Pod, observe bool) []spec.Op {
-	var ops []spec.Op
-	// The sidecar resolves its vendor provider at runtime from the backend the
-	// scheduler chose. It gets the same FLUXION_* contract as the workload
-	// containers (FLUXION_BACKEND + attribute vars like FLUXION_VENDOR), sourced
-	// via the downward API from the scheduler's annotations — so the values
-	// resolve once the scheduler writes them, after admission.
-	env := []corev1.EnvVar{
-		spec.FieldEnv("FLUENCE_POD_UID", "metadata.uid"),
-		spec.FieldEnv("FLUENCE_POD_NAME", "metadata.name"),
-		spec.FieldEnv("FLUENCE_NAMESPACE", "metadata.namespace"),
-		spec.FieldEnv("FLUENCE_GROUP", "metadata.labels['"+GroupLabel+"']"),
-	}
-	env = append(env, m.InjectedEnv()...)
-	if observe {
-		env = append(env, corev1.EnvVar{Name: "FLUENCE_OBSERVE", Value: "true"})
-	}
-	// The gang size is known at admission (the leader carries it), even though
-	// the worker NAMES are not yet. Propagate the expected worker count to the
-	// sidecar as a static env var so it can wait until it has discovered that
-	// many gated workers before ungating, rather than ungating a partial set.
-	// Read from a generic annotation so the core stays domain-agnostic; the
-	// workload manifest sets it (e.g. from its own N_WORKERS).
-	if pod.Annotations != nil {
-		if n := pod.Annotations[ExpectedWorkersAnnotation]; n != "" {
-			env = append(env, corev1.EnvVar{Name: "FLUENCE_EXPECTED_WORKERS", Value: n})
-		}
-	}
-	// The sidecar talks to the same backend the workload does (e.g. to find the
-	// task and read its queue position), so it needs the same credentials. Copy
-	// the workload container's secret/configmap-sourced env onto the sidecar.
-	// This stays domain-agnostic: we don't know or name the provider's creds, we
-	// just propagate whatever the workload pulls from a secret/configMap (e.g.
-	// AWS_*, IBM tokens). Existing FLUENCE_/FLUXION_ names are not overwritten.
-	if len(pod.Spec.Containers) > 0 {
-		have := map[string]bool{}
-		for _, e := range env {
-			have[e.Name] = true
-		}
-		for _, e := range pod.Spec.Containers[0].Env {
-			if have[e.Name] || e.ValueFrom == nil {
-				continue
-			}
-			if e.ValueFrom.SecretKeyRef != nil || e.ValueFrom.ConfigMapKeyRef != nil {
-				env = append(env, e)
-			}
-		}
-	}
-	sidecar := corev1.Container{
-		Name: "fluence-sidecar", Image: m.sidecarImage(), ImagePullPolicy: corev1.PullAlways,
-		Env: env,
-		Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{
-			corev1.ResourceCPU: *resourceQuantity("100m"), corev1.ResourceMemory: *resourceQuantity("256Mi"),
-		}},
-	}
-	if len(pod.Spec.Containers) == 0 {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/containers", Value: []corev1.Container{sidecar}})
-	} else {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/containers/-", Value: sidecar})
-	}
-	if pod.Spec.ServiceAccountName == "" || pod.Spec.ServiceAccountName == "default" {
-		ops = append(ops, spec.Op{Op: "add", Path: "/spec/serviceAccountName", Value: SidecarServiceAccount})
-	}
-	return ops
-}
-
 // ── Dispatcher ──────────────────────────────────────────────────────────────────
 
 // Mutate dispatches the pod to every registered handler and concatenates the
diff --git a/pkg/webhook/webhook_test.go b/pkg/webhook/webhook_test.go
index 26983d4..9af6c9c 100644
--- a/pkg/webhook/webhook_test.go
+++ b/pkg/webhook/webhook_test.go
@@ -2,8 +2,6 @@ package webhook
 
 import (
 	"testing"
-
-	corev1 "k8s.io/api/core/v1"
 )
 
 // EnvVarNames returns the FLUXION_* contract names (used by the scheduler plugin
@@ -22,47 +20,3 @@ func TestEnvVarNames(t *testing.T) {
 		}
 	}
 }
-
-func TestSidecarInheritsWorkloadSecretEnv(t *testing.T) {
-	m := &Mutator{}
-	pod := &corev1.Pod{
-		Spec: corev1.PodSpec{
-			Containers: []corev1.Container{{
-				Name: "gang",
-				Env: []corev1.EnvVar{
-					{Name: "GANG_ROLE", Value: "leader"}, // plain value: NOT copied
-					{Name: "AWS_ACCESS_KEY_ID", ValueFrom: &corev1.EnvVarSource{
-						SecretKeyRef: &corev1.SecretKeySelector{
-							LocalObjectReference: corev1.LocalObjectReference{Name: "aws-braket-credentials"},
-							Key:                  "AWS_ACCESS_KEY_ID",
-						}}},
-				},
-			}},
-		},
-	}
-	ops := m.SidecarContainerOps(pod, false)
-	var sidecar *corev1.Container
-	for _, op := range ops {
-		if c, ok := op.Value.(corev1.Container); ok && c.Name == "fluence-sidecar" {
-			sidecar = &c
-		}
-	}
-	if sidecar == nil {
-		t.Fatal("no sidecar container added")
-	}
-	var gotSecret, gotPlain bool
-	for _, e := range sidecar.Env {
-		if e.Name == "AWS_ACCESS_KEY_ID" && e.ValueFrom != nil && e.ValueFrom.SecretKeyRef != nil {
-			gotSecret = true
-		}
-		if e.Name == "GANG_ROLE" {
-			gotPlain = true
-		}
-	}
-	if !gotSecret {
-		t.Error("sidecar should inherit the workload's secret-sourced AWS creds")
-	}
-	if gotPlain {
-		t.Error("sidecar should NOT copy plain-value workload env like GANG_ROLE")
-	}
-}
diff --git a/python/Dockerfile b/python/Dockerfile
index 5cff209..03bf153 100644
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -1,14 +1,6 @@
 # Fluence quantum coordination sidecar image.
-#
-# Bakes the `fluence` Python package in, so the SAME image serves three roles
-# (versions locked together — they are built from this one source tree):
-#   1. sidecar container   — runs `fluence-sidecar` (the coordination loop)
-#   2. init container      — runs `python -m fluence.stage <dir>` to copy the
-#                            pure-Python package + sitecustomize into a shared
-#                            volume that the webhook mounts onto the user
-#                            container's PYTHONPATH (Model C delivery)
-#   3. (the staged copy)   — the user container imports the staged package via
-#                            sitecustomize; no install required in the user image
+# TODO organize into subdirectories when we have >1 image
+# The default command runs the `fluence-sidecar` coordination loop (see CMD).
 FROM python:3.11-slim
 
 LABEL org.opencontainers.image.source="https://github.com/converged-computing/fluence"
@@ -27,11 +19,8 @@ COPY . /app
 # Install the package with the vendor SDKs the SIDECAR needs for its own API
 # calls (task discovery / queue polling). The interceptor staged into the user
 # container carries NONE of these — it patches whatever SDK the user already has.
-RUN pip install --no-cache-dir ".[all]"
+RUN pip install --no-cache-dir ".[all]" && ln -s $(which python3) /usr/bin/python
 
-ENV FLUENCE_TASK_DISCOVERY_TIMEOUT=300
+ENV FLUENCE_TASK_DISCOVERY_TIMEOUT=300000
 ENV FLUENCE_POLL_INTERVAL=30
-
-# Default entrypoint is the sidecar loop; the init container overrides the
-# command with `python -m fluence.stage <dir>`.
 CMD ["fluence-sidecar"]
diff --git a/python/fluence/providers/base.py b/python/fluence/providers/base.py
index dca4429..561bca2 100644
--- a/python/fluence/providers/base.py
+++ b/python/fluence/providers/base.py
@@ -80,7 +80,7 @@ def find_my_task(self, pod_uid: str, backend: str, timeout: int) -> "Task | None
         raise NotImplementedError
 
     def is_ready_to_ungate(self, task: "Task") -> bool:
-        """True when workers should be ungated — queue position == 1 or the task
+        """True when the gang should be ungated — queue position == 1 or the task
         is already RUNNING/terminal. Always implementable."""
         raise NotImplementedError
 
@@ -134,4 +134,4 @@ def resolve_from_env() -> "Provider | None":
     for k, v in os.environ.items():
         if k.startswith("FLUXION_"):
             attrs[k[len("FLUXION_"):].lower()] = v
-    return resolve(attrs)
+    return resolve(attrs)
\ No newline at end of file
diff --git a/python/fluence/providers/braket.py b/python/fluence/providers/braket.py
index 23bd9fc..d6e6ea9 100644
--- a/python/fluence/providers/braket.py
+++ b/python/fluence/providers/braket.py
@@ -51,6 +51,11 @@ def install_interceptor(self, pod_uid: str) -> bool:
         original_run = AwsDevice.run
 
         def patched_run(self, task_specification, *args, **kwargs):
+            # Tag the submission with the pod-uid so the sidecar can find this task
+            # in the queue. The interceptor is staged only on pods that actually
+            # submit (producers and standalone/independent pods); consumers are
+            # role-aware (FLUENCE_COORDINATION_ROLE=consumer) and never call run(),
+            # so there is no submit to intercept and no faux mode to select.
             if pod_uid:
                 tags = kwargs.get("tags", {})
                 tags[TAG_KEY] = pod_uid
@@ -226,4 +231,4 @@ def job_id(self, task: BraketTask) -> str:
 
 
 PROVIDER = BraketProvider()
-register(PROVIDER)
+register(PROVIDER)
\ No newline at end of file
diff --git a/python/fluence/sidecar.py b/python/fluence/sidecar.py
index 098574b..d0724e5 100644
--- a/python/fluence/sidecar.py
+++ b/python/fluence/sidecar.py
@@ -1,18 +1,19 @@
 """
 fluence.sidecar — provider-agnostic quantum coordination sidecar main loop.
 
-Injected by the Fluence webhook into the quantum-submitting pod. Resolves its
-vendor at runtime from the backend annotation, discovers the task the user
-application submitted (tagged by the interceptor), polls readiness, and either
-ungates gated workers (gang mode) or just logs the queue-position series
-(observe-only mode).
+Injected by the Fluence webhook into the one-off SUBMITTER pod (gang + submitter
+model — there is no leader/worker split). Resolves its vendor at runtime from the
+backend annotation, discovers the task the user application submitted (tagged by
+the interceptor), polls readiness, and either ungates the gated GANG group (gang
+mode) or just logs the queue-position series (observe-only mode).
 
 Entry point: `fluence-sidecar` console script (see pyproject.toml) -> main().
 
 Environment (injected by the Fluence webhook):
   FLUENCE_POD_UID                 UID of this pod (matches interceptor tag)
   FLUENCE_NAMESPACE               Kubernetes namespace
-  FLUENCE_GATED_PODS              comma-separated gated worker names
+  FLUENCE_GANG_GROUP              group label of the gated gang to ungate
+  FLUENCE_GATED_PODS              optional explicit comma-separated gang pod names
   FLUENCE_OBSERVE                 "true" for observe-only telemetry mode
   FLUXION_BACKEND / FLUXION_VENDOR  scheduler-chosen backend / vendor
   FLUENCE_TASK_DISCOVERY_TIMEOUT  seconds to wait for discovery (default 300)
@@ -30,6 +31,7 @@
 from fluence.ungate import ungate_pods, gated_pods_from_env, namespace_from_env, wait_for_gated_pods
 
 
+
 def _poll(provider, task, poll_interval, ungate):
     mode = "gang" if ungate else "observe-only"
     log(f"{mode} mode: polling queue position")
@@ -52,18 +54,22 @@ def main():
     pod_uid = os.environ.get("FLUENCE_POD_UID", "")
     pod_name = os.environ.get("FLUENCE_POD_NAME", "")
     group = os.environ.get("FLUENCE_GROUP", "")
+    # Gang + submitter model: this sidecar runs in the one-off SUBMITTER pod
+    # (its own group-of-one, <gang>-submitter). The gated workload it must ungate
+    # is the GANG group, named by FLUENCE_GANG_GROUP (set by the webhook). There
+    # is no leader/worker split and no -workers subgroup.
+    gang_group = os.environ.get("FLUENCE_GANG_GROUP", "")
     backend = os.environ.get("FLUXION_BACKEND", "")
     observe = os.environ.get("FLUENCE_OBSERVE", "").lower() == "true"
     discovery_timeout = int(os.environ.get("FLUENCE_TASK_DISCOVERY_TIMEOUT", 300))
     poll_interval = int(os.environ.get("FLUENCE_POLL_INTERVAL", 30))
-    expected_workers = int(os.environ.get("FLUENCE_EXPECTED_WORKERS", 0))
     ungate_timeout = int(os.environ.get("FLUENCE_UNGATE_TIMEOUT", 120))
 
     namespace = namespace_from_env()
 
-    log("starting fluence quantum sidecar")
+    log("starting fluence quantum submitter sidecar")
     log(f"  pod_uid={pod_uid} namespace={namespace} group={group} "
-        f"backend={backend} observe={observe} expected_workers={expected_workers}")
+        f"gang_group={gang_group} backend={backend} observe={observe}")
 
     provider = resolve_from_env()
     if provider is None:
@@ -75,8 +81,9 @@ def main():
     if task is None:
         log("ERROR: could not discover quantum task")
         if not observe:
-            ungate_pods(wait_for_gated_pods(namespace, group, expected_workers,
-                                            exclude=pod_name, timeout=ungate_timeout),
+            # Fail open: ungate the gang so it is not stranded forever.
+            ungate_pods(wait_for_gated_pods(namespace, gang_group, exclude=pod_name,
+                                            timeout=ungate_timeout),
                         "", namespace)
         sys.exit(1)
 
@@ -89,19 +96,18 @@ def main():
         log("observe-only run complete")
         return
 
-    # Wait until all expected gated workers are present (gang is submitted
-    # together), then ungate them. expected_workers is N-1, propagated by the
-    # webhook from the leader at admission; if unset we ungate whatever is found.
+    # Ungate the gang: discover the gated pods in the gang group and remove their
+    # gate, stamping the job-id so each can fetch results by id. The gang pods are
+    # created up front (Job/Deployment), so they are present by submit time.
     gated_pods = gated_pods_from_env() or wait_for_gated_pods(
-        namespace, group, expected_workers, exclude=pod_name,
-        timeout=ungate_timeout)
-    log(f"ungating {len(gated_pods)} worker(s): {gated_pods}")
+        namespace, gang_group, exclude=pod_name, timeout=ungate_timeout)
+    log(f"ungating {len(gated_pods)} gang pod(s): {gated_pods}")
     n_ok = ungate_pods(gated_pods, job_id, namespace)
     if n_ok == len(gated_pods):
-        log(f"done — {n_ok} worker(s) ungated")
+        log(f"done — {n_ok} gang pod(s) ungated")
     else:
-        log(f"WARNING: ungated only {n_ok}/{len(gated_pods)} worker(s) — see errors above")
+        log(f"WARNING: ungated only {n_ok}/{len(gated_pods)} gang pod(s) — see errors above")
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/python/fluence/ungate.py b/python/fluence/ungate.py
index 1019ead..a40e662 100644
--- a/python/fluence/ungate.py
+++ b/python/fluence/ungate.py
@@ -84,10 +84,10 @@ def gated_pods_from_env():
 def discover_gated_pods(namespace, group, exclude=""):
     """
     Find the names of pods in the same group that still carry the quantum
-    scheduling gate (i.e. the workers this sidecar's leader must ungate).
+    scheduling gate (i.e. the gang pods this submitter must ungate).
 
-    The leader's sidecar is created before the workers are admitted, so the gated
-    set cannot be known at admission time and must be discovered at runtime. We
+    The submitter is created alongside the gang, so the gated set is discovered
+    at runtime rather than known at admission. We
     list pods by the group label and keep those with the QUANTUM_GATE_NAME gate
     still present, excluding the leader pod itself.
     """
@@ -114,31 +114,24 @@ def discover_gated_pods(namespace, group, exclude=""):
     return names
 
 
-def wait_for_gated_pods(namespace, group, expected, exclude="", timeout=120,
-                        interval=3):
+def wait_for_gated_pods(namespace, group, exclude="", timeout=120, interval=3):
     """
-    Wait until at least `expected` gated workers have been discovered in the
-    group, or `timeout` seconds elapse. The gang is submitted together, so all
-    workers appear quickly; the timeout is a backstop against a crashed/never-
-    admitted worker so the sidecar never hangs. Returns the discovered list
-    (which may be short of `expected` if the timeout fired).
+    Wait until at least one gated gang pod is discovered in the group (the gang
+    is created up front, so its pods appear quickly), then return all currently
+    gated pods. The timeout is a backstop so the submitter never hangs if the
+    gang never appears. Returns the discovered list (possibly empty on timeout).
     """
     deadline = time.time() + timeout
     found = []
     while time.time() < deadline:
         found = discover_gated_pods(namespace, group, exclude=exclude)
-        if expected and len(found) >= expected:
-            log(f"all {expected} gated worker(s) present")
+        if found:
             return found
-        if not expected:
-            # No expected count known — return whatever is present now.
-            return found
-        log(f"waiting for gated workers: {len(found)}/{expected}")
+        log("waiting for gated gang pods to appear")
         time.sleep(interval)
-    log(f"WARNING: timed out waiting for gated workers "
-        f"({len(found)}/{expected}); ungating what is present")
+    log("WARNING: timed out waiting for gated gang pods; none found")
     return found
 
 
 def namespace_from_env():
-    return os.environ.get("FLUENCE_NAMESPACE", "default")
+    return os.environ.get("FLUENCE_NAMESPACE", "default")
\ No newline at end of file
diff --git a/test/e2e/02-quantum-placement.sh b/test/e2e/02-quantum-placement.sh
deleted file mode 100644
index 17897a3..0000000
--- a/test/e2e/02-quantum-placement.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env bash
-# Quantum placement: a qpu pod is matched to a backend and the webhook injects QRMI_BACKEND.
-set -euo pipefail
-HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
-ANN="fluence.flux-framework.org/backend"
-
-log "TEST 2: quantum placement and backend handoff"
-kubectl apply -f examples/test/e2e/quantum-pod-mock.yaml
-
-wait_pod_phase sampler-mock Running 120 || fail "sampler-mock did not reach Running"
-
-# fluence must have stamped the chosen backend annotation.
-backend="$(kubectl get pod sampler-mock -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
-[ -n "$backend" ] || (show_webhook sampler-mock && fail "backend annotation ($ANN) was not set by fluence")
-log "fluence chose backend: $backend"
-
-# The webhook must have surfaced it as QRMI_BACKEND inside the container.
-out="$(kubectl logs sampler-mock || true)"
-echo "$out" | grep -q "BACKEND=${backend}" \
-  || (show_webhook sampler-mock && fail "QRMI_BACKEND in container ('$out') does not match annotation ($backend)")
-
-log "PASS: qpu pod scheduled, backend '$backend' chosen and injected as QRMI_BACKEND"
-kubectl delete -f examples/test/e2e/quantum-pod-mock.yaml --wait=false || true
diff --git a/test/e2e/03-restart-recovery.sh b/test/e2e/03-restart-recovery.sh
index 20c1be9..c26980f 100644
--- a/test/e2e/03-restart-recovery.sh
+++ b/test/e2e/03-restart-recovery.sh
@@ -9,7 +9,7 @@ ANN="fluence.flux-framework.org/backend"
 log "TEST 3: restart does not double-book an exclusive backend"
 
 # 1. Schedule the first qpu pod and capture its backend.
-kubectl apply -f examples/test/e2e/quantum-pod-mock.yaml
+kubectl apply -f examples/test/e2e/quantum/quantum-pod-mock.yaml
 wait_pod_phase sampler-mock "$NS" Running 120 || fail "sampler-mock did not reach Running"
 backend="$(kubectl get pod sampler-mock -n "$NS" -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
 [ -n "$backend" ] || fail "first pod has no backend annotation"
@@ -26,7 +26,7 @@ wait_pod_phase sampler-mock "$NS" Running 30 || fail "first pod not Running afte
 
 # 4. A second pod requesting the same exclusive qpu must NOT get the same backend.
 #    If recovery worked, the backend is occupied and the second pod stays Pending.
-kubectl apply -f examples/test/e2e/quantum-pod-mock-2.yaml
+kubectl apply -f examples/test/e2e/quantum/quantum-pod-mock-2.yaml
 if assert_stays_pending sampler-mock-2 "$NS" 45; then
   log "PASS: second qpu pod stayed Pending; backend '$backend' was not double-booked"
 else
@@ -38,5 +38,5 @@ else
   fi
 fi
 
-kubectl delete -f examples/test/e2e/quantum-pod-mock-2.yaml --wait=false || true
-kubectl delete -f examples/test/e2e/quantum-pod-mock.yaml --wait=false || true
+kubectl delete -f examples/test/e2e/quantum/quantum-pod-mock-2.yaml --wait=false || true
+kubectl delete -f examples/test/e2e/quantum/quantum-pod-mock.yaml --wait=false || true
diff --git a/test/e2e/04-sidecar-ungate.sh b/test/e2e/04-sidecar-ungate.sh
deleted file mode 100644
index 9ffefc8..0000000
--- a/test/e2e/04-sidecar-ungate.sh
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env bash
-# Sidecar webhook test.
-#
-# Verifies that when a PodGroup of size > 1 with QPU resources is submitted:
-#   1. The webhook creates fluence-sidecar RBAC in the namespace automatically
-#   2. The leader pod gets the sidecar container injected
-#   3. The worker pod gets the quantum.braket/ready scheduling gate added
-#   4. The worker pod gets fluence-quantum-classical priority class set
-#
-# Does NOT test the sidecar itself (task discovery, interceptor,
-# queue position polling). Those require real AWS credentials and are covered
-# by sidecars/providers/braket/test/integration.sh which is run locally.
-set -euo pipefail
-HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
-
-log "TEST 4: sidecar webhook — RBAC creation, gate injection, sidecar injection"
-
-kubectl apply -f examples/test/e2e/sidecar-mock-pods.yaml
-
-# Give webhook time to process the leader pod admission
-sleep 3
-
-# Print webhook logs — always show these so we can see what happened
-log "--- webhook logs ---"
-kubectl logs -n kube-system deployment/fluence-webhook --tail=50 || true
-log "--- end webhook logs ---"
-
-# 1. Webhook should have created fluence-sidecar ServiceAccount
-log "checking webhook created fluence-sidecar ServiceAccount..."
-for i in $(seq 1 30); do
-  kubectl get serviceaccount fluence-sidecar -n default > /dev/null 2>&1 && break
-  sleep 2
-done
-kubectl get serviceaccount fluence-sidecar -n default \
-  || fail "webhook did not create fluence-sidecar ServiceAccount"
-log "  fluence-sidecar ServiceAccount created"
-
-# 2. Webhook should have created fluence-sidecar Role
-kubectl get role fluence-sidecar -n default \
-  || fail "webhook did not create fluence-sidecar Role"
-log "  fluence-sidecar Role created"
-
-# 3. Webhook should have created fluence-sidecar RoleBinding
-kubectl get rolebinding fluence-sidecar -n default \
-  || fail "webhook did not create fluence-sidecar RoleBinding"
-log "  fluence-sidecar RoleBinding created"
-
-# 4. Leader pod should have the fluence-stage init container injected (Model C:
-#    it stages the fluence Python package into a shared volume on PYTHONPATH).
-log "checking webhook injected the fluence-stage init container..."
-wait_pod_phase sidecar-test-leader Running 120 \
-  || { kubectl describe pod sidecar-test-leader; fail "sidecar-test-leader did not reach Running"; }
-initc=$(kubectl get pod sidecar-test-leader \
-  -o jsonpath='{.spec.initContainers[*].name}')
-echo "$initc" | grep -q "fluence-stage" \
-  || fail "fluence-stage init container not injected (initContainers: $initc)"
-log "  fluence-stage init container injected"
-
-# 5. Leader pod should have the sidecar container injected
-log "checking sidecar injected into leader pod..."
-containers=$(kubectl get pod sidecar-test-leader \
-  -o jsonpath='{.spec.containers[*].name}')
-echo "$containers" | grep -q "fluence-sidecar" \
-  || fail "fluence-sidecar container not injected into leader (containers: $containers)"
-log "  fluence-sidecar container injected into leader"
-
-# 6. Worker pod should have scheduling gate added by webhook
-gate=$(kubectl get pod sidecar-test-worker \
-  -o jsonpath='{.spec.schedulingGates[0].name}')
-[ "$gate" = "quantum.braket/ready" ] \
-  || fail "worker pod does not have quantum.braket/ready gate (got: $gate)"
-log "  quantum.braket/ready gate set on worker"
-
-# 7. Worker pod should have the fluence-quantum-classical priority class set by
-#    the webhook at admission (so it schedules reliably once ungated).
-pc=$(kubectl get pod sidecar-test-worker -o jsonpath='{.spec.priorityClassName}')
-[ "$pc" = "fluence-quantum-classical" ] \
-  || fail "worker pod missing fluence-quantum-classical priority class (got: $pc)"
-log "  fluence-quantum-classical priority class set on worker"
-
-log "PASS: webhook correctly created RBAC, injected sidecar, gated worker"
-log "NOTE: fluence-quantum-classical priority is set by the webhook at admission (immutable post-creation)"
-log "NOTE: braket sidecar integration test (SDK intercept, tag discovery,"
-log "      queue polling) is in sidecars/providers/braket/test/integration.sh"
-
-# Only clean up pods and PodGroup — RBAC is namespace infrastructure
-# that persists for future quantum workflows in this namespace
-kubectl delete -f examples/test/e2e/sidecar-mock-pods.yaml
diff --git a/test/e2e/01-classical-gang.sh b/test/e2e/gang/01-classical-gang.sh
old mode 100644
new mode 100755
similarity index 71%
rename from test/e2e/01-classical-gang.sh
rename to test/e2e/gang/01-classical-gang.sh
index d2018ac..1ebfc64
--- a/test/e2e/01-classical-gang.sh
+++ b/test/e2e/gang/01-classical-gang.sh
@@ -1,10 +1,10 @@
 #!/usr/bin/env bash
 # Classical gang scheduling: a PodGroup of 2 must be placed all-or-nothing on real nodes.
 set -euo pipefail
-HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE}/lib.sh"
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
 
 log "TEST 1: classical gang scheduling"
-kubectl apply -f examples/single-podgroup.yaml
+kubectl apply -f examples/test/e2e/gang/single-podgroup.yaml
 
 # All pods in the 'training' deployment must reach Running (scheduled + started).
 # Wait for the pod to EXIST before waiting for Ready — kubectl wait errors out
@@ -25,5 +25,9 @@ count="$(kubectl get pods -l app=training --no-headers | wc -l | tr -d ' ')"
 [ "$count" = "1" ] || fail "expected 2 training pods, got $count"
 
 log "PASS: classical gang placed all $count pods via fluence"
-kubectl delete -f examples/single-podgroup.yaml --wait=false || true
+kubectl delete -f examples/test/e2e/gang/single-podgroup.yaml --wait=false || true
 kubectl patch podgroup training --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+# Wait for the pods to actually be gone before the next test runs — otherwise a
+# terminating 'training' pod (same name/labels reused by other scenarios) can be
+# misread as the next test's placement.
+kubectl wait --for=delete pod -l app=training --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/gang/02-postfilter-rematch.sh b/test/e2e/gang/02-postfilter-rematch.sh
new file mode 100755
index 0000000..f74c87b
--- /dev/null
+++ b/test/e2e/gang/02-postfilter-rematch.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# PostFilter re-match: when another scheduler plugin (TaintToleration) rejects a
+# node Fluxion allocated, Fluence must abandon that allocation, exclude the node,
+# and re-match onto an untainted node. Safety: the gang's RUNNING pod must NEVER
+# bind to the tainted node.
+#
+# This test is self-isolating: it uses its own workload name (pf-rematch) and
+# labels, distinct from the other e2e scenarios, and ensures a clean slate first,
+# so a pod left over (terminating) from a previous test can never be mistaken for
+# this test's placement. It also ignores terminating pods when asserting.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+NAME=pf-rematch
+SEL="app=${NAME}"
+
+log "TEST 5: PostFilter abandons a taint-rejected allocation and re-matches"
+
+# --- clean slate (podgroup delete is --wait=false: a finalizer would block it until the patch below) ---
+kubectl delete deployment "$NAME" --ignore-not-found >/dev/null 2>&1 || true
+kubectl delete podgroup "$NAME" --ignore-not-found --wait=false >/dev/null 2>&1 || true
+kubectl patch podgroup "$NAME" --type=merge \
+  -p '{"metadata":{"finalizers":null}}' >/dev/null 2>&1 || true
+kubectl wait --for=delete pod -l "$SEL" --timeout=60s >/dev/null 2>&1 || true
+# Defensive: a prior test's workload left running would occupy the only
+# untainted worker and make this test fail with a (correct) fluxion
+# allocate -1 for lack of capacity. Ensure none lingers.
+kubectl delete deployment training --ignore-not-found --wait=false >/dev/null 2>&1 || true
+kubectl wait --for=delete pod -l app=training --timeout=60s >/dev/null 2>&1 || true
+
+TAINTED="$(kubectl get nodes -l '!node-role.kubernetes.io/control-plane' \
+  -o jsonpath='{.items[0].metadata.name}' || true)"   # empty node list makes jsonpath exit non-zero; let the check below report it
+[ -n "$TAINTED" ] || fail "no worker node found to taint"
+log "tainting node $TAINTED with fluence-e2e=blocked:NoSchedule"
+kubectl taint nodes "$TAINTED" fluence-e2e=blocked:NoSchedule --overwrite
+
+cleanup() {   # EXIT trap: untaint the node, remove our workload, then wait for its pods to be gone
+  kubectl taint nodes "$TAINTED" fluence-e2e- 2>/dev/null || true
+  kubectl delete deployment "$NAME" --ignore-not-found --wait=false 2>/dev/null || true
+  kubectl delete podgroup "$NAME" --ignore-not-found --wait=false 2>/dev/null || true
+  kubectl patch podgroup "$NAME" --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+  kubectl wait --for=delete pod -l "$SEL" --timeout=60s 2>/dev/null || true   # don't leave a terminating pod holding capacity for the next test
+}
+trap cleanup EXIT
+
+# --- our own workload (distinct name/labels; does NOT tolerate the taint) ------
+kubectl apply -f - <<YAML
+apiVersion: scheduling.k8s.io/v1alpha2
+kind: PodGroup
+metadata:
+  name: ${NAME}
+spec:
+  schedulingPolicy:
+    gang:
+      minCount: 1
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ${NAME}
+spec:
+  replicas: 1
+  selector:
+    matchLabels: {app: ${NAME}}
+  template:
+    metadata:
+      labels: {app: ${NAME}}
+    spec:
+      schedulerName: fluence
+      schedulingGroup:
+        podGroupName: ${NAME}
+      containers:
+        - name: worker
+          image: busybox
+          command: ["sleep", "3600"]
+          resources:
+            requests:
+              cpu: "1"
+YAML
+
+log "waiting for the gang to schedule (must avoid the tainted node)"
+wait_pods_ready "$SEL" 1 180 \
+  || fail "gang never became Ready — PostFilter re-match did not recover (likely stuck on the taint-rejected allocation)"
+
+# SAFETY: among NON-terminating (Running, no deletionTimestamp) pods, none may be
+# on the tainted node. Terminating leftovers are ignored by construction (we use
+# a unique name and cleaned the slate), but we still filter defensively.
+checked=0
+while read -r name node deleted; do
+  [ -z "$name" ] && continue
+  # custom-columns prints "<none>" for empty fields, so an empty deletionTimestamp
+  # shows as "<none>", NOT "". Treat "<none>" as empty for both columns.
+  if [ "$deleted" != "<none>" ] && [ -n "$deleted" ]; then continue; fi   # skip terminating
+  if [ "$node" = "<none>" ] || [ -z "$node" ]; then continue; fi          # skip not-yet-bound
+  checked=$((checked+1))
+  if [ "$node" = "$TAINTED" ]; then
+    fail "SAFETY VIOLATION: running pod $name is bound to the tainted node $TAINTED"
+  fi
+  log "$name correctly placed on $node (not the tainted $TAINTED)"
+done < <(kubectl get pods -l "$SEL" \
+  -o custom-columns='N:.metadata.name,NODE:.spec.nodeName,DEL:.metadata.deletionTimestamp' \
+  --no-headers)
+
+[ "$checked" -ge 1 ] || fail "no running ${NAME} pod found to check"
+
+# Informational: did PostFilter fire (tainted node picked first, then re-matched), or was the good
+# node chosen directly? grep reads ALL the logs (no -q) so pipefail never sees a SIGPIPE'd kubectl.
+POD="$(kubectl -n kube-system get pods -l app=fluence \
+  -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
+if [ -n "$POD" ] && kubectl -n kube-system logs "$POD" 2>/dev/null \
+     | grep "unschedulable: abandoning allocation" >/dev/null; then
+  log "observed PostFilter abandonment in scheduler log (re-match path exercised)"
+else
+  log "note: Fluxion placed on the untainted node directly this run (PostFilter not needed)"
+fi
+
+log "PASS: gang scheduled on an untainted node; no running pod on the tainted node"
diff --git a/test/e2e/gang/03-multi-gang.sh b/test/e2e/gang/03-multi-gang.sh
new file mode 100755
index 0000000..9f01ae5
--- /dev/null
+++ b/test/e2e/gang/03-multi-gang.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# Multi-pod gang scheduling on real nodes. Guards the two failures that the
+# single-pod 01 test could NOT catch (and that shipped a minCount=1 bug):
+#   A) a multi-pod gang must place ALL of them (minCount must equal the gang size, not 1)
+#   B) under contention, a gang that cannot fully fit stays ENTIRELY pending —
+#      never partially placed (no stranded pods holding nodes).
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+# ---- A) all-or-nothing placement of a 2-pod gang -------------------------------
+log "TEST 6A: multi-pod gang (2) places all-or-nothing"
+kubectl apply -f examples/test/e2e/gang/multi-gang.yaml
+
+# the webhook must have created the PodGroup with minCount = 2 (the bug set it to 1)
+log "checking PodGroup minCount == 2 (set by webhook from group-size)"
+for i in $(seq 1 30); do
+  mc="$(kubectl get podgroup gang3 -o jsonpath='{.spec.schedulingPolicy.gang.minCount}' 2>/dev/null || true)"
+  [ -n "$mc" ] && break; sleep 2
+done
+[ "$mc" = "2" ] || fail "PodGroup gang3 minCount=$mc, want 2 (minCount=1 bug -> partial gangs)"
+
+log "waiting for all 2 gang pods to be Ready"
+wait_pods_ready "app=gang3" 2 180 || fail "gang3 did not place all 2 pods (gang scheduling failed)"
+
+count="$(kubectl get pods -l app=gang3 --field-selector=status.phase=Running --no-headers | wc -l | tr -d ' ')"
+[ "$count" = "2" ] || fail "expected 2 Running gang3 pods, got $count (partial placement)"
+for p in $(kubectl get pods -l app=gang3 -o name); do
+  pod="${p#pod/}"
+  sched="$(kubectl get pod "$pod" -o jsonpath='{.spec.schedulerName}')"
+  [ "$sched" = "fluence" ] || fail "$pod not scheduled by fluence (got: $sched)"
+done
+log "PASS 6A: 2-pod gang placed atomically by fluence (minCount=2)"
+
+kubectl delete -f examples/test/e2e/gang/multi-gang.yaml --wait=false || true
+kubectl patch podgroup gang3 --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+kubectl wait --for=delete pod -l app=gang3 --timeout=60s 2>/dev/null || true
+
+# ---- B) contention: the gang that can't fully fit stays ENTIRELY pending --------
+log "TEST 6B: contention — a gang that cannot fully fit must NOT partially place"
+kubectl apply -f examples/test/e2e/gang/multi-gang-contention.yaml
+
+# wait until the cluster settles. Three possible outcomes:
+#   - one gang fully Running, other fully Pending  -> contention; assert no partial
+#   - BOTH fully Running                            -> runner big enough, no contention to test (skip)
+#   - any partial (1 of 2 in a gang scheduled)      -> the bug, fail
+log "waiting for gangs to settle"
+winner=""; loser=""; both=""
+for i in $(seq 1 90); do
+  ra="$(kubectl get pods -l app=gang-a --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l | tr -d ' ')"
+  rb="$(kubectl get pods -l app=gang-b --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l | tr -d ' ')"
+  if [ "$ra" = "2" ] && [ "$rb" = "2" ]; then both=1; break; fi
+  if [ "$ra" = "2" ] && [ "$rb" = "0" ]; then winner=gang-a; loser=gang-b; break; fi
+  if [ "$rb" = "2" ] && [ "$ra" = "0" ]; then winner=gang-b; loser=gang-a; break; fi
+  sleep 2
+done
+
+if [ -n "$both" ]; then
+  log "SKIP 6B: cluster placed both gangs (>=4 schedulable cores) — no contention on this runner"
+else
+  [ -n "$winner" ] || fail "no clean settle: gang-a=$ra gang-b=$rb running (possible PARTIAL placement)"
+  log "winner=$winner (2 running), loser=$loser (expected 0 running)"
+  # the loser must have ZERO pods scheduled to a node — the all-or-nothing guarantee.
+  # A single scheduled loser pod = partial placement = the bug.
+  scheduled_loser="$(kubectl get pods -l app=$loser -o jsonpath='{range .items[*]}{.spec.nodeName}{"\n"}{end}' | grep -c . || true)"
+  [ "$scheduled_loser" = "0" ] || fail "$loser has $scheduled_loser pod(s) on a node — PARTIAL placement (gang violated)"
+  log "PASS 6B: $loser stayed entirely pending — no partial placement under contention"
+fi
+
+kubectl delete -f examples/test/e2e/gang/multi-gang-contention.yaml --wait=false || true
+for g in gang-a gang-b; do
+  kubectl patch podgroup $g --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l app=gang-a --timeout=60s 2>/dev/null || true
+kubectl wait --for=delete pod -l app=gang-b --timeout=60s 2>/dev/null || true
+log "PASS: multi-gang all-or-nothing verified"
diff --git a/test/e2e/gang/04-requeue-on-capacity.sh b/test/e2e/gang/04-requeue-on-capacity.sh
new file mode 100755
index 0000000..f41aa71
--- /dev/null
+++ b/test/e2e/gang/04-requeue-on-capacity.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# Requeue-on-capacity + gang atomicity under contention.
+#
+# Two 2-pod gangs contend for a cluster that can only run one at a time. This
+# guards two invariants that the GKE contention runs exposed:
+#   1. ALL-OR-NOTHING: each gang places ALL its pods or NONE — never a partial
+#      (e.g. 1-of-2 scheduled). The winner must be a clean 2/2; the loser a clean
+#      0/2 while it waits.
+#   2. REQUEUE: when the winner completes and frees its nodes, the loser is
+#      re-attempted on its own (no manual nudge) and then ALSO places atomically
+#      (2/2), driven by the shortened --pod-max-in-unschedulable-pods-duration.
+#
+# SCOPE / LIMITATION: this is a 3-node kind cluster with small (1-core) pods. It
+# verifies the INVARIANTS on a minimal contention case. It does NOT reproduce the
+# GKE-scale dynamics where the bug was first seen — one-pod-per-node (~80-core)
+# saturation and ~20 simultaneous mixed-size gangs draining in sequence. That
+# scale behavior is validated on the real cluster, not in CI; a pass here means
+# the invariants hold on the simple case, not that large-scale draining is proven.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+# running-pod count for a gang (job-name label set by the Job controller)
+running() { kubectl get pods -l job-name="$1" --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l | tr -d ' '; }
+# count of a gang's pods actually bound to a node (Running OR already Succeeded)
+on_nodes() { kubectl get pods -l job-name="$1" -o jsonpath='{range .items[*]}{.spec.nodeName}{"\n"}{end}' 2>/dev/null | grep -c . || true; }
+
+log "TEST 9: contended gangs stay all-or-nothing, loser requeues when capacity frees"
+kubectl apply -f examples/test/e2e/gang/multi-gang-requeue.yaml
+
+# ---- 1. one gang wins CLEANLY (2/2); the other places NOTHING (0/2) ------------
+log "waiting for a clean 2/0 split (one whole gang runs, the other entirely waits)"
+winner=""; loser=""
+for i in $(seq 1 60); do
+  rw="$(running gang-win)"; ra="$(running gang-wait)"
+  if [ "$rw" = "2" ] && [ "$ra" = "0" ]; then winner=gang-win;  loser=gang-wait; break; fi
+  if [ "$ra" = "2" ] && [ "$rw" = "0" ]; then winner=gang-wait; loser=gang-win;  break; fi
+  # a 1/x or x/1 state that persists is a PARTIAL gang — fail fast on it
+  if [ "$rw" = "1" ] || [ "$ra" = "1" ]; then
+    sleep 6  # allow a transient mid-bind moment to resolve
+    rw="$(running gang-win)"; ra="$(running gang-wait)"
+    { [ "$rw" = "1" ] || [ "$ra" = "1" ]; } && \
+      fail "PARTIAL gang: gang-win=$rw gang-wait=$ra running (all-or-nothing violated)"
+  fi
+  sleep 2
+done
+[ -n "$winner" ] || fail "no clean 2/0 split (gang-win=$(running gang-win) gang-wait=$(running gang-wait))"
+log "  winner=$winner (2/2 running), loser=$loser"
+
+# loser must have ZERO pods on any node — not even one (that would be a partial)
+sl="$(on_nodes "$loser")"
+[ "$sl" = "0" ] || fail "$loser has $sl pod(s) bound while it should be entirely pending — PARTIAL placement"
+log "  $loser entirely pending (0 pods bound) — all-or-nothing holds"
+
+# ---- 2. winner completes -> loser is requeued AND places atomically ------------
+log "waiting for winner=$winner to complete and free its nodes"
+kubectl wait --for=condition=complete job/$winner --timeout=120s || fail "$winner did not complete"
+log "  $winner completed; capacity freed"
+
+# The loser must now place ALL its pods (2/2), on its own, within a window above
+# the 30s recheck flush but below the 5m default — proving the shortened timeout
+# is in effect AND that the requeued gang is still atomic (not a partial).
+log "asserting $loser requeues and places ATOMICALLY (2/2) within ~75s"
+ok=""
+for i in $(seq 1 38); do   # ~75s
+  rl="$(running $loser)"
+  dl="$(kubectl get pods -l job-name=$loser --field-selector=status.phase=Succeeded --no-headers 2>/dev/null | wc -l | tr -d ' ')"
+  # both pods accounted for (running and/or already completed) = atomic placement
+  [ "$((rl + dl))" = "2" ] && { ok=1; break; }
+  # a lone 1/2 that lingers = partial placement of the requeued gang
+  if [ "$((rl + dl))" = "1" ]; then
+    sleep 6
+    rl="$(running $loser)"; dl="$(kubectl get pods -l job-name=$loser --field-selector=status.phase=Succeeded --no-headers 2>/dev/null | wc -l | tr -d ' ')"
+    [ "$((rl + dl))" = "1" ] && fail "$loser placed 1 of 2 pods — PARTIAL placement of the requeued gang"
+  fi
+  sleep 2
+done
+[ -n "$ok" ] || fail "$loser did NOT place both pods within 75s of capacity freeing — \
+either the shortened --pod-max-in-unschedulable-pods-duration is not taking effect \
+(gang stuck) or the requeued gang did not assemble"
+log "PASS 9: $loser requeued and placed atomically (2/2) after $winner freed capacity"
+
+kubectl delete -f examples/test/e2e/gang/multi-gang-requeue.yaml --wait=false || true
+for g in gang-win gang-wait; do
+  kubectl patch podgroup $g --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l job-name=gang-win  --timeout=60s 2>/dev/null || true
+kubectl wait --for=delete pod -l job-name=gang-wait --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/lib.sh b/test/e2e/lib.sh
index cad6a2e..13390c9 100644
--- a/test/e2e/lib.sh
+++ b/test/e2e/lib.sh
@@ -44,7 +44,7 @@ wait_fluence_ready() {
 
 show_webhook() {
   pod=$1
-  echo "FAIL: QRMI_BACKEND mismatch"
+  echo "FAIL: FLUXION_BACKEND mismatch"
   kubectl get pod $pod -o jsonpath='{.spec.containers[0].env}'; echo
   kubectl get pod $pod -o jsonpath='{.metadata.annotations}'; echo
   kubectl -n kube-system logs deploy/fluence-webhook --tail=50
diff --git a/test/e2e/quantum/01-quantum-placement.sh b/test/e2e/quantum/01-quantum-placement.sh
new file mode 100755
index 0000000..8f5c475
--- /dev/null
+++ b/test/e2e/quantum/01-quantum-placement.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# Quantum placement: a qpu pod is matched to a backend and the webhook injects FLUXION_BACKEND.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+ANN="fluence.flux-framework.org/backend"
+
+log "TEST 2: quantum placement and backend handoff"
+kubectl apply -f examples/test/e2e/quantum/quantum-pod-mock.yaml
+
+wait_pod_phase sampler-mock Running 120 || fail "sampler-mock did not reach Running"
+
+# fluence must have stamped the chosen backend annotation (diagnostics are best-effort; fail runs in THIS shell).
+backend="$(kubectl get pod sampler-mock -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" 2>/dev/null || true)"
+[ -n "$backend" ] || { show_webhook sampler-mock || true; fail "backend annotation ($ANN) was not set by fluence"; }
+log "fluence chose backend: $backend"
+
+# The webhook must have surfaced it as FLUXION_BACKEND inside the container.
+out="$(kubectl logs sampler-mock || true)"
+if ! echo "$out" | grep -q "BACKEND=${backend}"; then
+  # Diagnostic (CI has no interactive shell): show whether the env var is ABSENT
+  # (webhook issue) or PRESENT-BUT-EMPTY (delivery/timing issue). Every probe is
+  # best-effort (|| true) so set -e cannot abort before the fail message below.
+  log "--- diagnostic: container env spec ---"
+  kubectl get pod sampler-mock -o jsonpath='{.spec.containers[0].env}' || true; echo
+  log "--- diagnostic: live value via exec ---"
+  kubectl exec sampler-mock -- sh -c 'echo "FLUXION_BACKEND=[$FLUXION_BACKEND]"' 2>&1 || true
+  log "--- diagnostic: backend annotation on pod ---"
+  kubectl get pod sampler-mock -o jsonpath="{.metadata.annotations.${ANN//./\\.}}" || true; echo
+  show_webhook sampler-mock || true
+  fail "FLUXION_BACKEND in container ('$out') does not match annotation ($backend)"
+fi
+
+log "PASS: qpu pod scheduled, backend '$backend' chosen and injected as FLUXION_BACKEND"
+kubectl delete -f examples/test/e2e/quantum/quantum-pod-mock.yaml --wait=false || true
diff --git a/test/e2e/quantum/02-sidecar-ungate.sh b/test/e2e/quantum/02-sidecar-ungate.sh
new file mode 100755
index 0000000..a4ae79f
--- /dev/null
+++ b/test/e2e/quantum/02-sidecar-ungate.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+# Shared-coordination webhook test (producer/consumer, no submitter pod).
+#
+# When a shared quantum gang (coordination=shared, N pods all requesting QPU) is
+# submitted, the webhook must:
+#   1. create the fluence-sidecar RBAC in the namespace automatically
+#   2. gate every CONSUMER pod with quantum.braket/ready
+#   3. raise every CONSUMER pod to the fluence-quantum-classical priority class
+#   4. leave the PRODUCER (completion index 0) UNGATED, as a real member (NOT a
+#      separate spawned pod)
+#   5. inject the fluence-stage init container + the sidecar container into the
+#      producer (Model C staging + the real coordinator)
+#
+# Does NOT test the sidecar runtime (task discovery, interceptor, queue polling)
+# — that needs real AWS creds (sidecars/providers/braket/test/integration.sh).
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+GROUP=qgang
+PRODUCER=${GROUP}-0   # completion index 0
+CONSUMER=${GROUP}-1   # completion index 1
+
+log "TEST 4: shared-gang webhook — RBAC, consumer gating, priority, producer wiring"
+kubectl apply -f examples/test/e2e/quantum/quantum-gang-pods.yaml
+sleep 3
+
+log "--- webhook logs ---"
+kubectl logs -n kube-system deployment/fluence-webhook --tail=50 || true
+log "--- end webhook logs ---"
+
+# 1. RBAC created by the webhook (idempotent, per-namespace).
+log "checking webhook created fluence-sidecar RBAC..."
+for i in $(seq 1 30); do
+  kubectl get serviceaccount fluence-sidecar -n default >/dev/null 2>&1 && break
+  sleep 2
+done
+kubectl get serviceaccount fluence-sidecar -n default || fail "no fluence-sidecar ServiceAccount"
+kubectl get role            fluence-sidecar -n default || fail "no fluence-sidecar Role"
+kubectl get rolebinding     fluence-sidecar -n default || fail "no fluence-sidecar RoleBinding"
+log "  RBAC present"
+
+# 2 + 3. The CONSUMER is gated and at the preempting priority class.
+gate="$(kubectl get pod "$CONSUMER" -o jsonpath='{.spec.schedulingGates[0].name}' 2>/dev/null || true)"
+[ "$gate" = "quantum.braket/ready" ] || fail "$CONSUMER not gated (gate=$gate)"
+pc="$(kubectl get pod "$CONSUMER" -o jsonpath='{.spec.priorityClassName}' 2>/dev/null || true)"
+[ "$pc" = "fluence-quantum-classical" ] || fail "$CONSUMER priorityClass=$pc, want fluence-quantum-classical"
+log "  consumer gated + fluence-quantum-classical priority"
+
+# 4. The PRODUCER is NOT a separate spawned pod and is NOT gated. No <group>-submitter.
+if kubectl get pod "${GROUP}-submitter" -n default >/dev/null 2>&1; then
+  fail "found ${GROUP}-submitter pod — the obsolete separate-submitter model must not exist"
+fi
+pgate="$(kubectl get pod "$PRODUCER" -o jsonpath='{.spec.schedulingGates[0].name}' 2>/dev/null || true)"
+[ -z "$pgate" ] || fail "producer must NOT be gated (gate=$pgate)"
+log "  producer is a real member, not gated; no separate submitter pod"
+
+# 5. Producer has the staging init container + the sidecar container.
+wait_pod_phase "$PRODUCER" Running 120 \
+  || { kubectl describe pod "$PRODUCER"; fail "$PRODUCER did not reach Running"; }
+initc="$(kubectl get pod "$PRODUCER" -o jsonpath='{.spec.initContainers[*].name}')"
+echo "$initc" | grep -q fluence-stage || fail "fluence-stage init container not injected (init: $initc)"
+conts="$(kubectl get pod "$PRODUCER" -o jsonpath='{.spec.containers[*].name}')"
+echo "$conts" | grep -q fluence-sidecar || fail "fluence-sidecar container not injected (containers: $conts)"
+log "  producer has fluence-stage + fluence-sidecar"
+
+log "PASS: webhook gated the consumers, set priority, created RBAC + wired the producer"
+log "NOTE: priority is set at admission (immutable post-creation)"
+log "NOTE: braket sidecar runtime (SDK intercept, tag discovery, queue polling)"
+log "      is in sidecars/providers/braket/test/integration.sh"
+
+# Clean up pods + PodGroups; RBAC is namespace infra and persists.
+kubectl delete -f examples/test/e2e/quantum/quantum-gang-pods.yaml --wait=false || true
+for g in "$GROUP" "${GROUP}-producer"; do
+  kubectl patch podgroup "$g" --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l app="$GROUP" --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/quantum/03-gang-producer.sh b/test/e2e/quantum/03-gang-producer.sh
new file mode 100755
index 0000000..fce4248
--- /dev/null
+++ b/test/e2e/quantum/03-gang-producer.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# Producer/consumer structure (replaces the old leader/worker and submitter-pod
+# models).
+#
+# The structural guarantee the ungate path depends on: a shared quantum gang of
+# size N is split, by completion index, into the CONSUMER gang <group>
+# (minCount N-1, gated) and the PRODUCER's group-of-one <group>-producer
+# (minCount 1, not gated). The producer is a real member of the user's workload —
+# there is NO separate <group>-submitter pod, NO <group>-workers subgroup, and no
+# leader among the user's pods. (The runtime ungate is covered by the braket
+# integration test; here we prove the shape.)
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"; . "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+GROUP=qgang
+PRODUCER_GROUP=${GROUP}-producer
+PRODUCER=${GROUP}-0   # completion index 0
+CONSUMER=${GROUP}-1   # completion index 1
+
+log "TEST 7: consumer gang(N-1, gated) + producer(1, member) structure"
+kubectl apply -f examples/test/e2e/quantum/quantum-gang-pods.yaml
+
+# Consumer PodGroup <group> exists with minCount N-1 = 1 (the split).
+log "checking consumer group '$GROUP' minCount == 1 (N-1)"
+for i in $(seq 1 30); do
+  gc="$(kubectl get podgroup "$GROUP" -o jsonpath='{.spec.schedulingPolicy.gang.minCount}' 2>/dev/null || true)"
+  [ -n "$gc" ] && break; sleep 2
+done
+[ "$gc" = "1" ] || fail "consumer group $GROUP minCount=$gc, want 1 (N-1)"
+
+# There must be NO <group>-workers subgroup and NO <group>-submitter pod.
+if kubectl get podgroup "${GROUP}-workers" >/dev/null 2>&1; then
+  fail "found ${GROUP}-workers PodGroup — the obsolete leader/worker split must not exist"
+fi
+if kubectl get pod "${GROUP}-submitter" >/dev/null 2>&1; then
+  fail "found ${GROUP}-submitter pod — the obsolete separate-submitter model must not exist"
+fi
+log "  consumer group minCount=1, no -workers subgroup, no -submitter pod"
+
+# Producer PodGroup <group>-producer exists with minCount 1 (schedules alone).
+log "checking producer group '$PRODUCER_GROUP' minCount == 1"
+for i in $(seq 1 30); do
+  sc="$(kubectl get podgroup "$PRODUCER_GROUP" -o jsonpath='{.spec.schedulingPolicy.gang.minCount}' 2>/dev/null || true)"
+  [ -n "$sc" ] && break; sleep 2
+done
+[ "$sc" = "1" ] || fail "producer group $PRODUCER_GROUP minCount=$sc, want 1"
+
+# Producer pod (index 0) is relinked into its own group-of-one and is NOT gated.
+pl="$(kubectl get pod "$PRODUCER" -o jsonpath='{.metadata.labels.fluence\.flux-framework\.org/group}' 2>/dev/null || true)"
+[ "$pl" = "$PRODUCER_GROUP" ] || fail "producer group label=$pl, want $PRODUCER_GROUP"
+pgate="$(kubectl get pod "$PRODUCER" -o jsonpath='{.spec.schedulingGates[0].name}' 2>/dev/null || true)"
+[ -z "$pgate" ] || fail "producer must NOT be gated (gate=$pgate)"
+log "  producer in '$PRODUCER_GROUP' (minCount 1), not gated"
+
+# Consumer pod (index 1+) stays in <group> and is gated.
+g="$(kubectl get pod "$CONSUMER" -o jsonpath='{.metadata.labels.fluence\.flux-framework\.org/group}' 2>/dev/null || true)"
+[ "$g" = "$GROUP" ] || fail "$CONSUMER group label=$g, want $GROUP"
+gate="$(kubectl get pod "$CONSUMER" -o jsonpath='{.spec.schedulingGates[0].name}' 2>/dev/null || true)"
+[ "$gate" = "quantum.braket/ready" ] || fail "$CONSUMER not gated (gate=$gate)"
+# The consumer's dependency points at the producer group.
+dp="$(kubectl get pod "$CONSUMER" -o jsonpath='{.metadata.annotations.fluence\.flux-framework\.org/depends-on-producer}' 2>/dev/null || true)"
+[ "$dp" = "$PRODUCER_GROUP" ] || fail "consumer depends-on-producer=$dp, want $PRODUCER_GROUP"
+log "  consumer in '$GROUP', gated, depends on '$PRODUCER_GROUP'"
+
+log "PASS 7: consumer gang(N-1, gated) + producer(1, member, ungates gang), no submitter/leader/worker"
+kubectl delete -f examples/test/e2e/quantum/quantum-gang-pods.yaml --wait=false || true
+for g in "$GROUP" "$PRODUCER_GROUP"; do
+  kubectl patch podgroup "$g" --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l app="$GROUP" --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/quantum/04-gang-env-contract.sh b/test/e2e/quantum/04-gang-env-contract.sh
new file mode 100755
index 0000000..157f78b
--- /dev/null
+++ b/test/e2e/quantum/04-gang-env-contract.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# Env-contract e2e (producer/consumer): verify the webhook injects, at admission,
+# the env the runtime depends on — IN-CLUSTER, on the real pod specs, with no
+# Braket/AWS and WITHOUT requiring scheduling. Guards the seam that, if broken,
+# makes a gang schedule then hang or double-submit.
+#
+# Spec layer only (these are downward-API valueFrom refs whose VALUES resolve at
+# placement, but whose PRESENCE is deterministic at admission), so no scheduling,
+# no qpu capacity, no logs — it cannot flake on capacity. Contract:
+#   consumer (role):  FLUENCE_COORDINATION_ROLE=consumer, FLUENCE_QUANTUM_JOB_ID, FLUXION_BACKEND
+#                     (NO PYTHONPATH interceptor, NO FLUENCE_FAUX_SUBMIT — a consumer never submits)
+#   producer (role):  FLUENCE_COORDINATION_ROLE=producer, NO FLUENCE_QUANTUM_JOB_ID; FLUENCE_GANG_GROUP on the sidecar
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"
+. "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+
+GROUP=qgang
+PRODUCER="${GROUP}-0"   # pod at completion index 0
+CONSUMER="${GROUP}-1"   # pod at completion index 1
+log "TEST 8: producer/consumer env contract — spec layer"
+kubectl apply -f examples/test/e2e/quantum/quantum-gang-pods.yaml
+
+# does container $2 of pod $1 have an env entry named $3 ? (spec-level only)
+has_env() {
+  kubectl get pod "$1" -o jsonpath="{.spec.containers[?(@.name=='$2')].env[*].name}" \
+    2>/dev/null | tr ' ' '\n' | grep -qx "$3"
+}
+# value of env $3 in container $2 of pod $1 (empty if absent)
+env_val() {
+  kubectl get pod "$1" -o jsonpath="{.spec.containers[?(@.name=='$2')].env[?(@.name=='$3')].value}" \
+    2>/dev/null || true
+}
+
+log "checking the webhook wired the consumer role contract"
+for i in $(seq 1 15); do has_env "$CONSUMER" app FLUENCE_COORDINATION_ROLE && break; sleep 2; done
+# Present: the role (=consumer), the producer's task id, and the backend. On a miss: dump the spec, then fail.
+for v in FLUENCE_COORDINATION_ROLE FLUENCE_QUANTUM_JOB_ID FLUXION_BACKEND; do
+  has_env "$CONSUMER" app "$v" || {
+    kubectl get pod "$CONSUMER" -o yaml | sed -n '/containers:/,/status:/p' || true  # best-effort dump; must not pre-empt fail under set -e/pipefail
+    fail "consumer 'app' container missing env '$v'"; }
+  log "  consumer has env: $v"
+done
+role="$(env_val "$CONSUMER" app FLUENCE_COORDINATION_ROLE)"
+[ "$role" = "consumer" ] || fail "consumer role=$role, want consumer"
+# Absent: a consumer never submits, so no interceptor staging and no faux flag.
+for v in PYTHONPATH FLUENCE_FAUX_SUBMIT; do
+  ! has_env "$CONSUMER" app "$v" || fail "consumer must NOT carry '$v' (it does not submit)"
+done
+log "  consumer role=consumer, no interceptor/faux"
+
+# The sidecar in the producer pod needs the consumer group's name so it knows which gang to ungate.
+log "checking the producer sidecar has FLUENCE_GANG_GROUP=$GROUP"
+for attempt in $(seq 1 30); do
+  kubectl get pod "$PRODUCER" >/dev/null 2>&1 && break; sleep 2
+done
+sidecar_group="$(env_val "$PRODUCER" fluence-sidecar FLUENCE_GANG_GROUP)"
+[ "$sidecar_group" = "$GROUP" ] || fail "producer sidecar FLUENCE_GANG_GROUP=$sidecar_group, want $GROUP"
+log "  producer sidecar has FLUENCE_GANG_GROUP=$sidecar_group"
+
+# The producer carries role=producer and is the real submitter (no consumer id).
+prole="$(env_val "$PRODUCER" app FLUENCE_COORDINATION_ROLE)"
+[ "$prole" = "producer" ] || fail "producer role=$prole, want producer"
+if has_env "$PRODUCER" app FLUENCE_QUANTUM_JOB_ID; then
+  fail "producer must NOT carry FLUENCE_QUANTUM_JOB_ID (it submits its own task)"
+fi
+log "  producer role=producer, submits its own task"
+
+log "PASS 8: webhook injects the consumer(role) + producer(role) env contract at admission"
+
+kubectl delete -f examples/test/e2e/quantum/quantum-gang-pods.yaml --wait=false || true
+for g in "$GROUP" "${GROUP}-producer"; do
+  kubectl patch podgroup "$g" --type=merge -p '{"metadata":{"finalizers":null}}' 2>/dev/null || true
+done
+kubectl wait --for=delete pod -l app="$GROUP" --timeout=60s 2>/dev/null || true
diff --git a/test/e2e/quantum/setup.sh b/test/e2e/quantum/setup.sh
new file mode 100644
index 0000000..57f375a
--- /dev/null
+++ b/test/e2e/quantum/setup.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Quantum suite setup (run by the e2e-suite workflow before the NN-*.sh tests).
+#
+# Installs the qpu add-on so nodes advertise fluxion.flux-framework.org/qpu —
+# without it every quantum pod stays Pending (fluence matches in its own graph,
+# but the default NodeResourcesFit plugin rejects each node because the extended
+# resource is not in allocatable, so the match is rolled back). The base deploy
+# (deploy/fluence-test.yaml) does NOT include this; it is quantum-only.
+#
+# Also points the webhook-injected sidecar/stage image at the CI-loaded image:
+# the default sidecar image (ghcr.io/.../fluence-sidecar:latest) is not loaded in
+# kind, so the producer's containers could not pull. The fluence-stage init is
+# fail-soft (no python in this image -> it logs and exits 0), which is fine for
+# the structural assertions; the producer still schedules and runs.
+set -euo pipefail
+HERE="$(cd "$(dirname "$0")" && pwd)"
+. "${HERE%/test/e2e/*}/test/e2e/lib.sh"
+: "${IMAGE:=vanessa/fluence:test}"   # keep a caller-provided IMAGE; otherwise fall back to the test tag
+
+log "quantum setup: installing the qpu add-on (resources ConfigMap + device plugin)"
+kubectl apply -f deploy/fluence-resources-test.yaml
+
+# Point the device plugin (container 'deviceplugin') at the CI-loaded image, because its manifest
+# ships a registry image that kind has not pulled. The pull-policy patch below is best-effort.
+kubectl -n kube-system set image daemonset/fluence-deviceplugin deviceplugin="$IMAGE"
+kubectl -n kube-system patch daemonset/fluence-deviceplugin --type=json \
+  -p '[{"op":"replace","path":"/spec/template/spec/containers/0/imagePullPolicy","value":"IfNotPresent"}]' 2>/dev/null || true
+
+# The webhook-injected sidecar + stage init must reference an image that is present too (see header).
+kubectl --namespace kube-system set env deployment/fluence-webhook "FLUENCE_SIDECAR_IMAGE=$IMAGE"
+kubectl --namespace kube-system rollout status deployment/fluence-webhook --timeout=180s
+
+# Restart the scheduler so it re-reads the resources config now that the ConfigMap exists.
+kubectl --namespace kube-system rollout restart deployment/fluence
+kubectl --namespace kube-system rollout status deployment/fluence --timeout=180s
+
+log "waiting for the device plugin DaemonSet to be Ready"
+kubectl -n kube-system rollout status daemonset/fluence-deviceplugin --timeout=180s
+
+# Block until at least one node advertises the qpu extended resource, so the
+# tests do not race the kubelet's device registration.
+log "waiting for nodes to advertise fluxion.flux-framework.org/qpu"
+ok=0
+for i in $(seq 1 60); do
+  if kubectl get nodes -o jsonpath='{.items[*].status.allocatable}' 2>/dev/null \
+       | grep -q 'fluxion.flux-framework.org/qpu'; then
+    ok=1; break
+  fi
+  sleep 3
+done
+[ "$ok" = 1 ] || fail "no node advertised fluxion.flux-framework.org/qpu after the add-on (device plugin not registering)"
+log "qpu advertised on at least one node"
+
+log "quantum setup complete: qpu add-on installed, scheduler restarted, sidecar image=$IMAGE"
\ No newline at end of file