astubbs · astubbs · Apr 11, 2026 · Apr 11, 2026 · Apr 14, 2026 · Apr 14, 2026
diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
@@ -12,12 +12,7 @@ on:
 
 jobs:
   claude-review:
-    # Optional: Filter by PR author
-    # if: |
-    #   github.event.pull_request.user.login == 'external-contributor' ||
-    #   github.event.pull_request.user.login == 'new-developer' ||
-    #   github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
-
+    if: github.event.sender.type != 'Bot'
     runs-on: ubuntu-latest
     permissions:
       contents: read

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml
@@ -0,0 +1,85 @@
+# Performance test suite, run on a self-hosted Windows runner with Docker Desktop.
+#
+# These tests are tagged @Tag("performance") and excluded from the regular CI
+# build because they need substantial hardware (CPU, memory, disk). They run
+# on dedicated machines where the user has labelled their runner with the
+# "performance" custom label.
+#
+# Triggers:
+#   - workflow_dispatch (manual)   - primary trigger
+#   - schedule (weekly)            - automated regression check
+#   - NOT on PRs from forks        - self-hosted runners + untrusted code = bad
+#
+# See docs/SELF_HOSTED_RUNNER.md for one-time runner setup instructions.
+
+name: Performance Tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      kafka_version:
+        description: 'Kafka version to test against (default: project default)'
+        required: false
+        type: string
+        default: ''
+  schedule:
+    # Weekly on Sunday at 02:00 UTC
+    - cron: '0 2 * * 0'
+
+concurrency:
+  # Only run one performance test at a time per branch - they're slow and resource-heavy
+  group: performance-${{ github.ref }}
+  cancel-in-progress: false
+
+permissions:
+  contents: read
+
+jobs:
+  performance:
+    name: "Performance suite (self-hosted)"
+    # Targets a self-hosted runner labelled "performance" running Windows.
+    # The "self-hosted" label is automatic; "windows" and "performance" are
+    # added when the runner is registered. See docs/SELF_HOSTED_RUNNER.md.
+    runs-on: [self-hosted, windows, performance]
+    timeout-minutes: 180
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Setup JDK 17
+        uses: actions/setup-java@v5
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+          # Don't cache here - self-hosted runners persist .m2 across runs already
+          cache: ''
+
+      - name: Show environment
+        shell: cmd
+        run: |
+          java -version
+          docker --version
+          docker info
+
+      - name: Run performance tests
+        shell: cmd
+        env:
+          KAFKA_VERSION: ${{ inputs.kafka_version }}
+        # The -D argument is quoted because cmd expands %KAFKA_VERSION% before it
+        # parses the parenthesised block: an unquoted ")" in the value (e.g. the
+        # version range "[3.9.1,5)") would otherwise close the "if (" block early.
+        run: |
+          if defined KAFKA_VERSION (
+            call bin\performance-test.cmd "-Dkafka.version=%KAFKA_VERSION%"
+          ) else (
+            call bin\performance-test.cmd
+          )
+
+      - name: Upload test reports
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: performance-reports-${{ github.run_number }}
+          path: '**/target/*-reports/*.xml'
+          retention-days: 30
diff --git a/.mvn/maven.config b/.mvn/maven.config
@@ -0,0 +1,6 @@
+-Dmaven.wagon.http.connectionTimeout=10000
+-Dmaven.wagon.http.readTimeout=120000
+-Dmaven.wagon.httpconnectionManager.ttlSeconds=120
+-Dmaven.wagon.http.retryHandler.count=3
+-Daether.connector.connectTimeout=10000
+-Daether.connector.requestTimeout=120000
diff --git a/AGENTS.md b/AGENTS.md
@@ -47,6 +47,7 @@ bin/ci-build.sh 3.9.1
 - **Integration tests**: `mvn verify` / failsafe plugin. Source in `src/test-integration/java/`. Uses TestContainers with `confluentinc/cp-kafka` Docker image.
 - **Test exclusion patterns**: `**/integrationTest*/**/*.java` and `**/*IT.java` are excluded from surefire, included in failsafe.
 - **Kafka version matrix**: CI tests against multiple Kafka versions via `-Dkafka.version=X.Y.Z`.
+- **Performance tests**: Tagged `@Tag("performance")` and excluded from regular CI by default. They run on a self-hosted runner via `.github/workflows/performance.yml` (see `docs/SELF_HOSTED_RUNNER.md`). Run locally with `bin/performance-test.sh` (or `bin/performance-test.cmd` on Windows). Override the test group selection with Maven properties: `-Dincluded.groups=performance` to run only perf, `-Dexcluded.groups=` to run everything.
 
 ## Known Issues
 
@@ -56,10 +57,101 @@ bin/ci-build.sh 3.9.1
 
 - **Lombok**: Used extensively (builders, getters, logging). IntelliJ Lombok plugin required.
 - **EditorConfig**: Enforced via `.editorconfig` - 4-space indent for Java, 120 char line length.
-- **License headers**: Managed by `license-maven-plugin` (Mycila). Use `-Dlicense.skip` locally to skip checks.
+- **License headers**: Managed by `license-maven-plugin` (Mycila). See "License headers" section below.
 - **Google Truth**: Used for test assertions alongside JUnit 5 and Mockito.
 
+## License headers
+
+The Mycila `license-maven-plugin` enforces a Confluent copyright header on all source files. It uses git-derived years via `${license.git.copyrightYears}`.
+
+**Skipping the check** (for any Maven goal):
+```
+./mvnw <goal> -Dlicense.skip
+```
+
+**When to skip:**
+- Running builds inside a git worktree - the git-years lookup fails with `Bare Repository has neither a working tree, nor an index`
+- Local iteration where you don't want years auto-bumped on touched files
+- Any command other than the canonical `mvn install` flow when copyright drift would create noise in `git status`
+
+The default behavior on macOS dev machines is `format` mode (auto-fixes headers) via the `license-format` profile (auto-activated). The `ci` profile flips this to `check` mode (fails the build on drift). Both `bin/build.sh` and `bin/ci-build.sh` already pass `-Dlicense.skip` for this reason.
+
+**When NOT to skip:**
+- You're intentionally running `mvn license:format` to update headers
+- You want to verify CI's check would pass before pushing
+
+## Agent Rules
+
+### Git Safety
+- **NEVER commit or push without explicitly asking the user first.** Wait for approval. This is the #1 rule.
+- **When creating a stacked PR, include `depends on #N` in the PR description** (where `#N` is the parent PR it stacks on). This fork runs a PR dependency gating action (see `.github/workflows/check-dependencies.yml`) which blocks child PRs from merging until the parent is merged. One `depends on` line per parent. Keep the list accurate if the chain changes.
+- Branch off master for upstream contributor cherry-picks so PRs show only their change.
+- Never commit without tests and documentation in the same pass.
+- Run tests before committing. If they fail, fix them first.
+- When you fix something or finish implementing something, record what lessons you learnt.
+
+### Development Discipline
+- **Skateboard first.** Build the simplest end-to-end thing that works. Before starting a feature, ask: "Is this blocking the next public milestone?" If not, flag it and move on.
+- **Never paper over the real problem** - make the proper fix.
+- Don't propose workarounds that require user action when the software can solve it. If the software has enough information to derive the right answer, it should just do it.
+- If constructing data in memory that is eventually going to be saved, save it as soon as it's created. Don't delay in case the programme crashes or the user exits.
+
+### Code Quality
+- **Be DRY.** Reuse existing functions. Don't copy code - refactor where necessary. Refactor out common patterns.
+- Never weaken test assertions - classify exceptions instead of ignoring them.
+- Wire components through PCModule DI - don't bypass the dependency injection system.
+- Validate user input - don't let bad input cause silent failures.
+- Handle errors visibly - don't swallow exceptions.
+- Give things meaningful names that describe what they do. Never use random or generic names.
+
+### Test Discipline
+- Search for existing test harnesses and utilities before creating new ones.
+- Run the complete test suite periodically, not just targeted tests.
+- Maintain good high-level test coverage. Only get detailed on particularly complex functions that benefit from fine-grained testing.
+
+### CI and Automation
+- Always set up continuous integration, code coverage, and automated dependency checking.
+- Make scripts for common end-user requirements with helpful, suggestive CLI interfaces.
+
+### Documentation
+- Keep a diary of major plans and their milestones.
+- **Keep a developer-facing product specification** that outlines product features, functionality, and implementation architecture - separately from end-user documentation. With agentic programming, the developer can lose sight of architecture and implementation details. This document exposes the interesting, novel, and important implementation decisions so the developer maintains a clear mental model of the system even when agents are doing most of the coding.
+- Keep end-user documentation updated.
+- Keep documentation tables of contents updated.
+
+### Communication
+- Use precise terminology - if the project defines specific terms, use them consistently. Don't use ambiguous words.
+- Don't write with em dash characters.
+
+### Rule Sync
+- Keep this AGENTS.md in sync with any global CLAUDE.md rules. If you have rules in your global config that are missing here, suggest to the user that they be added. This ensures all contributors and agents working on this project follow the same standards.
+
+### Working Directory
+- Always run commands from the project root directory.
+- Use `./mvnw` or `bin/*.sh` scripts - don't cd into submodules.
+- Use `-pl module-name -am` for module-specific builds.
+
 ## CI
 
-- **GitHub Actions**: `.github/workflows/maven.yml` - runs on push/PR to master with Kafka version matrix.
-- **Semaphore** (Confluent internal): `.semaphore/semaphore.yml` - primary CI for upstream.
+PR builds run these jobs in parallel (fail-fast cancels others if any fails):
+
+| Job | Script / Tool | Purpose |
+|-----|--------------|---------|
+| **Unit Tests** | `bin/ci-unit-test.sh` | Surefire tests, no Docker |
+| **Integration Tests** | `bin/ci-integration-test.sh` | Failsafe tests, TestContainers |
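+| **Performance Tests** | `bin/performance-test.sh` | `@Tag("performance")` volume tests - excluded from regular PR builds; run on the self-hosted runner via `.github/workflows/performance.yml` |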
+| **SpotBugs** | Maven spotbugs plugin | Static analysis for bugs |
+| **Duplicate Code Check** | PMD CPD | Detect Java copy-paste blocks (base-vs-PR comparison) |
+| **Dependency Vulnerabilities** | GitHub dependency-review-action | CVE scanning |
+| **Mutation Testing (PIT)** | pitest-maven | Test quality verification |
+
+Push builds (master): Full Kafka version matrix (3.1.0, 3.7.0, 3.9.1 + experimental [3.9.1,5) for 4.x).
+
+- **Code coverage**: JaCoCo → [Codecov](https://app.codecov.io/gh/astubbs/parallel-consumer). PRs fail if overall coverage drops by more than 1%.
+- **Semaphore** (Confluent internal): `.semaphore/semaphore.yml` - primary CI for upstream.
+
+### Required secrets
+
+| Secret | Purpose |
+|--------|---------|
+| `CODECOV_TOKEN` | Codecov upload token - required because branch protection is enabled. Get it from [Codecov settings](https://app.codecov.io/gh/astubbs/parallel-consumer/settings). |
diff --git a/README.adoc b/README.adoc
@@ -283,7 +283,7 @@ The user just has to provide a function to extract from the message the HTTP cal
 
 === Illustrative Performance Example
 
-.(see link:./parallel-consumer-core/src/test-integration/java/io/confluent/parallelconsumer/integrationTests/VolumeTests.java[VolumeTests.java])
+.(see link:./parallel-consumer-core/src/test-integration/java/io/confluent/parallelconsumer/integrationTests/VeryLargeMessageVolumeTest.java[VeryLargeMessageVolumeTest.java])
 These performance comparison results below, even though are based on real performance measurement results, are for illustrative purposes.
 To see how the performance of the tool is related to instance counts, partition counts, key distribution and how it would relate to the vanilla client.
 Actual results will vary wildly depending upon the setup being deployed into.
@@ -1341,6 +1341,59 @@ Note::
 See https://github.com/confluentinc/parallel-consumer/issues/162[issue #162]
 and this https://stackoverflow.com/questions/4786881/why-is-test-jar-dependency-required-for-mvn-compile[Stack Overflow question].
 
+=== Build Scripts
+
+Helper scripts are in the `bin/` directory:
+
+[qanda]
+Quick local build (compile + unit tests)::
+`bin/build.sh`
+
+Unit tests only (no Docker needed)::
+`bin/ci-unit-test.sh`
+
+Integration tests only (requires Docker for TestContainers)::
+`bin/ci-integration-test.sh`
+
+Full CI build with all tests (unit + integration)::
+`bin/ci-build.sh`
+
+CI build against a specific Kafka version::
+`bin/ci-build.sh 3.9.1`
+
+Performance test suite (also `bin/performance-test.cmd` on Windows)::
+`bin/performance-test.sh`
+
+All `ci-*` scripts use the `-Pci` Maven profile, which switches license handling to check mode and disables parallel test execution. The scripts also pass `-Dlicense.skip`, so the license check itself does not run. The GitHub Actions CI workflow uses these scripts, so running them locally reproduces the CI environment.
+
+=== Performance Tests
+
+Tests tagged `@Tag("performance")` are excluded from the regular CI build because they need substantial hardware. They run on a dedicated self-hosted runner via `.github/workflows/performance.yml` (manual trigger or weekly schedule).
+
+To run the performance suite locally, use `bin/performance-test.sh`. To set up your own self-hosted runner for these tests, see link:./docs/SELF_HOSTED_RUNNER.md[docs/SELF_HOSTED_RUNNER.md].
+
+=== Releasing
+
+The `pom.xml` version is the source of truth for publishing — there is no `maven-release-plugin` step.
+
+On every push to `master`, `.github/workflows/publish.yml` deploys to Maven Central:
+
+* If the version ends in `-SNAPSHOT` → publishes a snapshot
+* If the version does not end in `-SNAPSHOT` → publishes a full release, creates a `v<version>` git tag, and creates a GitHub release
+
+To cut a release:
+
+. Open a PR removing `-SNAPSHOT` from `<version>` in the parent `pom.xml` (e.g. `0.6.0.0-SNAPSHOT` → `0.6.0.0`)
+. Merge it to master → CI publishes the release
+. Open another PR bumping to the next snapshot (e.g. `0.6.0.1-SNAPSHOT`) and merge
+
+Required GitHub repository secrets:
+
+* `MAVEN_CENTRAL_USERNAME` — Sonatype Central Portal token username
+* `MAVEN_CENTRAL_PASSWORD` — Sonatype Central Portal token password
+* `MAVEN_GPG_PRIVATE_KEY` — Armored GPG private key for signing artifacts
+* `MAVEN_GPG_PASSPHRASE` — Passphrase for the GPG key
+
 === Testing
 
 The project has good automated test coverage, of all features.

diff --git a/bin/ci-build.sh b/bin/ci-build.sh
@@ -22,4 +22,6 @@ fi
   -Pci \
   clean verify \
   $KAFKA_VERSION_ARG \
-  -Dlicense.skip
+  -Dlicense.skip \
+  -Dexcluded.groups=performance \
+  -Dsurefire.rerunFailingTestsCount=2
diff --git a/bin/ci-integration-test.sh b/bin/ci-integration-test.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020-2026 Confluent, Inc.
+#
+
+# Run integration tests only (failsafe, requires Docker for TestContainers).
+# Skips unit tests to avoid duplicate work.
+# Usage: bin/ci-integration-test.sh
+
+set -euo pipefail
+
+# Integration tests are executed by failsafe, whose retry count is read from
+# failsafe.rerunFailingTestsCount; the surefire.* property is not read by
+# failsafe by default. Both are passed so flaky tests are retried either way.
+./mvnw --batch-mode \
+  -Pci \
+  clean verify \
+  -DskipUTs=true \
+  -Dlicense.skip \
+  -Dexcluded.groups=performance \
+  -Dsurefire.rerunFailingTestsCount=2 \
+  -Dfailsafe.rerunFailingTestsCount=2
diff --git a/bin/ci-unit-test.sh b/bin/ci-unit-test.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020-2026 Confluent, Inc.
+#
+
+# Unit-test-only build: runs the Maven `test` phase (surefire), so no Docker or
+# TestContainers are required.
+# Usage: bin/ci-unit-test.sh
+
+set -euo pipefail
+
+# Maven options and goals, in the order they are passed to the wrapper.
+maven_args=(
+  --batch-mode
+  -Pci
+  clean test
+  -Dlicense.skip
+  -Dexcluded.groups=performance
+  -Dsurefire.rerunFailingTestsCount=2
+)
+
+./mvnw "${maven_args[@]}"
diff --git a/bin/performance-test.cmd b/bin/performance-test.cmd
@@ -0,0 +1,18 @@
+@REM Copyright (C) 2020-2026 Confluent, Inc.
+@REM
+@REM Windows counterpart of bin/performance-test.sh: runs only the tests tagged
+@REM @Tag("performance"), which the regular CI build excludes because they take a
+@REM long time and need substantial hardware. Used by the self-hosted Windows runner.
+@REM Extra arguments are forwarded to Maven via %* after the fixed options below.
+@REM Usage: bin\performance-test.cmd [extra-maven-args...]
+
+@echo off
+setlocal
+
+call mvnw.cmd --batch-mode ^
+  -Pci ^
+  clean verify ^
+  -Dincluded.groups=performance ^
+  -Dexcluded.groups= ^
+  -Dlicense.skip ^
+  %*
diff --git a/bin/performance-test.sh b/bin/performance-test.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2020-2026 Confluent, Inc.
+#
+
+# Runs just the performance suite, i.e. the tests tagged @Tag("performance").
+# The regular CI build leaves these out because they are slow and need
+# substantial hardware. The self-hosted runner workflow
+# (.github/workflows/performance.yml) runs the Windows counterpart,
+# bin/performance-test.cmd; this script is the bash equivalent.
+#
+# Usage: bin/performance-test.sh [extra-maven-args...]
+
+set -euo pipefail
+
+# Fixed options first; caller-supplied arguments are appended last.
+maven_args=(
+  --batch-mode
+  -Pci
+  clean verify
+  -Dincluded.groups=performance
+  -Dexcluded.groups=
+  -Dlicense.skip
+)
+
+./mvnw "${maven_args[@]}" "$@"
diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,11 @@
+coverage:
+  status:
+    project:
+      default:
+        # Compare overall coverage with the base commit; fail only if it drops by more than `threshold`
+        target: auto
+        threshold: 1%
+    patch:
+      default:
+        # Report coverage of new code in the patch, but never fail the status check on it
+        informational: true