ByteVeda · pratyush618 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/README.md b/README.md
@@ -273,26 +273,6 @@ Optional modules for automatic capture with popular frameworks:
 
 ---
 
-## Build & CI/CD Plugins
-
-### Maven Plugin
-
-```xml
-<plugin>
-    <groupId>org.byteveda.agenteval</groupId>
-    <artifactId>agenteval-maven-plugin</artifactId>
-    <version>0.1.0-SNAPSHOT</version>
-    <executions>
-        <execution>
-            <goals><goal>evaluate</goal></goals>
-        </execution>
-    </executions>
-</plugin>
-```
-
-```bash
-mvn agenteval:evaluate
-```
 
 ### Gradle Plugin
 
@@ -350,6 +330,12 @@ agenteval-langchain4j/  — LangChain4j auto-capture (optional)
 agenteval-langgraph4j/  — LangGraph4j graph execution capture (optional)
 agenteval-mcp/          — MCP Java SDK tool call capture (optional)
 agenteval-redteam/      — Adversarial testing, 20 attack templates
+agenteval-contracts/    — Contract testing, behavioral invariant verification
+agenteval-statistics/   — Statistical rigor: confidence intervals, significance tests
+agenteval-chaos/        — Chaos engineering, agent resilience testing
+agenteval-replay/       — Deterministic record & replay for $0 regression tests
+agenteval-mutation/     — Prompt mutation testing, eval quality verification
+agenteval-fingerprint/  — Agent capability profiling across 8 dimensions
 agenteval-maven-plugin/ — Maven build integration
 agenteval-gradle-plugin/— Gradle build integration
 agenteval-github-actions/ — GitHub Actions composite action

diff --git a/agenteval-bom/pom.xml b/agenteval-bom/pom.xml
@@ -171,6 +171,27 @@
                 <artifactId>agenteval-chaos</artifactId>
                 <version>${project.version}</version>
             </dependency>
+
+            <!-- Deterministic Replay (optional) -->
+            <dependency>
+                <groupId>org.byteveda.agenteval</groupId>
+                <artifactId>agenteval-replay</artifactId>
+                <version>${project.version}</version>
+            </dependency>
+
+            <!-- Mutation Testing (optional) -->
+            <dependency>
+                <groupId>org.byteveda.agenteval</groupId>
+                <artifactId>agenteval-mutation</artifactId>
+                <version>${project.version}</version>
+            </dependency>
+
+            <!-- Capability Fingerprinting (optional) -->
+            <dependency>
+                <groupId>org.byteveda.agenteval</groupId>
+                <artifactId>agenteval-fingerprint</artifactId>
+                <version>${project.version}</version>
+            </dependency>
         </dependencies>
     </dependencyManagement>
 </project>
diff --git a/agenteval-fingerprint/pom.xml b/agenteval-fingerprint/pom.xml
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <!-- groupId and version are not declared on this module; they are inherited from the parent. -->
+    <parent>
+        <groupId>org.byteveda.agenteval</groupId>
+        <artifactId>agenteval-parent</artifactId>
+        <version>0.1.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>agenteval-fingerprint</artifactId>
+    <name>AgentEval Fingerprint</name>
+    <description>Capability profiling and fingerprinting for AI agents</description>
+
+    <!-- No dependency below declares a <version>; the versions are expected to be supplied by
+         dependencyManagement in agenteval-parent (not visible in this module; confirm). -->
+    <dependencies>
+        <!-- AgentEval modules this module depends on -->
+        <dependency>
+            <groupId>org.byteveda.agenteval</groupId>
+            <artifactId>agenteval-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.byteveda.agenteval</groupId>
+            <artifactId>agenteval-metrics</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.byteveda.agenteval</groupId>
+            <artifactId>agenteval-judge</artifactId>
+        </dependency>
+
+        <!-- Third-party libraries -->
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+        <!-- Test-only dependencies -->
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/...al-fingerprint/src/main/java/org/byteveda/agenteval/fingerprint/CapabilityComparison.java b/...al-fingerprint/src/main/java/org/byteveda/agenteval/fingerprint/CapabilityComparison.java
@@ -0,0 +1,68 @@
+package org.byteveda.agenteval.fingerprint;
+
+import java.util.ArrayList;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * Utility for comparing two {@link CapabilityProfile} instances.
+ */
+public final class CapabilityComparison {
+
+    private CapabilityComparison() {}
+
+    /**
+     * Compares two capability profiles and returns a comparison result.
+     *
+     * <p>A delta (B minus A) is computed for every dimension that profile B
+     * has a score for. A dimension that is missing from profile A is compared
+     * against a baseline of {@code 0.0}, so its delta equals B's score. A
+     * dimension that is missing from profile B is skipped and does not appear
+     * in the result.</p>
+     *
+     * <p>A strictly positive delta is reported as an improvement, a strictly
+     * negative delta as a regression; a delta of exactly {@code 0.0} is
+     * recorded in the deltas map but listed as neither. Dimensions are
+     * visited in enum declaration order, so both lists have a stable
+     * order regardless of the iteration order of the profiles' score maps.</p>
+     *
+     * @param profileA the baseline profile
+     * @param profileB the profile to compare against the baseline
+     * @return the comparison result
+     * @throws NullPointerException if either profile is {@code null}
+     */
+    public static CapabilityComparisonResult compare(
+            CapabilityProfile profileA, CapabilityProfile profileB) {
+        Objects.requireNonNull(profileA, "profileA must not be null");
+        Objects.requireNonNull(profileB, "profileB must not be null");
+
+        Map<CapabilityDimension, ProfileScore> baselineScores = profileA.scores();
+        Map<CapabilityDimension, ProfileScore> candidateScores = profileB.scores();
+        Set<CapabilityDimension> scoredInB = candidateScores.keySet();
+
+        Map<CapabilityDimension, Double> deltas = new EnumMap<>(CapabilityDimension.class);
+        List<CapabilityDimension> improvements = new ArrayList<>();
+        List<CapabilityDimension> regressions = new ArrayList<>();
+
+        // Iterate the enum rather than either key set: the profiles' maps have no
+        // defined iteration order, which would make the result lists unstable.
+        for (CapabilityDimension dim : CapabilityDimension.values()) {
+            if (!scoredInB.contains(dim)) {
+                // Nothing to compare against; dimensions only in A are not reported.
+                continue;
+            }
+
+            ProfileScore scoreA = baselineScores.get(dim);
+            double baseline = scoreA == null ? 0.0 : scoreA.score();
+            double delta = candidateScores.get(dim).score() - baseline;
+            deltas.put(dim, delta);
+
+            // Same rule for shared and B-only dimensions: only a non-zero
+            // delta counts as a change.
+            if (delta > 0.0) {
+                improvements.add(dim);
+            } else if (delta < 0.0) {
+                regressions.add(dim);
+            }
+        }
+
+        return new CapabilityComparisonResult(
+                profileA, profileB, deltas, improvements, regressions
+        );
+    }
+}
diff --git a/...gerprint/src/main/java/org/byteveda/agenteval/fingerprint/CapabilityComparisonResult.java b/...gerprint/src/main/java/org/byteveda/agenteval/fingerprint/CapabilityComparisonResult.java
@@ -0,0 +1,41 @@
+package org.byteveda.agenteval.fingerprint;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Outcome of a comparison between two capability profiles.
+ *
+ * <p>The map and both lists are stored as unmodifiable copies. A {@code null}
+ * improvements or regressions list is replaced by an empty list.</p>
+ *
+ * @param profileA     the first profile
+ * @param profileB     the second profile
+ * @param deltas       score differences per dimension (B minus A)
+ * @param improvements dimensions where B scored higher than A
+ * @param regressions  dimensions where B scored lower than A
+ */
+public record CapabilityComparisonResult(
+        CapabilityProfile profileA,
+        CapabilityProfile profileB,
+        Map<CapabilityDimension, Double> deltas,
+        List<CapabilityDimension> improvements,
+        List<CapabilityDimension> regressions
+) {
+
+    /**
+     * Validates the components and takes defensive copies of the collections.
+     *
+     * @throws NullPointerException if {@code profileA}, {@code profileB} or
+     *         {@code deltas} is {@code null}
+     */
+    public CapabilityComparisonResult {
+        Objects.requireNonNull(profileA, "profileA must not be null");
+        Objects.requireNonNull(profileB, "profileB must not be null");
+        deltas = Map.copyOf(Objects.requireNonNull(deltas, "deltas must not be null"));
+
+        if (improvements == null) {
+            improvements = List.of();
+        } else {
+            improvements = List.copyOf(improvements);
+        }
+
+        if (regressions == null) {
+            regressions = List.of();
+        } else {
+            regressions = List.copyOf(regressions);
+        }
+    }
+
+    /**
+     * Computes how much the overall score changed from profile A to profile B.
+     *
+     * @return profile B's overall score minus profile A's overall score
+     */
+    public double overallDelta() {
+        double candidate = profileB.overallScore();
+        double baseline = profileA.overallScore();
+        return candidate - baseline;
+    }
+}
diff --git a/...val-fingerprint/src/main/java/org/byteveda/agenteval/fingerprint/CapabilityDimension.java b/...val-fingerprint/src/main/java/org/byteveda/agenteval/fingerprint/CapabilityDimension.java
@@ -0,0 +1,60 @@
+package org.byteveda.agenteval.fingerprint;
+
+/**
+ * The axes on which an agent's capabilities are profiled.
+ *
+ * <p>Every constant carries a short human-readable name and a one-sentence
+ * description of what is measured along that axis.</p>
+ */
+public enum CapabilityDimension {
+
+    /** Correctness of responses. */
+    ACCURACY(
+            "Accuracy",
+            "Correctness and factual precision of agent responses"),
+
+    /** Fit between response and query. */
+    RELEVANCY(
+            "Relevancy",
+            "How well the agent's responses address the user's query"),
+
+    /** Staying within the provided context. */
+    FAITHFULNESS(
+            "Faithfulness",
+            "Adherence to provided context without fabrication"),
+
+    /** Consistency and readability. */
+    COHERENCE(
+            "Coherence",
+            "Logical consistency and readability of responses"),
+
+    /** Absence of harmful output. */
+    SAFETY(
+            "Safety",
+            "Avoidance of toxic, biased, or harmful content"),
+
+    /** Tool selection and invocation. */
+    TOOL_USE(
+            "Tool Use",
+            "Accuracy and appropriateness of tool selection and invocation"),
+
+    /** Finishing assigned tasks. */
+    TASK_COMPLETION(
+            "Task Completion",
+            "Ability to fully accomplish assigned tasks"),
+
+    /** Use of retrieved and supplied information. */
+    CONTEXT_UTILIZATION(
+            "Context Utilization",
+            "Effective use of retrieval context and provided information");
+
+    private final String label;
+    private final String summary;
+
+    CapabilityDimension(String label, String summary) {
+        this.label = label;
+        this.summary = summary;
+    }
+
+    /**
+     * Returns the human-readable name of this dimension.
+     *
+     * @return the display name
+     */
+    public String displayName() {
+        return this.label;
+    }
+
+    /**
+     * Returns the sentence describing what this dimension measures.
+     *
+     * @return the description
+     */
+    public String description() {
+        return this.summary;
+    }
+}
diff --git a/...teval-fingerprint/src/main/java/org/byteveda/agenteval/fingerprint/CapabilityProfile.java b/...teval-fingerprint/src/main/java/org/byteveda/agenteval/fingerprint/CapabilityProfile.java
@@ -0,0 +1,84 @@
+package org.byteveda.agenteval.fingerprint;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Complete capability profile for an agent, containing scores across all dimensions.
+ *
+ * <p>The score map is stored as an unmodifiable copy whose iteration order is
+ * unspecified. Methods on this record that return lists of dimensions sort
+ * them into enum declaration order so that results are stable.</p>
+ *
+ * @param agentName  the name of the profiled agent
+ * @param scores     scores keyed by dimension
+ * @param durationMs total profiling time in milliseconds
+ */
+public record CapabilityProfile(
+        String agentName,
+        Map<CapabilityDimension, ProfileScore> scores,
+        long durationMs
+) {
+
+    /**
+     * Validates the components and takes a defensive copy of the score map.
+     *
+     * @throws NullPointerException if {@code agentName} or {@code scores} is
+     *         {@code null}, or if {@code scores} contains a {@code null} key or value
+     */
+    public CapabilityProfile {
+        Objects.requireNonNull(agentName, "agentName must not be null");
+        Objects.requireNonNull(scores, "scores must not be null");
+        scores = Map.copyOf(scores);
+    }
+
+    /**
+     * Returns the overall score as the average across all scored dimensions.
+     *
+     * @return the arithmetic mean of the dimension scores, or 0.0 if there are
+     *         no scores (presumably within 0.0 to 1.0, assuming each
+     *         {@link ProfileScore} is in that range)
+     */
+    public double overallScore() {
+        // average() is empty for an empty stream, which covers the no-scores case.
+        return scores.values().stream()
+                .mapToDouble(ProfileScore::score)
+                .average()
+                .orElse(0.0);
+    }
+
+    /**
+     * Returns dimensions where the score is at or above the given threshold.
+     *
+     * @param threshold the minimum score to qualify as a strength
+     * @return list of strong dimensions, in enum declaration order
+     */
+    public List<CapabilityDimension> strengths(double threshold) {
+        return scores.entrySet().stream()
+                .filter(e -> e.getValue().score() >= threshold)
+                .map(Map.Entry::getKey)
+                .sorted() // enum natural order; the backing map's order is unspecified
+                .toList();
+    }
+
+    /**
+     * Returns dimensions with scores at or above 0.8.
+     *
+     * @return list of strong dimensions, in enum declaration order
+     */
+    public List<CapabilityDimension> strengths() {
+        return strengths(0.8);
+    }
+
+    /**
+     * Returns dimensions where the score is below the given threshold.
+     *
+     * @param threshold the score below which a dimension is considered weak
+     * @return list of weak dimensions, in enum declaration order
+     */
+    public List<CapabilityDimension> weaknesses(double threshold) {
+        return scores.entrySet().stream()
+                .filter(e -> e.getValue().score() < threshold)
+                .map(Map.Entry::getKey)
+                .sorted() // enum natural order; the backing map's order is unspecified
+                .toList();
+    }
+
+    /**
+     * Returns dimensions with scores below 0.5.
+     *
+     * @return list of weak dimensions, in enum declaration order
+     */
+    public List<CapabilityDimension> weaknesses() {
+        return weaknesses(0.5);
+    }
+}