diff --git a/CMakeLists.txt b/CMakeLists.txt
index f715c38..e72f62a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -170,6 +170,26 @@ add_executable(test_aggregate_drop_budget tests/unit/flow/test_aggregate_drop_bu
 target_link_libraries(test_aggregate_drop_budget PRIVATE openpenny)
 add_test(NAME aggregate_drop_budget COMMAND test_aggregate_drop_budget)
 
+add_executable(test_terminal_snapshot_resolution tests/unit/flow/test_terminal_snapshot_resolution.cpp)
+target_link_libraries(test_terminal_snapshot_resolution PRIVATE openpenny)
+add_test(NAME terminal_snapshot_resolution COMMAND test_terminal_snapshot_resolution)
+
+add_executable(test_aggregate_pending_resolution tests/unit/flow/test_aggregate_pending_resolution.cpp)
+target_link_libraries(test_aggregate_pending_resolution PRIVATE openpenny)
+add_test(NAME aggregate_pending_resolution COMMAND test_aggregate_pending_resolution)
+
+add_executable(test_aggregate_freeze_at_drop_limit tests/unit/flow/test_aggregate_freeze_at_drop_limit.cpp)
+target_link_libraries(test_aggregate_freeze_at_drop_limit PRIVATE openpenny)
+add_test(NAME aggregate_freeze_at_drop_limit COMMAND test_aggregate_freeze_at_drop_limit)
+
+add_executable(test_aggregate_duplicate_fallback tests/unit/flow/test_aggregate_duplicate_fallback.cpp)
+target_link_libraries(test_aggregate_duplicate_fallback PRIVATE openpenny)
+add_test(NAME aggregate_duplicate_fallback COMMAND test_aggregate_duplicate_fallback)
+
+add_executable(test_flow_evaluation_phase_gate tests/unit/flow/test_flow_evaluation_phase_gate.cpp)
+target_link_libraries(test_flow_evaluation_phase_gate PRIVATE openpenny)
+add_test(NAME flow_evaluation_phase_gate COMMAND test_flow_evaluation_phase_gate)
+
 add_executable(test_cli_options tests/unit/cli/test_cli_options.cpp)
 target_link_libraries(test_cli_options PRIVATE openpenny)
 add_test(NAME cli_options COMMAND test_cli_options)
@@ -178,6 +198,10 @@ add_executable(test_traffic_match tests/unit/net/test_traffic_match.cpp)
 target_link_libraries(test_traffic_match PRIVATE openpenny)
 add_test(NAME traffic_match COMMAND test_traffic_match)
 
+add_executable(test_packet_parser tests/unit/net/test_packet_parser.cpp)
+target_link_libraries(test_packet_parser PRIVATE openpenny)
+add_test(NAME packet_parser COMMAND test_packet_parser)
+
 add_executable(test_control_planner tests/unit/control/test_control_planner.cpp)
 target_link_libraries(test_control_planner PRIVATE openpenny)
 add_test(NAME control_planner COMMAND test_control_planner)
diff --git a/include/openpenny/agg/FlowKey.h b/include/openpenny/agg/FlowKey.h
new file mode 100644
index 0000000..4d3e295
--- /dev/null
+++ b/include/openpenny/agg/FlowKey.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace openpenny {
+
+struct FlowKey {
+    /**
+     * @struct FlowKey
+     * @brief Protocol-aware flow tuple in host byte order.
+     *
+     * Holds IPv4 source/destination, L4 ports, and the IPv4 protocol number so
+     * TCP/UDP traffic with the same addresses/ports does not alias to one key.
+     */
+    std::uint32_t src{0};      ///< IPv4 source address.
+    std::uint32_t dst{0};      ///< IPv4 destination address.
+    std::uint16_t sport{0};    ///< L4 source port.
+    std::uint16_t dport{0};    ///< L4 destination port.
+    std::uint8_t ip_proto{0};  ///< IPv4 protocol number.
+
+    bool operator==(const FlowKey& o) const noexcept {  // Field-wise equality; ip_proto participates.
+        return src == o.src &&
+               dst == o.dst &&
+               sport == o.sport &&
+               dport == o.dport &&
+               ip_proto == o.ip_proto;
+    }
+};
+
+struct FlowKeyHash {
+    /**
+     * @brief Hash all five FlowKey fields into one std::size_t using 64-bit avalanching.
+     */
+    std::size_t operator()(const FlowKey& k) const noexcept {
+        const std::uint64_t addr_pair =  // src in bits 63..32, dst in bits 31..0
+            (static_cast<std::uint64_t>(k.src) << 32) | k.dst;
+        const std::uint64_t ports_proto =  // non-overlapping fields, laid out as follows:
+            (static_cast<std::uint64_t>(k.sport) << 24) |  // sport    -> bits 39..24
+            (static_cast<std::uint64_t>(k.dport) << 8) |   // dport    -> bits 23..8
+            static_cast<std::uint64_t>(k.ip_proto);        // ip_proto -> bits 7..0
+
+        std::uint64_t v =  // fold both words together, hash_combine-style (golden-ratio constant plus shifts)
+            addr_pair ^ (ports_proto + 0x9e3779b97f4a7c15ULL +
+                         (addr_pair << 6) + (addr_pair >> 2));
+        v ^= (v >> 33);  // avalanche: same shift/multiply sequence as MurmurHash3's fmix64 finalizer
+        v *= 0xff51afd7ed558ccdULL;
+        v ^= (v >> 33);
+        v *= 0xc4ceb9fe1a85ec53ULL;
+        v ^= (v >> 33);
+        return static_cast<std::size_t>(v);  // narrows the 64-bit result if std::size_t is smaller
+    }
+};
+
+template <typename T>  // T: mapped value type stored per flow
+using FlowMap = std::unordered_map<FlowKey, T, FlowKeyHash>;  ///< Unordered map keyed by FlowKey, hashed with FlowKeyHash.
+
+using FlowSet = std::unordered_set<FlowKey, FlowKeyHash>;  ///< Unordered set of FlowKey, hashed with FlowKeyHash.
+
+} // namespace openpenny
diff --git a/include/openpenny/agg/Stats.h b/include/openpenny/agg/Stats.h
index e735282..7fe61a8 100644
--- a/include/openpenny/agg/Stats.h
+++ b/include/openpenny/agg/Stats.h
@@ -5,41 +5,18 @@
  * @file Stats.h
  * @brief Per-flow and aggregated statistics with a striped hash table.
  */
+#include "openpenny/agg/FlowKey.h"
+
 #include <atomic>
 #include <cstdint>
 #include <string>
 #include <vector>
-#include <unordered_map>
 #include <shared_mutex>
 #include <mutex>
 #include <chrono>
 
 namespace openpenny {
 
-struct FlowKey {
-    /**
-     * @brief Tuple identifying a TCP/UDP flow in host byte order.
-     */
-    uint32_t src; uint32_t dst; uint16_t sport; uint16_t dport;
-    bool operator==(const FlowKey& o) const noexcept {
-        return src==o.src && dst==o.dst && sport==o.sport && dport==o.dport;
-    }
-};
-
-struct FlowKeyHash {
-    /**
-     * @brief Mix all FlowKey fields into a single hash using 64-bit avalanching.
-     */
-    size_t operator()(const FlowKey& k) const noexcept {
-        uint64_t v = (static_cast<uint64_t>(k.src) << 32) ^ k.dst;
-        v ^= (static_cast<uint64_t>(k.sport) << 16) ^ k.dport;
-        v ^= (v >> 33); v *= 0xff51afd7ed558ccdULL;
-        v ^= (v >> 33); v *= 0xc4ceb9fe1a85ec53ULL;
-        v ^= (v >> 33);
-        return static_cast<size_t>(v);
-    }
-};
-
 /**
  * @brief Per-flow counters that mirror the BPF-side stats exposed to users.
  */
@@ -91,7 +68,7 @@ class FlowTable {
 private:
     struct Shard {
         mutable std::shared_mutex mutex;
-        std::unordered_map<FlowKey, Counters, FlowKeyHash> map;
+        FlowMap<Counters> map;
     };
     std::vector<Shard> shards_;
     FlowKeyHash hash_;
diff --git a/include/openpenny/app/core/ActiveTestPipeline.h b/include/openpenny/app/core/ActiveTestPipeline.h
index ae736f6..266fb37 100644
--- a/include/openpenny/app/core/ActiveTestPipeline.h
+++ b/include/openpenny/app/core/ActiveTestPipeline.h
@@ -168,8 +168,17 @@ class ActiveTestPipelineRunner : public IPipelineStrategy {
     /** Expire idle flows based on configured timeout. */
     void expire_idle_flows(const std::chrono::steady_clock::time_point& now);
 
-    /** Sweep pending snapshots and expire those past timeout. */
-    void sweep_expired_snapshots(const std::chrono::steady_clock::time_point& now);
+    /** Return true once the aggregate phase has completed and per-flow tests may run. */
+    bool individual_flow_evaluation_enabled() const;
+
+    /** Evaluate already-tracked flows once per-flow testing becomes active. */
+    void evaluate_individual_flows_if_enabled();
+
+    /** Complete terminal flows once all pending drop snapshots are resolved. */
+    void complete_resolved_terminal_flows();
+
+    /** Complete a flow and preserve a printable closed-loop summary if applicable. */
+    void complete_flow_with_summary(const FlowKey& key, const char* reason);
 
     // -------------------------------------------------------------------------
     // Member state
@@ -232,6 +241,8 @@ class ActiveTestPipelineRunner : public IPipelineStrategy {
      */
     std::size_t total_pkts_forwarded_{0};
     std::size_t total_forward_errors_{0};
+    std::vector<std::string> closed_loop_flow_summaries_;
+    std::vector<std::string> duplicate_exceeded_flow_summaries_;
 
     /**
      * Last time we logged global stats (prevents log flooding).
diff --git a/include/openpenny/app/core/DropCollectorBinding.h b/include/openpenny/app/core/DropCollectorBinding.h
index 2e26416..a20dea6 100644
--- a/include/openpenny/app/core/DropCollectorBinding.h
+++ b/include/openpenny/app/core/DropCollectorBinding.h
@@ -5,34 +5,24 @@
 #include "openpenny/app/core/OpenpennyPipelineDriver.h"
 #include "openpenny/agg/Stats.h"
 
-#include <mutex>
 #include <string>
-#include <unordered_map>
-
-namespace openpenny::penny {
-class FlowEngine;
-}
+#include <utility>
+#include <vector>
 
 namespace openpenny::app {
 
 /**
- * @brief Maintains FlowEngine -> DropCollector bindings and installs the
- * snapshot hook so drop events are mirrored into the shared collector.
+ * @brief Mirrors per-flow drop snapshots into the shared collector.
+ *
+ * New drops are inserted one at a time via upsert(). Snapshot state changes
+ * that affect a suffix of the per-flow snapshot vector (duplicate/rtx/expire)
+ * are mirrored via refresh_from() so the collector can rescan the already
+ * contiguous, append-only snapshot storage directly.
  */
 class DropCollectorBinding {
 public:
     static DropCollectorBinding& instance();
 
-    // Ensure the global timer snapshot hook is installed exactly once.
-    void ensure_snapshot_hook();
-
-    void bind(penny::FlowEngine* flow,
-              DropCollectorPtr collector,
-              const std::string& thread_name,
-              std::size_t shard_index);
-
-    void unbind(penny::FlowEngine* flow);
-
     void upsert(DropCollectorPtr collector,
                 const std::string& thread_name,
                 std::size_t shard_index,
@@ -40,23 +30,23 @@ class DropCollectorBinding {
                 penny::PacketDropId packet_id,
                 const penny::PacketDropSnapshot& snap);
 
-private:
-    struct BindingContext {
-        DropCollectorPtr collector;
-        std::string thread_name;
-        std::size_t shard_index{0};
-    };
+    void refresh_from(
+        DropCollectorPtr collector,
+        const std::string& thread_name,
+        std::size_t shard_index,
+        const FlowKey& key,
+        const std::vector<std::pair<penny::PacketDropId, penny::PacketDropSnapshot>>& snapshots,
+        std::size_t start_index);
 
+private:
     DropCollectorBinding() = default;
-    BindingContext lookup(penny::FlowEngine* flow) const;
-    void upsert_locked(const BindingContext& binding,
+
+    void upsert_locked(DropCollector& collector,
+                       DropCollector::Shard& shard,
+                       const std::string& thread_name,
                        const FlowKey& key,
                        penny::PacketDropId packet_id,
                        const penny::PacketDropSnapshot& snap);
-
-    mutable std::mutex mtx_;
-    std::once_flag hook_once_;
-    std::unordered_map<penny::FlowEngine*, BindingContext> bindings_;
 };
 
 } // namespace openpenny::app
diff --git a/include/openpenny/app/core/OpenpennyPipelineDriver.h b/include/openpenny/app/core/OpenpennyPipelineDriver.h
index e8bca0b..17e7d27 100644
--- a/include/openpenny/app/core/OpenpennyPipelineDriver.h
+++ b/include/openpenny/app/core/OpenpennyPipelineDriver.h
@@ -3,7 +3,7 @@
 #pragma once
 
 #include "openpenny/config/Config.h"
-#include "openpenny/agg/Stats.h"
+#include "openpenny/agg/FlowKey.h"
 #include "openpenny/egress/PacketSink.h"
 #include "openpenny/penny/flow/state/PennySnapshot.h"
 #include "openpenny/penny/flow/state/PacketDropId.h"
@@ -124,6 +124,10 @@ struct DropCollector {
 
     std::atomic<bool> accepting{true};
     std::size_t shard_count{1};
+    std::size_t snapshot_limit{0};
+    std::atomic<std::size_t> accepted_snapshot_count{0};
+    mutable std::mutex frozen_aggregate_counters_mtx;
+    std::optional<openpenny::app::AggregatedCounters> frozen_aggregate_counters;
     std::array<Shard, kMaxShards> shards{};
 
     std::size_t clamp_shard_index(std::size_t idx) const noexcept {
@@ -160,10 +164,13 @@ struct ModeResult {
     std::size_t flows_tracked_data = 0;
     bool penny_completed = false; // True when Penny heuristics triggered shutdown.
     bool aggregates_penny_completed = false; // Flag representing aggregate Penny status.
+    bool closed_loop_stop_hit = false; // True when the configured min_closed_loop_flows threshold was observed.
     // Passive-mode gap summary.
     std::size_t passive_flows_with_open_gaps = 0;
     std::size_t passive_open_gaps = 0;
     std::vector<std::string> passive_gap_summaries;
+    std::vector<std::string> closed_loop_flow_summaries;
+    std::vector<std::string> duplicate_exceeded_flow_summaries;
     std::size_t passive_flows_rst = 0;
     std::size_t passive_flows_syn_only = 0;
     std::size_t passive_flows_finished = 0;
diff --git a/include/openpenny/app/core/PassiveTestPipeline.h b/include/openpenny/app/core/PassiveTestPipeline.h
index 64d2df7..26167d4 100644
--- a/include/openpenny/app/core/PassiveTestPipeline.h
+++ b/include/openpenny/app/core/PassiveTestPipeline.h
@@ -2,7 +2,7 @@
 
 #pragma once
 
-#include "openpenny/agg/Stats.h"
+#include "openpenny/agg/FlowKey.h"
 #include "openpenny/app/core/OpenpennyPipelineDriver.h"
 #include "openpenny/app/core/PipelineRunner.h"
 #include "openpenny/config/Config.h"
@@ -13,8 +13,6 @@
 #include <memory>
 #include <optional>
 #include <string>
-#include <unordered_map>
-#include <unordered_set>
 #include <vector>
 
 namespace openpenny {
@@ -75,17 +73,19 @@ class PassiveTestPipelineRunner : public IPipelineStrategy {
     void finalize(ModeResult& result) override;
 
 private:
+    void reserve_for_config();
+
     const Config& cfg_;
     const PipelineOptions& opts_;
     FlowMatcher matcher_;
     net::PacketSourcePtr source_;
-    std::unordered_map<FlowKey, PassiveFlowState, FlowKeyHash> flows_;
+    FlowMap<PassiveFlowState> flows_;
     std::chrono::steady_clock::time_point start_time_{std::chrono::steady_clock::now()};
     std::size_t flows_seen_{0};
     std::size_t flows_finished_{0};
     std::vector<PassiveFlowState> finished_flows_;
-    std::unordered_map<FlowKey, std::size_t, FlowKeyHash> finished_index_;
-    std::unordered_set<FlowKey, FlowKeyHash> finished_keys_;
+    FlowMap<std::size_t> finished_index_;
+    FlowSet finished_keys_;
     bool stop_grace_active_{false};
     std::chrono::steady_clock::time_point stop_grace_start_{};
     bool stop_requested_{false};
diff --git a/include/openpenny/app/core/PerThreadStats.h b/include/openpenny/app/core/PerThreadStats.h
index d897da4..b3fe671 100644
--- a/include/openpenny/app/core/PerThreadStats.h
+++ b/include/openpenny/app/core/PerThreadStats.h
@@ -6,7 +6,7 @@
 #include <string>
 #include <vector>
 
-#include "openpenny/agg/Stats.h" // for FlowKey
+#include "openpenny/agg/FlowKey.h"
 #include "openpenny/penny/flow/state/PacketDropId.h"
 
 namespace openpenny::app {
diff --git a/include/openpenny/app/core/RuntimeSetup.h b/include/openpenny/app/core/RuntimeSetup.h
index dc99db9..0b624d4 100644
--- a/include/openpenny/app/core/RuntimeSetup.h
+++ b/include/openpenny/app/core/RuntimeSetup.h
@@ -18,4 +18,13 @@ const RuntimeSetupSnapshot& current_runtime_setup();
 // Mutable view for helpers that need to update status fields.
 RuntimeSetupSnapshot& runtime_setup_mutable();
 
+bool current_aggregates_active() noexcept;
+void set_current_aggregates_active(bool value) noexcept;
+
+RuntimeStatus::AggregatesStatus current_aggregates_status() noexcept;
+void set_current_aggregates_status(RuntimeStatus::AggregatesStatus status) noexcept;
+
+bool current_has_aggregate_eval() noexcept;
+void set_current_has_aggregate_eval(bool value) noexcept;
+
 } // namespace openpenny
diff --git a/include/openpenny/egress/PacketSink.h b/include/openpenny/egress/PacketSink.h
index 45d56e8..6f68534 100644
--- a/include/openpenny/egress/PacketSink.h
+++ b/include/openpenny/egress/PacketSink.h
@@ -142,9 +142,9 @@ class PacketSink {
      * @brief Emit a parsed packet. Must be thread-safe.
      *
      * Returns true on a successful write, false on any error. Transient
-     * EAGAIN/EWOULDBLOCK are counted as errors==0 (pipeline drops the
-     * packet) because the pipeline is not responsible for reliable
-     * delivery -- it's a passive mirror.
+     * EAGAIN/EWOULDBLOCK still mean the packet was dropped; sinks may count
+     * those in stats_.errors as backpressure-induced loss so operators can
+     * distinguish real reinjection congestion from intentional Penny drops.
      */
     virtual bool write(const net::PacketView& packet) = 0;
 
diff --git a/include/openpenny/egress/RawNicSink.h b/include/openpenny/egress/RawNicSink.h
index e75bd23..9d5c1dc 100644
--- a/include/openpenny/egress/RawNicSink.h
+++ b/include/openpenny/egress/RawNicSink.h
@@ -16,6 +16,10 @@
 
 #include "openpenny/egress/PacketSink.h"
 
+#include <atomic>
+#include <mutex>
+#include <vector>
+
 namespace openpenny::egress {
 
 class RawNicSink : public PacketSink {
@@ -30,9 +34,15 @@ class RawNicSink : public PacketSink {
     EgressKind kind() const noexcept override { return EgressKind::RawNic; }
 
 private:
+    int open_socket_fd(bool resolve_ifindex, bool log_failures);
+    int thread_fd();
+
     EgressConfig cfg_{};
     int fd_ = -1;
     int if_index_ = -1; ///< Cached ifindex for sendto(2).
+    std::mutex fds_mtx_;
+    std::vector<int> additional_fds_;
+    std::atomic<bool> backpressure_logged_{false};
 };
 
 } // namespace openpenny::egress
diff --git a/include/openpenny/egress/RawSocketSink.h b/include/openpenny/egress/RawSocketSink.h
index 1b9754f..e8427a7 100644
--- a/include/openpenny/egress/RawSocketSink.h
+++ b/include/openpenny/egress/RawSocketSink.h
@@ -15,6 +15,8 @@
 #include "openpenny/egress/PacketSink.h"
 
 #include <atomic>
+#include <mutex>
+#include <vector>
 
 namespace openpenny::egress {
 
@@ -30,8 +32,14 @@ class RawSocketSink : public PacketSink {
     EgressKind kind() const noexcept override { return EgressKind::RawSocket; }
 
 private:
+    int open_socket_fd(bool log_failures);
+    int thread_fd();
+
     EgressConfig cfg_{};
     int fd_ = -1;
+    std::mutex fds_mtx_;
+    std::vector<int> additional_fds_;
+    std::atomic<bool> backpressure_logged_{false};
     /// Latched once we have logged the first EMSGSIZE failure. The kernel
     /// returns EMSGSIZE for any IP datagram larger than the egress
     /// interface MTU (raw sockets cannot fragment), and on a busy
diff --git a/include/openpenny/egress/TunSink.h b/include/openpenny/egress/TunSink.h
index e261cd3..e194966 100644
--- a/include/openpenny/egress/TunSink.h
+++ b/include/openpenny/egress/TunSink.h
@@ -15,6 +15,7 @@
 
 #include "openpenny/egress/PacketSink.h"
 
+#include <atomic>
 #include <mutex>
 #include <vector>
 
@@ -54,6 +55,7 @@ class TunSink : public PacketSink {
     /// the `thread_local` cache are lock-free after the first call.
     std::mutex fds_mtx_;
     std::vector<int> additional_fds_;
+    std::atomic<bool> backpressure_logged_{false};
 };
 
 } // namespace openpenny::egress
diff --git a/include/openpenny/net/Packet.h b/include/openpenny/net/Packet.h
index 218e5e1..35e7503 100644
--- a/include/openpenny/net/Packet.h
+++ b/include/openpenny/net/Packet.h
@@ -2,7 +2,7 @@
 
 #pragma once
 
-#include "openpenny/agg/Stats.h" // for FlowKey
+#include "openpenny/agg/FlowKey.h"
 #include "openpenny/dataplane/Session.h"
 #include "openpenny/penny/flow/state/PacketDropId.h"
 
@@ -104,9 +104,9 @@ struct TcpHeaderView {
  * All pointers into the packet buffer are valid only during the handler call.
  */
 struct PacketView {
-    FlowKey    flow{};            ///< Flow identifier (5-tuple or 4-tuple depending on source).
+    FlowKey    flow{};            ///< Protocol-aware flow identifier (IPv4 src/dst, L4 ports, IP proto).
     TcpHeaderView tcp{};          ///< Minimal parsed TCP header subset.
-    uint8_t    ip_proto{0};        ///< IPv4 protocol number (TCP=6, UDP=17, etc.).
+    uint8_t    ip_proto{0};        ///< IPv4 protocol number (TCP=6, UDP=17, etc.); mirrors flow.ip_proto.
     uint64_t   payload_bytes{0};  ///< L4 payload length (0 for pure ACKs or empty payloads).
     uint64_t   timestamp_ns{0};   ///< Packet capture timestamp in nanoseconds.
     
diff --git a/include/openpenny/penny/flow/engine/FlowEngine.h b/include/openpenny/penny/flow/engine/FlowEngine.h
index 5d59cff..f328c0a 100644
--- a/include/openpenny/penny/flow/engine/FlowEngine.h
+++ b/include/openpenny/penny/flow/engine/FlowEngine.h
@@ -44,6 +44,10 @@ class FlowEngine {
     using DropSnapshotSink = std::function<void(const FlowKey&,
                                                 PacketDropId,
                                                 const PacketDropSnapshot&)>;
+    using SnapshotRefreshSink = std::function<void(
+        const FlowKey&,
+        const std::vector<std::pair<PacketDropId, PacketDropSnapshot>>&,
+        std::size_t start_index)>;
 
     /// High-level decision / outcome for this flow.
     enum class FlowDecision {
@@ -152,10 +156,10 @@ class FlowEngine {
     // Flow identity
     // ---------------------------------------------------------------------
 
-    /// Attach the 5-tuple (or equivalent) key to this flow.
+    /// Attach the protocol-aware flow key to this flow.
     void set_flow_key(const FlowKey& key) noexcept { flow_key_ = key; }
 
-    /// Return the flow key (5-tuple) associated with this FlowEngine.
+    /// Return the protocol-aware flow key associated with this FlowEngine.
     FlowKey flow_key() const noexcept { return flow_key_; }
 
     // ---------------------------------------------------------------------
@@ -187,6 +191,9 @@ class FlowEngine {
     /// Install a sink to receive drop snapshots as they are created.
     void set_drop_sink(DropSnapshotSink sink);
 
+    /// Install a sink to mirror in-place snapshot updates from a given suffix onward.
+    void set_snapshot_refresh_sink(SnapshotRefreshSink sink);
+
     // ---------------------------------------------------------------------
     // Sequence interval classification
     // ---------------------------------------------------------------------
@@ -335,6 +342,9 @@ class FlowEngine {
     /// Mark all pending snapshots as expired (used on shutdown/cleanup).
     void expire_all_pending_snapshots();
 
+    /// Resolve pending snapshots at teardown using the configured timeout.
+    void resolve_pending_snapshots(const std::chrono::steady_clock::time_point& now);
+
 private:
     /**
      * @brief Compute the final classification decision for this flow based on
@@ -342,6 +352,12 @@ class FlowEngine {
      */
     FlowDecision evaluate() const;
 
+    /// Mirror snapshot updates affecting [start_index, end) to any external collector.
+    void publish_snapshot_refresh(std::size_t start_index);
+
+    /// Publish a single-snapshot update when no bulk refresh sink is installed.
+    void publish_single_snapshot_update(PacketDropId packet_id, std::size_t snapshot_index);
+
     // ---------------------------------------------------------------------
     // Internal gap bookkeeping structures
     // ---------------------------------------------------------------------
@@ -383,6 +399,7 @@ class FlowEngine {
     /// Mapping from snapshot packet_id to its index in flow_drop_snapshots_.
     std::unordered_map<PacketDropId, size_t> flow_snapshot_index_by_id_;
     DropSnapshotSink drop_sink_{};
+    SnapshotRefreshSink snapshot_refresh_sink_{};
 
     /**
      * @brief Shared liveness flag observed by timer entries.
@@ -432,7 +449,7 @@ class FlowEngine {
     // Flow identity
     // ---------------------------------------------------------------------
 
-    FlowKey flow_key_{};  ///< 5-tuple (or equivalent) identifying this flow.
+    FlowKey flow_key_{};  ///< Protocol-aware tuple identifying this flow.
 };
 
 } // namespace openpenny::penny
diff --git a/include/openpenny/penny/flow/manager/ThreadFlowManager.h b/include/openpenny/penny/flow/manager/ThreadFlowManager.h
index 488f368..27a22ec 100644
--- a/include/openpenny/penny/flow/manager/ThreadFlowManager.h
+++ b/include/openpenny/penny/flow/manager/ThreadFlowManager.h
@@ -2,14 +2,13 @@
 
 #pragma once
 
+#include "openpenny/agg/FlowKey.h"
 #include "openpenny/penny/flow/engine/FlowEngine.h"
 #include "openpenny/penny/flow/state/PennyStats.h"
 #include "openpenny/net/Packet.h"
 #include "openpenny/app/core/PerThreadStats.h"
 
 #include <limits>
-#include <unordered_map>
-#include <unordered_set>
 #include <stdexcept>
 #include <vector>
 #include <algorithm>
@@ -125,18 +124,20 @@ class ThreadFlowManager {
      * @param is_syn        True if the first packet carried a SYN flag.
      * @param ts            Timestamp of the first packet (for data timing).
      *
-     * @return true if a new flow entry was inserted, false if the flow already existed
-     *         or had been monitored before.
+     * @return Pointer to the newly inserted flow entry; nullptr if nothing was inserted (e.g. the flow already existed or had been monitored before).
      */
-    bool add_new_flow(const FlowKey& key,
-                      uint32_t seq,
-                      uint32_t payload_bytes,
-                      bool is_syn,
-                      const std::chrono::steady_clock::time_point& ts);
+    FlowEngineEntry* add_new_flow(const FlowKey& key,
+                                  uint32_t seq,
+                                  uint32_t payload_bytes,
+                                  bool is_syn,
+                                  const std::chrono::steady_clock::time_point& ts);
 
     /// Install a sink that receives drop snapshots from all managed FlowEngines.
     void set_drop_sink(FlowEngine::DropSnapshotSink sink);
 
+    /// Install a sink that mirrors in-place snapshot updates from managed FlowEngines.
+    void set_snapshot_refresh_sink(FlowEngine::SnapshotRefreshSink sink);
+
     /**
      * @brief Update or create the FlowEngine entry corresponding to a packet.
      *
@@ -254,6 +255,8 @@ class ThreadFlowManager {
     }
 
 private:
+    void reserve_for_config(const Config::ActiveConfig& cfg);
+
     /**
      * @brief Count how many flows are currently considered "active".
      *
@@ -279,12 +282,13 @@ class ThreadFlowManager {
     PennyStats stats_{};
 
     /// Map from flow key to the corresponding FlowEngineEntry for active or tracked flows.
-    std::unordered_map<FlowKey, FlowEngineEntry, FlowKeyHash> table_active_flows_;
+    FlowMap<FlowEngineEntry> table_active_flows_;
 
     /// Set of flow keys that have already been fully processed / completed.
-    std::unordered_set<FlowKey, FlowKeyHash> table_completed_flows_;
+    FlowSet table_completed_flows_;
 
     FlowEngine::DropSnapshotSink drop_sink_{};
+    FlowEngine::SnapshotRefreshSink snapshot_refresh_sink_{};
 };
 
 } // namespace openpenny::penny
diff --git a/include/openpenny/penny/flow/timer/ThreadFlowEventTimer.h b/include/openpenny/penny/flow/timer/ThreadFlowEventTimer.h
index 516cbb4..53a2340 100644
--- a/include/openpenny/penny/flow/timer/ThreadFlowEventTimer.h
+++ b/include/openpenny/penny/flow/timer/ThreadFlowEventTimer.h
@@ -2,19 +2,18 @@
 
 #pragma once
 
-#include "openpenny/agg/Stats.h" // for FlowKey
+#include "openpenny/agg/FlowKey.h"
 #include "openpenny/penny/flow/state/PacketDropId.h"
 
 #include <atomic>
 #include <chrono>
-#include <condition_variable>
 #include <cstdint>
 #include <deque>
+#include <limits>
 #include <memory>
 #include <mutex>
 #include <queue>
 #include <string>
-#include <thread>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
@@ -30,34 +29,34 @@ class FlowEngine;
  *
  * High-level design
  * -----------------
- *  - A single background thread runs timer_loop().
- *  - Packet-processing threads never mutate FlowEngine snapshots directly. Instead, they:
+ *  - Each worker thread owns a thread-local manager instance.
+ *  - Packet-processing paths never mutate FlowEngine snapshots directly from nested
+ *    helpers. Instead, they:
  *      * register drops (with deadlines),
  *      * enqueue retransmission / duplicate events.
- *  - The timer thread:
+ *  - The worker periodically calls drain_callbacks(), which:
  *      * pops expired entries from a min-heap,
  *      * consumes queued events,
  *      * turns them into callbacks,
- *      * and executes those callbacks itself (without holding the manager mutex).
+ *      * and executes those callbacks on the same worker thread.
  *
  * As a result:
- *  - All snapshot mutations are single-threaded (in the timer thread).
- *  - The packet path stays lightweight and avoids locking around FlowEngine state.
+ *  - All snapshot mutations stay on the queue worker that owns the flow.
+ *  - We avoid one extra timer thread and the associated context switching per queue.
  */
 class ThreadFlowEventTimerManager {
 public:
     /**
      * @brief Access the thread-local timer manager instance.
      *
-     * Each packet-processing thread gets its own manager (and timer thread),
-     * so queues are isolated.
+     * Each packet-processing thread gets its own manager, so queues are isolated.
      */
     static ThreadFlowEventTimerManager& instance();
 
     ~ThreadFlowEventTimerManager();
 
     /**
-     * @brief Start the timer thread with a given drop timeout.
+     * @brief Initialise the per-thread timer state with a given drop timeout.
      *
      * @param timeout_sec Timeout in seconds after which an un-repaired drop snapshot
      *                    is considered expired.
@@ -65,7 +64,7 @@ class ThreadFlowEventTimerManager {
     void start(double timeout_sec);
 
     /**
-     * @brief Stop the timer thread and flush internal state.
+     * @brief Stop and flush internal state.
      *
      * Safe to call multiple times; subsequent calls after the first have no effect.
      */
@@ -94,8 +93,8 @@ class ThreadFlowEventTimerManager {
     /**
      * @brief Queue an asynchronous "retransmitted" event from the packet path.
      *
-     * The timer thread will later convert this into a callback that updates
-     * the relevant snapshot in the owning FlowEngine.
+     * The owning worker thread will later convert this into a callback that
+     * updates the relevant snapshot in the owning FlowEngine.
      */
     void enqueue_retransmitted(PacketDropId packet_id, FlowEngine* flow);
 
@@ -116,26 +115,10 @@ class ThreadFlowEventTimerManager {
     void purge_flow(FlowEngine* flow);
 
     /**
-     * @brief Optional manual draining of callbacks.
-     *
-     * Historically used when callbacks were executed from the packet-processing
-     * thread; kept for compatibility. In the current design, the timer thread
-     * is responsible for draining and executing callbacks via run_callbacks().
+     * @brief Drain due expirations and queued events on the current worker thread.
      */
     void drain_callbacks();
 
-    enum class SnapshotEventKind { Expire, Retransmit, Duplicate };
-
-    /**
-     * @brief Install a hook invoked after a snapshot event is applied.
-     *
-     * The hook runs in the packet-processing thread context when callbacks
-     * are drained.
-     */
-    static void set_snapshot_hook(std::function<void(FlowEngine*,
-                                                     PacketDropId,
-                                                     SnapshotEventKind)> hook);
-
 private:
     // ---------------------------------------------------------------------
     // Internal helper types
@@ -187,7 +170,7 @@ class ThreadFlowEventTimerManager {
     };
 
     /**
-     * @brief Event generated by the packet path and consumed by the timer thread.
+     * @brief Event generated by the packet path and consumed by drain_callbacks().
      *
      * These events are cheap to enqueue in the packet-processing context and
      * later turned into callbacks against FlowEngine.
@@ -206,7 +189,7 @@ class ThreadFlowEventTimerManager {
     };
 
     /**
-     * @brief Callback to be executed against FlowEngine by the timer thread.
+     * @brief Callback to be executed against FlowEngine on the worker thread.
      *
      * This is the only place where snapshots and FlowEngine state are mutated.
      */
@@ -227,25 +210,25 @@ class ThreadFlowEventTimerManager {
     ThreadFlowEventTimerManager(const ThreadFlowEventTimerManager&) = delete;
     ThreadFlowEventTimerManager& operator=(const ThreadFlowEventTimerManager&) = delete;
 
-    // Main thread loop: waits for timers or events, then processes them.
-    void timer_loop();
-
-    // Notify the timer thread that new timers/events are available (mutex_ held).
-    void wake_locked();
-
     // Run and clear the callbacks in @p pending, without holding mutex_.
     void run_callbacks(std::deque<Callback>& pending);
 
+    // Collect all due expirations and queued events into @p pending (mutex_ held).
+    void collect_ready_callbacks(std::deque<Callback>& pending,
+                                 const std::chrono::steady_clock::time_point& now);
+
+    // Discard cancelled heap entries and refresh the lock-free earliest-deadline hint (mutex_ held).
+    void refresh_next_deadline_locked();
+
     // ---------------------------------------------------------------------
     // Synchronisation / thread state
     // ---------------------------------------------------------------------
 
     std::mutex mutex_;
-    std::condition_variable cv_;
-    std::thread thread_;
+    using DeadlineRep = std::chrono::steady_clock::duration::rep;
+    static constexpr DeadlineRep kNoDeadline = std::numeric_limits<DeadlineRep>::max();
 
-    bool running_{false};    ///< True once the timer thread has been started.
-    bool stop_flag_{false};  ///< Set to request shutdown of the timer thread.
+    bool running_{false};    ///< True once start() has initialised this worker-local manager.
     double timeout_sec_{0.0};
     std::uint64_t next_token_{1};
 
@@ -260,7 +243,7 @@ class ThreadFlowEventTimerManager {
     std::unordered_map<PacketKey, Entry, PacketKeyHash> by_id_;
 
     /// Record of flow+packet_id pairs already handled as retransmitted.
-    std::vector<PacketKey> retransmit_seen_;
+    std::unordered_set<PacketKey, PacketKeyHash> retransmit_seen_;
 
     /// Map from FlowEngine* to active timer tokens (for bulk purge_flow()).
     std::unordered_multimap<FlowEngine*, std::uint64_t> by_flow_;
@@ -272,33 +255,20 @@ class ThreadFlowEventTimerManager {
     // Asynchronous events and callbacks
     // ---------------------------------------------------------------------
 
-    /// Events queued by the packet-processing path for the timer thread.
+    /// Events queued by the packet-processing path for drain_callbacks().
     std::deque<Event> events_;
 
     /**
-     * @brief Pending callbacks to execute against FlowEngine.
+     * @brief Lock-free fast-path size of `events_`.
      *
-     * These are built while holding mutex_, but always executed by the timer
-     * thread via run_callbacks() without the lock, avoiding lock contention
-     * during snapshot updates.
+     * This lets drain_callbacks() skip taking mutex_ when there are no queued
+     * retransmit/duplicate events and no drop deadline has elapsed yet.
      */
-    std::deque<Callback> callbacks_;
+    std::atomic<std::size_t> queued_event_count_{0};
 
-    /**
-     * @brief Lock-free fast-path size of `callbacks_`.
-     *
-     * Every per-packet poll iteration on every worker calls
-     * `drain_callbacks()`. With many AF_XDP queue workers in busy-poll
-     * mode that adds up to millions of mutex acquires per second on
-     * `mutex_` even when no callbacks are pending. This counter lets
-     * `drain_callbacks()` skip the lock entirely on the common
-     * "nothing to drain" path. It is incremented under `mutex_` whenever
-     * we push to `callbacks_`, and reset to 0 inside `drain_callbacks()`
-     * after we swap the deque out.
-     */
-    std::atomic<std::size_t> pending_callbacks_{0};
+    /// Lock-free hint for the earliest outstanding drop deadline.
+    std::atomic<DeadlineRep> next_deadline_{kNoDeadline};
 
-    static std::function<void(FlowEngine*, PacketDropId, SnapshotEventKind)> snapshot_hook_;
 };
 
 } // namespace openpenny::penny
diff --git a/src/app/cli/penny_cli.cpp b/src/app/cli/penny_cli.cpp
index 5aa4a66..08d270a 100644
--- a/src/app/cli/penny_cli.cpp
+++ b/src/app/cli/penny_cli.cpp
@@ -862,12 +862,8 @@ int main(int argc, char** argv) {
     //
     //   End state: Passive pipeline completed (flows=42)
     if (result.active) {
-        const auto agg_snapshot =
-            (result.active->aggregates_snapshot
-                ? *result.active->aggregates_snapshot
-                : openpenny::app::aggregate_counters());
-
         const auto agg_live = openpenny::app::aggregate_counters();
+        const auto& agg_snapshot = agg_live;
         const auto runtime  = openpenny::current_runtime_setup();
 
         const bool is_passive =
@@ -892,6 +888,15 @@ int main(int argc, char** argv) {
             result.aggregates_enabled &&
             runtime.aggregates_status !=
                 openpenny::RuntimeStatus::AggregatesStatus::PENDING;
+        const std::uint64_t closed_loop_flows_observed = std::max(
+            agg_snapshot.flows_closed_loop,
+            agg_live.flows_closed_loop);
+        const std::uint64_t closed_loop_flows_found = std::max<std::uint64_t>(
+            closed_loop_flows_observed,
+            result.active->closed_loop_flow_summaries.size());
+        const std::uint64_t duplicate_exceeded_flows_found = std::max<std::uint64_t>(
+            agg_snapshot.flows_duplicates_exceeded,
+            result.active->duplicate_exceeded_flow_summaries.size());
 
         // --- Run ---------------------------------------------------------
         print_section(std::cout, "Run");
@@ -1020,40 +1025,84 @@ int main(int argc, char** argv) {
                             agg_snapshot.flows_duplicates_exceeded);
         }
 
-        // --- Per-flow detail (passive only, if any) ----------------------
+        // --- Per-flow detail ---------------------------------------------
         if (is_passive && !result.active->passive_gap_summaries.empty()) {
             print_section(std::cout, "Per-flow detail");
             for (const auto& g : result.active->passive_gap_summaries) {
                 std::cout << "  " << g << "\n";
             }
         }
+        if (!is_passive && !result.active->closed_loop_flow_summaries.empty()) {
+            print_section(std::cout, "Closed-loop flows");
+            for (const auto& s : result.active->closed_loop_flow_summaries) {
+                std::cout << "  " << s << "\n";
+            }
+        }
+        if (!is_passive && !result.active->duplicate_exceeded_flow_summaries.empty()) {
+            print_section(std::cout, "Duplicate-exceeded flows");
+            for (const auto& s : result.active->duplicate_exceeded_flow_summaries) {
+                std::cout << "  " << s << "\n";
+            }
+        }
 
         // --- End state ---------------------------------------------------
-        std::ostringstream end_state;
+        std::ostringstream end_state_primary;
+        std::ostringstream end_state_closed_loop_suffix;
+        std::ostringstream end_state_duplicate_suffix;
         const char* end_color = "";
+        const char* closed_loop_suffix_color = "";
+        const char* duplicate_suffix_color = "";
         if (!is_passive && agg_done) {
-            end_state << "Aggregates completed (" << agg_status_str << ")";
+            end_state_primary << "Aggregates completed (" << agg_status_str << ")";
+            if (closed_loop_flows_found > 0) {
+                end_state_closed_loop_suffix << ", found " << fmt_count(closed_loop_flows_found)
+                                             << " closed-loop flow"
+                                             << (closed_loop_flows_found == 1 ? "" : "s");
+                closed_loop_suffix_color = kAnsiBlue;
+            }
+            if (duplicate_exceeded_flows_found > 0) {
+                end_state_duplicate_suffix << ", found "
+                                           << fmt_count(duplicate_exceeded_flows_found)
+                                           << " duplicate-exceeded flow"
+                                           << (duplicate_exceeded_flows_found == 1 ? "" : "s");
+                duplicate_suffix_color = kAnsiYellow;
+            }
             end_color = color_for_agg_status(agg_status_str);
         } else if (result.active->penny_completed) {
             if (is_passive) {
-                end_state << "Passive pipeline completed (flows="
-                          << result.active->passive_flows_finished << ")";
+                end_state_primary << "Passive pipeline completed (flows="
+                                  << result.active->passive_flows_finished << ")";
                 end_color = kAnsiGreen;
             } else {
-                end_state << "Penny heuristics completed";
+                end_state_primary << "Penny heuristics completed";
+                if (closed_loop_flows_found > 0) {
+                    end_state_closed_loop_suffix << ", found " << fmt_count(closed_loop_flows_found)
+                                                 << " closed-loop flow"
+                                                 << (closed_loop_flows_found == 1 ? "" : "s");
+                    closed_loop_suffix_color = kAnsiBlue;
+                }
+                if (duplicate_exceeded_flows_found > 0) {
+                    end_state_duplicate_suffix << ", found "
+                                               << fmt_count(duplicate_exceeded_flows_found)
+                                               << " duplicate-exceeded flow"
+                                               << (duplicate_exceeded_flows_found == 1 ? "" : "s");
+                    duplicate_suffix_color = kAnsiYellow;
+                }
                 end_color = kAnsiGreen;
             }
         } else if (g_stop_requested != 0) {
-            end_state << "Stopped via signal (Ctrl+C)";
+            end_state_primary << "Stopped via signal (Ctrl+C)";
             end_color = kAnsiYellow;
         } else {
-            end_state << "Reader/pipeline error (see logs)";
+            end_state_primary << "Reader/pipeline error (see logs)";
             end_color = kAnsiRed;
         }
 
         std::cout << "\n"
                   << ansi(kAnsiBold) << "End state:" << ansi(kAnsiReset) << " "
-                  << ansi(end_color) << end_state.str() << ansi(kAnsiReset)
+                  << ansi(end_color) << end_state_primary.str() << ansi(kAnsiReset)
+                  << ansi(closed_loop_suffix_color) << end_state_closed_loop_suffix.str() << ansi(kAnsiReset)
+                  << ansi(duplicate_suffix_color) << end_state_duplicate_suffix.str() << ansi(kAnsiReset)
                   << "\n";
     } else {
         // No active result usually means no packets were processed or the
@@ -1071,4 +1120,4 @@ int main(int argc, char** argv) {
     // of the forwarding fd is needed here any more.
     run_detach_command();
     return 0;
-}
\ No newline at end of file
+}
diff --git a/src/app/core/AggregatesController.cpp b/src/app/core/AggregatesController.cpp
index c5cf349..7b3108d 100644
--- a/src/app/core/AggregatesController.cpp
+++ b/src/app/core/AggregatesController.cpp
@@ -16,10 +16,8 @@ DropCollector::TimestampRep snapshot_timestamp(
     return snap.timestamp.time_since_epoch().count();
 }
 
-void decorate_snapshot_record(DropSnapshotRecord& record,
-                              const openpenny::app::AggregatedCounters& agg) {
-    record.counters = agg;
-    record.snapshot.stats.overwrite_from_aggregates(agg);
+bool is_pending_snapshot(const penny::PacketDropSnapshot& snap) noexcept {
+    return snap.state == penny::SnapshotState::Pending;
 }
 
 void set_runtime_eval_counters(RuntimeStatus& runtime,
@@ -46,9 +44,21 @@ void store_aggregate_snapshot_once(
     if (!snapshot_slot) snapshot_slot = agg;
 }
 
-std::vector<DropSnapshotRecord> collect_all_drop_snapshots(
-    const DropCollector& collector,
+std::optional<openpenny::app::AggregatedCounters> collect_frozen_aggregate_counters(
+    const DropCollector& collector) {
+    const std::lock_guard<std::mutex> guard(collector.frozen_aggregate_counters_mtx);  // held across the copy
+    return collector.frozen_aggregate_counters;
+}
+
+penny::PennyStats make_eval_stats_from_aggregates(
     const openpenny::app::AggregatedCounters& agg) {
+    penny::PennyStats stats;
+    stats.overwrite_from_aggregates(agg);
+    return stats;
+}
+
+std::vector<DropSnapshotRecord> collect_all_drop_snapshots(
+    const DropCollector& collector) {
     std::vector<DropSnapshotRecord> out;
     std::size_t total = 0;
     for (std::size_t shard_index = 0; shard_index < collector.shard_count; ++shard_index) {
@@ -61,15 +71,11 @@ std::vector<DropSnapshotRecord> collect_all_drop_snapshots(
         std::lock_guard<std::mutex> lock(shard.mtx);
         out.insert(out.end(), shard.snapshots.begin(), shard.snapshots.end());
     }
-    for (auto& record : out) {
-        decorate_snapshot_record(record, agg);
-    }
     return out;
 }
 
 std::optional<DropSnapshotRecord> collect_latest_drop_snapshot(
-    const DropCollector& collector,
-    const openpenny::app::AggregatedCounters& agg) {
+    const DropCollector& collector) {
     std::size_t best_shard_index = 0;
     auto best_timestamp = DropCollector::kNoSnapshotTimestamp;
     for (std::size_t shard_index = 0; shard_index < collector.shard_count; ++shard_index) {
@@ -96,7 +102,6 @@ std::optional<DropSnapshotRecord> collect_latest_drop_snapshot(
         if (latest_index < best_shard.snapshots.size()) {
             auto record = best_shard.snapshots[latest_index];
             if (snapshot_timestamp(record.snapshot) == best_timestamp) {
-                decorate_snapshot_record(record, agg);
                 return record;
             }
         }
@@ -119,9 +124,6 @@ std::optional<DropSnapshotRecord> collect_latest_drop_snapshot(
             latest = *it;
         }
     }
-    if (latest) {
-        decorate_snapshot_record(*latest, agg);
-    }
     return latest;
 }
 
@@ -141,6 +143,29 @@ CollectorSnapshotSummary summarize_collector_snapshots(const DropCollector& coll
     return summary;
 }
 
+CollectorSnapshotSummary summarize_drop_snapshots(
+    const std::vector<DropSnapshotRecord>& snapshots) {
+    // Tally the total and the still-pending records of an already-collected vector.
+    CollectorSnapshotSummary summary;
+    summary.snapshot_count = snapshots.size();
+    std::size_t pending_total = 0;
+    for (const auto& record : snapshots) {
+        if (is_pending_snapshot(record.snapshot)) ++pending_total;
+    }
+    summary.pending_snapshot_count = pending_total;
+    return summary;
+}
+
+// True only when a non-zero drop budget has been met and nothing is still pending.
+bool aggregates_ready_for_evaluation(std::size_t required_drops,
+                                     std::size_t snapshot_count,
+                                     std::size_t pending_snapshot_count,
+                                     std::uint64_t pending_rtx_count) noexcept {
+    if (required_drops == 0 || snapshot_count < required_drops) return false;
+    const bool nothing_pending = pending_snapshot_count == 0 && pending_rtx_count == 0;
+    return nothing_pending;
+}
+
 } // namespace
 
 AggregatesController::AggregatesController(const Config& cfg,
@@ -159,7 +184,11 @@ AggregatesController::AggregatesController(const Config& cfg,
       individual_limit_enabled_{opts.mode == PipelineOptions::Mode::Active &&
                                 cfg.active.stop_after_individual_flows > 0},
       min_closed_loop_enabled_{opts.mode == PipelineOptions::Mode::Active &&
-                               cfg.active.min_closed_loop_flows > 0} {}
+                               cfg.active.min_closed_loop_flows > 0} {
+    if (collector_enabled_ && collector_) {
+        collector_->snapshot_limit = required_drops_;
+    }
+}
 
 void AggregatesController::start() {
     if (collector_enabled_) {
@@ -214,8 +243,7 @@ std::optional<openpenny::app::AggregatedCounters> AggregatesController::aggregat
 
 void AggregatesController::populate_drop_snapshots(PipelineSummary& summary) const {
     if (!collector_) return;
-    const auto agg = openpenny::app::aggregate_counters();
-    auto snaps = collect_all_drop_snapshots(*collector_, agg);
+    auto snaps = collect_all_drop_snapshots(*collector_);
     std::sort(
         snaps.begin(),
         snaps.end(),
@@ -228,14 +256,33 @@ void AggregatesController::populate_drop_snapshots(PipelineSummary& summary) con
 void AggregatesController::evaluate_pending_if_needed(const Config& cfg,
                                                       PipelineSummary& summary) {
     auto& runtime = runtime_setup_mutable();
+    const auto snapshot_summary = summarize_drop_snapshots(summary.drop_snapshots);
+    const auto agg = openpenny::app::aggregate_counters();
+    const auto frozen_agg =
+        collector_ ? collect_frozen_aggregate_counters(*collector_) : std::nullopt;
+    const auto pending_rtx_count =
+        frozen_agg ? frozen_agg->pending_retransmissions : agg.pending_retransmissions;
+    const bool ready = aggregates_ready_for_evaluation(
+        required_drops_,
+        snapshot_summary.snapshot_count,
+        snapshot_summary.pending_snapshot_count,
+        pending_rtx_count);
     if (!cfg.active.aggregates_enabled ||
-        runtime.aggregates_status != RuntimeStatus::AggregatesStatus::PENDING ||
-        !aggregates_ready_.load(std::memory_order_relaxed) ||
+        current_aggregates_status() != RuntimeStatus::AggregatesStatus::PENDING ||
+        !ready ||
         summary.drop_snapshots.empty()) {
         return;
     }
+    aggregates_ready_.store(true, std::memory_order_relaxed);
+    if (frozen_agg) {
+        store_aggregate_snapshot_once(aggregates_snapshot_, aggregates_snapshot_mtx_, *frozen_agg);
+    } else {
+        store_aggregate_snapshot_once(aggregates_snapshot_, aggregates_snapshot_mtx_, agg);
+    }
     const auto& latest = summary.drop_snapshots.front();
-    const auto& stats = latest.snapshot.stats;
+    const auto stats = frozen_agg
+        ? make_eval_stats_from_aggregates(*frozen_agg)
+        : latest.snapshot.stats;
     const auto miss_prob = std::clamp(
         cfg.active.retransmission_miss_probability,
         0.0,
@@ -245,14 +292,20 @@ void AggregatesController::evaluate_pending_if_needed(const Config& cfg,
         miss_prob,
         cfg.active.max_duplicate_fraction);
     if (eval.decision == penny::FlowEngine::FlowDecision::FINISHED_CLOSED_LOOP) {
-        runtime.aggregates_status = RuntimeStatus::AggregatesStatus::CLOSED_LOOP;
+        set_current_aggregates_status(RuntimeStatus::AggregatesStatus::CLOSED_LOOP);
     } else if (eval.decision == penny::FlowEngine::FlowDecision::FINISHED_NOT_CLOSED_LOOP) {
-        runtime.aggregates_status = RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP;
+        set_current_aggregates_status(RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP);
+    } else if (eval.decision == penny::FlowEngine::FlowDecision::FINISHED_DUPLICATE_EXCEEDED) {
+        set_current_aggregates_status(RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED);
     } else {
-        runtime.aggregates_status = RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED;
+        set_current_aggregates_status(RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP);
+    }
+    set_current_has_aggregate_eval(true);
+    if (frozen_agg) {
+        set_runtime_eval_counters(runtime, *frozen_agg);
+    } else {
+        set_runtime_eval_counters(runtime, stats);
     }
-    runtime.has_aggregate_eval = true;
-    set_runtime_eval_counters(runtime, stats);
     collector_completed_.store(true, std::memory_order_relaxed);
 }
 
@@ -266,16 +319,15 @@ void AggregatesController::collector_loop() {
     //   2. Evaluate the aggregate stats once.
     //         - bidirectional / closed-loop -> stop the pipeline and
     //           report CLOSED_LOOP.
-    //         - duplicates exceeded         -> stop and report
-    //           DUPLICATES_EXCEEDED.
-    //         - anything else (NON_CLOSED_LOOP or no verdict yet)
-    //           -> fall through to step 3.
+    //         - anything else
+    //           (NON_CLOSED_LOOP or DUPLICATES_EXCEEDED)
+    //           -> freeze the aggregate verdict, then switch to the
+    //           separate per-flow phase.
     //   3. Watch the per-flow CLOSED_LOOP termination tally and stop as
     //      soon as it reaches `min_closed_loop_flows` (defaulting to 2
-    //      when the operator did not configure it). This is the
-    //      "look for the min flows" path and gives the run a chance
-    //      to upgrade to CLOSED_LOOP via per-flow evidence even when
-    //      the one-shot aggregate eval did not.
+    //      when the operator did not configure it). This is a separate
+    //      per-flow stop condition; it does NOT rewrite the aggregate
+    //      verdict from step 2.
     auto& runtime = runtime_setup_mutable();
     bool aggregate_eval_done = false;
     bool wait_for_closed_loops = false;
@@ -288,39 +340,87 @@ void AggregatesController::collector_loop() {
         cfg_.active.min_closed_loop_flows > 0
             ? cfg_.active.min_closed_loop_flows
             : static_cast<std::size_t>(2);
+    // Record the aggregate verdict in the runtime status and keep the first
+    // aggregate-counter snapshot. Counter precedence: frozen_agg, then stats,
+    // then agg_now; the stored snapshot is frozen_agg when set, else agg_now.
+    auto finalize_aggregate_verdict =
+        [&](RuntimeStatus::AggregatesStatus status,
+            const std::optional<openpenny::app::AggregatedCounters>& frozen_agg,
+            const openpenny::app::AggregatedCounters& agg_now,
+            const std::optional<penny::PennyStats>& stats) {
+            set_current_aggregates_status(status);
+            set_current_aggregates_active(false);
+            set_current_has_aggregate_eval(true);
+
+            // Publish the evaluation counters from the best available source.
+            if (frozen_agg.has_value()) {
+                set_runtime_eval_counters(runtime, *frozen_agg);
+            } else if (stats.has_value()) {
+                set_runtime_eval_counters(runtime, *stats);
+            } else {
+                set_runtime_eval_counters(runtime, agg_now);
+            }
+
+            // Only the frozen counters outrank the live aggregate as snapshot.
+            const auto& snapshot_source = frozen_agg.has_value() ? *frozen_agg : agg_now;
+            store_aggregate_snapshot_once(
+                aggregates_snapshot_,
+                aggregates_snapshot_mtx_,
+                snapshot_source);
+        };
+    // Freeze the aggregate verdict, then hand the loop over to the per-flow wait.
+    auto switch_to_individual_flow_phase =
+        [&](RuntimeStatus::AggregatesStatus status,
+            const char* verdict_text,
+            const std::optional<openpenny::app::AggregatedCounters>& frozen_agg,
+            const openpenny::app::AggregatedCounters& agg_now,
+            const std::optional<penny::PennyStats>& stats) {
+            finalize_aggregate_verdict(status, frozen_agg, agg_now, stats);
+            aggregate_eval_done = true;
+            wait_for_closed_loops = true;
+            const auto flows_needed = static_cast<unsigned long long>(closed_loop_required);
+            TCPLOG_INFO(
+                "[agg_phase] action=switch_to_individual agg_status=%s drops=%zu "
+                "next=individual wait_closed_loop_flows=%llu",
+                verdict_text, required_drops_, flows_needed);
+        };
     while (!stop_flag_.load(std::memory_order_relaxed)) {
         if (user_should_stop_ && user_should_stop_()) break;
         if (wait_for_closed_loops) {
             auto agg = openpenny::app::aggregate_counters();
             if (agg.flows_closed_loop >= closed_loop_required) {
                 TCPLOG_INFO(
-                    "[aggregates_closed_loop] flows_closed_loop=%llu flows_not_closed_loop=%llu flows_finished=%llu",
+                    "[closed_loop_threshold] flows_closed_loop=%llu flows_not_closed_loop=%llu flows_finished=%llu "
+                    "aggregate_status=%d",
                     static_cast<unsigned long long>(agg.flows_closed_loop),
                     static_cast<unsigned long long>(agg.flows_not_closed_loop),
-                    static_cast<unsigned long long>(agg.flows_finished));
-                runtime.aggregates_status = RuntimeStatus::AggregatesStatus::CLOSED_LOOP;
-                runtime.has_aggregate_eval = true;
-                set_runtime_eval_counters(runtime, agg);
+                    static_cast<unsigned long long>(agg.flows_finished),
+                    static_cast<int>(current_aggregates_status()));
                 collector_completed_.store(true, std::memory_order_relaxed);
-                store_aggregate_snapshot_once(aggregates_snapshot_, aggregates_snapshot_mtx_, agg);
+                closed_loop_stop_hit_.store(true, std::memory_order_relaxed);
                 stop_flag_.store(true, std::memory_order_relaxed);
                 break;
             }
+            std::this_thread::sleep_for(25ms);
+            continue;
         }
         bool ready = false;
-        bool pending = false;
-        bool pending_rtx = false;
         std::size_t snapshot_count = 0;
         std::size_t pending_snapshot_count = 0;
         std::uint64_t pending_rtx_count = 0;
         {
             const auto collector_summary = summarize_collector_snapshots(*collector_);
+            const auto frozen_agg = collect_frozen_aggregate_counters(*collector_);
             snapshot_count = collector_summary.snapshot_count;
             pending_snapshot_count = collector_summary.pending_snapshot_count;
-            pending = pending_snapshot_count > 0;
-            pending_rtx_count = openpenny::app::aggregate_counters().pending_retransmissions;
-            pending_rtx = pending_rtx_count > 0;
-            ready = snapshot_count >= required_drops_ && !pending && !pending_rtx;
+            pending_rtx_count = frozen_agg
+                ? frozen_agg->pending_retransmissions
+                : openpenny::app::aggregate_counters().pending_retransmissions;
+            ready = aggregates_ready_for_evaluation(
+                required_drops_,
+                snapshot_count,
+                pending_snapshot_count,
+                pending_rtx_count);
         }
         // Periodic gate diagnostic: when snapshot_count has reached the
         // required threshold but ready stays false, this line tells the
@@ -336,9 +436,8 @@ void AggregatesController::collector_loop() {
                 g_last_gate_log_ns.compare_exchange_strong(
                     last, next, std::memory_order_acq_rel)) {
                 TCPLOG_INFO(
-                    "[aggregates_gate] snapshots=%zu/%zu pending_snapshots=%zu "
-                    "pending_rtx=%llu (waiting for both to reach 0 before "
-                    "evaluating)",
+                    "[agg_wait] drops=%zu/%zu pending_snapshots=%zu pending_rtx=%llu "
+                    "state=waiting",
                     snapshot_count,
                     required_drops_,
                     pending_snapshot_count,
@@ -349,41 +448,49 @@ void AggregatesController::collector_loop() {
             aggregates_ready_.store(true, std::memory_order_relaxed);
             if (!ready_logged) {
                 TCPLOG_INFO(
-                    "Aggregates have %zu drops ready (required=%zu)",
+                    "[agg_ready] drops=%zu required=%zu",
                     snapshot_count,
                     required_drops_);
                 ready_logged = true;
             }
             collector_->accepting.store(false, std::memory_order_relaxed);
             const auto agg_now = openpenny::app::aggregate_counters();
+            const auto frozen_agg = collect_frozen_aggregate_counters(*collector_);
+            const auto eval_stats = frozen_agg
+                ? make_eval_stats_from_aggregates(*frozen_agg)
+                : penny::PennyStats{};
             if (cfg_.active.max_duplicate_fraction > 0.0) {
-                if (agg_now.data_packets > 0) {
-                    const double agg_dup_ratio = static_cast<double>(agg_now.duplicate_packets) /
-                                                 static_cast<double>(agg_now.data_packets);
+                const auto dup_data_packets =
+                    frozen_agg ? eval_stats.data_packets() : agg_now.data_packets;
+                const auto dup_packets =
+                    frozen_agg ? eval_stats.duplicate_packets() : agg_now.duplicate_packets;
+                if (dup_data_packets > 0) {
+                    const double agg_dup_ratio = static_cast<double>(dup_packets) /
+                                                 static_cast<double>(dup_data_packets);
                     if (agg_dup_ratio > cfg_.active.max_duplicate_fraction) {
-                        runtime.aggregates_status = RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED;
-                        runtime.aggregates_active = false;
-                        runtime.has_aggregate_eval = true;
-                        set_runtime_eval_counters(runtime, agg_now);
-                        collector_completed_.store(true, std::memory_order_relaxed);
-                        store_aggregate_snapshot_once(
-                            aggregates_snapshot_,
-                            aggregates_snapshot_mtx_,
-                            agg_now);
-                        stop_flag_.store(true, std::memory_order_relaxed);
-                        break;
+                        switch_to_individual_flow_phase(
+                            RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED,
+                            "duplicates_exceeded",
+                            frozen_agg,
+                            agg_now,
+                            std::nullopt);
                     }
                 }
             }
             if (!aggregate_eval_done) {
                 aggregate_eval_done = true;
-                auto latest_snapshot = collect_latest_drop_snapshot(*collector_, agg_now);
+                auto latest_snapshot = collect_latest_drop_snapshot(*collector_);
 
                 if (latest_snapshot) {
-                    if (agg_now.pending_retransmissions > 0) {
+                    const auto pending_window_rtx = frozen_agg
+                        ? frozen_agg->pending_retransmissions
+                        : agg_now.pending_retransmissions;
+                    if (pending_window_rtx > 0) {
                         continue;
                     }
-                    auto stats = latest_snapshot->snapshot.stats;
+                    const auto stats = frozen_agg
+                        ? make_eval_stats_from_aggregates(*frozen_agg)
+                        : latest_snapshot->snapshot.stats;
                     const auto miss_prob = std::clamp(
                         cfg_.active.retransmission_miss_probability,
                         0.0,
@@ -399,12 +506,26 @@ void AggregatesController::collector_loop() {
                         miss_prob,
                         cfg_.active.max_duplicate_fraction);
                     const auto packet_id_text = penny::format_packet_drop_id(latest_snapshot->packet_id);
-
-                    const auto denom = eval.p_closed + eval.p_not_closed;
+                    const auto* eval_verdict_text = [&]() -> const char* {
+                        switch (eval.decision) {
+                            case penny::FlowEngine::FlowDecision::FINISHED_CLOSED_LOOP:
+                                return "closed_loop";
+                            case penny::FlowEngine::FlowDecision::FINISHED_NOT_CLOSED_LOOP:
+                                return "not_closed_loop";
+                            case penny::FlowEngine::FlowDecision::FINISHED_DUPLICATE_EXCEEDED:
+                                return "duplicates_exceeded";
+                            case penny::FlowEngine::FlowDecision::FINISHED_NO_DECISION:
+                                return "no_decision";
+                            case penny::FlowEngine::FlowDecision::PENDING:
+                            default:
+                                return "pending";
+                        }
+                    }();
                     TCPLOG_INFO(
-                        "[agg_eval] data_pkts=%llu dup_pkts=%llu rtx_pkts=%llu non_rtx_pkts=%llu "
-                        "dup_ratio=%.6f miss_prob=%.6f p_closed=%.6f p_not_closed=%.6f denom=%.6f closed_weight=%.6f decision=%s "
+                        "[agg_eval] verdict=%s data=%llu dup=%llu rtx=%llu non_rtx=%llu "
+                        "dup_ratio=%.6f miss_prob=%.6f p_closed=%.6f p_not_closed=%.6f closed_weight=%.6f "
                         "packet_id=%s thread=%s",
+                        eval_verdict_text,
                         static_cast<unsigned long long>(stats.data_packets()),
                         static_cast<unsigned long long>(stats.duplicate_packets()),
                         static_cast<unsigned long long>(stats.retransmitted_packets()),
@@ -413,70 +534,38 @@ void AggregatesController::collector_loop() {
                         miss_prob,
                         eval.p_closed,
                         eval.p_not_closed,
-                        denom,
                         eval.closed_weight,
-                        penny::flow_decision_to_string(eval.decision),
                         packet_id_text.c_str(),
                         latest_snapshot->thread_name.c_str());
 
                     if (dup_threshold_hit) {
-                        runtime.aggregates_status = RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED;
-                        runtime.aggregates_active = false;
-                        runtime.has_aggregate_eval = true;
-                        set_runtime_eval_counters(runtime, stats);
-                        collector_completed_.store(true, std::memory_order_relaxed);
-                        store_aggregate_snapshot_once(
-                            aggregates_snapshot_,
-                            aggregates_snapshot_mtx_,
-                            agg_now);
-                        break;
+                        switch_to_individual_flow_phase(
+                            RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED,
+                            "duplicates_exceeded",
+                            frozen_agg,
+                            agg_now,
+                            stats);
+                        continue;
                     }
 
                     if (eval.decision == penny::FlowEngine::FlowDecision::FINISHED_CLOSED_LOOP) {
-                        runtime.aggregates_status = RuntimeStatus::AggregatesStatus::CLOSED_LOOP;
-                        store_aggregate_snapshot_once(
-                            aggregates_snapshot_,
-                            aggregates_snapshot_mtx_,
-                            agg_now);
-                        runtime.has_aggregate_eval = true;
-                        set_runtime_eval_counters(runtime, stats);
-                        collector_completed_.store(true, std::memory_order_relaxed);
-                        stop_flag_.store(true, std::memory_order_relaxed);
-                        break;
-                    } else if (eval.decision == penny::FlowEngine::FlowDecision::FINISHED_NOT_CLOSED_LOOP) {
-                        runtime.aggregates_status = RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP;
-                    }
-
-                    set_runtime_eval_counters(runtime, stats);
-                    runtime.has_aggregate_eval = true;
-
-                    if (cfg_.active.aggregates_enabled &&
-                        eval.decision != penny::FlowEngine::FlowDecision::FINISHED_CLOSED_LOOP) {
-                        // Aggregate eval at `required_drops_` drops did not
-                        // produce a bidirectional verdict; switch to
-                        // step 3 of the contract and wait for
-                        // closed_loop_required per-flow CLOSED_LOOP
-                        // terminations before declaring the run done.
-                        runtime.aggregates_active = false;
-                        wait_for_closed_loops = true;
-                        TCPLOG_INFO(
-                            "[agg_eval_fallback] aggregate verdict %s after %zu drops; "
-                            "waiting for %llu closed-loop flow%s before finishing",
-                            penny::flow_decision_to_string(eval.decision),
-                            required_drops_,
-                            static_cast<unsigned long long>(closed_loop_required),
-                            closed_loop_required == 1 ? "" : "s");
-                    } else {
-                        store_aggregate_snapshot_once(
-                            aggregates_snapshot_,
-                            aggregates_snapshot_mtx_,
-                            agg_now);
+                        finalize_aggregate_verdict(
+                            RuntimeStatus::AggregatesStatus::CLOSED_LOOP,
+                            frozen_agg,
+                            agg_now,
+                            stats);
                         collector_completed_.store(true, std::memory_order_relaxed);
                         stop_flag_.store(true, std::memory_order_relaxed);
                         break;
                     }
+                    switch_to_individual_flow_phase(
+                        RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP,
+                        "not_closed_loop",
+                        frozen_agg,
+                        agg_now,
+                        stats);
                 } else {
-                    runtime.aggregates_status = RuntimeStatus::AggregatesStatus::PENDING;
+                    set_current_aggregates_status(RuntimeStatus::AggregatesStatus::PENDING);
                 }
             }
         }
@@ -488,7 +577,7 @@ void AggregatesController::individual_limit_loop() {
     using namespace std::chrono_literals;
     while (!stop_flag_.load(std::memory_order_relaxed)) {
         if (collector_enabled_ &&
-            runtime_setup_mutable().aggregates_status == RuntimeStatus::AggregatesStatus::PENDING) {
+            current_aggregates_status() == RuntimeStatus::AggregatesStatus::PENDING) {
             std::this_thread::sleep_for(100ms);
             continue;
         }
@@ -518,7 +607,7 @@ void AggregatesController::min_closed_loop_loop() {
     // and the aggregate eval (if enabled) is not still pending.
     while (!stop_flag_.load(std::memory_order_relaxed)) {
         if (collector_enabled_ &&
-            runtime_setup_mutable().aggregates_status == RuntimeStatus::AggregatesStatus::PENDING) {
+            current_aggregates_status() == RuntimeStatus::AggregatesStatus::PENDING) {
             std::this_thread::sleep_for(100ms);
             continue;
         }
@@ -533,14 +622,11 @@ void AggregatesController::min_closed_loop_loop() {
                 static_cast<unsigned long long>(agg.flows_not_closed_loop),
                 static_cast<unsigned long long>(agg.flows_rst),
                 static_cast<unsigned long long>(agg.flows_duplicates_exceeded));
-            store_aggregate_snapshot_once(aggregates_snapshot_, aggregates_snapshot_mtx_, agg);
-            // If the aggregate eval has not produced a verdict yet, mark
-            // it CLOSED_LOOP since we have collected enough closed-loop
-            // evidence on its own.
             auto& runtime = runtime_setup_mutable();
-            if (runtime.aggregates_status == RuntimeStatus::AggregatesStatus::PENDING) {
-                runtime.aggregates_status = RuntimeStatus::AggregatesStatus::CLOSED_LOOP;
-                runtime.has_aggregate_eval = true;
+            if (current_aggregates_status() == RuntimeStatus::AggregatesStatus::PENDING) {
+                store_aggregate_snapshot_once(aggregates_snapshot_, aggregates_snapshot_mtx_, agg);
+                set_current_aggregates_status(RuntimeStatus::AggregatesStatus::CLOSED_LOOP);
+                set_current_has_aggregate_eval(true);
                 set_runtime_eval_counters(runtime, agg);
             }
             collector_completed_.store(true, std::memory_order_relaxed);
diff --git a/src/app/core/DropCollectorBinding.cpp b/src/app/core/DropCollectorBinding.cpp
index 1494cda..7ed9e14 100644
--- a/src/app/core/DropCollectorBinding.cpp
+++ b/src/app/core/DropCollectorBinding.cpp
@@ -2,11 +2,9 @@
 
 #include "openpenny/app/core/DropCollectorBinding.h"
 
-#include "openpenny/penny/flow/engine/FlowEngine.h"
-#include "openpenny/penny/flow/timer/ThreadFlowEventTimer.h"
+#include "openpenny/app/core/PerThreadStats.h"
 
 #include <algorithm>
-#include <utility>
 
 namespace openpenny::app {
 namespace {
@@ -20,57 +18,62 @@ bool is_pending_snapshot(const penny::PacketDropSnapshot& snap) noexcept {
     return snap.state == penny::SnapshotState::Pending;
 }
 
-} // namespace
-
-DropCollectorBinding& DropCollectorBinding::instance() {
-    static DropCollectorBinding inst;
-    return inst;
+bool try_reserve_snapshot_slot(DropCollector& collector) noexcept { // Claims one slot against collector.snapshot_limit; false once the limit is reached.
+    if (collector.snapshot_limit == 0) { // A limit of 0 always succeeds and leaves the counter untouched.
+        return true;
+    }
+    auto reserved = collector.accepted_snapshot_count.load(std::memory_order_relaxed);
+    while (reserved < collector.snapshot_limit) { // A failed CAS refreshes `reserved`, so the bound is re-checked on every retry.
+        if (collector.accepted_snapshot_count.compare_exchange_weak(
+                reserved,
+                reserved + 1,
+                std::memory_order_acq_rel,
+                std::memory_order_relaxed)) {
+            return true; // This caller performed the increment and owns the slot.
+        }
+    }
+    return false; // Count is at or above the limit; nothing was reserved.
 }
 
-void DropCollectorBinding::ensure_snapshot_hook() {
-    std::call_once(hook_once_, []() {
-        penny::ThreadFlowEventTimerManager::set_snapshot_hook(
-            [](penny::FlowEngine* flow,
-               penny::PacketDropId packet_id,
-               penny::ThreadFlowEventTimerManager::SnapshotEventKind /*kind*/) {
-                auto& self = DropCollectorBinding::instance();
-                const auto binding = self.lookup(flow);
-                if (!binding.collector) return;
-                if (!binding.collector->accepting.load(std::memory_order_relaxed)) return;
-
-                const auto& snaps = flow->drop_snapshots();
-                const auto key = flow->flow_key();
-                auto& shard = binding.collector->shard_for(binding.shard_index);
-
-                std::lock_guard<std::mutex> lock(shard.mtx);
-                if (!binding.collector->accepting.load(std::memory_order_relaxed)) return;
-                // Mirror any updated packet drop snapshots from the FlowEngine into
-                // the shared collector so aggregate decisions see fresh stats.
-                for (const auto& pair : snaps) {
-                    if (packet_id != 0 && pair.first != packet_id) continue;
-                    self.upsert_locked(binding, key, pair.first, pair.second);
-                }
-            });
-    });
+void maybe_freeze_aggregate_window(DropCollector& collector, // Stores `agg` as the frozen aggregate counters once the accepted count has reached snapshot_limit.
+                                   const openpenny::app::AggregatedCounters& agg) {
+    if (collector.snapshot_limit == 0 || // Limit of 0: never freezes.
+        collector.accepted_snapshot_count.load(std::memory_order_relaxed) < collector.snapshot_limit) { // Limit not reached yet.
+        return;
+    }
+    std::lock_guard<std::mutex> lock(collector.frozen_aggregate_counters_mtx);
+    if (!collector.frozen_aggregate_counters) { // Only the first call past the limit stores a value; later calls leave it as is.
+        collector.frozen_aggregate_counters = agg;
+    }
 }
 
-void DropCollectorBinding::bind(penny::FlowEngine* flow,
-                                DropCollectorPtr collector,
-                                const std::string& thread_name,
-                                std::size_t shard_index) {
-    if (!flow || !collector) return;
-    std::lock_guard<std::mutex> lock(mtx_);
-    bindings_[flow] = BindingContext{
-        std::move(collector),
-        thread_name,
-        shard_index
-    };
+void apply_frozen_aggregate_transition(DropCollector& collector, // Applies a snapshot state change (before -> after) to the frozen aggregate counters, if any exist.
+                                       const penny::PacketDropSnapshot& before,
+                                       const penny::PacketDropSnapshot& after) {
+    if (before.state == after.state) { // Same state: counters are unaffected.
+        return;
+    }
+    std::lock_guard<std::mutex> lock(collector.frozen_aggregate_counters_mtx);
+    if (!collector.frozen_aggregate_counters) { // Nothing frozen yet, so there is nothing to adjust.
+        return;
+    }
+    auto& agg = *collector.frozen_aggregate_counters;
+    if (before.state == penny::SnapshotState::Pending &&
+        agg.pending_retransmissions > 0) { // Skips the decrement when the counter is already 0.
+        --agg.pending_retransmissions;
+    }
+    if (after.state == penny::SnapshotState::Retransmitted) { // NOTE(review): a non-Pending `before` is not subtracted from its old bucket; confirm such transitions cannot occur.
+        ++agg.retransmitted_packets;
+    } else if (after.state == penny::SnapshotState::Expired) {
+        ++agg.non_retransmitted_packets;
+    }
 }
 
-void DropCollectorBinding::unbind(penny::FlowEngine* flow) {
-    if (!flow) return;
-    std::lock_guard<std::mutex> lock(mtx_);
-    bindings_.erase(flow);
+} // namespace
+
+DropCollectorBinding& DropCollectorBinding::instance() { // Accessor for the single shared DropCollectorBinding object.
+    static DropCollectorBinding inst; // Function-local static: constructed on first call, same object returned afterwards.
+    return inst;
 }
 
 void DropCollectorBinding::upsert(DropCollectorPtr collector,
@@ -84,30 +87,43 @@ void DropCollectorBinding::upsert(DropCollectorPtr collector,
     auto& shard = collector->shard_for(shard_index);
     std::lock_guard<std::mutex> lock(shard.mtx);
     if (!collector->accepting.load(std::memory_order_relaxed)) return;
-    upsert_locked(BindingContext{collector, thread_name, shard_index}, key, packet_id, snap);
+    upsert_locked(*collector, shard, thread_name, key, packet_id, snap);
 }
 
-DropCollectorBinding::BindingContext DropCollectorBinding::lookup(penny::FlowEngine* flow) const {
-    std::lock_guard<std::mutex> lock(mtx_);
-    auto it = bindings_.find(flow);
-    if (it != bindings_.end()) {
-        return it->second;
+void DropCollectorBinding::refresh_from( // Upserts snapshots[start_index..end) for `key` into the collector shard selected by shard_index.
+    DropCollectorPtr collector,
+    const std::string& thread_name,
+    std::size_t shard_index,
+    const FlowKey& key,
+    const std::vector<std::pair<penny::PacketDropId, penny::PacketDropSnapshot>>& snapshots,
+    std::size_t start_index) {
+    if (!collector) return; // No collector: nothing to do.
+    if (!collector->accepting.load(std::memory_order_relaxed)) return; // Early exit before taking the shard lock.
+    if (start_index >= snapshots.size()) return; // Empty range.
+
+    auto& shard = collector->shard_for(shard_index);
+    std::lock_guard<std::mutex> lock(shard.mtx);
+    if (!collector->accepting.load(std::memory_order_relaxed)) return; // Re-check under the lock in case accepting was cleared meanwhile.
+
+    for (std::size_t i = start_index; i < snapshots.size(); ++i) { // Each entry is (packet id, snapshot).
+        const auto& pair = snapshots[i];
+        upsert_locked(*collector, shard, thread_name, key, pair.first, pair.second);
     }
-    return {};
 }
 
-void DropCollectorBinding::upsert_locked(const BindingContext& binding,
+void DropCollectorBinding::upsert_locked(DropCollector& collector,
+                                         DropCollector::Shard& shard,
+                                         const std::string& thread_name,
                                          const FlowKey& key,
                                          penny::PacketDropId packet_id,
                                          const penny::PacketDropSnapshot& snap) {
-    if (!binding.collector) return;
-    auto& shard = binding.collector->shard_for(binding.shard_index);
     auto& snapshots = shard.snapshots;
     DropCollector::SnapshotKey snapshot_key{key, packet_id};
 
     auto index_it = shard.snapshot_index.find(snapshot_key);
     if (index_it != shard.snapshot_index.end()) {
         auto& rec = snapshots[index_it->second];
+        const auto previous_snapshot = rec.snapshot;
         auto pending_count = shard.pending_snapshot_count.load(std::memory_order_relaxed);
         const bool was_pending = is_pending_snapshot(rec.snapshot);
         const bool now_pending = is_pending_snapshot(snap);
@@ -120,9 +136,14 @@ void DropCollectorBinding::upsert_locked(const BindingContext& binding,
             }
             shard.pending_snapshot_count.store(pending_count, std::memory_order_relaxed);
         }
+        apply_frozen_aggregate_transition(collector, previous_snapshot, snap);
     } else {
+        if (!try_reserve_snapshot_slot(collector)) {
+            return;
+        }
+        const auto agg_now = openpenny::app::aggregate_counters();
         const auto idx = snapshots.size();
-        snapshots.push_back(DropSnapshotRecord{key, packet_id, snap, {}, binding.thread_name});
+        snapshots.push_back(DropSnapshotRecord{key, packet_id, snap, agg_now, thread_name});
         shard.snapshot_index.emplace(std::move(snapshot_key), idx);
         shard.snapshot_count.store(snapshots.size(), std::memory_order_relaxed);
         if (is_pending_snapshot(snap)) {
@@ -137,6 +158,7 @@ void DropCollectorBinding::upsert_locked(const BindingContext& binding,
             shard.latest_snapshot_index.store(idx, std::memory_order_relaxed);
             shard.latest_snapshot_timestamp.store(ts, std::memory_order_relaxed);
         }
+        maybe_freeze_aggregate_window(collector, agg_now);
     }
 }
 
diff --git a/src/app/core/OpenpennyPipelineDriver.cpp b/src/app/core/OpenpennyPipelineDriver.cpp
index 0667b01..7768f69 100644
--- a/src/app/core/OpenpennyPipelineDriver.cpp
+++ b/src/app/core/OpenpennyPipelineDriver.cpp
@@ -205,6 +205,9 @@ PipelineSummary drive_pipeline(const Config& cfg_in, const PipelineOptions& opts
     TCPLOG_INFO("[openpenny] traffic match: %s",
                 net::describe_traffic_match(opts_local.traffic_match).c_str());
 
+    // Number of queues to process traffic.
+    const unsigned qcount = std::max(1u, opts_local.queue_count);
+
     // Capture the runtime setup at worker start so observers can inspect it.
     set_runtime_setup(cfg,
                       opts_local,
@@ -219,8 +222,6 @@ PipelineSummary drive_pipeline(const Config& cfg_in, const PipelineOptions& opts
     auto matcher = [&](const FlowKey& key) {
         return net::traffic_matches_flow(opts_local.traffic_match, key);
     };
-    // Number of queues to process traffic.
-    const unsigned qcount = std::max(1u, opts_local.queue_count);
 
     // ------------------------------------------------------------------
     // One-line startup summary at INFO. With many queues the per-worker
@@ -338,6 +339,7 @@ PipelineSummary drive_pipeline(const Config& cfg_in, const PipelineOptions& opts
     aggregates_controller.join();
     const auto agg_counters_now = openpenny::app::aggregate_counters();
     bool individual_stop_hit = aggregates_controller.individual_stop_hit();
+    bool closed_loop_stop_hit = aggregates_controller.closed_loop_stop_hit();
     if (!individual_stop_hit &&
         cfg.active.stop_after_individual_flows > 0 &&
         opts_local.mode == PipelineOptions::Mode::Active &&
@@ -346,12 +348,18 @@ PipelineSummary drive_pipeline(const Config& cfg_in, const PipelineOptions& opts
     }
     if (individual_stop_hit &&
         cfg.active.aggregates_enabled &&
-        runtime_setup_mutable().aggregates_status == RuntimeStatus::AggregatesStatus::PENDING &&
+        current_aggregates_status() == RuntimeStatus::AggregatesStatus::PENDING &&
         aggregates_controller.aggregates_ready()) {
-        runtime_setup_mutable().aggregates_status = RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED;
+        set_current_aggregates_status(RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP);
     }
     aggregates_controller.populate_drop_snapshots(summary);
     aggregates_controller.evaluate_pending_if_needed(cfg, summary);
+    if (!closed_loop_stop_hit &&
+        opts_local.mode == PipelineOptions::Mode::Active &&
+        cfg.active.min_closed_loop_flows > 0 &&
+        agg_counters_now.flows_closed_loop >= cfg.active.min_closed_loop_flows) {
+        closed_loop_stop_hit = true;
+    }
 
     // Fold per-thread results into a single aggregated ModeResult.
     ModeResult aggregate{};
@@ -385,12 +393,26 @@ PipelineSummary drive_pipeline(const Config& cfg_in, const PipelineOptions& opts
                 r->passive_gap_summaries.begin(),
                 r->passive_gap_summaries.end());
         }
+        if (!r->closed_loop_flow_summaries.empty()) {
+            aggregate.closed_loop_flow_summaries.insert(
+                aggregate.closed_loop_flow_summaries.end(),
+                r->closed_loop_flow_summaries.begin(),
+                r->closed_loop_flow_summaries.end());
+        }
+        if (!r->duplicate_exceeded_flow_summaries.empty()) {
+            aggregate.duplicate_exceeded_flow_summaries.insert(
+                aggregate.duplicate_exceeded_flow_summaries.end(),
+                r->duplicate_exceeded_flow_summaries.begin(),
+                r->duplicate_exceeded_flow_summaries.end());
+        }
 
         // Completion flags are combined with logical OR.
         aggregate.penny_completed =
             aggregate.penny_completed || r->penny_completed;
         aggregate.aggregates_penny_completed =
             aggregate.aggregates_penny_completed || r->aggregates_penny_completed;
+        aggregate.closed_loop_stop_hit =
+            aggregate.closed_loop_stop_hit || r->closed_loop_stop_hit;
     }
     // Use aggregated counters to avoid undercounting packets processed.
     aggregate.packets_processed = std::max<std::size_t>(
@@ -398,16 +420,23 @@ PipelineSummary drive_pipeline(const Config& cfg_in, const PipelineOptions& opts
         static_cast<std::size_t>(agg_counters_now.packets));
     if (aggregates_controller.collector_completed()) {
         const bool agg_done_status =
-            runtime_setup_mutable().aggregates_status != RuntimeStatus::AggregatesStatus::PENDING;
+            current_aggregates_status() != RuntimeStatus::AggregatesStatus::PENDING;
         aggregate.aggregates_penny_completed = agg_done_status;
         aggregate.penny_completed = agg_done_status;
     }
     if (individual_stop_hit) {
         aggregate.penny_completed = true;
     }
+    if (closed_loop_stop_hit) {
+        aggregate.closed_loop_stop_hit = true;
+    }
     if (auto snapshot = aggregates_controller.aggregates_snapshot()) {
         aggregate.aggregates_snapshot = snapshot;
     }
+    std::sort(aggregate.closed_loop_flow_summaries.begin(),
+              aggregate.closed_loop_flow_summaries.end());
+    std::sort(aggregate.duplicate_exceeded_flow_summaries.begin(),
+              aggregate.duplicate_exceeded_flow_summaries.end());
 
     // Only populate the summary if at least one worker reported results.
     if (any) {
diff --git a/src/app/core/PerThreadStats.cpp b/src/app/core/PerThreadStats.cpp
index fa5b934..9c1ee35 100644
--- a/src/app/core/PerThreadStats.cpp
+++ b/src/app/core/PerThreadStats.cpp
@@ -60,6 +60,9 @@ static std::atomic<std::size_t> g_counters_size{1};
 void init_thread_counters(std::size_t count) {
     const auto clamped = std::min(count, kMaxCounters);
 
+    for (auto& counter : g_counters) {
+        counter = {};
+    }
     for (auto& counter : g_drop_budget_counters) {
         counter.drops.store(0, std::memory_order_relaxed);
     }
diff --git a/src/app/core/RuntimeSetup.cpp b/src/app/core/RuntimeSetup.cpp
index 0093c82..6a8095e 100644
--- a/src/app/core/RuntimeSetup.cpp
+++ b/src/app/core/RuntimeSetup.cpp
@@ -2,9 +2,15 @@
 
 #include "openpenny/app/core/RuntimeSetup.h"
 
+#include <atomic>
+
 namespace openpenny {
 namespace {
 RuntimeSetupSnapshot g_runtime_setup;
+std::atomic<bool> g_aggregates_active{true}; // Atomic copy of the aggregates_active flag; starts true.
+std::atomic<int> g_aggregates_status{ // Holds a RuntimeStatus::AggregatesStatus value as int; starts at PENDING.
+    static_cast<int>(RuntimeStatus::AggregatesStatus::PENDING)};
+std::atomic<bool> g_has_aggregate_eval{false}; // Atomic copy of the has_aggregate_eval flag; starts false.
 }
 
 void set_runtime_setup(const Config& cfg,
@@ -15,6 +21,16 @@ void set_runtime_setup(const Config& cfg,
     g_runtime_setup.options = opts;
     g_runtime_setup.use_xdp = use_xdp;
     g_runtime_setup.use_dpdk = use_dpdk;
+    g_runtime_setup.aggregates_active = true;
+    g_runtime_setup.testing_finished = false;
+    g_runtime_setup.aggregates_status = RuntimeStatus::AggregatesStatus::PENDING;
+    g_runtime_setup.aggregate_eval_counters = {};
+    g_runtime_setup.has_aggregate_eval = false;
+    g_aggregates_active.store(true, std::memory_order_release);
+    g_aggregates_status.store(
+        static_cast<int>(RuntimeStatus::AggregatesStatus::PENDING),
+        std::memory_order_release);
+    g_has_aggregate_eval.store(false, std::memory_order_release);
 }
 
 const RuntimeSetupSnapshot& current_runtime_setup() {
@@ -25,4 +41,32 @@ RuntimeSetupSnapshot& runtime_setup_mutable() {
     return g_runtime_setup;
 }
 
+bool current_aggregates_active() noexcept { // Reads the atomic aggregates-active flag.
+    return g_aggregates_active.load(std::memory_order_acquire); // Acquire load, matching the release stores done by the setters.
+}
+
+void set_current_aggregates_active(bool value) noexcept { // Updates both the snapshot field and its atomic copy.
+    g_runtime_setup.aggregates_active = value; // NOTE(review): plain assignment; confirm this field is not read concurrently through the snapshot.
+    g_aggregates_active.store(value, std::memory_order_release);
+}
+
+RuntimeStatus::AggregatesStatus current_aggregates_status() noexcept { // Reads the atomic aggregates status.
+    return static_cast<RuntimeStatus::AggregatesStatus>( // The atomic holds the enum as int; cast it back on the way out.
+        g_aggregates_status.load(std::memory_order_acquire));
+}
+
+void set_current_aggregates_status(RuntimeStatus::AggregatesStatus status) noexcept { // Updates both the snapshot field and its atomic copy.
+    g_runtime_setup.aggregates_status = status; // Snapshot field keeps the enum value.
+    g_aggregates_status.store(static_cast<int>(status), std::memory_order_release); // Atomic copy stores it as int.
+}
+
+bool current_has_aggregate_eval() noexcept { // Reads the atomic has-aggregate-eval flag.
+    return g_has_aggregate_eval.load(std::memory_order_acquire);
+}
+
+void set_current_has_aggregate_eval(bool value) noexcept { // Updates both the snapshot field and its atomic copy.
+    g_runtime_setup.has_aggregate_eval = value;
+    g_has_aggregate_eval.store(value, std::memory_order_release);
+}
+
 } // namespace openpenny
diff --git a/src/app/core/active/ActiveTestPipeline.cpp b/src/app/core/active/ActiveTestPipeline.cpp
index b0f7348..a36158b 100644
--- a/src/app/core/active/ActiveTestPipeline.cpp
+++ b/src/app/core/active/ActiveTestPipeline.cpp
@@ -7,6 +7,7 @@
 #include <cstring>
 #include <exception>
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <mutex>
 #include <thread>
@@ -20,6 +21,7 @@
 #include "openpenny/app/core/PipelineRunner.h"
 #include "openpenny/app/core/PerThreadStats.h"
 #include "openpenny/app/core/DropCollectorBinding.h"
+#include "openpenny/app/core/RuntimeSetup.h"
 #include "openpenny/log/Log.h"
 #include "openpenny/penny/flow/engine/FlowEngine.h"
 #include "openpenny/penny/flow/timer/ThreadFlowEventTimer.h"
@@ -30,6 +32,20 @@ namespace openpenny {
 
 namespace {
 thread_local ActiveTestPipelineRunner* tls_runner = nullptr;
+
+std::string format_closed_loop_flow_summary(const FlowKey& key, // Builds a one-line text summary of a flow: its debug details followed by name=value counters.
+                                            const penny::FlowEngine& flow) {
+    std::ostringstream summary;
+    summary << flow_debug_details(key) // Flow description comes first.
+            << " data=" << flow.data_packets() // Each counter is appended as " name=value", read from `flow`.
+            << " dropped=" << flow.dropped_packets()
+            << " rtx=" << flow.retransmitted_packets()
+            << " non_rtx=" << flow.non_retransmitted_packets()
+            << " dup=" << flow.duplicate_packets()
+            << " in_order=" << flow.in_order_packets()
+            << " out_of_order=" << flow.out_of_order_packets();
+    return summary.str();
+}
 } // namespace
 
 // Constructs an active OpenPenny traffic processing pipeline runner.
@@ -53,7 +69,6 @@ ActiveTestPipelineRunner::ActiveTestPipelineRunner(
         std::chrono::duration<double>(cfg.active.flow_idle_timeout_seconds))} // Idle expiry window.
 {
     if (drop_collector_) {
-        app::DropCollectorBinding::instance().ensure_snapshot_hook();
         flow_manager_.set_drop_sink(
             [collector = drop_collector_,
              name = thread_name_,
@@ -68,6 +83,21 @@ ActiveTestPipelineRunner::ActiveTestPipelineRunner(
                 packet_id,
                 snapshot);
         });
+        flow_manager_.set_snapshot_refresh_sink(
+            [collector = drop_collector_,
+             name = thread_name_,
+             shard_index = drop_collector_shard_index_](
+                const FlowKey& key,
+                const std::vector<std::pair<penny::PacketDropId, penny::PacketDropSnapshot>>& snapshots,
+                std::size_t start_index) {
+                app::DropCollectorBinding::instance().refresh_from(
+                    collector,
+                    name,
+                    shard_index,
+                    key,
+                    snapshots,
+                    start_index);
+            });
     }
 }
 
@@ -178,7 +208,8 @@ void ActiveTestPipelineRunner::after_poll(
     if (idle_timeout_.count() > 0) {
         expire_idle_flows(now);
     }
-    sweep_expired_snapshots(now);
+    evaluate_individual_flows_if_enabled();
+    complete_resolved_terminal_flows();
     // Mirrors the post-loop drain in the legacy run() so deferred
     // expirations aren't stranded between iterations.
     penny::ThreadFlowEventTimerManager::instance().drain_callbacks();
@@ -187,17 +218,23 @@ void ActiveTestPipelineRunner::after_poll(
 void ActiveTestPipelineRunner::on_closing() {
     // Flush any callbacks that arrived after the final poll iteration.
     penny::ThreadFlowEventTimerManager::instance().drain_callbacks();
-    sweep_expired_snapshots(std::chrono::steady_clock::now());
+    evaluate_individual_flows_if_enabled(); // Same two calls, in the same order, that after_poll() and finalize() make.
+    complete_resolved_terminal_flows(); // Runs after the evaluation step above.
 }
 
 void ActiveTestPipelineRunner::finalize(ModeResult& result) {
-    // Expire any pending snapshots on remaining flows to ensure expirations are logged/applied.
+    // Resolve any pending snapshots on remaining flows without bypassing the
+    // configured retransmission timeout at shutdown.
     flow_manager_.for_each_flow([](const FlowKey&, penny::FlowEngineEntry& entry) {
-        entry.flow.expire_all_pending_snapshots();
+        entry.flow.resolve_pending_snapshots(std::chrono::steady_clock::now()); // The clock is read once per flow, inside the callback.
     });
+    evaluate_individual_flows_if_enabled(); // Same two calls that after_poll() and on_closing() make.
+    complete_resolved_terminal_flows();
 
     result.packets_forwarded = total_pkts_forwarded_;
     result.forward_errors = total_forward_errors_;
+    result.closed_loop_flow_summaries = closed_loop_flow_summaries_; // Copies this runner's collected summaries into the result.
+    result.duplicate_exceeded_flow_summaries = duplicate_exceeded_flow_summaries_;
 }
 
 // ---------------------------------------------------------------------------
@@ -209,32 +246,105 @@ void ActiveTestPipelineRunner::expire_idle_flows(const std::chrono::steady_clock
     if (idle_timeout_.count() <= 0) return;
     auto expired = flow_manager_.collect_idle_flows(now, idle_timeout_);
     for (const auto& key : expired) {
-        if (auto* entry = flow_manager_.find(key)) {
-            app::DropCollectorBinding::instance().unbind(&entry->flow);
-        }
-        flow_manager_.complete_flow(key, "idle_timeout");
+        complete_flow_with_summary(key, "idle_timeout");
     }
 }
 
-void ActiveTestPipelineRunner::sweep_expired_snapshots(const std::chrono::steady_clock::time_point& now) {
-    // Expire packet drop snapshots using the configured retransmission timeout (seconds).
-    const auto retransmission_timeout = std::chrono::duration<double>(cfg_.active.rtt_timeout_factor);
-    if (retransmission_timeout.count() <= 0.0) return;
-    flow_manager_.for_each_flow([&](const FlowKey&, penny::FlowEngineEntry& entry) {
-        const auto& snaps = entry.flow.drop_snapshots();
-        for (const auto& pair : snaps) {
-            if (pair.second.state != penny::SnapshotState::Pending) continue;
-            if (now - pair.second.timestamp >= retransmission_timeout) {
-                if (TCPLOG_ENABLED(INFO)) {
-                    const auto packet_id_text = penny::format_packet_drop_id(pair.first);
-                    TCPLOG_INFO("[packet_expired] flow=%s packet_id=%s",
-                                flow_debug_details(entry.flow.flow_key()).c_str(),
-                                packet_id_text.c_str());
+bool ActiveTestPipelineRunner::individual_flow_evaluation_enabled() const {
+    const bool aggregate_phase_configured =
+        cfg_.active.aggregates_enabled &&
+        cfg_.active.max_drops_aggregates > 0;
+    if (!aggregate_phase_configured) {
+        return true;
+    }
+    const auto status = openpenny::current_aggregates_status();
+    return status == RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP ||
+           status == RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED;
+}
+
+void ActiveTestPipelineRunner::evaluate_individual_flows_if_enabled() {
+    if (!individual_flow_evaluation_enabled()) {
+        return;
+    }
+
+    flow_manager_.for_each_flow([&](const FlowKey& key, penny::FlowEngineEntry& entry) {
+        const bool immutable_terminal_state =
+            entry.state == penny::FlowTrackingState::INTERRUPTED_RST ||
+            entry.state == penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED ||
+            entry.state == penny::FlowTrackingState::INTERRUPTED_OUT_OF_ORDER_EXCEEDED ||
+            entry.state == penny::FlowTrackingState::FINISHED;
+
+        if (!immutable_terminal_state) {
+            if (flow_out_of_order_threshold_exceeded(entry.flow)) {
+                entry.state = penny::FlowTrackingState::INTERRUPTED_OUT_OF_ORDER_EXCEEDED;
+                if (TCPLOG_ENABLED(DEBUG)) {
+                    const auto flow_tag = flow_debug_details(key);
+                    TCPLOG_DEBUG("Out-of-order threshold exceeded %s", flow_tag.c_str());
                 }
-                entry.flow.mark_snapshot_expired(pair.first);
+                return;
             }
+            if (flow_duplicate_threshold_exceeded(entry.flow)) {
+                entry.state = penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED;
+                if (TCPLOG_ENABLED(DEBUG)) {
+                    const auto flow_tag = flow_debug_details(key);
+                    TCPLOG_DEBUG("Duplicate threshold exceeded %s", flow_tag.c_str());
+                }
+                return;
+            }
+        }
+
+        if (entry.flow.final_decision() == penny::FlowEngine::FlowDecision::PENDING) {
+            entry.flow.evaluate_if_ready();
+        }
+
+        if (entry.state != penny::FlowTrackingState::CONNECTION_CLOSED_FIN &&
+            !immutable_terminal_state &&
+            entry.flow.final_decision() != penny::FlowEngine::FlowDecision::PENDING) {
+            entry.state = penny::FlowTrackingState::FINISHED;
+        }
+    });
+}
+
+void ActiveTestPipelineRunner::complete_resolved_terminal_flows() {
+    std::vector<FlowKey> completed_keys;
+    const bool individual_eval_enabled = individual_flow_evaluation_enabled();
+    flow_manager_.for_each_flow([&](const FlowKey& key, penny::FlowEngineEntry& entry) {
+        const bool terminal_state =
+            entry.state == penny::FlowTrackingState::INTERRUPTED_RST ||
+            entry.state == penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED ||
+            entry.state == penny::FlowTrackingState::INTERRUPTED_OUT_OF_ORDER_EXCEEDED ||
+            entry.state == penny::FlowTrackingState::CONNECTION_CLOSED_FIN ||
+            entry.state == penny::FlowTrackingState::FINISHED;
+        if (!terminal_state) return;
+        if (!individual_eval_enabled &&
+            entry.flow.final_decision() == penny::FlowEngine::FlowDecision::PENDING) {
+            return;
         }
+        if (entry.flow.pending_retransmissions() != 0) return;
+        completed_keys.push_back(key);
     });
+
+    for (const auto& key : completed_keys) {
+        complete_flow_with_summary(key, "terminal_state");
+    }
+}
+
+void ActiveTestPipelineRunner::complete_flow_with_summary(const FlowKey& key, const char* reason) {
+    auto* existing = flow_manager_.find(key);
+    if (!existing) {
+        return;
+    }
+    existing->flow.resolve_pending_snapshots(std::chrono::steady_clock::now());
+    const auto final_decision = existing->flow.final_decision();
+    const auto summary = format_closed_loop_flow_summary(key, existing->flow);
+    if (final_decision == penny::FlowEngine::FlowDecision::FINISHED_CLOSED_LOOP) {
+        closed_loop_flow_summaries_.push_back(summary);
+    }
+    if (existing->state == penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED ||
+        final_decision == penny::FlowEngine::FlowDecision::FINISHED_DUPLICATE_EXCEEDED) {
+        duplicate_exceeded_flow_summaries_.push_back(summary);
+    }
+    flow_manager_.complete_flow(key, reason);
 }
 
 void ActiveTestPipelineRunner::handle_packet(const net::PacketView& packet,
@@ -276,45 +386,37 @@ void ActiveTestPipelineRunner::handle_packet(const net::PacketView& packet,
 penny::FlowEngineEntry* ActiveTestPipelineRunner::admit_or_forward_flow(
     const net::PacketView& packet,
     const std::chrono::steady_clock::time_point& now) {
+    auto* flow_entry = flow_manager_.find(packet.flow);
 
     // Skip flows we've already monitored in the past.
-    if (flow_manager_.was_completed(packet.flow)) {
+    if (!flow_entry && flow_manager_.was_completed(packet.flow)) {
         forward_packet(packet);
         return nullptr;
     }
 
-    const auto monitor_state = flow_manager_.flow_state(packet.flow);
-    if (monitor_state == penny::FlowTrackingState::NOT_ACTIONABLE &&
-        flow_manager_.is_flow_monitoring_capacity_full()) {
+    if (!flow_entry && flow_manager_.is_flow_monitoring_capacity_full()) {
         // Flow is not tracked, and there are no spare monitoring slots.
         forward_packet(packet);
         return nullptr;
     }
 
-    if (monitor_state == penny::FlowTrackingState::INTERRUPTED_RST ||
-        monitor_state == penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED ||
-        monitor_state == penny::FlowTrackingState::INTERRUPTED_OUT_OF_ORDER_EXCEEDED ||
-        monitor_state == penny::FlowTrackingState::CONNECTION_CLOSED_FIN ||
-        monitor_state == penny::FlowTrackingState::FINISHED) {
-        // Mark flow as complete and free the monitoring slot.
-        if (auto* existing = flow_manager_.find(packet.flow)) {
-            app::DropCollectorBinding::instance().unbind(&existing->flow);
+    if (flow_entry &&
+        (flow_entry->state == penny::FlowTrackingState::INTERRUPTED_RST ||
+         flow_entry->state == penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED ||
+         flow_entry->state == penny::FlowTrackingState::INTERRUPTED_OUT_OF_ORDER_EXCEEDED ||
+         flow_entry->state == penny::FlowTrackingState::CONNECTION_CLOSED_FIN ||
+         flow_entry->state == penny::FlowTrackingState::FINISHED)) {
+        // Terminal flows with unresolved drops stay resident until the
+        // retransmission gap is filled or the timeout expires.
+        if (flow_entry->flow.pending_retransmissions() == 0) {
+            complete_flow_with_summary(packet.flow, "terminal_state");
         }
-        flow_manager_.complete_flow(packet.flow, "terminal_state");
         forward_packet(packet);
         return nullptr;
     }
 
-    // Check whether the packet belongs to one of the flows currently being monitored.
-    auto* flow_entry = flow_manager_.find(packet.flow);
-
     if (flow_entry) {
         const auto penny_flow_decision = flow_entry->flow.final_decision();
-        if (penny_flow_decision != penny::FlowEngine::FlowDecision::PENDING){
-            // From Penny perspective the test for the flow is done.
-
-
-        }
         const bool terminal_state =
         flow_entry->state == penny::FlowTrackingState::INTERRUPTED_RST ||
         flow_entry->state == penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED ||
@@ -324,35 +426,25 @@ penny::FlowEngineEntry* ActiveTestPipelineRunner::admit_or_forward_flow(
 
         if (!terminal_state && penny_flow_decision != penny::FlowEngine::FlowDecision::PENDING) {
             flow_entry->state = penny::FlowTrackingState::FINISHED;
-            app::DropCollectorBinding::instance().unbind(&flow_entry->flow);
-            flow_manager_.complete_flow(packet.flow, "penny_decision");
+            complete_flow_with_summary(packet.flow, "penny_decision");
             forward_packet(packet);
             return nullptr;
         }
     }
 
-    if (!flow_entry && !flow_manager_.is_flow_monitoring_capacity_full()) {
+    if (!flow_entry) {
         try {
             const bool is_syn = packet.tcp.flags_view().syn;
-            const bool inserted = flow_manager_.add_new_flow(
+            flow_entry = flow_manager_.add_new_flow(
                 packet.flow,
                 packet.tcp.seq,
                 static_cast<uint32_t>(packet.payload_bytes),
                 is_syn,
                 now);
-            if (inserted) {
-                if (drop_collector_) {
-                    if (auto* entry = flow_manager_.find(packet.flow)) {
-                        app::DropCollectorBinding::instance().bind(
-                            &entry->flow,
-                            drop_collector_,
-                            thread_name_,
-                            drop_collector_shard_index_);
-                    }
-                }
+            if (flow_entry) {
                 if (TCPLOG_ENABLED(INFO)) {
                     const auto flow_tag = flow_debug_details(packet.flow);
-                    TCPLOG_INFO("[monitor_start] %s flow=%s seq=%" PRIu32 " payload_bytes=%zu",
+                    TCPLOG_INFO("[flow_track] action=start trigger=%s flow=%s seq=%" PRIu32 " payload=%zu",
                         is_syn ? "syn" : "data",
                         flow_tag.c_str(),
                         packet.tcp.seq,
@@ -426,7 +518,7 @@ bool ActiveTestPipelineRunner::promote_pending_flow(
     return false;
 }
 
-// Fast-path check for RST that marks outstanding drop snapshots as expired.
+// Fast-path check for RST that marks outstanding drop snapshots as invalid.
 void ActiveTestPipelineRunner::handle_rst(penny::FlowEngineEntry& entry, const net::PacketView& packet) {
     if ((packet.tcp.flags & 0x04) == 0) return; // RST bit not set.
 
@@ -450,7 +542,8 @@ void ActiveTestPipelineRunner::handle_rst(penny::FlowEngineEntry& entry, const n
     entry.state = penny::FlowTrackingState::INTERRUPTED_RST;
 }
 
-// Fast-path check for FIN that marks outstanding drop snapshots as expired.
+// Fast-path check for FIN. A clean close means any still-missing dropped payload
+// was not retransmitted before teardown, so its snapshot is marked expired now.
 void ActiveTestPipelineRunner::handle_fin(penny::FlowEngineEntry& entry, const net::PacketView& packet) {
     if ((packet.tcp.flags & 0x01) == 0) return; // FIN bit not set.
 
@@ -460,13 +553,12 @@ void ActiveTestPipelineRunner::handle_fin(penny::FlowEngineEntry& entry, const n
         for (const auto& snap_pair : snapshots) {
             const auto& snapshot = snap_pair.second;
 
-            // Skip snapshots already decided.
             if (snapshot.state != penny::SnapshotState::Pending ||
                 snapshot.stats.pending_retransmissions() == 0) {
                 continue;
             }
 
-            flow.mark_snapshot_invalid(snap_pair.first); // Treat pending gaps as invalid on close.
+            flow.mark_snapshot_expired(snap_pair.first);
             if (flow.pending_retransmissions() == 0) break;
         }
         penny::ThreadFlowEventTimerManager::instance().purge_flow(&flow);
@@ -525,7 +617,9 @@ void ActiveTestPipelineRunner::handle_data_packet(penny::FlowEngineEntry& entry,
                          end_seq,
                          entry.flow.highest_sequence());
         }
-        const bool ooo_exceeded = flow_out_of_order_threshold_exceeded(entry.flow);
+        const bool ooo_exceeded =
+            individual_flow_evaluation_enabled() &&
+            flow_out_of_order_threshold_exceeded(entry.flow);
         if (ooo_exceeded) {
             entry.state = penny::FlowTrackingState::INTERRUPTED_OUT_OF_ORDER_EXCEEDED;
             if (TCPLOG_ENABLED(DEBUG)) {
@@ -553,7 +647,9 @@ void ActiveTestPipelineRunner::handle_data_packet(penny::FlowEngineEntry& entry,
             penny::ThreadFlowEventTimerManager::instance().enqueue_duplicate(&entry.flow, start_seq, packet.payload_bytes);
             // Logging handled in timer callback.
 
-            const bool dup_exceeded = flow_duplicate_threshold_exceeded(entry.flow);
+            const bool dup_exceeded =
+                individual_flow_evaluation_enabled() &&
+                flow_duplicate_threshold_exceeded(entry.flow);
             if (dup_exceeded) {
                 entry.state = penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED;
                 if (TCPLOG_ENABLED(DEBUG)) {
@@ -576,7 +672,9 @@ void ActiveTestPipelineRunner::handle_data_packet(penny::FlowEngineEntry& entry,
             penny::ThreadFlowEventTimerManager::instance().enqueue_duplicate(&entry.flow, start_seq, packet.payload_bytes);
             // Logging handled in timer callback.
 
-            const bool dup_exceeded = flow_duplicate_threshold_exceeded(entry.flow);
+            const bool dup_exceeded =
+                individual_flow_evaluation_enabled() &&
+                flow_duplicate_threshold_exceeded(entry.flow);
             if (dup_exceeded) {
                 entry.state = penny::FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED;
                 if (TCPLOG_ENABLED(DEBUG)) {
diff --git a/src/app/core/passive/PassiveTestPipeline.cpp b/src/app/core/passive/PassiveTestPipeline.cpp
index 4843516..982a052 100644
--- a/src/app/core/passive/PassiveTestPipeline.cpp
+++ b/src/app/core/passive/PassiveTestPipeline.cpp
@@ -31,7 +31,21 @@ PassiveTestPipelineRunner::PassiveTestPipelineRunner(const Config& cfg,
     : cfg_(cfg),
       opts_(opts),
       matcher_(std::move(matcher)),
-      source_(std::move(source)) {}
+      source_(std::move(source)) {
+    reserve_for_config();
+}
+
+void PassiveTestPipelineRunner::reserve_for_config() {
+    if (cfg_.passive.max_parallel_flows > 0) {
+        flows_.reserve(cfg_.passive.max_parallel_flows);
+    }
+
+    if (cfg_.passive.min_number_of_flows_to_finish > 0) {
+        finished_flows_.reserve(cfg_.passive.min_number_of_flows_to_finish);
+        finished_index_.reserve(cfg_.passive.min_number_of_flows_to_finish);
+        finished_keys_.reserve(cfg_.passive.min_number_of_flows_to_finish);
+    }
+}
 
 std::optional<ModeResult> PassiveTestPipelineRunner::run() {
     PipelineRunner runner(cfg_,
diff --git a/src/app/core/utils/FlowDebug.cpp b/src/app/core/utils/FlowDebug.cpp
index ccc4628..4f8686a 100644
--- a/src/app/core/utils/FlowDebug.cpp
+++ b/src/app/core/utils/FlowDebug.cpp
@@ -6,6 +6,21 @@
 
 namespace openpenny {
 
+namespace {
+
+std::string proto_label(std::uint8_t proto) {
+    switch (proto) {
+    case 6:
+        return "tcp";
+    case 17:
+        return "udp";
+    default:
+        return std::to_string(static_cast<unsigned>(proto));
+    }
+}
+
+} // namespace
+
 std::string to_ipv4_string(uint32_t host_order_ip) {
     std::ostringstream out;
     out << ((host_order_ip >> 24) & 0xff) << '.'
@@ -18,9 +33,14 @@ std::string to_ipv4_string(uint32_t host_order_ip) {
 std::string flow_debug_details(const FlowKey& flow) {
     const auto src_ip = to_ipv4_string(flow.src);
     const auto dst_ip = to_ipv4_string(flow.dst);
+    const bool have_proto = flow.ip_proto != 0;
     std::string tag;
-    tag.reserve(src_ip.size() + dst_ip.size() + 16);
+    tag.reserve(src_ip.size() + dst_ip.size() + (have_proto ? 24 : 16));
     tag.push_back('{');
+    if (have_proto) {
+        tag.append(proto_label(flow.ip_proto));
+        tag.push_back('-');
+    }
     tag.append(src_ip);
     tag.push_back('-');
     tag.append(dst_ip);
diff --git a/src/app/worker/penny_worker.cpp b/src/app/worker/penny_worker.cpp
index 85aa22c..e82edf4 100644
--- a/src/app/worker/penny_worker.cpp
+++ b/src/app/worker/penny_worker.cpp
@@ -6,6 +6,7 @@
 #include "openpenny/egress/PacketSink.h"
 #include "openpenny/log/Log.h"
 
+#include <algorithm>
 #include <filesystem>
 #include <iostream>
 #include <string>
@@ -297,9 +298,7 @@ int main(int argc, char** argv) {
     const uint64_t aggregates_snapshots = aggregates_enabled ? summary.drop_snapshots.size() : 0;
     openpenny::app::AggregatedCounters agg_snapshot{};
     if (is_active_mode) {
-        agg_snapshot = res.aggregates_snapshot
-                           ? *res.aggregates_snapshot
-                           : openpenny::app::aggregate_counters();
+        agg_snapshot = openpenny::app::aggregate_counters();
     }
     std::cout << "aggregates_status=" << aggregates_status_str << "\n";
     std::cout << "aggregates_decision_complete=" << (aggregates_done ? 1 : 0) << "\n";
@@ -315,6 +314,12 @@ int main(int argc, char** argv) {
     std::cout << "aggregate_flows_not_closed_loop=" << agg_snapshot.flows_not_closed_loop << "\n";
     std::cout << "aggregate_flows_rst=" << agg_snapshot.flows_rst << "\n";
     std::cout << "aggregate_flows_duplicates_exceeded=" << agg_snapshot.flows_duplicates_exceeded << "\n";
+    const uint64_t closed_loop_flows_found = std::max<std::uint64_t>(
+        agg_snapshot.flows_closed_loop,
+        res.closed_loop_flow_summaries.size());
+    const uint64_t duplicate_exceeded_flows_found = std::max<std::uint64_t>(
+        agg_snapshot.flows_duplicates_exceeded,
+        res.duplicate_exceeded_flow_summaries.size());
     // Emit JSON summary similar to CLI output.
     nlohmann::json j;
     j["test_id"] = args.test_id;
@@ -357,6 +362,35 @@ int main(int argc, char** argv) {
         {"rst", agg_snapshot.flows_rst},
         {"duplicates_exceeded", agg_snapshot.flows_duplicates_exceeded}
     };
+    j["closed_loop_flows_found"] = closed_loop_flows_found;
+    j["duplicate_exceeded_flows_found"] = duplicate_exceeded_flows_found;
+    j["closed_loop_flows"] = nlohmann::json::array();
+    for (const auto& line : res.closed_loop_flow_summaries) {
+        j["closed_loop_flows"].push_back(line);
+    }
+    j["duplicate_exceeded_flows"] = nlohmann::json::array();
+    for (const auto& line : res.duplicate_exceeded_flow_summaries) {
+        j["duplicate_exceeded_flows"].push_back(line);
+    }
+    std::string end_state;
+    if (aggregates_done) {
+        end_state = "Aggregates completed (" + aggregates_status_str + ")";
+    } else if (res.penny_completed) {
+        end_state = is_active_mode
+            ? "Penny heuristics completed"
+            : "Passive pipeline completed (flows=" + std::to_string(res.passive_flows_finished) + ")";
+    } else {
+        end_state = "Reader/pipeline error";
+    }
+    if (closed_loop_flows_found > 0) {
+        end_state += ", found " + std::to_string(closed_loop_flows_found) + " closed-loop flow";
+        if (closed_loop_flows_found != 1) end_state += "s";
+    }
+    if (duplicate_exceeded_flows_found > 0) {
+        end_state += ", found " + std::to_string(duplicate_exceeded_flows_found) + " duplicate-exceeded flow";
+        if (duplicate_exceeded_flows_found != 1) end_state += "s";
+    }
+    j["end_state"] = end_state;
     // Aggregate snapshot counters, if available.
     if (res.passive_flows_finished > 0 || !res.passive_gap_summaries.empty()) {
         nlohmann::json passive;
diff --git a/src/egress/RawNicSink.cpp b/src/egress/RawNicSink.cpp
index 963c4ae..291b935 100644
--- a/src/egress/RawNicSink.cpp
+++ b/src/egress/RawNicSink.cpp
@@ -33,10 +33,12 @@ RawNicSink::~RawNicSink() {
     close();
 }
 
-bool RawNicSink::open() {
+int RawNicSink::open_socket_fd(bool resolve_ifindex, bool log_failures) {
     if (cfg_.device.empty()) {
-        TCPLOG_ERROR("RawNicSink: device name is required%s", "");
-        return false;
+        if (log_failures) {
+            TCPLOG_ERROR("RawNicSink: device name is required%s", "");
+        }
+        return -1;
     }
 
     // SOCK_RAW (not SOCK_DGRAM): we want to forward the original frame
@@ -47,60 +49,85 @@ bool RawNicSink::open() {
     // destination. SOCK_RAW preserves the original L2 verbatim.
     //
     // ETH_P_ALL on the protocol so we can write any frame type.
-    fd_ = ::socket(AF_PACKET, SOCK_RAW | SOCK_NONBLOCK, htons(ETH_P_ALL));
-    if (fd_ < 0) {
-        TCPLOG_ERROR("RawNicSink: socket(AF_PACKET, SOCK_RAW) failed: %s (need CAP_NET_RAW)",
-                     std::strerror(errno));
-        return false;
+    int fd = ::socket(AF_PACKET, SOCK_RAW | SOCK_NONBLOCK, htons(ETH_P_ALL));
+    if (fd < 0) {
+        if (log_failures) {
+            TCPLOG_ERROR("RawNicSink: socket(AF_PACKET, SOCK_RAW) failed: %s (need CAP_NET_RAW)",
+                         std::strerror(errno));
+        }
+        return -1;
     }
 
-    // Resolve ifindex once so the hot path doesn't need another syscall.
-    ifreq ifr{};
-    std::strncpy(ifr.ifr_name, cfg_.device.c_str(), IFNAMSIZ - 1);
-    if (::ioctl(fd_, SIOCGIFINDEX, &ifr) != 0) {
-        const int saved = errno;
-        TCPLOG_ERROR("RawNicSink: SIOCGIFINDEX('%s') failed: %s",
-                     cfg_.device.c_str(), std::strerror(saved));
-        ::close(fd_);
-        fd_ = -1;
-        errno = saved;
-        return false;
+    if (resolve_ifindex || if_index_ <= 0) {
+        ifreq ifr{};
+        std::strncpy(ifr.ifr_name, cfg_.device.c_str(), IFNAMSIZ - 1);
+        if (::ioctl(fd, SIOCGIFINDEX, &ifr) != 0) {
+            const int saved = errno;
+            if (log_failures) {
+                TCPLOG_ERROR("RawNicSink: SIOCGIFINDEX('%s') failed: %s",
+                             cfg_.device.c_str(), std::strerror(saved));
+            }
+            ::close(fd);
+            errno = saved;
+            return -1;
+        }
+        if_index_ = ifr.ifr_ifindex;
     }
-    if_index_ = ifr.ifr_ifindex;
 
-    // Bind to the interface so sendto(2) without a sockaddr works too, and
-    // so the kernel drops incoming frames targeted at other ifaces.
     sockaddr_ll addr{};
     addr.sll_family = AF_PACKET;
     addr.sll_protocol = htons(ETH_P_ALL);
     addr.sll_ifindex = if_index_;
-    if (::bind(fd_, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) != 0) {
+    if (::bind(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) != 0) {
         const int saved = errno;
-        TCPLOG_ERROR("RawNicSink: bind to '%s' (ifindex=%d) failed: %s",
-                     cfg_.device.c_str(), if_index_, std::strerror(saved));
-        ::close(fd_);
-        fd_ = -1;
-        if_index_ = -1;
+        if (log_failures) {
+            TCPLOG_ERROR("RawNicSink: bind to '%s' (ifindex=%d) failed: %s",
+                         cfg_.device.c_str(), if_index_, std::strerror(saved));
+        }
+        ::close(fd);
         errno = saved;
-        return false;
+        return -1;
     }
 
     if (cfg_.raw_nic_bind_device) {
-        // Redundant with the bind() above on modern kernels, but harmless,
-        // and it mirrors the IPPROTO_RAW path for consistency.
-        if (::setsockopt(fd_, SOL_SOCKET, SO_BINDTODEVICE,
+        if (::setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                          cfg_.device.c_str(), cfg_.device.size()) != 0) {
             TCPLOG_WARN("RawNicSink: SO_BINDTODEVICE('%s') failed: %s",
                         cfg_.device.c_str(), std::strerror(errno));
         }
     }
 
+    int sndbuf = 16 * 1024 * 1024;
+    if (::setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf)) != 0) {
+        TCPLOG_WARN("RawNicSink: SO_SNDBUF(%d) failed: %s",
+                    sndbuf, std::strerror(errno));
+    }
+
+    return fd;
+}
+
+bool RawNicSink::open() {
+    fd_ = open_socket_fd(true, true);
+    if (fd_ < 0) {
+        return false;
+    }
+
     TCPLOG_INFO("RawNicSink: opened (fd=%d, device='%s', ifindex=%d)",
                 fd_, cfg_.device.c_str(), if_index_);
     return true;
 }
 
 void RawNicSink::close() noexcept {
+    std::vector<int> to_close;
+    {
+        std::lock_guard<std::mutex> lock(fds_mtx_);
+        to_close.swap(additional_fds_);
+    }
+    for (int fd : to_close) {
+        if (fd >= 0) {
+            ::close(fd);
+        }
+    }
     if (fd_ >= 0) {
         ::close(fd_);
         fd_ = -1;
@@ -108,8 +135,37 @@ void RawNicSink::close() noexcept {
     if_index_ = -1;
 }
 
+int RawNicSink::thread_fd() {
+    thread_local int t_fd = -1;
+    thread_local const RawNicSink* t_owner = nullptr;
+    if (t_owner == this && t_fd >= 0) {
+        return t_fd;
+    }
+    if (fd_ < 0 || if_index_ <= 0) {
+        t_owner = this;
+        t_fd = -1;
+        return t_fd;
+    }
+
+    int fd = open_socket_fd(false, false);
+    if (fd < 0) {
+        t_owner = this;
+        t_fd = fd_;
+        return t_fd;
+    }
+
+    {
+        std::lock_guard<std::mutex> lock(fds_mtx_);
+        additional_fds_.push_back(fd);
+    }
+    t_owner = this;
+    t_fd = fd;
+    return t_fd;
+}
+
 bool RawNicSink::write(const net::PacketView& packet) {
-    if (fd_ < 0) {
+    const int fd = thread_fd();
+    if (fd < 0) {
         return false;
     }
 
@@ -147,7 +203,7 @@ bool RawNicSink::write(const net::PacketView& packet) {
     dst.sll_protocol = htons(ETH_P_ALL);
     dst.sll_ifindex = if_index_;
 
-    const ssize_t written = ::sendto(fd_,
+    const ssize_t written = ::sendto(fd,
                                      buf,
                                      static_cast<size_t>(len),
                                      0,
@@ -158,12 +214,21 @@ bool RawNicSink::write(const net::PacketView& packet) {
         return true;
     }
     const int err = errno;
-    if (err != EAGAIN && err != EWOULDBLOCK) {
-        TCPLOG_WARN("RawNicSink::write (%u bytes) failed on fd=%d (device='%s'): %s",
-                    static_cast<unsigned>(len), fd_,
-                    cfg_.device.c_str(), std::strerror(err));
+    if (err == EAGAIN || err == EWOULDBLOCK) {
         stats_.errors.fetch_add(1, std::memory_order_relaxed);
+        if (!backpressure_logged_.exchange(true, std::memory_order_relaxed)) {
+            TCPLOG_WARN(
+                "RawNicSink: TX backpressure on fd=%d (EAGAIN/EWOULDBLOCK); "
+                "dropping packets. This can induce real TCP retransmissions at "
+                "high rates because OpenPenny does not keep a copy-backed TX queue.",
+                fd);
+        }
+        return false;
     }
+    TCPLOG_WARN("RawNicSink::write (%u bytes) failed on fd=%d (device='%s'): %s",
+                static_cast<unsigned>(len), fd,
+                cfg_.device.c_str(), std::strerror(err));
+    stats_.errors.fetch_add(1, std::memory_order_relaxed);
     return false;
 }
 
diff --git a/src/egress/RawSocketSink.cpp b/src/egress/RawSocketSink.cpp
index 59c7707..2d516d0 100644
--- a/src/egress/RawSocketSink.cpp
+++ b/src/egress/RawSocketSink.cpp
@@ -28,30 +28,42 @@ RawSocketSink::~RawSocketSink() {
     close();
 }
 
-bool RawSocketSink::open() {
-    fd_ = ::socket(AF_INET, SOCK_RAW | SOCK_NONBLOCK, IPPROTO_RAW);
-    if (fd_ < 0) {
-        TCPLOG_ERROR("RawSocketSink: socket(AF_INET, SOCK_RAW, IPPROTO_RAW) failed: %s",
-                     std::strerror(errno));
-        return false;
+int RawSocketSink::open_socket_fd(bool log_failures) {
+    int fd = ::socket(AF_INET, SOCK_RAW | SOCK_NONBLOCK, IPPROTO_RAW);
+    if (fd < 0) {
+        if (log_failures) {
+            TCPLOG_ERROR("RawSocketSink: socket(AF_INET, SOCK_RAW, IPPROTO_RAW) failed: %s",
+                         std::strerror(errno));
+        }
+        return -1;
     }
 
     if (!cfg_.device.empty()) {
-        if (::setsockopt(fd_, SOL_SOCKET, SO_BINDTODEVICE,
+        if (::setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
                          cfg_.device.c_str(), cfg_.device.size()) != 0) {
             const int saved = errno;
             TCPLOG_WARN("RawSocketSink: SO_BINDTODEVICE('%s') failed: %s",
                         cfg_.device.c_str(), std::strerror(saved));
-            // SO_BINDTODEVICE requires CAP_NET_RAW; treat as non-fatal so
-            // the sink still works when the operator just hasn't named a
-            // preferred egress device.
         }
     }
 
-    // IPPROTO_RAW already implies IP_HDRINCL, but set it explicitly so the
-    // behaviour is obvious to reviewers tracing packet construction.
     int one = 1;
-    (void)::setsockopt(fd_, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
+    (void)::setsockopt(fd, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one));
+
+    int sndbuf = 16 * 1024 * 1024;
+    if (::setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf)) != 0) {
+        TCPLOG_WARN("RawSocketSink: SO_SNDBUF(%d) failed: %s",
+                    sndbuf, std::strerror(errno));
+    }
+
+    return fd;
+}
+
+bool RawSocketSink::open() {
+    fd_ = open_socket_fd(true);
+    if (fd_ < 0) {
+        return false;
+    }
 
     TCPLOG_INFO("RawSocketSink: opened (fd=%d, device='%s')",
                 fd_, cfg_.device.c_str());
@@ -59,18 +71,60 @@ bool RawSocketSink::open() {
 }
 
 void RawSocketSink::close() noexcept {
+    std::vector<int> to_close;
+    {
+        std::lock_guard<std::mutex> lock(fds_mtx_);
+        to_close.swap(additional_fds_);
+    }
+    for (int fd : to_close) {
+        if (fd >= 0) {
+            ::close(fd);
+        }
+    }
     if (fd_ >= 0) {
         ::close(fd_);
         fd_ = -1;
     }
 }
 
+int RawSocketSink::thread_fd() {
+    thread_local int t_fd = -1;
+    thread_local const RawSocketSink* t_owner = nullptr;
+    if (t_owner == this && t_fd >= 0) {
+        return t_fd;
+    }
+    if (fd_ < 0) {
+        t_owner = this;
+        t_fd = -1;
+        return t_fd;
+    }
+
+    int fd = open_socket_fd(false);
+    if (fd < 0) {
+        t_owner = this;
+        t_fd = fd_;
+        return t_fd;
+    }
+
+    {
+        std::lock_guard<std::mutex> lock(fds_mtx_);
+        additional_fds_.push_back(fd);
+    }
+    t_owner = this;
+    t_fd = fd;
+    return t_fd;
+}
+
 bool RawSocketSink::write(const net::PacketView& packet) {
-    if (fd_ < 0 || !packet.layer3_ptr || packet.layer3_length < 20) {
+    if (!packet.layer3_ptr || packet.layer3_length < 20) {
         // IPv4 header is at least 20 bytes; anything shorter isn't a
         // routable datagram and the kernel would reject it anyway.
         return false;
     }
+    const int fd = thread_fd();
+    if (fd < 0) {
+        return false;
+    }
 
     sockaddr_in dst{};
     dst.sin_family = AF_INET;
@@ -80,7 +134,7 @@ bool RawSocketSink::write(const net::PacketView& packet) {
     std::memcpy(&dst.sin_addr.s_addr, packet.layer3_ptr + 16,
                 sizeof(dst.sin_addr.s_addr));
 
-    const ssize_t written = ::sendto(fd_,
+    const ssize_t written = ::sendto(fd,
                                      packet.layer3_ptr,
                                      static_cast<size_t>(packet.layer3_length),
                                      0,
@@ -92,9 +146,14 @@ bool RawSocketSink::write(const net::PacketView& packet) {
     }
     const int err = errno;
     if (err == EAGAIN || err == EWOULDBLOCK) {
-        // Transient back-pressure on a non-blocking raw socket; the
-        // packet is dropped and no error is recorded (the same policy
-        // the active path uses).
+        stats_.errors.fetch_add(1, std::memory_order_relaxed);
+        if (!backpressure_logged_.exchange(true, std::memory_order_relaxed)) {
+            TCPLOG_WARN(
+                "RawSocketSink: TX backpressure on fd=%d (EAGAIN/EWOULDBLOCK); "
+                "dropping packets. This can induce real TCP retransmissions at "
+                "high rates because OpenPenny does not keep a copy-backed TX queue.",
+                fd);
+        }
         return false;
     }
     if (err == EMSGSIZE) {
@@ -116,14 +175,14 @@ bool RawSocketSink::write(const net::PacketView& packet) {
                 "the packet size. Further oversized drops will be "
                 "counted silently.",
                 static_cast<unsigned>(packet.layer3_length),
-                fd_,
+                fd,
                 cfg_.device.empty() ? "<egress-iface>" : cfg_.device.c_str(),
                 static_cast<unsigned>(packet.layer3_length));
         }
         return false;
     }
     TCPLOG_WARN("RawSocketSink::write (%u bytes) failed on fd=%d: %s",
-                static_cast<unsigned>(packet.layer3_length), fd_,
+                static_cast<unsigned>(packet.layer3_length), fd,
                 std::strerror(err));
     stats_.errors.fetch_add(1, std::memory_order_relaxed);
     return false;
diff --git a/src/egress/TunSink.cpp b/src/egress/TunSink.cpp
index bac3cbf..996fd15 100644
--- a/src/egress/TunSink.cpp
+++ b/src/egress/TunSink.cpp
@@ -294,12 +294,21 @@ bool TunSink::write(const net::PacketView& packet) {
         return true;
     }
     const int err = errno;
-    if (err != EAGAIN && err != EWOULDBLOCK) {
-        TCPLOG_WARN("TunSink::write (%u bytes) failed on fd=%d: %s",
-                    static_cast<unsigned>(packet.layer3_length), fd,
-                    std::strerror(err));
+    if (err == EAGAIN || err == EWOULDBLOCK) {
         stats_.errors.fetch_add(1, std::memory_order_relaxed);
+        if (!backpressure_logged_.exchange(true, std::memory_order_relaxed)) {
+            TCPLOG_WARN(
+                "TunSink: TX backpressure on fd=%d (EAGAIN/EWOULDBLOCK); "
+                "dropping packets. This can induce real TCP retransmissions at "
+                "high rates because OpenPenny does not keep a copy-backed TX queue.",
+                fd);
+        }
+        return false;
     }
+    TCPLOG_WARN("TunSink::write (%u bytes) failed on fd=%d: %s",
+                static_cast<unsigned>(packet.layer3_length), fd,
+                std::strerror(err));
+    stats_.errors.fetch_add(1, std::memory_order_relaxed);
     return false;
 }
 
diff --git a/src/grpc/PennyService.cpp b/src/grpc/PennyService.cpp
index 46ac615..08e524a 100644
--- a/src/grpc/PennyService.cpp
+++ b/src/grpc/PennyService.cpp
@@ -1041,8 +1041,15 @@ ::grpc::Status PennyServiceImpl::StartTest(::grpc::ServerContext*,
                                                       ? (aggregates_decision_complete ? "completed" : "running")
                                                       : "n/a";
 
-    // Build a JSON summary akin to the CLI output.
-    nlohmann::json summary;
+    // Build a JSON summary akin to the CLI output, preserving any
+    // worker-emitted detail sections that do not have dedicated proto fields.
+    nlohmann::json summary = nlohmann::json::object();
+    if (!response->json_summary().empty()) {
+        auto parsed = nlohmann::json::parse(response->json_summary(), nullptr, false);
+        if (parsed.is_object()) {
+            summary = std::move(parsed);
+        }
+    }
     summary["test_id"] = response->test_id();
     summary["status"] = response->status();
     summary["packets"] = {
diff --git a/src/ingress/af_xdp/XdpReader.cpp b/src/ingress/af_xdp/XdpReader.cpp
index cc7c379..0ed8dd0 100644
--- a/src/ingress/af_xdp/XdpReader.cpp
+++ b/src/ingress/af_xdp/XdpReader.cpp
@@ -53,7 +53,7 @@ static uint64_t now_ns() {
 
 struct SharedAttachState {
     std::mutex mutex;
-    unsigned refs{0};
+    unsigned refs{0};         ///< Workers currently opening or opened on this shared attach state.
     bool rss_checked{false};   ///< Only the first-opening worker runs the RSS coverage check.
 #ifdef OPENPENNY_WITH_LIBBPF
     bool attached{false};
@@ -511,10 +511,10 @@ bool XdpReader::open(const std::string& ifname, unsigned queue) {
         return false;
     }
 
-    // Serialise the per-interface attach / map-pin dance across worker
-    // threads so two queue workers on the same NIC can't race when creating
-    // or pinning the shared BPF objects.
-    std::lock_guard<std::mutex> shared_lock(impl.shared_attach->mutex);
+    // Serialize queue-worker bring-up against the shared attach state so
+    // xsks_map publication and live-rule activation happen in a well-defined
+    // order across every queue.
+    std::unique_lock<std::mutex> shared_lock(impl.shared_attach->mutex);
 
     if (impl.tuning.verbose) {
         TCPLOG_INFO("Attempting AF_XDP reader on %s queue %u", ifname.c_str(), queue);
@@ -930,10 +930,17 @@ bool XdpReader::open(const std::string& ifname, unsigned queue) {
 
     const bool shared_reader_already_open = impl.shared_attach->refs > 0;
     bool pins_ok = false;
-    if (shared_reader_already_open && open_maps_from_pins()) {
-        pins_ok = true;
+    if (shared_reader_already_open) {
+        if (!open_maps_from_pins()) {
+            TCPLOG_ERROR("Shared AF_XDP maps are unavailable for %s queue %u; "
+                         "ensure bpffs pins remain accessible while using "
+                         "multiple queues.",
+                         ifname.c_str(), queue);
+            cleanup();
+            return false;
+        }
         rs.pinned_maps = true;
-        rs.xdp_flags = impl.shared_attach->xdp_flags;
+        pins_ok = true;
     } else if (impl.tuning.reuse_pins && open_maps_from_pins()) {
         bool stale_pins = false;
         bpf_map_info conf_info{};
@@ -988,11 +995,6 @@ bool XdpReader::open(const std::string& ifname, unsigned queue) {
             pins_ok = true;
             rs.pinned_maps = true;
         }
-    } else if (shared_reader_already_open) {
-        TCPLOG_ERROR("Pinned AF_XDP maps are not available for shared queue startup on %s.",
-                     ifname.c_str());
-        cleanup();
-        return false;
     }
 
     if (!pins_ok) {
@@ -1104,27 +1106,18 @@ bool XdpReader::open(const std::string& ifname, unsigned queue) {
 
     // Real match rules are deferred to the last worker.
     //
-    // Why: worker setup is serialised through shared_attach->mutex and
-    // takes ~80-100 ms per worker (UMEM alloc + bind + fill-ring prime).
-    // With queue_count=63 that's a 5+ second startup window. If worker 0
-    // publishes the real rules during ITS open(), the BPF program starts
-    // redirecting matched packets immediately — but only xsks_map[0] is
-    // populated, so packets to queues 1..62 hit xsk_miss until each later
-    // worker registers. We saw this in the wild: after a 9k-packet burst,
-    // 2946 xsk_hit (queue 0) and 6213 xsk_miss (the rest).
+    // Why: worker setup is serialized through shared_attach->mutex and can
+    // take noticeable time per queue (UMEM alloc + bind + fill-ring prime).
+    // If worker 0 publishes the real rules during its own open(), the BPF
+    // program starts redirecting matched packets immediately while later
+    // queues still have no xsks_map entry yet.
     //
     // Fix: every worker publishes pass-only-defaults during worker 0's
     // open (so the program never blackholes), then the LAST worker swaps
     // to the real rules once every queue has registered its socket.
     //
-    // "Last worker" check: we bump refs BEFORE the check so refs reflects
-    // the total number of workers that have completed setup, including
-    // this one. With queue_count=N, the worker that observes refs == N
-    // after its own increment is the last and owns the rule swap.
-    //
-    // Transfer ownership of the attach from this reader to the shared
-    // state first so the program stays attached if this worker closes
-    // early, then bump refs and -- if we're last -- publish real rules.
+    // "Last worker" check: refs is bumped after this worker finishes setup,
+    // so the worker that observes refs == queue_count owns the real-rule swap.
     if (rs.attached) {
         impl.shared_attach->attached = true;
         impl.shared_attach->ifindex = rs.ifindex;
diff --git a/src/net/PacketParser.cpp b/src/net/PacketParser.cpp
index e889a16..41239ea 100644
--- a/src/net/PacketParser.cpp
+++ b/src/net/PacketParser.cpp
@@ -170,6 +170,7 @@ bool PacketParser::decode(const uint8_t* frame, std::size_t length, PacketView&
     view.flow.dst   = dst;
     view.flow.sport = sport;
     view.flow.dport = dport;
+    view.flow.ip_proto = proto;
 
     view.ip_proto = proto;
 
diff --git a/src/net/TrafficMatch.cpp b/src/net/TrafficMatch.cpp
index 8c25bf8..a03858c 100644
--- a/src/net/TrafficMatch.cpp
+++ b/src/net/TrafficMatch.cpp
@@ -39,9 +39,7 @@ bool ip_matches(std::uint32_t value, const TrafficIpPrefix& prefix) {
     return (value & prefix.mask_host) == (prefix.prefix_host & prefix.mask_host);
 }
 
-bool rule_matches_flow(const TrafficMatchRule& rule, const FlowKey& key) {
-    if (!rule.enabled) return false;
-
+bool rule_matches_endpoints(const TrafficMatchRule& rule, const FlowKey& key) {
     if (rule.src_ip && !ip_matches(key.src, *rule.src_ip)) return false;
     if (rule.dst_ip && !ip_matches(key.dst, *rule.dst_ip)) return false;
 
@@ -51,8 +49,16 @@ bool rule_matches_flow(const TrafficMatchRule& rule, const FlowKey& key) {
     return true;
 }
 
+// Flow-level match: enabled rule, matching endpoints and, when the rule pins one, the same IP protocol.
+bool rule_matches_flow(const TrafficMatchRule& rule, const FlowKey& key) {
+    const bool usable = rule.enabled && rule_matches_endpoints(rule, key);
+    const bool proto_ok = !rule.ip_proto || key.ip_proto == *rule.ip_proto;
+    return usable && proto_ok;
+}
+
 bool rule_matches_packet(const TrafficMatchRule& rule, const PacketView& packet) {
-    if (!rule_matches_flow(rule, packet.flow)) return false;
+    if (!rule.enabled) return false;
+    if (!rule_matches_endpoints(rule, packet.flow)) return false;
     if (rule.ip_proto && packet.ip_proto != *rule.ip_proto) return false;
     return true;
 }
diff --git a/src/penny/flow/engine/FlowEngine.cpp b/src/penny/flow/engine/FlowEngine.cpp
index df645b5..c579495 100644
--- a/src/penny/flow/engine/FlowEngine.cpp
+++ b/src/penny/flow/engine/FlowEngine.cpp
@@ -4,6 +4,7 @@
 #include "openpenny/penny/flow/engine/FlowEvaluation.h"
 #include "openpenny/app/core/OpenpennyPipelineDriver.h"
 #include "openpenny/app/core/PerThreadStats.h"
+#include "openpenny/app/core/RuntimeSetup.h"
 #include "openpenny/log/Log.h"
 #include "openpenny/app/core/utils/FlowDebug.h"
 
@@ -33,6 +34,35 @@ void FlowEngine::set_drop_sink(DropSnapshotSink sink) {
     drop_sink_ = std::move(sink);
 }
 
+void FlowEngine::set_snapshot_refresh_sink(SnapshotRefreshSink sink) {
+    snapshot_refresh_sink_ = std::move(sink);  // replaces any previously installed refresh sink
+}
+
+// Invoke the refresh sink with this flow's key, the full snapshot list and
+// start_index. Does nothing without a sink or when start_index is past the end.
+void FlowEngine::publish_snapshot_refresh(std::size_t start_index) {
+    const bool has_sink = static_cast<bool>(snapshot_refresh_sink_);
+    const bool in_range = start_index < flow_drop_snapshots_.size();
+    if (has_sink && in_range) {
+        snapshot_refresh_sink_(flow_key_, flow_drop_snapshots_, start_index);
+    }
+}
+
+// Publish a change to one snapshot. When a refresh sink is installed it is
+// used exclusively; otherwise the drop sink (if any) receives the single
+// snapshot stored at snapshot_index. Out-of-range indices are ignored.
+void FlowEngine::publish_single_snapshot_update(PacketDropId packet_id,
+                                                std::size_t snapshot_index) {
+    if (snapshot_refresh_sink_) {
+        publish_snapshot_refresh(snapshot_index);
+    } else if (drop_sink_ && snapshot_index < flow_drop_snapshots_.size()) {
+        // Drop-sink path: one callback carrying the packet id and only the
+        // affected snapshot.
+        auto& snapshot = flow_drop_snapshots_[snapshot_index].second;
+        drop_sink_(flow_key_, packet_id, snapshot);
+    }
+}
+
 void FlowEngine::reset() {
     ThreadFlowEventTimerManager::instance().purge_flow(this);
     flow_drops_enforced_ = 0;
@@ -215,15 +245,20 @@ void FlowEngine::register_duplicate_snapshot(uint32_t seq) {
     // Snanpshots are ordered by insertion; once we find the first snapshot whose coverage
     // includes this seq (highest_seq >= seq), all later snapshots should reflect the duplicate.
     bool update = false;
+    std::size_t first_updated_index = flow_drop_snapshots_.size();
     for (size_t i = 0; i < flow_drop_snapshots_.size(); ++i) {
         auto& snap = flow_drop_snapshots_[i].second;
         if (!update && snap.stats.highest_seq() >= seq) {
             update = true;
+            first_updated_index = i;
         }
         if (update) {
             snap.stats.record_duplicate_packet();
         }
     }
+    if (update) {
+        publish_snapshot_refresh(first_updated_index);
+    }
 }
 
 void FlowEngine::evaluate_snapshot_duplicate_threshold() {
@@ -273,8 +308,7 @@ bool FlowEngine::drop_packet(uint32_t start,
     }
 
     if (max_drops_in_aggregates > 0) {
-        const auto& runtime = openpenny::current_runtime_setup();
-        if (runtime.aggregates_active &&
+        if (openpenny::current_aggregates_active() &&
             !openpenny::app::try_reserve_aggregate_drop(max_drops_in_aggregates)) {
             // Best-effort global drop budget has been exhausted. The atomic
             // per-worker counters may still allow a small overshoot under
@@ -319,7 +353,8 @@ bool FlowEngine::drop_packet(uint32_t start,
     const size_t snapshot_index = flow_drop_snapshots_.size() - 1;
     flow_snapshot_index_by_id_[packet_id] = snapshot_index;
     
-    // Timer thread will later emit a callback (via ThreadFlowEventTimerManager) that we apply on this thread.
+    // The owning worker thread will later drain this scheduled timeout/event
+    // via ThreadFlowEventTimerManager and apply the callback inline.
     ThreadFlowEventTimerManager::instance().register_drop(key, packet_id, snap.timestamp, flow_alive_flag_, this, snapshot_index);
     
     // Register the gap in the SEQ space.
@@ -328,7 +363,7 @@ bool FlowEngine::drop_packet(uint32_t start,
     if (TCPLOG_ENABLED(INFO)) {
         const auto flow_tag = ::openpenny::flow_debug_details(key);
         TCPLOG_INFO(
-            "[drop] flow=%s seq_range=%" PRIu32 "-%" PRIu32 " (len=%" PRIu32 ")",
+            "[drop_event] action=drop flow=%s start_seq=%" PRIu32 " end_seq=%" PRIu32 " len=%" PRIu32,
             flow_tag.c_str(),
             start,
             end,
@@ -406,6 +441,8 @@ void FlowEngine::mark_snapshot_retransmitted(PacketDropId packet_id) {
 
     // Remove the packet → snapshot mapping; the snapshot is resolved.
     flow_snapshot_index_by_id_.erase(index_it);
+
+    publish_single_snapshot_update(packet_id, idx);
 }
 
 /**
@@ -481,9 +518,7 @@ void FlowEngine::mark_snapshot_expired(PacketDropId packet_id) {
     // We no longer need to look up this snapshot by packet ID.
     flow_snapshot_index_by_id_.erase(index_it);
 
-    if (drop_sink_) {
-        drop_sink_(flow_key_, packet_id, snapshot);
-    }
+    publish_single_snapshot_update(packet_id, idx);
 }
 
 /**
@@ -531,6 +566,8 @@ void FlowEngine::mark_snapshot_invalid(PacketDropId packet_id) {
 
     // Adjust flow-wide pending retransmission statistics.
     flow_stats_.dec_pending_retransmission();
+    auto& counters = openpenny::app::current_thread_counters();
+    if (counters.pending_retransmissions > 0) counters.pending_retransmissions--;
 
     // Ensure snapshots recorded after this one remain statistically consistent.
     // They may still include this packet as pending, so remove that dependency.
@@ -545,6 +582,8 @@ void FlowEngine::mark_snapshot_invalid(PacketDropId packet_id) {
 
     // Remove the packet → snapshot index mapping; this snapshot is now resolved.
     flow_snapshot_index_by_id_.erase(index_it);
+
+    publish_single_snapshot_update(packet_id, idx);
 }
 
 void FlowEngine::expire_all_pending_snapshots() {
@@ -560,6 +599,33 @@ void FlowEngine::expire_all_pending_snapshots() {
     }
 }
 
+// Resolve every snapshot still in the Pending state, judged at time `now`:
+// pending snapshots at least as old as the timeout are marked expired (only
+// when rtt_timeout_factor > 0); every other pending one is marked invalid.
+void FlowEngine::resolve_pending_snapshots(const std::chrono::steady_clock::time_point& now) {
+    using ClockDuration = std::chrono::steady_clock::duration;
+    const bool timeout_enabled = flow_cfg_.rtt_timeout_factor > 0.0;
+    // NOTE(review): the factor is used directly as a number of seconds here; confirm.
+    const auto timeout = std::chrono::duration_cast<ClockDuration>(
+        std::chrono::duration<double>(flow_cfg_.rtt_timeout_factor));
+
+    // Collect ids first; the mark_* calls run only after this scan is done.
+    std::vector<PacketDropId> expired_ids;
+    std::vector<PacketDropId> invalid_ids;
+    expired_ids.reserve(flow_drop_snapshots_.size());
+    invalid_ids.reserve(flow_drop_snapshots_.size());
+    for (const auto& [id, snapshot] : flow_drop_snapshots_) {
+        if (snapshot.state != SnapshotState::Pending) continue;
+        const bool timed_out = timeout_enabled && (now - snapshot.timestamp >= timeout);
+        (timed_out ? expired_ids : invalid_ids).push_back(id);
+    }
+
+    // All expired snapshots are handled before any invalid one, in the
+    // order they appear in flow_drop_snapshots_.
+    for (const auto& id : expired_ids) mark_snapshot_expired(id);
+    for (const auto& id : invalid_ids) mark_snapshot_invalid(id);
+}
+
 
 FlowEngine::FlowDecision FlowEngine::evaluate() const {
     const auto eval = evaluate_flow_decision(
@@ -575,11 +641,27 @@ FlowEngine::FlowDecision FlowEngine::evaluate() const {
         const double miss_prob = std::clamp(flow_cfg_.retransmission_miss_probability, 0.0, 1.0);
 
         const auto flow_tag = flow_debug_details(flow_key_);
+        const auto* verdict_text = [&]() -> const char* {
+            switch (eval.decision) {
+                case FlowDecision::FINISHED_CLOSED_LOOP:
+                    return "closed_loop";
+                case FlowDecision::FINISHED_NOT_CLOSED_LOOP:
+                    return "not_closed_loop";
+                case FlowDecision::FINISHED_DUPLICATE_EXCEEDED:
+                    return "duplicates_exceeded";
+                case FlowDecision::FINISHED_NO_DECISION:
+                    return "no_decision";
+                case FlowDecision::PENDING:
+                default:
+                    return "pending";
+            }
+        }();
 
         TCPLOG_INFO(
-            "[flow_eval] flow=%s data_pkts=%llu dup_pkts=%llu rtx_pkts=%llu non_rtx_pkts=%llu "
-            "dup_ratio=%.6f miss_prob=%.6f p_closed=%.6f p_not_closed=%.6f denom=%.6f closed_weight=%.6f",
+            "[flow_eval] flow=%s verdict=%s data=%llu dup=%llu rtx=%llu non_rtx=%llu "
+            "dup_ratio=%.6f miss_prob=%.6f p_closed=%.6f p_not_closed=%.6f closed_weight=%.6f",
             flow_tag.c_str(),
+            verdict_text,
             static_cast<unsigned long long>(data_pkts),
             static_cast<unsigned long long>(dup_pkts),
             static_cast<unsigned long long>(retransmitted),
@@ -588,7 +670,6 @@ FlowEngine::FlowDecision FlowEngine::evaluate() const {
             miss_prob,
             eval.p_closed,
             eval.p_not_closed,
-            eval.p_closed + eval.p_not_closed,
             eval.closed_weight);
     }
 
@@ -611,6 +692,16 @@ void FlowEngine::evaluate_if_ready() {
         return; // Decision already made; keep it.
     }
 
+    const bool aggregate_phase_configured =
+        flow_cfg_.aggregates_enabled &&
+        flow_cfg_.max_drops_aggregates > 0;
+    const auto aggregates_status = openpenny::current_aggregates_status();
+    if (aggregate_phase_configured &&
+        aggregates_status != RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP &&
+        aggregates_status != RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED) {
+        return;
+    }
+
     // Do not evaluate if we have not observed any data packets; the classifier
     // requires data-bearing evidence.
     if (flow_stats_.data_packets() == 0) {
diff --git a/src/penny/flow/manager/ThreadFlowManager.cpp b/src/penny/flow/manager/ThreadFlowManager.cpp
index c5703a2..417e8f8 100644
--- a/src/penny/flow/manager/ThreadFlowManager.cpp
+++ b/src/penny/flow/manager/ThreadFlowManager.cpp
@@ -9,16 +9,27 @@ namespace openpenny::penny {
 
 ThreadFlowManager::ThreadFlowManager() = default;
 
-ThreadFlowManager::ThreadFlowManager(const Config::ActiveConfig& cfg) : table_cfg_(cfg) {}
+ThreadFlowManager::ThreadFlowManager(const Config::ActiveConfig& cfg) : table_cfg_(cfg) {
+    reserve_for_config(cfg);  // pre-size the flow tables from cfg.max_tracked_flows
+}
 
 void ThreadFlowManager::configure(const Config::ActiveConfig& cfg) {
     table_cfg_ = cfg;
+    reserve_for_config(cfg);  // pre-size the flow tables before touching existing entries
     for (auto& [_, entry] : table_active_flows_) {
         entry.flow.configure(table_cfg_);
         entry.flow.set_drop_sink(drop_sink_);
+        entry.flow.set_snapshot_refresh_sink(snapshot_refresh_sink_);  // re-apply the stored refresh sink as well
     }
 }
 
+void ThreadFlowManager::reserve_for_config(const Config::ActiveConfig& cfg) {
+    if (const auto wanted = cfg.max_tracked_flows; wanted != 0) {  // 0: reserve nothing
+        table_active_flows_.reserve(wanted);
+        table_completed_flows_.reserve(wanted);
+    }
+}
+
 void ThreadFlowManager::set_drop_sink(FlowEngine::DropSnapshotSink sink) {
     drop_sink_ = std::move(sink);
     for (auto& [_, entry] : table_active_flows_) {
@@ -26,28 +37,35 @@ void ThreadFlowManager::set_drop_sink(FlowEngine::DropSnapshotSink sink) {
     }
 }
 
-bool ThreadFlowManager::add_new_flow(const FlowKey& key,
-                                  uint32_t seq,
-                                  uint32_t payload_bytes,
-                                  bool is_syn,
-                                  const std::chrono::steady_clock::time_point& ts) {
-    
+// Store the refresh sink and hand a copy to every flow already being tracked.
+void ThreadFlowManager::set_snapshot_refresh_sink(FlowEngine::SnapshotRefreshSink sink) {
+    snapshot_refresh_sink_ = std::move(sink);
+    for (auto& kv : table_active_flows_)
+        kv.second.flow.set_snapshot_refresh_sink(snapshot_refresh_sink_);
+}
+
+FlowEngineEntry* ThreadFlowManager::add_new_flow(const FlowKey& key,
+                                                 uint32_t seq,
+                                                 uint32_t payload_bytes,
+                                                 bool is_syn,
+                                                 const std::chrono::steady_clock::time_point& ts) {
     // Ignore ACK packets with no payload when deciding whether to start monitoring a new flow.
     if (payload_bytes == 0 && !is_syn) {
-        return false;
+        return nullptr;
     }
 
     // try_emplace: insert a new entry if the key is absent, otherwise return the existing one without extra copies.
     auto [it, inserted] = table_active_flows_.try_emplace(key);
     auto& entry = it->second;
     if (!inserted) {
-        return false;
+        return nullptr;
     }
     auto& counters = openpenny::app::current_thread_counters();
     counters.flows_monitored++;
     counters.active_flows++;
     entry.flow.configure(table_cfg_); // apply current config for counters/thresholds
     entry.flow.set_drop_sink(drop_sink_);
+    entry.flow.set_snapshot_refresh_sink(snapshot_refresh_sink_);
     entry.flow.set_flow_key(key); // stash identifiers once
     entry.last_seen = ts;
     entry.first_seen = ts;
@@ -61,7 +79,7 @@ bool ThreadFlowManager::add_new_flow(const FlowKey& key,
         (void)end_seq; // end_seq retained for potential future use
     }
     entry.flow.record_packet(); // count the first packet
-    return true;
+    return &entry;
 }
 
 void ThreadFlowManager::track_packet(const ::openpenny::net::PacketView& packet,
@@ -71,20 +89,20 @@ void ThreadFlowManager::track_packet(const ::openpenny::net::PacketView& packet,
     const auto now = ts;
 
     auto it = table_active_flows_.find(packet.flow);
+    FlowEngineEntry* new_entry = nullptr;
     if (it == table_active_flows_.end()) {
         if (max_flows != 0 && active_flow_count(max_flows) >= max_flows) {
             return;
         }
-        add_new_flow(packet.flow,
-                     packet.tcp.seq,
-                     static_cast<uint32_t>(packet.payload_bytes),
-                     is_syn,
-                     now);
-        it = table_active_flows_.find(packet.flow);
+        new_entry = add_new_flow(packet.flow,
+                                 packet.tcp.seq,
+                                 static_cast<uint32_t>(packet.payload_bytes),
+                                 is_syn,
+                                 now);
+        if (!new_entry) return;
     }
-    if (it == table_active_flows_.end()) return;
 
-    auto& entry = it->second;
+    auto& entry = (it != table_active_flows_.end()) ? it->second : *new_entry;
     auto& flow = entry.flow;
     entry.last_seen = now;
     // Flow starts in PENDING_SEEN_DATA when we first see payload without SYN.
@@ -156,16 +174,16 @@ bool ThreadFlowManager::complete_flow(const FlowKey& key, const char* reason) {
     const auto* test_status_text = [] (FlowEngine::FlowDecision status) -> const char* {
         switch (status) {
             case FlowEngine::FlowDecision::FINISHED_CLOSED_LOOP:
-                return "FINISHED_CLOSED_LOOP";
+                return "closed_loop";
             case FlowEngine::FlowDecision::FINISHED_NOT_CLOSED_LOOP:
-                return "FINISHED_NOT_CLOSED_LOOP";
+                return "not_closed_loop";
             case FlowEngine::FlowDecision::FINISHED_DUPLICATE_EXCEEDED:
-                return "FINISHED_DUPLICATE_EXCEEDED";
+                return "duplicates_exceeded";
             case FlowEngine::FlowDecision::FINISHED_NO_DECISION:
-                return "FINISHED_NO_DECISION";
+                return "no_decision";
             case FlowEngine::FlowDecision::PENDING:
             default:
-                return "PENDING";
+                return "pending";
         }
     }(flow.final_decision());
 
@@ -173,9 +191,9 @@ bool ThreadFlowManager::complete_flow(const FlowKey& key, const char* reason) {
         const auto flow_tag = flow_debug_details(key);
 
         TCPLOG_INFO(
-            "[flow_complete] reason=%s tcp_state=%s test_status=%s flow=%s "
-            "data_pkts=%llu dup_pkts=%llu in_order_pkts=%llu out_of_order_pkts=%llu "
-            "rtx_pkts=%llu non_rtx_pkts=%llu pending_rtx_pkts=%llu",
+            "[flow_result] stage=complete reason=%s tcp_state=%s verdict=%s flow=%s "
+            "data=%llu dup=%llu in_order=%llu out_of_order=%llu "
+            "rtx=%llu non_rtx=%llu pending_rtx=%llu",
             reason ? reason : "completed",
             tcp_state_text,
             test_status_text,
@@ -189,8 +207,8 @@ bool ThreadFlowManager::complete_flow(const FlowKey& key, const char* reason) {
             static_cast<unsigned long long>(flow.pending_retransmissions()));
     }
 
-    // Expire any remaining pending snapshots before tearing down the flow.
-    entry.flow.expire_all_pending_snapshots();
+    // Resolve any remaining pending snapshots before tearing down the flow.
+    entry.flow.resolve_pending_snapshots(std::chrono::steady_clock::now());
 
     table_completed_flows_.insert(it->first);
     table_active_flows_.erase(it);
@@ -214,7 +232,9 @@ bool ThreadFlowManager::complete_flow(const FlowKey& key, const char* reason) {
             counters.flows_not_closed_loop++;
             break;
         case FlowEngine::FlowDecision::FINISHED_DUPLICATE_EXCEEDED:
-            counters.flows_duplicates_exceeded++;
+            if (entry.state != FlowTrackingState::INTERRUPTED_DUPLICATE_EXCEEDED) {
+                counters.flows_duplicates_exceeded++;
+            }
             break;
         default:
             break;
diff --git a/src/penny/flow/timer/ThreadFlowEventTimer.cpp b/src/penny/flow/timer/ThreadFlowEventTimer.cpp
index 7fef6d7..c02408b 100644
--- a/src/penny/flow/timer/ThreadFlowEventTimer.cpp
+++ b/src/penny/flow/timer/ThreadFlowEventTimer.cpp
@@ -8,8 +8,8 @@
  * Design principles:
  *   1. Expirations are prioritised to ensure snapshots age out promptly.
  *   2. Flow mutation never happens while holding internal locks.
- *   3. All callbacks execute in the timer thread itself to avoid
- *      cross-thread data races.
+ *   3. All callbacks execute on the owning worker thread when it drains
+ *      this manager, avoiding per-queue helper-thread context switches.
  *   4. Cancelled events are garbage collected lazily using a token heap.
  */
 
@@ -32,11 +32,8 @@ ThreadFlowEventTimerManager& ThreadFlowEventTimerManager::instance() {
     return mgr;
 }
 
-std::function<void(FlowEngine*, PacketDropId, ThreadFlowEventTimerManager::SnapshotEventKind)>
-    ThreadFlowEventTimerManager::snapshot_hook_{};
-
 ThreadFlowEventTimerManager::~ThreadFlowEventTimerManager() {
-    stop(); // Ensure the timer thread is terminated cleanly.
+    stop(); // Ensure the worker-local timer state is flushed cleanly.
 }
 
 // -----------------------------------------------------------------------------
@@ -46,40 +43,25 @@ ThreadFlowEventTimerManager::~ThreadFlowEventTimerManager() {
 void ThreadFlowEventTimerManager::start(double timeout_sec) {
     std::lock_guard<std::mutex> lock(mutex_);
     timeout_sec_ = timeout_sec;
-
-    if (running_) return; // Prevent multiple timer threads from starting.
-
-    stop_flag_ = false;
+    if (running_) return;
     running_ = true;
-    thread_ = std::thread(&ThreadFlowEventTimerManager::timer_loop, this); // Spawn background timer loop.
+    next_deadline_.store(kNoDeadline, std::memory_order_release);
+    queued_event_count_.store(0, std::memory_order_release);
 }
 
 void ThreadFlowEventTimerManager::stop() {
-    {
-        std::lock_guard<std::mutex> lock(mutex_);
-        if (!running_) return; // No action needed if thread is not running.
-        stop_flag_ = true;
-    }
-
-    cv_.notify_all(); // Wake sleeping thread so it can terminate.
-
-    if (thread_.joinable()) {
-        thread_.join(); // Wait for graceful thread shutdown.
-    }
-
-    // Reset all internal state after stopping.
-    {
-        std::lock_guard<std::mutex> lock(mutex_);
-        running_ = false;
-        heap_ = {};
-        by_id_.clear();
-        by_flow_.clear();
-        cancelled_.clear();
-        retransmit_seen_.clear();
-        events_.clear();
-        callbacks_.clear();
-        next_token_ = 1;
-    }
+    std::lock_guard<std::mutex> lock(mutex_);
+    if (!running_) return;
+    running_ = false;
+    heap_ = {};
+    by_id_.clear();
+    by_flow_.clear();
+    cancelled_.clear();
+    retransmit_seen_.clear();
+    events_.clear();
+    queued_event_count_.store(0, std::memory_order_release);
+    next_deadline_.store(kNoDeadline, std::memory_order_release);
+    next_token_ = 1;
 }
 
 // -----------------------------------------------------------------------------
@@ -111,8 +93,11 @@ void ThreadFlowEventTimerManager::register_drop(const ::openpenny::FlowKey& key,
     heap_.push(e); // Add to min-heap ordered by nearest expiry first.
     by_id_[PacketKey{flow, packet_id}] = e; // Register lookup by (flow, packet_id).
     by_flow_.emplace(flow, e.token); // Track token association to flow.
-
-    wake_locked(); // Wake timer thread to re-evaluate scheduling.
+    const auto deadline = e.deadline.time_since_epoch().count();
+    const auto current = next_deadline_.load(std::memory_order_relaxed);
+    if (deadline < current) {
+        next_deadline_.store(deadline, std::memory_order_release);
+    }
 }
 
 void ThreadFlowEventTimerManager::enqueue_retransmitted(PacketDropId packet_id, FlowEngine* flow) {
@@ -121,8 +106,7 @@ void ThreadFlowEventTimerManager::enqueue_retransmitted(PacketDropId packet_id,
 
     // Queue retransmission event for later servicing.
     events_.push_back(Event{Event::Kind::Retransmit, packet_id, flow, 0});
-
-    wake_locked(); // Wake timer loop.
+    queued_event_count_.store(events_.size(), std::memory_order_release);
 }
 
 void ThreadFlowEventTimerManager::enqueue_duplicate(FlowEngine* flow, std::uint32_t seq, std::uint32_t payload) {
@@ -131,8 +115,7 @@ void ThreadFlowEventTimerManager::enqueue_duplicate(FlowEngine* flow, std::uint3
 
     // Queue duplicate detection event for later servicing.
     events_.push_back(Event{Event::Kind::Duplicate, {}, flow, seq, payload});
-
-    wake_locked(); // Wake timer loop.
+    queued_event_count_.store(events_.size(), std::memory_order_release);
 }
 
 // -----------------------------------------------------------------------------
@@ -150,28 +133,19 @@ void ThreadFlowEventTimerManager::purge_flow(FlowEngine* flow) {
     }
 
     by_flow_.erase(flow); // Remove all tokens referencing flow.
-    retransmit_seen_.erase(
-        std::remove_if(retransmit_seen_.begin(),
-                       retransmit_seen_.end(),
-                       [flow](const PacketKey& k) { return k.flow == flow; }),
-        retransmit_seen_.end()
-    );
-
-    // Remove pending callbacks that reference the purged flow.
-    callbacks_.erase(
-        std::remove_if(callbacks_.begin(), callbacks_.end(),
-                       [flow](const Callback& cb) { return cb.flow == flow; }),
-        callbacks_.end()
-    );
-    // Resync the lock-free counter with the post-erase deque size so the
-    // drain_callbacks() fast path doesn't keep firing on stale entries.
-    pending_callbacks_.store(callbacks_.size(), std::memory_order_release);
-
-    wake_locked(); // Wake timer loop to apply purge.
-}
-
-void ThreadFlowEventTimerManager::wake_locked() {
-    cv_.notify_all(); // Wake timer thread (called while holding mutex_).
+    for (auto it = retransmit_seen_.begin(); it != retransmit_seen_.end();) {
+        if (it->flow == flow) {
+            it = retransmit_seen_.erase(it);
+        } else {
+            ++it;
+        }
+    }
+    events_.erase(
+        std::remove_if(events_.begin(), events_.end(),
+                       [flow](const Event& ev) { return ev.flow == flow; }),
+        events_.end());
+    queued_event_count_.store(events_.size(), std::memory_order_release);
+    refresh_next_deadline_locked();
 }
 
 // -----------------------------------------------------------------------------
@@ -185,56 +159,52 @@ void ThreadFlowEventTimerManager::run_callbacks(std::deque<Callback>& pending) {
         // Dispatch callback by type (snapshot mutation).
         if (cb.kind == Callback::Kind::Expire) {
             cb.flow->mark_snapshot_expired(cb.packet_id);
-            if (snapshot_hook_) snapshot_hook_(cb.flow, cb.packet_id, SnapshotEventKind::Expire);
         }
         else if (cb.kind == Callback::Kind::Retransmit) {
             cb.flow->mark_snapshot_retransmitted(cb.packet_id);
-            if (snapshot_hook_) snapshot_hook_(cb.flow, cb.packet_id, SnapshotEventKind::Retransmit);
         }
         else if (cb.kind == Callback::Kind::Duplicate) {
             cb.flow->register_duplicate_snapshot(cb.seq);
             cb.flow->evaluate_snapshot_duplicate_threshold();
-            if (snapshot_hook_) snapshot_hook_(cb.flow, 0, SnapshotEventKind::Duplicate);
         }
 
         cb.flow->evaluate_if_ready(); // Re-check whether the flow now satisfies its scheduling thresholds.
     }
 }
 
-// -----------------------------------------------------------------------------
-// Timer loop (long running background scheduling thread)
-// -----------------------------------------------------------------------------
+void ThreadFlowEventTimerManager::refresh_next_deadline_locked() { // Prunes cancelled heap tops, then republishes the earliest deadline; the visible drain_callbacks() path reaches it with mutex_ held.
+    while (!heap_.empty() && cancelled_.count(heap_.top().token)) { // Top entry was cancelled: discard it together with its cancellation mark.
+        cancelled_.erase(heap_.top().token);
+        heap_.pop();
+    }
 
-void ThreadFlowEventTimerManager::timer_loop() {
-    std::unique_lock<std::mutex> lock(mutex_);
+    if (heap_.empty()) {
+        next_deadline_.store(kNoDeadline, std::memory_order_release); // Nothing scheduled.
+    } else {
+        next_deadline_.store(
+            heap_.top().deadline.time_since_epoch().count(), // Stored as a raw tick count; drain_callbacks() compares it with now.time_since_epoch().count().
+            std::memory_order_release);
+    }
+}
 
+void ThreadFlowEventTimerManager::collect_ready_callbacks(
+    std::deque<Callback>& pending,
+    const std::chrono::steady_clock::time_point& now) {
     while (true) {
-        if (stop_flag_) break; // Stop signal received.
-
-        const auto now = std::chrono::steady_clock::now();
-
-        // Remove stale cancelled entries at the top of the heap.
-        while (!heap_.empty() && cancelled_.count(heap_.top().token)) {
-            cancelled_.erase(heap_.top().token);
-            heap_.pop();
-        }
-
+        refresh_next_deadline_locked();
         bool processed_item = false;
 
-        // 1) Process the next expiry if it is due.
         if (!heap_.empty() && now >= heap_.top().deadline) {
             auto entry = heap_.top();
             heap_.pop();
 
-            // Remove entry from lookup maps if not already invalidated.
             auto id_it = by_id_.find(PacketKey{entry.flow, entry.packet_id});
             if (id_it != by_id_.end() && id_it->second.token == entry.token) {
                 by_id_.erase(id_it);
             }
 
-            // Remove only the token that matches this entry for the given flow.
             auto range = by_flow_.equal_range(entry.flow);
-            for (auto it = range.first; it != range.second; ) {
+            for (auto it = range.first; it != range.second;) {
                 if (it->second == entry.token) {
                     it = by_flow_.erase(it);
                     break;
@@ -243,47 +213,34 @@ void ThreadFlowEventTimerManager::timer_loop() {
                 }
             }
 
-            // Ensure we only schedule snapshot mutation if the flow is still alive.
             if (auto alive = entry.flow_alive.lock(); alive && *alive && entry.flow) {
                 if (TCPLOG_ENABLED(INFO)) {
                     const auto packet_id_text = format_packet_drop_id(entry.packet_id);
                     TCPLOG_INFO("[packet_expired] flow=%s packet_id=%s token=%" PRIu64,
-                        flow_debug_details(entry.flow->flow_key()).c_str(),
-                        packet_id_text.c_str(),
-                        entry.token
-                    );
+                                flow_debug_details(entry.flow->flow_key()).c_str(),
+                                packet_id_text.c_str(),
+                                entry.token);
                 }
-
-                // Schedule expiration callback for lock-free handling.
-                callbacks_.push_back(Callback{
-                    Callback::Kind::Expire, entry.packet_id, entry.flow, 0
-                });
-                pending_callbacks_.fetch_add(1, std::memory_order_release);
+                pending.push_back(
+                    Callback{Callback::Kind::Expire, entry.packet_id, entry.flow, 0});
             }
 
             processed_item = true;
-        }
-
-        // 2) If no expiration was ready, service one queued event.
-        else if (!events_.empty()) {
+        } else if (!events_.empty()) {
             auto ev = events_.front();
             events_.pop_front();
+            queued_event_count_.store(events_.size(), std::memory_order_release);
 
             if (ev.kind == Event::Kind::Retransmit && ev.flow) {
                 auto it = by_id_.find(PacketKey{ev.flow, ev.packet_id});
                 if (it != by_id_.end()) {
                     const auto token = it->second.token;
-
-                    // Skip duplicate retransmit handling for the same flow/packet_id.
                     const PacketKey key{ev.flow, ev.packet_id};
-                    if (std::find(retransmit_seen_.begin(), retransmit_seen_.end(), key) != retransmit_seen_.end()) {
+                    const auto [_, inserted] = retransmit_seen_.insert(key);
+                    if (!inserted) {
                         processed_item = true;
                         continue;
                     }
-                    retransmit_seen_.push_back(key);
-
-                    // If we've already cancelled this token (due to an earlier
-                    // retransmit event), skip duplicate handling/logging.
                     if (cancelled_.find(token) != cancelled_.end()) {
                         processed_item = true;
                         continue;
@@ -293,88 +250,55 @@ void ThreadFlowEventTimerManager::timer_loop() {
 
                     if (TCPLOG_ENABLED(INFO)) {
                         const auto packet_id_text = format_packet_drop_id(ev.packet_id);
-                        TCPLOG_INFO("[packet_retransmitted] flow=%s packet_id=%s seq=%" PRIu32,
+                        TCPLOG_INFO(
+                            "[drop_event] action=retransmitted flow=%s packet_id=%s seq=%" PRIu32,
                             flow_debug_details(ev.flow->flow_key()).c_str(),
                             packet_id_text.c_str(),
-                            ev.seq
-                        );
+                            ev.seq);
                     }
 
-                    callbacks_.push_back(Callback{
-                        Callback::Kind::Retransmit, ev.packet_id, it->second.flow, 0
-                    });
-                    pending_callbacks_.fetch_add(1, std::memory_order_release);
+                    pending.push_back(
+                        Callback{Callback::Kind::Retransmit, ev.packet_id, it->second.flow, 0});
                 }
-            }
-            else if (ev.kind == Event::Kind::Duplicate && ev.flow) {
+            } else if (ev.kind == Event::Kind::Duplicate && ev.flow) {
                 if (TCPLOG_ENABLED(DEBUG)) {
                     TCPLOG_DEBUG("[duplicate_detected] flow=%s seq=%" PRIu32 " payload=%u",
-                        flow_debug_details(ev.flow->flow_key()).c_str(),
-                        ev.seq,
-                        ev.payload);
+                                 flow_debug_details(ev.flow->flow_key()).c_str(),
+                                 ev.seq,
+                                 ev.payload);
                 }
-
-                callbacks_.push_back(Callback{
-                    Callback::Kind::Duplicate, {}, ev.flow, ev.seq
-                });
-                pending_callbacks_.fetch_add(1, std::memory_order_release);
+                pending.push_back(Callback{Callback::Kind::Duplicate, {}, ev.flow, ev.seq});
             }
 
             processed_item = true;
         }
 
-        // 2.5) Run callbacks immediately if any were produced.
-        if (processed_item && !callbacks_.empty()) {
-            std::deque<Callback> pending;
-            pending.swap(callbacks_); // Extract callbacks without copying.
-
-            lock.unlock();
-            run_callbacks(pending); // Execute snapshot mutations in lock-free mode.
-            lock.lock();
-
-            continue; // Re-evaluate loop state after callback execution.
-        }
-
-        if (processed_item) continue;
-
-        // 3) No action needed right now: sleep until the next expiry or event wake.
-        if (!heap_.empty() && timeout_sec_ > 0.0) {
-            cv_.wait_until(lock, heap_.top().deadline, [&] {
-                return stop_flag_ || !events_.empty();
-            });
-        } else {
-            cv_.wait(lock, [&] {
-                return stop_flag_ || !events_.empty() ||
-                       (!heap_.empty() && timeout_sec_ > 0.0);
-            });
+        if (!processed_item) {
+            refresh_next_deadline_locked();
+            return;
         }
     }
 }
 
 void ThreadFlowEventTimerManager::drain_callbacks() {
-    // Lock-free fast path. drain_callbacks() is called from every worker's
-    // before_poll() — i.e. potentially millions of times per second across
-    // busy-polling AF_XDP workers. Acquiring mutex_ on every call serialises
-    // the hot path on a single global lock; with many workers this becomes
-    // the dominant bottleneck. Skip the lock entirely when no callbacks
-    // are queued, which is the overwhelming common case.
-    if (pending_callbacks_.load(std::memory_order_acquire) == 0) {
-        return;
+    const auto now = std::chrono::steady_clock::now(); // One timestamp shared by the fast-path check and the expiry collection below.
+    if (queued_event_count_.load(std::memory_order_acquire) == 0) { // Fast path, evaluated without taking mutex_.
+        const auto next_deadline = next_deadline_.load(std::memory_order_acquire);
+        if (next_deadline == kNoDeadline ||
+            now.time_since_epoch().count() < next_deadline) {
+            return; // No queued events and no expiry due yet.
+        }
     }
 
     std::deque<Callback> pending;
     {
         std::lock_guard<std::mutex> lock(mutex_);
-        pending.swap(callbacks_);
-        pending_callbacks_.store(0, std::memory_order_release);
+        if (!running_) { // Manager is stopped: do not service anything.
+            return;
+        }
+        collect_ready_callbacks(pending, now); // Gather due expiries and queued events under the lock; they run after it is released.
     }
     run_callbacks(pending);
 }
 
-void ThreadFlowEventTimerManager::set_snapshot_hook(std::function<void(FlowEngine*,
-                                                                       PacketDropId,
-                                                                       SnapshotEventKind)> hook) {
-    snapshot_hook_ = std::move(hook);
-}
-
 } // namespace openpenny::penny
diff --git a/tests/unit/flow/test_aggregate_duplicate_fallback.cpp b/tests/unit/flow/test_aggregate_duplicate_fallback.cpp
new file mode 100644
index 0000000..2449e60
--- /dev/null
+++ b/tests/unit/flow/test_aggregate_duplicate_fallback.cpp
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+#include "openpenny/app/core/AggregatesController.h"
+#include "openpenny/app/core/DropCollectorBinding.h"
+#include "openpenny/app/core/PerThreadStats.h"
+#include "openpenny/app/core/RuntimeSetup.h"
+#include "openpenny/config/Config.h"
+
+#include <atomic>
+#include <cassert>
+#include <chrono>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <thread>
+
+namespace {
+
+openpenny::FlowKey make_key() { // Fixed flow key for the single snapshot this test upserts.
+    openpenny::FlowKey key{};
+    key.src = 0x0a000011; // NOTE(review): 10.0.0.17 if read as a big-endian IPv4 value; byte order is not visible here.
+    key.dst = 0x0a000012; // NOTE(review): 10.0.0.18 under the same assumption.
+    key.sport = 2222;
+    key.dport = 5201;
+    key.ip_proto = 6; // 6 is the IANA protocol number for TCP.
+    return key;
+}
+
+openpenny::penny::PacketDropSnapshot make_duplicate_exceeded_snapshot() { // Expired snapshot: 10 data/droppable packets, 2 duplicates, 1 drop.
+    openpenny::penny::PacketDropSnapshot snap{};
+    snap.timestamp = std::chrono::steady_clock::now();
+    snap.state = openpenny::penny::SnapshotState::Expired; // Already resolved rather than Pending.
+    for (int i = 0; i < 10; ++i) {
+        snap.stats.record_data_packet();
+        snap.stats.record_droppable_packet();
+    }
+    for (int i = 0; i < 2; ++i) { // 2 duplicates against 10 data packets; main() configures max_duplicate_fraction = 0.1.
+        snap.stats.record_duplicate_packet();
+    }
+    snap.stats.record_drop();
+    snap.stats.inc_non_retransmitted(); // The single drop is counted as not retransmitted.
+    return snap;
+}
+
+} // namespace
+
+int main() { // Scenario: a started controller receives one Expired snapshot with too many duplicates and must report DUPLICATES_EXCEEDED.
+    openpenny::app::init_thread_counters(1); // presumably sizes the per-thread counter table to one slot — verify against PerThreadStats.
+    openpenny::app::set_thread_counter_index(0);
+
+    openpenny::Config cfg;
+    cfg.active.aggregates_enabled = true;
+    cfg.active.max_drops_aggregates = 1;
+    cfg.active.max_duplicate_fraction = 0.1; // The snapshot built above carries 2 duplicates in 10 data packets.
+    cfg.active.retransmission_miss_probability = 0.0;
+    cfg.active.min_closed_loop_flows = 0;
+
+    openpenny::PipelineOptions opts{};
+    opts.mode = openpenny::PipelineOptions::Mode::Active;
+
+    openpenny::set_runtime_setup(cfg, opts, false, false);
+    auto& runtime = openpenny::runtime_setup_mutable();
+    runtime.aggregates_status = openpenny::RuntimeStatus::AggregatesStatus::PENDING; // Start from a clean PENDING state with no prior evaluation.
+    runtime.aggregate_eval_counters = {};
+    runtime.has_aggregate_eval = false;
+    runtime.aggregates_active = true;
+
+    std::atomic<bool> stop_flag{false};
+    auto collector = std::make_shared<openpenny::DropCollector>(1);
+    openpenny::AggregatesController controller(
+        cfg,
+        opts,
+        collector,
+        stop_flag,
+        std::function<bool()>{}); // An empty std::function is passed as the last constructor argument.
+    controller.start(); // Started before the upsert; the outcome is observed by polling below.
+
+    auto& counters = openpenny::app::current_thread_counters(); // Live counters set to the same figures as the snapshot.
+    counters.droppable_packets = 10;
+    counters.data_packets = 10;
+    counters.duplicate_packets = 2;
+    counters.dropped_packets = 1;
+    counters.non_retransmitted_packets = 1;
+    counters.pending_retransmissions = 0;
+
+    openpenny::app::DropCollectorBinding::instance().upsert( // Hand the snapshot to the collector through the binding.
+        collector,
+        "worker-0",
+        0,
+        make_key(),
+        openpenny::penny::make_packet_drop_id(2000, 100),
+        make_duplicate_exceeded_snapshot());
+
+    const auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(1); // Poll for at most one second.
+    while (runtime.aggregates_status == openpenny::RuntimeStatus::AggregatesStatus::PENDING && // Leave as soon as the status changes.
+           std::chrono::steady_clock::now() < deadline) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+
+    assert(runtime.aggregates_status ==
+           openpenny::RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED);
+    assert(runtime.has_aggregate_eval);
+    assert(runtime.aggregate_eval_counters.data_packets == 10); // Evaluation counters match the snapshot figures.
+    assert(runtime.aggregate_eval_counters.duplicate_packets == 2);
+    assert(!runtime.aggregates_active); // The aggregate is deactivated after this outcome.
+    assert(!collector->accepting.load(std::memory_order_relaxed)); // The collector no longer accepts snapshots.
+    assert(!controller.collector_completed()); // ...but it is not reported as completed.
+    assert(!stop_flag.load(std::memory_order_relaxed)); // The controller did not raise the stop flag itself.
+
+    stop_flag.store(true, std::memory_order_relaxed); // Request shutdown, then wait for the controller.
+    controller.join();
+    return 0;
+}
diff --git a/tests/unit/flow/test_aggregate_freeze_at_drop_limit.cpp b/tests/unit/flow/test_aggregate_freeze_at_drop_limit.cpp
new file mode 100644
index 0000000..f3e5e37
--- /dev/null
+++ b/tests/unit/flow/test_aggregate_freeze_at_drop_limit.cpp
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+#include "openpenny/app/core/AggregatesController.h"
+#include "openpenny/app/core/DropCollectorBinding.h"
+#include "openpenny/app/core/PerThreadStats.h"
+#include "openpenny/app/core/RuntimeSetup.h"
+#include "openpenny/config/Config.h"
+
+#include <atomic>
+#include <cassert>
+#include <chrono>
+#include <cstdint>
+#include <functional>
+#include <memory>
+
+namespace {
+
+openpenny::FlowKey make_key(std::uint16_t sport) { // Flow key that varies only in the source port, so each call site names a distinct flow.
+    openpenny::FlowKey key{};
+    key.src = 0x0a000001; // NOTE(review): 10.0.0.1 if read as a big-endian IPv4 value; byte order is not visible here.
+    key.dst = 0x0a000002; // NOTE(review): 10.0.0.2 under the same assumption.
+    key.sport = sport;
+    key.dport = 5201;
+    key.ip_proto = 6; // 6 is the IANA protocol number for TCP.
+    return key;
+}
+
+openpenny::penny::PacketDropSnapshot make_pending_snapshot() { // Pending snapshot: one data/droppable packet, one drop still awaiting retransmission.
+    openpenny::penny::PacketDropSnapshot snap{};
+    snap.timestamp = std::chrono::steady_clock::now();
+    snap.state = openpenny::penny::SnapshotState::Pending;
+    snap.stats.record_data_packet();
+    snap.stats.record_droppable_packet();
+    snap.stats.record_drop();
+    snap.stats.inc_pending_retransmission(); // main() later reverses this when it resolves the snapshot as Expired.
+    return snap;
+}
+
+} // namespace
+
+int main() { // Scenario: with max_drops_aggregates = 1, a second drop and later counter growth must not reach the evaluation.
+    openpenny::app::init_thread_counters(1); // presumably sizes the per-thread counter table to one slot — verify against PerThreadStats.
+    openpenny::app::set_thread_counter_index(0);
+
+    openpenny::Config cfg;
+    cfg.active.aggregates_enabled = true;
+    cfg.active.max_drops_aggregates = 1; // Only one snapshot is expected to be accepted (asserted below).
+    cfg.active.max_duplicate_fraction = 1.0; // No duplicates are recorded anywhere in this test.
+    cfg.active.retransmission_miss_probability = 0.0;
+
+    openpenny::PipelineOptions opts{};
+    opts.mode = openpenny::PipelineOptions::Mode::Active;
+
+    openpenny::set_runtime_setup(cfg, opts, false, false);
+    auto& runtime = openpenny::runtime_setup_mutable();
+    runtime.aggregates_status = openpenny::RuntimeStatus::AggregatesStatus::PENDING; // Start from a clean PENDING state with no prior evaluation.
+    runtime.aggregate_eval_counters = {};
+    runtime.has_aggregate_eval = false;
+    runtime.aggregates_active = true;
+
+    std::atomic<bool> stop_flag{false};
+    auto collector = std::make_shared<openpenny::DropCollector>(1);
+    openpenny::AggregatesController controller( // Never start()ed here: evaluation is driven synchronously further down.
+        cfg,
+        opts,
+        collector,
+        stop_flag,
+        std::function<bool()>{});
+
+    auto& counters = openpenny::app::current_thread_counters(); // Live counter values at the moment the first drop is upserted.
+    counters.droppable_packets = 10;
+    counters.data_packets = 10;
+    counters.dropped_packets = 1;
+    counters.pending_retransmissions = 1;
+
+    const auto first_key = make_key(40001);
+    const auto first_id = openpenny::penny::make_packet_drop_id(1000, 100);
+    auto first_snapshot = make_pending_snapshot(); // Kept mutable: it is resolved and upserted again below.
+    openpenny::app::DropCollectorBinding::instance().upsert(
+        collector,
+        "worker-0",
+        0,
+        first_key,
+        first_id,
+        first_snapshot);
+
+    counters.droppable_packets = 100; // Traffic keeps flowing after the first upsert.
+    counters.data_packets = 100;
+    counters.dropped_packets = 2;
+    counters.pending_retransmissions = 2;
+
+    const auto second_key = make_key(40002); // A second drop, on a different flow.
+    const auto second_id = openpenny::penny::make_packet_drop_id(2000, 100);
+    auto second_snapshot = make_pending_snapshot();
+    openpenny::app::DropCollectorBinding::instance().upsert(
+        collector,
+        "worker-0",
+        0,
+        second_key,
+        second_id,
+        second_snapshot);
+
+    assert(collector->accepted_snapshot_count.load(std::memory_order_relaxed) == 1); // The second snapshot was not counted as accepted.
+
+    counters.pending_retransmissions = 0;
+    counters.non_retransmitted_packets = 50; // Deliberately unlike the snapshot's own value (1); see the last assert.
+    first_snapshot.state = openpenny::penny::SnapshotState::Expired; // Resolve the first drop as expired without retransmission.
+    first_snapshot.stats.dec_pending_retransmission();
+    first_snapshot.stats.inc_non_retransmitted();
+    openpenny::app::DropCollectorBinding::instance().upsert( // Same key and packet id as the first upsert.
+        collector,
+        "worker-0",
+        0,
+        first_key,
+        first_id,
+        first_snapshot);
+
+    openpenny::PipelineSummary summary;
+    controller.populate_drop_snapshots(summary);
+    assert(summary.drop_snapshots.size() == 1); // Still exactly one stored snapshot.
+
+    controller.evaluate_pending_if_needed(cfg, summary);
+
+    assert(runtime.aggregates_status ==
+           openpenny::RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP);
+    assert(runtime.has_aggregate_eval);
+    assert(runtime.aggregate_eval_counters.data_packets == 10); // Equals the live counter at the first upsert (10), not the later 100.
+    assert(runtime.aggregate_eval_counters.duplicate_packets == 0);
+    assert(runtime.aggregate_eval_counters.retransmitted_packets == 0);
+    assert(runtime.aggregate_eval_counters.non_retransmitted_packets == 1); // Not the live counter value of 50 set above.
+
+    return 0;
+}
diff --git a/tests/unit/flow/test_aggregate_pending_resolution.cpp b/tests/unit/flow/test_aggregate_pending_resolution.cpp
new file mode 100644
index 0000000..e676e7e
--- /dev/null
+++ b/tests/unit/flow/test_aggregate_pending_resolution.cpp
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+#include "openpenny/app/core/AggregatesController.h"
+#include "openpenny/app/core/PerThreadStats.h"
+#include "openpenny/app/core/RuntimeSetup.h"
+#include "openpenny/config/Config.h"
+
+#include <atomic>
+#include <cassert>
+#include <chrono>
+#include <functional>
+#include <memory>
+
+namespace {
+
+openpenny::DropSnapshotRecord make_expired_snapshot_record() { // Expired record: 5 data/droppable packets, 1 drop counted as non-retransmitted.
+    openpenny::DropSnapshotRecord record{};
+    record.key.src = 0x0a000001; // NOTE(review): 10.0.0.1 if read as a big-endian IPv4 value; byte order is not visible here.
+    record.key.dst = 0x0a000002; // NOTE(review): 10.0.0.2 under the same assumption.
+    record.key.sport = 1111;
+    record.key.dport = 5201;
+    record.key.ip_proto = 6; // 6 is the IANA protocol number for TCP.
+    record.packet_id = openpenny::penny::make_packet_drop_id(1000, 100);
+    record.snapshot.timestamp = std::chrono::steady_clock::now();
+    record.snapshot.state = openpenny::penny::SnapshotState::Expired;
+    for (int i = 0; i < 5; ++i) {
+        record.snapshot.stats.record_data_packet();
+        record.snapshot.stats.record_droppable_packet();
+    }
+    record.snapshot.stats.record_drop();
+    record.snapshot.stats.inc_non_retransmitted();
+    return record;
+}
+
+openpenny::DropSnapshotRecord make_invalid_snapshot_record() { // Same key and packet id as the expired record, but in state Invalid.
+    auto record = make_expired_snapshot_record();
+    record.snapshot.state = openpenny::penny::SnapshotState::Invalid;
+    record.snapshot.stats = {}; // Rebuild the stats from scratch...
+    for (int i = 0; i < 5; ++i) {
+        record.snapshot.stats.record_data_packet();
+        record.snapshot.stats.record_droppable_packet();
+    }
+    record.snapshot.stats.record_drop(); // ...this time without inc_non_retransmitted().
+    return record;
+}
+
+openpenny::DropSnapshotRecord make_duplicate_exceeded_snapshot_record() { // Expired record on a different flow: 10 data packets, 2 duplicates, 1 drop.
+    openpenny::DropSnapshotRecord record{};
+    record.key.src = 0x0a000011; // NOTE(review): 10.0.0.17 if read as a big-endian IPv4 value; byte order is not visible here.
+    record.key.dst = 0x0a000012; // NOTE(review): 10.0.0.18 under the same assumption.
+    record.key.sport = 2222;
+    record.key.dport = 5201;
+    record.key.ip_proto = 6; // 6 is the IANA protocol number for TCP.
+    record.packet_id = openpenny::penny::make_packet_drop_id(2000, 100);
+    record.snapshot.timestamp = std::chrono::steady_clock::now();
+    record.snapshot.state = openpenny::penny::SnapshotState::Expired;
+    for (int i = 0; i < 10; ++i) {
+        record.snapshot.stats.record_data_packet();
+        record.snapshot.stats.record_droppable_packet();
+    }
+    for (int i = 0; i < 2; ++i) { // 2 duplicates against 10 data packets; main() lowers max_duplicate_fraction to 0.1 for this record.
+        record.snapshot.stats.record_duplicate_packet();
+    }
+    record.snapshot.stats.record_drop();
+    record.snapshot.stats.inc_non_retransmitted();
+    return record;
+}
+
+} // namespace
+
+int main() { // Three synchronous evaluate_pending_if_needed() scenarios: Expired, Invalid, and duplicate-heavy snapshots.
+    openpenny::app::init_thread_counters(1); // presumably sizes the per-thread counter table to one slot — verify against PerThreadStats.
+    openpenny::app::set_thread_counter_index(0);
+
+    openpenny::Config cfg;
+    cfg.active.aggregates_enabled = true;
+    cfg.active.max_drops_aggregates = 1;
+    cfg.active.max_duplicate_fraction = 1.0; // Lowered to 0.1 before the third scenario.
+    cfg.active.retransmission_miss_probability = 0.0;
+
+    openpenny::PipelineOptions opts{};
+    opts.mode = openpenny::PipelineOptions::Mode::Active;
+
+    openpenny::set_runtime_setup(cfg, opts, false, false);
+    auto& runtime = openpenny::runtime_setup_mutable();
+    openpenny::set_current_aggregates_status( // Scenario 1 setup: PENDING, no prior evaluation, aggregates active.
+        openpenny::RuntimeStatus::AggregatesStatus::PENDING);
+    runtime.aggregate_eval_counters = {};
+    openpenny::set_current_has_aggregate_eval(false);
+    openpenny::set_current_aggregates_active(true);
+
+    std::atomic<bool> stop_flag{false};
+    auto collector = std::make_shared<openpenny::DropCollector>(1);
+    openpenny::AggregatesController controller( // Never start()ed: every evaluation below is invoked directly.
+        cfg,
+        opts,
+        collector,
+        stop_flag,
+        std::function<bool()>{});
+
+    openpenny::PipelineSummary summary;
+    summary.drop_snapshots.push_back(make_expired_snapshot_record()); // The summary is filled by hand, bypassing the collector.
+
+    controller.evaluate_pending_if_needed(cfg, summary);
+
+    assert(runtime.aggregates_status ==
+           openpenny::RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP);
+    assert(runtime.has_aggregate_eval);
+    assert(controller.aggregates_ready());
+    assert(controller.collector_completed());
+
+    openpenny::set_current_aggregates_status( // Scenario 2 setup: reset to PENDING before evaluating an Invalid snapshot.
+        openpenny::RuntimeStatus::AggregatesStatus::PENDING);
+    runtime.aggregate_eval_counters = {};
+    openpenny::set_current_has_aggregate_eval(false);
+    openpenny::set_current_aggregates_active(true);
+
+    openpenny::PipelineSummary invalid_summary;
+    invalid_summary.drop_snapshots.push_back(make_invalid_snapshot_record());
+
+    controller.evaluate_pending_if_needed(cfg, invalid_summary);
+
+    assert(runtime.aggregates_status == // An Invalid snapshot yields the same verdict as the Expired one.
+           openpenny::RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP);
+    assert(runtime.has_aggregate_eval);
+
+    openpenny::set_current_aggregates_status( // Scenario 3 setup: reset again, then tighten the duplicate limit.
+        openpenny::RuntimeStatus::AggregatesStatus::PENDING);
+    runtime.aggregate_eval_counters = {};
+    openpenny::set_current_has_aggregate_eval(false);
+    openpenny::set_current_aggregates_active(true);
+    cfg.active.max_duplicate_fraction = 0.1; // The next record carries 2 duplicates in 10 data packets.
+
+    openpenny::PipelineSummary duplicate_summary;
+    duplicate_summary.drop_snapshots.push_back(make_duplicate_exceeded_snapshot_record());
+
+    controller.evaluate_pending_if_needed(cfg, duplicate_summary);
+
+    assert(runtime.aggregates_status ==
+           openpenny::RuntimeStatus::AggregatesStatus::DUPLICATES_EXCEEDED);
+    assert(runtime.has_aggregate_eval);
+    assert(runtime.aggregate_eval_counters.data_packets == 10); // Evaluation counters match the record's stats.
+    assert(runtime.aggregate_eval_counters.duplicate_packets == 2);
+
+    return 0;
+}
diff --git a/tests/unit/flow/test_drop_snapshot_updates.cpp b/tests/unit/flow/test_drop_snapshot_updates.cpp
index c27d27c..0e7131d 100644
--- a/tests/unit/flow/test_drop_snapshot_updates.cpp
+++ b/tests/unit/flow/test_drop_snapshot_updates.cpp
@@ -22,9 +22,10 @@
 //
 // Synchronization caveat:
 //   `FlowEngine::register_filled_gaps()` enqueues a Retransmit event on
-//   the global `ThreadFlowEventTimerManager`; the actual mutation happens
-//   on the timer thread. The test polls until the mutation is observed
-//   so we don't race against the background thread.
+//   the thread-local `ThreadFlowEventTimerManager`; the actual mutation
+//   is applied when the worker drains callbacks. The test polls and
+//   drives that drain explicitly so the assertions observe the updated
+//   snapshots deterministically.
 
 #include "openpenny/config/Config.h"
 #include "openpenny/penny/flow/engine/FlowEngine.h"
@@ -40,15 +41,16 @@ using namespace std::chrono;
 namespace {
 
 // Wait up to `timeout` for `predicate()` to become true. Used to
-// synchronise the test thread with the FlowEngine timer thread, which
-// processes Retransmit events asynchronously.
+// synchronise the test thread with the cooperative timer manager.
 template <class Predicate>
 bool wait_for(Predicate predicate, milliseconds timeout = milliseconds{2000}) {
     const auto deadline = steady_clock::now() + timeout;
     while (steady_clock::now() < deadline) {
+        openpenny::penny::ThreadFlowEventTimerManager::instance().drain_callbacks(); // Service queued timer work on this thread before each check.
         if (predicate()) return true;
         std::this_thread::sleep_for(milliseconds{5});
     }
+    openpenny::penny::ThreadFlowEventTimerManager::instance().drain_callbacks(); // One last drain so the final check sees work queued during the last sleep.
     return predicate();
 }
 
@@ -63,11 +65,11 @@ int main() {
     // is deterministic regardless of the random number generator state.
     cfg.active.drop_probability = 1.0;
     // Long retransmission timeout. With `now = steady_clock::now()`, the
-    // deadline = `now + 60s` lies far in the future so the timer-manager
-    // background thread's expiry path never runs during this test —
-    // only the explicit `register_filled_gaps()` events do. (The test's
-    // assertions break if `mark_snapshot_expired` runs concurrently and
-    // decrements pending on entries we haven't filled yet.)
+    // deadline = `now + 60s` lies far in the future so the cooperative
+    // expiry path never runs during this test — only the explicit
+    // `register_filled_gaps()` events do. (The test's assertions break
+    // if `mark_snapshot_expired` also runs and decrements pending on
+    // entries we haven't filled yet.)
     cfg.active.rtt_timeout_factor = 60.0;
 
     openpenny::penny::FlowEngine flow(cfg.active);
@@ -120,7 +122,7 @@ int main() {
     // Phase 2: drop1 is retransmitted (gap filled by a later packet)
     // ----------------------------------------------------------------
     // register_filled_gaps() queues a Retransmit event on the timer
-    // manager. The timer thread picks it up and calls
+    // manager. drain_callbacks() then applies
     // mark_snapshot_retransmitted on this thread's FlowEngine, which:
     //   - decrements flow_stats_.pending_retransmissions by 1,
     //   - increments flow_stats_.retransmitted_packets by 1,
@@ -130,8 +132,7 @@ int main() {
     //     decrementing its frozen pending count.
     flow.register_filled_gaps(std::vector<openpenny::penny::PacketDropId>{drop1_id});
 
-    // Wait for the timer thread to process the event before asserting.
-    // Without this, the assertions race against the background thread.
+    // Drain the queued retransmit event before asserting.
     assert(wait_for([&] { return flow.retransmitted_packets() == 1; }));
 
     // Phase 2 verification: flow-wide counters
diff --git a/tests/unit/flow/test_drop_timer.cpp b/tests/unit/flow/test_drop_timer.cpp
index df9f6b6..e7b98b0 100644
--- a/tests/unit/flow/test_drop_timer.cpp
+++ b/tests/unit/flow/test_drop_timer.cpp
@@ -4,6 +4,7 @@
 #include "openpenny/penny/flow/timer/ThreadFlowEventTimer.h"
 #include "openpenny/penny/flow/state/PennySnapshot.h"
 #include "openpenny/penny/flow/engine/FlowEngine.h"
+#include "openpenny/app/core/PerThreadStats.h"
 #include "openpenny/net/Packet.h"
 
 #include <cassert>
@@ -84,6 +85,50 @@ int main() {
         assert(flow.non_retransmitted_packets() == 0);
     }
 
+    // Timer callbacks must publish into the same per-thread counter shard as the
+    // worker that owns the flow; otherwise multi-queue aggregate pending_rtx can
+    // stay stuck forever.
+    openpenny::penny::ThreadFlowEventTimerManager::instance().stop();
+    // Setup: two counter shards with this thread bound to shard 1, so the
+    // asserts below can tell whether the expiry was published into shard 1 or
+    // leaked into shard 0.
+    openpenny::app::init_thread_counters(2);
+    openpenny::app::set_thread_counter_index(1);
+    {
+        openpenny::Config cfg;
+        cfg.active.drop_probability = 1.0;
+        // NOTE(review): presumably a small factor makes the drop's timeout
+        // elapse within the 80 ms sleep below — confirm against the deadline
+        // computation, which is not visible from this test.
+        cfg.active.rtt_timeout_factor = 0.05;
+
+        openpenny::penny::FlowEngine flow(cfg.active);
+        openpenny::FlowKey key{};
+        const auto now = std::chrono::steady_clock::now();
+        const auto packet_id = openpenny::penny::make_packet_drop_id(3000, 100);
+
+        flow.record_data(3000, now);
+        const bool dropped = flow.drop_packet(3000, 3100, packet_id, key, now);
+        assert(dropped);
+        assert(openpenny::app::aggregate_counters().pending_retransmissions == 1);
+
+        // Wait past the timeout, then drain the timer manager's queued
+        // callbacks before inspecting the counters.
+        sleep_for_ms(80);
+        openpenny::penny::ThreadFlowEventTimerManager::instance().drain_callbacks();
+
+        // Shard 0 must stay untouched; shard 1 (this thread) must show the drop
+        // moved from pending to non-retransmitted, and the aggregate must agree.
+        const auto counters = openpenny::app::thread_counters();
+        assert(counters.size() >= 2);
+        assert(counters[0].pending_retransmissions == 0);
+        assert(counters[0].non_retransmitted_packets == 0);
+        assert(counters[1].pending_retransmissions == 0);
+        assert(counters[1].non_retransmitted_packets == 1);
+        assert(openpenny::app::aggregate_counters().pending_retransmissions == 0);
+        assert(openpenny::app::aggregate_counters().non_retransmitted_packets == 1);
+    }
+
     // Clean shutdown for other tests.
     openpenny::penny::ThreadFlowEventTimerManager::instance().stop();
     return 0;
diff --git a/tests/unit/flow/test_flow_evaluation_phase_gate.cpp b/tests/unit/flow/test_flow_evaluation_phase_gate.cpp
new file mode 100644
index 0000000..8259dd3
--- /dev/null
+++ b/tests/unit/flow/test_flow_evaluation_phase_gate.cpp
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+#include "openpenny/app/core/PerThreadStats.h"
+#include "openpenny/app/core/RuntimeSetup.h"
+#include "openpenny/config/Config.h"
+#include "openpenny/penny/flow/engine/FlowEngine.h"
+
+#include <cassert>
+
+// Checks that FlowEngine::evaluate_if_ready() leaves a flow PENDING while the
+// aggregates status is PENDING or CLOSED_LOOP, and reports
+// FINISHED_DUPLICATE_EXCEEDED once the status is NON_CLOSED_LOOP.
+int main() {
+    openpenny::app::init_thread_counters(1);
+    openpenny::app::set_thread_counter_index(0);
+
+    openpenny::Config cfg;
+    cfg.active.aggregates_enabled = true;
+    cfg.active.max_drops_aggregates = 1;
+    cfg.active.max_duplicate_fraction = 0.5;
+
+    openpenny::PipelineOptions opts{};
+    opts.mode = openpenny::PipelineOptions::Mode::Active;
+
+    // Phase 1: Active-mode runtime with aggregates still PENDING and active.
+    openpenny::set_runtime_setup(cfg, opts, false, false);
+    openpenny::set_current_aggregates_status(
+        openpenny::RuntimeStatus::AggregatesStatus::PENDING);
+    openpenny::set_current_aggregates_active(true);
+
+    openpenny::penny::FlowEngine flow(cfg.active);
+    // One data packet and one duplicate. NOTE(review): presumably this puts the
+    // flow over max_duplicate_fraction (0.5) — confirm how the fraction is
+    // computed; it is not visible from this test.
+    flow.record_data_packet();
+    flow.record_duplicate_packet();
+
+    // Evaluation is expected to produce no verdict yet.
+    flow.evaluate_if_ready();
+    assert(flow.final_decision() ==
+           openpenny::penny::FlowEngine::FlowDecision::PENDING);
+
+    // Phase 2: aggregates resolved as CLOSED_LOOP and no longer active.
+    openpenny::set_current_aggregates_status(
+        openpenny::RuntimeStatus::AggregatesStatus::CLOSED_LOOP);
+    openpenny::set_current_aggregates_active(false);
+
+    // The flow is still expected to stay PENDING.
+    flow.evaluate_if_ready();
+    assert(flow.final_decision() ==
+           openpenny::penny::FlowEngine::FlowDecision::PENDING);
+
+    // Phase 3: aggregates resolved as NON_CLOSED_LOOP and no longer active.
+    openpenny::set_current_aggregates_status(
+        openpenny::RuntimeStatus::AggregatesStatus::NON_CLOSED_LOOP);
+    openpenny::set_current_aggregates_active(false);
+
+    // Now the flow is expected to finish with FINISHED_DUPLICATE_EXCEEDED.
+    flow.evaluate_if_ready();
+    assert(flow.final_decision() ==
+           openpenny::penny::FlowEngine::FlowDecision::FINISHED_DUPLICATE_EXCEEDED);
+
+    return 0;
+}
diff --git a/tests/unit/flow/test_gap_management.cpp b/tests/unit/flow/test_gap_management.cpp
index cafa2e9..2a9d31a 100644
--- a/tests/unit/flow/test_gap_management.cpp
+++ b/tests/unit/flow/test_gap_management.cpp
@@ -29,7 +29,7 @@ int main() {
     cfg.active.rtt_timeout_factor = 3.0;
 
     openpenny::penny::ThreadFlowManager table(cfg.active);
-    openpenny::FlowKey flow{10, 20, 1111, 2222};
+    openpenny::FlowKey flow{10, 20, 1111, 2222, 6};
     auto now = steady_clock::time_point{};
 
     // Register a gap representing a dropped packet.
diff --git a/tests/unit/flow/test_initial_flow_monitoring.cpp b/tests/unit/flow/test_initial_flow_monitoring.cpp
index 7cbbe0a..346d964 100644
--- a/tests/unit/flow/test_initial_flow_monitoring.cpp
+++ b/tests/unit/flow/test_initial_flow_monitoring.cpp
@@ -24,7 +24,7 @@ namespace net = openpenny::net;
     auto now = steady_clock::time_point{};
 
     // Case 1: Flow starts with SYN.
-    openpenny::FlowKey flow_syn{1, 2, 1000, 2000};
+    openpenny::FlowKey flow_syn{1, 2, 1000, 2000, 6};
     net::PacketView syn_pkt{};
     syn_pkt.flow = flow_syn;
     syn_pkt.tcp.seq = 100;
@@ -47,7 +47,7 @@ namespace net = openpenny::net;
     auto& syn_entry_data = *syn_entry_data_ptr;
 
     // Case 2: Flow starts with data (no SYN yet).
-    openpenny::FlowKey flow_data{3, 4, 3000, 4000};
+    openpenny::FlowKey flow_data{3, 4, 3000, 4000, 6};
     auto t0 = steady_clock::time_point{};
     net::PacketView data_pkt0{};
     data_pkt0.flow = flow_data;
@@ -81,7 +81,7 @@ namespace net = openpenny::net;
     assert(data_entry3.flow.highest_sequence() == 60);
 
     // Case 3: Flow receives SYN after data-first start.
-    openpenny::FlowKey flow_data_then_syn{5, 6, 1234, 4321};
+    openpenny::FlowKey flow_data_then_syn{5, 6, 1234, 4321, 6};
     auto td0 = steady_clock::time_point{};
     net::PacketView first_data_pkt{};
     first_data_pkt.flow = flow_data_then_syn;
diff --git a/tests/unit/flow/test_terminal_snapshot_resolution.cpp b/tests/unit/flow/test_terminal_snapshot_resolution.cpp
new file mode 100644
index 0000000..3a70eff
--- /dev/null
+++ b/tests/unit/flow/test_terminal_snapshot_resolution.cpp
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+#include "openpenny/config/Config.h"
+#include "openpenny/app/core/PerThreadStats.h"
+#include "openpenny/penny/flow/engine/FlowEngine.h"
+#include "openpenny/penny/flow/timer/ThreadFlowEventTimer.h"
+
+#include <cassert>
+#include <chrono>
+#include <cstdint>
+#include <vector>
+
+namespace {
+
+// Builds the flow key used by a scenario: fixed addresses, destination port
+// 5201 and IP protocol 6 (TCP); only the source port differs per scenario.
+openpenny::FlowKey make_flow_key(std::uint16_t sport) {
+    openpenny::FlowKey key{};
+    key.src = 0x0a000001;
+    key.dst = 0x0a000002;
+    key.sport = sport;
+    key.dport = 5201;
+    key.ip_proto = 6;
+    return key;
+}
+
+} // namespace
+
+// Exercises how a pending drop snapshot is resolved at flow teardown:
+// invalidated before its timeout, expired explicitly, and expired by
+// teardown once the timeout has elapsed.
+int main() {
+    using Clock = std::chrono::steady_clock;
+
+    openpenny::penny::ThreadFlowEventTimerManager::instance().stop();
+    openpenny::app::init_thread_counters(1);
+    openpenny::app::set_thread_counter_index(0);
+
+    openpenny::Config cfg;
+    cfg.active.drop_probability = 1.0;
+    // NOTE(review): presumably yields a 60 s drop timeout, which scenarios 1
+    // and 3 straddle with +1 s and +61 s — confirm against FlowEngine.
+    cfg.active.rtt_timeout_factor = 60.0;
+
+    // Scenario 1: teardown before the timeout invalidates the pending drop.
+    {
+        openpenny::penny::FlowEngine flow(cfg.active);
+        std::vector<openpenny::penny::SnapshotState> observed_states;
+        flow.set_flow_key(make_flow_key(1111));
+        flow.set_drop_sink([&observed_states](const openpenny::FlowKey&,
+                                              openpenny::penny::PacketDropId,
+                                              const openpenny::penny::PacketDropSnapshot& snapshot) {
+            observed_states.push_back(snapshot.state);
+        });
+        const auto drop_time = Clock::now();
+        const auto key = make_flow_key(1111);
+        const auto packet_id = openpenny::penny::make_packet_drop_id(1000, 100);
+
+        flow.record_data(1000, drop_time);
+        // Keep drop_packet() outside assert(): under NDEBUG the assert
+        // expression is not evaluated, so the drop itself would be skipped.
+        const bool dropped = flow.drop_packet(1000, 1100, packet_id, key, drop_time);
+        assert(dropped);
+        assert(openpenny::app::aggregate_counters().pending_retransmissions == 1);
+
+        // Generic teardown before the timeout should NOT mark the drop expired.
+        flow.resolve_pending_snapshots(drop_time + std::chrono::seconds(1));
+
+        assert(flow.pending_retransmissions() == 0);
+        assert(flow.non_retransmitted_packets() == 0);
+        assert(openpenny::app::aggregate_counters().pending_retransmissions == 0);
+        assert(flow.drop_snapshots().size() == 1);
+        assert(flow.drop_snapshots().front().second.state ==
+               openpenny::penny::SnapshotState::Invalid);
+        // The sink saw the snapshot twice: first Pending, then Invalid.
+        assert(observed_states.size() == 2);
+        assert(observed_states.front() == openpenny::penny::SnapshotState::Pending);
+        assert(observed_states.back() == openpenny::penny::SnapshotState::Invalid);
+    }
+
+    // Scenario 2: explicit expiry of a pending drop.
+    {
+        openpenny::penny::FlowEngine flow(cfg.active);
+        flow.set_flow_key(make_flow_key(1112));
+        const auto drop_time = Clock::now();
+        const auto key = make_flow_key(1112);
+        const auto packet_id = openpenny::penny::make_packet_drop_id(1500, 100);
+
+        flow.record_data(1500, drop_time);
+        const bool dropped = flow.drop_packet(1500, 1600, packet_id, key, drop_time);
+        assert(dropped);
+
+        // FIN semantics are immediate: outstanding drops become non-retransmitted.
+        flow.mark_snapshot_expired(packet_id);
+
+        assert(flow.pending_retransmissions() == 0);
+        assert(flow.non_retransmitted_packets() == 1);
+        assert(flow.drop_snapshots().size() == 1);
+        assert(flow.drop_snapshots().front().second.state ==
+               openpenny::penny::SnapshotState::Expired);
+    }
+
+    // Scenario 3: teardown after the timeout promotes the drop to Expired.
+    {
+        openpenny::penny::FlowEngine flow(cfg.active);
+        std::vector<openpenny::penny::SnapshotState> observed_states;
+        flow.set_flow_key(make_flow_key(1113));
+        flow.set_drop_sink([&observed_states](const openpenny::FlowKey&,
+                                              openpenny::penny::PacketDropId,
+                                              const openpenny::penny::PacketDropSnapshot& snapshot) {
+            observed_states.push_back(snapshot.state);
+        });
+        const auto drop_time = Clock::now();
+        const auto key = make_flow_key(1113);
+        const auto packet_id = openpenny::penny::make_packet_drop_id(2000, 100);
+
+        flow.record_data(2000, drop_time);
+        const bool dropped = flow.drop_packet(2000, 2100, packet_id, key, drop_time);
+        assert(dropped);
+        assert(openpenny::app::aggregate_counters().pending_retransmissions == 1);
+
+        // Once the timeout has elapsed, teardown should promote to Expired.
+        flow.resolve_pending_snapshots(drop_time + std::chrono::seconds(61));
+
+        assert(flow.pending_retransmissions() == 0);
+        assert(flow.non_retransmitted_packets() == 1);
+        assert(openpenny::app::aggregate_counters().pending_retransmissions == 0);
+        assert(flow.drop_snapshots().size() == 1);
+        assert(flow.drop_snapshots().front().second.state ==
+               openpenny::penny::SnapshotState::Expired);
+        assert(observed_states.size() == 2);
+        assert(observed_states.front() == openpenny::penny::SnapshotState::Pending);
+        assert(observed_states.back() == openpenny::penny::SnapshotState::Expired);
+    }
+
+    openpenny::penny::ThreadFlowEventTimerManager::instance().stop();
+    return 0;
+}
diff --git a/tests/unit/net/test_packet_parser.cpp b/tests/unit/net/test_packet_parser.cpp
index a74be2f..343186d 100644
--- a/tests/unit/net/test_packet_parser.cpp
+++ b/tests/unit/net/test_packet_parser.cpp
@@ -73,6 +73,7 @@ void assert_decodes(const std::vector<std::uint8_t>& frame) {
     assert(packet.flow.dst == 0xc0a82902u);
     assert(packet.flow.sport == 40000);
     assert(packet.flow.dport == 5201);
+    assert(packet.flow.ip_proto == 6);
     assert(packet.ip_proto == 6);
 }
 
diff --git a/tests/unit/net/test_traffic_match.cpp b/tests/unit/net/test_traffic_match.cpp
index f8b52dd..4ed40c4 100644
--- a/tests/unit/net/test_traffic_match.cpp
+++ b/tests/unit/net/test_traffic_match.cpp
@@ -17,6 +17,7 @@ int main() {
     matching.dst = 0xc0000201u;
     matching.sport = 12345;
     matching.dport = 443;
+    matching.ip_proto = 6;
 
     openpenny::FlowKey non_matching = matching;
     non_matching.src = 0x0a020203u;
@@ -57,12 +58,18 @@ int main() {
     cfg.rules.clear();
     cfg.rules.push_back(tcp_https);
 
+    assert(openpenny::net::traffic_matches_flow(cfg, matching));
+    auto wrong_proto = matching;
+    wrong_proto.ip_proto = 17;
+    assert(!openpenny::net::traffic_matches_flow(cfg, wrong_proto));
+
     openpenny::net::PacketView packet{};
     packet.flow = matching;
     packet.ip_proto = 6;
     assert(openpenny::net::traffic_matches_packet(cfg, packet));
 
     packet.ip_proto = 17;
+    packet.flow.ip_proto = 17;
     assert(!openpenny::net::traffic_matches_packet(cfg, packet));
 
     cfg.default_action = openpenny::net::TrafficRuleAction::RedirectToUserspace;