From fd872037d3843c5a51c6fedfb0afb297329bbee3 Mon Sep 17 00:00:00 2001 From: Marcos Date: Wed, 6 May 2026 01:45:17 -0300 Subject: [PATCH] test(global_mem_controller): regression for core1_rd_data stability after ack (#22) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #19 changed `core1_rd_data` in src/global_mem_controller.sv from `output reg` (registered) to `output wire` (combinational). Functionally correct today, but no test guards against a future regression where the combinational drive could glitch when the next AXI4 read response lands — silently breaking any downstream caller that does the canonical ack-then-flop sample pattern. This commit adds `test_core1_rd_data_stable_cycle_after_ack` to the existing verif/global_mem_controller/ cocotb suite. The test: 1. Pre-loads a known value via the controller loader back-door. 2. Issues a core1 read and waits for `core1_ack`. 3. Samples `core1_rd_data` on the ack cycle (T). 4. Holds the bus idle for one more clock with no new request. 5. Re-samples `core1_rd_data` on cycle T+1. 6. Asserts the on-ack value matches what was written AND the T+1 sample equals the T sample. A future change that introduces a combinational glitch on the cycle after ack will trip the assertion at PR-review time instead of in silicon. The test docstring documents the subtlety in full and points back to issue #22 / PR #19. Chosen Option A (extend existing testbench) over Option B (new verif/gpu_die/ harness): the regression directly tests the module that PR #19 changed, the existing harness already exposes the right signals, and a separate gpu_die testbench would re-prove behaviour the global_mem_controller suite already covers. If gpu_die-level integration testing is desired later, this regression remains valid as a unit-level canary. Test evidence (verif/global_mem_controller/): TESTS=8 PASS=8 FAIL=0 SKIP=0 Closes #22. Authored by Agent 1 (RTL Architect). Signed-off-by: Marcos --- .../test_global_mem_controller.py | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/verif/global_mem_controller/test_global_mem_controller.py b/verif/global_mem_controller/test_global_mem_controller.py index 0023171..cbbfe53 100644 --- a/verif/global_mem_controller/test_global_mem_controller.py +++ b/verif/global_mem_controller/test_global_mem_controller.py @@ -35,6 +35,13 @@ * `test_arbiter_priority_core1_wins` — when core1 and contr_rd request simultaneously, core1 wins and contr_rd waits. + * `test_core1_rd_data_stable_cycle_after_ack` — regression test + for MAST issue #22. Guards against + downstream callers silently breaking + when `core1_rd_data` switched from + `output reg` to `output wire` in + PR #19. See test docstring for the + full subtlety. The bit[47]-set address case from the dispatch brief is observed INSIDE `test_axi4_widths_wired` (where we read the actual wire width @@ -327,3 +334,111 @@ async def test_arbiter_priority_core1_wins(dut): assert contr_data == word_b, ( f"contr_rd got wrong data: 0x{contr_data:08x}, expected 0x{word_b:08x}" ) + + +# ---------------------------------------------------------------------------- +# Regression: core1_rd_data stability around the ack cycle (MAST issue #22) +# ---------------------------------------------------------------------------- + +@cocotb.test() +async def test_core1_rd_data_stable_cycle_after_ack(dut): + """Regression test for MAST issue #22 (post-PR-#19 follow-up). + + SUBTLETY (load-bearing — read carefully): + + PR #19 changed `core1_rd_data` in `src/global_mem_controller.sv` + from `output reg` (registered) to `output wire` (combinational + `assign core1_rd_data = cm_rd_data;`). All upstream callers + (`gpu_die.sv`, `test/behav/compute_unit_and_mem.sv`, + `test/behav/core_and_mem.sv`) connect via plain wire assignments + — there is currently no caller that latches `core1_rd_data` + independently of `core1_ack`. + + A downstream caller (RISC-V load-store unit, DMA engine, etc.) + that observes `core1_ack=1` on cycle T and reads `core1_rd_data` + via a flop on cycle T+1 is doing the textbook handshake-and- + sample pattern. That pattern depends on `core1_rd_data` being + *stable* until at least the next active edge after the ack. + + With the new combinational drive this is true *today* because + `cm_rd_data` (driven by the AXI4 adapter's response register) + holds its value until the next AXI4 read response lands — + which, in the current single-outstanding adapter, only happens + after the caller issues a new `core1_rd_req`. So a future + optimization (multi-outstanding adapter, response forwarding, + pipelined arbiter) could silently glitch `core1_rd_data` between + the ack cycle and the cycle after, producing wrong silicon + without breaking any existing test. + + THIS TEST is the canary. It: + 1. Issues a core1 read for a known value at addr_A. + 2. Waits for `core1_ack`. + 3. Samples `core1_rd_data` on the ack cycle (T). + 4. Holds all inputs idle for one more clock (no new req). + 5. Re-samples `core1_rd_data` on cycle T+1. + 6. Asserts the on-ack value matches what was written AND the + T+1 sample equals the T sample (i.e. the data did not + glitch in the cycle after ack while the bus was idle). + + A future regression that introduces a combinational glitch — + for example because `cm_rd_data` starts mirroring the AXI4 + R-channel before the response is captured — will trip this + assertion and flag the change at PR-review time instead of + in silicon. + + Out of scope (deliberate): re-engineering `core1_rd_data` back + to a registered output. That's a design decision that needs + its own ADR; this test only guards the current contract. + """ + cocotb.start_soon(Clock(dut.clk, CLK_PERIOD_NS, unit="ns").start()) + await reset_dut(dut) + + # Pre-load a known value at a known address via the controller + # loader back-door (same pattern as test_contr_loader_then_core1_read, + # so we don't depend on the write path also being correct). + addr = 0x0000_0080 # cache-line aligned, slot 0 + word = 0xA5A5_5A5A + await contr_loader_write(dut, addr, word) + await RisingEdge(dut.clk) # let the loader NBA commit + + # Issue a core1 read. + dut.core1_addr.value = addr + dut.core1_rd_req.value = 1 + await RisingEdge(dut.clk) + dut.core1_rd_req.value = 0 + + # Spin until the ack cycle. On the cycle when core1_ack is high, + # capture core1_rd_data IMMEDIATELY (before the next RisingEdge), + # which is the value any downstream flop sampling on (clk && + # core1_ack) would latch. + on_ack_data = None + for _ in range(300): + await RisingEdge(dut.clk) + if dut.core1_ack.value == 1: + on_ack_data = int(dut.core1_rd_data.value) + break + assert on_ack_data is not None, "core1 read never acked" + assert on_ack_data == word, ( + f"on-ack core1_rd_data mismatch: got 0x{on_ack_data:08x}, " + f"expected 0x{word:08x}" + ) + + # Hold the bus idle (no new req) for one more clock and re-sample. + # If a future change makes core1_rd_data combinationally track an + # AXI4 response that lands without our request, this sample will + # diverge. + assert dut.core1_rd_req.value == 0 + assert dut.core1_wr_req.value == 0 + await RisingEdge(dut.clk) + cycle_after_ack_data = int(dut.core1_rd_data.value) + + assert cycle_after_ack_data == on_ack_data, ( + "core1_rd_data glitched in the cycle AFTER ack while the bus " + "was idle — a downstream caller doing the canonical " + "ack-then-flop pattern would now latch the wrong value. " + f"on-ack=0x{on_ack_data:08x} vs cycle-after=0x{cycle_after_ack_data:08x}. " + "See MAST issue #22 / PR #19 for context. If this is an " + "intentional protocol change, please re-register core1_rd_data " + "as `output reg` in src/global_mem_controller.sv (deferred " + "design decision — needs an ADR)." + )