From d7959492ccb340a29d0e676c15137ca0824735f9 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 24 Dec 2025 09:33:41 +0100 Subject: [PATCH 01/25] Add comments to clarify redmule_inst_decoder These comments are generated by Copilot (Claude Sonnet 4.5) and should be checked, but they do seem legit. This is a preliminary action to reproduce a similar programming interface as XIF in a memory-mapped (hwpe-ctrl) setting. --- rtl/redmule_inst_decoder.sv | 112 ++++++++++++++++++++++++++++++------ 1 file changed, 95 insertions(+), 17 deletions(-) diff --git a/rtl/redmule_inst_decoder.sv b/rtl/redmule_inst_decoder.sv index a4963af..f612871 100644 --- a/rtl/redmule_inst_decoder.sv +++ b/rtl/redmule_inst_decoder.sv @@ -49,33 +49,52 @@ module redmule_inst_decoder input logic x_result_ready_i ); + // Calculate the width needed to represent hart IDs (minimum 1 bit) localparam int unsigned HartIdWidth = XifNumHarts > 1 ? $clog2(XifNumHarts) : 1; + // Compose full instruction encoding patterns for the three custom instructions: + // MCNFIG: Matrix configuration (sets dimensions, data flow control) + // MARITH: Matrix arithmetic operation (triggers computation with addresses) + // MOPCNT: Matrix operation count (returns number of completed operations) localparam logic [11:0] MCNFIG = {McnfigFunct2,McnfigFunct3,McnfigOpCode}; localparam logic [11:0] MARITH = {MarithFunct2,MarithFunct3,MarithOpCode}; localparam logic [11:0] MOPCNT = {MopcntFunct2,MopcntFunct3,MopcntOpCode}; + // Per-hart FIFO status flags for instruction and register packets logic [XifNumHarts-1:0] issue_fifo_full, register_fifo_full, issue_fifo_empty, register_fifo_empty; + + // Hart ID of the currently executing operation (tracked through pipeline) logic [HartIdWidth-1:0] current_hartid_d, current_hartid_q; + // Current instruction issue request and register data at head of each hart's FIFO x_issue_req_t [XifNumHarts-1:0] cur_issue; x_register_t [XifNumHarts-1:0] cur_register; + // TODO unused: x_result_t 
x_result_d, x_result_q; + // Per-hart operation ID counters: + // op_id_counter_in_q: Increments when operations are issued (tags for tracking) + // op_id_counter_out_q: Increments when operations complete (for MOPCNT instruction) logic [XifNumHarts-1:0] [OpIdWidth-1:0] op_id_counter_in_q, op_id_counter_out_q; + // Round-robin arbitration state for fair scheduling across harts logic [HartIdWidth-1:0] rr_counter_d, rr_counter_q; logic [XifNumHarts-1:0][HartIdWidth-1:0] rr_priority; logic [HartIdWidth-1:0] winner; + // Flag indicating whether the incoming instruction is a recognized RedMule custom instruction logic legal_inst; + // Per-hart configuration registers holding matrix operation parameters redmule_config_t [XifNumHarts-1:0] config_d, config_q; + // Control signal to enable popping from instruction FIFOs (delayed for MARITH until tiler ready) logic pop_enable; + // Decode incoming instruction to determine if it's a legal RedMule custom instruction + // Checks funct2[26:25], funct3[14:12], and opcode[6:0] fields always_comb begin : legal_inst_assignment legal_inst = 1'b0; @@ -85,21 +104,26 @@ module redmule_inst_decoder endcase end + // Generate XIF issue response indicating whether instruction is accepted and resource needs always_comb begin : x_issue_resp_assignment + // Accept instruction only if it's a legal RedMule custom instruction x_issue_resp_o.accept = legal_inst; unique case ({x_issue_req_i.instr[26:25],x_issue_req_i.instr[14:12],x_issue_req_i.instr[6:0]}) MCNFIG: begin + // MCNFIG: No writeback (configuration only), reads 3 source registers x_issue_resp_o.writeback = 'b0; - x_issue_resp_o.register_read = 'b111; + x_issue_resp_o.register_read = 'b111; // Read rs1, rs2, rs3 end MARITH: begin + // MARITH: Writeback if rd != x0 (returns operation ID), reads 3 source registers x_issue_resp_o.writeback = x_issue_req_i.instr[11:7] != 0; - x_issue_resp_o.register_read = 'b111; + x_issue_resp_o.register_read = 'b111; // Read rs1, rs2, rs3 (addresses) end 
MOPCNT: begin + // MOPCNT: Writeback if rd != x0 (returns completion count), no register reads x_issue_resp_o.writeback = x_issue_req_i.instr[11:7] != 0; - x_issue_resp_o.register_read = 'b0; + x_issue_resp_o.register_read = 'b0; // No source registers needed end default: begin x_issue_resp_o.writeback = 'b0; @@ -109,22 +133,27 @@ module redmule_inst_decoder end + // Construct result packet to write back to CPU register file always_comb begin : x_result_assignment + // Result valid when both instruction and register data available for winning hart x_result_valid_o = ~issue_fifo_empty[winner] && ~register_fifo_empty[winner]; x_result_o.hartid = cur_issue[winner].hartid; x_result_o.id = cur_issue[winner].id; - x_result_o.rd = cur_issue[winner].instr[11:7]; + x_result_o.rd = cur_issue[winner].instr[11:7]; // Destination register unique case ({cur_issue[winner].instr[26:25],cur_issue[winner].instr[14:12],cur_issue[winner].instr[6:0]}) MCNFIG: begin + // MCNFIG: No writeback, configuration stored internally x_result_o.we = 'b0; x_result_o.data = 'b0; end MARITH: begin + // MARITH: Write operation ID to rd (for tracking/synchronization) x_result_o.we = cur_issue[winner].instr[11:7] != 0; x_result_o.data = op_id_counter_in_q[winner]; end MOPCNT: begin + // MOPCNT: Write completion counter to rd (number of finished operations) x_result_o.we = cur_issue[winner].instr[11:7] != 0; x_result_o.data = op_id_counter_out_q[winner]; end @@ -135,12 +164,18 @@ module redmule_inst_decoder endcase end + // Output configuration from the winning hart to the RedMule tiler/controller assign config_o = config_d[winner]; + + // Configuration valid only for MARITH instructions when both FIFOs have data and CPU is ready + // (MCNFIG updates config but doesn't trigger execution) assign config_valid_o = ~issue_fifo_empty[winner] && ~register_fifo_empty[winner] && x_result_ready_i && {cur_issue[winner].instr[26:25],cur_issue[winner].instr[14:12],cur_issue[winner].instr[6:0]} == MARITH; + // 
Signal readiness to accept new instruction issue based on target hart's FIFO availability always_comb begin : x_issue_ready_assignment x_issue_ready_o = 1'b0; + // Find the hart matching the incoming request and check its issue FIFO status for (int unsigned i = 0; i < XifNumHarts; i++) begin if (x_issue_req_i.hartid == i) begin x_issue_ready_o = ~issue_fifo_full[i]; @@ -148,9 +183,11 @@ module redmule_inst_decoder end end + // Signal readiness to accept new register packet based on target hart's FIFO availability always_comb begin : x_register_ready_assignment x_register_ready_o = 1'b0; + // Find the hart matching the incoming register data and check its register FIFO status for (int unsigned i = 0; i < XifNumHarts; i++) begin if (x_register_i.hartid == i) begin x_register_ready_o = ~register_fifo_full[i]; @@ -158,6 +195,8 @@ module redmule_inst_decoder end end + // Round-robin counter for fair arbitration across multiple harts + // Advances each time a configuration is successfully accepted by downstream logic always_ff @(posedge clk_i, negedge rst_ni) begin : round_robin_counter if(~rst_ni) begin rr_counter_q <= '0; @@ -170,17 +209,23 @@ module redmule_inst_decoder end end + // Wrap counter to 0 after reaching the last hart assign rr_counter_d = rr_counter_q == XifNumHarts-1 ? 0 : rr_counter_q + 1; + // Calculate priority order for round-robin arbitration + // Creates a rotated sequence starting from current counter position always_comb begin : round_robin_priority for(int i = 0; i < XifNumHarts; i++) begin rr_priority[i] = (rr_counter_q + i < XifNumHarts) ? 
rr_counter_q + i : rr_counter_q + i - XifNumHarts; end end + // Select winning hart using round-robin priority among harts with ready instructions + // Scans in priority order and selects first hart with both issue and register data available always_comb begin : winner_assignment - winner = rr_counter_q; + winner = rr_counter_q; // Default to current counter position + // Override with first ready hart in priority order for(int i = 0; i < XifNumHarts; i++) begin if (~issue_fifo_empty[rr_priority[i]] && ~register_fifo_empty[rr_priority[i]]) begin winner = rr_priority[i]; @@ -188,6 +233,9 @@ module redmule_inst_decoder end end + // FIFO tracking which hart each in-flight operation belongs to + // Pushed when operation starts, popped when operation completes + // Used to correctly increment the completion counter for MOPCNT instruction fifo_v3 #( .FALL_THROUGH ( 0 ), .DEPTH ( InstFifoDepth * XifNumHarts ), @@ -200,13 +248,16 @@ module redmule_inst_decoder .full_o ( ), .empty_o ( ), .usage_o ( ), - .data_i ( winner ), - .push_i ( config_ready_i && config_valid_o ), - .data_o ( current_hartid_q ), - .pop_i ( op_done_i ) + .data_i ( winner ), // Push winning hart ID + .push_i ( config_ready_i && config_valid_o ), // On operation issue + .data_o ( current_hartid_q ), // Hart of completing op + .pop_i ( op_done_i ) // On operation completion ); + // Per-hart operation ID counters for tracking issued and completed operations for (genvar i = 0; i < XifNumHarts; i++) begin : gen_op_id_counters + // Input counter: increments when MARITH instruction is issued to this hart + // Returns this value to CPU as operation ID for software tracking always_ff @(posedge clk_i or negedge rst_ni) begin : op_id_counter_in if (~rst_ni) begin op_id_counter_in_q[i] <= 0; @@ -219,6 +270,9 @@ module redmule_inst_decoder end end + // Output counter: increments when any operation from this hart completes + // Returns this value for MOPCNT instruction to check completion status + // Initialized to 
all 1's to detect first completion (wraps to 0) always_ff @(posedge clk_i or negedge rst_ni) begin : op_id_counter_out if (~rst_ni) begin op_id_counter_out_q[i] <= '1; @@ -232,7 +286,9 @@ module redmule_inst_decoder end end - // Pop the fifos the first cycle the tiler is no longer busy if we detect a MARITH instruction + // Control when to pop instruction/register FIFOs: + // - MARITH: delay pop until config accepted by tiler (config_ready_i && config_valid_o) + // - Others: pop immediately since they don't require tiler resources assign pop_enable = ({cur_issue[winner].instr[26:25],cur_issue[winner].instr[14:12],cur_issue[winner].instr[6:0]} == MARITH ? config_ready_i && config_valid_o : 1'b1); for (genvar i = 0; i < XifNumHarts; i++) begin : gen_instruction_fifos @@ -250,6 +306,8 @@ module redmule_inst_decoder logic issue_push, register_push, issue_pop, register_pop; + // Register holding the most recent committed (non-killed) instruction ID for this hart + // Used to track successful instruction commits from the CPU always_ff @(posedge clk_i or negedge rst_ni) begin : commit_id_register if (~rst_ni) begin commit_id_q <= '0; @@ -262,8 +320,11 @@ module redmule_inst_decoder end end + // Capture commit ID when a valid, non-killed commit occurs for this hart assign commit_id_d = (x_commit_valid_i && ~x_commit_i.commit_kill && x_commit_i.hartid == i) ? x_commit_i.id : commit_id_q; + // Valid flag for commit_id, indicates whether we have a pending committed instruction + // Cleared when the matching instruction is popped from FIFO always_ff @(posedge clk_i or negedge rst_ni) begin : commid_id_valid_register if (~rst_ni) begin commit_id_valid_q <= 1'b0; @@ -276,9 +337,13 @@ module redmule_inst_decoder end end + // Set valid when commit arrives, hold until instruction processed assign commit_id_valid_d = (x_commit_valid_i && ~x_commit_i.commit_kill && x_commit_i.hartid == i) ? 
1'b1 : commit_id_valid_q; + // Clear valid flag when the committed instruction is popped from FIFO assign commit_id_valid_flush = issue_pop && cur_issue[i].id == commit_id_d && ~issue_fifo_empty[i]; + // Register holding the most recent killed instruction ID for this hart + // CPU sends kill signal for speculative instructions that should be discarded always_ff @(posedge clk_i or negedge rst_ni) begin : kill_id_register if (~rst_ni) begin kill_id_q <= '0; @@ -291,8 +356,11 @@ module redmule_inst_decoder end end + // Capture kill ID when a commit with kill flag occurs for this hart assign kill_id_d = (x_commit_valid_i && x_commit_i.commit_kill && x_commit_i.hartid == i) ? x_commit_i.id : kill_id_q; + // Valid flag for kill_id, indicates whether we have a pending kill request + // Cleared after FIFO flush completes always_ff @(posedge clk_i or negedge rst_ni) begin : kill_id_valid_register if (~rst_ni) begin kill_id_valid_q <= 1'b0; @@ -305,13 +373,19 @@ module redmule_inst_decoder end end + // Set valid when kill arrives, hold until FIFO flushed assign kill_id_valid_d = (x_commit_valid_i && x_commit_i.commit_kill && x_commit_i.hartid == i) ? 
1'b1 : kill_id_valid_q; + // Clear valid flag after FIFO has been flushed assign kill_id_valid_flush = fifo_flush; + // Trigger FIFO flush when head instruction matches a killed instruction ID assign fifo_flush = cur_issue[i].id == kill_id_d && kill_id_valid_d && ~issue_fifo_empty[i]; + // Push to issue FIFO when: valid legal instruction, FIFO not full, and x_commit_i.hartid selects this hart (NOTE(review): x_issue_req_i.hartid may be intended here -- confirm) assign issue_push = x_issue_valid_i && legal_inst && ~issue_fifo_full[i] && x_commit_i.hartid == i; + // Pop from issue FIFO when: this hart wins arbitration, pop enabled, CPU ready, both FIFOs have data assign issue_pop = winner == i && pop_enable && x_result_ready_i && ~issue_fifo_empty[i] && ~register_fifo_empty[i]; + // Register FIFO pops in sync with issue FIFO assign register_pop = issue_pop; fifo_v3 #( @@ -332,7 +406,9 @@ module redmule_inst_decoder .pop_i ( issue_pop ) ); - if (XifIssueRegisterSplit == 0) begin : gen_register_fifo // Register packets are guaranteed to arrive at the same time as the issue signal + // Non-split mode: register packets arrive synchronously with issue + if (XifIssueRegisterSplit == 0) begin : gen_register_fifo + // Push to register FIFO on a valid register packet for a legal instruction when x_commit_i.hartid selects this hart (NOTE(review): x_register_i.hartid may be intended here -- confirm) assign register_push = x_register_valid_i & legal_inst & x_commit_i.hartid == i; fifo_v3 #( @@ -353,14 +429,15 @@ module redmule_inst_decoder .pop_i ( register_pop ) ); - end else begin : gen_register_buffer // If register split is enabled, we could receive register packets out of order + end else begin : gen_register_buffer + // Split mode: register packets may arrive out-of-order relative to issue // When an instruction is marked as valid, reserve a slot for the instruction in the buffer - // The buffer has a number of slots equal to InstFifoDepth - - // TODO: implement + // TODO: implement out-of-order register packet buffering end + // Configuration register for this hart, holds accumulated matrix operation parameters + // Updated when instructions are popped (MCNFIG sets
params, MARITH uses them) always_ff @(posedge clk_i or negedge rst_ni) begin : config_register if (~rst_ni) begin config_q[i] <= '0; @@ -373,8 +450,9 @@ module redmule_inst_decoder end end + // Decode instruction and extract configuration parameters from register file values always_comb begin : config_assignment - config_d[i] = config_q[i]; + config_d[i] = config_q[i]; // Default: retain previous configuration unique case ({cur_issue[i].instr[26:25],cur_issue[i].instr[14:12],cur_issue[i].instr[6:0]}) MCNFIG: begin @@ -396,7 +474,7 @@ module redmule_inst_decoder config_d[i].gemm_input_fmt = redmule_pkg::Float16; config_d[i].gemm_output_fmt = redmule_pkg::Float16; end - default: config_d[i] = config_q[i]; + default: config_d[i] = config_q[i]; // Other instructions don't modify config endcase end end From 1fce877b4018fa6058159ca29dba154ae57834cb Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 24 Dec 2025 12:46:27 +0100 Subject: [PATCH 02/25] Add draft SystemRDL register interface for RedMulE This new register interface mirrors the structure of the XIF interface, which is currently the only one supported on the development branch of RedMulE.
--- rtl/ctrl/gen_regif.sh | 6 + rtl/ctrl/redmule_regif.rdl | 268 ++++++++++ rtl/ctrl/regif/redmule_regif.sv | 787 ++++++++++++++++++++++++++++ rtl/ctrl/regif/redmule_regif_pkg.sv | 243 +++++++++ 4 files changed, 1304 insertions(+) create mode 100755 rtl/ctrl/gen_regif.sh create mode 100644 rtl/ctrl/redmule_regif.rdl create mode 100644 rtl/ctrl/regif/redmule_regif.sv create mode 100644 rtl/ctrl/regif/redmule_regif_pkg.sv diff --git a/rtl/ctrl/gen_regif.sh b/rtl/ctrl/gen_regif.sh new file mode 100755 index 0000000..c9c3eb1 --- /dev/null +++ b/rtl/ctrl/gen_regif.sh @@ -0,0 +1,6 @@ +#!/bin/bash +peakrdl regblock redmule_regif.rdl -o regif/ --cpuif obi-flat --default-reset arst_n --hwif-report --addr-width 32 +peakrdl html redmule_regif.rdl -o regif/html/ +peakrdl c-header redmule_regif.rdl -o regif/hwpe_ctrl_target.h +# PeakRDL uses unpacked structs to avoid issues at compile time, which is commendable, but incompatible with FIFOing the output of the job! Rewrite them as packed structs; sed -E (extended regex) is used because the \+ and \b escapes are GNU-only and do not match on BSD/macOS sed. +sed -E 's/typedef[[:space:]]+struct([^[:alnum:]_]|$)/typedef struct packed\1/g' regif/redmule_regif_pkg.sv > regif/redmule_regif_pkg.sv.tmp && mv regif/redmule_regif_pkg.sv.tmp regif/redmule_regif_pkg.sv diff --git a/rtl/ctrl/redmule_regif.rdl b/rtl/ctrl/redmule_regif.rdl new file mode 100644 index 0000000..42013c1 --- /dev/null +++ b/rtl/ctrl/redmule_regif.rdl @@ -0,0 +1,268 @@ +/* + * redmule_regif.rdl + * Francesco Conti + * + * Copyright (C) 2025 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51.
Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + + /* + * This file contains the address map for RedMulE, expressed in SystemRDL. + */ + +// Enumeration for GEMM operation types (aligned with redmule_pkg::gemm_op_e) +enum gemm_op_e { + MATMUL = 3'h0 { name = "MATMUL"; desc = "Matrix multiplication"; }; + GEMM = 3'h1 { name = "GEMM"; desc = "General matrix multiply"; }; + ADDMAX = 3'h2 { name = "ADDMAX"; desc = "Addition with maximum"; }; + ADDMIN = 3'h3 { name = "ADDMIN"; desc = "Addition with minimum"; }; + MULMAX = 3'h4 { name = "MULMAX"; desc = "Multiplication with maximum"; }; + MULMIN = 3'h5 { name = "MULMIN"; desc = "Multiplication with minimum"; }; + MAXMIN = 3'h6 { name = "MAXMIN"; desc = "Maximum with minimum"; }; + MINMAX = 3'h7 { name = "MINMAX"; desc = "Minimum with maximum"; }; +}; + +// Enumeration for floating-point formats (aligned with redmule_pkg::gemm_fmt_e) +enum gemm_fmt_e { + Float8 = 2'h0 { name = "Float8"; desc = "FP8 format"; }; + Float16 = 2'h1 { name = "Float16"; desc = "FP16 format"; }; + Float8Alt = 2'h2 { name = "Float8Alt"; desc = "FP8 alternative format"; }; + Float16Alt = 2'h3 { name = "Float16Alt"; desc = "FP16 alternative format"; }; +}; + +addrmap redmule_regif { + name = "RedMulE register interface"; + desc = "Control register map for RedMulE, including mandatory control/status registers and job-dependent configuration registers."; + // Mandatory COMMIT_TRIGGER register. Not to be updated inside HWPEs.
+ reg hwpe_commit_trigger { + field { + name = "reserved"; + desc = "Reserved."; + hw = r; + sw = r; + } r0[31:2] = 0; + field { + name = "commit_trigger"; + desc = "Write 0 to commit job & start execution, unlock controller; write `0x1` value to commit job & unlock controller without starting execution, which will be started when the next job is committed and triggered; write `0x2` value to trigger the current job queue without committing any new job."; + hw = r; + sw = w; + swacc = true; + } commit_trigger[1:0] = 0; + }; + // Mandatory ACQUIRE register. Not to be updated inside HWPEs. + reg hwpe_acquire { + field { + name = "acquire"; + desc = "On read starts a job offload, locks controller. Returns job ID."; + hw = w; + sw = r; + swacc = true; + } acquire[31:0] = 0; + }; + // Mandatory RESERVED register. Not to be updated inside HWPEs. + reg hwpe_reserved { + field { + name = "reserved"; + desc = "Reserved."; + hw = r; + sw = r; + } reserved[31:0] = 0; + }; + // Mandatory STATUS register. Not to be updated inside HWPEs. + reg hwpe_status { + field { + name = "status"; + desc = "Status of currently running job."; + hw = w; + sw = r; + } status0[31:0] = 0; + }; + // Mandatory RUNNING_JOB register. Not to be updated inside HWPEs. + reg hwpe_running_job { + field { + name = "reserved"; + desc = "Reserved."; + hw = r; + sw = r; + } r0[31:8] = 0; + field { + name = "running_job"; + desc = "Returns ID of currently running job if any job is running; otherwise, of the last job that has been run."; + hw = w; + sw = r; + } running_job[7:0] = 0; + }; + // Mandatory SOFT_CLEAR register. Not to be updated inside HWPEs. 
+ reg hwpe_soft_clear { + field { + name = "reserved"; + desc = "Reserved."; + hw = r; + sw = r; + } r0[31:2] = 0; + field { + name = "soft_clear"; + desc = "Write `0x0` to clear the full status of the accelerator IP, including the register file; write `0x1` to clear the status of the accelerator IP, except for the register file; write `0x2` to clear only the register file."; + hw = r; + sw = w; + swacc = true; + } soft_clear[1:0] = 0; + }; + + // "mandatory" set of HWPE registers (CONTROL regs). Not to be updated inside HWPEs. + regfile hwpe_ctrl_mandatory { + hwpe_commit_trigger commit_trigger @ 0x00; + hwpe_acquire acquire @ 0x04; + hwpe_reserved reserved0 @ 0x08; + hwpe_status status @ 0x0c; + hwpe_running_job running_job @ 0x10; + hwpe_soft_clear soft_clear @ 0x14; + hwpe_reserved reserved1 @ 0x18; + hwpe_reserved reserved2 @ 0x1c; + }; + + // RedMulE job-dependent registers + reg mcnfig0 { + name = "MCNFIG0"; + field { + name = "k_size"; + desc = "K dimension (cols of X, rows of W)."; + hw = r; + sw = rw; + } k_size[31:16] = 0; + field { + name = "m_size"; + desc = "M dimension (rows of X/Z)."; + hw = r; + sw = rw; + } m_size[15:0] = 0; + }; + reg mcnfig1 { + name = "MCNFIG1"; + field { // TODO: this register is not aligned with the current XIF interface + name = "gemm_output_fmt"; + desc = "Output format."; + hw = r; + sw = rw; + encode = gemm_fmt_e; + } gemm_output_fmt[26:25] = 0; + field { // TODO: this register is not aligned with the current XIF interface + name = "gemm_input_fmt"; + desc = "Input format."; + hw = r; + sw = rw; + encode = gemm_fmt_e; + } gemm_input_fmt[24:23] = 0; + field { // TODO: this register is not aligned with the current XIF interface + name = "gemm_ops"; + desc = "Operation type."; + hw = r; + sw = rw; + encode = gemm_op_e; + } gemm_ops[22:20] = 0; + field { + name = "send_w"; + desc = "Broadcast W to external stream."; + hw = r; + sw = rw; + } send_w[19:19] = 0; + field { + name = "receive_w"; + desc = "Receive W from external
stream."; + hw = r; + sw = rw; + } receive_w[18:18] = 0; + field { + name = "send_x"; + desc = "Broadcast X to external stream."; + hw = r; + sw = rw; + } send_x[17:17] = 0; + field { + name = "receive_x"; + desc = "Receive X from external stream."; + hw = r; + sw = rw; + } receive_x[16:16] = 0; + field { + name = "n_size"; + desc = "N dimension (cols of W/Z)."; + hw = r; + sw = rw; + } n_size[15:0] = 0; + }; + reg mcnfig2 { + name = "MCNFIG2"; + field { + name = "y_offs"; + desc = "Y buffer offset for bias addition."; + hw = r; + sw = rw; + } y_offs[31:0] = 0; + }; + reg marith0 { + name = "MARITH0"; + field { + name = "x_addr"; + desc = "X matrix base address."; + hw = r; + sw = rw; + } x_addr[31:0] = 0; + }; + reg marith1 { + name = "MARITH1"; + field { + name = "w_addr"; + desc = "W matrix base address."; + hw = r; + sw = rw; + } w_addr[31:0] = 0; + }; + reg marith2 { + name = "MARITH2"; + field { + name = "z_addr"; + desc = "Z matrix base address."; + hw = r; + sw = rw; + } z_addr[31:0] = 0; + }; + reg mopcnt { + name = "MOPCNT"; + field { + name = "op_id_cnt"; + desc = "Operations complete."; + hw = w; + sw = r; + } op_id_cnt[31:0] = 0; + }; + + // no "job-independent" registers in RedMulE + regfile hwpe_ctrl_job_indep { + hwpe_reserved reserved; + }; + + // "job-dependent" set of HWPE registers. Update inside HWPEs. + regfile hwpe_ctrl_job_dep { + mcnfig0 mcnfig0 @ 0x00; + mcnfig1 mcnfig1 @ 0x04; + mcnfig2 mcnfig2 @ 0x08; + marith0 marith0 @ 0x0c; + marith1 marith1 @ 0x10; + marith2 marith2 @ 0x14; + mopcnt mopcnt @ 0x18; + }; + + // HWPE control address map.
Update inside HWPEs + hwpe_ctrl_mandatory hwpe_ctrl @ 0x00; + hwpe_ctrl_job_dep hwpe_job_dep @ 0x20; + hwpe_ctrl_job_indep hwpe_job_indep; + +}; diff --git a/rtl/ctrl/regif/redmule_regif.sv b/rtl/ctrl/regif/redmule_regif.sv new file mode 100644 index 0000000..442f344 --- /dev/null +++ b/rtl/ctrl/regif/redmule_regif.sv @@ -0,0 +1,787 @@ +// Generated by PeakRDL-regblock - A free and open-source SystemVerilog generator +// https://github.com/SystemRDL/PeakRDL-regblock + +module redmule_regif #( + parameter ID_WIDTH = 1 + ) ( + input wire clk, + input wire arst_n, + + input wire s_obi_req, + output logic s_obi_gnt, + input wire [31:0] s_obi_addr, + input wire s_obi_we, + input wire [3:0] s_obi_be, + input wire [31:0] s_obi_wdata, + input wire [ID_WIDTH-1:0] s_obi_aid, + output logic s_obi_rvalid, + input wire s_obi_rready, + output logic [31:0] s_obi_rdata, + output logic s_obi_err, + output logic [ID_WIDTH-1:0] s_obi_rid, + + input redmule_regif_pkg::redmule_regif__in_t hwif_in, + output redmule_regif_pkg::redmule_regif__out_t hwif_out + ); + + //-------------------------------------------------------------------------- + // CPU Bus interface logic + //-------------------------------------------------------------------------- + logic cpuif_req; + logic cpuif_req_is_wr; + logic [31:0] cpuif_addr; + logic [31:0] cpuif_wr_data; + logic [31:0] cpuif_wr_biten; + logic cpuif_req_stall_wr; + logic cpuif_req_stall_rd; + + logic cpuif_rd_ack; + logic cpuif_rd_err; + logic [31:0] cpuif_rd_data; + + logic cpuif_wr_ack; + logic cpuif_wr_err; + + // State & holding regs + logic is_active; // A request is being served (not yet fully responded) + logic gnt_q; // one-cycle grant for A-channel + logic rsp_pending; // response ready but not yet accepted by manager + logic [31:0] rsp_rdata_q; + logic rsp_err_q; + logic [$bits(s_obi_rid)-1:0] rid_q; + + // Latch AID on accept to echo back the response + always_ff @(posedge clk or negedge arst_n) begin + if (~arst_n) begin + is_active <= 
1'b0; + gnt_q <= 1'b0; + rsp_pending <= 1'b0; + rsp_rdata_q <= '0; + rsp_err_q <= 1'b0; + rid_q <= '0; + + cpuif_req <= '0; + cpuif_req_is_wr <= '0; + cpuif_addr <= '0; + cpuif_wr_data <= '0; + cpuif_wr_biten <= '0; + end else begin + // defaults + cpuif_req <= 1'b0; + gnt_q <= s_obi_req & ~is_active; + + // Accept new request when idle + if (~is_active) begin + if (s_obi_req) begin + is_active <= 1'b1; + cpuif_req <= 1'b1; + cpuif_req_is_wr <= s_obi_we; + cpuif_addr <= {s_obi_addr[31:2], 2'b0}; + cpuif_wr_data <= s_obi_wdata; + rid_q <= s_obi_aid; + for (int i = 0; i < 4; i++) begin + cpuif_wr_biten[i*8 +: 8] <= {8{ s_obi_be[i] }}; + end + end + end + + // Capture response + if (is_active && (cpuif_rd_ack || cpuif_wr_ack)) begin + rsp_pending <= 1'b1; + rsp_rdata_q <= cpuif_rd_data; + rsp_err_q <= cpuif_rd_err | cpuif_wr_err; + // NOTE: Keep 'is_active' asserted until the external R handshake completes + end + + // Complete external R-channel handshake only if manager ready + if (rsp_pending && s_obi_rvalid && s_obi_rready) begin + rsp_pending <= 1'b0; + is_active <= 1'b0; // free to accept the next request + end + end + end + + // R-channel outputs (held stable while rsp_pending=1) + assign s_obi_rvalid = rsp_pending; + assign s_obi_rdata = rsp_rdata_q; + assign s_obi_err = rsp_err_q; + assign s_obi_rid = rid_q; + + // A-channel grant (registered one-cycle pulse when we accept a request) + assign s_obi_gnt = gnt_q; + + logic cpuif_req_masked; + + // Read & write latencies are balanced. 
Stalls not required + assign cpuif_req_stall_rd = '0; + assign cpuif_req_stall_wr = '0; + assign cpuif_req_masked = cpuif_req + & !(!cpuif_req_is_wr & cpuif_req_stall_rd) + & !(cpuif_req_is_wr & cpuif_req_stall_wr); + + //-------------------------------------------------------------------------- + // Address Decode + //-------------------------------------------------------------------------- + typedef struct { + struct { + logic commit_trigger; + logic acquire; + logic reserved0; + logic status; + logic running_job; + logic soft_clear; + logic reserved1; + logic reserved2; + } hwpe_ctrl; + struct { + logic mcnfig0; + logic mcnfig1; + logic mcnfig2; + logic marith0; + logic marith1; + logic marith2; + logic mopcnt; + } hwpe_job_dep; + struct { + logic reserved; + } hwpe_job_indep; + } decoded_reg_strb_t; + decoded_reg_strb_t decoded_reg_strb; + logic decoded_err; + logic decoded_req; + logic decoded_req_is_wr; + logic [31:0] decoded_wr_data; + logic [31:0] decoded_wr_biten; + + always_comb begin + automatic logic is_valid_addr; + automatic logic is_invalid_rw; + is_valid_addr = '1; // No error checking on valid address access + is_invalid_rw = '0; + decoded_reg_strb.hwpe_ctrl.commit_trigger = cpuif_req_masked & (cpuif_addr == 32'h0); + decoded_reg_strb.hwpe_ctrl.acquire = cpuif_req_masked & (cpuif_addr == 32'h4) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_ctrl.reserved0 = cpuif_req_masked & (cpuif_addr == 32'h8) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_ctrl.status = cpuif_req_masked & (cpuif_addr == 32'hc) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_ctrl.running_job = cpuif_req_masked & (cpuif_addr == 32'h10) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_ctrl.soft_clear = cpuif_req_masked & (cpuif_addr == 32'h14); + decoded_reg_strb.hwpe_ctrl.reserved1 = cpuif_req_masked & (cpuif_addr == 32'h18) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_ctrl.reserved2 = cpuif_req_masked & (cpuif_addr == 32'h1c) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_job_dep.mcnfig0 = 
cpuif_req_masked & (cpuif_addr == 32'h20); + decoded_reg_strb.hwpe_job_dep.mcnfig1 = cpuif_req_masked & (cpuif_addr == 32'h24); + decoded_reg_strb.hwpe_job_dep.mcnfig2 = cpuif_req_masked & (cpuif_addr == 32'h28); + decoded_reg_strb.hwpe_job_dep.marith0 = cpuif_req_masked & (cpuif_addr == 32'h2c); + decoded_reg_strb.hwpe_job_dep.marith1 = cpuif_req_masked & (cpuif_addr == 32'h30); + decoded_reg_strb.hwpe_job_dep.marith2 = cpuif_req_masked & (cpuif_addr == 32'h34); + decoded_reg_strb.hwpe_job_dep.mopcnt = cpuif_req_masked & (cpuif_addr == 32'h38) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_job_indep.reserved = cpuif_req_masked & (cpuif_addr == 32'h3c) & !cpuif_req_is_wr; + decoded_err = (~is_valid_addr | is_invalid_rw) & decoded_req; + end + + // Pass down signals to next stage + assign decoded_req = cpuif_req_masked; + assign decoded_req_is_wr = cpuif_req_is_wr; + assign decoded_wr_data = cpuif_wr_data; + assign decoded_wr_biten = cpuif_wr_biten; + + //-------------------------------------------------------------------------- + // Field logic + //-------------------------------------------------------------------------- + typedef struct { + struct { + struct { + struct { + logic [1:0] next; + logic load_next; + } commit_trigger; + } commit_trigger; + struct { + struct { + logic [1:0] next; + logic load_next; + } soft_clear; + } soft_clear; + } hwpe_ctrl; + struct { + struct { + struct { + logic [15:0] next; + logic load_next; + } m_size; + struct { + logic [15:0] next; + logic load_next; + } k_size; + } mcnfig0; + struct { + struct { + logic [15:0] next; + logic load_next; + } n_size; + struct { + logic next; + logic load_next; + } receive_x; + struct { + logic next; + logic load_next; + } send_x; + struct { + logic next; + logic load_next; + } receive_w; + struct { + logic next; + logic load_next; + } send_w; + struct { + logic [2:0] next; + logic load_next; + } gemm_ops; + struct { + logic [1:0] next; + logic load_next; + } gemm_input_fmt; + struct { + logic [1:0] 
next; + logic load_next; + } gemm_output_fmt; + } mcnfig1; + struct { + struct { + logic [31:0] next; + logic load_next; + } y_offs; + } mcnfig2; + struct { + struct { + logic [31:0] next; + logic load_next; + } x_addr; + } marith0; + struct { + struct { + logic [31:0] next; + logic load_next; + } w_addr; + } marith1; + struct { + struct { + logic [31:0] next; + logic load_next; + } z_addr; + } marith2; + } hwpe_job_dep; + } field_combo_t; + field_combo_t field_combo; + + typedef struct { + struct { + struct { + struct { + logic [1:0] value; + } commit_trigger; + } commit_trigger; + struct { + struct { + logic [1:0] value; + } soft_clear; + } soft_clear; + } hwpe_ctrl; + struct { + struct { + struct { + logic [15:0] value; + } m_size; + struct { + logic [15:0] value; + } k_size; + } mcnfig0; + struct { + struct { + logic [15:0] value; + } n_size; + struct { + logic value; + } receive_x; + struct { + logic value; + } send_x; + struct { + logic value; + } receive_w; + struct { + logic value; + } send_w; + struct { + logic [2:0] value; + } gemm_ops; + struct { + logic [1:0] value; + } gemm_input_fmt; + struct { + logic [1:0] value; + } gemm_output_fmt; + } mcnfig1; + struct { + struct { + logic [31:0] value; + } y_offs; + } mcnfig2; + struct { + struct { + logic [31:0] value; + } x_addr; + } marith0; + struct { + struct { + logic [31:0] value; + } w_addr; + } marith1; + struct { + struct { + logic [31:0] value; + } z_addr; + } marith2; + } hwpe_job_dep; + } field_storage_t; + field_storage_t field_storage; + + // Field: redmule_regif.hwpe_ctrl.commit_trigger.commit_trigger + always_comb begin + automatic logic [1:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_ctrl.commit_trigger.commit_trigger.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_ctrl.commit_trigger && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_ctrl.commit_trigger.commit_trigger.value & ~decoded_wr_biten[1:0]) | (decoded_wr_data[1:0] & 
decoded_wr_biten[1:0]); + load_next_c = '1; + end + field_combo.hwpe_ctrl.commit_trigger.commit_trigger.next = next_c; + field_combo.hwpe_ctrl.commit_trigger.commit_trigger.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_ctrl.commit_trigger.commit_trigger.value <= 2'h0; + end else begin + if(field_combo.hwpe_ctrl.commit_trigger.commit_trigger.load_next) begin + field_storage.hwpe_ctrl.commit_trigger.commit_trigger.value <= field_combo.hwpe_ctrl.commit_trigger.commit_trigger.next; + end + end + end + assign hwif_out.hwpe_ctrl.commit_trigger.commit_trigger.value = field_storage.hwpe_ctrl.commit_trigger.commit_trigger.value; + assign hwif_out.hwpe_ctrl.commit_trigger.commit_trigger.swacc = decoded_reg_strb.hwpe_ctrl.commit_trigger; + assign hwif_out.hwpe_ctrl.commit_trigger.r0.value = 30'h0; + assign hwif_out.hwpe_ctrl.acquire.acquire.swacc = decoded_reg_strb.hwpe_ctrl.acquire; + assign hwif_out.hwpe_ctrl.reserved0.reserved.value = 32'h0; + assign hwif_out.hwpe_ctrl.running_job.r0.value = 24'h0; + // Field: redmule_regif.hwpe_ctrl.soft_clear.soft_clear + always_comb begin + automatic logic [1:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_ctrl.soft_clear.soft_clear.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_ctrl.soft_clear && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_ctrl.soft_clear.soft_clear.value & ~decoded_wr_biten[1:0]) | (decoded_wr_data[1:0] & decoded_wr_biten[1:0]); + load_next_c = '1; + end + field_combo.hwpe_ctrl.soft_clear.soft_clear.next = next_c; + field_combo.hwpe_ctrl.soft_clear.soft_clear.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_ctrl.soft_clear.soft_clear.value <= 2'h0; + end else begin + if(field_combo.hwpe_ctrl.soft_clear.soft_clear.load_next) begin + field_storage.hwpe_ctrl.soft_clear.soft_clear.value <= 
field_combo.hwpe_ctrl.soft_clear.soft_clear.next; + end + end + end + assign hwif_out.hwpe_ctrl.soft_clear.soft_clear.value = field_storage.hwpe_ctrl.soft_clear.soft_clear.value; + assign hwif_out.hwpe_ctrl.soft_clear.soft_clear.swacc = decoded_reg_strb.hwpe_ctrl.soft_clear; + assign hwif_out.hwpe_ctrl.soft_clear.r0.value = 30'h0; + assign hwif_out.hwpe_ctrl.reserved1.reserved.value = 32'h0; + assign hwif_out.hwpe_ctrl.reserved2.reserved.value = 32'h0; + // Field: redmule_regif.hwpe_job_dep.mcnfig0.m_size + always_comb begin + automatic logic [15:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig0.m_size.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig0 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig0.m_size.value & ~decoded_wr_biten[15:0]) | (decoded_wr_data[15:0] & decoded_wr_biten[15:0]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig0.m_size.next = next_c; + field_combo.hwpe_job_dep.mcnfig0.m_size.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig0.m_size.value <= 16'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig0.m_size.load_next) begin + field_storage.hwpe_job_dep.mcnfig0.m_size.value <= field_combo.hwpe_job_dep.mcnfig0.m_size.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig0.m_size.value = field_storage.hwpe_job_dep.mcnfig0.m_size.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig0.k_size + always_comb begin + automatic logic [15:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig0.k_size.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig0 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig0.k_size.value & ~decoded_wr_biten[31:16]) | (decoded_wr_data[31:16] & decoded_wr_biten[31:16]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig0.k_size.next 
= next_c; + field_combo.hwpe_job_dep.mcnfig0.k_size.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig0.k_size.value <= 16'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig0.k_size.load_next) begin + field_storage.hwpe_job_dep.mcnfig0.k_size.value <= field_combo.hwpe_job_dep.mcnfig0.k_size.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig0.k_size.value = field_storage.hwpe_job_dep.mcnfig0.k_size.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig1.n_size + always_comb begin + automatic logic [15:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig1.n_size.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig1 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig1.n_size.value & ~decoded_wr_biten[15:0]) | (decoded_wr_data[15:0] & decoded_wr_biten[15:0]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig1.n_size.next = next_c; + field_combo.hwpe_job_dep.mcnfig1.n_size.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig1.n_size.value <= 16'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig1.n_size.load_next) begin + field_storage.hwpe_job_dep.mcnfig1.n_size.value <= field_combo.hwpe_job_dep.mcnfig1.n_size.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig1.n_size.value = field_storage.hwpe_job_dep.mcnfig1.n_size.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig1.receive_x + always_comb begin + automatic logic [0:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig1.receive_x.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig1 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig1.receive_x.value & ~decoded_wr_biten[16:16]) | (decoded_wr_data[16:16] & decoded_wr_biten[16:16]); + 
load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig1.receive_x.next = next_c; + field_combo.hwpe_job_dep.mcnfig1.receive_x.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig1.receive_x.value <= 1'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig1.receive_x.load_next) begin + field_storage.hwpe_job_dep.mcnfig1.receive_x.value <= field_combo.hwpe_job_dep.mcnfig1.receive_x.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig1.receive_x.value = field_storage.hwpe_job_dep.mcnfig1.receive_x.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig1.send_x + always_comb begin + automatic logic [0:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig1.send_x.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig1 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig1.send_x.value & ~decoded_wr_biten[17:17]) | (decoded_wr_data[17:17] & decoded_wr_biten[17:17]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig1.send_x.next = next_c; + field_combo.hwpe_job_dep.mcnfig1.send_x.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig1.send_x.value <= 1'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig1.send_x.load_next) begin + field_storage.hwpe_job_dep.mcnfig1.send_x.value <= field_combo.hwpe_job_dep.mcnfig1.send_x.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig1.send_x.value = field_storage.hwpe_job_dep.mcnfig1.send_x.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig1.receive_w + always_comb begin + automatic logic [0:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig1.receive_w.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig1 && decoded_req_is_wr) begin // SW write + next_c = 
(field_storage.hwpe_job_dep.mcnfig1.receive_w.value & ~decoded_wr_biten[18:18]) | (decoded_wr_data[18:18] & decoded_wr_biten[18:18]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig1.receive_w.next = next_c; + field_combo.hwpe_job_dep.mcnfig1.receive_w.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig1.receive_w.value <= 1'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig1.receive_w.load_next) begin + field_storage.hwpe_job_dep.mcnfig1.receive_w.value <= field_combo.hwpe_job_dep.mcnfig1.receive_w.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig1.receive_w.value = field_storage.hwpe_job_dep.mcnfig1.receive_w.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig1.send_w + always_comb begin + automatic logic [0:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig1.send_w.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig1 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig1.send_w.value & ~decoded_wr_biten[19:19]) | (decoded_wr_data[19:19] & decoded_wr_biten[19:19]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig1.send_w.next = next_c; + field_combo.hwpe_job_dep.mcnfig1.send_w.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig1.send_w.value <= 1'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig1.send_w.load_next) begin + field_storage.hwpe_job_dep.mcnfig1.send_w.value <= field_combo.hwpe_job_dep.mcnfig1.send_w.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig1.send_w.value = field_storage.hwpe_job_dep.mcnfig1.send_w.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig1.gemm_ops + always_comb begin + automatic logic [2:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig1.gemm_ops.value; + load_next_c = '0; + 
if(decoded_reg_strb.hwpe_job_dep.mcnfig1 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig1.gemm_ops.value & ~decoded_wr_biten[22:20]) | (decoded_wr_data[22:20] & decoded_wr_biten[22:20]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig1.gemm_ops.next = next_c; + field_combo.hwpe_job_dep.mcnfig1.gemm_ops.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig1.gemm_ops.value <= 3'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig1.gemm_ops.load_next) begin + field_storage.hwpe_job_dep.mcnfig1.gemm_ops.value <= field_combo.hwpe_job_dep.mcnfig1.gemm_ops.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig1.gemm_ops.value = field_storage.hwpe_job_dep.mcnfig1.gemm_ops.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig1.gemm_input_fmt + always_comb begin + automatic logic [1:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig1.gemm_input_fmt.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig1 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig1.gemm_input_fmt.value & ~decoded_wr_biten[24:23]) | (decoded_wr_data[24:23] & decoded_wr_biten[24:23]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig1.gemm_input_fmt.next = next_c; + field_combo.hwpe_job_dep.mcnfig1.gemm_input_fmt.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig1.gemm_input_fmt.value <= 2'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig1.gemm_input_fmt.load_next) begin + field_storage.hwpe_job_dep.mcnfig1.gemm_input_fmt.value <= field_combo.hwpe_job_dep.mcnfig1.gemm_input_fmt.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig1.gemm_input_fmt.value = field_storage.hwpe_job_dep.mcnfig1.gemm_input_fmt.value; + // Field: 
redmule_regif.hwpe_job_dep.mcnfig1.gemm_output_fmt + always_comb begin + automatic logic [1:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig1.gemm_output_fmt.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig1 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig1.gemm_output_fmt.value & ~decoded_wr_biten[26:25]) | (decoded_wr_data[26:25] & decoded_wr_biten[26:25]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig1.gemm_output_fmt.next = next_c; + field_combo.hwpe_job_dep.mcnfig1.gemm_output_fmt.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig1.gemm_output_fmt.value <= 2'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig1.gemm_output_fmt.load_next) begin + field_storage.hwpe_job_dep.mcnfig1.gemm_output_fmt.value <= field_combo.hwpe_job_dep.mcnfig1.gemm_output_fmt.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig1.gemm_output_fmt.value = field_storage.hwpe_job_dep.mcnfig1.gemm_output_fmt.value; + // Field: redmule_regif.hwpe_job_dep.mcnfig2.y_offs + always_comb begin + automatic logic [31:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.mcnfig2.y_offs.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.mcnfig2 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.mcnfig2.y_offs.value & ~decoded_wr_biten[31:0]) | (decoded_wr_data[31:0] & decoded_wr_biten[31:0]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.mcnfig2.y_offs.next = next_c; + field_combo.hwpe_job_dep.mcnfig2.y_offs.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.mcnfig2.y_offs.value <= 32'h0; + end else begin + if(field_combo.hwpe_job_dep.mcnfig2.y_offs.load_next) begin + field_storage.hwpe_job_dep.mcnfig2.y_offs.value <= 
field_combo.hwpe_job_dep.mcnfig2.y_offs.next; + end + end + end + assign hwif_out.hwpe_job_dep.mcnfig2.y_offs.value = field_storage.hwpe_job_dep.mcnfig2.y_offs.value; + // Field: redmule_regif.hwpe_job_dep.marith0.x_addr + always_comb begin + automatic logic [31:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.marith0.x_addr.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.marith0 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.marith0.x_addr.value & ~decoded_wr_biten[31:0]) | (decoded_wr_data[31:0] & decoded_wr_biten[31:0]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.marith0.x_addr.next = next_c; + field_combo.hwpe_job_dep.marith0.x_addr.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.marith0.x_addr.value <= 32'h0; + end else begin + if(field_combo.hwpe_job_dep.marith0.x_addr.load_next) begin + field_storage.hwpe_job_dep.marith0.x_addr.value <= field_combo.hwpe_job_dep.marith0.x_addr.next; + end + end + end + assign hwif_out.hwpe_job_dep.marith0.x_addr.value = field_storage.hwpe_job_dep.marith0.x_addr.value; + // Field: redmule_regif.hwpe_job_dep.marith1.w_addr + always_comb begin + automatic logic [31:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.marith1.w_addr.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.marith1 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.marith1.w_addr.value & ~decoded_wr_biten[31:0]) | (decoded_wr_data[31:0] & decoded_wr_biten[31:0]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.marith1.w_addr.next = next_c; + field_combo.hwpe_job_dep.marith1.w_addr.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.marith1.w_addr.value <= 32'h0; + end else begin + if(field_combo.hwpe_job_dep.marith1.w_addr.load_next) begin 
+ field_storage.hwpe_job_dep.marith1.w_addr.value <= field_combo.hwpe_job_dep.marith1.w_addr.next; + end + end + end + assign hwif_out.hwpe_job_dep.marith1.w_addr.value = field_storage.hwpe_job_dep.marith1.w_addr.value; + // Field: redmule_regif.hwpe_job_dep.marith2.z_addr + always_comb begin + automatic logic [31:0] next_c; + automatic logic load_next_c; + next_c = field_storage.hwpe_job_dep.marith2.z_addr.value; + load_next_c = '0; + if(decoded_reg_strb.hwpe_job_dep.marith2 && decoded_req_is_wr) begin // SW write + next_c = (field_storage.hwpe_job_dep.marith2.z_addr.value & ~decoded_wr_biten[31:0]) | (decoded_wr_data[31:0] & decoded_wr_biten[31:0]); + load_next_c = '1; + end + field_combo.hwpe_job_dep.marith2.z_addr.next = next_c; + field_combo.hwpe_job_dep.marith2.z_addr.load_next = load_next_c; + end + always_ff @(posedge clk or negedge arst_n) begin + if(~arst_n) begin + field_storage.hwpe_job_dep.marith2.z_addr.value <= 32'h0; + end else begin + if(field_combo.hwpe_job_dep.marith2.z_addr.load_next) begin + field_storage.hwpe_job_dep.marith2.z_addr.value <= field_combo.hwpe_job_dep.marith2.z_addr.next; + end + end + end + assign hwif_out.hwpe_job_dep.marith2.z_addr.value = field_storage.hwpe_job_dep.marith2.z_addr.value; + assign hwif_out.hwpe_job_indep.reserved.reserved.value = 32'h0; + + //-------------------------------------------------------------------------- + // Write response + //-------------------------------------------------------------------------- + assign cpuif_wr_ack = decoded_req & decoded_req_is_wr; + // Writes are always granted with no error response + assign cpuif_wr_err = '0; + + //-------------------------------------------------------------------------- + // Readback + //-------------------------------------------------------------------------- + + logic readback_err; + logic readback_done; + logic [31:0] readback_data; + + // Assign readback values to a flattened array + logic [31:0] readback_array[16]; + assign readback_array[0][1:0] 
= '0; + assign readback_array[0][31:2] = (decoded_reg_strb.hwpe_ctrl.commit_trigger && !decoded_req_is_wr) ? 30'h0 : '0; + assign readback_array[1][31:0] = (decoded_reg_strb.hwpe_ctrl.acquire && !decoded_req_is_wr) ? hwif_in.hwpe_ctrl.acquire.acquire.next : '0; + assign readback_array[2][31:0] = (decoded_reg_strb.hwpe_ctrl.reserved0 && !decoded_req_is_wr) ? 32'h0 : '0; + assign readback_array[3][31:0] = (decoded_reg_strb.hwpe_ctrl.status && !decoded_req_is_wr) ? hwif_in.hwpe_ctrl.status.status0.next : '0; + assign readback_array[4][7:0] = (decoded_reg_strb.hwpe_ctrl.running_job && !decoded_req_is_wr) ? hwif_in.hwpe_ctrl.running_job.running_job.next : '0; + assign readback_array[4][31:8] = (decoded_reg_strb.hwpe_ctrl.running_job && !decoded_req_is_wr) ? 24'h0 : '0; + assign readback_array[5][1:0] = '0; + assign readback_array[5][31:2] = (decoded_reg_strb.hwpe_ctrl.soft_clear && !decoded_req_is_wr) ? 30'h0 : '0; + assign readback_array[6][31:0] = (decoded_reg_strb.hwpe_ctrl.reserved1 && !decoded_req_is_wr) ? 32'h0 : '0; + assign readback_array[7][31:0] = (decoded_reg_strb.hwpe_ctrl.reserved2 && !decoded_req_is_wr) ? 32'h0 : '0; + assign readback_array[8][15:0] = (decoded_reg_strb.hwpe_job_dep.mcnfig0 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig0.m_size.value : '0; + assign readback_array[8][31:16] = (decoded_reg_strb.hwpe_job_dep.mcnfig0 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig0.k_size.value : '0; + assign readback_array[9][15:0] = (decoded_reg_strb.hwpe_job_dep.mcnfig1 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig1.n_size.value : '0; + assign readback_array[9][16:16] = (decoded_reg_strb.hwpe_job_dep.mcnfig1 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig1.receive_x.value : '0; + assign readback_array[9][17:17] = (decoded_reg_strb.hwpe_job_dep.mcnfig1 && !decoded_req_is_wr) ? 
field_storage.hwpe_job_dep.mcnfig1.send_x.value : '0; + assign readback_array[9][18:18] = (decoded_reg_strb.hwpe_job_dep.mcnfig1 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig1.receive_w.value : '0; + assign readback_array[9][19:19] = (decoded_reg_strb.hwpe_job_dep.mcnfig1 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig1.send_w.value : '0; + assign readback_array[9][22:20] = (decoded_reg_strb.hwpe_job_dep.mcnfig1 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig1.gemm_ops.value : '0; + assign readback_array[9][24:23] = (decoded_reg_strb.hwpe_job_dep.mcnfig1 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig1.gemm_input_fmt.value : '0; + assign readback_array[9][26:25] = (decoded_reg_strb.hwpe_job_dep.mcnfig1 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig1.gemm_output_fmt.value : '0; + assign readback_array[9][31:27] = '0; + assign readback_array[10][31:0] = (decoded_reg_strb.hwpe_job_dep.mcnfig2 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.mcnfig2.y_offs.value : '0; + assign readback_array[11][31:0] = (decoded_reg_strb.hwpe_job_dep.marith0 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.marith0.x_addr.value : '0; + assign readback_array[12][31:0] = (decoded_reg_strb.hwpe_job_dep.marith1 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.marith1.w_addr.value : '0; + assign readback_array[13][31:0] = (decoded_reg_strb.hwpe_job_dep.marith2 && !decoded_req_is_wr) ? field_storage.hwpe_job_dep.marith2.z_addr.value : '0; + assign readback_array[14][31:0] = (decoded_reg_strb.hwpe_job_dep.mopcnt && !decoded_req_is_wr) ? hwif_in.hwpe_job_dep.mopcnt.op_id_cnt.next : '0; + assign readback_array[15][31:0] = (decoded_reg_strb.hwpe_job_indep.reserved && !decoded_req_is_wr) ? 
32'h0 : '0; + + // Reduce the array + always_comb begin + automatic logic [31:0] readback_data_var; + readback_done = decoded_req & ~decoded_req_is_wr; + readback_err = '0; + readback_data_var = '0; + for(int i=0; i<16; i++) readback_data_var |= readback_array[i]; + readback_data = readback_data_var; + end + + assign cpuif_rd_ack = readback_done; + assign cpuif_rd_data = readback_data; + assign cpuif_rd_err = readback_err; +endmodule diff --git a/rtl/ctrl/regif/redmule_regif_pkg.sv b/rtl/ctrl/regif/redmule_regif_pkg.sv new file mode 100644 index 0000000..4172fa6 --- /dev/null +++ b/rtl/ctrl/regif/redmule_regif_pkg.sv @@ -0,0 +1,243 @@ +// Generated by PeakRDL-regblock - A free and open-source SystemVerilog generator +// https://github.com/SystemRDL/PeakRDL-regblock + +package redmule_regif_pkg; + + localparam REDMULE_REGIF_DATA_WIDTH = 32; + localparam REDMULE_REGIF_MIN_ADDR_WIDTH = 32; + localparam REDMULE_REGIF_SIZE = 'h40; + + typedef struct { + logic [31:0] next; + } redmule_regif__hwpe_acquire__acquire__in_t; + + typedef struct { + redmule_regif__hwpe_acquire__acquire__in_t acquire; + } redmule_regif__hwpe_acquire__in_t; + + typedef struct { + logic [31:0] next; + } redmule_regif__hwpe_status__status0__in_t; + + typedef struct { + redmule_regif__hwpe_status__status0__in_t status0; + } redmule_regif__hwpe_status__in_t; + + typedef struct { + logic [7:0] next; + } redmule_regif__hwpe_running_job__running_job__in_t; + + typedef struct { + redmule_regif__hwpe_running_job__running_job__in_t running_job; + } redmule_regif__hwpe_running_job__in_t; + + typedef struct { + redmule_regif__hwpe_acquire__in_t acquire; + redmule_regif__hwpe_status__in_t status; + redmule_regif__hwpe_running_job__in_t running_job; + } redmule_regif__hwpe_ctrl_mandatory__in_t; + + typedef struct { + logic [31:0] next; + } redmule_regif__mopcnt__op_id_cnt__in_t; + + typedef struct { + redmule_regif__mopcnt__op_id_cnt__in_t op_id_cnt; + } redmule_regif__mopcnt__in_t; + + typedef struct { + 
redmule_regif__mopcnt__in_t mopcnt; + } redmule_regif__hwpe_ctrl_job_dep__in_t; + + typedef struct { + redmule_regif__hwpe_ctrl_mandatory__in_t hwpe_ctrl; + redmule_regif__hwpe_ctrl_job_dep__in_t hwpe_job_dep; + } redmule_regif__in_t; + + typedef struct { + logic [1:0] value; + logic swacc; + } redmule_regif__hwpe_commit_trigger__commit_trigger__out_t; + + typedef struct { + logic [29:0] value; + } redmule_regif__hwpe_commit_trigger__r0__out_t; + + typedef struct { + redmule_regif__hwpe_commit_trigger__commit_trigger__out_t commit_trigger; + redmule_regif__hwpe_commit_trigger__r0__out_t r0; + } redmule_regif__hwpe_commit_trigger__out_t; + + typedef struct { + logic swacc; + } redmule_regif__hwpe_acquire__acquire__out_t; + + typedef struct { + redmule_regif__hwpe_acquire__acquire__out_t acquire; + } redmule_regif__hwpe_acquire__out_t; + + typedef struct { + logic [31:0] value; + } redmule_regif__hwpe_reserved__reserved__out_t; + + typedef struct { + redmule_regif__hwpe_reserved__reserved__out_t reserved; + } redmule_regif__hwpe_reserved__out_t; + + typedef struct { + logic [23:0] value; + } redmule_regif__hwpe_running_job__r0__out_t; + + typedef struct { + redmule_regif__hwpe_running_job__r0__out_t r0; + } redmule_regif__hwpe_running_job__out_t; + + typedef struct { + logic [1:0] value; + logic swacc; + } redmule_regif__hwpe_soft_clear__soft_clear__out_t; + + typedef struct { + logic [29:0] value; + } redmule_regif__hwpe_soft_clear__r0__out_t; + + typedef struct { + redmule_regif__hwpe_soft_clear__soft_clear__out_t soft_clear; + redmule_regif__hwpe_soft_clear__r0__out_t r0; + } redmule_regif__hwpe_soft_clear__out_t; + + typedef struct { + redmule_regif__hwpe_commit_trigger__out_t commit_trigger; + redmule_regif__hwpe_acquire__out_t acquire; + redmule_regif__hwpe_reserved__out_t reserved0; + redmule_regif__hwpe_running_job__out_t running_job; + redmule_regif__hwpe_soft_clear__out_t soft_clear; + redmule_regif__hwpe_reserved__out_t reserved1; + 
redmule_regif__hwpe_reserved__out_t reserved2; + } redmule_regif__hwpe_ctrl_mandatory__out_t; + + typedef struct { + logic [15:0] value; + } redmule_regif__mcnfig0__m_size__out_t; + + typedef struct { + logic [15:0] value; + } redmule_regif__mcnfig0__k_size__out_t; + + typedef struct { + redmule_regif__mcnfig0__m_size__out_t m_size; + redmule_regif__mcnfig0__k_size__out_t k_size; + } redmule_regif__mcnfig0__out_t; + + typedef struct { + logic [15:0] value; + } redmule_regif__mcnfig1__n_size__out_t; + + typedef struct { + logic value; + } redmule_regif__mcnfig1__receive_x__out_t; + + typedef struct { + logic value; + } redmule_regif__mcnfig1__send_x__out_t; + + typedef struct { + logic value; + } redmule_regif__mcnfig1__receive_w__out_t; + + typedef struct { + logic value; + } redmule_regif__mcnfig1__send_w__out_t; + + typedef struct { + logic [2:0] value; + } redmule_regif__mcnfig1__gemm_ops__out_t; + + typedef struct { + logic [1:0] value; + } redmule_regif__mcnfig1__gemm_input_fmt__out_t; + + typedef struct { + logic [1:0] value; + } redmule_regif__mcnfig1__gemm_output_fmt__out_t; + + typedef struct { + redmule_regif__mcnfig1__n_size__out_t n_size; + redmule_regif__mcnfig1__receive_x__out_t receive_x; + redmule_regif__mcnfig1__send_x__out_t send_x; + redmule_regif__mcnfig1__receive_w__out_t receive_w; + redmule_regif__mcnfig1__send_w__out_t send_w; + redmule_regif__mcnfig1__gemm_ops__out_t gemm_ops; + redmule_regif__mcnfig1__gemm_input_fmt__out_t gemm_input_fmt; + redmule_regif__mcnfig1__gemm_output_fmt__out_t gemm_output_fmt; + } redmule_regif__mcnfig1__out_t; + + typedef struct { + logic [31:0] value; + } redmule_regif__mcnfig2__y_offs__out_t; + + typedef struct { + redmule_regif__mcnfig2__y_offs__out_t y_offs; + } redmule_regif__mcnfig2__out_t; + + typedef struct { + logic [31:0] value; + } redmule_regif__marith0__x_addr__out_t; + + typedef struct { + redmule_regif__marith0__x_addr__out_t x_addr; + } redmule_regif__marith0__out_t; + + typedef struct { + logic 
[31:0] value; + } redmule_regif__marith1__w_addr__out_t; + + typedef struct { + redmule_regif__marith1__w_addr__out_t w_addr; + } redmule_regif__marith1__out_t; + + typedef struct { + logic [31:0] value; + } redmule_regif__marith2__z_addr__out_t; + + typedef struct { + redmule_regif__marith2__z_addr__out_t z_addr; + } redmule_regif__marith2__out_t; + + typedef struct { + redmule_regif__mcnfig0__out_t mcnfig0; + redmule_regif__mcnfig1__out_t mcnfig1; + redmule_regif__mcnfig2__out_t mcnfig2; + redmule_regif__marith0__out_t marith0; + redmule_regif__marith1__out_t marith1; + redmule_regif__marith2__out_t marith2; + } redmule_regif__hwpe_ctrl_job_dep__out_t; + + typedef struct { + redmule_regif__hwpe_reserved__out_t reserved; + } redmule_regif__hwpe_ctrl_job_indep__out_t; + + typedef struct { + redmule_regif__hwpe_ctrl_mandatory__out_t hwpe_ctrl; + redmule_regif__hwpe_ctrl_job_dep__out_t hwpe_job_dep; + redmule_regif__hwpe_ctrl_job_indep__out_t hwpe_job_indep; + } redmule_regif__out_t; + + typedef enum logic [2:0] { + gemm_op_e__MATMUL = 'h0, + gemm_op_e__GEMM = 'h1, + gemm_op_e__ADDMAX = 'h2, + gemm_op_e__ADDMIN = 'h3, + gemm_op_e__MULMAX = 'h4, + gemm_op_e__MULMIN = 'h5, + gemm_op_e__MAXMIN = 'h6, + gemm_op_e__MINMAX = 'h7 + } gemm_op_e_e; + + typedef enum logic [1:0] { + gemm_fmt_e__Float8 = 'h0, + gemm_fmt_e__Float16 = 'h1, + gemm_fmt_e__Float8Alt = 'h2, + gemm_fmt_e__Float16Alt = 'h3 + } gemm_fmt_e_e; +endpackage From e8aa5cd915746d9a54977e35076969810d061cb4 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 24 Dec 2025 14:21:55 +0100 Subject: [PATCH 03/25] Add (draft, unchecked, not even compiled!) 
register-intf target --- rtl/ctrl/redmule_target_decoder.sv | 200 +++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 rtl/ctrl/redmule_target_decoder.sv diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv new file mode 100644 index 0000000..a725a5d --- /dev/null +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -0,0 +1,200 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Francesco Conti +// + +// +// This file contains a memory-mapped target interface for RedMulE. +// + +module redmule_target_decoder + import redmule_pkg::*; + import redmule_regif_pkg::*; +#( + parameter int unsigned InstFifoDepth = 4, + parameter int unsigned OpIdWidth = 4 +)( + input logic clk_i, + input logic rst_ni, + input logic clear_i, + input logic config_ready_i, + input logic op_done_i, + output logic config_valid_o, + output redmule_config_t config_o, + // periph slave port + hwpe_ctrl_intf_periph.slave periph +); + + // target signals + logic job_trigger; + logic job_done, job_done_q; + logic [31:0] job_status; + redmule_regif__hwpe_ctrl_job_indep__out_t job_indep_regs; + logic job_dep_regs_valid; + redmule_regif__hwpe_ctrl_job_dep__out_t job_dep_regs; + + // OBI plug target <-> regif + logic target_obi_req; + logic target_obi_gnt; + logic [31:0] target_obi_addr; + logic target_obi_we; + logic [3:0] target_obi_be; + logic [31:0] target_obi_wdata; + logic [ID_WIDTH-1:0] target_obi_aid; + logic target_obi_rvalid; + logic target_obi_rready; + logic [31:0] target_obi_rdata; + logic target_obi_err; + logic [ID_WIDTH-1:0] target_obi_rid; + + redmule_regif__in_t hwif_in; + redmule_regif__out_t hwif_out; + + /* HWPE controller target port */ + hwpe_ctrl_target #( + .NB_CONTEXT ( 2 ), + .ID_WIDTH ( ID ), + .ADDR_WIDTH ( 10 ), + .hwpe_ctrl_regif_in_t ( redmule_regif__in_t ), + .hwpe_ctrl_regif_out_t ( 
redmule_regif__out_t ), + .hwpe_ctrl_job_indep_t ( redmule_regif__hwpe_ctrl_job_indep__out_t ), + .hwpe_ctrl_job_dep_t ( redmule_regif__hwpe_ctrl_job_dep__out_t ) + ) i_target ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_o ( clear_o ), + .target ( periph ), + .job_trigger_o ( job_trigger ), + .job_done_i ( job_done ), + .job_status_i ( job_status ), + .job_indep_regs_o ( job_indep_regs ), + .job_dep_regs_valid_o ( job_dep_regs_valid ), + .job_dep_regs_o ( job_dep_regs ), + .target_obi_req_o ( target_obi_req ), + .target_obi_gnt_i ( target_obi_gnt ), + .target_obi_addr_o ( target_obi_addr ), + .target_obi_we_o ( target_obi_we ), + .target_obi_be_o ( target_obi_be ), + .target_obi_wdata_o ( target_obi_wdata ), + .target_obi_aid_o ( target_obi_aid ), + .target_obi_rvalid_i ( target_obi_rvalid ), + .target_obi_rready_o ( target_obi_rready ), + .target_obi_rdata_i ( target_obi_rdata ), + .target_obi_err_i ( target_obi_err ), + .target_obi_rid_i ( target_obi_rid ), + .hwif_in ( hwif_in ), + .hwif_out ( hwif_out ) + ); + + /* RedMulE SystemRDL-generated register interface */ + redmule_regif #( + .ID_WIDTH ( ID_WIDTH ) + ) i_regif ( + .clk ( clk_i ), + .arst_n ( rst_ni ), + .s_obi_req ( target_obi_req ), + .s_obi_gnt ( target_obi_gnt ), + .s_obi_addr ( target_obi_addr ), + .s_obi_we ( target_obi_we ), + .s_obi_be ( target_obi_be ), + .s_obi_wdata ( target_obi_wdata ), + .s_obi_aid ( target_obi_aid ), + .s_obi_rvalid ( target_obi_rvalid ), + .s_obi_rready ( target_obi_rready ), + .s_obi_rdata ( target_obi_rdata ), + .s_obi_err ( target_obi_err ), + .s_obi_rid ( target_obi_rid ), + .hwif_in ( hwif_in ), + .hwif_out ( hwif_out ) + ); + + assign job_done = op_done_i; + + logic config_valid_q; + always_ff @(posedge clk_i or negedge rst_ni) begin : config_valid_ff + if(~rst_ni) begin + config_valid_q <= '0; + end else begin + if(clear_i) begin // TODO: connect target-generated clear as well! 
+ config_valid_q <= '0; + end + else if(job_trigger) begin + config_valid_q <= 1'b1; + end + else if(op_done_i) begin + config_valid_q <= '0; + end + end + end + assign config_valid_o = config_valid_q; + + always_ff @(posedge clk_i or negedge rst_ni) begin : job_status_trigger + if(~rst_ni) begin + job_status <= '0; + end else begin + if(clear_i) begin // TODO: connect target-generated clear as well! + job_status <= '0; + end + else if((config_valid_q | job_trigger) & config_ready_i) begin + job_status <= 32'h1; + end + else if(op_done_i) begin + job_status <= '0; + end + end + end + + // Decode instruction and extract configuration parameters from register file values + assign config_o.m_size = hwif_out.hwpe_job_dep.mcnfig0.m_size.value; + assign config_o.n_size = hwif_out.hwpe_job_dep.mcnfig1.n_size.value; + assign config_o.k_size = hwif_out.hwpe_job_dep.mcnfig0.k_size.value; + assign config_o.receive_x = hwif_out.hwpe_job_dep.mcnfig1.send_x.value; + assign config_o.send_x = hwif_out.hwpe_job_dep.mcnfig1.send_x.value; + assign config_o.receive_w = hwif_out.hwpe_job_dep.mcnfig1.receive_w.value; + assign config_o.send_w = hwif_out.hwpe_job_dep.mcnfig1.send_w.value; + assign config_o.y_offs = hwif_out.hwpe_job_dep.mcnfig2.y_offs.value; + assign config_o.x_addr = hwif_out.hwpe_job_dep.marith0.x_addr.value; + assign config_o.w_addr = hwif_out.hwpe_job_dep.marith1.w_addr.value; + assign config_o.z_addr = hwif_out.hwpe_job_dep.marith2.z_addr.value; + assign config_o.gemm_ops = redmule_gemm_ops_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_ops.value); + assign config_o.gemm_input_fmt = redmule_gemm_fmt_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_input_fmt.value); + assign config_o.gemm_output_fmt = redmule_gemm_fmt_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_output_fmt.value); + + // Operation ID counter: + // op_id_counter_in_q: Increments when operations are issued (tags for tracking) + // op_id_counter_out_q: Increments when operations complete (for MOPCNT instruction) + logic 
[OpIdWidth-1:0] op_id_counter_in_q, op_id_counter_out_q; + + // Input counter: increments when MARITH instruction is issued + // Returns this value to CPU as operation ID for software tracking + always_ff @(posedge clk_i or negedge rst_ni) begin : op_id_counter_in + if (~rst_ni) begin + op_id_counter_in_q <= 0; + end else begin + if (clear_i) begin + op_id_counter_in_q <= 0; + end else if (job_trigger) begin + op_id_counter_in_q <= op_id_counter_in_q + 1; + end + end + end + + // Output counter: increments when any operation completes + // Returns this value for MOPCNT instruction to check completion status + // Initialized to all 1's to detect first completion (wraps to 0) + always_ff @(posedge clk_i or negedge rst_ni) begin : op_id_counter_out + if (~rst_ni) begin + op_id_counter_out_q <= '1; + end else begin + if (clear_i) begin + op_id_counter_out_q <= '1; + end else if (op_done_i) begin + op_id_counter_out_q <= op_id_counter_out_q + 1; + end + end + end + assign hwif_in.hwpe_job_dep.mopcnt.op_id_cnt.next = op_id_counter_out_q; + +endmodule: redmule_target_decoder From 871293f62de2b86172a49b26a417e0748ea939a1 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 24 Dec 2025 14:48:06 +0100 Subject: [PATCH 04/25] Draft (uncompiled, untested) integration of redmule_target_decoder into redmule_top Also, propagate target_clear to the controller when using HWPE_TARGET interface (XIF currently does not provide a software-based soft clear mechanism, which is a significant liability...) 
--- rtl/ctrl/redmule_target_decoder.sv | 20 ++--- rtl/redmule_ctrl.sv | 3 +- rtl/redmule_pkg.sv | 2 + rtl/redmule_top.sv | 125 ++++++++++++++++++----------- 4 files changed, 93 insertions(+), 57 deletions(-) diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index a725a5d..87e7341 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -13,18 +13,18 @@ module redmule_target_decoder import redmule_pkg::*; import redmule_regif_pkg::*; #( - parameter int unsigned InstFifoDepth = 4, parameter int unsigned OpIdWidth = 4 )( input logic clk_i, input logic rst_ni, - input logic clear_i, + input logic clear_i | target_clear_o, + output logic regif_clear_o, input logic config_ready_i, input logic op_done_i, output logic config_valid_o, output redmule_config_t config_o, - // periph slave port - hwpe_ctrl_intf_periph.slave periph + // target port + hwpe_ctrl_intf_target.slave target ); // target signals @@ -64,8 +64,8 @@ module redmule_target_decoder ) i_target ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), - .clear_o ( clear_o ), - .target ( periph ), + .clear_o ( regif_clear_o ), + .target ( target ), .job_trigger_o ( job_trigger ), .job_done_i ( job_done ), .job_status_i ( job_status ), @@ -117,7 +117,7 @@ module redmule_target_decoder if(~rst_ni) begin config_valid_q <= '0; end else begin - if(clear_i) begin // TODO: connect target-generated clear as well! + if(clear_i | target_clear_o) begin // TODO: connect target-generated clear as well! config_valid_q <= '0; end else if(job_trigger) begin @@ -134,7 +134,7 @@ module redmule_target_decoder if(~rst_ni) begin job_status <= '0; end else begin - if(clear_i) begin // TODO: connect target-generated clear as well! + if(clear_i | target_clear_o) begin // TODO: connect target-generated clear as well! 
job_status <= '0; end else if((config_valid_q | job_trigger) & config_ready_i) begin @@ -173,7 +173,7 @@ module redmule_target_decoder if (~rst_ni) begin op_id_counter_in_q <= 0; end else begin - if (clear_i) begin + if (clear_i | target_clear_o) begin op_id_counter_in_q <= 0; end else if (job_trigger) begin op_id_counter_in_q <= op_id_counter_in_q + 1; @@ -188,7 +188,7 @@ module redmule_target_decoder if (~rst_ni) begin op_id_counter_out_q <= '1; end else begin - if (clear_i) begin + if (clear_i | target_clear_o) begin op_id_counter_out_q <= '1; end else if (op_done_i) begin op_id_counter_out_q <= op_id_counter_out_q + 1; diff --git a/rtl/redmule_ctrl.sv b/rtl/redmule_ctrl.sv index 8268939..81acc10 100644 --- a/rtl/redmule_ctrl.sv +++ b/rtl/redmule_ctrl.sv @@ -21,6 +21,7 @@ module redmule_ctrl input logic rst_ni , input logic test_mode_i , output logic busy_o , + input logic target_clear_i , output logic clear_o , output logic evt_o , input redmule_config_t config_i , @@ -156,6 +157,6 @@ module redmule_ctrl /* Other combinational assigmnets */ /*---------------------------------------------------------------------------------------------*/ assign evt_o = flgs_streamer_i.z_stream_sink_flags.done; - assign clear_o = latch_clear || current == REDMULE_FINISHED; + assign clear_o = target_clear_i || latch_clear || current == REDMULE_FINISHED; endmodule : redmule_ctrl diff --git a/rtl/redmule_pkg.sv b/rtl/redmule_pkg.sv index ddb8e2a..f55469e 100644 --- a/rtl/redmule_pkg.sv +++ b/rtl/redmule_pkg.sv @@ -22,6 +22,8 @@ package redmule_pkg; parameter int unsigned WsourceStreamId = 1; parameter int unsigned YsourceStreamId = 2; + typedef enum logic { HWPE_TARGET, XIF } ctrl_intf_e; + typedef enum logic { LD_IN_FMP, LD_WEIGHT } source_sel_e; typedef enum logic { LOAD, STORE } ld_st_sel_e; diff --git a/rtl/redmule_top.sv b/rtl/redmule_top.sv index 52b2e9b..609a81d 100644 --- a/rtl/redmule_top.sv +++ b/rtl/redmule_top.sv @@ -24,7 +24,9 @@ module redmule_top parameter bit 
LatchBuffers = 0, parameter fpnew_pkg::fmt_logic_t FpFmtConfig = 6'b001101, parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000, - // Custom instrunctions + // Choose interface + parameter ctrl_intf_e CtrlIntfConfig = XIF; + // Custom instructions parameter logic [6:0] McnfigOpCode = 7'b0001011, parameter logic [6:0] MarithOpCode = 7'b0001011, parameter logic [6:0] MopcntOpCode = 7'b0001011, @@ -59,7 +61,7 @@ module redmule_top hwpe_stream_intf_stream.source w_stream_o , // Broadcasted X stream hwpe_stream_intf_stream.source x_stream_o , - // XIF ports + // XIF ports (unused if CtrlIntfConfig = HWPE_TARGET) input x_issue_req_t x_issue_req_i, output x_issue_resp_t x_issue_resp_o, input logic x_issue_valid_i, @@ -76,7 +78,9 @@ module redmule_top output logic sync_o, input logic sync_i, // TCDM master ports for the memory side - hci_core_intf.initiator tcdm + hci_core_intf.initiator tcdm, + // HWPE-ctrl target port (unused if CtrlIntfConfig = XIF) + hwpe_ctrl_intf_target.slave target ); localparam int unsigned FpWidth = fp_width(FpFormat); @@ -86,6 +90,7 @@ logic clk_acc; logic fsm_z_clk_en, ctrl_z_clk_en; logic enable, clear; +logic target_clear; logic y_buffer_depth_count, y_buffer_load, z_buffer_fill, @@ -498,7 +503,7 @@ redmule_memory_scheduler #( ) i_memory_scheduler ( .clk_i ( clk_acc ), .rst_ni ( rst_ni ), - .clear_i ( '0 ), + .clear_i ( target_clear ), .z_priority_i ( z_priority ), .config_i ( redmule_config ), .config_valid_i ( cfg_complete ), @@ -512,52 +517,79 @@ redmule_memory_scheduler #( ); /*---------------------------------------------------------------*/ -/* | Instruction Decoder | */ +/* | Instruction Decoder (XIF) or Target Decoder (HWPE_TARGET) | */ /*---------------------------------------------------------------*/ logic tiler_busy; redmule_config_t dec_config_q; -redmule_inst_decoder #( - .InstFifoDepth ( 4 ), - .McnfigOpCode ( McnfigOpCode ), - .MarithOpCode ( MarithOpCode ), - .MopcntOpCode ( MopcntOpCode ), - .McnfigFunct3 ( McnfigFunct3 
), - .MarithFunct3 ( MarithFunct3 ), - .MopcntFunct3 ( MopcntFunct3 ), - .McnfigFunct2 ( McnfigFunct2 ), - .MarithFunct2 ( MarithFunct2 ), - .MopcntFunct2 ( MopcntFunct2 ), - .XifIdWidth ( XifIdWidth ), - .XifNumHarts ( XifNumHarts ), - .XifIssueRegisterSplit ( XifIssueRegisterSplit ), - .x_issue_req_t ( x_issue_req_t ), - .x_issue_resp_t ( x_issue_resp_t ), - .x_register_t ( x_register_t ), - .x_commit_t ( x_commit_t ), - .x_result_t ( x_result_t ) -) i_inst_decoder ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .clear_i ( '0 ), - .config_ready_i ( ~config_fifo_full ), - .op_done_i ( flgs_streamer.z_stream_sink_flags.done ), - .config_valid_o ( dec_config_valid ), - .config_o ( dec_config ), - .x_issue_req_i ( x_issue_req_i ), - .x_issue_resp_o ( x_issue_resp_o ), - .x_issue_valid_i ( x_issue_valid_i ), - .x_issue_ready_o ( x_issue_ready_o ), - .x_register_i ( x_register_i ), - .x_register_valid_i ( x_register_valid_i ), - .x_register_ready_o ( x_register_ready_o ), - .x_commit_i ( x_commit_i ), - .x_commit_valid_i ( x_commit_valid_i ), - .x_result_o ( x_result_o ), - .x_result_valid_o ( x_result_valid_o ), - .x_result_ready_i ( x_result_ready_i ) -); +if(CtrlIntfConfig == XIF) begin : xif_ctrl_intf_gen + redmule_inst_decoder #( + .InstFifoDepth ( 4 ), + .McnfigOpCode ( McnfigOpCode ), + .MarithOpCode ( MarithOpCode ), + .MopcntOpCode ( MopcntOpCode ), + .McnfigFunct3 ( McnfigFunct3 ), + .MarithFunct3 ( MarithFunct3 ), + .MopcntFunct3 ( MopcntFunct3 ), + .McnfigFunct2 ( McnfigFunct2 ), + .MarithFunct2 ( MarithFunct2 ), + .MopcntFunct2 ( MopcntFunct2 ), + .XifIdWidth ( XifIdWidth ), + .XifNumHarts ( XifNumHarts ), + .XifIssueRegisterSplit ( XifIssueRegisterSplit ), + .x_issue_req_t ( x_issue_req_t ), + .x_issue_resp_t ( x_issue_resp_t ), + .x_register_t ( x_register_t ), + .x_commit_t ( x_commit_t ), + .x_result_t ( x_result_t ) + ) i_inst_decoder ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( '0 ), // TODO: fixme, not having a software-based clear 
mechanism is a bad idea. + .config_ready_i ( ~config_fifo_full ), + .op_done_i ( flgs_streamer.z_stream_sink_flags.done ), + .config_valid_o ( dec_config_valid ), + .config_o ( dec_config ), + .x_issue_req_i ( x_issue_req_i ), + .x_issue_resp_o ( x_issue_resp_o ), + .x_issue_valid_i ( x_issue_valid_i ), + .x_issue_ready_o ( x_issue_ready_o ), + .x_register_i ( x_register_i ), + .x_register_valid_i ( x_register_valid_i ), + .x_register_ready_o ( x_register_ready_o ), + .x_commit_i ( x_commit_i ), + .x_commit_valid_i ( x_commit_valid_i ), + .x_result_o ( x_result_o ), + .x_result_valid_o ( x_result_valid_o ), + .x_result_ready_i ( x_result_ready_i ) + ); + // bind unused HWPE_TARGET signals + assign target_clear = '0; // TODO: a software-accessible clear should be added also to the XIF interface + assign target.gnt = '1; + assign target.r_data = '0; + assign target.r_valid = '0; + assign target.r_id = '0; +end +else begin + redmule_target_decoder i_target_decoder ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( '0 ), // ORed internally with target_clear + .target_clear_o ( target_clear ), + .config_ready_i ( ~config_fifo_full ), + .op_done_i ( flgs_streamer.z_stream_sink_flags.done ), + .config_valid_o ( dec_config_valid ), + .config_o ( dec_config ), + .target ( target ) + ); + // bind unused XIF signals + assign x_issue_resp_o = '0; + assign x_issue_ready_o = '0; + assign x_register_ready_o = '0; + assign x_result_o = '0; + assign x_result_valid_o = '0; +end fifo_v3 #( .FALL_THROUGH ( 0 ), @@ -594,6 +626,7 @@ redmule_ctrl #( .flgs_streamer_i ( flgs_streamer ), .busy_o ( busy_o ), .tiler_busy_o ( tiler_busy ), + .target_clear_i ( target_clear ), .clear_o ( clear ), .evt_o ( evt_o ), .config_i ( dec_config_q ), @@ -623,7 +656,7 @@ redmule_scheduler #( .clk_i ( clk_acc ), .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), - .clear_i ( '0 ), + .clear_i ( target_clear ), .x_valid_i ( x_buffer_fifo.valid ), .w_valid_i ( w_buffer_fifo.valid ), .y_valid_i ( 
y_buffer_fifo.valid ), From 6e9f18613dfce7b4b4172d40be9460298faa8339 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 24 Dec 2025 14:52:05 +0100 Subject: [PATCH 05/25] Update Bender.yml --- Bender.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Bender.yml b/Bender.yml index 53809e5..8e41d08 100644 --- a/Bender.yml +++ b/Bender.yml @@ -48,6 +48,9 @@ sources: - rtl/redmule_memory_scheduler.sv - rtl/redmule_mux.sv - rtl/redmule_inst_decoder.sv + - rtl/ctrl/regif/redmule_regif_pkg.sv + - rtl/ctrl/regif/redmule_regif.sv + - rtl/ctrl/redmule_target_decoder.sv - target: any(redmule_test_complex, redmule_test_hwpe) files: From af4528004568a3012a5b43303f4e4585a653731f Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 24 Dec 2025 14:58:19 +0100 Subject: [PATCH 06/25] Fix silly replacements --- Bender.lock | 4 ++-- rtl/ctrl/redmule_target_decoder.sv | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Bender.lock b/Bender.lock index 1d89adb..f25ca40 100644 --- a/Bender.lock +++ b/Bender.lock @@ -79,10 +79,10 @@ packages: - redundancy_cells - register_interface hwpe-ctrl: - revision: 0e95510c0f4d43452d21b7723d766ae92e45c101 + revision: null version: null source: - Git: https://github.com/pulp-platform/hwpe-ctrl.git + Path: working_dir/hwpe-ctrl dependencies: - tech_cells_generic hwpe-stream: diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index 87e7341..016f723 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -17,8 +17,8 @@ module redmule_target_decoder )( input logic clk_i, input logic rst_ni, - input logic clear_i | target_clear_o, - output logic regif_clear_o, + input logic clear_i, + output logic target_clear_o, input logic config_ready_i, input logic op_done_i, output logic config_valid_o, From df85b6b82798a5a6598a9196f0eb0867e0d6c63d Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 24 Dec 2025 14:58:55 +0100 Subject: [PATCH 07/25] Fix more 
silly replacements --- rtl/ctrl/redmule_target_decoder.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index 016f723..c3b951f 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -64,7 +64,7 @@ module redmule_target_decoder ) i_target ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), - .clear_o ( regif_clear_o ), + .clear_o ( target_clear_o ), .target ( target ), .job_trigger_o ( job_trigger ), .job_done_i ( job_done ), From 79d8aa34a1122272769b0fb63ae613593e8fb9bd Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Fri, 6 Feb 2026 18:40:44 +0100 Subject: [PATCH 08/25] cosmetic changes to RDL --- rtl/ctrl/redmule_regif.rdl | 10 +++++----- rtl/ctrl/redmule_target_decoder.sv | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/rtl/ctrl/redmule_regif.rdl b/rtl/ctrl/redmule_regif.rdl index 42013c1..4c72604 100644 --- a/rtl/ctrl/redmule_regif.rdl +++ b/rtl/ctrl/redmule_regif.rdl @@ -31,14 +31,14 @@ enum gemm_op_e { // Enumeration for floating-point formats (aligned with redmule_pkg::gemm_fmt_e) enum gemm_fmt_e { - Float8 = 2'h0 { name = "Float8"; desc = "FP8 format"; }; - Float16 = 2'h1 { name = "Float16"; desc = "FP16 format"; }; - Float8Alt = 2'h2 { name = "Float8Alt"; desc = "FP8 alternative format"; }; - Float16Alt = 2'h3 { name = "Float16Alt"; desc = "FP16 alternative format"; }; + Float8 = 2'h0 { name = "Float8"; desc = "FP8 format (E4M3)"; }; + Float16 = 2'h1 { name = "Float16"; desc = "FP16 format (IEEE)"; }; + Float8Alt = 2'h2 { name = "Float8Alt"; desc = "FP8 alternative format (E5M2)"; }; + Float16Alt = 2'h3 { name = "Float16Alt"; desc = "FP16 alternative format (BF16)"; }; }; addrmap redmule_regif { - name = "NEureka register interface"; + name = "RedMulE register interface"; desc = "Control register map for RedMulE, including mandatory control/status registers and job-dependent configuration registers."; // Mandatory 
COMMIT_TRIGGER register. Not to be updated inside HWPEs. reg hwpe_commit_trigger { diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index c3b951f..377daa9 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -13,7 +13,7 @@ module redmule_target_decoder import redmule_pkg::*; import redmule_regif_pkg::*; #( - parameter int unsigned OpIdWidth = 4 + parameter int unsigned OpIdWidth = 4 )( input logic clk_i, input logic rst_ni, From fddf96ae862996a9c9bb6b6cb3c99e476ba6b1ca Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 11 Feb 2026 22:07:06 +0100 Subject: [PATCH 09/25] fix register interface with packed structs --- rtl/ctrl/gen_regif.sh | 2 +- rtl/ctrl/regif/redmule_regif_pkg.sv | 94 ++++++++++++++--------------- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/rtl/ctrl/gen_regif.sh b/rtl/ctrl/gen_regif.sh index c9c3eb1..b5824a6 100755 --- a/rtl/ctrl/gen_regif.sh +++ b/rtl/ctrl/gen_regif.sh @@ -3,4 +3,4 @@ peakrdl regblock redmule_regif.rdl -o regif/ --cpuif obi-flat --default-reset a peakrdl html redmule_regif.rdl -o regif/html/ peakrdl c-header redmule_regif.rdl -o regif/hwpe_ctrl_target.h # PeakRDL uses unpacked structs to avoid issues at compile time, which is commendable, but incompatible with FIFOing the output of the job! 
(use portable sed syntax that works on both Linux and macOS) -sed 's/typedef[[:space:]]\+struct\b/typedef struct packed/g' regif/redmule_regif_pkg.sv > regif/redmule_regif_pkg.sv.tmp && mv regif/redmule_regif_pkg.sv.tmp regif/redmule_regif_pkg.sv +sed -E 's/typedef[[:space:]]+struct([[:space:]])/typedef struct packed\1/g' regif/redmule_regif_pkg.sv > regif/redmule_regif_pkg.sv.tmp && mv regif/redmule_regif_pkg.sv.tmp regif/redmule_regif_pkg.sv diff --git a/rtl/ctrl/regif/redmule_regif_pkg.sv b/rtl/ctrl/regif/redmule_regif_pkg.sv index 4172fa6..24e8058 100644 --- a/rtl/ctrl/regif/redmule_regif_pkg.sv +++ b/rtl/ctrl/regif/redmule_regif_pkg.sv @@ -7,106 +7,106 @@ package redmule_regif_pkg; localparam REDMULE_REGIF_MIN_ADDR_WIDTH = 32; localparam REDMULE_REGIF_SIZE = 'h40; - typedef struct { + typedef struct packed { logic [31:0] next; } redmule_regif__hwpe_acquire__acquire__in_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_acquire__acquire__in_t acquire; } redmule_regif__hwpe_acquire__in_t; - typedef struct { + typedef struct packed { logic [31:0] next; } redmule_regif__hwpe_status__status0__in_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_status__status0__in_t status0; } redmule_regif__hwpe_status__in_t; - typedef struct { + typedef struct packed { logic [7:0] next; } redmule_regif__hwpe_running_job__running_job__in_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_running_job__running_job__in_t running_job; } redmule_regif__hwpe_running_job__in_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_acquire__in_t acquire; redmule_regif__hwpe_status__in_t status; redmule_regif__hwpe_running_job__in_t running_job; } redmule_regif__hwpe_ctrl_mandatory__in_t; - typedef struct { + typedef struct packed { logic [31:0] next; } redmule_regif__mopcnt__op_id_cnt__in_t; - typedef struct { + typedef struct packed { redmule_regif__mopcnt__op_id_cnt__in_t op_id_cnt; } redmule_regif__mopcnt__in_t; - typedef 
struct { + typedef struct packed { redmule_regif__mopcnt__in_t mopcnt; } redmule_regif__hwpe_ctrl_job_dep__in_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_ctrl_mandatory__in_t hwpe_ctrl; redmule_regif__hwpe_ctrl_job_dep__in_t hwpe_job_dep; } redmule_regif__in_t; - typedef struct { + typedef struct packed { logic [1:0] value; logic swacc; } redmule_regif__hwpe_commit_trigger__commit_trigger__out_t; - typedef struct { + typedef struct packed { logic [29:0] value; } redmule_regif__hwpe_commit_trigger__r0__out_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_commit_trigger__commit_trigger__out_t commit_trigger; redmule_regif__hwpe_commit_trigger__r0__out_t r0; } redmule_regif__hwpe_commit_trigger__out_t; - typedef struct { + typedef struct packed { logic swacc; } redmule_regif__hwpe_acquire__acquire__out_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_acquire__acquire__out_t acquire; } redmule_regif__hwpe_acquire__out_t; - typedef struct { + typedef struct packed { logic [31:0] value; } redmule_regif__hwpe_reserved__reserved__out_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_reserved__reserved__out_t reserved; } redmule_regif__hwpe_reserved__out_t; - typedef struct { + typedef struct packed { logic [23:0] value; } redmule_regif__hwpe_running_job__r0__out_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_running_job__r0__out_t r0; } redmule_regif__hwpe_running_job__out_t; - typedef struct { + typedef struct packed { logic [1:0] value; logic swacc; } redmule_regif__hwpe_soft_clear__soft_clear__out_t; - typedef struct { + typedef struct packed { logic [29:0] value; } redmule_regif__hwpe_soft_clear__r0__out_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_soft_clear__soft_clear__out_t soft_clear; redmule_regif__hwpe_soft_clear__r0__out_t r0; } redmule_regif__hwpe_soft_clear__out_t; - typedef struct { + typedef struct packed { 
redmule_regif__hwpe_commit_trigger__out_t commit_trigger; redmule_regif__hwpe_acquire__out_t acquire; redmule_regif__hwpe_reserved__out_t reserved0; @@ -116,52 +116,52 @@ package redmule_regif_pkg; redmule_regif__hwpe_reserved__out_t reserved2; } redmule_regif__hwpe_ctrl_mandatory__out_t; - typedef struct { + typedef struct packed { logic [15:0] value; } redmule_regif__mcnfig0__m_size__out_t; - typedef struct { + typedef struct packed { logic [15:0] value; } redmule_regif__mcnfig0__k_size__out_t; - typedef struct { + typedef struct packed { redmule_regif__mcnfig0__m_size__out_t m_size; redmule_regif__mcnfig0__k_size__out_t k_size; } redmule_regif__mcnfig0__out_t; - typedef struct { + typedef struct packed { logic [15:0] value; } redmule_regif__mcnfig1__n_size__out_t; - typedef struct { + typedef struct packed { logic value; } redmule_regif__mcnfig1__receive_x__out_t; - typedef struct { + typedef struct packed { logic value; } redmule_regif__mcnfig1__send_x__out_t; - typedef struct { + typedef struct packed { logic value; } redmule_regif__mcnfig1__receive_w__out_t; - typedef struct { + typedef struct packed { logic value; } redmule_regif__mcnfig1__send_w__out_t; - typedef struct { + typedef struct packed { logic [2:0] value; } redmule_regif__mcnfig1__gemm_ops__out_t; - typedef struct { + typedef struct packed { logic [1:0] value; } redmule_regif__mcnfig1__gemm_input_fmt__out_t; - typedef struct { + typedef struct packed { logic [1:0] value; } redmule_regif__mcnfig1__gemm_output_fmt__out_t; - typedef struct { + typedef struct packed { redmule_regif__mcnfig1__n_size__out_t n_size; redmule_regif__mcnfig1__receive_x__out_t receive_x; redmule_regif__mcnfig1__send_x__out_t send_x; @@ -172,39 +172,39 @@ package redmule_regif_pkg; redmule_regif__mcnfig1__gemm_output_fmt__out_t gemm_output_fmt; } redmule_regif__mcnfig1__out_t; - typedef struct { + typedef struct packed { logic [31:0] value; } redmule_regif__mcnfig2__y_offs__out_t; - typedef struct { + typedef struct packed { 
redmule_regif__mcnfig2__y_offs__out_t y_offs; } redmule_regif__mcnfig2__out_t; - typedef struct { + typedef struct packed { logic [31:0] value; } redmule_regif__marith0__x_addr__out_t; - typedef struct { + typedef struct packed { redmule_regif__marith0__x_addr__out_t x_addr; } redmule_regif__marith0__out_t; - typedef struct { + typedef struct packed { logic [31:0] value; } redmule_regif__marith1__w_addr__out_t; - typedef struct { + typedef struct packed { redmule_regif__marith1__w_addr__out_t w_addr; } redmule_regif__marith1__out_t; - typedef struct { + typedef struct packed { logic [31:0] value; } redmule_regif__marith2__z_addr__out_t; - typedef struct { + typedef struct packed { redmule_regif__marith2__z_addr__out_t z_addr; } redmule_regif__marith2__out_t; - typedef struct { + typedef struct packed { redmule_regif__mcnfig0__out_t mcnfig0; redmule_regif__mcnfig1__out_t mcnfig1; redmule_regif__mcnfig2__out_t mcnfig2; @@ -213,11 +213,11 @@ package redmule_regif_pkg; redmule_regif__marith2__out_t marith2; } redmule_regif__hwpe_ctrl_job_dep__out_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_reserved__out_t reserved; } redmule_regif__hwpe_ctrl_job_indep__out_t; - typedef struct { + typedef struct packed { redmule_regif__hwpe_ctrl_mandatory__out_t hwpe_ctrl; redmule_regif__hwpe_ctrl_job_dep__out_t hwpe_job_dep; redmule_regif__hwpe_ctrl_job_indep__out_t hwpe_job_indep; From 22e3815846784f200dd8092c1b6d1b3c2d576da8 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 11 Feb 2026 22:11:57 +0100 Subject: [PATCH 10/25] Syntax fixes, tested in integration in MAGIA tile --- rtl/ctrl/redmule_target_decoder.sv | 16 ++++++++-------- rtl/redmule_top.sv | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index 377daa9..31500a2 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -24,7 +24,7 @@ module redmule_target_decoder 
output logic config_valid_o, output redmule_config_t config_o, // target port - hwpe_ctrl_intf_target.slave target + hwpe_ctrl_intf_periph.slave target ); // target signals @@ -42,12 +42,12 @@ module redmule_target_decoder logic target_obi_we; logic [3:0] target_obi_be; logic [31:0] target_obi_wdata; - logic [ID_WIDTH-1:0] target_obi_aid; + logic [OpIdWidth-1:0] target_obi_aid; logic target_obi_rvalid; logic target_obi_rready; logic [31:0] target_obi_rdata; logic target_obi_err; - logic [ID_WIDTH-1:0] target_obi_rid; + logic [OpIdWidth-1:0] target_obi_rid; redmule_regif__in_t hwif_in; redmule_regif__out_t hwif_out; @@ -55,7 +55,7 @@ module redmule_target_decoder /* HWPE controller target port */ hwpe_ctrl_target #( .NB_CONTEXT ( 2 ), - .ID_WIDTH ( ID ), + .ID_WIDTH ( OpIdWidth ), .ADDR_WIDTH ( 10 ), .hwpe_ctrl_regif_in_t ( redmule_regif__in_t ), .hwpe_ctrl_regif_out_t ( redmule_regif__out_t ), @@ -90,7 +90,7 @@ module redmule_target_decoder /* RedMulE SystemRDL-generated register interface */ redmule_regif #( - .ID_WIDTH ( ID_WIDTH ) + .ID_WIDTH ( OpIdWidth ) ) i_regif ( .clk ( clk_i ), .arst_n ( rst_ni ), @@ -158,9 +158,9 @@ module redmule_target_decoder assign config_o.x_addr = hwif_out.hwpe_job_dep.marith0.x_addr.value; assign config_o.w_addr = hwif_out.hwpe_job_dep.marith1.w_addr.value; assign config_o.z_addr = hwif_out.hwpe_job_dep.marith2.z_addr.value; - assign config_o.gemm_ops = redmule_gemm_ops_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_ops.value); - assign config_o.gemm_input_fmt = redmule_gemm_fmt_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_input_fmt.value); - assign config_o.gemm_output_fmt = redmule_gemm_fmt_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_output_fmt.value); + assign config_o.gemm_ops = gemm_op_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_ops.value); + assign config_o.gemm_input_fmt = gemm_fmt_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_input_fmt.value); + assign config_o.gemm_output_fmt = gemm_fmt_e'(hwif_out.hwpe_job_dep.mcnfig1.gemm_output_fmt.value); // Operation ID 
counter: // op_id_counter_in_q: Increments when operations are issued (tags for tracking) diff --git a/rtl/redmule_top.sv b/rtl/redmule_top.sv index 609a81d..0f81567 100644 --- a/rtl/redmule_top.sv +++ b/rtl/redmule_top.sv @@ -25,7 +25,7 @@ module redmule_top parameter fpnew_pkg::fmt_logic_t FpFmtConfig = 6'b001101, parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000, // Choose interface - parameter ctrl_intf_e CtrlIntfConfig = XIF; + parameter ctrl_intf_e CtrlIntfConfig = XIF, // Custom instructions parameter logic [6:0] McnfigOpCode = 7'b0001011, parameter logic [6:0] MarithOpCode = 7'b0001011, @@ -80,7 +80,7 @@ module redmule_top // TCDM master ports for the memory side hci_core_intf.initiator tcdm, // HWPE-ctrl target port (unused if CtrlIntfConfig = XIF) - hwpe_ctrl_intf_target.slave target + hwpe_ctrl_intf_periph.slave target ); localparam int unsigned FpWidth = fp_width(FpFormat); From f840e12182057c7376c468fa826ab1fdc3de6454 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 11 Feb 2026 22:24:24 +0100 Subject: [PATCH 11/25] Update Bender.yml --- Bender.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Bender.yml b/Bender.yml index 8e41d08..0df685a 100644 --- a/Bender.yml +++ b/Bender.yml @@ -16,8 +16,8 @@ dependencies: cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: "redmule-v1.0" } ibex : { git: "https://github.com/pulp-platform/ibex.git" , rev: pulpissimo-v6.1.2 } hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , version: 1.9.2 } - hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , rev: 0e95510c0f4d43452d21b7723d766ae92e45c101 } # branch: yt/task-interfaces - hci : { git: "https://github.com/pulp-platform/hci.git" , version: 2.2.0 } + hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , rev: 7d64db39f1f1e4b7032bcd6e3cbfed8201e13005 } # branch: fc/rdl + hci : { git: "https://github.com/pulp-platform/hci.git" , rev: 
4acc35cb2ec7692df57f2dfa73c6a76cc9eb5a28 } # branch: fc/magia fpnew : { git: "https://github.com/pulp-platform/cvfpu.git" , rev: "pulp-v0.1.3" } common_cells : { git: "https://github.com/pulp-platform/common_cells.git" , version: 1.21.0 } tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.11 } From caa18e556ca5ba37ee216aecefe238552187f22a Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 11 Feb 2026 22:57:17 +0100 Subject: [PATCH 12/25] Fix doubly driven hwif_in signal --- rtl/ctrl/redmule_target_decoder.sv | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index 31500a2..337ebcc 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -50,6 +50,7 @@ module redmule_target_decoder logic [OpIdWidth-1:0] target_obi_rid; redmule_regif__in_t hwif_in; + redmule_regif__in_t hwif_in_target; redmule_regif__out_t hwif_out; /* HWPE controller target port */ @@ -84,7 +85,7 @@ module redmule_target_decoder .target_obi_rdata_i ( target_obi_rdata ), .target_obi_err_i ( target_obi_err ), .target_obi_rid_i ( target_obi_rid ), - .hwif_in ( hwif_in ), + .hwif_in ( hwif_in_target ), .hwif_out ( hwif_out ) ); @@ -195,6 +196,12 @@ module redmule_target_decoder end end end - assign hwif_in.hwpe_job_dep.mopcnt.op_id_cnt.next = op_id_counter_out_q; + + // Combine hwif_in from hwpe_ctrl_target with RedMulE-specific fields + always_comb + begin + hwif_in = hwif_in_target; + hwif_in.hwpe_job_dep.mopcnt.op_id_cnt.next = op_id_counter_out_q; + end endmodule: redmule_target_decoder From 05c4247a2b4a08097d883899116bc0e3eec5b652 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Thu, 12 Feb 2026 11:58:34 +0100 Subject: [PATCH 13/25] regif: Move job-dep @ offset 0x40 --- rtl/ctrl/redmule_regif.rdl | 6 +++--- rtl/ctrl/regif/redmule_regif.sv | 2 +- rtl/ctrl/regif/redmule_regif_pkg.sv | 2 +- 3 files changed, 5
insertions(+), 5 deletions(-) diff --git a/rtl/ctrl/redmule_regif.rdl b/rtl/ctrl/redmule_regif.rdl index 4c72604..bb6349b 100644 --- a/rtl/ctrl/redmule_regif.rdl +++ b/rtl/ctrl/redmule_regif.rdl @@ -261,8 +261,8 @@ addrmap redmule_regif { }; // HWPE control address map. Update inside HWPEs - hwpe_ctrl_mandatory hwpe_ctrl @ 0x00; - hwpe_ctrl_job_dep hwpe_job_dep @ 0x20; - hwpe_ctrl_job_indep hwpe_job_indep; + hwpe_ctrl_mandatory hwpe_ctrl @ 0x00; + hwpe_ctrl_job_dep hwpe_job_dep @ 0x20; + hwpe_ctrl_job_indep hwpe_job_indep @ 0x40; }; diff --git a/rtl/ctrl/regif/redmule_regif.sv b/rtl/ctrl/regif/redmule_regif.sv index 442f344..2dfb7cc 100644 --- a/rtl/ctrl/regif/redmule_regif.sv +++ b/rtl/ctrl/regif/redmule_regif.sv @@ -173,7 +173,7 @@ module redmule_regif #( decoded_reg_strb.hwpe_job_dep.marith1 = cpuif_req_masked & (cpuif_addr == 32'h30); decoded_reg_strb.hwpe_job_dep.marith2 = cpuif_req_masked & (cpuif_addr == 32'h34); decoded_reg_strb.hwpe_job_dep.mopcnt = cpuif_req_masked & (cpuif_addr == 32'h38) & !cpuif_req_is_wr; - decoded_reg_strb.hwpe_job_indep.reserved = cpuif_req_masked & (cpuif_addr == 32'h3c) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_job_indep.reserved = cpuif_req_masked & (cpuif_addr == 32'h40) & !cpuif_req_is_wr; decoded_err = (~is_valid_addr | is_invalid_rw) & decoded_req; end diff --git a/rtl/ctrl/regif/redmule_regif_pkg.sv b/rtl/ctrl/regif/redmule_regif_pkg.sv index 24e8058..0ee01ea 100644 --- a/rtl/ctrl/regif/redmule_regif_pkg.sv +++ b/rtl/ctrl/regif/redmule_regif_pkg.sv @@ -5,7 +5,7 @@ package redmule_regif_pkg; localparam REDMULE_REGIF_DATA_WIDTH = 32; localparam REDMULE_REGIF_MIN_ADDR_WIDTH = 32; - localparam REDMULE_REGIF_SIZE = 'h40; + localparam REDMULE_REGIF_SIZE = 'h44; typedef struct packed { logic [31:0] next; From 311710d60b6478dbf1b0237e3677e7f2c32263be Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Thu, 12 Feb 2026 16:56:59 +0100 Subject: [PATCH 14/25] Revert "regif: Move job-dep @ offset 0x40" This reverts commit 
b748ba1fc03ac76a565a6e6271e933aa3735d355. --- rtl/ctrl/redmule_regif.rdl | 6 +++--- rtl/ctrl/regif/redmule_regif.sv | 2 +- rtl/ctrl/regif/redmule_regif_pkg.sv | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rtl/ctrl/redmule_regif.rdl b/rtl/ctrl/redmule_regif.rdl index bb6349b..4c72604 100644 --- a/rtl/ctrl/redmule_regif.rdl +++ b/rtl/ctrl/redmule_regif.rdl @@ -261,8 +261,8 @@ addrmap redmule_regif { }; // HWPE control address map. Update inside HWPEs - hwpe_ctrl_mandatory hwpe_ctrl @ 0x00; - hwpe_ctrl_job_dep hwpe_job_dep @ 0x20; - hwpe_ctrl_job_indep hwpe_job_indep @ 0x40; + hwpe_ctrl_mandatory hwpe_ctrl @ 0x00; + hwpe_ctrl_job_dep hwpe_job_dep @ 0x20; + hwpe_ctrl_job_indep hwpe_job_indep; }; diff --git a/rtl/ctrl/regif/redmule_regif.sv b/rtl/ctrl/regif/redmule_regif.sv index 2dfb7cc..442f344 100644 --- a/rtl/ctrl/regif/redmule_regif.sv +++ b/rtl/ctrl/regif/redmule_regif.sv @@ -173,7 +173,7 @@ module redmule_regif #( decoded_reg_strb.hwpe_job_dep.marith1 = cpuif_req_masked & (cpuif_addr == 32'h30); decoded_reg_strb.hwpe_job_dep.marith2 = cpuif_req_masked & (cpuif_addr == 32'h34); decoded_reg_strb.hwpe_job_dep.mopcnt = cpuif_req_masked & (cpuif_addr == 32'h38) & !cpuif_req_is_wr; - decoded_reg_strb.hwpe_job_indep.reserved = cpuif_req_masked & (cpuif_addr == 32'h40) & !cpuif_req_is_wr; + decoded_reg_strb.hwpe_job_indep.reserved = cpuif_req_masked & (cpuif_addr == 32'h3c) & !cpuif_req_is_wr; decoded_err = (~is_valid_addr | is_invalid_rw) & decoded_req; end diff --git a/rtl/ctrl/regif/redmule_regif_pkg.sv b/rtl/ctrl/regif/redmule_regif_pkg.sv index 0ee01ea..24e8058 100644 --- a/rtl/ctrl/regif/redmule_regif_pkg.sv +++ b/rtl/ctrl/regif/redmule_regif_pkg.sv @@ -5,7 +5,7 @@ package redmule_regif_pkg; localparam REDMULE_REGIF_DATA_WIDTH = 32; localparam REDMULE_REGIF_MIN_ADDR_WIDTH = 32; - localparam REDMULE_REGIF_SIZE = 'h44; + localparam REDMULE_REGIF_SIZE = 'h40; typedef struct packed { logic [31:0] next; From 
9645dee9eae110c9706688427e3eb828050602c3 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Thu, 12 Feb 2026 22:31:22 +0100 Subject: [PATCH 15/25] Fix target decoder to properly trigger job --- rtl/ctrl/redmule_target_decoder.sv | 25 ++++--------------------- rtl/redmule_top.sv | 2 +- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index 337ebcc..97c45e2 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -29,7 +29,7 @@ module redmule_target_decoder // target signals logic job_trigger; - logic job_done, job_done_q; + logic job_done; logic [31:0] job_status; redmule_regif__hwpe_ctrl_job_indep__out_t job_indep_regs; logic job_dep_regs_valid; @@ -57,7 +57,7 @@ module redmule_target_decoder hwpe_ctrl_target #( .NB_CONTEXT ( 2 ), .ID_WIDTH ( OpIdWidth ), - .ADDR_WIDTH ( 10 ), + .ADDR_WIDTH ( 8 ), .hwpe_ctrl_regif_in_t ( redmule_regif__in_t ), .hwpe_ctrl_regif_out_t ( redmule_regif__out_t ), .hwpe_ctrl_job_indep_t ( redmule_regif__hwpe_ctrl_job_indep__out_t ), @@ -112,24 +112,7 @@ module redmule_target_decoder ); assign job_done = op_done_i; - - logic config_valid_q; - always_ff @(posedge clk_i or negedge rst_ni) begin : config_valid_ff - if(~rst_ni) begin - config_valid_q <= '0; - end else begin - if(clear_i | target_clear_o) begin // TODO: connect target-generated clear as well! - config_valid_q <= '0; - end - else if(job_trigger) begin - config_valid_q <= 1'b1; - end - else if(op_done_i) begin - config_valid_q <= '0; - end - end - end - assign config_valid_o = config_valid_q; + assign config_valid_o = job_trigger; always_ff @(posedge clk_i or negedge rst_ni) begin : job_status_trigger if(~rst_ni) begin @@ -138,7 +121,7 @@ module redmule_target_decoder if(clear_i | target_clear_o) begin // TODO: connect target-generated clear as well! 
job_status <= '0; end - else if((config_valid_q | job_trigger) & config_ready_i) begin + else if(job_trigger & config_ready_i) begin job_status <= 32'h1; end else if(op_done_i) begin diff --git a/rtl/redmule_top.sv b/rtl/redmule_top.sv index 0f81567..b9a9bd8 100644 --- a/rtl/redmule_top.sv +++ b/rtl/redmule_top.sv @@ -571,7 +571,7 @@ if(CtrlIntfConfig == XIF) begin : xif_ctrl_intf_gen assign target.r_valid = '0; assign target.r_id = '0; end -else begin +else begin : mm_ctrl_intf_gen redmule_target_decoder i_target_decoder ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), From d78b71ba7f4aeacdef7158fe57cbd1abd11d6a24 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Fri, 20 Feb 2026 00:28:53 +0100 Subject: [PATCH 16/25] Update to released HCI, HWPE-CTRL --- Bender.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Bender.yml b/Bender.yml index 0df685a..87407fe 100644 --- a/Bender.yml +++ b/Bender.yml @@ -16,8 +16,8 @@ dependencies: cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: "redmule-v1.0" } ibex : { git: "https://github.com/pulp-platform/ibex.git" , rev: pulpissimo-v6.1.2 } hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , version: 1.9.2 } - hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , rev: 7d64db39f1f1e4b7032bcd6e3cbfed8201e13005 } # branch: fc/rdl - hci : { git: "https://github.com/pulp-platform/hci.git" , rev: 4acc35cb2ec7692df57f2dfa73c6a76cc9eb5a28 } # branch: fc/magia + hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , version: 3.0.0 } + hci : { git: "https://github.com/pulp-platform/hci.git" , version: 2.3.0 } fpnew : { git: "https://github.com/pulp-platform/cvfpu.git" , rev: "pulp-v0.1.3" } common_cells : { git: "https://github.com/pulp-platform/common_cells.git" , version: 1.21.0 } tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.11 } From aa576236f57e6646ca2fe973a7fb59ff9ff643d2 Mon Sep 17 
00:00:00 2001 From: Francesco Conti Date: Mon, 23 Feb 2026 23:47:38 +0100 Subject: [PATCH 17/25] [streamer] re-enabled misaligned access support, with proper parametrization and assert safeguards --- rtl/redmule_pkg.sv | 1 + rtl/redmule_streamer.sv | 130 +++++++++++++++++++++++++--------------- rtl/redmule_tiler.sv | 9 +++ rtl/redmule_top.sv | 58 +++++++++--------- 4 files changed, 123 insertions(+), 75 deletions(-) diff --git a/rtl/redmule_pkg.sv b/rtl/redmule_pkg.sv index f55469e..ac96dfd 100644 --- a/rtl/redmule_pkg.sv +++ b/rtl/redmule_pkg.sv @@ -15,6 +15,7 @@ package redmule_pkg; parameter int unsigned MaxPipeRegs = 4; parameter int unsigned MaxDepth = MaxDim * MaxPipeRegs; parameter int unsigned MaxDataW = MaxDepth * 16; + parameter int unsigned MisalignedAccessSupportDefault = 0; // default to 0 for compatibility with Snitch parameter int unsigned NumStreamSources = 3; // X, W, Y diff --git a/rtl/redmule_streamer.sv b/rtl/redmule_streamer.sv index 221e1b9..a57c4e7 100644 --- a/rtl/redmule_streamer.sv +++ b/rtl/redmule_streamer.sv @@ -16,9 +16,9 @@ module redmule_streamer import hwpe_stream_package::*; #( parameter int unsigned DataW = MaxDataW, + parameter int unsigned MisalignedAccessSupport = MisalignedAccessSupportDefault, parameter int unsigned FpFormat = FP16 , parameter int unsigned EccChunkSize = 32 , - localparam int unsigned REALIGN = 0 , parameter fpnew_pkg::fmt_logic_t FpFmtConfig = 6'b001101, parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000, parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '0 @@ -45,29 +45,17 @@ module redmule_streamer output flgs_streamer_t flags_o ); -localparam int unsigned UW = `HCI_SIZE_GET_UW(tcdm); localparam int unsigned EW = `HCI_SIZE_GET_EW(tcdm); -// this localparam is reused for all internal, non-ecc HCI interfaces +// Non-ECC variant of tcdm size params, used for all internal (non-ECC) interfaces localparam hci_size_parameter_t `HCI_SIZE_PARAM(ldst_tcdm) = '{ - DW: DataW, - AW: DEFAULT_AW, - BW: 
DEFAULT_BW, - UW: UW, - IW: DEFAULT_IW, + DW: `HCI_SIZE_GET_DW(tcdm), + AW: `HCI_SIZE_GET_AW(tcdm), + BW: `HCI_SIZE_GET_BW(tcdm), + UW: `HCI_SIZE_GET_UW(tcdm), + IW: `HCI_SIZE_GET_IW(tcdm), EW: DEFAULT_EW, - EHW: DEFAULT_EHW -}; - -// this localparam is reused for the internal ecc HCI interface -localparam hci_size_parameter_t `HCI_SIZE_PARAM(ecc_ldst_tcdm) = '{ - DW: DataW, - AW: DEFAULT_AW, - BW: DEFAULT_BW, - UW: UW, - IW: DEFAULT_IW, - EW: EW, - EHW: DEFAULT_EHW + EHW: `HCI_SIZE_GET_EHW(tcdm) }; // Here the dynamic mux for virtual_tcdm interfaces @@ -77,8 +65,13 @@ hci_core_intf #( .WAIVE_RSP3_ASSERT ( 1'b1 ), // waive RSP-3 on memory-side of HCI FIFO .WAIVE_RSP5_ASSERT ( 1'b1 ), // waive RSP-5 on memory-side of HCI FIFO `endif - .DW ( DataW ), - .UW ( UW ) + .DW ( `HCI_SIZE_GET_DW(ldst_tcdm) ), + .AW ( `HCI_SIZE_GET_AW(ldst_tcdm) ), + .BW ( `HCI_SIZE_GET_BW(ldst_tcdm) ), + .UW ( `HCI_SIZE_GET_UW(ldst_tcdm) ), + .IW ( `HCI_SIZE_GET_IW(ldst_tcdm) ), + .EW ( `HCI_SIZE_GET_EW(ldst_tcdm) ), + .EHW ( `HCI_SIZE_GET_EHW(ldst_tcdm) ) ) ldst_tcdm ( .clk ( clk_i ) ); hci_core_intf #( @@ -86,8 +79,13 @@ hci_core_intf #( .WAIVE_RSP3_ASSERT ( 1'b1 ), // waive RSP-3 on memory-side of HCI FIFO .WAIVE_RSP5_ASSERT ( 1'b1 ), // waive RSP-5 on memory-side of HCI FIFO `endif - .DW ( DataW ), - .UW ( UW ) + .DW ( `HCI_SIZE_GET_DW(ldst_tcdm) ), + .AW ( `HCI_SIZE_GET_AW(ldst_tcdm) ), + .BW ( `HCI_SIZE_GET_BW(ldst_tcdm) ), + .UW ( `HCI_SIZE_GET_UW(ldst_tcdm) ), + .IW ( `HCI_SIZE_GET_IW(ldst_tcdm) ), + .EW ( `HCI_SIZE_GET_EW(ldst_tcdm) ), + .EHW ( `HCI_SIZE_GET_EHW(ldst_tcdm) ) ) ldst_tcdm_pre_r_id ( .clk ( clk_i ) ); hci_core_intf #( @@ -95,18 +93,23 @@ hci_core_intf #( .WAIVE_RSP3_ASSERT ( 1'b1 ), // waive RSP-3 on memory-side of HCI FIFO .WAIVE_RSP5_ASSERT ( 1'b1 ), // waive RSP-5 on memory-side of HCI FIFO `endif - .DW ( DataW ), - .UW ( UW ) + .DW ( `HCI_SIZE_GET_DW(ldst_tcdm) ), + .AW ( `HCI_SIZE_GET_AW(ldst_tcdm) ), + .BW ( `HCI_SIZE_GET_BW(ldst_tcdm) ), + .UW ( 
`HCI_SIZE_GET_UW(ldst_tcdm) ), + .IW ( `HCI_SIZE_GET_IW(ldst_tcdm) ), + .EW ( `HCI_SIZE_GET_EW(ldst_tcdm) ), + .EHW ( `HCI_SIZE_GET_EHW(ldst_tcdm) ) ) ldst_tcdm_pre_r_valid ( .clk ( clk_i ) ); if (EW > 1) begin : gen_ecc_encoder - logic [DataW/EccChunkSize-1:0] data_single_err, data_multi_err; + logic [`HCI_SIZE_GET_DW(tcdm)/EccChunkSize-1:0] data_single_err, data_multi_err; logic meta_single_err, meta_multi_err; hci_ecc_enc #( - .DW ( DataW ), - .`HCI_SIZE_PARAM(tcdm_target) ( `HCI_SIZE_PARAM(ldst_tcdm) ), - .`HCI_SIZE_PARAM(tcdm_initiator) ( `HCI_SIZE_PARAM(ecc_ldst_tcdm) ) + .DW ( `HCI_SIZE_GET_DW(tcdm) ), + .`HCI_SIZE_PARAM(tcdm_target) ( `HCI_SIZE_PARAM(ldst_tcdm) ), + .`HCI_SIZE_PARAM(tcdm_initiator) ( `HCI_SIZE_PARAM(tcdm) ) ) i_ecc_enc ( .r_data_single_err_o ( data_single_err ), .r_data_multi_err_o ( data_multi_err ), @@ -127,8 +130,13 @@ hci_core_intf #( .WAIVE_RQ3_ASSERT ( 1'b1 ), .WAIVE_RQ4_ASSERT ( 1'b1 ), `endif - .DW ( DataW ), - .UW ( UW ) + .DW ( `HCI_SIZE_GET_DW(ldst_tcdm) ), + .AW ( `HCI_SIZE_GET_AW(ldst_tcdm) ), + .BW ( `HCI_SIZE_GET_BW(ldst_tcdm) ), + .UW ( `HCI_SIZE_GET_UW(ldst_tcdm) ), + .IW ( `HCI_SIZE_GET_IW(ldst_tcdm) ), + .EW ( `HCI_SIZE_GET_EW(ldst_tcdm) ), + .EHW ( `HCI_SIZE_GET_EHW(ldst_tcdm) ) ) virt_tcdm [0:NumStreamSources+1] ( .clk ( clk_i ) ); redmule_mux #( @@ -161,10 +169,9 @@ hci_core_r_valid_filter #( * side (virt_tcdm[NumStreamSources]) of the LD/ST multiplexer. */ // Sink module that turns the incoming Z stream into TCDM. 
-hci_core_intf #( .DW ( DataW ), - .UW ( UW ) ) zstream2cast ( .clk ( clk_i ) ); +`HCI_INTF_EXPLICIT_PARAM(zstream2cast, clk_i, `HCI_SIZE_PARAM(ldst_tcdm)); hci_core_sink #( - .MISALIGNED_ACCESSES ( REALIGN ), + .MISALIGNED_ACCESSES ( MisalignedAccessSupport ), .`HCI_SIZE_PARAM(tcdm) ( `HCI_SIZE_PARAM(ldst_tcdm) ) ) i_stream_sink ( .clk_i ( clk_i ), @@ -184,11 +191,15 @@ hci_core_intf #( .WAIVE_RSP3_ASSERT ( 1'b1 ), // waive RSP-3 on memory-side of HCI FIFO .WAIVE_RSP5_ASSERT ( 1'b1 ), // waive RSP-5 on memory-side of HCI FIFO `endif - .DW ( DataW ), - .UW ( UW ) + .DW ( `HCI_SIZE_GET_DW(ldst_tcdm) ), + .AW ( `HCI_SIZE_GET_AW(ldst_tcdm) ), + .BW ( `HCI_SIZE_GET_BW(ldst_tcdm) ), + .UW ( `HCI_SIZE_GET_UW(ldst_tcdm) ), + .IW ( `HCI_SIZE_GET_IW(ldst_tcdm) ), + .EW ( `HCI_SIZE_GET_EW(ldst_tcdm) ), + .EHW ( `HCI_SIZE_GET_EHW(ldst_tcdm) ) ) z_fifo_d ( .clk ( clk_i ) ); -hci_core_intf #( .DW ( DataW ), - .UW ( UW ) ) z_fifo_q ( .clk ( clk_i ) ); +`HCI_INTF_EXPLICIT_PARAM(z_fifo_q, clk_i, `HCI_SIZE_PARAM(ldst_tcdm)); logic cast; assign cast = (ctrl_i.input_cast_src_fmt == fpnew_pkg::FP16) ? 1'b0: 1'b1; @@ -236,7 +247,7 @@ flags_fifo_t store_fifo_flags; // HCI store fifo. 
hci_core_fifo #( - .FIFO_DEPTH ( 2 ), + .FIFO_DEPTH ( 2 ), .`HCI_SIZE_PARAM(tcdm_initiator) ( `HCI_SIZE_PARAM(ldst_tcdm) ) ) i_store_fifo ( .clk_i ( clk_i ), @@ -275,8 +286,13 @@ hci_core_intf #( .WAIVE_RSP5_ASSERT ( 1'b1 ), // waive RSP-5 on memory-side of HCI FIFO .WAIVE_RQ4_ASSERT ( 1'b1 ), `endif - .DW ( DataW ), - .UW ( UW ) + .DW ( `HCI_SIZE_GET_DW(ldst_tcdm) ), + .AW ( `HCI_SIZE_GET_AW(ldst_tcdm) ), + .BW ( `HCI_SIZE_GET_BW(ldst_tcdm) ), + .UW ( `HCI_SIZE_GET_UW(ldst_tcdm) ), + .IW ( `HCI_SIZE_GET_IW(ldst_tcdm) ), + .EW ( `HCI_SIZE_GET_EW(ldst_tcdm) ), + .EHW ( `HCI_SIZE_GET_EHW(ldst_tcdm) ) ) load_fifo_d [0:NumStreamSources-1] ( .clk ( clk_i ) ); hci_core_intf #( @@ -285,8 +301,13 @@ hci_core_intf #( .WAIVE_RSP5_ASSERT ( 1'b1 ), .WAIVE_RQ4_ASSERT ( 1'b1 ), `endif - .DW ( DataW ), - .UW ( UW ) + .DW ( `HCI_SIZE_GET_DW(ldst_tcdm) ), + .AW ( `HCI_SIZE_GET_AW(ldst_tcdm) ), + .BW ( `HCI_SIZE_GET_BW(ldst_tcdm) ), + .UW ( `HCI_SIZE_GET_UW(ldst_tcdm) ), + .IW ( `HCI_SIZE_GET_IW(ldst_tcdm) ), + .EW ( `HCI_SIZE_GET_EW(ldst_tcdm) ), + .EHW ( `HCI_SIZE_GET_EHW(ldst_tcdm) ) ) load_fifo_q [0:NumStreamSources-1] ( .clk ( clk_i ) ); hci_core_intf #( @@ -295,8 +316,14 @@ hci_core_intf #( .WAIVE_RSP5_ASSERT ( 1'b1 ), .WAIVE_RQ4_ASSERT ( 1'b1 ), `endif - .DW ( DataW ), - .UW ( UW ) ) tcdm_cast [0:NumStreamSources-1] ( .clk ( clk_i ) ); + .DW ( `HCI_SIZE_GET_DW(ldst_tcdm) ), + .AW ( `HCI_SIZE_GET_AW(ldst_tcdm) ), + .BW ( `HCI_SIZE_GET_BW(ldst_tcdm) ), + .UW ( `HCI_SIZE_GET_UW(ldst_tcdm) ), + .IW ( `HCI_SIZE_GET_IW(ldst_tcdm) ), + .EW ( `HCI_SIZE_GET_EW(ldst_tcdm) ), + .EHW ( `HCI_SIZE_GET_EHW(ldst_tcdm) ) +) tcdm_cast [0:NumStreamSources-1] ( .clk ( clk_i ) ); hwpe_stream_intf_stream #( .DATA_WIDTH ( DataW ) ) out_stream [0:NumStreamSources-1] ( .clk( clk_i ) ); @@ -376,7 +403,7 @@ for (genvar i = 0; i < NumStreamSources; i++) begin: gen_tcdm2stream end hci_core_source #( - .MISALIGNED_ACCESSES ( REALIGN ), + .MISALIGNED_ACCESSES ( MisalignedAccessSupport ), 
.`HCI_SIZE_PARAM(tcdm) ( `HCI_SIZE_PARAM(ldst_tcdm) ) ) i_stream_source ( .clk_i ( clk_i ), @@ -406,4 +433,13 @@ hwpe_stream_assign i_wstream_assign ( .push_i( out_stream[WsourceStreamId] ) , hwpe_stream_assign i_ystream_assign ( .push_i( out_stream[YsourceStreamId] ) , .pop_o ( y_stream_o ) ); +`ifndef SYNTHESIS +`ifndef VERILATOR +`ifndef VCS +initial + tcdm_size_check_dw : assert(`HCI_SIZE_PARAM(tcdm).DW == ((MisalignedAccessSupport == 1) ? (DataW + 32) : DataW)); +`endif +`endif +`endif + endmodule : redmule_streamer diff --git a/rtl/redmule_tiler.sv b/rtl/redmule_tiler.sv index 544ca1f..b9b37e1 100644 --- a/rtl/redmule_tiler.sv +++ b/rtl/redmule_tiler.sv @@ -273,4 +273,13 @@ end assign config_o = config_q; +`ifndef SYNTHESIS +`ifndef VERILATOR +`ifndef VCS +initial + dataw : assert(DataW == Height*(PipeRegs+1)*16); +`endif +`endif +`endif + endmodule: redmule_tiler diff --git a/rtl/redmule_top.sv b/rtl/redmule_top.sv index b9a9bd8..066ca35 100644 --- a/rtl/redmule_top.sv +++ b/rtl/redmule_top.sv @@ -14,32 +14,33 @@ module redmule_top import hwpe_ctrl_package::*; import hwpe_stream_package::*; #( - parameter int unsigned DataW = MaxDataW, // TCDM port dimension (in bits) - parameter fp_format_e FpFormat = FP16, // Data format (default is FP16) - parameter int unsigned Height = MaxDim, // Number of PEs within a row - parameter int unsigned Width = MaxDim, // Number of parallel rows - parameter int unsigned NumPipeRegs = MaxPipeRegs-1, // Number of pipeline registers within each PE - parameter pipe_config_t PipeConfig = DISTRIBUTED, - parameter int unsigned EccChunkSize = 32, - parameter bit LatchBuffers = 0, - parameter fpnew_pkg::fmt_logic_t FpFmtConfig = 6'b001101, - parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000, + parameter int unsigned DataW = MaxDataW, // TCDM port dimension (in bits) + parameter int unsigned MisalignedAccessSupport = MisalignedAccessSupportDefault, // set to 1 to support misaligned accesses on TCDM + parameter fp_format_e 
FpFormat = FP16, // Data format (default is FP16) + parameter int unsigned Height = MaxDim, // Number of PEs within a row + parameter int unsigned Width = MaxDim, // Number of parallel rows + parameter int unsigned NumPipeRegs = MaxPipeRegs-1, // Number of pipeline registers within each PE + parameter pipe_config_t PipeConfig = DISTRIBUTED, + parameter int unsigned EccChunkSize = 32, + parameter bit LatchBuffers = 0, + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = 6'b001101, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000, // Choose interface - parameter ctrl_intf_e CtrlIntfConfig = XIF, + parameter ctrl_intf_e CtrlIntfConfig = XIF, // Custom instructions - parameter logic [6:0] McnfigOpCode = 7'b0001011, - parameter logic [6:0] MarithOpCode = 7'b0001011, - parameter logic [6:0] MopcntOpCode = 7'b0001011, - parameter logic [2:0] McnfigFunct3 = 3'b000, - parameter logic [2:0] MarithFunct3 = 3'b001, - parameter logic [2:0] MopcntFunct3 = 3'b010, - parameter logic [1:0] McnfigFunct2 = 2'b00, - parameter logic [1:0] MarithFunct2 = 2'b00, - parameter logic [1:0] MopcntFunct2 = 2'b00, + parameter logic [6:0] McnfigOpCode = 7'b0001011, + parameter logic [6:0] MarithOpCode = 7'b0001011, + parameter logic [6:0] MopcntOpCode = 7'b0001011, + parameter logic [2:0] McnfigFunct3 = 3'b000, + parameter logic [2:0] MarithFunct3 = 3'b001, + parameter logic [2:0] MopcntFunct3 = 3'b010, + parameter logic [1:0] McnfigFunct2 = 2'b00, + parameter logic [1:0] MarithFunct2 = 2'b00, + parameter logic [1:0] MopcntFunct2 = 2'b00, // XIF parameters - parameter int unsigned XifNumHarts = 1, - parameter int unsigned XifIdWidth = 1, - parameter int unsigned XifIssueRegisterSplit = 0, + parameter int unsigned XifNumHarts = 1, + parameter int unsigned XifIdWidth = 1, + parameter int unsigned XifIssueRegisterSplit = 0, // XIF types parameter type x_issue_req_t = logic, parameter type x_issue_resp_t = logic, @@ -174,11 +175,12 @@ hwpe_stream_intf_stream #( .DATA_WIDTH ( DataW ) ) 
z_buffer_fifo ( .clk( cl // The streamer will present a single master TCDM port used to stream data to and from the memeory. redmule_streamer #( - .DataW ( DataW ), - .EccChunkSize ( EccChunkSize ), - .FpFormat ( FpFormat ), - .FpFmtConfig ( FpFmtConfig ), - .IntFmtConfig ( IntFmtConfig ), + .DataW ( DataW ), + .MisalignedAccessSupport ( MisalignedAccessSupport ), + .EccChunkSize ( EccChunkSize ), + .FpFormat ( FpFormat ), + .FpFmtConfig ( FpFmtConfig ), + .IntFmtConfig ( IntFmtConfig ), .`HCI_SIZE_PARAM(tcdm) ( `HCI_SIZE_PARAM(tcdm) ) ) i_streamer ( .clk_i ( clk_acc ), From 8c69519e5db5e399ad54e3045bf3c4d93aba977c Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Tue, 24 Feb 2026 08:37:49 +0100 Subject: [PATCH 18/25] Update rtl/redmule_tiler.sv Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- rtl/redmule_tiler.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rtl/redmule_tiler.sv b/rtl/redmule_tiler.sv index b9b37e1..a0c61bb 100644 --- a/rtl/redmule_tiler.sv +++ b/rtl/redmule_tiler.sv @@ -276,8 +276,9 @@ assign config_o = config_q; `ifndef SYNTHESIS `ifndef VERILATOR `ifndef VCS -initial - dataw : assert(DataW == Height*(PipeRegs+1)*16); +initial begin + dataw: assert (DataW == Height*(PipeRegs+1)*16); +end `endif `endif `endif From c1d2a537fa717f68cab362f94e2a06ed0049c124 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Tue, 24 Feb 2026 08:38:33 +0100 Subject: [PATCH 19/25] Update rtl/ctrl/redmule_target_decoder.sv Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- rtl/ctrl/redmule_target_decoder.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index 97c45e2..5759eba 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -118,7 +118,7 @@ module redmule_target_decoder if(~rst_ni) begin job_status <= '0; end else begin - if(clear_i | target_clear_o) begin // 
TODO: connect target-generated clear as well! + if(clear_i | target_clear_o) begin // Clear job status on external or target-generated clear. job_status <= '0; end else if(job_trigger & config_ready_i) begin From faf5a20172f2530248f13e515ac7153deb89e71c Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Tue, 24 Feb 2026 08:38:54 +0100 Subject: [PATCH 20/25] Update rtl/ctrl/redmule_regif.rdl Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- rtl/ctrl/redmule_regif.rdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/ctrl/redmule_regif.rdl b/rtl/ctrl/redmule_regif.rdl index 4c72604..d1d9d9c 100644 --- a/rtl/ctrl/redmule_regif.rdl +++ b/rtl/ctrl/redmule_regif.rdl @@ -175,7 +175,7 @@ addrmap redmule_regif { } send_w[19:19] = 0; field { name = "receive_w"; - desc = "Receive W to external stream."; + desc = "Receive W from external stream."; hw = r; sw = rw; } receive_w[18:18] = 0; From d56f74af917ebda0be77cb6c7748a0e6e2a35719 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Tue, 24 Feb 2026 08:39:16 +0100 Subject: [PATCH 21/25] Update Bender.yml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- Bender.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Bender.yml b/Bender.yml index 87407fe..ba99370 100644 --- a/Bender.yml +++ b/Bender.yml @@ -16,8 +16,8 @@ dependencies: cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: "redmule-v1.0" } ibex : { git: "https://github.com/pulp-platform/ibex.git" , rev: pulpissimo-v6.1.2 } hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , version: 1.9.2 } - hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , version: 3.0.0 } - hci : { git: "https://github.com/pulp-platform/hci.git" , version: 2.3.0 } + hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , version: 3.0.0 } + hci : { git: "https://github.com/pulp-platform/hci.git" , version: 2.3.0 } fpnew : { git: 
"https://github.com/pulp-platform/cvfpu.git" , rev: "pulp-v0.1.3" } common_cells : { git: "https://github.com/pulp-platform/common_cells.git" , version: 1.21.0 } tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.11 } From 4ebcf3fd894c746ec653bd4f82e888955199388f Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Tue, 24 Feb 2026 08:41:34 +0100 Subject: [PATCH 22/25] Update rtl/ctrl/redmule_target_decoder.sv Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- rtl/ctrl/redmule_target_decoder.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/ctrl/redmule_target_decoder.sv b/rtl/ctrl/redmule_target_decoder.sv index 5759eba..4504a8c 100644 --- a/rtl/ctrl/redmule_target_decoder.sv +++ b/rtl/ctrl/redmule_target_decoder.sv @@ -134,7 +134,7 @@ module redmule_target_decoder assign config_o.m_size = hwif_out.hwpe_job_dep.mcnfig0.m_size.value; assign config_o.n_size = hwif_out.hwpe_job_dep.mcnfig1.n_size.value; assign config_o.k_size = hwif_out.hwpe_job_dep.mcnfig0.k_size.value; - assign config_o.receive_x = hwif_out.hwpe_job_dep.mcnfig1.send_x.value; + assign config_o.receive_x = hwif_out.hwpe_job_dep.mcnfig1.receive_x.value; assign config_o.send_x = hwif_out.hwpe_job_dep.mcnfig1.send_x.value; assign config_o.receive_w = hwif_out.hwpe_job_dep.mcnfig1.receive_w.value; assign config_o.send_w = hwif_out.hwpe_job_dep.mcnfig1.send_w.value; From 3522559ad052840ba04f913f6c270a55f96d603c Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Tue, 24 Feb 2026 08:42:18 +0100 Subject: [PATCH 23/25] add begin/end to initial assert --- rtl/redmule_streamer.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rtl/redmule_streamer.sv b/rtl/redmule_streamer.sv index a57c4e7..6e3fcb5 100644 --- a/rtl/redmule_streamer.sv +++ b/rtl/redmule_streamer.sv @@ -436,8 +436,9 @@ hwpe_stream_assign i_ystream_assign ( .push_i( out_stream[YsourceStreamId] ) , `ifndef SYNTHESIS `ifndef VERILATOR 
`ifndef VCS -initial +initial begin tcdm_size_check_dw : assert(`HCI_SIZE_PARAM(tcdm).DW == ((MisalignedAccessSupport == 1) ? (DataW + 32) : DataW)); +end `endif `endif `endif From 9a87cc9f820f9833af598dd411f939a156b5cbb5 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Tue, 24 Feb 2026 08:49:49 +0100 Subject: [PATCH 24/25] Add back comments to inst_decoder (were removed due to rebase conflict) --- rtl/redmule_inst_decoder.sv | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/rtl/redmule_inst_decoder.sv b/rtl/redmule_inst_decoder.sv index f612871..d859226 100644 --- a/rtl/redmule_inst_decoder.sv +++ b/rtl/redmule_inst_decoder.sv @@ -456,21 +456,23 @@ module redmule_inst_decoder unique case ({cur_issue[i].instr[26:25],cur_issue[i].instr[14:12],cur_issue[i].instr[6:0]}) MCNFIG: begin - config_d[i].m_size = cur_register[i].rs[0][15:0]; - config_d[i].n_size = cur_register[i].rs[1][15:0]; - config_d[i].k_size = cur_register[i].rs[0][31:16]; - config_d[i].receive_x = cur_register[i].rs[1][16]; - config_d[i].send_x = cur_register[i].rs[1][17]; - config_d[i].receive_w = cur_register[i].rs[1][18]; - config_d[i].send_w = cur_register[i].rs[1][19]; + // Matrix configuration: extract dimensions and data flow control from rs1, rs2, rs3 + config_d[i].m_size = cur_register[i].rs[0][15:0]; // M dimension (rows of X/Z) + config_d[i].n_size = cur_register[i].rs[1][15:0]; // N dimension (cols of W/Z) + config_d[i].k_size = cur_register[i].rs[0][31:16]; // K dimension (cols of X, rows of W) + config_d[i].receive_x = cur_register[i].rs[1][16]; // Receive X from external stream + config_d[i].send_x = cur_register[i].rs[1][17]; // Broadcast X to external stream + config_d[i].receive_w = cur_register[i].rs[1][18]; // Receive W from external stream + config_d[i].send_w = cur_register[i].rs[1][19]; // Broadcast W to external stream config_d[i].gemm_ops = cur_register[i].rs[1][20] ? 
MATMUL : GEMM; - config_d[i].y_offs = cur_register[i].rs[2][31:0]; + config_d[i].y_offs = cur_register[i].rs[2][31:0]; // Y buffer offset for bias addition end MARITH: begin - config_d[i].x_addr = cur_register[i].rs[0][31:0]; - config_d[i].w_addr = cur_register[i].rs[1][31:0]; - config_d[i].z_addr = cur_register[i].rs[2][31:0]; - // TODO: These are fixed for now + // Matrix arithmetic: extract memory addresses from rs1, rs2, rs3 + config_d[i].x_addr = cur_register[i].rs[0][31:0]; // X matrix base address + config_d[i].w_addr = cur_register[i].rs[1][31:0]; // W matrix base address + config_d[i].z_addr = cur_register[i].rs[2][31:0]; // Z matrix base address (output) + // TODO: These operation parameters are fixed for now, could be made configurable config_d[i].gemm_input_fmt = redmule_pkg::Float16; config_d[i].gemm_output_fmt = redmule_pkg::Float16; end From abad16365cb7712dae541c63a0cdae5b98fea426 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Thu, 5 Mar 2026 10:52:49 +0100 Subject: [PATCH 25/25] Add XIF and memory-mapped wrappers for redmule top --- Bender.yml | 2 + rtl/redmule_mm_wrap.sv | 116 +++++++++++++++++++++++++++++ rtl/redmule_xif_wrap.sv | 157 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 275 insertions(+) create mode 100644 rtl/redmule_mm_wrap.sv create mode 100644 rtl/redmule_xif_wrap.sv diff --git a/Bender.yml b/Bender.yml index ba99370..b0f62e6 100644 --- a/Bender.yml +++ b/Bender.yml @@ -45,6 +45,8 @@ sources: - rtl/redmule_row.sv - rtl/redmule_engine.sv - rtl/redmule_top.sv + - rtl/redmule_xif_wrap.sv + - rtl/redmule_mm_wrap.sv - rtl/redmule_memory_scheduler.sv - rtl/redmule_mux.sv - rtl/redmule_inst_decoder.sv diff --git a/rtl/redmule_mm_wrap.sv b/rtl/redmule_mm_wrap.sv new file mode 100644 index 0000000..6b8769b --- /dev/null +++ b/rtl/redmule_mm_wrap.sv @@ -0,0 +1,116 @@ +// Copyright 2026 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Francesco Conti + +// Wrapper for redmule_top that exposes only the HWPE-ctrl memory-mapped target +// interface. XIF ports are hidden (inputs tied to '0) and external streaming +// ports (w_stream_i/o, x_stream_i/o) are hidden (source-side valid/data/strb +// tied to '0, sink-side ready tied to '0). + +`include "hci_helpers.svh" + +module redmule_mm_wrap + import fpnew_pkg::*; + import redmule_pkg::*; + import hci_package::*; + import hwpe_ctrl_package::*; + import hwpe_stream_package::*; +#( + parameter int unsigned DataW = MaxDataW, + parameter int unsigned MisalignedAccessSupport = MisalignedAccessSupportDefault, + parameter fp_format_e FpFormat = FP16, + parameter int unsigned Height = MaxDim, + parameter int unsigned Width = MaxDim, + parameter int unsigned NumPipeRegs = MaxPipeRegs-1, + parameter pipe_config_t PipeConfig = DISTRIBUTED, + parameter int unsigned EccChunkSize = 32, + parameter bit LatchBuffers = 0, + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = 6'b001101, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000, + parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '0 + // NOTE: XIF parameters are not exposed; CtrlIntfConfig is fixed to HWPE_TARGET. + // External stream ports are also hidden. +)( + input logic clk_i , + input logic rst_ni , + input logic test_mode_i, + output logic busy_o , + output logic evt_o , + // Synchronization ports + output logic sync_o , + input logic sync_i , + // TCDM master ports for the memory side + hci_core_intf.initiator tcdm , + // HWPE-ctrl target port + hwpe_ctrl_intf_periph.slave target + // NOTE: XIF ports are hidden; all XIF inputs are tied to '0 internally. + // NOTE: external stream ports (w_stream_i/o, x_stream_i/o) are hidden; + // source-side handshake signals are tied to '0 internally. +); + + // Local dummy stream interfaces for the hidden external stream ports. 
+ // w_stream_i / x_stream_i are sink ports in redmule_top (redmule consumes); + // the external source drives valid, data, strb — tie to '0. + hwpe_stream_intf_stream #(.DATA_WIDTH(DataW)) w_stream_i (.clk(clk_i)); + hwpe_stream_intf_stream #(.DATA_WIDTH(DataW)) x_stream_i (.clk(clk_i)); + assign w_stream_i.valid = '0; + assign w_stream_i.data = '0; + assign w_stream_i.strb = '0; + assign x_stream_i.valid = '0; + assign x_stream_i.data = '0; + assign x_stream_i.strb = '0; + + // w_stream_o / x_stream_o are source ports in redmule_top (redmule produces); + // the external sink drives ready — tie to '0. + hwpe_stream_intf_stream #(.DATA_WIDTH(DataW)) w_stream_o (.clk(clk_i)); + hwpe_stream_intf_stream #(.DATA_WIDTH(DataW)) x_stream_o (.clk(clk_i)); + assign w_stream_o.ready = '0; + assign x_stream_o.ready = '0; + + redmule_top #( + .DataW ( DataW ), + .MisalignedAccessSupport ( MisalignedAccessSupport ), + .FpFormat ( FpFormat ), + .Height ( Height ), + .Width ( Width ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .EccChunkSize ( EccChunkSize ), + .LatchBuffers ( LatchBuffers ), + .FpFmtConfig ( FpFmtConfig ), + .IntFmtConfig ( IntFmtConfig ), + .CtrlIntfConfig ( HWPE_TARGET ), + .`HCI_SIZE_PARAM(tcdm) ( `HCI_SIZE_PARAM(tcdm) ) + // XIF parameters left at their defaults (logic); unused in HWPE_TARGET mode. 
+ ) i_redmule_top ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_mode_i ( test_mode_i ), + .busy_o ( busy_o ), + .evt_o ( evt_o ), + .w_stream_i ( w_stream_i ), + .x_stream_i ( x_stream_i ), + .w_stream_o ( w_stream_o ), + .x_stream_o ( x_stream_o ), + // XIF inputs tied to '0; outputs left unconnected (driven to '0 by redmule_top) + .x_issue_req_i ( '0 ), + .x_issue_resp_o ( ), + .x_issue_valid_i ( '0 ), + .x_issue_ready_o ( ), + .x_register_i ( '0 ), + .x_register_valid_i ( '0 ), + .x_register_ready_o ( ), + .x_commit_i ( '0 ), + .x_commit_valid_i ( '0 ), + .x_result_o ( ), + .x_result_valid_o ( ), + .x_result_ready_i ( '0 ), + .sync_o ( sync_o ), + .sync_i ( sync_i ), + .tcdm ( tcdm ), + .target ( target ) + ); + +endmodule : redmule_mm_wrap diff --git a/rtl/redmule_xif_wrap.sv b/rtl/redmule_xif_wrap.sv new file mode 100644 index 0000000..2055d57 --- /dev/null +++ b/rtl/redmule_xif_wrap.sv @@ -0,0 +1,157 @@ +// Copyright 2026 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Francesco Conti + +// Wrapper for redmule_top that exposes the XIF control interface and hides the +// HWPE-ctrl memory-mapped target port. The target port's master-driven inputs +// (req, add, wen, be, data, id) are tied to '0; gnt is driven to '1 internally +// by redmule_top when CtrlIntfConfig == XIF. 
+ +`include "hci_helpers.svh" + +module redmule_xif_wrap + import fpnew_pkg::*; + import redmule_pkg::*; + import hci_package::*; + import hwpe_ctrl_package::*; + import hwpe_stream_package::*; +#( + parameter int unsigned DataW = MaxDataW, + parameter int unsigned MisalignedAccessSupport = MisalignedAccessSupportDefault, + parameter fp_format_e FpFormat = FP16, + parameter int unsigned Height = MaxDim, + parameter int unsigned Width = MaxDim, + parameter int unsigned NumPipeRegs = MaxPipeRegs-1, + parameter pipe_config_t PipeConfig = DISTRIBUTED, + parameter int unsigned EccChunkSize = 32, + parameter bit LatchBuffers = 0, + parameter fpnew_pkg::fmt_logic_t FpFmtConfig = 6'b001101, + parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000, + // Custom instructions + parameter logic [6:0] McnfigOpCode = 7'b0001011, + parameter logic [6:0] MarithOpCode = 7'b0001011, + parameter logic [6:0] MopcntOpCode = 7'b0001011, + parameter logic [2:0] McnfigFunct3 = 3'b000, + parameter logic [2:0] MarithFunct3 = 3'b001, + parameter logic [2:0] MopcntFunct3 = 3'b010, + parameter logic [1:0] McnfigFunct2 = 2'b00, + parameter logic [1:0] MarithFunct2 = 2'b00, + parameter logic [1:0] MopcntFunct2 = 2'b00, + // XIF parameters + parameter int unsigned XifNumHarts = 1, + parameter int unsigned XifIdWidth = 1, + parameter int unsigned XifIssueRegisterSplit = 0, + // XIF types + parameter type x_issue_req_t = logic, + parameter type x_issue_resp_t = logic, + parameter type x_register_t = logic, + parameter type x_commit_t = logic, + parameter type x_result_t = logic, + parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '0 +)( + input logic clk_i , + input logic rst_ni , + input logic test_mode_i, + output logic busy_o , + output logic evt_o , + // External W stream + hwpe_stream_intf_stream.sink w_stream_i , + // External X stream + hwpe_stream_intf_stream.sink x_stream_i , + // Broadcasted W stream + hwpe_stream_intf_stream.source w_stream_o , + // Broadcasted X stream + 
hwpe_stream_intf_stream.source x_stream_o , + // XIF ports + input x_issue_req_t x_issue_req_i , + output x_issue_resp_t x_issue_resp_o , + input logic x_issue_valid_i , + output logic x_issue_ready_o , + input x_register_t x_register_i , + input logic x_register_valid_i , + output logic x_register_ready_o , + input x_commit_t x_commit_i , + input logic x_commit_valid_i , + output x_result_t x_result_o , + output logic x_result_valid_o , + input logic x_result_ready_i , + // Synchronization ports + output logic sync_o , + input logic sync_i , + // TCDM master ports for the memory side + hci_core_intf.initiator tcdm + // NOTE: hwpe_ctrl_intf_periph target is hidden; master inputs are tied to '0 + // and gnt is driven to '1 internally (CtrlIntfConfig == XIF). +); + + // Local dummy HWPE-ctrl target interface — master-side inputs tied to '0. + hwpe_ctrl_intf_periph #(.ID_WIDTH(0)) target (.clk(clk_i)); + assign target.req = '0; + assign target.add = '0; + assign target.wen = '0; + assign target.be = '0; + assign target.data = '0; + assign target.id = '0; + + redmule_top #( + .DataW ( DataW ), + .MisalignedAccessSupport ( MisalignedAccessSupport ), + .FpFormat ( FpFormat ), + .Height ( Height ), + .Width ( Width ), + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .EccChunkSize ( EccChunkSize ), + .LatchBuffers ( LatchBuffers ), + .FpFmtConfig ( FpFmtConfig ), + .IntFmtConfig ( IntFmtConfig ), + .CtrlIntfConfig ( XIF ), + .McnfigOpCode ( McnfigOpCode ), + .MarithOpCode ( MarithOpCode ), + .MopcntOpCode ( MopcntOpCode ), + .McnfigFunct3 ( McnfigFunct3 ), + .MarithFunct3 ( MarithFunct3 ), + .MopcntFunct3 ( MopcntFunct3 ), + .McnfigFunct2 ( McnfigFunct2 ), + .MarithFunct2 ( MarithFunct2 ), + .MopcntFunct2 ( MopcntFunct2 ), + .XifNumHarts ( XifNumHarts ), + .XifIdWidth ( XifIdWidth ), + .XifIssueRegisterSplit ( XifIssueRegisterSplit ), + .x_issue_req_t ( x_issue_req_t ), + .x_issue_resp_t ( x_issue_resp_t ), + .x_register_t ( x_register_t ), + .x_commit_t ( 
x_commit_t ), + .x_result_t ( x_result_t ), + .`HCI_SIZE_PARAM(tcdm) ( `HCI_SIZE_PARAM(tcdm) ) + ) i_redmule_top ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .test_mode_i ( test_mode_i ), + .busy_o ( busy_o ), + .evt_o ( evt_o ), + .w_stream_i ( w_stream_i ), + .x_stream_i ( x_stream_i ), + .w_stream_o ( w_stream_o ), + .x_stream_o ( x_stream_o ), + .x_issue_req_i ( x_issue_req_i ), + .x_issue_resp_o ( x_issue_resp_o ), + .x_issue_valid_i ( x_issue_valid_i ), + .x_issue_ready_o ( x_issue_ready_o ), + .x_register_i ( x_register_i ), + .x_register_valid_i ( x_register_valid_i ), + .x_register_ready_o ( x_register_ready_o ), + .x_commit_i ( x_commit_i ), + .x_commit_valid_i ( x_commit_valid_i ), + .x_result_o ( x_result_o ), + .x_result_valid_o ( x_result_valid_o ), + .x_result_ready_i ( x_result_ready_i ), + .sync_o ( sync_o ), + .sync_i ( sync_i ), + .tcdm ( tcdm ), + .target ( target ) + ); + +endmodule : redmule_xif_wrap