From 133f28cc34d215bc975b3723b693b0d64c4d1f2f Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 6 Feb 2024 14:51:32 +0100 Subject: [PATCH 1/7] Create wrapper for PULP cluster --- Bender.yml | 3 + src/pulp_icache_wrap.sv | 239 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 242 insertions(+) create mode 100644 src/pulp_icache_wrap.sv diff --git a/Bender.yml b/Bender.yml index a5876d1..6bdfef8 100644 --- a/Bender.yml +++ b/Bender.yml @@ -37,6 +37,9 @@ sources: # Level 3 - src/snitch_icache.sv - src/snitch_read_only_cache.sv + - target: pulp + files: + - src/pulp_icache_wrap.sv - target: test files: - test/snitch_icache_l0_tb.sv diff --git a/src/pulp_icache_wrap.sv b/src/pulp_icache_wrap.sv new file mode 100644 index 0000000..a161263 --- /dev/null +++ b/src/pulp_icache_wrap.sv @@ -0,0 +1,239 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Michael Rogenmoser + +`include "common_cells/registers.svh" + +/// Porting from hier-icache: +/// Unsupported: different line width, banks in L1, L0 not fully associative +/// [SH_FETCH_DATA_WIDTH == Cache line width] +/// [SH_NB_BANKS == 1] +/// [PRI_NB_WAYS == L0_LINE_COUNT] -> here fully associative +/// [SH_CACHE_LINE == PRI_CACHE_LINE] +/// NumFetchPorts = NB_CORES +/// L0_LINE_COUNT = PRI_CACHE_SIZE/(bytes per line) +/// LINE_WIDTH = X_CACHE_LINE * DATA_WIDTH -> Use >= 32*NB_CORES for optimal performance +/// LINE_COUNT = SH_CACHE_SIZE/(bytes per line) +/// WAY_COUNT = SH_NB_WAYS +/// FetchAddrWidth = FETCH_ADDR_WIDTH +/// FetchDataWidth = PRI_FETCH_DATA_WIDTH +/// AxiAddrWidth = AXI_ADDR +/// AxiDataWidth = AXI_DATA +module pulp_icache_wrap #( + /// Number of request (fetch) ports + parameter int NumFetchPorts = -1, + /// L0 Cache Line Count + parameter int L0_LINE_COUNT = -1, + /// Cache Line Width + /// For optimal performance, use >= 32*NumFetchPorts to allow execution of 32-bit 
instructions + /// for each core before requiring another L0-L1 fetch. + parameter int LINE_WIDTH = -1, + /// The number of cache lines per set. Power of two; >= 2. + parameter int LINE_COUNT = -1, + /// The set associativity of the cache. Power of two; >= 1. + parameter int WAY_COUNT = 1, + /// Fetch interface address width. Same as FILL_AW; >= 1. + parameter int FetchAddrWidth = -1, + /// Fetch interface data width. Power of two; >= 8. + parameter int FetchDataWidth = -1, + /// Fill interface address width. Same as FETCH_AW; >= 1. + parameter int AxiAddrWidth = -1, + /// Fill interface data width. Power of two; >= 8. + parameter int AxiDataWidth = -1, + /// Configuration input types for memory cuts used in implementation. + parameter type sram_cfg_data_t = logic, + parameter type sram_cfg_tag_t = logic, + + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + + // Processor interface + input logic [NumFetchPorts-1:0] fetch_req_i, + input logic [NumFetchPorts-1:0][FetchAddrWidth-1:0] fetch_addr_i, + output logic [NumFetchPorts-1:0] fetch_gnt_o, + output logic [NumFetchPorts-1:0] fetch_rvalid_o, + output logic [NumFetchPorts-1:0][FetchDataWidth-1:0] fetch_rdata_o, + output logic [NumFetchPorts-1:0] fetch_rerror_o, + + input logic enable_prefetching_i, + output snitch_icache_pkg::icache_events_t [NumFetchPorts-1:0] icache_events_o, + input logic [NumFetchPorts-1:0] flush_valid_i, + output logic [NumFetchPorts-1:0] flush_ready_o, + + // SRAM configs + input sram_cfg_data_t sram_cfg_data_i, + input sram_cfg_tag_t sram_cfg_tag_i, + + // AXI interface + output axi_req_t axi_req_o, + input axi_rsp_t axi_rsp_i +); + localparam int unsigned AdapterType = 1; + + logic [NumFetchPorts-1:0] fetch_valid, fetch_ready, fetch_rerror; + logic [NumFetchPorts-1:0][FetchAddrWidth-1:0] fetch_addr; + logic [NumFetchPorts-1:0][FetchDataWidth-1:0] fetch_rdata; + + for (genvar i = 0; i < NumFetchPorts; i++) begin : 
gen_adapter + if (AdapterType == 0) begin : gen_response_cut + + // Requires the core to keep data applied steady while req is high, may not be guaranteed... + spill_register #( + .T (logic [FetchDataWidth-1+1:0]), + .Bypass(1'b0) + ) i_spill_reg ( + .clk_i, + .rst_ni, + .valid_i ( fetch_ready [i] ), + .ready_o ( /* Unconnected as always ready */ ), + .data_i ( {fetch_rdata [i], fetch_rerror [i]} ), + .valid_o ( fetch_rvalid_o[i] ), + .ready_i ( '1 ), + .data_o ( {fetch_rdata_o[i], fetch_rerror_o[i]} ) + ); + + assign fetch_addr[i] = fetch_addr_i[i]; + assign fetch_valid[i] = fetch_req_i[i]; + assign fetch_gnt_o[i] = fetch_ready[i]; + + end else if (AdapterType == 1) begin : gen_request_cut + + logic gnt; + + assign fetch_gnt_o[i] = gnt & fetch_req_i[i]; + + spill_register #( + .T (logic [FetchAddrWidth-1:0]), + .Bypass(1'b0) + ) i_spill_reg ( + .clk_i, + .rst_ni, + .valid_i ( fetch_req_i [i] ), + .ready_o ( gnt ), + .data_i ( fetch_addr_i[i] ), + .valid_o ( fetch_valid [i] ), + .ready_i ( fetch_ready [i] ), + .data_o ( fetch_addr [i] ) + ); + + assign fetch_rdata_o [i] = fetch_rdata [i]; + assign fetch_rerror_o[i] = fetch_rerror[i]; + assign fetch_rvalid_o[i] = fetch_ready [i] & fetch_valid[i]; + + end else begin : gen_flexible_cut + // This can still be improved, there is still an extra stall cycle sometimes AFAIK... 
+ + logic stalled_d, stalled_q; + + logic spill_valid, spill_ready; + logic [FetchAddrWidth-1:0] spill_addr; + + spill_register #( + .T (logic [FetchAddrWidth-1:0]), + .Bypass(1'b0) + ) i_req_spill_reg ( + .clk_i, + .rst_ni, + .valid_i ( fetch_req_i [i] ), + .ready_o ( fetch_gnt_o [i] ), + .data_i ( fetch_addr_i[i] ), + .valid_o ( spill_valid ), + .ready_i ( spill_ready ), + .data_o ( spill_addr ) + ); + + always_comb begin + // Keep steady state + stalled_d = stalled_q; + + // If already stalled + if (stalled_q) begin + // only revert back to unstalled state with sufficient gap + if (!spill_valid && !fetch_req_i[i]) + stalled_d = 1'b0; + end else begin + if (fetch_req_i[i] && !fetch_ready[i]) + stalled_d = 1'b1; + end + end + `FF(stalled_q, stalled_d, '0) + + assign fetch_valid[i] = stalled_q ? spill_valid : fetch_req_i[i]; + assign fetch_addr [i] = stalled_q ? spill_addr : fetch_addr_i[i]; + + logic spill_rvalid; + logic spill_rerror; + logic [FetchDataWidth-1:0] spill_rdata; + + spill_register #( + .T (logic [FetchDataWidth-1+1:0]), + .Bypass(1'b0) + ) i_rsp_spill_reg ( + .clk_i, + .rst_ni, + .valid_i ( fetch_ready [i] ), + .ready_o ( /* Unconnected as always ready */ ), + .data_i ( {fetch_rdata[i], fetch_rerror[i]} ), + .valid_o ( spill_rvalid ), + .ready_i ( '1 ), + .data_o ( {spill_rdata , spill_rerror } ) + ); + + assign fetch_rvalid_o[i] = stalled_q ? fetch_ready[i] : spill_rvalid; + assign fetch_rdata_o [i] = stalled_q ? fetch_rdata [i] : spill_rdata; + assign fetch_rerror_o[i] = stalled_q ? 
fetch_rerror[i] : spill_rerror; + + end + end + + snitch_icache #( + .NR_FETCH_PORTS ( NumFetchPorts ), + .L0_LINE_COUNT ( L0_LINE_COUNT ), + .LINE_WIDTH ( LINE_WIDTH ), + .LINE_COUNT ( LINE_COUNT ), + .WAY_COUNT ( WAY_COUNT ), + .FETCH_AW ( FetchAddrWidth ), + .FETCH_DW ( FetchDataWidth ), + .FILL_AW ( AxiAddrWidth ), + .FILL_DW ( AxiDataWidth ), + .FETCH_PRIORITY ( 1 ), + .MERGE_FETCHES ( 1 ), + .L1_TAG_SCM ( 1 ), + .SERIAL_LOOKUP ( 1 ), + .NUM_AXI_OUTSTANDING( 4 ), + .EARLY_LATCH ( 0 ), + .ISO_CROSSING ( 0 ), + .sram_cfg_data_t ( sram_cfg_data_t ), + .sram_cfg_tag_t ( sram_cfg_tag_t ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_rsp_t ) + ) i_snitch_icache ( + .clk_i, + .clk_d2_i ( clk_i ), + .rst_ni, + + .enable_prefetching_i, + .icache_events_o, + .flush_valid_i, + .flush_ready_o, + + .inst_addr_i ( fetch_addr ), + .inst_data_o ( fetch_rdata ), + .inst_cacheable_i ( {NumFetchPorts{1'b1}} ), + .inst_valid_i ( fetch_valid ), + .inst_ready_o ( fetch_ready ), + .inst_error_o ( fetch_rerror ), + + .sram_cfg_data_i, + .sram_cfg_tag_i, + + .axi_req_o, + .axi_rsp_i + ); + +endmodule From 016ed7b95fa162c1767011eb6ad6e3de590bfd96 Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 23 Jul 2024 17:50:47 +0200 Subject: [PATCH 2/7] Extract l0_to_bypass module to independent file --- Bender.yml | 1 + src/l0_to_bypass.sv | 152 +++++++++++++++++++++++++++++++++++++++++++ src/snitch_icache.sv | 144 ---------------------------------------- 3 files changed, 153 insertions(+), 144 deletions(-) create mode 100644 src/l0_to_bypass.sv diff --git a/Bender.yml b/Bender.yml index 6bdfef8..07ada2a 100644 --- a/Bender.yml +++ b/Bender.yml @@ -26,6 +26,7 @@ sources: - src/riscv_instr_branch.sv - src/multi_accept_rr_arb.sv # Level 1 + - src/l0_to_bypass.sv - src/snitch_axi_to_cache.sv - src/snitch_icache_l0.sv - src/snitch_icache_refill.sv diff --git a/src/l0_to_bypass.sv b/src/l0_to_bypass.sv new file mode 100644 index 0000000..f37e363 --- /dev/null +++ b/src/l0_to_bypass.sv @@ 
-0,0 +1,152 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Fabian Schuiki +// Florian Zaruba + +`include "common_cells/registers.svh" + +// Translate register interface to refill requests. +// Used for bypassable accesses. +module l0_to_bypass #( + parameter snitch_icache_pkg::config_t CFG = '0 +) ( + input logic clk_i, + input logic rst_ni, + + input logic [CFG.NR_FETCH_PORTS-1:0] in_valid_i, + output logic [CFG.NR_FETCH_PORTS-1:0] in_ready_o, + input logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_AW-1:0] in_addr_i, + output logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_DW-1:0] in_data_o, + output logic [CFG.NR_FETCH_PORTS-1:0] in_error_o, + + output logic [CFG.FETCH_AW-1:0] refill_req_addr_o, + output logic refill_req_bypass_o, + output logic refill_req_valid_o, + input logic refill_req_ready_i, + + input logic [CFG.LINE_WIDTH-1:0] refill_rsp_data_i, + input logic refill_rsp_error_i, + input logic refill_rsp_valid_i, + output logic refill_rsp_ready_o +); + + assign refill_req_bypass_o = 1'b1; + + logic [CFG.NR_FETCH_PORTS-1:0] in_valid; + logic [CFG.NR_FETCH_PORTS-1:0] in_ready; + + typedef enum logic [1:0] { + Idle, RequestData, WaitResponse, PresentResponse + } state_e; + state_e [CFG.NR_FETCH_PORTS-1:0] state_d , state_q; + + // Mask address so that it is aligned to the cache-line width. 
+ logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_AW-1:0] in_addr_masked; + for (genvar i = 0; i < CFG.NR_FETCH_PORTS; i++) begin : gen_masked_addr + assign in_addr_masked[i] = {in_addr_i[i][CFG.FETCH_AW-1:CFG.LINE_ALIGN], + {CFG.LINE_ALIGN{1'b0}}}; + end + stream_arbiter #( + .DATA_T ( logic [CFG.FETCH_AW-1:0] ), + .N_INP ( CFG.NR_FETCH_PORTS ) + ) i_stream_arbiter ( + .clk_i, + .rst_ni, + .inp_data_i ( in_addr_masked ), + .inp_valid_i ( in_valid ), + .inp_ready_o ( in_ready ), + .oup_data_o ( refill_req_addr_o ), + .oup_valid_o ( refill_req_valid_o ), + .oup_ready_i ( refill_req_ready_i ) + ); + + localparam int unsigned NrFetchPortsBin = + CFG.NR_FETCH_PORTS == 1 ? 1 : $clog2(CFG.NR_FETCH_PORTS); + + logic [CFG.NR_FETCH_PORTS-1:0] rsp_fifo_mux; + logic [NrFetchPortsBin-1:0] onehot_mux; + logic [CFG.NR_FETCH_PORTS-1:0] rsp_fifo_pop; + logic rsp_fifo_full; + + logic [CFG.NR_FETCH_PORTS-1:0] rsp_valid; + logic [CFG.NR_FETCH_PORTS-1:0] rsp_ready; + + fifo_v3 #( + .DATA_WIDTH ( CFG.NR_FETCH_PORTS ), + .DEPTH ( 4 ) + ) rsp_fifo ( + .clk_i, + .rst_ni, + .flush_i ( 1'b0 ), + .testmode_i ( 1'b0 ), + .full_o ( rsp_fifo_full ), + .empty_o ( ), + .usage_o ( ), + .data_i ( {in_valid & in_ready} ), + .push_i ( |{in_valid & in_ready}), + .data_o ( rsp_fifo_mux ), + .pop_i ( |rsp_fifo_pop ) + ); + + + onehot_to_bin #( + .ONEHOT_WIDTH (CFG.NR_FETCH_PORTS) + ) i_onehot_to_bin ( + .onehot (rsp_fifo_mux), + .bin (onehot_mux) + ); + + assign rsp_ready = '1; + + stream_demux #( + .N_OUP ( CFG.NR_FETCH_PORTS ) + ) i_stream_mux_miss_refill ( + .inp_valid_i ( refill_rsp_valid_i ), + .inp_ready_o ( refill_rsp_ready_o ), + .oup_sel_i ( onehot_mux ), + .oup_valid_o ( rsp_valid ), + .oup_ready_i ( rsp_ready ) + ); + + for (genvar i = 0; i < CFG.NR_FETCH_PORTS; i++) begin : gen_bypass_request + always_comb begin + state_d[i] = state_q[i]; + in_ready_o[i] = 1'b0; + rsp_fifo_pop[i] = 1'b0; + in_valid[i] = 1'b0; + unique case (state_q[i]) + // latch data when idle + Idle: if (in_valid_i[i]) state_d[i] 
= RequestData; + RequestData: begin + // check that there is still space for the response to be accepted. + if (!rsp_fifo_full) begin + in_valid[i] = 1'b1; + if (in_ready[i]) state_d[i] = WaitResponse; + end + end + WaitResponse: begin + if (rsp_valid[i]) begin + rsp_fifo_pop[i] = 1'b1; + state_d[i] = PresentResponse; + end + end + // The response will be served from the register and is valid for one cycle. + PresentResponse: begin + state_d[i] = Idle; + in_ready_o[i] = 1'b1; + end + default:; + endcase + end + logic [CFG.FILL_DW-1:0] fill_rsp_data; + assign fill_rsp_data = + refill_rsp_data_i >> (in_addr_i[i][CFG.LINE_ALIGN-1:CFG.FETCH_ALIGN] * CFG.FETCH_DW); + `FFLNR({in_data_o[i], in_error_o[i]}, {fill_rsp_data[CFG.FETCH_DW-1:0], refill_rsp_error_i}, + rsp_valid[i], clk_i) + end + + `FF(state_q, state_d, '{default: Idle}) + +endmodule diff --git a/src/snitch_icache.sv b/src/snitch_icache.sv index 013a3ae..3785410 100644 --- a/src/snitch_icache.sv +++ b/src/snitch_icache.sv @@ -704,147 +704,3 @@ module snitch_icache import snitch_icache_pkg::*; #( ); endmodule - -// Translate register interface to refill requests. -// Used for bypassable accesses. 
-module l0_to_bypass #( - parameter snitch_icache_pkg::config_t CFG = '0 -) ( - input logic clk_i, - input logic rst_ni, - - input logic [CFG.NR_FETCH_PORTS-1:0] in_valid_i, - output logic [CFG.NR_FETCH_PORTS-1:0] in_ready_o, - input logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_AW-1:0] in_addr_i, - output logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_DW-1:0] in_data_o, - output logic [CFG.NR_FETCH_PORTS-1:0] in_error_o, - - output logic [CFG.FETCH_AW-1:0] refill_req_addr_o, - output logic refill_req_bypass_o, - output logic refill_req_valid_o, - input logic refill_req_ready_i, - - input logic [CFG.LINE_WIDTH-1:0] refill_rsp_data_i, - input logic refill_rsp_error_i, - input logic refill_rsp_valid_i, - output logic refill_rsp_ready_o -); - - assign refill_req_bypass_o = 1'b1; - - logic [CFG.NR_FETCH_PORTS-1:0] in_valid; - logic [CFG.NR_FETCH_PORTS-1:0] in_ready; - - typedef enum logic [1:0] { - Idle, RequestData, WaitResponse, PresentResponse - } state_e; - state_e [CFG.NR_FETCH_PORTS-1:0] state_d , state_q; - - // Mask address so that it is aligned to the cache-line width. - logic [CFG.NR_FETCH_PORTS-1:0][CFG.FETCH_AW-1:0] in_addr_masked; - for (genvar i = 0; i < CFG.NR_FETCH_PORTS; i++) begin : gen_masked_addr - assign in_addr_masked[i] = {in_addr_i[i][CFG.FETCH_AW-1:CFG.LINE_ALIGN], - {CFG.LINE_ALIGN{1'b0}}}; - end - stream_arbiter #( - .DATA_T ( logic [CFG.FETCH_AW-1:0] ), - .N_INP ( CFG.NR_FETCH_PORTS ) - ) i_stream_arbiter ( - .clk_i, - .rst_ni, - .inp_data_i ( in_addr_masked ), - .inp_valid_i ( in_valid ), - .inp_ready_o ( in_ready ), - .oup_data_o ( refill_req_addr_o ), - .oup_valid_o ( refill_req_valid_o ), - .oup_ready_i ( refill_req_ready_i ) - ); - - localparam int unsigned NrFetchPortsBin = - CFG.NR_FETCH_PORTS == 1 ? 
1 : $clog2(CFG.NR_FETCH_PORTS); - - logic [CFG.NR_FETCH_PORTS-1:0] rsp_fifo_mux; - logic [NrFetchPortsBin-1:0] onehot_mux; - logic [CFG.NR_FETCH_PORTS-1:0] rsp_fifo_pop; - logic rsp_fifo_full; - - logic [CFG.NR_FETCH_PORTS-1:0] rsp_valid; - logic [CFG.NR_FETCH_PORTS-1:0] rsp_ready; - - fifo_v3 #( - .DATA_WIDTH ( CFG.NR_FETCH_PORTS ), - .DEPTH ( 4 ) - ) rsp_fifo ( - .clk_i, - .rst_ni, - .flush_i ( 1'b0 ), - .testmode_i ( 1'b0 ), - .full_o ( rsp_fifo_full ), - .empty_o ( ), - .usage_o ( ), - .data_i ( {in_valid & in_ready} ), - .push_i ( |{in_valid & in_ready}), - .data_o ( rsp_fifo_mux ), - .pop_i ( |rsp_fifo_pop ) - ); - - - onehot_to_bin #( - .ONEHOT_WIDTH (CFG.NR_FETCH_PORTS) - ) i_onehot_to_bin ( - .onehot (rsp_fifo_mux), - .bin (onehot_mux) - ); - - assign rsp_ready = '1; - - stream_demux #( - .N_OUP ( CFG.NR_FETCH_PORTS ) - ) i_stream_mux_miss_refill ( - .inp_valid_i ( refill_rsp_valid_i ), - .inp_ready_o ( refill_rsp_ready_o ), - .oup_sel_i ( onehot_mux ), - .oup_valid_o ( rsp_valid ), - .oup_ready_i ( rsp_ready ) - ); - - for (genvar i = 0; i < CFG.NR_FETCH_PORTS; i++) begin : gen_bypass_request - always_comb begin - state_d[i] = state_q[i]; - in_ready_o[i] = 1'b0; - rsp_fifo_pop[i] = 1'b0; - in_valid[i] = 1'b0; - unique case (state_q[i]) - // latch data when idle - Idle: if (in_valid_i[i]) state_d[i] = RequestData; - RequestData: begin - // check that there is still space for the response to be accepted. - if (!rsp_fifo_full) begin - in_valid[i] = 1'b1; - if (in_ready[i]) state_d[i] = WaitResponse; - end - end - WaitResponse: begin - if (rsp_valid[i]) begin - rsp_fifo_pop[i] = 1'b1; - state_d[i] = PresentResponse; - end - end - // The response will be served from the register and is valid for one cycle. 
- PresentResponse: begin - state_d[i] = Idle; - in_ready_o[i] = 1'b1; - end - default:; - endcase - end - logic [CFG.FILL_DW-1:0] fill_rsp_data; - assign fill_rsp_data = - refill_rsp_data_i >> (in_addr_i[i][CFG.LINE_ALIGN-1:CFG.FETCH_ALIGN] * CFG.FETCH_DW); - `FFLNR({in_data_o[i], in_error_o[i]}, {fill_rsp_data[CFG.FETCH_DW-1:0], refill_rsp_error_i}, - rsp_valid[i], clk_i) - end - - `FF(state_q, state_d, '{default: Idle}) - -endmodule From c8cde36e03bb72e5df2b1b256d18ec0df616966e Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 23 Jul 2024 17:53:28 +0200 Subject: [PATCH 3/7] Extract axi_burst_splitter_table module to independent file --- Bender.yml | 1 + src/axi_burst_splitter_table.sv | 161 ++++++++++++++++++++++++++++++++ src/snitch_axi_to_cache.sv | 151 ------------------------------ 3 files changed, 162 insertions(+), 151 deletions(-) create mode 100644 src/axi_burst_splitter_table.sv diff --git a/Bender.yml b/Bender.yml index 07ada2a..971dbee 100644 --- a/Bender.yml +++ b/Bender.yml @@ -22,6 +22,7 @@ sources: # package. Files in level 1 only depend on files in level 0, files in level 2 on files in # levels 1 and 0, etc. Files within a level are ordered alphabetically. # Level 0 + - src/axi_burst_splitter_table.sv - src/snitch_icache_pkg.sv - src/riscv_instr_branch.sv - src/multi_accept_rr_arb.sv diff --git a/src/axi_burst_splitter_table.sv b/src/axi_burst_splitter_table.sv new file mode 100644 index 0000000..014baab --- /dev/null +++ b/src/axi_burst_splitter_table.sv @@ -0,0 +1,161 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Author: Samuel Riedel +// +// Adapted from the axi_burst_splitter authored by: +// Andreas Kurth +// Florian Zaruba +// Wolfgang Roenninger + +`include "common_cells/registers.svh" +/// Stores the burst length and the corresponding address offset for the axi_to_cache module. 
+/// Adapted from axi_burst_splitter_counters +module axi_burst_splitter_table #( + parameter int unsigned MaxTrans = 0, + parameter int unsigned IdWidth = 0, + parameter type offset_t = logic, + parameter type id_t = logic [IdWidth-1:0] +) ( + input logic clk_i, + input logic rst_ni, + + input id_t alloc_id_i, + input axi_pkg::len_t alloc_len_i, + input offset_t alloc_offset_i, + input logic alloc_req_i, + output logic alloc_gnt_o, + + input id_t cnt_id_i, + output axi_pkg::len_t cnt_len_o, + output offset_t cnt_offset_o, + input logic cnt_set_err_i, + output logic cnt_err_o, + input logic cnt_len_dec_i, + input logic cnt_offset_inc_i, + input logic cnt_req_i, + output logic cnt_gnt_o +); + localparam int unsigned CntIdxWidth = (MaxTrans > 1) ? $clog2(MaxTrans) : 32'd1; + + typedef logic [CntIdxWidth-1:0] cnt_idx_t; + typedef logic [$bits(axi_pkg::len_t):0] cnt_t; + + cnt_idx_t cnt_free_idx, cnt_r_idx; + logic [MaxTrans-1:0] cnt_len_dec, cnt_offset_inc, cnt_free, cnt_set, err_d, err_q; + cnt_t cnt_len_inp; + cnt_t [MaxTrans-1:0] cnt_len_oup; + offset_t cnt_offset_inp; + offset_t [MaxTrans-1:0] cnt_offset_oup; + for (genvar i = 0; i < MaxTrans; i++) begin : gen_cnt + counter #( + .WIDTH ( $bits(cnt_t) ) + ) i_cnt_len ( + .clk_i, + .rst_ni, + .clear_i ( 1'b0 ), + .en_i ( cnt_len_dec[i] ), + .load_i ( cnt_set[i] ), + .down_i ( 1'b1 ), + .d_i ( cnt_len_inp ), + .q_o ( cnt_len_oup[i] ), + .overflow_o ( ) // not used + ); + counter #( + .WIDTH ( $bits(offset_t) ) + ) i_cnt_offset ( + .clk_i, + .rst_ni, + .clear_i ( 1'b0 ), + .en_i ( cnt_offset_inc[i] ), + .load_i ( cnt_set[i] ), + .down_i ( 1'b0 ), + .d_i ( cnt_offset_inp ), + .q_o ( cnt_offset_oup[i] ), + .overflow_o ( ) // not used + ); + assign cnt_free[i] = (cnt_len_oup[i] == '0); + end + assign cnt_len_inp = {1'b0, alloc_len_i} + 1; + assign cnt_offset_inp = alloc_offset_i; + + lzc #( + .WIDTH ( MaxTrans ), + .MODE ( 1'b0 ) + ) i_lzc ( + .in_i ( cnt_free ), + .cnt_o ( cnt_free_idx ), + .empty_o ( ) + ); + + logic 
idq_inp_req, idq_inp_gnt; + logic idq_oup_gnt, idq_oup_valid, idq_oup_pop; + id_queue #( + .ID_WIDTH ( $bits(id_t) ), + .CAPACITY ( MaxTrans ), + .FULL_BW ( 1'b1 ), + .data_t ( cnt_idx_t ) + ) i_idq ( + .clk_i, + .rst_ni, + .inp_id_i ( alloc_id_i ), + .inp_data_i ( cnt_free_idx ), + .inp_req_i ( idq_inp_req ), + .inp_gnt_o ( idq_inp_gnt ), + .exists_data_i ( '0 ), + .exists_mask_i ( '0 ), + .exists_req_i ( 1'b0 ), + .exists_o ( /* unused */ ), + .exists_gnt_o ( /* unused */ ), + .oup_id_i ( cnt_id_i ), + .oup_pop_i ( idq_oup_pop ), + .oup_req_i ( cnt_req_i ), + .oup_data_o ( cnt_r_idx ), + .oup_data_valid_o ( idq_oup_valid ), + .oup_gnt_o ( idq_oup_gnt ) + ); + logic [8:0] read_len; + assign idq_inp_req = alloc_req_i & alloc_gnt_o; + assign alloc_gnt_o = idq_inp_gnt & |(cnt_free); + assign cnt_gnt_o = idq_oup_gnt & idq_oup_valid; + assign read_len = cnt_len_oup[cnt_r_idx] - 1; + assign cnt_len_o = read_len[7:0]; + assign cnt_offset_o = cnt_offset_oup[cnt_r_idx]; + assign idq_oup_pop = cnt_req_i & cnt_gnt_o & cnt_len_dec_i & (cnt_len_o == 8'd0); + + always_comb begin + cnt_len_dec = '0; + cnt_len_dec[cnt_r_idx] = cnt_req_i & cnt_gnt_o & cnt_len_dec_i; + end + always_comb begin + cnt_offset_inc = '0; + cnt_offset_inc[cnt_r_idx] = cnt_req_i & cnt_gnt_o & cnt_offset_inc_i; + end + always_comb begin + cnt_set = '0; + cnt_set[cnt_free_idx] = alloc_req_i & alloc_gnt_o; + end + always_comb begin + err_d = err_q; + cnt_err_o = err_q[cnt_r_idx]; + if (cnt_req_i && cnt_gnt_o && cnt_set_err_i) begin + err_d[cnt_r_idx] = 1'b1; + cnt_err_o = 1'b1; + end + if (alloc_req_i && alloc_gnt_o) begin + err_d[cnt_free_idx] = 1'b0; + end + end + + // registers + `FF(err_q, err_d, '0, clk_i, rst_ni) + + `ifndef VERILATOR + // pragma translate_off + assume property (@(posedge clk_i) idq_oup_gnt |-> idq_oup_valid) + else begin $warning("Invalid output at ID queue, read not granted!"); $finish(); end + // pragma translate_on + `endif + +endmodule diff --git a/src/snitch_axi_to_cache.sv 
b/src/snitch_axi_to_cache.sv index 1099862..0ecc082 100644 --- a/src/snitch_axi_to_cache.sv +++ b/src/snitch_axi_to_cache.sv @@ -348,154 +348,3 @@ module snitch_axi_to_cache #( `FF(r_offset_q, r_offset_d, '0, clk_i, rst_ni) endmodule - - -/// Stores the burst length and the corresponding address offset for the axi_to_cache module. -/// Adapted from axi_burst_splitter_counters -module axi_burst_splitter_table #( - parameter int unsigned MaxTrans = 0, - parameter int unsigned IdWidth = 0, - parameter type offset_t = logic, - parameter type id_t = logic [IdWidth-1:0] -) ( - input logic clk_i, - input logic rst_ni, - - input id_t alloc_id_i, - input axi_pkg::len_t alloc_len_i, - input offset_t alloc_offset_i, - input logic alloc_req_i, - output logic alloc_gnt_o, - - input id_t cnt_id_i, - output axi_pkg::len_t cnt_len_o, - output offset_t cnt_offset_o, - input logic cnt_set_err_i, - output logic cnt_err_o, - input logic cnt_len_dec_i, - input logic cnt_offset_inc_i, - input logic cnt_req_i, - output logic cnt_gnt_o -); - localparam int unsigned CntIdxWidth = (MaxTrans > 1) ? 
$clog2(MaxTrans) : 32'd1; - - typedef logic [CntIdxWidth-1:0] cnt_idx_t; - typedef logic [$bits(axi_pkg::len_t):0] cnt_t; - - cnt_idx_t cnt_free_idx, cnt_r_idx; - logic [MaxTrans-1:0] cnt_len_dec, cnt_offset_inc, cnt_free, cnt_set, err_d, err_q; - cnt_t cnt_len_inp; - cnt_t [MaxTrans-1:0] cnt_len_oup; - offset_t cnt_offset_inp; - offset_t [MaxTrans-1:0] cnt_offset_oup; - for (genvar i = 0; i < MaxTrans; i++) begin : gen_cnt - counter #( - .WIDTH ( $bits(cnt_t) ) - ) i_cnt_len ( - .clk_i, - .rst_ni, - .clear_i ( 1'b0 ), - .en_i ( cnt_len_dec[i] ), - .load_i ( cnt_set[i] ), - .down_i ( 1'b1 ), - .d_i ( cnt_len_inp ), - .q_o ( cnt_len_oup[i] ), - .overflow_o ( ) // not used - ); - counter #( - .WIDTH ( $bits(offset_t) ) - ) i_cnt_offset ( - .clk_i, - .rst_ni, - .clear_i ( 1'b0 ), - .en_i ( cnt_offset_inc[i] ), - .load_i ( cnt_set[i] ), - .down_i ( 1'b0 ), - .d_i ( cnt_offset_inp ), - .q_o ( cnt_offset_oup[i] ), - .overflow_o ( ) // not used - ); - assign cnt_free[i] = (cnt_len_oup[i] == '0); - end - assign cnt_len_inp = {1'b0, alloc_len_i} + 1; - assign cnt_offset_inp = alloc_offset_i; - - lzc #( - .WIDTH ( MaxTrans ), - .MODE ( 1'b0 ) - ) i_lzc ( - .in_i ( cnt_free ), - .cnt_o ( cnt_free_idx ), - .empty_o ( ) - ); - - logic idq_inp_req, idq_inp_gnt; - logic idq_oup_gnt, idq_oup_valid, idq_oup_pop; - id_queue #( - .ID_WIDTH ( $bits(id_t) ), - .CAPACITY ( MaxTrans ), - .FULL_BW ( 1'b1 ), - .data_t ( cnt_idx_t ) - ) i_idq ( - .clk_i, - .rst_ni, - .inp_id_i ( alloc_id_i ), - .inp_data_i ( cnt_free_idx ), - .inp_req_i ( idq_inp_req ), - .inp_gnt_o ( idq_inp_gnt ), - .exists_data_i ( '0 ), - .exists_mask_i ( '0 ), - .exists_req_i ( 1'b0 ), - .exists_o ( /* unused */ ), - .exists_gnt_o ( /* unused */ ), - .oup_id_i ( cnt_id_i ), - .oup_pop_i ( idq_oup_pop ), - .oup_req_i ( cnt_req_i ), - .oup_data_o ( cnt_r_idx ), - .oup_data_valid_o ( idq_oup_valid ), - .oup_gnt_o ( idq_oup_gnt ) - ); - logic [8:0] read_len; - assign idq_inp_req = alloc_req_i & alloc_gnt_o; - assign 
alloc_gnt_o = idq_inp_gnt & |(cnt_free); - assign cnt_gnt_o = idq_oup_gnt & idq_oup_valid; - assign read_len = cnt_len_oup[cnt_r_idx] - 1; - assign cnt_len_o = read_len[7:0]; - assign cnt_offset_o = cnt_offset_oup[cnt_r_idx]; - assign idq_oup_pop = cnt_req_i & cnt_gnt_o & cnt_len_dec_i & (cnt_len_o == 8'd0); - - always_comb begin - cnt_len_dec = '0; - cnt_len_dec[cnt_r_idx] = cnt_req_i & cnt_gnt_o & cnt_len_dec_i; - end - always_comb begin - cnt_offset_inc = '0; - cnt_offset_inc[cnt_r_idx] = cnt_req_i & cnt_gnt_o & cnt_offset_inc_i; - end - always_comb begin - cnt_set = '0; - cnt_set[cnt_free_idx] = alloc_req_i & alloc_gnt_o; - end - always_comb begin - err_d = err_q; - cnt_err_o = err_q[cnt_r_idx]; - if (cnt_req_i && cnt_gnt_o && cnt_set_err_i) begin - err_d[cnt_r_idx] = 1'b1; - cnt_err_o = 1'b1; - end - if (alloc_req_i && alloc_gnt_o) begin - err_d[cnt_free_idx] = 1'b0; - end - end - - // registers - `FF(err_q, err_d, '0, clk_i, rst_ni) - - `ifndef VERILATOR - // pragma translate_off - assume property (@(posedge clk_i) idq_oup_gnt |-> idq_oup_valid) - else begin $warning("Invalid output at ID queue, read not granted!"); $finish(); end - // pragma translate_on - `endif - -endmodule From e85090a99ad8e39399a11d5eacbaced4de32dd6c Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 23 Jul 2024 17:57:12 +0200 Subject: [PATCH 4/7] Rename pulp_icache_wrap to obi_icache_wrap --- Bender.yml | 5 ++--- src/{pulp_icache_wrap.sv => obi_icache_wrap.sv} | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) rename src/{pulp_icache_wrap.sv => obi_icache_wrap.sv} (99%) diff --git a/Bender.yml b/Bender.yml index 971dbee..cd3865c 100644 --- a/Bender.yml +++ b/Bender.yml @@ -39,9 +39,8 @@ sources: # Level 3 - src/snitch_icache.sv - src/snitch_read_only_cache.sv - - target: pulp - files: - - src/pulp_icache_wrap.sv + # Level 4 + - src/obi_icache_wrap.sv - target: test files: - test/snitch_icache_l0_tb.sv diff --git a/src/pulp_icache_wrap.sv b/src/obi_icache_wrap.sv 
similarity index 99% rename from src/pulp_icache_wrap.sv rename to src/obi_icache_wrap.sv index a161263..37d0fc1 100644 --- a/src/pulp_icache_wrap.sv +++ b/src/obi_icache_wrap.sv @@ -21,7 +21,7 @@ /// FetchDataWidth = PRI_FETCH_DATA_WIDTH /// AxiAddrWidth = AXI_ADDR /// AxiDataWidth = AXI_DATA -module pulp_icache_wrap #( +module obi_icache_wrap #( /// Number of request (fetch) ports parameter int NumFetchPorts = -1, /// L0 Cache Line Count From 4362489932343a8ef81853e2fa81716221a818fc Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Wed, 24 Jul 2024 11:12:33 +0200 Subject: [PATCH 5/7] update changelog --- Changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Changelog.md b/Changelog.md index 8838dac..6d8788e 100644 --- a/Changelog.md +++ b/Changelog.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added - Add statistics signals output for shared L1. +- Add OBI-request-compatible instruction cache variant. ## 0.1.1 - 28.06.2024 ### Added From e7e9de38cb35f68e031af521a150b2b32f7fa8aa Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 20 Aug 2024 17:22:40 +0200 Subject: [PATCH 6/7] obi_icache_wrap: Propagate parameters from icache --- src/obi_icache_wrap.sv | 88 +++++++++++++++++++++++++++--------------- src/snitch_icache.sv | 6 +-- 2 files changed, 60 insertions(+), 34 deletions(-) diff --git a/src/obi_icache_wrap.sv b/src/obi_icache_wrap.sv index 37d0fc1..5f4a46b 100644 --- a/src/obi_icache_wrap.sv +++ b/src/obi_icache_wrap.sv @@ -10,13 +10,13 @@ /// Unsupported: different line width, banks in L1, L0 not fully associative /// [SH_FETCH_DATA_WIDTH == Cache line width] /// [SH_NB_BANKS == 1] -/// [PRI_NB_WAYS == L0_LINE_COUNT] -> here fully associative +/// [PRI_NB_WAYS == L0LineCount] -> here fully associative /// [SH_CACHE_LINE == PRI_CACHE_LINE] /// NumFetchPorts = NB_CORES -/// L0_LINE_COUNT = PRI_CACHE_SIZE/(bytes per line) -/// LINE_WIDTH = X_CACHE_LINE * DATA_WIDTH -> Use >= 
32*NB_CORES for optimal performance -/// LINE_COUNT = SH_CACHE_SIZE/(bytes per line) -/// WAY_COUNT = SH_NB_WAYS +/// L0LineCount = PRI_CACHE_SIZE/(bytes per line) +/// LineWidth = X_CACHE_LINE * DATA_WIDTH -> Use >= 32*NB_CORES for optimal performance +/// LineCount = SH_CACHE_SIZE/(bytes per line) +/// WayCount = SH_NB_WAYS /// FetchAddrWidth = FETCH_ADDR_WIDTH /// FetchDataWidth = PRI_FETCH_DATA_WIDTH /// AxiAddrWidth = AXI_ADDR @@ -25,23 +25,47 @@ module obi_icache_wrap #( /// Number of request (fetch) ports parameter int NumFetchPorts = -1, /// L0 Cache Line Count - parameter int L0_LINE_COUNT = -1, + parameter int L0LineCount = -1, /// Cache Line Width /// For optimal performance, use >= 32*NumFetchPorts to allow execution of 32-bit instructions /// for each core before requiring another L0-L1 fetch. - parameter int LINE_WIDTH = -1, - /// The number of cache lines per set. Power of two; >= 2. - parameter int LINE_COUNT = -1, + parameter int LineWidth = -1, + /// The number of cache lines per way. Power of two; >= 2. + parameter int LineCount = -1, /// The set associativity of the cache. Power of two; >= 1. - parameter int WAY_COUNT = 1, + parameter int WayCount = 1, /// Fetch interface address width. Same as FILL_AW; >= 1. parameter int FetchAddrWidth = -1, /// Fetch interface data width. Power of two; >= 8. parameter int FetchDataWidth = -1, - /// Fill interface address width. Same as FETCH_AW; >= 1. + /// Fill interface address width. Same as FetchAddrWidth; >= 1. parameter int AxiAddrWidth = -1, /// Fill interface data width. Power of two; >= 8. parameter int AxiDataWidth = -1, + /// Allow fetches to have priority over prefetches for L0 to L1 + parameter bit FetchPriority = 1'b1, + /// Merge L0-L1 fetches if requesting the same address + parameter bit MergeFetches = 1'b1, + /// Serialize the L1 lookup (parallel tag/data lookup by default) + parameter bit SerialLookup = 1'b1, + /// Replace the L1 tag banks with latch-based SCM. 
+ parameter bit L1TagScm = 1'b1, + /// Number of pending response beats for the L1 cache. + parameter int unsigned NumAxiOutstanding = 4, + /// This reduces area impact at the cost of + /// increased hassle of having latches in + /// the design. + /// i_snitch_icache/gen_prefetcher*i_snitch_icache_l0/data*/Q + parameter bit EarlyLatch = 1'b0, + /// Tag width of the data determining logic, this can reduce the + /// critical path into the L0 cache when small. The trade-off + /// is a higher miss-rate in case the smaller tag matches more + /// tags. The tag must be smaller than the necessary L0 tag. + /// If configured to `-1` the entire tag is used, effectively + /// disabling this feature. + parameter int L0EarlyTagWidth = -1, + /// Operate L0 cache in slower clock-domain + parameter bit IsoCrossing = 1, /// Configuration input types for memory cuts used in implementation. parameter type sram_cfg_data_t = logic, parameter type sram_cfg_tag_t = logic, @@ -73,6 +97,7 @@ module obi_icache_wrap #( output axi_req_t axi_req_o, input axi_rsp_t axi_rsp_i ); + // AdapterType 1 is the only tested variant localparam int unsigned AdapterType = 1; logic [NumFetchPorts-1:0] fetch_valid, fetch_ready, fetch_rerror; @@ -192,26 +217,27 @@ module obi_icache_wrap #( end snitch_icache #( - .NR_FETCH_PORTS ( NumFetchPorts ), - .L0_LINE_COUNT ( L0_LINE_COUNT ), - .LINE_WIDTH ( LINE_WIDTH ), - .LINE_COUNT ( LINE_COUNT ), - .WAY_COUNT ( WAY_COUNT ), - .FETCH_AW ( FetchAddrWidth ), - .FETCH_DW ( FetchDataWidth ), - .FILL_AW ( AxiAddrWidth ), - .FILL_DW ( AxiDataWidth ), - .FETCH_PRIORITY ( 1 ), - .MERGE_FETCHES ( 1 ), - .L1_TAG_SCM ( 1 ), - .SERIAL_LOOKUP ( 1 ), - .NUM_AXI_OUTSTANDING( 4 ), - .EARLY_LATCH ( 0 ), - .ISO_CROSSING ( 0 ), - .sram_cfg_data_t ( sram_cfg_data_t ), - .sram_cfg_tag_t ( sram_cfg_tag_t ), - .axi_req_t ( axi_req_t ), - .axi_rsp_t ( axi_rsp_t ) + .NR_FETCH_PORTS ( NumFetchPorts ), + .L0_LINE_COUNT ( L0LineCount ), + .LINE_WIDTH ( LineWidth ), + .LINE_COUNT ( 
LineCount ), + .WAY_COUNT ( WayCount ), + .FETCH_AW ( FetchAddrWidth ), + .FETCH_DW ( FetchDataWidth ), + .FILL_AW ( AxiAddrWidth ), + .FILL_DW ( AxiDataWidth ), + .FETCH_PRIORITY ( FetchPriority ), + .MERGE_FETCHES ( MergeFetches ), + .SERIAL_LOOKUP ( SerialLookup ), + .L1_TAG_SCM ( L1TagScm ), + .NUM_AXI_OUTSTANDING( NumAxiOutstanding ), + .EARLY_LATCH ( EarlyLatch ), + .L0_EARLY_TAG_WIDTH ( L0EarlyTagWidth ), + .ISO_CROSSING ( IsoCrossing ), + .sram_cfg_data_t ( sram_cfg_data_t ), + .sram_cfg_tag_t ( sram_cfg_tag_t ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_rsp_t ) ) i_snitch_icache ( .clk_i, .clk_d2_i ( clk_i ), diff --git a/src/snitch_icache.sv b/src/snitch_icache.sv index 3785410..2623641 100644 --- a/src/snitch_icache.sv +++ b/src/snitch_icache.sv @@ -31,16 +31,16 @@ module snitch_icache import snitch_icache_pkg::*; #( /// Merge L0-L1 fetches if requesting the same address parameter bit MERGE_FETCHES = 1'b0, /// Serialize the L1 lookup (parallel tag/data lookup by default) - parameter bit SERIAL_LOOKUP = 0, + parameter bit SERIAL_LOOKUP = 1'b0, /// Replace the L1 tag banks with latch-based SCM. - parameter bit L1_TAG_SCM = 0, + parameter bit L1_TAG_SCM = 1'b0, /// Number of pending response beats for the L1 cache. parameter int unsigned NUM_AXI_OUTSTANDING = 2, /// This reduces area impact at the cost of /// increased hassle of having latches in /// the design. /// i_snitch_icache/gen_prefetcher*i_snitch_icache_l0/data*/Q - parameter bit EARLY_LATCH = 0, + parameter bit EARLY_LATCH = 1'b0, /// Tag width of the data determining logic, this can reduce the /// the critical path into the L0 cache when small. 
The trade-off /// is a higher miss-rate in case the smaller tag matches more From 08608872a6fbd9e1d14a6f7726f2d663d464a910 Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 20 Aug 2024 17:30:35 +0200 Subject: [PATCH 7/7] obi_icache_wrap: fix performance events --- src/obi_icache_wrap.sv | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/obi_icache_wrap.sv b/src/obi_icache_wrap.sv index 5f4a46b..f30802d 100644 --- a/src/obi_icache_wrap.sv +++ b/src/obi_icache_wrap.sv @@ -21,7 +21,7 @@ /// FetchDataWidth = PRI_FETCH_DATA_WIDTH /// AxiAddrWidth = AXI_ADDR /// AxiDataWidth = AXI_DATA -module obi_icache_wrap #( +module obi_icache_wrap import snitch_icache_pkg::*; #( /// Number of request (fetch) ports parameter int NumFetchPorts = -1, /// L0 Cache Line Count @@ -73,29 +73,30 @@ module obi_icache_wrap #( parameter type axi_req_t = logic, parameter type axi_rsp_t = logic ) ( - input logic clk_i, - input logic rst_ni, + input logic clk_i, + input logic rst_ni, // Processor interface - input logic [NumFetchPorts-1:0] fetch_req_i, - input logic [NumFetchPorts-1:0][FetchAddrWidth-1:0] fetch_addr_i, - output logic [NumFetchPorts-1:0] fetch_gnt_o, - output logic [NumFetchPorts-1:0] fetch_rvalid_o, - output logic [NumFetchPorts-1:0][FetchDataWidth-1:0] fetch_rdata_o, - output logic [NumFetchPorts-1:0] fetch_rerror_o, - - input logic enable_prefetching_i, - output snitch_icache_pkg::icache_events_t [NumFetchPorts-1:0] icache_events_o, - input logic [NumFetchPorts-1:0] flush_valid_i, - output logic [NumFetchPorts-1:0] flush_ready_o, + input logic [NumFetchPorts-1:0] fetch_req_i, + input logic [NumFetchPorts-1:0][FetchAddrWidth-1:0] fetch_addr_i, + output logic [NumFetchPorts-1:0] fetch_gnt_o, + output logic [NumFetchPorts-1:0] fetch_rvalid_o, + output logic [NumFetchPorts-1:0][FetchDataWidth-1:0] fetch_rdata_o, + output logic [NumFetchPorts-1:0] fetch_rerror_o, + + input logic enable_prefetching_i, + output 
icache_l0_events_t [NumFetchPorts-1:0] icache_l0_events_o, + output icache_l1_events_t icache_l1_events_o, + input logic [NumFetchPorts-1:0] flush_valid_i, + output logic [NumFetchPorts-1:0] flush_ready_o, // SRAM configs - input sram_cfg_data_t sram_cfg_data_i, - input sram_cfg_tag_t sram_cfg_tag_i, + input sram_cfg_data_t sram_cfg_data_i, + input sram_cfg_tag_t sram_cfg_tag_i, // AXI interface - output axi_req_t axi_req_o, - input axi_rsp_t axi_rsp_i + output axi_req_t axi_req_o, + input axi_rsp_t axi_rsp_i ); // AdapterType 1 is the only tested variant localparam int unsigned AdapterType = 1; @@ -244,7 +245,8 @@ module obi_icache_wrap #( .rst_ni, .enable_prefetching_i, - .icache_events_o, + .icache_l0_events_o, + .icache_l1_events_o, .flush_valid_i, .flush_ready_o,