From e557c5f2edc0e4f29108e4c85bdb1b0710c8d6a2 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Fri, 20 Jun 2025 17:52:37 +0200 Subject: [PATCH 1/4] rr_arb_tree: Add Verilator pragmas to split tree node signals This enables Verilator optimizations as it now understands there are no combinatorial loops. For the Cheshire SoC, this can reduce simulation time by around 3%. --- src/rr_arb_tree.sv | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/rr_arb_tree.sv b/src/rr_arb_tree.sv index dd07f0c7..8b277f9a 100644 --- a/src/rr_arb_tree.sv +++ b/src/rr_arb_tree.sv @@ -121,11 +121,13 @@ module rr_arb_tree #( end else begin : gen_arbiter localparam int unsigned NumLevels = unsigned'($clog2(NumIn)); - /* verilator lint_off UNOPTFLAT */ - idx_t [2**NumLevels-2:0] index_nodes; // used to propagate the indices - DataType [2**NumLevels-2:0] data_nodes; // used to propagate the data - logic [2**NumLevels-2:0] gnt_nodes; // used to propagate the grant to masters - logic [2**NumLevels-2:0] req_nodes; // used to propagate the requests to slave + /* verilator lint_off SPLITVAR */ // disable warning that is issued if bitwidth is 1 + idx_t [2**NumLevels-2:0] index_nodes /* verilator split_var */; // used to propagate the indices + DataType [2**NumLevels-2:0] data_nodes /* verilator split_var */; // used to propagate the data + logic [2**NumLevels-2:0] gnt_nodes /* verilator split_var */; // used to propagate the grant to masters + logic [2**NumLevels-2:0] req_nodes /* verilator split_var */; // used to propagate the requests to slave + /* verilator lint_on SPLITVAR */ + /* lint_off */ idx_t rr_q; logic [NumIn-1:0] req_d; From 67959c4642d51d4adaa8128cb6a23523c5e3d40a Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Fri, 20 Jun 2025 17:54:51 +0200 Subject: [PATCH 2/4] lzc: Add Verilator pragmas to split tree node signals This enables Verilator optimizations as it now understands there are no combinatorial loops. For the Cheshire SoC, this can reduce simulation time by around 2.5%. --- src/lzc.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lzc.sv b/src/lzc.sv index eccc31a5..8250434e 100644 --- a/src/lzc.sv +++ b/src/lzc.sv @@ -46,8 +46,8 @@ module lzc #( `endif logic [WIDTH-1:0][NumLevels-1:0] index_lut; - logic [2**NumLevels-1:0] sel_nodes; - logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes; + logic [2**NumLevels-1:0] sel_nodes /* verilator split_var */; + logic [2**NumLevels-1:0][NumLevels-1:0] index_nodes /* verilator split_var */; logic [WIDTH-1:0] in_tmp; From 709bb1e1d7cec53b31290ae53a845561d5e87acd Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Fri, 20 Jun 2025 17:55:19 +0200 Subject: [PATCH 3/4] lzc: Optimize reversing of input vector for Verilator speedup In case the vector is not flipped, using a direct assignment can lead to an approximately 2% decrease in total system simulation time (measured using the Cheshire SoC). --- src/lzc.sv | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/lzc.sv b/src/lzc.sv index 8250434e..c082efbd 100644 --- a/src/lzc.sv +++ b/src/lzc.sv @@ -51,11 +51,16 @@ module lzc #( logic [WIDTH-1:0] in_tmp; - // reverse vector if required - always_comb begin : flip_vector - for (int unsigned i = 0; i < WIDTH; i++) begin - in_tmp[i] = (MODE) ? in_i[WIDTH-1-i] : in_i[i]; + if (MODE) begin : g_flip + // Mode 1 (leading zero): flip input vector + always_comb begin : flip_vector + for (int unsigned i = 0; i < WIDTH; i++) begin + in_tmp[i] = in_i[WIDTH-1-i]; + end end + end else begin + // Mode 0 (trailing zero) + assign in_tmp = in_i; end for (genvar j = 0; unsigned'(j) < WIDTH; j++) begin : g_index_lut From 9df8b5d4ff686f1d5b21d6ef9f655a10782aa3ef Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Fri, 20 Jun 2025 18:06:12 +0200 Subject: [PATCH 4/4] cb_filter: Logic simplification for Verilator speed-up This decreases total simulation time of the Cheshire SoC by around 1.8%. --- src/cb_filter.sv | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/cb_filter.sv b/src/cb_filter.sv index 8786defd..f77a54ae 100644 --- a/src/cb_filter.sv +++ b/src/cb_filter.sv @@ -230,10 +230,8 @@ module hash_block #( // output assignment always_comb begin : proc_hash_or indicator_o = '0; - for (int unsigned i = 0; i < (2**HashWidth); i++) begin - for (int unsigned j = 0; j < NoHashes; j++) begin - indicator_o[i] = indicator_o[i] | hashes[j][i]; - end + for (int unsigned j = 0; j < NoHashes; j++) begin + indicator_o = indicator_o | hashes[j]; end end