diff --git a/Makefile b/Makefile index 60b657f..cf880d4 100644 --- a/Makefile +++ b/Makefile @@ -68,11 +68,11 @@ python-venv-clean: ## Clean Python virtual environment # Documentation # ################# -.PHONY: help +.PHONY: help h Black=\033[0m Green=\033[1;32m -help: ## Show an overview of all Makefile targets. +help h: ## Show an overview of all Makefile targets. @echo -e "Makefile ${Green}targets${Black} for chimera" @echo -e "Use 'make ' where is one of:" @echo -e "" diff --git a/README.md b/README.md index b308242..1a03ed6 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ which riscv32-unknown-elf-gcc ### 🛠️ Build RTL If you have all needed dependencies and you want to build the full Chimera SoC, both RTL and SW, run: ``` sh +bender checkout make chim-all ``` Or for more selective builds: diff --git a/hw/chimera_clu_domain.sv b/hw/chimera_clu_domain.sv index 10d1b82..13feb73 100644 --- a/hw/chimera_clu_domain.sv +++ b/hw/chimera_clu_domain.sv @@ -25,8 +25,10 @@ module chimera_clu_domain parameter type wide_out_resp_t = logic ) ( input logic soc_clk_i, - input logic [ ExtClusters-1:0] clu_clk_i, + input logic clu_clk_i, input logic [ ExtClusters-1:0] rst_ni, + // Signal to enable or disable the cluster clock signal + input logic [ ExtClusters-1:0] clu_clk_en_i, input logic [ ExtClusters-1:0] widemem_bypass_i, input logic [ 31:0] boot_addr_i, //----------------------------- @@ -183,11 +185,13 @@ module chimera_clu_domain .narrow_out_req_t (narrow_out_req_t), .narrow_out_resp_t(narrow_out_resp_t), .wide_out_req_t (wide_out_req_t), - .wide_out_resp_t (wide_out_resp_t) + .wide_out_resp_t (wide_out_resp_t), + .EnAxiCdc (ChimeraClusterCfg.EnAxiCdc[extClusterIdx]) ) i_chimera_cluster ( .soc_clk_i(soc_clk_i), - .clu_clk_i(clu_clk_i[extClusterIdx]), + .clu_clk_i(clu_clk_i), .rst_ni(rst_ni[extClusterIdx]), + .clu_clk_en_i(clu_clk_en_i[extClusterIdx]), .widemem_bypass_i(widemem_bypass_i[extClusterIdx]), 
.debug_req_i(debug_req_i[`PREVNRCORES(extClusterIdx)+:`NRCORES(extClusterIdx)]), .meip_i(xeip_i[`PREVNRCORES(extClusterIdx)+:`NRCORES(extClusterIdx)]), diff --git a/hw/chimera_cluster_adapter.sv b/hw/chimera_cluster_adapter.sv index 7e8c97c..7696c63 100644 --- a/hw/chimera_cluster_adapter.sv +++ b/hw/chimera_cluster_adapter.sv @@ -13,6 +13,9 @@ module chimera_cluster_adapter #( parameter int WidePassThroughRegionStart = '0, // End address of Memory Island parameter int WidePassThroughRegionEnd = '0, + // Add AXI CDC between the cluster and SoC, + // When this parameter is disabled, you must ensure clu_clk_i = soc_clk_i + parameter bit EnAxiCdc = 1'b0, parameter type narrow_in_req_t = logic, parameter type narrow_in_resp_t = logic, @@ -330,71 +333,92 @@ module chimera_cluster_adapter #( .mst_resp_i(axi_from_cluster_wide_resp) ); - // AXI Narrow CDC from SoC to Cluster - - axi_cdc #( - .aw_chan_t (axi_narrow_soc_in_aw_chan_t), - .w_chan_t (axi_narrow_soc_in_w_chan_t), - .b_chan_t (axi_narrow_soc_in_b_chan_t), - .ar_chan_t (axi_narrow_soc_in_ar_chan_t), - .r_chan_t (axi_narrow_soc_in_r_chan_t), - .axi_req_t (narrow_in_req_t), - .axi_resp_t(narrow_in_resp_t) - ) narrow_slv_cdc ( - .src_clk_i (soc_clk_i), - .src_rst_ni(rst_ni), - .src_req_i (narrow_in_req_i), - .src_resp_o(narrow_in_resp_o), - - .dst_clk_i (clu_clk_i), - .dst_rst_ni(rst_ni), - .dst_req_o (axi_to_cluster_narrow_req), - .dst_resp_i(axi_to_cluster_narrow_resp) - ); - - // AXI Narrow CDC from Cluster to SoC - - axi_cdc #( - .aw_chan_t (axi_narrow_soc_out_aw_chan_t), - .w_chan_t (axi_narrow_soc_out_w_chan_t), - .b_chan_t (axi_narrow_soc_out_b_chan_t), - .ar_chan_t (axi_narrow_soc_out_ar_chan_t), - .r_chan_t (axi_narrow_soc_out_r_chan_t), - .axi_req_t (narrow_out_req_t), - .axi_resp_t(narrow_out_resp_t) - ) narrow_mst_cdc ( - .src_clk_i (clu_clk_i), - .src_rst_ni(rst_ni), - .src_req_i (axi_from_cluster_narrow_req), - .src_resp_o(axi_from_cluster_narrow_resp), - - .dst_clk_i (soc_clk_i), - .dst_rst_ni(rst_ni), 
- .dst_req_o (narrow_out_req_o[0]), - .dst_resp_i(narrow_out_resp_i[0]) - ); - - // AXI Wide CDC from Cluster to SoC - - axi_cdc #( - .aw_chan_t (axi_wide_clu_out_aw_chan_t), - .w_chan_t (axi_wide_clu_out_w_chan_t), - .b_chan_t (axi_wide_clu_out_b_chan_t), - .ar_chan_t (axi_wide_clu_out_ar_chan_t), - .r_chan_t (axi_wide_clu_out_r_chan_t), - .axi_req_t (wide_out_req_t), - .axi_resp_t(wide_out_resp_t) - ) wide_mst_cdc ( - .src_clk_i (clu_clk_i), - .src_rst_ni(rst_ni), - .src_req_i (axi_from_cluster_wide_req), - .src_resp_o(axi_from_cluster_wide_resp), - - .dst_clk_i (soc_clk_i), - .dst_rst_ni(rst_ni), - .dst_req_o (axi_from_cluster_wide_premux_req), - .dst_resp_i(axi_from_cluster_wide_premux_resp) - ); + // Choose carefully whether you need the AXI CDCs. + // If so, check the LogDepth and SyncStages parameters of axi_cdc. + // They have to be set deliberately so as not to limit the bandwidth. + if (EnAxiCdc) begin : gen_axi_cdcs + // AXI Narrow CDC from SoC to Cluster + axi_cdc #( + .aw_chan_t (axi_narrow_soc_in_aw_chan_t), + .w_chan_t (axi_narrow_soc_in_w_chan_t), + .b_chan_t (axi_narrow_soc_in_b_chan_t), + .ar_chan_t (axi_narrow_soc_in_ar_chan_t), + .r_chan_t (axi_narrow_soc_in_r_chan_t), + .axi_req_t (narrow_in_req_t), + .axi_resp_t(narrow_in_resp_t), + .LogDepth (3), + .SyncStages(2) + ) narrow_slv_cdc ( + .src_clk_i (soc_clk_i), + .src_rst_ni(rst_ni), + .src_req_i (narrow_in_req_i), + .src_resp_o(narrow_in_resp_o), + + .dst_clk_i (clu_clk_i), + .dst_rst_ni(rst_ni), + .dst_req_o (axi_to_cluster_narrow_req), + .dst_resp_i(axi_to_cluster_narrow_resp) + ); + + // AXI Narrow CDC from Cluster to SoC + + axi_cdc #( + .aw_chan_t (axi_narrow_soc_out_aw_chan_t), + .w_chan_t (axi_narrow_soc_out_w_chan_t), + .b_chan_t (axi_narrow_soc_out_b_chan_t), + .ar_chan_t (axi_narrow_soc_out_ar_chan_t), + .r_chan_t (axi_narrow_soc_out_r_chan_t), + .axi_req_t (narrow_out_req_t), + .axi_resp_t(narrow_out_resp_t), + .LogDepth (3), + .SyncStages(2) + ) narrow_mst_cdc ( + .src_clk_i (clu_clk_i), 
+ .src_rst_ni(rst_ni), + .src_req_i (axi_from_cluster_narrow_req), + .src_resp_o(axi_from_cluster_narrow_resp), + + .dst_clk_i (soc_clk_i), + .dst_rst_ni(rst_ni), + .dst_req_o (narrow_out_req_o[0]), + .dst_resp_i(narrow_out_resp_i[0]) + ); + + // AXI Wide CDC from Cluster to SoC + + axi_cdc #( + .aw_chan_t (axi_wide_clu_out_aw_chan_t), + .w_chan_t (axi_wide_clu_out_w_chan_t), + .b_chan_t (axi_wide_clu_out_b_chan_t), + .ar_chan_t (axi_wide_clu_out_ar_chan_t), + .r_chan_t (axi_wide_clu_out_r_chan_t), + .axi_req_t (wide_out_req_t), + .axi_resp_t(wide_out_resp_t), + .LogDepth (3), + .SyncStages(2) + ) wide_mst_cdc ( + .src_clk_i (clu_clk_i), + .src_rst_ni(rst_ni), + .src_req_i (axi_from_cluster_wide_req), + .src_resp_o(axi_from_cluster_wide_resp), + + .dst_clk_i (soc_clk_i), + .dst_rst_ni(rst_ni), + .dst_req_o (axi_from_cluster_wide_premux_req), + .dst_resp_i(axi_from_cluster_wide_premux_resp) + ); + + end else begin : gen_no_axi_cdcs + // Direct connections if no CDC is needed + assign narrow_in_resp_o = axi_to_cluster_narrow_resp; + assign axi_to_cluster_narrow_req = narrow_in_req_i; + + assign narrow_out_req_o[0] = axi_from_cluster_narrow_req; + assign axi_from_cluster_narrow_resp = narrow_out_resp_i[0]; + + assign axi_from_cluster_wide_premux_req = axi_from_cluster_wide_req; + assign axi_from_cluster_wide_resp = axi_from_cluster_wide_premux_resp; + end // Validate parameters `ifndef VERILATOR diff --git a/hw/chimera_pkg.sv b/hw/chimera_pkg.sv index 3caef91..88c2aef 100644 --- a/hw/chimera_pkg.sv +++ b/hw/chimera_pkg.sv @@ -30,6 +30,7 @@ package chimera_pkg; logic [iomsb(ExtClusters):0] hasWideMasterPort; byte_bt [iomsb(ExtClusters):0] NrCores; cluster_type_e [iomsb(ExtClusters):0] ClusterType; + logic [iomsb(ExtClusters):0] EnAxiCdc; } cluster_config_t; // For each instantiated cluster, you need to specify three parameters: @@ -40,7 +41,8 @@ package chimera_pkg; localparam cluster_config_t ChimeraClusterCfg = '{ hasWideMasterPort: {1'b1, 1'b1, 1'b1, 1'b1, 1'b1}, 
NrCores: {8'h9, 8'h9, 8'h9, 8'h9, 8'h9}, - ClusterType: {SNITCH, SNITCH, SNITCH, SNITCH, SNITCH} + ClusterType: {SNITCH, SNITCH, SNITCH, SNITCH, SNITCH}, + EnAxiCdc: {1'b1, 1'b1, 1'b1, 1'b1, 1'b1} }; function automatic int _sumVector(byte_bt [iomsb(ExtClusters):0] vector, int vectorLen); diff --git a/hw/chimera_top_wrapper.sv b/hw/chimera_top_wrapper.sv index a534bdb..f924472 100644 --- a/hw/chimera_top_wrapper.sv +++ b/hw/chimera_top_wrapper.sv @@ -133,9 +133,6 @@ module chimera_top_wrapper .axi_ext_llc_rsp_t(axi_mst_rsp_t), .axi_ext_mst_req_t(axi_mst_req_t), .axi_ext_mst_rsp_t(axi_mst_rsp_t), - // lleone: TODO: remove from here - // .axi_ext_wide_mst_req_t(axi_wide_mst_req_t), - // .axi_ext_wide_mst_rsp_t(axi_wide_mst_rsp_t), .axi_ext_slv_req_t(axi_slv_req_t), .axi_ext_slv_rsp_t(axi_slv_rsp_t), .reg_ext_req_t (reg_req_t), @@ -315,7 +312,11 @@ module chimera_top_wrapper }; logic [ExtClusters-1:0] cluster_clock_gate_en; - logic [ExtClusters-1:0] clu_clk_gated; + // This is the enable clk gate, i.e. + // - enable = 1 -> clock is gated (off) + // - enable = 0 -> clock is running (on) + // It will be used to drive the actual clk enable signal in each cluster. + // For this reason it's inverted when connected to the cluster. 
assign cluster_clock_gate_en = { reg2hw.cluster_4_clk_gate_en, reg2hw.cluster_3_clk_gate_en, @@ -324,14 +325,6 @@ module chimera_top_wrapper reg2hw.cluster_0_clk_gate_en }; - for (genvar extClusterIdx = 0; extClusterIdx < ExtClusters; extClusterIdx++) begin : gen_clk_gates - tc_clk_gating i_cluster_clk_gate ( - .clk_i (clu_clk_i), - .en_i (~cluster_clock_gate_en[extClusterIdx]), - .test_en_i(1'b0), - .clk_o (clu_clk_gated[extClusterIdx]) - ); - end logic [ExtClusters-1:0] cluster_rst_n; logic [ExtClusters-1:0] cluster_soft_rst_n; @@ -363,8 +356,9 @@ module chimera_top_wrapper .wide_out_resp_t (axi_wide_mst_rsp_t) ) i_cluster_domain ( .soc_clk_i (soc_clk_i), - .clu_clk_i (clu_clk_gated), + .clu_clk_i (clu_clk_i), .rst_ni (cluster_rst_n), + .clu_clk_en_i (~cluster_clock_gate_en), .widemem_bypass_i (wide_mem_bypass_mode), .boot_addr_i (reg2hw.snitch_configurable_boot_addr.q), .debug_req_i (dbg_ext_req), diff --git a/hw/clusters/chimera_cluster.sv b/hw/clusters/chimera_cluster.sv index 6f633fa..8447c33 100644 --- a/hw/clusters/chimera_cluster.sv +++ b/hw/clusters/chimera_cluster.sv @@ -17,11 +17,13 @@ module chimera_cluster parameter type narrow_out_req_t = logic, parameter type narrow_out_resp_t = logic, parameter type wide_out_req_t = logic, - parameter type wide_out_resp_t = logic + parameter type wide_out_resp_t = logic, + parameter bit EnAxiCdc = 0 ) ( input logic soc_clk_i, input logic clu_clk_i, input logic rst_ni, + input logic clu_clk_en_i, input logic widemem_bypass_i, //----------------------------- // Interrupt ports @@ -118,6 +120,15 @@ module chimera_cluster axi_cluster_out_wide_req_t clu_axi_wide_mst_req; axi_cluster_out_wide_resp_t clu_axi_wide_mst_resp; + // Cluster clk signal after the clk gating cell + logic clu_clk_gated; + + tc_clk_gating i_cluster_clk_gate ( + .clk_i (clu_clk_i), + .en_i (clu_clk_en_i), + .test_en_i(1'b0), + .clk_o (clu_clk_gated) + ); if (ClusterDataWidth != Cfg.ChsCfg.AxiDataWidth) begin : gen_narrow_adapter @@ -180,11 +191,13 
@@ module chimera_cluster .wide_out_resp_t(wide_out_resp_t), .clu_wide_out_req_t (axi_cluster_out_wide_req_t), - .clu_wide_out_resp_t(axi_cluster_out_wide_resp_t) + .clu_wide_out_resp_t(axi_cluster_out_wide_resp_t), + // Make sure the SoC and Clusters run at the same frequency if CDCs are disabled + .EnAxiCdc (EnAxiCdc) ) i_cluster_axi_adapter ( .soc_clk_i(soc_clk_i), - .clu_clk_i(clu_clk_i), + .clu_clk_i(clu_clk_gated), .rst_ni, .narrow_in_req_i (clu_axi_narrow_slv_req), @@ -286,7 +299,7 @@ module chimera_cluster .RegisterExtNarrow('0) ) i_test_cluster ( - .clk_i (clu_clk_i), + .clk_i (clu_clk_gated), .clk_d2_bypass_i('0), .rst_ni, diff --git a/iis-env.sh b/iis-env.sh index 0c111cc..beb9298 100755 --- a/iis-env.sh +++ b/iis-env.sh @@ -3,17 +3,17 @@ # Solderpad Hardware License, Version 0.51, see LICENSE for details. # SPDX-License-Identifier: SHL-0.51 +export BENDER='bender-0.31.0' export VSIM="questa-2022.3 vsim" export VOPT="questa-2022.3 vopt" export VLIB="questa-2022.3 vlib" export BASE_PYTHON=/usr/local/anaconda3/bin/python3.11 -export CHS_SW_GCC_BINROOT=/usr/pack/riscv-1.0-kgf/riscv64-gcc-12.2.0/bin +export CHS_SW_32_GCC_BINROOT=/usr/pack/riscv-1.0-kgf/pulp-gcc-2.5.0/bin export RISCV_GCC_BINROOT=/usr/pack/riscv-1.0-kgf/pulp-gcc-2.5.0/bin export CC=/usr/pack/gcc-11.2.0-af/linux-x64/bin/gcc export CXX=/usr/pack/gcc-11.2.0-af/linux-x64/bin/g++ export CMAKE=cmake-3.28.3 export SN_LLVM_BINROOT=/usr/scratch2/vulcano/colluca/tools/riscv32-snitch-llvm-almalinux8-15.0.0-snitch-0.2.0/bin -export BENDER='bender-0.29.1' # Create the python venv if [ ! 
-d ".venv" ]; then diff --git a/target/sim/src/fixture_chimera_soc.sv b/target/sim/src/fixture_chimera_soc.sv index 93687ce..ad7e0bc 100644 --- a/target/sim/src/fixture_chimera_soc.sv +++ b/target/sim/src/fixture_chimera_soc.sv @@ -22,6 +22,8 @@ module fixture_chimera_soc #( localparam chimera_cfg_t DutCfg = ChimeraCfg[SelectedCfg]; localparam cheshire_cfg_t ChsCfg = DutCfg.ChsCfg; + localparam time ClkPeriodClu = 2ns; + localparam time ClkPeriodSys = 5ns; `CHESHIRE_TYPEDEF_ALL(, ChsCfg) `CHIMERA_TYPEDEF_ALL(, DutCfg) @@ -157,6 +159,8 @@ module fixture_chimera_soc #( vip_chimera_soc #( .DutCfg (ChsCfg), + .ClkPeriodClu (ClkPeriodClu), + .ClkPeriodSys (ClkPeriodSys), // Determine whether we preload the hyperram model or not User preload. If 0, the memory model // is not preloaded at time 0. .HypUserPreload (`HYP_USER_PRELOAD), @@ -169,4 +173,17 @@ module fixture_chimera_soc #( .* ); + + ////////////////// + // CDCs check // + ///////////////// + initial begin + for (int extClusterIdx = 0; extClusterIdx < ExtClusters; extClusterIdx++) begin : gen_cdc_check + if (!ChimeraClusterCfg.EnAxiCdc[extClusterIdx] && (ClkPeriodClu != ClkPeriodSys)) begin + $fatal(1, "ClusterIdx:%d, EnAxiCDC = %d: Missing CDCs", extClusterIdx, + ChimeraClusterCfg.EnAxiCdc[extClusterIdx]); + end + end + end + endmodule