From da354c182e02545efdd067ab51b73f691908136d Mon Sep 17 00:00:00 2001 From: ckormanyos Date: Fri, 13 Feb 2026 19:11:50 +0100 Subject: [PATCH 1/4] Get esp32_p4 benches running via hybrid startup --- .../src/mcal/xtensa_esp32_p4/mcal_benchmark.h | 8 +- ref_app/src/mcal/xtensa_esp32_p4/mcal_port.h | 185 +++++++------- ref_app/src/mcal/xtensa_esp32_p4/mcal_reg.h | 4 + ref_app/target.vcxproj | 3 + ref_app/target.vcxproj.filters | 12 + .../xtensa_esp32_p4/make/xtensa_esp32_p4.ld | 75 ++---- .../make/xtensa_esp32_p4_files.gmk | 5 +- .../make/xtensa_esp32_p4_flags.gmk | 4 +- .../startup/Code/Startup/Startup.c | 70 +----- .../startup/Code/StdLib/StdLib.cpp | 228 ++++++++++++++++++ .../xtensa_esp32_p4/startup/crt0_init_ram.cpp | 55 +++++ .../micros/xtensa_esp32_p4/startup/crt1.cpp | 47 ++++ 12 files changed, 477 insertions(+), 219 deletions(-) create mode 100644 ref_app/target/micros/xtensa_esp32_p4/startup/Code/StdLib/StdLib.cpp create mode 100644 ref_app/target/micros/xtensa_esp32_p4/startup/crt0_init_ram.cpp create mode 100644 ref_app/target/micros/xtensa_esp32_p4/startup/crt1.cpp diff --git a/ref_app/src/mcal/xtensa_esp32_p4/mcal_benchmark.h b/ref_app/src/mcal/xtensa_esp32_p4/mcal_benchmark.h index 790060f1a..5d0e8fb86 100644 --- a/ref_app/src/mcal/xtensa_esp32_p4/mcal_benchmark.h +++ b/ref_app/src/mcal/xtensa_esp32_p4/mcal_benchmark.h @@ -5,8 +5,8 @@ // or copy at http://www.boost.org/LICENSE_1_0.txt) // -#ifndef MCAL_BENCHMARK_2014_04_16_H_ - #define MCAL_BENCHMARK_2014_04_16_H_ +#ifndef MCAL_BENCHMARK_2014_04_16_H + #define MCAL_BENCHMARK_2014_04_16_H #include #include @@ -17,8 +17,8 @@ { namespace benchmark { - typedef mcal::port::port_pin benchmark_port_type; + typedef mcal::port::port_pin benchmark_port_type; } } -#endif // MCAL_BENCHMARK_2014_04_16_H_ +#endif // MCAL_BENCHMARK_2014_04_16_H diff --git a/ref_app/src/mcal/xtensa_esp32_p4/mcal_port.h b/ref_app/src/mcal/xtensa_esp32_p4/mcal_port.h index 5b8254a6f..5e8abc50e 100644 --- 
a/ref_app/src/mcal/xtensa_esp32_p4/mcal_port.h +++ b/ref_app/src/mcal/xtensa_esp32_p4/mcal_port.h @@ -24,8 +24,8 @@ ******************************************************************************************/ -#ifndef MCAL_PORT_2025_02_22_H - #define MCAL_PORT_2025_02_22_H +#ifndef MCAL_PORT_2026_02_13_H + #define MCAL_PORT_2026_02_13_H #include @@ -46,29 +46,25 @@ private: static constexpr std::uint8_t my_pin { static_cast(PortIndex) }; - static constexpr uintptr_t GPIO_BASE { (uintptr_t) UINT32_C(0x500E0000) }; - static constexpr uintptr_t LP_IO_MUX_BASE { (uintptr_t) UINT32_C(0x5012B000) }; - static constexpr uintptr_t IO_MUX_BASE { (uintptr_t) UINT32_C(0x500E1000) }; - typedef union { - volatile uint32_t reg; + volatile std::uint32_t reg; struct { - volatile uint32_t MCU_OE : 1; - volatile uint32_t SLP_SEL : 1; - volatile uint32_t MCU_WPD : 1; - volatile uint32_t MCU_WPU : 1; - volatile uint32_t MCU_IE : 1; - volatile uint32_t MCU_DRV : 2; - volatile uint32_t FUN_WPD : 1; - volatile uint32_t FUN_WPU : 1; - volatile uint32_t FUN_IE : 1; - volatile uint32_t FUN_DRV : 2; - volatile uint32_t MCU_SEL : 3; - volatile uint32_t FILTER_EN : 1; - uint32_t : 16; + volatile std::uint32_t MCU_OE : 1; + volatile std::uint32_t SLP_SEL : 1; + volatile std::uint32_t MCU_WPD : 1; + volatile std::uint32_t MCU_WPU : 1; + volatile std::uint32_t MCU_IE : 1; + volatile std::uint32_t MCU_DRV : 2; + volatile std::uint32_t FUN_WPD : 1; + volatile std::uint32_t FUN_WPU : 1; + volatile std::uint32_t FUN_IE : 1; + volatile std::uint32_t FUN_DRV : 2; + volatile std::uint32_t MCU_SEL : 3; + volatile std::uint32_t FILTER_EN : 1; + std::uint32_t : 16; } bit; } @@ -76,15 +72,15 @@ typedef union { - volatile uint32_t reg; + volatile std::uint32_t reg; struct { - volatile uint32_t OUT_SEL : 9; - volatile uint32_t INV_SEL : 1; - volatile uint32_t OEN_SEL : 1; - volatile uint32_t OEN_INV_SEL : 1; - uint32_t : 20; + volatile std::uint32_t OUT_SEL : 9; + volatile std::uint32_t INV_SEL : 1; + volatile 
std::uint32_t OEN_SEL : 1; + volatile std::uint32_t OEN_INV_SEL : 1; + std::uint32_t : 20; } bit; } @@ -92,76 +88,29 @@ typedef union { - volatile uint32_t reg; + volatile std::uint32_t reg; struct { - volatile uint32_t REG_PAD_DRV : 2; - volatile uint32_t REG_PAD_RDE : 1; - volatile uint32_t REG_PAD_RUE : 1; - volatile uint32_t REG_PAD_MUX_SEL : 1; - volatile uint32_t REG_PAD_FUN_SEL : 2; - volatile uint32_t REG_PAD_SLP_SEL : 1; - volatile uint32_t REG_PAD_SLP_IE : 1; - volatile uint32_t REG_PAD_SLP_OE : 1; - volatile uint32_t REG_PAD_FUN_IE : 1; - volatile uint32_t REG_PAD_FILTER_EN : 1; - uint32_t : 20; + volatile std::uint32_t REG_PAD_DRV : 2; + volatile std::uint32_t REG_PAD_RDE : 1; + volatile std::uint32_t REG_PAD_RUE : 1; + volatile std::uint32_t REG_PAD_MUX_SEL : 1; + volatile std::uint32_t REG_PAD_FUN_SEL : 2; + volatile std::uint32_t REG_PAD_SLP_SEL : 1; + volatile std::uint32_t REG_PAD_SLP_IE : 1; + volatile std::uint32_t REG_PAD_SLP_OE : 1; + volatile std::uint32_t REG_PAD_FUN_IE : 1; + volatile std::uint32_t REG_PAD_FILTER_EN : 1; + std::uint32_t : 20; } bit; } LP_IO_MUX_GPIO; - static void gpio_cfg_output() - { - if((unsigned) my_pin <= 54u) - { - volatile IO_MUX_GPIO* pIO_MUX_GPIO = (volatile IO_MUX_GPIO*)(IO_MUX_BASE + 4u + 4u * (unsigned) my_pin); - volatile GPIO_FUNC_OUT_SEL_CFG* pGPIO_FUNC_OUT_SEL_CFG = (volatile GPIO_FUNC_OUT_SEL_CFG*)(GPIO_BASE + 0x558ul + 4u * (unsigned) my_pin); - volatile LP_IO_MUX_GPIO* pLP_IO_MUX_GPIO = (volatile LP_IO_MUX_GPIO*)(LP_IO_MUX_BASE + 8u + 4u * (unsigned) my_pin); - volatile uint32_t* pGPIO_OUTx_W1TC = (volatile uint32_t*)(GPIO_BASE + (((unsigned) my_pin < 32u) ? 0x0Cu : 0x18u)); - volatile uint32_t* pGPIO_ENABLE1x_W1TS = (volatile uint32_t*)(GPIO_BASE + (((unsigned) my_pin < 32u) ? 
0x24u : 0x30u)); - - /* configure the pinmux */ - pIO_MUX_GPIO->bit.FUN_DRV = 2; - pIO_MUX_GPIO->bit.FUN_IE = 0; - pIO_MUX_GPIO->bit.MCU_SEL = 1; - - /* set the output configuration */ - pGPIO_FUNC_OUT_SEL_CFG->bit.OUT_SEL = 256; - pGPIO_FUNC_OUT_SEL_CFG->bit.OEN_SEL = 1; - - if((unsigned) my_pin < 16u) - { - /* make LP_GPIO use HP_IO_MUX */ - pLP_IO_MUX_GPIO->bit.REG_PAD_MUX_SEL = 0; - } - - /* drive the IO output low */ - *pGPIO_OUTx_W1TC = (uint32_t)(1u << (((unsigned) my_pin < 32u) ? (unsigned) my_pin : ((unsigned) my_pin - 32u))); - *pGPIO_ENABLE1x_W1TS = (uint32_t)(1u << (((unsigned) my_pin < 32u) ? (unsigned) my_pin : ((unsigned) my_pin - 32u))); - } - } - - static void gpio_set_output_level(const uint8_t level) - { - if(((unsigned) my_pin <= 54u) && ((unsigned) level <= 1u)) - { - volatile uint32_t* pGPIO_OUT_W1Tx = (volatile uint32_t*)(GPIO_BASE + (((unsigned) my_pin < 32u) ? 8u : 0x14u) + (((unsigned) level == 1u) ? (0u) : (4u))); - - *pGPIO_OUT_W1Tx = (uint32_t)(1u << (((unsigned) my_pin < 32u) ? (unsigned) my_pin : ((unsigned) my_pin - 32u))); - } - } - - static void gpio_toggle_output_level() - { - if((unsigned) my_pin <= 54u) - { - volatile uint32_t* pGPIO_OUT = (volatile uint32_t*)(GPIO_BASE + 4u + (((unsigned) my_pin < 32u) ? 0u : 0xCu)); - - *pGPIO_OUT ^= (uint32_t)(1u << (((unsigned) my_pin < 32u) ? 
(unsigned) my_pin : ((unsigned) my_pin - 32u))); - } - } + static void gpio_cfg_output(); + static void gpio_set_output_level(const std::uint8_t level); + static void gpio_toggle_output_level(); public: static auto set_direction_output() -> void @@ -193,7 +142,61 @@ gpio_toggle_output_level(); } }; - } - } -#endif // MCAL_PORT_2025_02_22_H + template + void port_pin::gpio_cfg_output() + { + if(unsigned { my_pin } <= 54u) + { + volatile IO_MUX_GPIO* pIO_MUX_GPIO { reinterpret_cast (mcal::reg::io_mux_base + 4u + 4u * unsigned { my_pin }) }; + volatile GPIO_FUNC_OUT_SEL_CFG* pGPIO_FUNC_OUT_SEL_CFG { reinterpret_cast(mcal::reg::gpio_base + 0x558u + 4u * unsigned { my_pin }) }; + volatile LP_IO_MUX_GPIO* pLP_IO_MUX_GPIO { reinterpret_cast (mcal::reg::lp_io_mux_base + 8u + 4u * unsigned { my_pin }) }; + volatile std::uint32_t* pGPIO_OUTx_W1TC { reinterpret_cast (mcal::reg::gpio_base + ((unsigned { my_pin } < 32u) ? 0x0Cu : 0x18u)) }; + volatile std::uint32_t* pGPIO_ENABLE1x_W1TS { reinterpret_cast (mcal::reg::gpio_base + ((unsigned { my_pin } < 32u) ? 0x24u : 0x30u)) }; + + // Configure the pinmux. + pIO_MUX_GPIO->bit.FUN_DRV = 2; + pIO_MUX_GPIO->bit.FUN_IE = 0; + pIO_MUX_GPIO->bit.MCU_SEL = 1; + + // Set the output configuration. + pGPIO_FUNC_OUT_SEL_CFG->bit.OUT_SEL = 256; + pGPIO_FUNC_OUT_SEL_CFG->bit.OEN_SEL = 1; + + if(unsigned { my_pin } < 16u) + { + // Make LP_GPIO use HP_IO_MUX. + pLP_IO_MUX_GPIO->bit.REG_PAD_MUX_SEL = 0; + } + + // Drive the IO output low. + *pGPIO_OUTx_W1TC = static_cast(1u << ((unsigned { my_pin } < 32u) ? unsigned { my_pin } : (unsigned { my_pin } - 32u))); + *pGPIO_ENABLE1x_W1TS = static_cast(1u << ((unsigned { my_pin } < 32u) ? 
unsigned { my_pin } : (unsigned { my_pin } - 32u))); + } + } + + template + void port_pin::gpio_set_output_level(const uint8_t level) + { + if((unsigned { my_pin } <= 54u) && (unsigned { level } <= 1u)) + { + volatile std::uint32_t* pGPIO_OUT_W1Tx { reinterpret_cast(mcal::reg::gpio_base + ((unsigned { my_pin } < 32u) ? 8u : 0x14u) + (((unsigned) level == 1u) ? (0u) : (4u))) }; + + *pGPIO_OUT_W1Tx = static_cast(1u << ((unsigned { my_pin } < 32u) ? unsigned { my_pin } : (unsigned { my_pin } - 32u))); + } + } + + template + void port_pin::gpio_toggle_output_level() + { + if(unsigned { my_pin } <= 54u) + { + volatile std::uint32_t* pGPIO_OUT { reinterpret_cast(mcal::reg::gpio_base + 4u + ((unsigned { my_pin } < 32u) ? 0u : 0xCu)) }; + + *pGPIO_OUT ^= static_cast(1u << ((unsigned { my_pin } < 32u) ? unsigned { my_pin } : (unsigned { my_pin } - 32u))); + } + } + } // namespace port + } // namespace mcal + +#endif // MCAL_PORT_2026_02_13_H diff --git a/ref_app/src/mcal/xtensa_esp32_p4/mcal_reg.h b/ref_app/src/mcal/xtensa_esp32_p4/mcal_reg.h index fcec144d1..99f5df784 100644 --- a/ref_app/src/mcal/xtensa_esp32_p4/mcal_reg.h +++ b/ref_app/src/mcal/xtensa_esp32_p4/mcal_reg.h @@ -21,6 +21,10 @@ constexpr std::uint32_t clint_mtimecmph { clint_base + static_cast(UINT32_C(0x00004004)) }; constexpr std::uint32_t clic_base { UINT32_C(0x20800000) }; + + constexpr std::uint32_t gpio_base { UINT32_C(0x500E0000) }; + constexpr std::uint32_t lp_io_mux_base { UINT32_C(0x5012B000) }; + constexpr std::uint32_t io_mux_base { UINT32_C(0x500E1000) }; } } diff --git a/ref_app/target.vcxproj b/ref_app/target.vcxproj index a5db7249b..990577012 100644 --- a/ref_app/target.vcxproj +++ b/ref_app/target.vcxproj @@ -1287,6 +1287,9 @@ + + + diff --git a/ref_app/target.vcxproj.filters b/ref_app/target.vcxproj.filters index a7292ae56..b27cbdfcc 100644 --- a/ref_app/target.vcxproj.filters +++ b/ref_app/target.vcxproj.filters @@ -358,6 +358,9 @@ {17889d6f-9174-40cb-bcad-01e3995207e5} + + 
{65c5c603-a019-41d0-8a19-3cadaca7fd3c} + @@ -1175,6 +1178,15 @@ micros\xtensa_esp32_p4\startup\Code\Appli + + micros\xtensa_esp32_p4\startup + + + micros\xtensa_esp32_p4\startup + + + micros\xtensa_esp32_p4\startup\Code\StdLib + diff --git a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld index df26f371d..7318a0e19 100644 --- a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld +++ b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld @@ -23,7 +23,7 @@ ENTRY(_start) /****************************************************************************************** Link librariess ******************************************************************************************/ -/* INPUT(libc.a libm.a libgcc.a) */ +INPUT(libc.a libm.a libgcc.a) /****************************************************************************************** Globals @@ -57,24 +57,16 @@ SECTIONS . = ALIGN(4); *(.boot) . = ALIGN(4); + _ctors_begin = .; + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array*)) + _ctors_end = .; *(.progmem*) . = ALIGN(4); *(.text) . = ALIGN(4); *(.text*) . = ALIGN(4); - *(.glue_7) - . = ALIGN(4); - *(.glue_7t) - . = ALIGN(4); - . = ALIGN(4); - } > FLASH - - /* Read-only data (.rodata) */ - .rodata : ALIGN(4) - { - PROVIDE(__RODATA_BASE_ADDRESS = .); - . = ALIGN(4); *(.rodata) . = ALIGN(4); *(.rodata*) @@ -82,52 +74,11 @@ SECTIONS *(.srodata) . = ALIGN(4); *(.srodata*) - } > FLASH - - /* Section for constructors */ - .ctors : ALIGN(4) - { - PROVIDE(__CTOR_LIST__ = .); - KEEP (*(SORT(.ctors.*))) - KEEP (*(.ctors)) - KEEP (*(SORT(.init_array.*))) - KEEP (*(.init_array)) - LONG(-1) ; - PROVIDE(__CTOR_END__ = .); . = ALIGN(4); - } > FLASH - - - /* Section for destructors */ - .dtors : ALIGN(4) - { - PROVIDE(__DTOR_LIST__ = .); - KEEP (*(SORT(.dtors.*))) - KEEP (*(.dtors)) - KEEP (*(SORT(.fini_array.*))) - KEEP (*(.fini_array)) - LONG(-1) ; - PROVIDE(__DTOR_END__ = .); - . 
= ALIGN(4); - } > FLASH - - /* Runtime clear table */ - .clear_sec : ALIGN(4) - { - PROVIDE(__RUNTIME_CLEAR_TABLE = .) ; - LONG(0 + ADDR(.bss)); LONG(SIZEOF(.bss)); - LONG(-1); LONG(-1); + *(.glue_7) . = ALIGN(4); - } > FLASH - - /* Runtime copy table */ - .copy_sec : ALIGN(4) - { - PROVIDE(__RUNTIME_COPY_TABLE = .) ; - LONG(LOADADDR(.data)); LONG(0 + ADDR(.data)); LONG(SIZEOF(.data)); - LONG(-1); LONG(-1); LONG(-1); + *(.glue_7t) . = ALIGN(4); - PROVIDE(__CODE_END_ADDRESS = .); } > FLASH .riscv.extab : @@ -156,17 +107,27 @@ SECTIONS /* The ROM-to-RAM initialized data sections */ .data : ALIGN(4) { + _data_begin = .; *(.data) + . = ALIGN(4); + KEEP (*(.data)) *(.data*) . = ALIGN(4); + KEEP (*(.data*)) + _data_end = .; } > RAM AT>FLASH /* The uninitialized (zero-cleared) data sections */ .bss : ALIGN(4) { + _bss_begin = .; *(.bss) + . = ALIGN(4); + KEEP (*(.bss)) *(.bss*) . = ALIGN(4); + KEEP (*(.bss*)) + _bss_end = .; } > RAM PROVIDE(end = .); @@ -191,4 +152,6 @@ SECTIONS /* ROM APIs */ printf = 0x4fc00024; + + _rom_data_begin = LOADADDR(.data); } diff --git a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk index 188fcb972..9c0f12a72 100644 --- a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk +++ b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk @@ -10,8 +10,11 @@ # ------------------------------------------------------------------------------ FILES_TGT := $(PATH_APP)/mcal/mcal_gcc_cxx_completion \ + $(PATH_TGT)/startup/crt0_init_ram \ + $(PATH_TGT)/startup/crt1 \ $(PATH_TGT)/startup/Code/Appli/main \ $(PATH_TGT)/startup/Code/Appli/main_cores \ $(PATH_TGT)/startup/Code/Startup/boot \ $(PATH_TGT)/startup/Code/Startup/intvect \ - $(PATH_TGT)/startup/Code/Startup/Startup + $(PATH_TGT)/startup/Code/Startup/Startup \ + $(PATH_TGT)/startup/Code/StdLib/StdLib diff --git a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_flags.gmk 
b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_flags.gmk index e4e32320f..86c1f3dc3 100644 --- a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_flags.gmk +++ b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_flags.gmk @@ -35,8 +35,8 @@ TGT_CXXFLAGS = -std=c++23 TGT_INCLUDES = -I$(PATH_TGT)/startup/Code \ -I$(PATH_TGT)/startup/Code/Appli \ - -I$(PATH_TGT)/startup/Code/Startup - + -I$(PATH_TGT)/startup/Code/Startup \ + -isystem $(PATH_APP)/util/STL TGT_AFLAGS = diff --git a/ref_app/target/micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c b/ref_app/target/micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c index e57215024..4a3f383de 100644 --- a/ref_app/target/micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c +++ b/ref_app/target/micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c @@ -22,38 +22,6 @@ #include -//========================================================================================= -// Types definitions -//========================================================================================= -typedef struct -{ - unsigned long sourceAddr; /* Source Address (section in ROM memory) */ - unsigned long targetAddr; /* Target Address (section in RAM memory) */ - unsigned long size; /* length of section (bytes) */ -} -runtimeCopyTable_t; - -typedef struct -{ - unsigned long addr; /* Source Address (section in RAM memory) */ - unsigned long size; /* Length of section (bytes) */ -} -runtimeClearTable_t; - -//========================================================================================= -// Linker variables -//========================================================================================= -extern const runtimeCopyTable_t __RUNTIME_COPY_TABLE[]; -extern const runtimeClearTable_t __RUNTIME_CLEAR_TABLE[]; -extern unsigned long __CTOR_LIST__[]; - -//========================================================================================= -// Defines 
-//========================================================================================= -#define __STARTUP_RUNTIME_COPYTABLE (runtimeCopyTable_t*)(&__RUNTIME_COPY_TABLE[0]) -#define __STARTUP_RUNTIME_CLEARTABLE (runtimeClearTable_t*)(&__RUNTIME_CLEAR_TABLE[0]) -#define __STARTUP_RUNTIME_CTORS (unsigned long*)(&__CTOR_LIST__[0]) - //========================================================================================= // Function prototype //========================================================================================= @@ -100,34 +68,9 @@ void Startup_Init(void) //----------------------------------------------------------------------------------------- static void Startup_InitRam(void) { - unsigned long ClearTableIdx = 0; - unsigned long CopyTableIdx = 0; - - /* Clear Table */ - while((__STARTUP_RUNTIME_CLEARTABLE)[ClearTableIdx].addr != (unsigned long)-1 && (__STARTUP_RUNTIME_CLEARTABLE)[ClearTableIdx].size != (unsigned long)-1) - { - for(unsigned long idx = 0; idx < ((unsigned long)((__STARTUP_RUNTIME_CLEARTABLE)[ClearTableIdx].size) / 4); idx++) - { - ((unsigned long*)((__STARTUP_RUNTIME_CLEARTABLE)[ClearTableIdx].addr))[idx] = 0; - } - - ClearTableIdx++; - } + extern void crt_init_ram(void); - /* Copy Table */ - while((__STARTUP_RUNTIME_COPYTABLE)[CopyTableIdx].sourceAddr != (unsigned long)-1 && - (__STARTUP_RUNTIME_COPYTABLE)[CopyTableIdx].targetAddr != (unsigned long)-1 && - (__STARTUP_RUNTIME_COPYTABLE)[CopyTableIdx].size != (unsigned long)-1 - ) - { - for(unsigned long idx = 0; idx < ((unsigned long)((__STARTUP_RUNTIME_COPYTABLE)[CopyTableIdx].size) / 4); idx++) - { - ((unsigned long*)((__STARTUP_RUNTIME_COPYTABLE)[CopyTableIdx].targetAddr))[idx] = - ((unsigned long*)((__STARTUP_RUNTIME_COPYTABLE)[CopyTableIdx].sourceAddr))[idx]; - } - - CopyTableIdx++; - } + crt_init_ram(); } //----------------------------------------------------------------------------------------- @@ -139,12 +82,9 @@ static void Startup_InitRam(void) 
//----------------------------------------------------------------------------------------- static void Startup_InitCtors(void) { - unsigned long CtorIdx = 0U; - - while((__STARTUP_RUNTIME_CTORS)[CtorIdx] != ((unsigned long)-1)) - { - ((void (*)(void))((__STARTUP_RUNTIME_CTORS)[CtorIdx++]))(); - } + extern void crt_init_ctors(); + + crt_init_ctors(); } //----------------------------------------------------------------------------------------- diff --git a/ref_app/target/micros/xtensa_esp32_p4/startup/Code/StdLib/StdLib.cpp b/ref_app/target/micros/xtensa_esp32_p4/startup/Code/StdLib/StdLib.cpp new file mode 100644 index 000000000..753d9b0dd --- /dev/null +++ b/ref_app/target/micros/xtensa_esp32_p4/startup/Code/StdLib/StdLib.cpp @@ -0,0 +1,228 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright Christopher Kormanyos 2025. +// Distributed under the Boost Software License, +// Version 1.0. (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +// Originally from (but strongly modified from): +/****************************************************************************************** + Filename : StdLib.c + + Core : Xtensa LX7 + + MCU : ESP32-S3 + + Author : Chalandi Amine + + Owner : Chalandi Amine + + Date : 22.02.2025 + + Description : Hand-written StdLib functions + +******************************************************************************************/ + +#include +#include +#include + +extern "C" { + +using DItype = signed long long; +using UDItype = unsigned long long; +using USItype = unsigned int; + +extern auto __builtin_clzll(unsigned long long) -> int; + +using DWtype = DItype; +using UDWtype = UDItype; +using UWtype = USItype; + +auto __udivdi3 (UDWtype n, UDWtype d) -> UDWtype; +auto __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) -> UDWtype; +auto __umoddi3 (UDWtype u, UDWtype v) -> UDWtype; + +auto __udivdi3 (UDWtype n, UDWtype d) -> UDWtype +{ + return 
__udivmoddi4 (n, d, (UDWtype *) 0); +} + +auto __umoddi3 (UDWtype u, UDWtype v) -> UDWtype +{ + UDWtype w; + + (void) __udivmoddi4 (u, v, &w); + + return w; +} + +auto __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) -> UDWtype +{ + UDWtype q = 0, r = n, y = d; + UWtype lz1, lz2, i, k; + + // Implements align divisor shift dividend method. This algorithm + // aligns the divisor under the dividend and then perform number of + // test-subtract iterations which shift the dividend left. Number of + // iterations is k + 1 where k is the number of bit positions the + // divisor must be shifted left to align it under the dividend. + // quotient bits can be saved in the rightmost positions of the dividend + // as it shifts left on each test-subtract iteration. + + if (y <= r) + { + lz1 = static_cast(__builtin_clzll(static_cast(d))); + lz2 = static_cast(__builtin_clzll(static_cast(n))); + + k = lz1 - lz2; + y = (y << k); + + // Dividend can exceed 2 ^ (width - 1) - 1 but still be less than the + // aligned divisor. Normal iteration can drops the high order bit + // of the dividend. Therefore, first test-subtract iteration is a + // special case, saving its quotient bit in a separate location and + // not shifting the dividend. + + if (r >= y) + { + r = r - y; + q = (1ULL << k); + } + + if (k > 0) + { + y = y >> 1; + + // k additional iterations where k regular test subtract shift + // dividend iterations are done. + + i = k; + + do + { + if (r >= y) + { + r = ((r - y) << 1) + 1; + } + else + { + r = (r << 1); + } + + i = i - 1; + } + while (i != 0); + + // First quotient bit is combined with the quotient bits resulting + // from the k regular iterations. 
+ + q = q + r; + r = r >> k; + q = q - (r << k); + } + } + + if (rp) + { + *rp = r; + } + + return q; +} + +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-align" +#endif + +auto memset(void* str, int c, size_t n) -> void* +{ + std::uint8_t* ptr { reinterpret_cast(str) }; + + const std::uint8_t uc { static_cast(c) }; + + std::uint32_t value = static_cast(uc); + + // Set value to repeat the byte across a 32-bit word. + value |= value << unsigned { UINT8_C( 8) }; + value |= value << unsigned { UINT8_C(16) }; + + // Align to the next 32-bit boundary. + while ( (static_cast(reinterpret_cast(ptr) & unsigned { UINT8_C(3) }) != 0U) + && (n > std::size_t { UINT8_C(0) })) + { + *ptr++ = uc; + + --n; + } + + // Set memory in 32-bit chunks. + std::uint32_t* ptr32 { reinterpret_cast(ptr) }; + + while (n >= std::size_t { UINT8_C(4) }) + { + *ptr32++ = value; + + n -= std::size_t { UINT8_C(4) }; + } + + // Handle any remaining bytes. + ptr = reinterpret_cast(ptr32); + + while (n > std::size_t { UINT8_C(0) }) + { + *ptr++ = uc; + + --n; + } + + return str; +} + +auto memcpy (void* dest, const void* src, size_t n) -> void* +{ + std::uint8_t* d { reinterpret_cast(dest) }; + const std::uint8_t* s { reinterpret_cast(src) }; + + // Align destination to the next 32-bit boundary. + while ( (static_cast(reinterpret_cast(d) & unsigned { UINT8_C(3) }) != 0U) + && (n > std::size_t { UINT8_C(0) })) + { + *d++ = *s++; + + --n; + } + + // Copy memory in 32-bit chunks. + + std::uint32_t* d32 { reinterpret_cast(d) }; + const std::uint32_t* s32 { reinterpret_cast(s) }; + + while (n >= std::size_t { UINT8_C(4) }) + { + *d32++ = *s32++; + + n -= std::size_t { UINT8_C(4) }; + } + + // Handle any remaining bytes. 
+ + d = reinterpret_cast(d32); + s = reinterpret_cast(s32); + + while (n > std::size_t { UINT8_C(0) }) + { + *d++ = *s++; + + --n; + } + + return dest; +} + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + +} // extern "C" diff --git a/ref_app/target/micros/xtensa_esp32_p4/startup/crt0_init_ram.cpp b/ref_app/target/micros/xtensa_esp32_p4/startup/crt0_init_ram.cpp new file mode 100644 index 000000000..0247ca261 --- /dev/null +++ b/ref_app/target/micros/xtensa_esp32_p4/startup/crt0_init_ram.cpp @@ -0,0 +1,55 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright Christopher Kormanyos 2007 - 2022. +// Distributed under the Boost Software License, +// Version 1.0. (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#include +#include +#include + +extern "C" +{ + extern std::uintptr_t _rom_data_begin; // Start address for the initialization values of the rom-to-ram section. + extern std::uintptr_t _data_begin; // Start address for the .data section. + extern std::uintptr_t _data_end; // End address for the .data section. + extern std::uintptr_t _bss_begin; // Start address for the .bss section. + extern std::uintptr_t _bss_end; // End address for the .bss section. +} + +namespace crt +{ + void init_ram(); +} + +extern "C" +{ + void crt_init_ram(void); + + void crt_init_ram(void) + { + crt::init_ram(); + } +} + +void crt::init_ram() +{ + using memory_aligned_type = std::uint8_t; + + // Copy the data segment initializers from ROM to RAM. + // Note that all data segments are aligned by 1. + const std::size_t size_data = + std::size_t( static_cast(static_cast(&_data_end)) + - static_cast(static_cast(&_data_begin))); + + std::copy(static_cast(static_cast(&_rom_data_begin)), + static_cast(static_cast(&_rom_data_begin)) + size_data, + static_cast< memory_aligned_type*>(static_cast< void*>(&_data_begin))); + + // Clear the bss segment. 
+ // Note that the bss segment is aligned by 1. + std::fill(static_cast(static_cast(&_bss_begin)), + static_cast(static_cast(&_bss_end)), + static_cast(0U)); +} diff --git a/ref_app/target/micros/xtensa_esp32_p4/startup/crt1.cpp b/ref_app/target/micros/xtensa_esp32_p4/startup/crt1.cpp new file mode 100644 index 000000000..a31157e78 --- /dev/null +++ b/ref_app/target/micros/xtensa_esp32_p4/startup/crt1.cpp @@ -0,0 +1,47 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright Christopher Kormanyos 2007 - 2022. +// Distributed under the Boost Software License, +// Version 1.0. (See accompanying file LICENSE_1_0.txt +// or copy at http://www.boost.org/LICENSE_1_0.txt) +// + +#include +#include + +extern "C" +{ + struct ctor_type + { + using function_type = void(*)(); + }; + + extern ctor_type::function_type _ctors_end; + extern ctor_type::function_type _ctors_begin; +} + +namespace crt +{ + void init_ctors(); +} + +extern "C" +{ + void crt_init_ctors(); + + void crt_init_ctors() + { + crt::init_ctors(); + } +} + +void crt::init_ctors() +{ + using local_const_reverse_iterator = std::reverse_iterator; + + std::for_each(local_const_reverse_iterator(&_ctors_end), + local_const_reverse_iterator(&_ctors_begin), + [](const ctor_type::function_type pf) + { + pf(); + }); +} From b15118ab6c5a60317cfbc8859dc4af40b5f758ea Mon Sep 17 00:00:00 2001 From: ckormanyos Date: Fri, 13 Feb 2026 19:34:28 +0100 Subject: [PATCH 2/4] Update benchmark docs --- ref_app/src/app/benchmark/readme.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ref_app/src/app/benchmark/readme.md b/ref_app/src/app/benchmark/readme.md index 331d6f200..d9e2d4134 100644 --- a/ref_app/src/app/benchmark/readme.md +++ b/ref_app/src/app/benchmark/readme.md @@ -85,6 +85,7 @@ The benchmark used is a ${\sim}100$ decimal digit AGM $\pi$ calculation. 
|---------------------------|-----------------|------------| | `am6254_soc` | 0.37 | 1.0 | | `am335x` | 1.5 | 4.1 | +| `xtensa_esp32_p4` | 2.5 | 6.8 | | `stm32f446` | 5.1 | 14 | | `rpi_pico2_rp2350` | 6.3 | 17 | | `wch_ch32v307` | 8.0 | 22 | @@ -121,6 +122,9 @@ running the benchmark in about $10~{\text{ms}}$. Running on only one core (core0) of the $32$-bit controller of the `xtensa_esp32_s3` board results in a runtime of $9.1~{\text{ms}}$ for the calculation. +The next-generation `xtensa_esp32_p4` with a dual-core RISC-V +architecture is significantly faster, coming in at $2.5~{\text{ms}}$ +(running the benchmark on one core). Using only one core (core1) on the $32$-bit ARM(R) Cortex(R) M0+ controller of the `rpi_pico_rp2040` board results in a calculation From 2e786dc7b772b25bc9c757413609e326cdc6c6df Mon Sep 17 00:00:00 2001 From: ckormanyos Date: Fri, 13 Feb 2026 20:09:19 +0100 Subject: [PATCH 3/4] Cleanup esp32_p4 Stdlib manual funcs --- .../startup/Code/StdLib/StdLib.cpp | 120 +----------------- 1 file changed, 7 insertions(+), 113 deletions(-) diff --git a/ref_app/target/micros/xtensa_esp32_p4/startup/Code/StdLib/StdLib.cpp b/ref_app/target/micros/xtensa_esp32_p4/startup/Code/StdLib/StdLib.cpp index 753d9b0dd..dc1dc04db 100644 --- a/ref_app/target/micros/xtensa_esp32_p4/startup/Code/StdLib/StdLib.cpp +++ b/ref_app/target/micros/xtensa_esp32_p4/startup/Code/StdLib/StdLib.cpp @@ -1,5 +1,5 @@ /////////////////////////////////////////////////////////////////////////////// -// Copyright Christopher Kormanyos 2025. +// Copyright Christopher Kormanyos 2025 - 2026. // Distributed under the Boost Software License, // Version 1.0. 
(See accompanying file LICENSE_1_0.txt // or copy at http://www.boost.org/LICENSE_1_0.txt) @@ -8,19 +8,15 @@ // Originally from (but strongly modified from): /****************************************************************************************** Filename : StdLib.c - - Core : Xtensa LX7 - - MCU : ESP32-S3 - + Author : Chalandi Amine - + Owner : Chalandi Amine - + Date : 22.02.2025 - - Description : Hand-written StdLib functions - + + Description : Handwritten StdLib functions + ******************************************************************************************/ #include @@ -29,108 +25,6 @@ extern "C" { -using DItype = signed long long; -using UDItype = unsigned long long; -using USItype = unsigned int; - -extern auto __builtin_clzll(unsigned long long) -> int; - -using DWtype = DItype; -using UDWtype = UDItype; -using UWtype = USItype; - -auto __udivdi3 (UDWtype n, UDWtype d) -> UDWtype; -auto __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) -> UDWtype; -auto __umoddi3 (UDWtype u, UDWtype v) -> UDWtype; - -auto __udivdi3 (UDWtype n, UDWtype d) -> UDWtype -{ - return __udivmoddi4 (n, d, (UDWtype *) 0); -} - -auto __umoddi3 (UDWtype u, UDWtype v) -> UDWtype -{ - UDWtype w; - - (void) __udivmoddi4 (u, v, &w); - - return w; -} - -auto __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) -> UDWtype -{ - UDWtype q = 0, r = n, y = d; - UWtype lz1, lz2, i, k; - - // Implements align divisor shift dividend method. This algorithm - // aligns the divisor under the dividend and then perform number of - // test-subtract iterations which shift the dividend left. Number of - // iterations is k + 1 where k is the number of bit positions the - // divisor must be shifted left to align it under the dividend. - // quotient bits can be saved in the rightmost positions of the dividend - // as it shifts left on each test-subtract iteration. 
- - if (y <= r) - { - lz1 = static_cast(__builtin_clzll(static_cast(d))); - lz2 = static_cast(__builtin_clzll(static_cast(n))); - - k = lz1 - lz2; - y = (y << k); - - // Dividend can exceed 2 ^ (width - 1) - 1 but still be less than the - // aligned divisor. Normal iteration can drops the high order bit - // of the dividend. Therefore, first test-subtract iteration is a - // special case, saving its quotient bit in a separate location and - // not shifting the dividend. - - if (r >= y) - { - r = r - y; - q = (1ULL << k); - } - - if (k > 0) - { - y = y >> 1; - - // k additional iterations where k regular test subtract shift - // dividend iterations are done. - - i = k; - - do - { - if (r >= y) - { - r = ((r - y) << 1) + 1; - } - else - { - r = (r << 1); - } - - i = i - 1; - } - while (i != 0); - - // First quotient bit is combined with the quotient bits resulting - // from the k regular iterations. - - q = q + r; - r = r >> k; - q = q - (r << k); - } - } - - if (rp) - { - *rp = r; - } - - return q; -} - #if defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wcast-align" From 5b318d7efbf750ac9acac446c9d82faea63258ac Mon Sep 17 00:00:00 2001 From: ckormanyos Date: Fri, 13 Feb 2026 20:14:10 +0100 Subject: [PATCH 4/4] Clarify comments and dates --- .../target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld | 8 ++++++++ .../micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk | 2 +- .../micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld index 7318a0e19..4f6e0a55e 100644 --- a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld +++ b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4.ld @@ -1,3 +1,11 @@ +/* + Copyright Christopher Kormanyos 2026. + Distributed under the Boost Software License, + Version 1.0. 
(See accompanying file LICENSE_1_0.txt + or copy at http://www.boost.org/LICENSE_1_0.txt) +*/ + +/* Originally from: */ /****************************************************************************************** Filename : Memory_Map.ld diff --git a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk index 9c0f12a72..bcecc958d 100644 --- a/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk +++ b/ref_app/target/micros/xtensa_esp32_p4/make/xtensa_esp32_p4_files.gmk @@ -1,5 +1,5 @@ # -# Copyright Christopher Kormanyos 2025 - 2026. +# Copyright Christopher Kormanyos 2026. # Distributed under the Boost Software License, # Version 1.0. (See accompanying file LICENSE_1_0.txt # or copy at http://www.boost.org/LICENSE_1_0.txt) diff --git a/ref_app/target/micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c b/ref_app/target/micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c index 4a3f383de..92e55b9a2 100644 --- a/ref_app/target/micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c +++ b/ref_app/target/micros/xtensa_esp32_p4/startup/Code/Startup/Startup.c @@ -68,6 +68,8 @@ void Startup_Init(void) //----------------------------------------------------------------------------------------- static void Startup_InitRam(void) { + // Use my own standard static RAM initialization. + extern void crt_init_ram(void); crt_init_ram(); @@ -82,6 +84,8 @@ static void Startup_InitRam(void) //----------------------------------------------------------------------------------------- static void Startup_InitCtors(void) { + // Use my own standard static constructor initialization. + extern void crt_init_ctors(); crt_init_ctors();