-
Notifications
You must be signed in to change notification settings - Fork 115
Add profiling instrumentation for NAM building blocks #219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
sdatkinson
merged 7 commits into
sdatkinson:main-profiling
from
jfsantos:feature/profiling
Feb 14, 2026
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
4832cda
Add profiling instrumentation for NAM building blocks
5c53524
Fixed build flags for benchmodel
84deb8a
Added a command line tool to output memory usage for a given .nam file
725c8ca
Bugfix - checking that condition_dsp is not null in the JSON (#220)
jfsantos c956055
[BUGFIX, BREAKING] Make activation base class abstract, fix PReLU imp…
sdatkinson d499f74
Add TONE3000 support note in README.md (#224)
sdatkinson d68514d
Replace hardcoded profiling struct with dynamic registry
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| #include "profiling.h" | ||
|
|
||
| #ifdef NAM_PROFILING | ||
|
|
||
| #if defined(__ARM_ARCH_7EM__) || defined(ARM_MATH_CM7) | ||
| // ARM Cortex-M7: Use DWT cycle counter for precise timing | ||
| #include "stm32h7xx.h" | ||
|
|
||
| namespace nam { | ||
| namespace profiling { | ||
|
|
||
| ProfilingEntry g_entries[MAX_PROFILING_TYPES] = {}; | ||
| int g_num_entries = 0; | ||
|
|
||
| // CPU frequency in MHz (Daisy runs at 480 MHz) | ||
| static constexpr uint32_t CPU_FREQ_MHZ = 480; | ||
|
|
||
| uint32_t get_time_us() { | ||
| // DWT->CYCCNT gives cycle count | ||
| // Divide by CPU_FREQ_MHZ to get microseconds | ||
| return DWT->CYCCNT / CPU_FREQ_MHZ; | ||
| } | ||
|
|
||
| } // namespace profiling | ||
| } // namespace nam | ||
|
|
||
| #else | ||
| // Non-ARM: Use std::chrono for timing (for testing on desktop) | ||
| #include <chrono> | ||
|
|
||
| namespace nam { | ||
| namespace profiling { | ||
|
|
||
| ProfilingEntry g_entries[MAX_PROFILING_TYPES] = {}; | ||
| int g_num_entries = 0; | ||
|
|
||
// Returns the number of microseconds elapsed since the first call to this
// function, truncated to 32 bits (the value wraps after about 71.6 minutes).
//
// steady_clock is used because it is guaranteed to be monotonic;
// high_resolution_clock may be an alias of system_clock, whose adjustments
// (NTP, manual changes) would corrupt measured intervals.
uint32_t get_time_us() {
  using clock = std::chrono::steady_clock;
  // Captured once, on the first call; all later timestamps are relative to it.
  static const clock::time_point start = clock::now();
  const auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(clock::now() - start);
  return static_cast<uint32_t>(elapsed.count());
}
|
|
||
| } // namespace profiling | ||
| } // namespace nam | ||
|
|
||
| #endif // ARM check | ||
|
|
||
| namespace nam { | ||
| namespace profiling { | ||
|
|
||
| int register_type(const char* name) { | ||
| int idx = g_num_entries++; | ||
| g_entries[idx].name = name; | ||
| g_entries[idx].accumulated_us = 0; | ||
| return idx; | ||
| } | ||
|
|
||
| void reset() { | ||
| for (int i = 0; i < g_num_entries; i++) | ||
| g_entries[i].accumulated_us = 0; | ||
| } | ||
|
|
||
| void print_results() { | ||
| uint32_t total = 0; | ||
| for (int i = 0; i < g_num_entries; i++) | ||
| total += g_entries[i].accumulated_us; | ||
|
|
||
| printf("\nProfiling breakdown:\n"); | ||
| printf("%-12s %8s %6s\n", "Category", "Time(ms)", "%"); | ||
| printf("%-12s %8s %6s\n", "--------", "--------", "----"); | ||
|
|
||
| for (int i = 0; i < g_num_entries; i++) { | ||
| uint32_t us = g_entries[i].accumulated_us; | ||
| if (us > 0) { | ||
| uint32_t pct = total > 0 ? (us * 100 / total) : 0; | ||
| printf("%-12s %8.1f %5lu%%\n", g_entries[i].name, us / 1000.0f, (unsigned long)pct); | ||
| } | ||
| } | ||
|
|
||
| printf("%-12s %8s %6s\n", "--------", "--------", "----"); | ||
| printf("%-12s %8.1f %5s\n", "Total", total / 1000.0f, "100%"); | ||
| } | ||
|
|
||
| } // namespace profiling | ||
| } // namespace nam | ||
|
|
||
| #endif // NAM_PROFILING |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| #pragma once | ||
|
|
||
| // Dynamic profiling registry for NAM building blocks | ||
| // Enable with -DNAM_PROFILING | ||
| // | ||
| // Usage: | ||
| // 1. Register profiling types at file scope (static init): | ||
| // static int PROF_FOO = nam::profiling::register_type("Foo"); | ||
| // 2. Call nam::profiling::reset() before benchmark | ||
| // 3. In hot path: | ||
| // NAM_PROFILE_START(); | ||
| // // ... code ... | ||
| // NAM_PROFILE_ADD(PROF_FOO); | ||
| // 4. Call nam::profiling::print_results() to display breakdown | ||
|
|
||
| #ifdef NAM_PROFILING | ||
|
|
||
| #include <cstdint> | ||
| #include <cstdio> | ||
|
|
||
namespace nam {
namespace profiling {

/// Capacity of the registry, i.e. the number of slots in g_entries.
constexpr int MAX_PROFILING_TYPES = 32;

/// One registry slot: a label and the time accumulated under it.
struct ProfilingEntry {
  const char* name;        ///< Label supplied to register_type()
  uint32_t accumulated_us; ///< Microseconds added so far by the NAM_PROFILE_* macros
};

/// Registry storage and the count of slots in use (defined in the implementation file).
extern ProfilingEntry g_entries[MAX_PROFILING_TYPES];
extern int g_num_entries;

/// Registers a named profiling type and returns its index into g_entries.
/// Meant to run at static-init time or during setup, not in the hot path.
int register_type(const char* name);

/// Current time in microseconds; the implementation is platform-specific.
uint32_t get_time_us();

/// Sets the accumulated time of every registered entry back to zero.
void reset();

/// Writes the profiling breakdown to stdout.
void print_results();

// Timing macros. Typical sequence inside one function:
//   NAM_PROFILE_START();          // declares the local timer _prof_start
//   ... section A ...
//   NAM_PROFILE_ADD(PROF_A);      // credits the elapsed time to A, restarts the timer
//   ... section B ...
//   NAM_PROFILE_ADD(PROF_B);

// Declares the local variable _prof_start and stamps it with the current time.
#define NAM_PROFILE_START() uint32_t _prof_start = nam::profiling::get_time_us()

// Adds the time elapsed since _prof_start to entry `idx`, then restarts the timer.
#define NAM_PROFILE_ADD(idx) \
  do \
  { \
    uint32_t _prof_now = nam::profiling::get_time_us(); \
    nam::profiling::g_entries[idx].accumulated_us += (_prof_now - _prof_start); \
    _prof_start = _prof_now; \
  } while (0)

// Adds the elapsed time to entry `idx` but leaves the timer running
// (for one-shot measurements).
#define NAM_PROFILE_ADD_NORESTART(idx) \
  nam::profiling::g_entries[idx].accumulated_us += (nam::profiling::get_time_us() - _prof_start)

// Restarts the timer without recording anything (for re-syncing mid-function).
#define NAM_PROFILE_RESTART() _prof_start = nam::profiling::get_time_us()

} // namespace profiling
} // namespace nam
|
|
||
| #else // NAM_PROFILING not defined | ||
|
|
||
// Profiling disabled: every macro expands to a no-op expression and the API
// functions are empty inline stubs, so instrumented code builds unchanged.
#define NAM_PROFILE_START() ((void)0)
#define NAM_PROFILE_ADD(idx) ((void)0)
#define NAM_PROFILE_ADD_NORESTART(idx) ((void)0)
#define NAM_PROFILE_RESTART() ((void)0)

namespace nam {
namespace profiling {
// Stub so that file-scope registrations such as
//   static int PROF_FOO = nam::profiling::register_type("Foo");
// still compile when profiling is off. Nothing is recorded; always returns 0.
inline int register_type(const char*) { return 0; }
// Nothing to reset when profiling is disabled.
inline void reset() {}
// Prints nothing when profiling is disabled.
inline void print_results() {}
} // namespace profiling
} // namespace nam
|
|
||
| #endif // NAM_PROFILING |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this have to be included? Feels weird to include it if I'm doing e.g. a release build.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair, it should at the very least be inside an
#ifdef. I'll change that.