@@ -228,45 +228,46 @@ namespace {
228228 }
229229
230230 struct Row {
231- std::string name;
232231 char type = ' ' ;
233- double kernel_time = -1.0 ;
232+ std::string name;
233+ uint32_t count = 0 ;
234+ double gpu_time = -1.0 ;
234235 double cpu_time = -1.0 ;
235236 double total_time = -1.0 ;
236237 uint32_t memSize = 0 ;
237- uint32_t count = 0 ;
238+ uint32_t statNEvents ;
238239
// Row::test: writes this row to `stream` as one '|'-separated table line, terminated by std::endl.
// Diff view: the `-` signature took statNEvents as an argument; the `+` signature reads the statNEvents member.
// Columns in order: type, count (cell left blank when 0), name, gpu_time, cpu_time, cpu_time / total_time,
// total_time, then three memory columns (memSize / gpu_time * 1e-9, memSize / statNEvents,
// memSize / statNEvents / count). The memory columns are only filled when both memSize and count are non-zero.
// gpu_time, cpu_time and total_time are multiplied by scale = 1000000.0 / statNEvents before printing.
// cpu_time and total_time equal to -1.0 are treated as "not set" and leave their cell blank;
// gpu_time is printed unconditionally, including its -1.0 default.
// NOTE(review): the statNEvents member is declared without an initializer, so it must be assigned before calling.
// NOTE(review): memSize / statNEvents is an integer division of two uint32_t values; statNEvents == 0 would
// divide by zero. Confirm that callers guarantee a non-zero value.
239- void test (std::ostream& stream, uint32_t statNEvents ) {
240+ void test (std::ostream& stream) {
240241 // static constexpr const char* fmt = "| {0:6} | {1:7} | {2:43} | {3:15.0f} | {4:15.0f} | {5:15.2f} | {6:15.0f} | {7:10.3f} | {8:10.0f} | {9:15.0f} |\n";
241242 double scale = 1000000.0 / statNEvents;
242243 stream << " | " << type << " | " ;
243244 if (count != 0 ) stream << std::format (" {:6} |" , count);
244245 else stream << " |" ;
245- stream << std::format (" {:41}" , name) << " |" << std::format (" {:10.0f}" , kernel_time * scale) << " |" ;
246+ stream << std::format (" {:41}" , name) << " |" << std::format (" {:10.0f}" , gpu_time * scale) << " |" ;
246247 if (cpu_time != -1.0 ) stream << std::format (" {:10.0f} |" , cpu_time * scale);
247248 else stream << " |" ;
248249 if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:8.2f} |" , cpu_time / total_time);
249250 else stream << " |" ;
250251 if (total_time != -1.0 ) stream << std::format (" {:10.0f} |" , total_time * scale);
251252 else stream << " |" ;
252- if (memSize != 0 && count != 0 ) stream << std::format (" {:10.3f} |" , memSize / kernel_time * 1e-9 ) << std::format (" {:14} |" , memSize / statNEvents) << std::format (" {:14} |" , memSize / statNEvents / count);
253+ if (memSize != 0 && count != 0 ) stream << std::format (" {:10.3f} |" , memSize / gpu_time * 1e-9 ) << std::format (" {:14} |" , memSize / statNEvents) << std::format (" {:14} |" , memSize / statNEvents / count);
253254 else stream << " | | |" ;
254255 stream << std::endl;
255256 }
256257
// Row::write: writes this row to `stream` as one comma-separated line, terminated by std::endl.
// Diff view: the `-` signature took statNEvents as an argument; the `+` signature reads the statNEvents member.
// Fields in order: type, count (empty when 0), name, gpu_time, cpu_time, cpu_time / total_time, total_time,
// then memSize / gpu_time * 1e-9, memSize / statNEvents, memSize / statNEvents / count.
// gpu_time, cpu_time and total_time are multiplied by scale = 1000000.0 / statNEvents before printing.
// cpu_time and total_time equal to -1.0 are treated as "not set" and produce an empty field;
// gpu_time is written unconditionally. The three memory fields are only written when both memSize and
// count are non-zero; otherwise only the separators are emitted.
// NOTE(review): the statNEvents member is declared without an initializer, so it must be assigned before calling.
// NOTE(review): memSize / statNEvents is an integer division of two uint32_t values; statNEvents == 0 would
// divide by zero. Confirm that callers guarantee a non-zero value.
257- void write (std::ostream& stream, uint32_t statNEvents ) {
258+ void write (std::ostream& stream) {
258259 double scale = 1000000.0 / statNEvents;
259- // stream << std::format("{0},{1},{2},{3:.0f},{4:.0f},{5:.2f},{6:.0f},{7:.3f},{8},{9}\n", type, count, name, kernel_time * scale, cpu_time * scale, cpu_time / total_time, total_time * scale, memSize / kernel_time * 1e-9, memSize / statNEvents, memSize / statNEvents / count);
260+ // stream << std::format("{0},{1},{2},{3:.0f},{4:.0f},{5:.2f},{6:.0f},{7:.3f},{8},{9}\n", type, count, name, gpu_time * scale, cpu_time * scale, cpu_time / total_time, total_time * scale, memSize / gpu_time * 1e-9, memSize / statNEvents, memSize / statNEvents / count);
260261 stream << type << " ," ;
261262 if (count != 0 ) stream << count;
262- stream << " ," << name << " ," << std::format (" {:.0f}" , kernel_time * scale) << " ," ;
263+ stream << " ," << name << " ," << std::format (" {:.0f}" , gpu_time * scale) << " ," ;
263264 if (cpu_time != -1.0 ) stream << std::format (" {:.0f}" , cpu_time * scale);
264265 stream << " ," ;
265266 if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:.2f}" , cpu_time / total_time);
266267 stream << " ," ;
267268 if (total_time != -1.0 ) stream << std::format (" {:.0f}" , total_time * scale);
268269 stream << " ," ;
269- if (memSize != 0 && count != 0 ) stream << std::format (" {:.3f}" , memSize / kernel_time * 1e-9 ) << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
270+ if (memSize != 0 && count != 0 ) stream << std::format (" {:.3f}" , memSize / gpu_time * 1e-9 ) << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
270271 else stream << " ,," ;
271272 stream << std::endl;
272273 }
@@ -360,14 +361,15 @@ int32_t GPUReconstructionCPU::RunChains()
360361 Row task_row;
361362 task_row.type = ' K' ;
362363 task_row.name = mTimers [i]->name .c_str ();
363- task_row.kernel_time = time;
364+ task_row.gpu_time = time;
364365 task_row.count = mTimers [i]->count ;
366+ task_row.statNEvents = mStatNEvents ;
365367 if (mTimers [i]->memSize && mStatNEvents && time != 0 .) {
366368 task_row.memSize = mTimers [i]->memSize ;
367369 snprintf (bandwidth, 256 , " (%8.3f GB/s - %'14zu bytes - %'14zu per call)" , mTimers [i]->memSize / time * 1e-9 , mTimers [i]->memSize / mStatNEvents , mTimers [i]->memSize / mStatNEvents / mTimers [i]->count );
368370 }
369- if (benchmarkCSV.is_open ()) task_row.write (benchmarkCSV, mStatNEvents );
370- task_row.test (std::cout, mStatNEvents );
371+ if (benchmarkCSV.is_open ()) task_row.write (benchmarkCSV);
372+ task_row.test (std::cout);
371373 // printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
372374 if (GetProcessingSettings ().resetTimers ) {
373375 mTimers [i]->count = 0 ;
@@ -380,35 +382,38 @@ int32_t GPUReconstructionCPU::RunChains()
380382 if (kernelStepTimes[i] != 0 . || mTimersRecoSteps [i].timerTotal .GetElapsedTime () != 0 .) {
381383 Row reco_step_row;
382384 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (Tasks)" ;
383- reco_step_row.kernel_time = kernelStepTimes[i];
385+ reco_step_row.gpu_time = kernelStepTimes[i];
384386 reco_step_row.cpu_time = mTimersRecoSteps [i].timerCPU ;
385387 reco_step_row.total_time = mTimersRecoSteps [i].timerTotal .GetElapsedTime ();
386- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
387- reco_step_row.test (std::cout, mStatNEvents );
388+ reco_step_row.statNEvents = mStatNEvents ;
389+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV);
390+ reco_step_row.test (std::cout);
388391 // printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
389392 // gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
390393 }
391394 if (mTimersRecoSteps [i].bytesToGPU ) {
392395 Row reco_step_row;
393396 reco_step_row.type = ' D' ;
394397 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to GPU)" ;
395- reco_step_row.kernel_time = mTimersRecoSteps [i].timerToGPU .GetElapsedTime ();
398+ reco_step_row.gpu_time = mTimersRecoSteps [i].timerToGPU .GetElapsedTime ();
396399 reco_step_row.memSize = mTimersRecoSteps [i].bytesToGPU ;
397400 reco_step_row.count = mTimersRecoSteps [i].countToGPU ;
398- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
399- reco_step_row.test (std::cout, mStatNEvents );
401+ reco_step_row.statNEvents = mStatNEvents ;
402+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV);
403+ reco_step_row.test (std::cout);
400404 // printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
401405 // mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU);
402406 }
403407 if (mTimersRecoSteps [i].bytesToHost ) {
404408 Row reco_step_row;
405409 reco_step_row.type = ' D' ;
406410 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to Host)" ;
407- reco_step_row.kernel_time = mTimersRecoSteps [i].timerToHost .GetElapsedTime ();
411+ reco_step_row.gpu_time = mTimersRecoSteps [i].timerToHost .GetElapsedTime ();
408412 reco_step_row.memSize = mTimersRecoSteps [i].bytesToHost ;
409413 reco_step_row.count = mTimersRecoSteps [i].countToHost ;
410- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
411- reco_step_row.test (std::cout, mStatNEvents );
414+ reco_step_row.statNEvents = mStatNEvents ;
415+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV);
416+ reco_step_row.test (std::cout);
412417 // printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
413418 // mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
414419 }
@@ -426,23 +431,25 @@ int32_t GPUReconstructionCPU::RunChains()
426431 if (mTimersGeneralSteps [i].GetElapsedTime () != 0 .) {
427432 Row general_step_row;
428433 general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES[i];
429- general_step_row.kernel_time = mTimersGeneralSteps [i].GetElapsedTime ();
430- if (benchmarkCSV.is_open ()) general_step_row.write (benchmarkCSV, mStatNEvents );
431- general_step_row.test (std::cout, mStatNEvents );
434+ general_step_row.gpu_time = mTimersGeneralSteps [i].GetElapsedTime ();
435+ general_step_row.statNEvents = mStatNEvents ;
436+ if (benchmarkCSV.is_open ()) general_step_row.write (benchmarkCSV);
437+ general_step_row.test (std::cout);
432438 // printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
433439 }
434440 }
435441 Row wall_row;
436442 wall_row.name = " Wall" ;
437443 if (GetProcessingSettings ().debugLevel >= 1 ) {
438- wall_row.kernel_time = kernelTotal;
444+ wall_row.gpu_time = kernelTotal;
439445 mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
440446 // printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
441447 }
442448 wall_row.cpu_time = mStatCPUTime ;
443449 wall_row.total_time = mStatWallTime * mStatNEvents / 1000000 ;
444- if (benchmarkCSV.is_open ()) wall_row.write (benchmarkCSV, mStatNEvents );
445- wall_row.test (std::cout, mStatNEvents );
450+ wall_row.statNEvents = mStatNEvents ;
451+ if (benchmarkCSV.is_open ()) wall_row.write (benchmarkCSV);
452+ wall_row.test (std::cout);
446453 // printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
447454 } else if (GetProcessingSettings ().debugLevel >= 0 ) {
448455 GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime , nEventReport.c_str ());
0 commit comments