Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions tracer/p2p-events.C
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ tw_stime exec_task(
m->saved_task = ns->my_pe->currentTask;
ns->my_pe->currentTask = task_id.taskid;


#if TRACER_BIGSIM_TRACES
//For each entry of the task, create a recv event and send it out to
//wherever it belongs
Expand Down Expand Up @@ -717,6 +718,30 @@ tw_stime exec_task(
if(PE_isEndEvent(ns->my_pe, task_id.taskid)) {
ns->end_ts = tw_now(lp);
}
/* Marks beginning of region */
if(t->event_id == TRACER_LOOP_EVT && ns->region_start == 0){
ns->computation_t = 0;
ns->region_start_sim_time = finish_time;
ns->region_start = 1;
}
/* Marks End of Region */
else if (t->event_id == TRACER_LOOP_EVT && ns->region_start == 1){
ns->region_end_sim_time = tw_now(lp);
#if DEBUG_PRINT
printf("[%d:%d] COMP: ns->computation_t %f start_time %f finish_time %f tw_now(lp) %f region_end_sim_time %f\n", ns->my_job,ns->my_pe_num, ns->computation_t, ns->region_start_sim_time, finish_time, tw_now(lp), ns->region_end_sim_time);
#endif
ns->region_end = 1;
}

/* Accumulate the execution time of user events as computation time until the region has ended */
if(t->event_id == TRACER_USER_EVT && ns->region_end == 0){
ns->computation_t += time;
#if DEBUG_PRINT
printf("[%d:%d] COMP: ns->computation_t %f finish_time %f codes_local_latency(lp) %f sendFinishTime %f recvFinishTime %f time %f tw_now(lp) %f\n", ns->my_job,
ns->my_pe_num, ns->computation_t, finish_time, codes_local_latency(lp), sendFinishTime, recvFinishTime, time, tw_now(lp));
#endif

}
//Return the execution time of the task
return time;
}
Expand Down
71 changes: 68 additions & 3 deletions tracer/tracer-driver.C
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,15 @@ extern "C" {
char tracer_input[256]; /* filename for tracer input file */

CoreInf *global_rank; /* core to job ID and process ID */
TimeInfo *time_rank; /* core to comm time and comp time*/
JobInf *jobs;
int default_mapping;
int total_ranks;
tw_stime *jobTimes;
tw_stime *commTimes;
tw_stime *compTimes;
tw_stime *maxCompTime;
tw_stime *maxCommTime;
int num_jobs = 0;
tw_stime soft_delay_mpi = 100;
tw_stime nic_delay = 400;
Expand Down Expand Up @@ -206,6 +211,16 @@ int main(int argc, char **argv)
global_rank[i].jobID = -1;
}

time_rank = (TimeInfo*) malloc(num_servers * sizeof(TimeInfo));

for(int i = 0; i < num_servers; i++) {
time_rank[i].jobID = -1;
time_rank[i].rank = -1;
time_rank[i].comm_time = 0;
time_rank[i].comp_time = 0;
}


/* read in the mapping file and populating global_rank */
if(dump_topo_only || strcmp("NA", globalIn) == 0) {
if(!rank) printf("Using default linear mapping of jobs\n");
Expand Down Expand Up @@ -238,8 +253,11 @@ int main(int argc, char **argv)
fscanf(jobIn, "%d", &num_jobs); /* number of jobs */
jobs = (JobInf*) malloc(num_jobs * sizeof(JobInf));
jobTimes = (tw_stime*) malloc(num_jobs * sizeof(tw_stime));
compTimes = (tw_stime*) malloc(num_jobs * sizeof(tw_stime));
commTimes = (tw_stime*) malloc(num_jobs * sizeof(tw_stime));
total_ranks = 0;

maxCommTime = (tw_stime*) malloc(num_jobs * sizeof(tw_stime));
maxCompTime = (tw_stime*) malloc(num_jobs * sizeof(tw_stime));
/* read per job information */
for(int i = 0; i < num_jobs; i++) {
#if TRACER_BIGSIM_TRACES
Expand All @@ -256,6 +274,10 @@ int main(int argc, char **argv)
jobs[i].rankMap = (int*) malloc(jobs[i].numRanks * sizeof(int));
jobs[i].skipMsgId = -1;
jobTimes[i] = 0;
compTimes[i] = 0;
commTimes[i] = 0;
maxCompTime[i] = 0;
maxCommTime[i] = 0;
if(!rank) {
printf("Job %d - ranks %d, trace folder %s, rank file %s, iters %d\n",
i, jobs[i].numRanks, jobs[i].traceDir, jobs[i].map_file, jobs[i].numIters);
Expand Down Expand Up @@ -403,6 +425,31 @@ int main(int argc, char **argv)
for(int i = 0; i < num_jobs; i++) {
printf("Job %d Time %f s\n", i, ns_to_s(jobTimesMax[i]));
}
#ifdef WRITE_MPI_TIMES
FILE *fptr = fopen("mpi_rank_times.txt","w");
for (int i = 0; i < num_servers; i++){
fprintf(fptr,"Job %d Rank %d Comp Time %f Comm Time %f\n", time_rank[i].jobID, time_rank[i].rank, time_rank[i].comp_time, time_rank[i].comm_time);
}
fclose(fptr);
#endif
for (int i = 0; i < num_servers; i++){
if (time_rank[i].jobID >= 0){
commTimes[time_rank[i].jobID] += time_rank[i].comm_time;
compTimes[time_rank[i].jobID] += time_rank[i].comp_time;
if((time_rank[i].comm_time + time_rank[i].comp_time) > (maxCommTime[time_rank[i].jobID] + maxCompTime[time_rank[i].jobID])){
maxCommTime[time_rank[i].jobID] = time_rank[i].comm_time;
maxCompTime[time_rank[i].jobID] = time_rank[i].comp_time;
}
}
}
for (int i = 0; i < num_jobs; i++){
printf("Job[%d] : Total Communication Time %f, Total Computation Time %f\n", i, ns_to_s(commTimes[i]), ns_to_s(compTimes[i]));
}
for (int i = 0; i < num_jobs; i++){
printf("Job[%d] : Max Communication Time %f, Max Computation Time %f\n", i, ns_to_s(maxCommTime[i]), ns_to_s(maxCompTime[i]));
}


}

model_net_report_stats(net_id);
Expand Down Expand Up @@ -460,6 +507,17 @@ void proc_init(
/* skew each kickoff event slightly to help avoid event ties later on */
kickoff_time = startTime + g_tw_lookahead + tw_rand_unif(lp->rng);
ns->end_ts = 0;

/* Initialize the net time spent in computation by a process to 0 */
ns->computation_t = 0;
/* Flag to indicate the start of a region*/
ns->region_start = 0;
/* Flag to indicate the end of a region*/
ns->region_end = 0;
/* Initialize simulation time during start of a region to 0*/
ns->region_start_sim_time = 0;
/* Initialize simulation time during end of a region to 0*/
ns->region_end_sim_time = 0;
/* maintain message sequencing for MPI */
ns->my_pe->sendSeq = new int64_t[jobs[ns->my_job].numRanks];
ns->my_pe->recvSeq = new int64_t[jobs[ns->my_job].numRanks];
Expand Down Expand Up @@ -673,8 +731,11 @@ void proc_finalize(
if(dump_topo_only) return;

tw_stime jobTime = ns->end_ts - ns->start_ts;
tw_stime finalTime = tw_now(lp);

tw_stime commTime = ((ns->region_end_sim_time - ns->region_start_sim_time) - ns->computation_t);
time_rank[lpid_to_global_rank(lp->gid)].jobID = ns->my_job;
time_rank[lpid_to_global_rank(lp->gid)].rank = ns->my_pe_num;
time_rank[lpid_to_global_rank(lp->gid)].comp_time = ns->computation_t;
time_rank[lpid_to_global_rank(lp->gid)].comm_time = commTime;
if(lpid_to_pe(lp->gid) == 0)
printf("Job[%d]PE[%d]: FINALIZE in %f seconds.\n", ns->my_job,
ns->my_pe_num, ns_to_s(tw_now(lp)-ns->start_ts));
Expand Down Expand Up @@ -940,6 +1001,10 @@ inline int lpid_to_job(int lp_gid){
/* Look up the job ID stored in global_rank for the core index `pe`.
 * No bounds check is performed on `pe`. */
inline int pe_to_job(int pe){
  int job_id = global_rank[pe].jobID;
  return job_id;
}
/* Translate an LP global id into the value returned by
 * codes_mapping_get_lp_relative_id(lp_gid, 0, NULL), narrowed to int.
 * proc_finalize uses the result as the index into time_rank. */
inline int lpid_to_global_rank(int lp_gid){
  return codes_mapping_get_lp_relative_id(lp_gid, 0, NULL);
}

bool isPEonThisRank(int jobID, int i) {
int lpid = pe_to_lpid(i, jobID);
Expand Down
14 changes: 14 additions & 0 deletions tracer/tracer-driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,24 @@ typedef struct CoreInf {
int mapsTo, jobID;
} CoreInf;

/* Per-core timing record: maps a core (indexed via lpid_to_global_rank) to the
 * job/rank it ran and to its communication and computation time.
 * main() initializes every entry to jobID = -1, rank = -1 and zero times;
 * proc_finalize() fills in the entry for its LP. */
typedef struct TimeInfo{
/* job the core belonged to; -1 until proc_finalize() records it */
int jobID;
/* rank (PE number) within the job; -1 until proc_finalize() records it */
int rank;
/* (region end time - region start time) - comp_time, as computed in
 * proc_finalize(); presumably nanoseconds, since it is printed through
 * ns_to_s() -- TODO confirm */
tw_stime comm_time;
/* accumulated computation time copied from proc_state.computation_t */
tw_stime comp_time;
} TimeInfo;

/* ROSS level state information for each core */
struct proc_state
{
tw_stime start_ts; /* time when first event is processed */
tw_stime end_ts; /* time when last event is processed */
tw_stime computation_t; /* store time spend in computation*/
int region_start;/* flag to mark the start of a region*/
int region_end;/* flag to mark end of a region*/
tw_stime region_start_sim_time;/* store current simulation time when the region starts*/
tw_stime region_end_sim_time;/* store current simulation time when region ends*/
PE* my_pe; /* stores all core information */
#if TRACER_BIGSIM_TRACES
TraceReader* trace_reader; /* for reading the bigsim traces */
Expand Down Expand Up @@ -131,6 +144,7 @@ int pe_to_lpid(int pe, int job);
int pe_to_job(int pe);
int lpid_to_pe(int lp_gid);
int lpid_to_job(int lp_gid);
int lpid_to_global_rank(int lp_gid);

/* change of units for time */
tw_stime ns_to_s(tw_stime ns);
Expand Down