// parallelize_mpi.c
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
// Default symbol names for the five entry points defined in this file.
// Each name is set here only when it has not been defined before this point
// (for example by the build), so an entry point can be renamed without
// editing this file.

// Name of the compute kernel.
#ifndef COMPUTE_NAME
#define COMPUTE_NAME baseline
#endif
// Name of the routine that fills the distributed buffers from the sequential ones.
#ifndef DISTRIBUTE_DATA_NAME
#define DISTRIBUTE_DATA_NAME baseline_distribute
#endif
// Name of the routine that writes the distributed output back to the sequential output.
#ifndef COLLECT_DATA_NAME
#define COLLECT_DATA_NAME baseline_collect
#endif
// Name of the routine that allocates the distributed buffers.
#ifndef DISTRIBUTED_ALLOCATE_NAME
#define DISTRIBUTED_ALLOCATE_NAME baseline_allocate
#endif
// Name of the routine that releases the distributed buffers.
#ifndef DISTRIBUTED_FREE_NAME
#define DISTRIBUTED_FREE_NAME baseline_free
#endif
/*
 * Computes, for every i in [0, m0):
 *
 *     output_distributed[i] = sum over p in [0, k0) of
 *                             input_distributed[(i + p) % m0] * weights_distributed[p]
 *
 * The input is read with wrap-around, so k0 may be larger than m0.
 *
 * m0                  - number of input elements and of output elements.
 * k0                  - number of weights.
 * input_distributed   - m0 floats, only read.
 * weights_distributed - k0 floats, only read.
 * output_distributed  - m0 floats, every element is overwritten.
 *
 * The loop covers all m0 outputs no matter which rank calls it, so each
 * caller obtains the complete result from its own buffers. No MPI state is
 * consulted. With m0 == 0 nothing is read or written.
 */
void COMPUTE_NAME(int m0, int k0,
                  float *input_distributed,
                  float *weights_distributed,
                  float *output_distributed)
{
    for (int i0 = 0; i0 < m0; ++i0) {
        float res = 0.0f;
        for (int p0 = 0; p0 < k0; ++p0) {
            // Wrap the read position back to the start of the input.
            res += input_distributed[(p0 + i0) % m0] * weights_distributed[p0];
        }
        output_distributed[i0] = res;
    }
}
/*
 * Allocates the three per-process working buffers with malloc and stores
 * their addresses through the given pointers.
 *
 * m0                  - element count of the input and output buffers.
 * k0                  - element count of the weights buffer.
 * input_distributed   - receives a buffer of m0 floats.
 * weights_distributed - receives a buffer of k0 floats.
 * output_distributed  - receives a buffer of m0 floats.
 *
 * The buffers are left uninitialised. Allocation results are not checked:
 * a failed malloc is stored as NULL in the corresponding pointer.
 */
void DISTRIBUTED_ALLOCATE_NAME(int m0, int k0, float **input_distributed, float **weights_distributed, float **output_distributed)
{
    const size_t signal_bytes = sizeof(float) * m0;
    const size_t weight_bytes = sizeof(float) * k0;

    float *in_buf = malloc(signal_bytes);
    float *out_buf = malloc(signal_bytes);
    float *w_buf = malloc(weight_bytes);

    *input_distributed = in_buf;
    *output_distributed = out_buf;
    *weights_distributed = w_buf;
}
/*
 * Copies the input and the weights from rank 0's sequential buffers into the
 * distributed buffers of every rank in MPI_COMM_WORLD.
 *
 * m0                  - number of floats in the input.
 * k0                  - number of floats in the weights.
 * input_sequential    - source input; read on rank 0 only.
 * weights_sequential  - source weights; read on rank 0 only.
 * input_distributed   - destination with room for m0 floats on every rank.
 * weights_distributed - destination with room for k0 floats on every rank.
 *
 * Every rank receives the complete arrays rather than a slice, because the
 * compute kernel in this file reads input_distributed[(i + p) % m0] across
 * the whole [0, m0) range and all k0 weights on whichever rank runs it.
 *
 * This is a collective operation: all ranks of MPI_COMM_WORLD must call it.
 */
void DISTRIBUTE_DATA_NAME(int m0, int k0, float *input_sequential, float *weights_sequential, float *input_distributed, float *weights_distributed) {
    int rid;
    MPI_Comm_rank(MPI_COMM_WORLD, &rid);

    // Stage the data in rank 0's distributed buffers first, so the
    // broadcasts below never touch the sequential buffers on other ranks.
    if (rid == 0) {
        for (int i = 0; i < m0; ++i) {
            input_distributed[i] = input_sequential[i];
        }
        for (int i = 0; i < k0; ++i) {
            weights_distributed[i] = weights_sequential[i];
        }
    }

    // Counts are the real array lengths (m0 and k0), matching the sizes the
    // distributed buffers are allocated with.
    MPI_Bcast(input_distributed, m0, MPI_FLOAT, 0, MPI_COMM_WORLD);
    MPI_Bcast(weights_distributed, k0, MPI_FLOAT, 0, MPI_COMM_WORLD);
}
/*
 * Writes the m0-element result into output_sequential on rank 0.
 *
 * m0                 - number of output floats.
 * k0                 - number of weights; not needed here, the output has
 *                      m0 elements regardless of k0.
 * output_distributed - this rank's result buffer of m0 floats.
 * output_sequential  - destination of m0 floats; written on rank 0 only and
 *                      left untouched on every other rank.
 *
 * The compute kernel in this file fills all m0 outputs on every rank, so
 * rank 0 already holds the complete result and no data has to be moved
 * between ranks. Exactly m0 floats are copied, which keeps both the read and
 * the write inside the m0-float buffers.
 */
void COLLECT_DATA_NAME(int m0, int k0, float *output_distributed, float *output_sequential) {
    (void)k0;

    int rid;
    MPI_Comm_rank(MPI_COMM_WORLD, &rid);
    if (rid != 0) {
        return;
    }

    for (int i = 0; i < m0; ++i) {
        output_sequential[i] = output_distributed[i];
    }
}
/*
 * Releases the three working buffers with free().
 *
 * m0, k0              - not used.
 * input_distributed   - buffer to release; may be NULL.
 * weights_distributed - buffer to release; may be NULL.
 * output_distributed  - buffer to release; may be NULL.
 *
 * The pointers must not be used again after this call.
 */
void DISTRIBUTED_FREE_NAME(int m0, int k0, float *input_distributed, float *weights_distributed, float *output_distributed)
{
    (void)m0;
    (void)k0;

    float *const buffers[] = { input_distributed, weights_distributed, output_distributed };
    const size_t buffer_count = sizeof buffers / sizeof buffers[0];

    for (size_t i = 0; i < buffer_count; ++i) {
        free(buffers[i]);
    }
}