#include <mpi.h>
#include <stdlib.h>
#include <string.h>

/* The set/map helpers (card_or, set_or, card_and, set_and, subset2map,
   m2s, s2m_sum) are declared in the library header; see the function
   index at the end of this page. */

int butterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS,
                   int **com_indices, int *com_count, int steps, MPI_Comm comm) {
    int k, p2k, tag;
    int rank, size, rk, sk;
    MPI_Request s_request, r_request;
    int **I, *nI;   /* index sets received during the bottom-up pass */
    int **J, *nJ;   /* index sets accumulated during the top-down pass */

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);
    /* First pass (bottom up): exchange index sets so that I^k holds what will
       arrive at step k, while S^k accumulates everything sent so far. */
    I = (int **) malloc(steps * sizeof(int *));
    nI = (int *) malloc(steps * sizeof(int));
    tag = 0;
    p2k = size / 2;

    for (k = 0; k < steps; k++) {
        sk = (rank + size - p2k) % size;   /* send partner at this step */
        rk = (rank + p2k) % size;          /* receive partner at this step */

        if (k == 0) {                      /* S^0 := local indices */
            nS[k] = count;
            S[k] = (int *) malloc(nS[k] * sizeof(int));
            memcpy(S[k], indices, nS[k] * sizeof(int));
        } else {                           /* S^k := S^{k-1} | I^{steps-k} */
            nS[k] = card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
            S[k] = (int *) malloc(nS[k] * sizeof(int));
            set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
        }

        /* exchange set sizes, then the sets themselves */
        MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm, &r_request);
        MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        I[steps - k - 1] = (int *) malloc(nI[steps - k - 1] * sizeof(int));
        tag++;

        MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
                  &r_request);
        MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm, &s_request);
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        p2k /= 2;
        tag++;
    }
    /* Second pass (top down): build the index sets J^k and provisional
       receive sets R^k. */
    J = (int **) malloc(steps * sizeof(int *));
    nJ = (int *) malloc(steps * sizeof(int));
    tag = 0;
    p2k = 1;

    for (k = 0; k < steps; k++) {
        sk = (rank + p2k) % size;
        rk = (rank + size - p2k) % size;

        if (k == 0) {                      /* J^0 := local indices */
            nJ[k] = count;
            J[k] = (int *) malloc(nJ[k] * sizeof(int));
            memcpy(J[k], indices, nJ[k] * sizeof(int));
        } else {                           /* J^k := J^{k-1} | R^{k-1} */
            nJ[k] = card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
            J[k] = (int *) malloc(nJ[k] * sizeof(int));
            set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1], J[k]);
        }

        if (k != steps - 1) {              /* the last step needs no communication */
            MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
            MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            MPI_Wait(&s_request, MPI_STATUS_IGNORE);

            R[k] = (int *) malloc(nR[k] * sizeof(int));
            tag++;

            MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
            MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            MPI_Wait(&s_request, MPI_STATUS_IGNORE);
        }
        p2k *= 2;
        tag++;
    }
    /* Third pass: the definitive send set is S^k = I^k & J^k; exchanging the
       S^k yields the definitive receive sets R^k. */
    tag = 0;
    p2k = 1;

    for (k = 0; k < steps; k++) {
        sk = (rank + p2k) % size;
        rk = (rank + size - p2k) % size;

        free(S[k]);          /* drop the provisional S^k from the first pass */
        nS[k] = card_and(I[k], nI[k], J[k], nJ[k]);
        S[k] = (int *) malloc(nS[k] * sizeof(int));
        set_and(I[k], nI[k], J[k], nJ[k], S[k]);

        free(I[k]);
        free(J[k]);

        MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);   /* receive size */
        MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);   /* send size */
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        if (k != steps - 1)
            free(R[k]);      /* drop the provisional R^k from the second pass */
        R[k] = (int *) malloc(nR[k] * sizeof(int));
        tag++;

        MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request); /* receive indices */
        MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm, &s_request); /* send indices */
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        p2k *= 2;
        tag++;
    }
    /* From here on, work locally: gather the union of everything sent and
       received at every step. */
    int **USR, *nUSR, **U, *nU;

    USR = (int **) malloc(steps * sizeof(int *));
    nUSR = (int *) malloc(steps * sizeof(int));
    U = (int **) malloc(steps * sizeof(int *));
    nU = (int *) malloc(steps * sizeof(int));

    for (k = 0; k < steps; k++) {          /* USR^k := S^k | R^k */
        nUSR[k] = card_or(S[k], nS[k], R[k], nR[k]);
        USR[k] = (int *) malloc(nUSR[k] * sizeof(int));
        set_or(S[k], nS[k], R[k], nR[k], USR[k]);
    }
    for (k = 0; k < steps; k++) {          /* U^k := U^{k-1} | USR^k */
        if (k == 0) {
            nU[k] = nUSR[k];
            U[k] = (int *) malloc(nU[k] * sizeof(int));
            memcpy(U[k], USR[k], nU[k] * sizeof(int));
        } else {
            nU[k] = card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
            U[k] = (int *) malloc(nU[k] * sizeof(int));
            set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
        }
    }
    /* The full communication map is the last union. */
    *com_count = nU[steps - 1];
    *com_indices = (int *) malloc(*com_count * sizeof(int));
    memcpy(*com_indices, U[steps - 1], *com_count * sizeof(int));

    for (k = 0; k < steps; k++) {          /* re-index S^k and R^k into the com map */
        subset2map(*com_indices, *com_count, S[k], nS[k]);
        subset2map(*com_indices, *com_count, R[k], nR[k]);
    }

    /* release the scratch tables */
    for (k = 0; k < steps; k++) {
        free(USR[k]);
        free(U[k]);
    }
    free(USR); free(nUSR); free(U); free(nU);
    free(I); free(nI); free(J); free(nJ);

    return 0;
}
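butterfly_init builds its index sets with the card_or/set_or/card_and/set_and helpers, which appear only by signature in the function index below. A plausible merge-based sketch of the union pair, assuming the index arrays are kept sorted ascending and duplicate-free (an assumption, not the library's actual code):

/* Sketch: count |A1 | A2| by merging two sorted, duplicate-free arrays. */
int card_or(int *A1, int n1, int *A2, int n2) {
    int i = 0, j = 0, n = 0;
    while (i < n1 || j < n2) {
        if (j >= n2 || (i < n1 && A1[i] < A2[j]))  i++;          /* only in A1 */
        else if (i >= n1 || A2[j] < A1[i])         j++;          /* only in A2 */
        else                                       { i++; j++; } /* common element */
        n++;
    }
    return n;
}

/* Sketch: materialize A1 | A2 into A1orA2 (preallocated to card_or size). */
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2) {
    int i = 0, j = 0, n = 0;
    while (i < n1 || j < n2) {
        if (j >= n2 || (i < n1 && A1[i] < A2[j]))  A1orA2[n++] = A1[i++];
        else if (i >= n1 || A2[j] < A1[i])         A1orA2[n++] = A2[j++];
        else                                       { A1orA2[n++] = A1[i]; i++; j++; }
    }
    return n;
}

card_and/set_and follow the same merge pattern, keeping only the elements found in both arrays.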
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax,
                     double *val, int steps, MPI_Comm comm) {
    int k, p2k, tag;
    int rank, size, rk, sk;
    MPI_Request s_request, r_request;
    double *sbuf, *rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    sbuf = (double *) malloc(nSmax * sizeof(double));
    rbuf = (double *) malloc(nRmax * sizeof(double));
    tag = 0;
    p2k = 1;

    for (k = 0; k < steps; k++) {
        rk = (rank + size - p2k) % size;
        MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
        sk = (rank + p2k) % size;
        m2s(val, sbuf, S[k], nS[k]);            /* fill the send buffer */
        MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        s2m_sum(val, rbuf, R[k], nR[k]);        /* sum received values into the map */
        p2k *= 2;
        tag++;
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);
    }

    free(sbuf);
    free(rbuf);
    return 0;
}
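The reduce routines move values with m2s and s2m_sum, again listed only by signature below. A minimal sketch consistent with their use above, assuming subset holds positions into mapval (after butterfly_init these are positions within com_indices):

/* Sketch: gather map values at the subset positions into the send buffer. */
void m2s(double *mapval, double *submapval, int *subset, int count) {
    int i;
    for (i = 0; i < count; i++)
        submapval[i] = mapval[subset[i]];
}

/* Sketch: accumulate received submap values back into the map. */
void s2m_sum(double *mapval, double *submapval, int *subset, int count) {
    int i;
    for (i = 0; i < count; i++)
        mapval[subset[i]] += submapval[i];
}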
int butterfly_blocking_2instr_reduce(int **R, int *nR, int nRmax, int **S,
                                     int *nS, int nSmax, double *val, int steps,
                                     MPI_Comm comm) {
    int k, p2k, tag;
    int rank, size, rk, sk;
    MPI_Status status;
    double *sbuf, *rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    sbuf = (double *) malloc(nSmax * sizeof(double));
    rbuf = (double *) malloc(nRmax * sizeof(double));
    tag = 0;
    p2k = 1;

    for (k = 0; k < steps; k++) {
        sk = (rank + p2k) % size;
        m2s(val, sbuf, S[k], nS[k]);            /* fill the send buffer */
        MPI_Send(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm);
        rk = (rank + size - p2k) % size;
        MPI_Recv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &status);
        s2m_sum(val, rbuf, R[k], nR[k]);        /* sum received values into the map */
        p2k *= 2;
        tag++;
    }

    free(sbuf);
    free(rbuf);
    return 0;
}
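/* Note: with blocking calls in this order, every rank enters MPI_Send before
 * MPI_Recv. This only completes while messages fit MPI's eager protocol; once
 * nS[k] exceeds the eager threshold, all ranks can block in MPI_Send at the
 * same time and deadlock. The MPI_Sendrecv variant below avoids that ordering
 * hazard, and the nonblocking butterfly_reduce above avoids it as well. */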
int butterfly_blocking_1instr_reduce(int **R, int *nR, int nRmax, int **S,
                                     int *nS, int nSmax, double *val, int steps,
                                     MPI_Comm comm) {
    int k, p2k, tag;
    int rank, size, rk, sk;
    MPI_Status status;
    double *sbuf, *rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    sbuf = (double *) malloc(nSmax * sizeof(double));
    rbuf = (double *) malloc(nRmax * sizeof(double));
    tag = 0;
    p2k = 1;

    for (k = 0; k < steps; k++) {
        sk = (rank + p2k) % size;
        rk = (rank + size - p2k) % size;
        m2s(val, sbuf, S[k], nS[k]);            /* fill the send buffer */
        MPI_Sendrecv(sbuf, nS[k], MPI_DOUBLE, sk, tag, rbuf, nR[k], MPI_DOUBLE,
                     rk, tag, comm, &status);
        s2m_sum(val, rbuf, R[k], nR[k]);        /* sum received values into the map */
        p2k *= 2;
        tag++;
    }

    free(sbuf);
    free(rbuf);
    return 0;
}
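A hedged end-to-end sketch of how the routines fit together. The names example_reduce, local_indices and local_values are hypothetical, the communicator size is assumed to be a power of two, and each rank's index list is assumed sorted ascending:

/* Hypothetical driver: sum values for indices shared across 2^steps ranks. */
void example_reduce(int *local_indices, double *local_values, int local_count,
                    MPI_Comm comm) {
    int size, steps = 0, k, i, j, nSmax = 0, nRmax = 0;
    int *com_indices, com_count;

    MPI_Comm_size(comm, &size);
    while ((1 << (steps + 1)) <= size) steps++;  /* steps = log2(size) */

    int **R = (int **) malloc(steps * sizeof(int *));
    int *nR = (int *) malloc(steps * sizeof(int));
    int **S = (int **) malloc(steps * sizeof(int *));
    int *nS = (int *) malloc(steps * sizeof(int));

    butterfly_init(local_indices, local_count, R, nR, S, nS,
                   &com_indices, &com_count, steps, comm);

    for (k = 0; k < steps; k++) {                /* buffer sizes for the reduce */
        if (nS[k] > nSmax) nSmax = nS[k];
        if (nR[k] > nRmax) nRmax = nR[k];
    }

    /* Fold local values onto the communication map (com_indices is sorted);
       indices private to this rank may not appear in it and are skipped. */
    double *com_val = (double *) calloc(com_count, sizeof(double));
    for (i = 0, j = 0; i < local_count; i++) {
        while (j < com_count && com_indices[j] < local_indices[i]) j++;
        if (j < com_count && com_indices[j] == local_indices[i])
            com_val[j] += local_values[i];
    }

    butterfly_reduce(R, nR, nRmax, S, nS, nSmax, com_val, steps, comm);

    /* Read back the globally summed values for the shared indices. */
    for (i = 0, j = 0; i < local_count; i++) {
        while (j < com_count && com_indices[j] < local_indices[i]) j++;
        if (j < com_count && com_indices[j] == local_indices[i])
            local_values[i] = com_val[j];
    }

    free(com_val);
    free(com_indices);
    for (k = 0; k < steps; k++) { free(S[k]); free(R[k]); }
    free(S); free(nS); free(R); free(nR);
}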
void m2s(double *mapval, double *submapval, int *subset, int count)
void s2m_sum(double *mapval, double *submapval, int *subset, int count)
Sum submap values into the map values array.
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2)
int set_and(int *A1, int n1, int *A2, int n2, int *A1andA2)
int card_or(int *A1, int n1, int *A2, int n2)
int card_and(int *A1, int n1, int *A2, int n2)
void subset2map(int *A, int nA, int *subA, int nsubA)
int butterfly_blocking_2instr_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme.
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme.
int butterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm)
Initialize tables for the butterfly-like communication scheme. This routine sets up the tables needed by the butterfly reduction routines.
int butterfly_blocking_1instr_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme.