11 int butterfly_reduce(
int **R,
int *nR,
int nRmax,
int **S,
int *nS,
int nSmax,
12 double *val,
int steps, MPI_Comm comm);
14 int nSmax,
double *val,
int steps, MPI_Comm comm);
16 int *nS,
int **com_indices,
int *com_count,
int steps,
19 int count_out,
int **R,
int *nR,
int **S,
int *nS,
20 int **com_indices,
int *com_count,
int steps,
22 int set_or(
int *A1,
int n1,
int *A2,
int n2,
int *A1orA2);
23 int set_and(
int *A1,
int n1,
int *A2,
int n2,
int *A1andA2);
24 int card_or(
int *A1,
int n1,
int *A2,
int n2);
25 void m2s(
double *mapval,
double *submapval,
int *subset,
int count);
26 void subset2map(
int *A,
int nA,
int *subA,
int nsubA);
27 void s2m_sum(
double *mapval,
double *submapval,
int *subset,
int count);
28 void s2m_copy(
double *mapval,
double *submapval,
int *subset,
int count);
31 int *nS,
int **com_indices,
int *com_count,
int steps,
39 int rank, size, rk, sk;
41 MPI_Request s_request, r_request;
46 MPI_Comm_size(comm, &size);
47 MPI_Comm_rank(comm, &rank);
49 I = (
int **) malloc(steps *
sizeof(
int *));
50 nI = (
int *) malloc(steps *
sizeof(
int));
54 for (k = 0; k < steps;
56 sk = (rank + size - p2k) % size;
57 rk = (rank + p2k) % size;
62 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
69 nS[k] =
card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
70 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
71 set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
74 MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
76 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
78 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
79 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
81 I[steps - k - 1] = (
int *) malloc(nI[steps - k - 1] *
sizeof(
int));
84 MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
86 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
88 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
89 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
95 J = (
int **) malloc(steps *
sizeof(
int *));
96 nJ = (
int *) malloc(steps *
sizeof(
int));
100 for (k = 0; k < steps;
103 sk = (rank + p2k) % size;
104 rk = (rank + size - p2k) % size;
107 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
108 memcpy(J[k], indices, nJ[k] *
sizeof(
int));
110 nJ[k] =
card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
111 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
112 set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
116 if (k != steps - 1) {
117 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
118 MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
119 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
120 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
122 R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
125 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
126 MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
127 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
128 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
137 for (k = 0; k < steps;
140 sk = (rank + p2k) % size;
141 rk = (rank + size - p2k) % size;
143 nS[k] =
card_and(I[k], nI[k], J[k], nJ[k]);
144 S[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
145 set_and(I[k], nI[k], J[k], nJ[k], S[k]);
150 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
152 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
154 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
155 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
157 R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
160 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
162 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
164 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
165 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
172 int **USR, *nUSR, **U, *nU;
174 USR = (
int **) malloc(steps *
sizeof(
int *));
175 nUSR = (
int *) malloc(steps *
sizeof(
int));
176 U = (
int **) malloc(steps *
sizeof(
int *));
177 nU = (
int *) malloc(steps *
sizeof(
int));
179 for (k = 0; k < steps; k++) {
180 nUSR[k] =
card_or(S[k], nS[k], R[k], nR[k]);
181 USR[k] = (
int *) malloc(nUSR[k] *
sizeof(
int));
182 set_or(S[k], nS[k], R[k], nR[k], USR[k]);
184 for (k = 0; k < steps; k++) {
187 U[k] = (
int *) malloc(nU[k] *
sizeof(
int));
188 memcpy(U[k], USR[k], nU[k] *
sizeof(
int));
190 nU[k] =
card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
191 U[k] = (
int *) malloc(nU[k] *
sizeof(
int *));
192 set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
195 *com_count = nU[steps - 1];
196 *com_indices = (
int *) malloc(*com_count *
sizeof(
int));
197 memcpy(*com_indices, U[steps - 1], *com_count *
sizeof(
int));
201 for (k = 0; k < steps; k++) {
202 subset2map(*com_indices, *com_count, S[k], nS[k]);
203 subset2map(*com_indices, *com_count, R[k], nR[k]);
212 int count_out,
int **R,
int *nR,
int **S,
int *nS,
213 int **com_indices,
int *com_count,
int steps,
227 int rank, size, rk, sk;
229 MPI_Request s_request, r_request;
234 MPI_Comm_size(comm, &size);
235 MPI_Comm_rank(comm, &rank);
237 I = (
int **) malloc(steps *
sizeof(
int *));
238 nI = (
int *) malloc(steps *
sizeof(
int));
242 for (k = 0; k < steps;
244 sk = (rank + size - p2k) % size;
245 rk = (rank + p2k) % size;
250 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
251 memcpy(S[k], indices_out, nS[k] *
sizeof(
int));
254 nS[k] =
card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
256 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
257 set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
261 MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
263 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
265 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
266 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
268 if (nI[steps - k - 1])
269 I[steps - k - 1] = (
int *) malloc(nI[steps - k - 1] *
sizeof(
int));
272 if (nI[steps - k - 1])
273 MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag,
276 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
278 if (nI[steps - k - 1]) MPI_Wait(&r_request, MPI_STATUS_IGNORE);
279 if (nS[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
285 J = (
int **) malloc(steps *
sizeof(
int *));
286 nJ = (
int *) malloc(steps *
sizeof(
int));
290 for (k = 0; k < steps;
293 sk = (rank + p2k) % size;
294 rk = (rank + size - p2k) % size;
298 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
299 memcpy(J[k], indices_in, nJ[k] *
sizeof(
int));
302 nJ[k] =
card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
304 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
305 set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
310 if (k != steps - 1) {
311 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
312 MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
313 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
314 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
316 if (nR[k]) R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
320 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
322 MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
323 if (nR[k]) MPI_Wait(&r_request, MPI_STATUS_IGNORE);
324 if (nJ[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
333 for (k = 0; k < steps;
336 sk = (rank + p2k) % size;
337 rk = (rank + size - p2k) % size;
339 nS[k] =
card_and(I[k], nI[k], J[k], nJ[k]);
341 S[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
342 set_and(I[k], nI[k], J[k], nJ[k], S[k]);
345 if (nI[k]) free(I[k]);
346 if (nJ[k]) free(J[k]);
348 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
350 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
352 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
353 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
355 if (nR[k]) R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
359 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
362 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
364 if (nR[k]) MPI_Wait(&r_request, MPI_STATUS_IGNORE);
365 if (nS[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
372 int **USR, *nUSR, **U, *nU;
374 USR = (
int **) malloc(steps *
sizeof(
int *));
375 nUSR = (
int *) malloc(steps *
sizeof(
int));
376 U = (
int **) malloc(steps *
sizeof(
int *));
377 nU = (
int *) malloc(steps *
sizeof(
int));
379 for (k = 0; k < steps; k++) {
380 nUSR[k] =
card_or(S[k], nS[k], R[k], nR[k]);
382 USR[k] = (
int *) malloc(nUSR[k] *
sizeof(
int));
383 set_or(S[k], nS[k], R[k], nR[k], USR[k]);
386 for (k = 0; k < steps; k++) {
390 U[k] = (
int *) malloc(nU[k] *
sizeof(
int));
391 memcpy(U[k], USR[k], nU[k] *
sizeof(
int));
394 nU[k] =
card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
396 U[k] = (
int *) malloc(nU[k] *
sizeof(
int *));
397 set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
401 *com_count = nU[steps - 1];
403 *com_indices = (
int *) malloc(*com_count *
sizeof(
int));
404 memcpy(*com_indices, U[steps - 1],
405 *com_count *
sizeof(
int));
415 for (k = 0; k < steps; k++) {
416 if (nS[k])
subset2map(*com_indices, *com_count, S[k], nS[k]);
417 if (nR[k])
subset2map(*com_indices, *com_count, R[k], nR[k]);
427 double *val,
int steps, MPI_Comm comm) {
441 int rank, size, rk, sk;
442 MPI_Request s_request, r_request;
445 MPI_Comm_size(comm, &size);
446 MPI_Comm_rank(comm, &rank);
448 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
449 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
453 for (k = 0; k < steps; k++) {
455 rk = (rank + size - p2k) % size;
457 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
458 sk = (rank + p2k) % size;
460 m2s(val, sbuf, S[k], nS[k]);
461 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
464 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
465 s2m_sum(val, rbuf, R[k], nR[k]);
470 if (nS[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
479 int nSmax,
double *val,
int steps, MPI_Comm comm) {
496 int rank, size, rk, sk;
497 MPI_Request s_request, r_request;
500 MPI_Comm_size(comm, &size);
501 MPI_Comm_rank(comm, &rank);
503 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
504 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
508 for (k = 0; k < steps; k++) {
510 rk = (rank + size - p2k) % size;
512 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
513 sk = (rank + p2k) % size;
515 m2s(val, sbuf, S[k], nS[k]);
516 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
519 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
525 if (nS[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
533 void m2s(
double *mapval,
double *submapval,
int *subset,
int count) {
537 for (i = 0; i < count; i++) { submapval[i] = mapval[subset[i]]; }
540 void s2m_sum(
double *mapval,
double *submapval,
int *subset,
int count) {
543 for (i = 0; i < count; i++) { mapval[subset[i]] += submapval[i]; }
546 void s2m_copy(
double *mapval,
double *submapval,
int *subset,
int count) {
550 for (i = 0; i < count; i++) { mapval[subset[i]] = submapval[i]; }
552 int set_or(
int *A1,
int n1,
int *A2,
int n2,
int *A1orA2) {
557 int i = 0, j = 0, k = 0;
560 while (i < n1 || j < n2) {
569 }
else if (A1[i] > A2[j]) {
586 for (j = 0; j < n2; j++) A1orA2[j] = A2[j];
589 for (i = 0; i < n1; i++) A1orA2[i] = A1[i];
597 int set_and(
int *A1,
int n1,
int *A2,
int n2,
601 int i = 0, j = 0, k = 0;
602 while (i < n1 && j < n2) {
605 }
else if (A1[i] > A2[j]) {
617 int card_or(
int *A1,
int n1,
int *A2,
int n2) {
622 int i = 0, j = 0, k = 0;
625 while (i < n1 || j < n2) {
632 }
else if (A1[i] > A2[j]) {
645 k = (n1 == 0) ? k = n2 : k = n1;
650 void subset2map(
int *A,
int nA,
int *subA,
int nsubA) {
652 while (i < nA && j < nsubA) {
653 if (A[i] < subA[j]) {
int card_and(int *A1, int n1, int *A2, int n2)
void m2s(double *mapval, double *submapval, int *subset, int count)
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2)
int set_and(int *A1, int n1, int *A2, int n2, int *A1andA2)
int card_or(int *A1, int n1, int *A2, int n2)
int butterfly_reshuffle(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
void s2m_copy(double *mapval, double *submapval, int *subset, int count)
void s2m_sum(double *mapval, double *submapval, int *subset, int count)
void subset2map(int *A, int nA, int *subA, int nsubA)
int butterfly_reduce_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm)
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
int butterfly_reshuffle_init(int *indices_in, int count_in, int *indices_out, int count_out, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm)