51 int butterfly_init(
int *indices,
int count,
int **R,
int *nR,
int **S,
int *nS,
52 int **com_indices,
int *com_count,
int steps,
56 int rank, size, rk, sk;
58 MPI_Request s_request, r_request;
63 MPI_Comm_size(comm, &size);
64 MPI_Comm_rank(comm, &rank);
66 I = (
int **) malloc(steps *
sizeof(
int *));
67 nI = (
int *) malloc(steps *
sizeof(
int));
71 for (k = 0; k < steps;
73 sk = (rank + size - p2k) % size;
74 rk = (rank + p2k) % size;
78 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
79 memcpy(S[k], indices, nS[k] *
sizeof(
int));
81 nS[k] =
card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
82 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
83 set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
86 MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
88 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
90 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
91 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
93 I[steps - k - 1] = (
int *) malloc(nI[steps - k - 1] *
sizeof(
int));
96 MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
98 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
100 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
101 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
107 J = (
int **) malloc(steps *
sizeof(
int *));
108 nJ = (
int *) malloc(steps *
sizeof(
int));
112 for (k = 0; k < steps;
115 sk = (rank + p2k) % size;
116 rk = (rank + size - p2k) % size;
119 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
120 memcpy(J[k], indices, nJ[k] *
sizeof(
int));
122 nJ[k] =
card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
123 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
124 set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
128 if (k != steps - 1) {
129 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
130 MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
131 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
132 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
134 R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
137 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
138 MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
139 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
140 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
149 for (k = 0; k < steps; k++) {
151 sk = (rank + p2k) % size;
152 rk = (rank + size - p2k) % size;
154 nS[k] =
card_and(I[k], nI[k], J[k], nJ[k]);
155 S[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
156 set_and(I[k], nI[k], J[k], nJ[k], S[k]);
161 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
163 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);
164 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
165 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
167 R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
170 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
172 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
174 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
175 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
182 int **USR, *nUSR, **U, *nU;
184 USR = (
int **) malloc(steps *
sizeof(
int *));
185 nUSR = (
int *) malloc(steps *
sizeof(
int));
186 U = (
int **) malloc(steps *
sizeof(
int *));
187 nU = (
int *) malloc(steps *
sizeof(
int));
189 for (k = 0; k < steps; k++) {
190 nUSR[k] =
card_or(S[k], nS[k], R[k], nR[k]);
191 USR[k] = (
int *) malloc(nUSR[k] *
sizeof(
int));
192 set_or(S[k], nS[k], R[k], nR[k], USR[k]);
194 for (k = 0; k < steps; k++) {
197 U[k] = (
int *) malloc(nU[k] *
sizeof(
int));
198 memcpy(U[k], USR[k], nU[k] *
sizeof(
int));
200 nU[k] =
card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
201 U[k] = (
int *) malloc(nU[k] *
sizeof(
int *));
202 set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
205 *com_count = nU[steps - 1];
206 *com_indices = (
int *) malloc(*com_count *
sizeof(
int));
207 memcpy(*com_indices, U[steps - 1], *com_count *
sizeof(
int));
210 for (k = 0; k < steps; k++) {
211 subset2map(*com_indices, *com_count, S[k], nS[k]);
212 subset2map(*com_indices, *com_count, R[k], nR[k]);
236 int nSmax,
double *val,
int steps, MPI_Comm comm) {
240 int rank, size, rk, sk;
241 MPI_Request s_request, r_request;
244 MPI_Comm_size(comm, &size);
245 MPI_Comm_rank(comm, &rank);
247 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
248 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
252 for (k = 0; k < steps; k++) {
253 sk = (rank + p2k) % size;
254 rk = (rank + size - p2k) % size;
256 m2s(val, sbuf, S[k], nS[k]);
259 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
260 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
262 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
263 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
265 t = t + MPI_Wtime() - st;
267 s2m_sum(val, rbuf, R[k], nR[k]);
278 int *nS,
int **com_indices,
int *com_count,
int steps,
282 int rank, size, rk, sk;
284 MPI_Request s_request, r_request;
289 MPI_Comm_size(comm, &size);
290 MPI_Comm_rank(comm, &rank);
292 I = (
int **) malloc(steps *
sizeof(
int *));
293 nI = (
int *) malloc(steps *
sizeof(
int));
298 for (k = 0; k < steps;
301 if (rank % p2k1 < p2k) sk = rk = rank + p2k;
303 sk = rk = rank - p2k;
307 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
308 memcpy(S[k], indices, nS[k] *
sizeof(
int));
310 nS[k] =
card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
311 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
312 set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
315 MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
317 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
319 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
320 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
322 I[steps - k - 1] = (
int *) malloc(nI[steps - k - 1] *
sizeof(
int));
325 MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
327 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
329 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
330 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
337 J = (
int **) malloc(steps *
sizeof(
int *));
338 nJ = (
int *) malloc(steps *
sizeof(
int));
343 for (k = 0; k < steps;
347 if (rank % p2k1 < p2k) sk = rk = rank + p2k;
349 sk = rk = rank - p2k;
353 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
354 memcpy(J[k], indices, nJ[k] *
sizeof(
int));
356 nJ[k] =
card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
357 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
358 set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
362 if (k != steps - 1) {
363 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
364 MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
365 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
366 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
368 R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
371 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
372 MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
373 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
374 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
385 for (k = 0; k < steps; k++) {
388 if (rank % p2k1 < p2k) sk = rk = rank + p2k;
390 sk = rk = rank - p2k;
392 nS[k] =
card_and(I[k], nI[k], J[k], nJ[k]);
393 S[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
394 set_and(I[k], nI[k], J[k], nJ[k], S[k]);
399 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
401 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);
402 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
403 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
405 R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
408 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
410 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
412 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
413 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
421 int **USR, *nUSR, **U, *nU;
423 USR = (
int **) malloc(steps *
sizeof(
int *));
424 nUSR = (
int *) malloc(steps *
sizeof(
int));
425 U = (
int **) malloc(steps *
sizeof(
int *));
426 nU = (
int *) malloc(steps *
sizeof(
int));
428 for (k = 0; k < steps; k++) {
429 nUSR[k] =
card_or(S[k], nS[k], R[k], nR[k]);
430 USR[k] = (
int *) malloc(nUSR[k] *
sizeof(
int));
431 set_or(S[k], nS[k], R[k], nR[k], USR[k]);
433 for (k = 0; k < steps; k++) {
436 U[k] = (
int *) malloc(nU[k] *
sizeof(
int));
437 memcpy(U[k], USR[k], nU[k] *
sizeof(
int));
439 nU[k] =
card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
440 U[k] = (
int *) malloc(nU[k] *
sizeof(
int *));
441 set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
444 *com_count = nU[steps - 1];
445 *com_indices = (
int *) malloc(*com_count *
sizeof(
int));
446 memcpy(*com_indices, U[steps - 1], *com_count *
sizeof(
int));
449 for (k = 0; k < steps; k++) {
450 subset2map(*com_indices, *com_count, S[k], nS[k]);
451 subset2map(*com_indices, *com_count, R[k], nR[k]);
475 int nSmax,
double *val,
int steps, MPI_Comm comm) {
478 int k, p2k, p2k1, tag;
479 int rank, size, rk, sk;
481 MPI_Request s_request, r_request;
484 MPI_Comm_size(comm, &size);
485 MPI_Comm_rank(comm, &rank);
487 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
488 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
493 for (k = 0; k < steps; k++) {
495 if (rank % p2k1 < p2k) {
497 sk = rk = rank + p2k;
504 m2s(val, sbuf, S[k], nS[k]);
505 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
506 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
508 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
509 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
510 s2m_sum(val, rbuf, R[k], nR[k]);
513 t = t + MPI_Wtime() - st;
517 sk = rk = rank - p2k;
521 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
522 m2s(val, sbuf, S[k], nS[k]);
523 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
525 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
526 s2m_sum(val, rbuf, R[k], nR[k]);
528 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
533 t = t + MPI_Wtime() - st;
void m2s(double *mapval, double *submapval, int *subset, int count)
void s2m_sum(double *mapval, double *submapval, int *subset, int count)
Sum submap values the submap values array.
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2)
int set_and(int *A1, int n1, int *A2, int n2, int *A1andA2)
int card_or(int *A1, int n1, int *A2, int n2)
int card_and(int *A1, int n1, int *A2, int n2)
void subset2map(int *A, int nA, int *subA, int nsubA)