51 int butterfly_init(
int *indices, 
int count, 
int **R, 
int *nR, 
int **S, 
int *nS,
 
   52                    int **com_indices, 
int *com_count, 
int steps,
 
   56     int         rank, size, rk, sk;
 
   58     MPI_Request s_request, r_request;
 
   63     MPI_Comm_size(comm, &size);
 
   64     MPI_Comm_rank(comm, &rank);
 
   66     I   = (
int **) malloc(steps * 
sizeof(
int *));
 
   67     nI  = (
int *) malloc(steps * 
sizeof(
int));
 
   71     for (k = 0; k < steps;
 
   73         sk = (rank + size - p2k) % size;
 
   74         rk = (rank + p2k) % size;
 
   78             S[k]  = (
int *) malloc(nS[k] * 
sizeof(
int));
 
   79             memcpy(S[k], indices, nS[k] * 
sizeof(
int));
 
   81             nS[k] = 
card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
 
   82             S[k]  = (
int *) malloc(nS[k] * 
sizeof(
int));
 
   83             set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
 
   86         MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
 
   88         MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
 
   90         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
   91         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
   93         I[steps - k - 1] = (
int *) malloc(nI[steps - k - 1] * 
sizeof(
int));
 
   96         MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
 
   98         MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
 
  100         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  101         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  107     J  = (
int **) malloc(steps * 
sizeof(
int *));
 
  108     nJ = (
int *) malloc(steps * 
sizeof(
int));
 
  112     for (k = 0; k < steps;
 
  115         sk = (rank + p2k) % size;
 
  116         rk = (rank + size - p2k) % size;
 
  119             J[k]  = (
int *) malloc(nJ[k] * 
sizeof(
int));
 
  120             memcpy(J[k], indices, nJ[k] * 
sizeof(
int));
 
  122             nJ[k] = 
card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
 
  123             J[k]  = (
int *) malloc(nJ[k] * 
sizeof(
int));
 
  124             set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
 
  128         if (k != steps - 1) {
 
  129             MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
 
  130             MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
 
  131             MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  132             MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  134             R[k] = (
int *) malloc(nR[k] * 
sizeof(
int));
 
  137             MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
 
  138             MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
 
  139             MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  140             MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  149     for (k = 0; k < steps; k++) { 
 
  151         sk = (rank + p2k) % size;
 
  152         rk = (rank + size - p2k) % size;
 
  154         nS[k] = 
card_and(I[k], nI[k], J[k], nJ[k]);
 
  155         S[k]  = (
int *) malloc(nJ[k] * 
sizeof(
int));
 
  156         set_and(I[k], nI[k], J[k], nJ[k], S[k]); 
 
  161         MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
 
  163         MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request); 
 
  164         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  165         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  167         R[k] = (
int *) malloc(nR[k] * 
sizeof(
int));
 
  170         MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
 
  172         MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
 
  174         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  175         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  182     int **USR, *nUSR, **U, *nU;
 
  184     USR  = (
int **) malloc(steps * 
sizeof(
int *));
 
  185     nUSR = (
int *) malloc(steps * 
sizeof(
int));
 
  186     U    = (
int **) malloc(steps * 
sizeof(
int *));
 
  187     nU   = (
int *) malloc(steps * 
sizeof(
int));
 
  189     for (k = 0; k < steps; k++) {
 
  190         nUSR[k] = 
card_or(S[k], nS[k], R[k], nR[k]);
 
  191         USR[k]  = (
int *) malloc(nUSR[k] * 
sizeof(
int));
 
  192         set_or(S[k], nS[k], R[k], nR[k], USR[k]);
 
  194     for (k = 0; k < steps; k++) {
 
  197             U[k]  = (
int *) malloc(nU[k] * 
sizeof(
int));
 
  198             memcpy(U[k], USR[k], nU[k] * 
sizeof(
int));
 
  200             nU[k] = 
card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
 
  201             U[k]  = (
int *) malloc(nU[k] * 
sizeof(
int *));
 
  202             set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
 
  205     *com_count   = nU[steps - 1];
 
  206     *com_indices = (
int *) malloc(*com_count * 
sizeof(
int));
 
  207     memcpy(*com_indices, U[steps - 1], *com_count * 
sizeof(
int));
 
  210     for (k = 0; k < steps; k++) {
 
  211         subset2map(*com_indices, *com_count, S[k], nS[k]);
 
  212         subset2map(*com_indices, *com_count, R[k], nR[k]);
 
  236                         int nSmax, 
double *val, 
int steps, MPI_Comm comm) {
 
  240     int         rank, size, rk, sk;
 
  241     MPI_Request s_request, r_request;
 
  244     MPI_Comm_size(comm, &size);
 
  245     MPI_Comm_rank(comm, &rank);
 
  247     sbuf = (
double *) malloc(nSmax * 
sizeof(
double));
 
  248     rbuf = (
double *) malloc(nRmax * 
sizeof(
double));
 
  252     for (k = 0; k < steps; k++) {
 
  253         sk = (rank + p2k) % size;
 
  254         rk = (rank + size - p2k) % size;
 
  256         m2s(val, sbuf, S[k], nS[k]); 
 
  259         MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
 
  260         MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
 
  262         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  263         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  265         t = t + MPI_Wtime() - st;
 
  267         s2m_sum(val, rbuf, R[k], nR[k]); 
 
  278                        int *nS, 
int **com_indices, 
int *com_count, 
int steps,
 
  282     int         rank, size, rk, sk;
 
  284     MPI_Request s_request, r_request;
 
  289     MPI_Comm_size(comm, &size);
 
  290     MPI_Comm_rank(comm, &rank);
 
  292     I    = (
int **) malloc(steps * 
sizeof(
int *));
 
  293     nI   = (
int *) malloc(steps * 
sizeof(
int));
 
  298     for (k = 0; k < steps;
 
  301         if (rank % p2k1 < p2k) sk = rk = rank + p2k;
 
  303             sk = rk = rank - p2k;
 
  307             S[k]  = (
int *) malloc(nS[k] * 
sizeof(
int));
 
  308             memcpy(S[k], indices, nS[k] * 
sizeof(
int));
 
  310             nS[k] = 
card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
 
  311             S[k]  = (
int *) malloc(nS[k] * 
sizeof(
int));
 
  312             set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
 
  315         MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
 
  317         MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
 
  319         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  320         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  322         I[steps - k - 1] = (
int *) malloc(nI[steps - k - 1] * 
sizeof(
int));
 
  325         MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
 
  327         MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
 
  329         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  330         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  337     J  = (
int **) malloc(steps * 
sizeof(
int *));
 
  338     nJ = (
int *) malloc(steps * 
sizeof(
int));
 
  343     for (k = 0; k < steps;
 
  347         if (rank % p2k1 < p2k) sk = rk = rank + p2k;
 
  349             sk = rk = rank - p2k;
 
  353             J[k]  = (
int *) malloc(nJ[k] * 
sizeof(
int));
 
  354             memcpy(J[k], indices, nJ[k] * 
sizeof(
int));
 
  356             nJ[k] = 
card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
 
  357             J[k]  = (
int *) malloc(nJ[k] * 
sizeof(
int));
 
  358             set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
 
  362         if (k != steps - 1) {
 
  363             MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
 
  364             MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
 
  365             MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  366             MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  368             R[k] = (
int *) malloc(nR[k] * 
sizeof(
int));
 
  371             MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
 
  372             MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
 
  373             MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  374             MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  385     for (k = 0; k < steps; k++) { 
 
  388         if (rank % p2k1 < p2k) sk = rk = rank + p2k;
 
  390             sk = rk = rank - p2k;
 
  392         nS[k] = 
card_and(I[k], nI[k], J[k], nJ[k]);
 
  393         S[k]  = (
int *) malloc(nJ[k] * 
sizeof(
int));
 
  394         set_and(I[k], nI[k], J[k], nJ[k], S[k]); 
 
  399         MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
 
  401         MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request); 
 
  402         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  403         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  405         R[k] = (
int *) malloc(nR[k] * 
sizeof(
int));
 
  408         MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
 
  410         MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
 
  412         MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  413         MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  421     int **USR, *nUSR, **U, *nU;
 
  423     USR  = (
int **) malloc(steps * 
sizeof(
int *));
 
  424     nUSR = (
int *) malloc(steps * 
sizeof(
int));
 
  425     U    = (
int **) malloc(steps * 
sizeof(
int *));
 
  426     nU   = (
int *) malloc(steps * 
sizeof(
int));
 
  428     for (k = 0; k < steps; k++) {
 
  429         nUSR[k] = 
card_or(S[k], nS[k], R[k], nR[k]);
 
  430         USR[k]  = (
int *) malloc(nUSR[k] * 
sizeof(
int));
 
  431         set_or(S[k], nS[k], R[k], nR[k], USR[k]);
 
  433     for (k = 0; k < steps; k++) {
 
  436             U[k]  = (
int *) malloc(nU[k] * 
sizeof(
int));
 
  437             memcpy(U[k], USR[k], nU[k] * 
sizeof(
int));
 
  439             nU[k] = 
card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
 
  440             U[k]  = (
int *) malloc(nU[k] * 
sizeof(
int *));
 
  441             set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
 
  444     *com_count   = nU[steps - 1];
 
  445     *com_indices = (
int *) malloc(*com_count * 
sizeof(
int));
 
  446     memcpy(*com_indices, U[steps - 1], *com_count * 
sizeof(
int));
 
  449     for (k = 0; k < steps; k++) {
 
  450         subset2map(*com_indices, *com_count, S[k], nS[k]);
 
  451         subset2map(*com_indices, *com_count, R[k], nR[k]);
 
  475                             int nSmax, 
double *val, 
int steps, MPI_Comm comm) {
 
  478     int         k, p2k, p2k1, tag;
 
  479     int         rank, size, rk, sk;
 
  481     MPI_Request s_request, r_request;
 
  484     MPI_Comm_size(comm, &size);
 
  485     MPI_Comm_rank(comm, &rank);
 
  487     sbuf = (
double *) malloc(nSmax * 
sizeof(
double));
 
  488     rbuf = (
double *) malloc(nRmax * 
sizeof(
double));
 
  493     for (k = 0; k < steps; k++) {
 
  495         if (rank % p2k1 < p2k) {
 
  497             sk = rk = rank + p2k;
 
  504             m2s(val, sbuf, S[k], nS[k]); 
 
  505             MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
 
  506             MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
 
  508             MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  509             MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  510             s2m_sum(val, rbuf, R[k], nR[k]); 
 
  513             t = t + MPI_Wtime() - st;
 
  517             sk = rk = rank - p2k;
 
  521             MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
 
  522             m2s(val, sbuf, S[k], nS[k]); 
 
  523             MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
 
  525             MPI_Wait(&r_request, MPI_STATUS_IGNORE);
 
  526             s2m_sum(val, rbuf, R[k], nR[k]); 
 
  528             MPI_Wait(&s_request, MPI_STATUS_IGNORE);
 
  533             t = t + MPI_Wtime() - st;
 
void m2s(double *mapval, double *submapval, int *subset, int count)
 
void s2m_sum(double *mapval, double *submapval, int *subset, int count)
Sum submap values the submap values array.
 
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2)
 
int set_and(int *A1, int n1, int *A2, int n2, int *A1andA2)
 
int card_or(int *A1, int n1, int *A2, int n2)
 
int card_and(int *A1, int n1, int *A2, int n2)
 
void subset2map(int *A, int nA, int *subA, int nsubA)