52 int *nS,
int **com_indices,
int *com_count,
int steps,
56 int rank, size, rk, sk;
58 MPI_Request s_request, r_request;
63 MPI_Comm_size(comm, &size);
64 MPI_Comm_rank(comm, &rank);
66 I = (
int **) malloc(steps *
sizeof(
int *));
67 nI = (
int *) malloc(steps *
sizeof(
int));
72 for (k = 0; k < steps;
75 if (rank % p2k1 < p2k) sk = rk = rank + p2k;
81 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
82 memcpy(S[k], indices, nS[k] *
sizeof(
int));
84 nS[k] =
card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
85 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
86 set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
89 MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
91 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
93 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
94 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
96 I[steps - k - 1] = (
int *) malloc(nI[steps - k - 1] *
sizeof(
int));
99 MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
101 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
103 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
104 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
111 J = (
int **) malloc(steps *
sizeof(
int *));
112 nJ = (
int *) malloc(steps *
sizeof(
int));
117 for (k = 0; k < steps;
121 if (rank % p2k1 < p2k) sk = rk = rank + p2k;
123 sk = rk = rank - p2k;
127 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
128 memcpy(J[k], indices, nJ[k] *
sizeof(
int));
130 nJ[k] =
card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
131 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
132 set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
136 if (k != steps - 1) {
137 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
138 MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
139 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
140 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
142 R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
145 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
146 MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
147 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
148 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
159 for (k = 0; k < steps; k++) {
162 if (rank % p2k1 < p2k) sk = rk = rank + p2k;
164 sk = rk = rank - p2k;
166 nS[k] =
card_and(I[k], nI[k], J[k], nJ[k]);
167 S[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
168 set_and(I[k], nI[k], J[k], nJ[k], S[k]);
173 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
175 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);
176 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
177 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
179 R[k] = (
int *) malloc(nR[k] *
sizeof(
int));
182 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
184 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
186 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
187 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
195 int **USR, *nUSR, **U, *nU;
197 USR = (
int **) malloc(steps *
sizeof(
int *));
198 nUSR = (
int *) malloc(steps *
sizeof(
int));
199 U = (
int **) malloc(steps *
sizeof(
int *));
200 nU = (
int *) malloc(steps *
sizeof(
int));
202 for (k = 0; k < steps; k++) {
203 nUSR[k] =
card_or(S[k], nS[k], R[k], nR[k]);
204 USR[k] = (
int *) malloc(nUSR[k] *
sizeof(
int));
205 set_or(S[k], nS[k], R[k], nR[k], USR[k]);
207 for (k = 0; k < steps; k++) {
210 U[k] = (
int *) malloc(nU[k] *
sizeof(
int));
211 memcpy(U[k], USR[k], nU[k] *
sizeof(
int));
213 nU[k] =
card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
214 U[k] = (
int *) malloc(nU[k] *
sizeof(
int *));
215 set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
218 *com_count = nU[steps - 1];
219 *com_indices = (
int *) malloc(*com_count *
sizeof(
int));
220 memcpy(*com_indices, U[steps - 1], *com_count *
sizeof(
int));
223 for (k = 0; k < steps; k++) {
224 subset2map(*com_indices, *com_count, S[k], nS[k]);
225 subset2map(*com_indices, *com_count, R[k], nR[k]);
249 int nSmax,
double *val,
int steps, MPI_Comm comm) {
252 int k, p2k, p2k1, tag;
253 int rank, size, rk, sk;
255 MPI_Request s_request, r_request;
258 MPI_Comm_size(comm, &size);
259 MPI_Comm_rank(comm, &rank);
261 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
262 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
267 for (k = 0; k < steps; k++) {
269 if (rank % p2k1 < p2k) {
271 sk = rk = rank + p2k;
278 m2s(val, sbuf, S[k], nS[k]);
279 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
280 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
282 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
283 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
284 s2m_sum(val, rbuf, R[k], nR[k]);
291 sk = rk = rank - p2k;
295 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
296 m2s(val, sbuf, S[k], nS[k]);
297 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
299 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
300 s2m_sum(val, rbuf, R[k], nR[k]);
302 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
void m2s(double *mapval, double *submapval, int *subset, int count)
void s2m_sum(double *mapval, double *submapval, int *subset, int count)
Sum submap values the submap values array.
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2)
int set_and(int *A1, int n1, int *A2, int n2, int *A1andA2)
int card_or(int *A1, int n1, int *A2, int n2)
int card_and(int *A1, int n1, int *A2, int n2)
void subset2map(int *A, int nA, int *subA, int nsubA)
int truebutterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm)
Initialize tables for butterfly-like communication scheme (true means pair wise) This routine set up ...
int truebutterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme (tru...