MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
truebutterfly.c
#ifdef W_MPI
#include <mpi.h>
#include <stdlib.h>
#include <string.h>
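
/*
 * Butterfly-like communication scheme, "true" meaning strictly pairwise: at
 * every stage each process exchanges data with exactly one partner, at rank
 * distance p2k (a power of two). truebutterfly_init builds, for every stage k,
 * the index set S[k] to send and R[k] to receive, together with the merged
 * list com_indices of all indices this process communicates;
 * truebutterfly_reduce then uses these tables to sum the associated values.
 * The communicator size is assumed to be 2^steps.
 */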
int truebutterfly_init(int *indices, int count, int **R, int *nR, int **S,
                       int *nS, int **com_indices, int *com_count, int steps,
                       MPI_Comm comm) {

    int k, p2k, p2k1;
    int rank, size, rk, sk;
    int tag;
    MPI_Request s_request, r_request;
    int **I, *nI;
    int **J, *nJ;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    I = (int **) malloc(steps * sizeof(int *));
    nI = (int *) malloc(steps * sizeof(int));
    tag = 0;
    p2k = size / 2;
    p2k1 = 2 * p2k;
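
    /*
     * First pass (bottom up): the partner distance starts at size/2 and is
     * halved at every stage. Each process sends its cumulative index set S[k]
     * (its own indices plus everything received so far) and stores the
     * partner's set in I[steps - k - 1], so that after the loop I[m] holds the
     * set received at distance 2^m, matching the stage ordering of the next
     * two passes.
     */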
    for (k = 0; k < steps; k++) { // butterfly first pass: bottom up (fill tabs nI and I)

        if (rank % p2k1 < p2k) sk = rk = rank + p2k;
        else sk = rk = rank - p2k;

        if (k == 0) { // S^0 := A
            nS[k] = count;
            S[k] = (int *) malloc(nS[k] * sizeof(int));
            memcpy(S[k], indices, nS[k] * sizeof(int));
        } else { // S^k := S^{k-1} \cup R^{k-1}
            nS[k] = card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
            S[k] = (int *) malloc(nS[k] * sizeof(int));
            set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
        }

        MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
                  &r_request); // receive number of indices
        MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
                  &s_request); // send number of indices
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        I[steps - k - 1] = (int *) malloc(nI[steps - k - 1] * sizeof(int));

        tag++;
        MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
                  &r_request); // receive indices
        MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
                  &s_request); // send indices
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        p2k /= 2;
        p2k1 /= 2;
        tag++;
    }
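
    /*
     * Second pass (top down): the partner distance now grows from 1 to size/2.
     * J[k] accumulates the local indices plus everything received in the
     * previous stages (J^0 = A, then J^k = J^{k-1} \cup R^{k-1}) and is
     * exchanged with the partner; the received sets are kept temporarily in
     * R[k] and freed once merged. The S[k] arrays from the first pass are
     * released here and rebuilt in their final form in the last pass.
     */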
    J = (int **) malloc(steps * sizeof(int *));
    nJ = (int *) malloc(steps * sizeof(int));

    tag = 0;
    p2k = 1;
    p2k1 = p2k * 2;
    for (k = 0; k < steps; k++) { // butterfly second pass: top down (fill tabs nJ and J)
        free(S[k]);

        if (rank % p2k1 < p2k) sk = rk = rank + p2k;
        else sk = rk = rank - p2k;

        if (k == 0) {
            nJ[k] = count;
            J[k] = (int *) malloc(nJ[k] * sizeof(int));
            memcpy(J[k], indices, nJ[k] * sizeof(int));
        } else {
            nJ[k] = card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
            J[k] = (int *) malloc(nJ[k] * sizeof(int));
            set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
                   J[k]); // J^k := J^{k-1} \cup R^{k-1}
            free(R[k - 1]);
        }
        if (k != steps - 1) {
            MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
            MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            MPI_Wait(&s_request, MPI_STATUS_IGNORE);

            R[k] = (int *) malloc(nR[k] * sizeof(int));
            tag++;

            MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
            MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            MPI_Wait(&s_request, MPI_STATUS_IGNORE);
        }
        p2k *= 2;
        p2k1 *= 2;
        tag++;
    }
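
    /*
     * Last pass: the set actually worth sending at stage k is
     * S^k = I^k \cap J^k, i.e. the indices known on the partner's side
     * (collected as I[k] in the first pass) that this process can already
     * contribute to (accumulated in J[k]). S[k] is sent and the matching
     * receive set R[k] comes back; these are the tables used later by
     * truebutterfly_reduce.
     */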
    tag = 0;
    p2k = 1;
    p2k1 = p2k * 2;
    for (k = 0; k < steps; k++) { // butterfly last pass: the sending set is S = I \cap J,
                                  // so send S and get R back

        if (rank % p2k1 < p2k) sk = rk = rank + p2k;
        else sk = rk = rank - p2k;

        nS[k] = card_and(I[k], nI[k], J[k], nJ[k]);
        S[k] = (int *) malloc(nS[k] * sizeof(int));
        set_and(I[k], nI[k], J[k], nJ[k], S[k]); // S^k := I^k \cap J^k

        free(I[k]);
        free(J[k]);

        MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
                  &r_request); // receive size
        MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request); // send size
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        R[k] = (int *) malloc(nR[k] * sizeof(int));
        tag++;

        MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
                  &r_request); // receive indices
        MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
                  &s_request); // send indices
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        p2k *= 2;
        p2k1 *= 2;
        tag++;
    }
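
    /*
     * Local post-processing: collect every index that appears in any send or
     * receive set. U[steps - 1] is the union over all stages of S^k \cup R^k
     * and becomes the communicated-index list com_indices returned to the
     * caller.
     */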
    // Now we work locally
    int **USR, *nUSR, **U, *nU;

    USR = (int **) malloc(steps * sizeof(int *));
    nUSR = (int *) malloc(steps * sizeof(int));
    U = (int **) malloc(steps * sizeof(int *));
    nU = (int *) malloc(steps * sizeof(int));

    for (k = 0; k < steps; k++) {
        nUSR[k] = card_or(S[k], nS[k], R[k], nR[k]);
        USR[k] = (int *) malloc(nUSR[k] * sizeof(int));
        set_or(S[k], nS[k], R[k], nR[k], USR[k]); // USR^k := S^k \cup R^k
    }
    for (k = 0; k < steps; k++) {
        if (k == 0) {
            nU[k] = nUSR[k];
            U[k] = (int *) malloc(nU[k] * sizeof(int));
            memcpy(U[k], USR[k], nU[k] * sizeof(int));
        } else {
            nU[k] = card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
            U[k] = (int *) malloc(nU[k] * sizeof(int));
            set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]); // U^k := U^{k-1} \cup USR^k
        }
    }
    *com_count = nU[steps - 1];
    *com_indices = (int *) malloc(*com_count * sizeof(int));
    memcpy(*com_indices, U[steps - 1], *com_count * sizeof(int));
    //====================================================================
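
    /*
     * Translate the entries of S[k] and R[k] from global index values into
     * positions within com_indices, so that truebutterfly_reduce can use them
     * directly as offsets into a value buffer aligned with com_indices.
     */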
    for (k = 0; k < steps; k++) {
        subset2map(*com_indices, *com_count, S[k], nS[k]);
        subset2map(*com_indices, *com_count, R[k], nR[k]);
        free(USR[k]);
        free(U[k]);
    }
    free(USR); free(nUSR); free(U); free(nU);
    free(I); free(nI); free(J); free(nJ);

    return 0;
}


int truebutterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS,
                         int nSmax, double *val, int steps, MPI_Comm comm) {
    // double st, t;
    // t = 0.0;
    int k, p2k, p2k1, tag;
    int rank, size, rk, sk;
    MPI_Status status;
    MPI_Request s_request, r_request;
    double *sbuf, *rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    sbuf = (double *) malloc(nSmax * sizeof(double));
    rbuf = (double *) malloc(nRmax * sizeof(double));
    tag = 0;
    p2k = 1;
    p2k1 = p2k * 2;
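
    /*
     * One pairwise exchange per stage: pack the values of the indices in S[k]
     * into sbuf, swap buffers with the partner at distance p2k, and add the
     * received values into val at the positions given by R[k]. Both branches
     * post the same nonblocking send/receive pair; only the order of posting
     * differs between the lower and upper member of each pair.
     */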
    for (k = 0; k < steps; k++) {

        if (rank % p2k1 < p2k) {

            sk = rk = rank + p2k;

            // st = MPI_Wtime();

            // MPI_Sendrecv(sbuf, nS[k], MPI_DOUBLE, sk, tag, rbuf, nR[k],
            //              MPI_DOUBLE, rk, tag, comm, &status);

            m2s(val, sbuf, S[k], nS[k]); // fill the sending buffer
            MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
            MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);

            MPI_Wait(&s_request, MPI_STATUS_IGNORE);
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            s2m_sum(val, rbuf, R[k], nR[k]); // sum receive buffer into values

            // t = t + MPI_Wtime() - st;

        } else {

            sk = rk = rank - p2k;

            // st = MPI_Wtime();

            MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
            m2s(val, sbuf, S[k], nS[k]); // fill the sending buffer
            MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);

            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            s2m_sum(val, rbuf, R[k], nR[k]); // sum receive buffer into values

            MPI_Wait(&s_request, MPI_STATUS_IGNORE);

            // MPI_Sendrecv(sbuf, nS[k], MPI_DOUBLE, sk, tag, rbuf, nR[k],
            //              MPI_DOUBLE, rk, tag, comm, &status);

            // t = t + MPI_Wtime() - st;
        }

        p2k *= 2;
        p2k1 *= 2;
        tag++;
    }
    free(sbuf);
    free(rbuf);
    return 0;
}

#endif
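
For orientation, the sketch below shows one way these two routines could be driven. It is illustrative only and not part of truebutterfly.c: the function example_truebutterfly_usage and the fill/extract loops are invented here, and the sketch assumes that the local index list is sorted and duplicate-free, that the communicator size is a power of two (so steps = log2(size)), and that the value buffer handed to truebutterfly_reduce is aligned with com_indices, as prepared by truebutterfly_init.

#include <mpi.h>
#include <stdlib.h>

/* Declarations assumed visible, e.g. through the MIDAPACK headers. */
int truebutterfly_init(int *indices, int count, int **R, int *nR, int **S,
                       int *nS, int **com_indices, int *com_count, int steps,
                       MPI_Comm comm);
int truebutterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS,
                         int nSmax, double *val, int steps, MPI_Comm comm);

/* Hypothetical driver: sum `values` (attached to the sorted global `indices`)
 * across all processes that share an index. */
void example_truebutterfly_usage(int *indices, double *values, int count,
                                 MPI_Comm comm) {
    int size, steps = 0, k, i;
    MPI_Comm_size(comm, &size);
    while ((1 << (steps + 1)) <= size) steps++; /* steps = log2(size); size assumed 2^steps */

    int **R = (int **) malloc(steps * sizeof(int *));
    int *nR = (int *) malloc(steps * sizeof(int));
    int **S = (int **) malloc(steps * sizeof(int *));
    int *nS = (int *) malloc(steps * sizeof(int));
    int *com_indices, com_count;

    truebutterfly_init(indices, count, R, nR, S, nS, &com_indices, &com_count,
                       steps, comm);

    /* Buffer sizes for the reduce: largest send/receive set over all stages. */
    int nRmax = 0, nSmax = 0;
    for (k = 0; k < steps; k++) {
        if (nR[k] > nRmax) nRmax = nR[k];
        if (nS[k] > nSmax) nSmax = nS[k];
    }

    /* com_val[i] carries the value attached to com_indices[i]; fill it from the
     * local (indices, values) pairs by merging the two sorted lists. */
    double *com_val = (double *) calloc(com_count, sizeof(double));
    for (i = 0, k = 0; i < com_count && k < count;) {
        if (com_indices[i] < indices[k]) i++;
        else if (com_indices[i] > indices[k]) k++;
        else com_val[i++] = values[k++];
    }

    truebutterfly_reduce(R, nR, nRmax, S, nS, nSmax, com_val, steps, comm);

    /* Copy the reduced values back into the local array. */
    for (i = 0, k = 0; i < com_count && k < count;) {
        if (com_indices[i] < indices[k]) i++;
        else if (com_indices[i] > indices[k]) k++;
        else values[k++] = com_val[i++];
    }

    for (k = 0; k < steps; k++) {
        free(S[k]);
        free(R[k]);
    }
    free(R); free(nR); free(S); free(nS);
    free(com_indices); free(com_val);
}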
Helper routines referenced above: m2s (gather map values at the positions listed in an index subset into a compact send buffer) and s2m_sum (add received buffer values back into the map), defined in alm.c; card_or, set_or, card_and and set_and (size and construction of the union or intersection of two sorted index sets) and subset2map (replace the entries of a subset by their positions in a larger index list), defined in als.c. truebutterfly_init builds the communication tables for the pairwise ("true") butterfly scheme; truebutterfly_reduce performs the sparse sum reduction using them.