/* MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
 * Parallel software tools for high performance CMB DA analysis
 * butterfly_new.c */

#include "math.h"
#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h" /* for memcpy */
#include "sys/param.h"
#include "sys/stat.h"
#include "sys/types.h"
#include "time.h"

int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax,
                     double *val, int steps, MPI_Comm comm);
int butterfly_reshuffle(int **R, int *nR, int nRmax, int **S, int *nS,
                        int nSmax, double *val, int steps, MPI_Comm comm);
int butterfly_reduce_init(int *indices, int count, int **R, int *nR, int **S,
                          int *nS, int **com_indices, int *com_count, int steps,
                          MPI_Comm comm);
int butterfly_reshuffle_init(int *indices_in, int count_in, int *indices_out,
                             int count_out, int **R, int *nR, int **S, int *nS,
                             int **com_indices, int *com_count, int steps,
                             MPI_Comm comm);
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2);
int set_and(int *A1, int n1, int *A2, int n2, int *A1andA2);
int card_or(int *A1, int n1, int *A2, int n2);
int card_and(int *A1, int n1, int *A2, int n2); /* defined in als.c */
void m2s(double *mapval, double *submapval, int *subset, int count);
void subset2map(int *A, int nA, int *subA, int nsubA);
void s2m_sum(double *mapval, double *submapval, int *subset, int count);
void s2m_copy(double *mapval, double *submapval, int *subset, int count);

int butterfly_reduce_init(int *indices, int count, int **R, int *nR, int **S,
                          int *nS, int **com_indices, int *com_count, int steps,
                          MPI_Comm comm) {

    /* initializes butterfly communication for the case where the pixel
     * distribution does not change in the course of the process.
     * This is the original MIDAPACK routine by Pierre Cargemel ...
     * - rs 2022/06/10 */

    int i, k, p2k;
    int rank, size, rk, sk;
    int tag;
    MPI_Request s_request, r_request;
    int nbuf, *buf;
    int **I, *nI;
    int **J, *nJ;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    I = (int **) malloc(steps * sizeof(int *));
    nI = (int *) malloc(steps * sizeof(int));
    tag = 0;
    p2k = size / 2;

    for (k = 0; k < steps;
         k++) { /* butterfly first pass : bottom up (fill tabs nI and I) */
        sk = (rank + size - p2k) % size;
        rk = (rank + p2k) % size;

        if (k == 0) {      /* S^0 := A */
            nS[k] = count; /* NEEDS TO BE MODIFIED with "final" number of
                              pixels */
            S[k] = (int *) malloc(nS[k] * sizeof(int));
            memcpy(S[k], indices,
                   nS[k] * sizeof(int)); /* copy *my* pixel indices to the
                                            send (really receive ?!) buffer;
                                            NEEDS TO BE MODIFIED with final
                                            pix numbers ?! */
        } else { /* S^k := S^{k-1} \cup R^{k-1} */
            nS[k] = card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
            S[k] = (int *) malloc(nS[k] * sizeof(int));
            set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
        }

        MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
                  &r_request); /* receive number of indices */
        MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
                  &s_request); /* send number of indices */
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        I[steps - k - 1] = (int *) malloc(nI[steps - k - 1] * sizeof(int));

        tag++;
        MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag, comm,
                  &r_request); /* receive indices */
        MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
                  &s_request); /* send indices */
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        p2k /= 2;
        tag++;
    }

    J = (int **) malloc(steps * sizeof(int *));
    nJ = (int *) malloc(steps * sizeof(int));

    tag = 0;
    p2k = 1;
    for (k = 0; k < steps;
         k++) { /* butterfly second pass : top down (fill tabs nJ and J) */
        free(S[k]);
        sk = (rank + p2k) % size;
        rk = (rank + size - p2k) % size;
        if (k == 0) {
            nJ[k] = count;
            J[k] = (int *) malloc(nJ[k] * sizeof(int));
            memcpy(J[k], indices, nJ[k] * sizeof(int));
        } else {
            nJ[k] = card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
            J[k] = (int *) malloc(nJ[k] * sizeof(int));
            set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
                   J[k]); /* J^k = J^{k-1} \cup R^{k-1} */
            free(R[k - 1]);
        }
        if (k != steps - 1) {
            MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
            MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            MPI_Wait(&s_request, MPI_STATUS_IGNORE);

            R[k] = (int *) malloc(nR[k] * sizeof(int));
            tag++;

            MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
            MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            MPI_Wait(&s_request, MPI_STATUS_IGNORE);
        }
        p2k *= 2;
        tag++;
    }

    tag = 0;
    p2k = 1;
    for (k = 0; k < steps;
         k++) { /* butterfly last pass : now the sending tab is S = I \cap J,
                   so send S and we'll get R */
        sk = (rank + p2k) % size;
        rk = (rank + size - p2k) % size;

        nS[k] = card_and(I[k], nI[k], J[k], nJ[k]);
        S[k] = (int *) malloc(nJ[k] * sizeof(int));
        set_and(I[k], nI[k], J[k], nJ[k], S[k]); /* S^k = I^k \cap J^k */

        free(I[k]);
        free(J[k]);

        MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
                  &r_request); /* receive size */
        MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
                  &s_request); /* send size */
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        R[k] = (int *) malloc(nR[k] * sizeof(int));
        tag++;

        MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
                  &r_request); /* receive indices */
        MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
                  &s_request); /* send indices */
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        p2k *= 2;
        tag++;
    }

    /* Now we work locally */
    int **USR, *nUSR, **U, *nU;

    USR = (int **) malloc(steps * sizeof(int *));
    nUSR = (int *) malloc(steps * sizeof(int));
    U = (int **) malloc(steps * sizeof(int *));
    nU = (int *) malloc(steps * sizeof(int));

    for (k = 0; k < steps; k++) {
        nUSR[k] = card_or(S[k], nS[k], R[k], nR[k]);
        USR[k] = (int *) malloc(nUSR[k] * sizeof(int));
        set_or(S[k], nS[k], R[k], nR[k], USR[k]);
    }
    for (k = 0; k < steps; k++) {
        if (k == 0) {
            nU[k] = nUSR[k];
            U[k] = (int *) malloc(nU[k] * sizeof(int));
            memcpy(U[k], USR[k], nU[k] * sizeof(int));
        } else {
            nU[k] = card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
            U[k] = (int *) malloc(nU[k] * sizeof(int));
            set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
        }
    }
    *com_count = nU[steps - 1];
    *com_indices = (int *) malloc(*com_count * sizeof(int));
    memcpy(*com_indices, U[steps - 1], *com_count * sizeof(int));
    /* ==================================================================== */

    for (k = 0; k < steps; k++) {
        subset2map(*com_indices, *com_count, S[k], nS[k]);
        subset2map(*com_indices, *com_count, R[k], nR[k]);
    }
    free(USR);
    free(U);

    return 0;
}
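
/* --------------------------------------------------------------------------
 * Illustrative sketch only -- not part of the original MIDAPACK code.  It
 * shows one plausible calling sequence for the reduce path, assuming the
 * communicator size is a power of two (steps = log2(size)) and that
 * `indices` is the sorted list of local pixel indices.  The function name
 * and the way the value buffer is filled are hypothetical.
 * -------------------------------------------------------------------------- */
void example_butterfly_reduce_usage(int *indices, int count, MPI_Comm comm) {

    int size, steps, k;
    MPI_Comm_size(comm, &size);
    for (steps = 0; (1 << steps) < size; steps++) /* steps = log2(size) */
        ;

    int **R = (int **) malloc(steps * sizeof(int *));
    int **S = (int **) malloc(steps * sizeof(int *));
    int *nR = (int *) calloc(steps, sizeof(int));
    int *nS = (int *) calloc(steps, sizeof(int));
    int *com_indices = NULL, com_count = 0;

    butterfly_reduce_init(indices, count, R, nR, S, nS, &com_indices,
                          &com_count, steps, comm);

    /* after init, S[k] and R[k] contain positions into com_indices, so the
     * value buffer handed to butterfly_reduce() must have com_count entries
     * ordered as in com_indices */
    double *com_val = (double *) calloc(com_count, sizeof(double));
    /* ... fill com_val with the local values of the pixels listed in
     * com_indices (application specific, omitted here) ... */

    int nRmax = 0, nSmax = 0;
    for (k = 0; k < steps; k++) {
        if (nR[k] > nRmax) nRmax = nR[k];
        if (nS[k] > nSmax) nSmax = nS[k];
    }

    butterfly_reduce(R, nR, nRmax, S, nS, nSmax, com_val, steps, comm);
    /* each entry of com_val has now accumulated the contributions received
     * through the butterfly for the corresponding pixel of com_indices */

    for (k = 0; k < steps; k++) {
        free(R[k]);
        free(S[k]);
    }
    free(R);
    free(S);
    free(nR);
    free(nS);
    free(com_val);
    free(com_indices);
}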

int butterfly_reshuffle_init(int *indices_in, int count_in, int *indices_out,
                             int count_out, int **R, int *nR, int **S, int *nS,
                             int **com_indices, int *com_count, int steps,
                             MPI_Comm comm) {

    /* initializes butterfly communication where the pixel distributions
     * prior to and after it may be different. The routine explicitly allows
     * some of the MPI processes to have no data either before or after.
     * This is a recoding of the original MIDAPACK routine by Pierre Cargemel
     * (now called butterfly_reduce_init), to which it should be equivalent
     * whenever the input and output distributions of the pixels coincide.
     * - rs 2022/06/10 */

    int i, k, p2k;
    int rank, size, rk, sk;
    int tag;
    MPI_Request s_request, r_request;
    int nbuf, *buf;
    int **I, *nI;
    int **J, *nJ;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    I = (int **) malloc(steps * sizeof(int *));
    nI = (int *) malloc(steps * sizeof(int));
    tag = 0;
    p2k = size / 2;

    for (k = 0; k < steps;
         k++) { /* butterfly first pass : bottom up (fill tabs nI and I) */
        sk = (rank + size - p2k) % size;
        rk = (rank + p2k) % size;

        if (k == 0) { /* S^0 := A */
            nS[k] = count_out;
            if (nS[k]) { /* do not allocate zero length objects */
                S[k] = (int *) malloc(nS[k] * sizeof(int));
                memcpy(S[k], indices_out, nS[k] * sizeof(int));
            }
        } else { /* S^k := S^{k-1} \cup R^{k-1} */
            nS[k] = card_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k]);
            if (nS[k]) {
                S[k] = (int *) malloc(nS[k] * sizeof(int));
                set_or(S[k - 1], nS[k - 1], I[steps - k], nI[steps - k], S[k]);
            }
        }

        MPI_Irecv(&nI[steps - k - 1], 1, MPI_INT, rk, tag, comm,
                  &r_request); /* receive number of indices */
        MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
                  &s_request); /* send number of indices */
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        if (nI[steps - k - 1])
            I[steps - k - 1] = (int *) malloc(nI[steps - k - 1] * sizeof(int));

        tag++;
        if (nI[steps - k - 1])
            MPI_Irecv(I[steps - k - 1], nI[steps - k - 1], MPI_INT, rk, tag,
                      comm, &r_request); /* receive indices */
        if (nS[k])
            MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
                      &s_request); /* send indices */
        if (nI[steps - k - 1]) MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        if (nS[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        p2k /= 2;
        tag++;
    }

    J = (int **) malloc(steps * sizeof(int *));
    nJ = (int *) malloc(steps * sizeof(int));

    tag = 0;
    p2k = 1;
    for (k = 0; k < steps;
         k++) { /* butterfly second pass : top down (fill tabs nJ and J) */
        if (nS[k]) free(S[k]); /* S[k] was only allocated if nS[k] > 0 */
        sk = (rank + p2k) % size;
        rk = (rank + size - p2k) % size;
        if (k == 0) {
            nJ[k] = count_in;
            if (nJ[k]) {
                J[k] = (int *) malloc(nJ[k] * sizeof(int));
                memcpy(J[k], indices_in, nJ[k] * sizeof(int));
            }
        } else {
            nJ[k] = card_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1]);
            if (nJ[k]) {
                J[k] = (int *) malloc(nJ[k] * sizeof(int));
                set_or(J[k - 1], nJ[k - 1], R[k - 1], nR[k - 1],
                       J[k]); /* J^k = J^{k-1} \cup R^{k-1} */
            }
            if (nR[k - 1]) free(R[k - 1]); /* same guard as for S[k] above */
        }
        if (k != steps - 1) {
            MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
            MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            MPI_Wait(&s_request, MPI_STATUS_IGNORE);

            if (nR[k]) R[k] = (int *) malloc(nR[k] * sizeof(int));
            tag++;

            if (nR[k])
                MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
            if (nJ[k])
                MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
            if (nR[k]) MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            if (nJ[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
        }
        p2k *= 2;
        tag++;
    }

    tag = 0;
    p2k = 1;
    for (k = 0; k < steps;
         k++) { /* butterfly last pass : now the sending tab is S = I \cap J,
                   so send S and we'll get R */
        sk = (rank + p2k) % size;
        rk = (rank + size - p2k) % size;

        nS[k] = card_and(I[k], nI[k], J[k], nJ[k]);
        if (nS[k]) {
            S[k] = (int *) malloc(nJ[k] * sizeof(int));
            set_and(I[k], nI[k], J[k], nJ[k], S[k]); /* S^k = I^k \cap J^k */
        }

        if (nI[k]) free(I[k]);
        if (nJ[k]) free(J[k]);

        MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm,
                  &r_request); /* receive size */
        MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm,
                  &s_request); /* send size */
        MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        if (nR[k]) R[k] = (int *) malloc(nR[k] * sizeof(int));
        tag++;

        if (nR[k])
            MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm,
                      &r_request); /* receive indices */
        if (nS[k])
            MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm,
                      &s_request); /* send indices */
        if (nR[k]) MPI_Wait(&r_request, MPI_STATUS_IGNORE);
        if (nS[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);

        p2k *= 2;
        tag++;
    }

    /* Now we work locally */
    int **USR, *nUSR, **U, *nU;

    USR = (int **) malloc(steps * sizeof(int *));
    nUSR = (int *) malloc(steps * sizeof(int));
    U = (int **) malloc(steps * sizeof(int *));
    nU = (int *) malloc(steps * sizeof(int));

    for (k = 0; k < steps; k++) {
        nUSR[k] = card_or(S[k], nS[k], R[k], nR[k]);
        if (nUSR[k]) {
            USR[k] = (int *) malloc(nUSR[k] * sizeof(int));
            set_or(S[k], nS[k], R[k], nR[k], USR[k]);
        }
    }
    for (k = 0; k < steps; k++) {
        if (k == 0) {
            nU[k] = nUSR[k];
            if (nU[k]) {
                U[k] = (int *) malloc(nU[k] * sizeof(int));
                memcpy(U[k], USR[k], nU[k] * sizeof(int));
            }
        } else {
            nU[k] = card_or(U[k - 1], nU[k - 1], USR[k], nUSR[k]);
            if (nU[k]) {
                U[k] = (int *) malloc(nU[k] * sizeof(int));
                set_or(U[k - 1], nU[k - 1], USR[k], nUSR[k], U[k]);
            }
        }
    }
    *com_count = nU[steps - 1];
    if (*com_count) {
        *com_indices = (int *) malloc(*com_count * sizeof(int));
        memcpy(*com_indices, U[steps - 1],
               *com_count * sizeof(int)); /* the full set of pixel indices
                                             dealt with by this proc at some
                                             point */
    }
    /* ==================================================================== */

    /* makes the indices in S and R relative to those stored in the full set
     * of pixels processed by this proc */

    if (*com_count) {
        for (k = 0; k < steps; k++) {
            if (nS[k]) subset2map(*com_indices, *com_count, S[k], nS[k]);
            if (nR[k]) subset2map(*com_indices, *com_count, R[k], nR[k]);
        }
    }
    free(USR);
    free(U);

    return 0;
}
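
/* --------------------------------------------------------------------------
 * Illustrative sketch only -- not part of the original MIDAPACK code.  The
 * reshuffle path is set up like the reduce path above, except that the input
 * and output pixel distributions are passed separately and either of them
 * may be empty on a given process.  The function name is hypothetical.
 * -------------------------------------------------------------------------- */
void example_butterfly_reshuffle_usage(int *indices_in, int count_in,
                                       int *indices_out, int count_out,
                                       MPI_Comm comm) {

    int size, steps, k;
    MPI_Comm_size(comm, &size);
    for (steps = 0; (1 << steps) < size; steps++) /* steps = log2(size) */
        ;

    int **R = (int **) malloc(steps * sizeof(int *));
    int **S = (int **) malloc(steps * sizeof(int *));
    int *nR = (int *) calloc(steps, sizeof(int));
    int *nS = (int *) calloc(steps, sizeof(int));
    int *com_indices = NULL, com_count = 0;

    butterfly_reshuffle_init(indices_in, count_in, indices_out, count_out, R,
                             nR, S, nS, &com_indices, &com_count, steps, comm);

    int nRmax = 0, nSmax = 0;
    for (k = 0; k < steps; k++) {
        if (nR[k] > nRmax) nRmax = nR[k];
        if (nS[k] > nSmax) nSmax = nS[k];
    }

    /* com_val is ordered as com_indices: entries corresponding to indices_in
     * carry the values to be moved, entries corresponding to indices_out
     * receive them (copied by s2m_copy, not summed) */
    double *com_val =
            (double *) calloc(com_count ? com_count : 1, sizeof(double));
    /* ... fill the input entries of com_val (application specific) ... */

    butterfly_reshuffle(R, nR, nRmax, S, nS, nSmax, com_val, steps, comm);

    /* cleanup: R[k]/S[k] are only allocated for steps with nR[k]/nS[k] > 0 */
    for (k = 0; k < steps; k++) {
        if (nR[k]) free(R[k]);
        if (nS[k]) free(S[k]);
    }
    free(R);
    free(S);
    free(nR);
    free(nS);
    free(com_val);
    free(com_indices);
}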

int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax,
                     double *val, int steps, MPI_Comm comm) {
    /* double st, t; */
    /* t=0.0; */

    /* this performs the butterfly all-reduce as in the original MIDAPACK
     * routine butterfly() by P. Cargemel, but allows for MPI processes
     * which have no data at the beginning or at the end of the
     * communication. This routine needs to be preceded by a call to either
     * butterfly_reduce_init(), if the pixel distributions prior to and after
     * the communication are the same, or butterfly_reshuffle_init() if they
     * are not. - rs 2022/06/09 */

    int k, p2k, tag;
    int rank, size, rk, sk;
    MPI_Request s_request, r_request;
    double *sbuf, *rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    sbuf = (double *) malloc(nSmax * sizeof(double));
    rbuf = (double *) malloc(nRmax * sizeof(double));
    tag = 0;
    p2k = 1;

    for (k = 0; k < steps; k++) {
        /* st=MPI_Wtime(); */
        rk = (rank + size - p2k) % size;
        if (nR[k])
            MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
        sk = (rank + p2k) % size;
        if (nS[k]) {
            m2s(val, sbuf, S[k], nS[k]); /* fill the sending buffer */
            MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
        }
        if (nR[k]) {
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            s2m_sum(val, rbuf, R[k], nR[k]); /* sum the receive buffer into
                                                the values (nR[k] additions) */
        }
        p2k *= 2;
        tag++;
        if (nS[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
        /* t=t+MPI_Wtime()-st; */
    }
    free(sbuf);
    free(rbuf);
    return 0;
}

int butterfly_reshuffle(int **R, int *nR, int nRmax, int **S, int *nS,
                        int nSmax, double *val, int steps, MPI_Comm comm) {
    /* double st, t; */
    /* t=0.0; */

    /* this performs the butterfly reshuffle of all the pixels without
     * modifying the values assigned to them. If the input distribution
     * contains redundant pixels, it is assumed to be consistent between the
     * different MPI processes. This is not checked by the routine! In such a
     * case the routine may not be optimal either; it should be if the initial
     * distribution has no overlaps between the processes. The routine allows
     * for some of the processes to have no data either on input or on output.
     * This is a recoded MIDAPACK routine, butterfly(), by P. Cargemel.
     * This routine needs to be preceded by a call to
     * butterfly_reshuffle_init(). - rs 2022/06/09 */

    int k, p2k, tag;
    int rank, size, rk, sk;
    MPI_Request s_request, r_request;
    double *sbuf, *rbuf;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    sbuf = (double *) malloc(nSmax * sizeof(double));
    rbuf = (double *) malloc(nRmax * sizeof(double));
    tag = 0;
    p2k = 1;

    for (k = 0; k < steps; k++) {
        /* st=MPI_Wtime(); */
        rk = (rank + size - p2k) % size;
        if (nR[k])
            MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
        sk = (rank + p2k) % size;
        if (nS[k]) {
            m2s(val, sbuf, S[k], nS[k]); /* fill the sending buffer */
            MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
        }
        if (nR[k]) {
            MPI_Wait(&r_request, MPI_STATUS_IGNORE);
            s2m_copy(val, rbuf, R[k], nR[k]); /* copy the receive buffer into
                                                 the values (no summation) */
        }
        p2k *= 2;
        tag++;
        if (nS[k]) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
        /* t=t+MPI_Wtime()-st; */
    }
    free(sbuf);
    free(rbuf);
    return 0;
}

/* gather: submapval[i] = mapval[subset[i]] */
void m2s(double *mapval, double *submapval, int *subset, int count) {
    int i;

    for (i = 0; i < count; i++) { submapval[i] = mapval[subset[i]]; }
}

/* scatter-add: mapval[subset[i]] += submapval[i] */
void s2m_sum(double *mapval, double *submapval, int *subset, int count) {
    int i;

    for (i = 0; i < count; i++) { mapval[subset[i]] += submapval[i]; }
}

/* scatter-copy: mapval[subset[i]] = submapval[i] */
void s2m_copy(double *mapval, double *submapval, int *subset, int count) {
    int i;

    for (i = 0; i < count; i++) { mapval[subset[i]] = submapval[i]; }
}

/* merges two sorted index sets into A1orA2 and returns its cardinality */
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2) {

    /* added cases when either n1 or n2 are zero. One of which *has to*
     * be nonzero. - rs 2022/06/09 */

    int i = 0, j = 0, k = 0;

    if (n1 && n2) {
        while (i < n1 || j < n2) {
            if (j == n2) { /* A2 exhausted : take the rest of A1 */
                A1orA2[k] = A1[i];
                i++;
            } else if (i == n1) { /* A1 exhausted : take the rest of A2 */
                A1orA2[k] = A2[j];
                j++;
            } else if (A1[i] < A2[j]) {
                A1orA2[k] = A1[i];
                i++;
            } else if (A1[i] > A2[j]) {
                A1orA2[k] = A2[j];
                j++;
            } else {
                A1orA2[k] = A1[i];
                i++;
                j++;
            }
            k++;
        }
    } else {
        if (n1 == 0) {
            for (j = 0; j < n2; j++) A1orA2[j] = A2[j];
            k = n2;
        } else {
            for (i = 0; i < n1; i++) A1orA2[i] = A1[i];
            k = n1;
        }
    }

    return k;
}

int set_and(int *A1, int n1, int *A2, int n2,
            int *A1andA2) { /* this one is only called if the result is a
                               non-empty set, so both n1 and n2 have to be
                               non-zero */
    int i = 0, j = 0, k = 0;
    while (i < n1 && j < n2) {
        if (A1[i] < A2[j]) {
            i++;
        } else if (A1[i] > A2[j]) {
            j++;
        } else {
            A1andA2[k] = A1[i];
            k++;
            i++;
            j++;
        }
    }
    return k;
}

/* returns the cardinality of the union of two sorted index sets */
int card_or(int *A1, int n1, int *A2, int n2) {

    /* accounts for cases with either n1 or n2 or both equal to zero - rs
     * 2022/06/09 */

    int i = 0, j = 0, k = 0;

    if (n1 && n2) {
        while (i < n1 || j < n2) {
            if (j == n2) { /* A2 exhausted */
                i++;
            } else if (i == n1) { /* A1 exhausted */
                j++;
            } else if (A1[i] < A2[j]) {
                i++;
            } else if (A1[i] > A2[j]) {
                j++;
            } else {
                i++;
                j++;
            }
            k++;
        }
    } else {
        k = (n1 == 0) ? n2 : n1;
    }
    return k;
}

/* replaces each entry of subA (a sorted subset of A) by its position within
 * the sorted array A */
void subset2map(int *A, int nA, int *subA, int nsubA) {
    int i = 0, j = 0;
    while (i < nA && j < nsubA) {
        if (A[i] < subA[j]) {
            i++;
        } else {
            subA[j] = i;
            i++;
            j++;
        }
    }
}
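
/* --------------------------------------------------------------------------
 * Illustrative sketch only -- not part of the original MIDAPACK code.  A
 * tiny, hypothetical driver showing what the set helpers above compute on
 * sorted index arrays.
 * -------------------------------------------------------------------------- */
void example_set_helpers(void) {

    int A1[] = {2, 5, 7};
    int A2[] = {5, 9};
    int uni[4], inter[1];
    int sub[] = {5, 9};

    int nu = card_or(A1, 3, A2, 2);        /* size of the union: 4 */
    set_or(A1, 3, A2, 2, uni);             /* uni   = {2, 5, 7, 9}  */
    int ni = set_and(A1, 3, A2, 2, inter); /* ni = 1, inter = {5}   */

    subset2map(uni, nu, sub, 2); /* sub becomes {1, 3}: the positions of
                                    5 and 9 within uni */

    printf("|A1 or A2| = %d, |A1 and A2| = %d, sub = {%d, %d}\n", nu, ni,
           sub[0], sub[1]);
}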