48 int ring_init(
int *indices,
int count,
int **R,
int *nR,
int **S,
int *nS,
49 int steps, MPI_Comm comm) {
51 int size, rank, sp, rp;
53 MPI_Request s_request, r_request;
55 MPI_Comm_size(comm, &size);
56 MPI_Comm_rank(comm, &rank);
57 MPI_Allreduce(&count, &nbuf, 1, MPI_INT, MPI_MAX,
59 buf = (
int *) malloc(nbuf *
sizeof(
int));
61 for (p = 1; p < steps; p++) {
63 sp = (rank + p) % size;
64 rp = (rank + size - p) % size;
65 MPI_Isend(&count, 1, MPI_INT, sp, 0, comm,
67 MPI_Irecv(&nbuf, 1, MPI_INT, rp, 0, comm,
70 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
71 MPI_Irecv(buf, nbuf, MPI_INT, rp, tag, comm,
73 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
74 MPI_Isend(indices, count, MPI_INT, sp, tag, comm,
78 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
79 nR[p] =
card_and(indices, count, buf,
81 nS[steps - p] = nR[p];
82 R[p] = (
int *) malloc(nR[p] *
sizeof(
int));
83 S[steps - p] = (
int *) malloc(nS[steps - p]
85 map_and(indices, count, buf, nbuf, R[p]);
109 int ring_reduce(
int **R,
int *nR,
int nRmax,
int **S,
int *nS,
int nSmax,
110 double *val,
double *res_val,
int steps, MPI_Comm comm) {
111 int tag, rank, size, p;
112 MPI_Request s_request, r_request;
116 MPI_Comm_size(comm, &size);
117 MPI_Comm_rank(comm, &rank);
120 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
121 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
123 for (p = 1; p < steps; p++) {
124 rp = (rank + size - p) % size;
125 MPI_Irecv(rbuf, nR[p], MPI_DOUBLE, rp, tag, comm, &r_request);
126 sp = (rank + p) % size;
127 m2s(val, sbuf, S[p], nS[p]);
128 MPI_Isend(sbuf, nS[p], MPI_DOUBLE, sp, tag, comm, &s_request);
132 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
133 s2m_sum(res_val, rbuf, R[p], nR[p]);
135 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
158 double *val,
double *res_val,
int steps, MPI_Comm comm) {
160 MPI_Request s_request, r_request;
161 int sp, rp, *rindx, *sindx, *rdisp, *sdisp;
168 MPI_Comm_rank(comm, &rank);
170 rbuf = (
double *) malloc(nRtot *
sizeof(
double));
171 sbuf = (
double *) malloc(nStot *
sizeof(
double));
173 rindx = (
int *) calloc(size,
sizeof(
int));
174 sindx = (
int *) calloc(size,
sizeof(
int));
176 rdisp = (
int *) calloc(size,
sizeof(
int));
177 sdisp = (
int *) calloc(size,
sizeof(
int));
181 for (p = 0; p < steps; p++) {
182 rp = (rank + size - p) % size;
184 sp = (rank + p) % size;
188 for (p = 1; p < size; p++) {
189 sdisp[p] = sdisp[p - 1] + sindx[p - 1];
190 rdisp[p] = rdisp[p - 1] + rindx[p - 1];
195 for (p = 0; p < steps; p++) {
196 sp = (rank + p) % size;
197 m2s(val, &sbuf[sdisp[sp]], S[p], nS[p]);
200 MPI_Alltoallv(sbuf, sindx, sdisp, MPI_DOUBLE, rbuf, rindx, rdisp,
205 for (p = 0; p < steps; p++) {
206 rp = (rank + size - p) % size;
207 s2m_sum(res_val, &rbuf[rdisp[rp]], R[p],
235 double *res_val,
int steps, MPI_Comm comm) {
236 int tag, rank, size, p;
237 MPI_Request *s_request, *r_request;
239 double **sbuf, **rbuf;
241 MPI_Comm_size(comm, &size);
242 MPI_Comm_rank(comm, &rank);
245 s_request = (MPI_Request *) malloc((steps - 1) *
sizeof(MPI_Request));
246 r_request = (MPI_Request *) malloc((steps - 1) *
sizeof(MPI_Request));
248 rbuf = (
double **) malloc((steps - 1) *
sizeof(
double *));
249 sbuf = (
double **) malloc((steps - 1) *
sizeof(
double *));
251 for (p = 1; p < steps; p++) {
253 rbuf[p - 1] = (
double *) malloc(nR[p] *
sizeof(
double));
254 sbuf[p - 1] = (
double *) malloc(nS[p] *
sizeof(
double));
255 m2s(val, sbuf[p - 1], S[p], nS[p]);
259 for (p = 1; p < steps; p++) {
261 sp = (rank + p) % size;
262 rp = (rank + size - p) % size;
264 MPI_Irecv(rbuf[p - 1], nR[p], MPI_DOUBLE, rp, tag, comm,
266 MPI_Isend(sbuf[p - 1], nS[p], MPI_DOUBLE, sp, tag, comm,
270 MPI_Waitall(size - 1, r_request, MPI_STATUSES_IGNORE);
272 for (p = 1; p < steps; p++) {
273 s2m_sum(res_val, rbuf[p - 1], R[p],
276 MPI_Waitall(size - 1, s_request, MPI_STATUSES_IGNORE);
299 double *val,
double *res_val,
int steps,
301 int tag, rank, size, p;
302 MPI_Request *s_request, *r_request;
303 int sp, rp, nesi, neri;
304 double **sbuf, **rbuf;
306 MPI_Comm_size(comm, &size);
307 MPI_Comm_rank(comm, &rank);
310 s_request = (MPI_Request *) malloc(nneS *
sizeof(MPI_Request));
311 r_request = (MPI_Request *) malloc(nneR *
sizeof(MPI_Request));
313 rbuf = (
double **) malloc(nneR *
sizeof(
double *));
314 sbuf = (
double **) malloc(nneS *
sizeof(
double *));
317 for (p = 1; p < steps; p++) {
319 sbuf[nesi] = (
double *) malloc(nS[p] *
sizeof(
double));
320 m2s(val, sbuf[nesi], S[p], nS[p]);
328 for (p = 1; p < steps; p++) {
329 sp = (rank + p) % size;
330 rp = (rank + size - p) % size;
332 rbuf[neri] = (
double *) malloc(nR[p] *
sizeof(
double));
333 MPI_Irecv(rbuf[neri], nR[p], MPI_DOUBLE, rp, tag, comm,
338 MPI_Isend(sbuf[nesi], nS[p], MPI_DOUBLE, sp, tag, comm,
344 MPI_Waitall(nneR, r_request, MPI_STATUSES_IGNORE);
347 for (p = 1; p < steps; p++) {
349 s2m_sum(res_val, rbuf[neri], R[p],
354 MPI_Waitall(nneS, s_request, MPI_STATUSES_IGNORE);
381 int nSmax,
double *val,
double *res_val,
int steps,
383 int tag, rank, size, p;
384 MPI_Request s_request, r_request;
388 MPI_Comm_size(comm, &size);
389 MPI_Comm_rank(comm, &rank);
392 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
393 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
395 for (p = 1; p < steps; p++) {
396 rp = (rank + size - p) % size;
398 MPI_Irecv(rbuf, nR[p], MPI_DOUBLE, rp, tag, comm, &r_request);
399 sp = (rank + p) % size;
401 m2s(val, sbuf, S[p], nS[p]);
402 MPI_Isend(sbuf, nS[p], MPI_DOUBLE, sp, tag, comm, &s_request);
407 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
411 if (nS[p] != 0) MPI_Wait(&s_request, MPI_STATUS_IGNORE);
void m2s(double *mapval, double *submapval, int *subset, int count)
void s2m_sum(double *mapval, double *submapval, int *subset, int count)
Sum the submap values into the map values array.
int card_and(int *A1, int n1, int *A2, int n2)
int map_and(int *A1, int n1, int *A2, int n2, int *mapA1andA2)
Compute the map of the intersection of A1 and A2, indexed relative to A1.
int ring_noempty_step_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like communication scheme.
int alltoallv_reduce(int **R, int *nR, int nRtot, int **S, int *nS, int nStot, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using an MPI-Alltoallv call.
int ring_nonblocking_reduce(int **R, int *nR, int **S, int *nS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking communication sch...
int ring_noempty_reduce(int **R, int *nR, int nneR, int **S, int *nS, int nneS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking no-empty communic...
int ring_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int steps, MPI_Comm comm)
Initialize tables for ring-like communication scheme.
int ring_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like communication scheme.