54 int MatInit(Mat *A,
int m,
int nnz,
int *indices,
double *values,
int flag
108 int i = 0, size, rank;
109 double maxSizeR = 0.0;
110 double maxSizeS = 0.0;
111 double amountSizeR = 0.0;
112 double amountSizeS = 0.0;
113 double stepSum = 0.0, stepAvg = 0.0;
115 double *amountSizeByStep = NULL;
116 double minStep = 0.0, maxStep = 0.0;
119 MPI_Comm comm = MPI_COMM_WORLD;
120 MPI_Comm_rank(comm, &rank);
121 MPI_Comm_size(comm, &size);
122 s = (
double *) malloc(4 *
sizeof(
double));
123 r = (
double *) malloc(4 * 3 *
sizeof(
double));
124 amountSizeByStep = (
double *) malloc(A->steps *
sizeof(
double));
129 for (i = 0; i < A->steps; i++) {
130 amountSizeR += A->nR[i];
131 amountSizeS += A->nS[i];
132 if (A->nR[i] > maxSizeR) maxSizeR = A->nR[i];
133 if (A->nS[i] > maxSizeS) maxSizeS = A->nS[i];
138 case BUTTERFLY_BLOCKING_1:
139 for (i = 0; i < A->steps; i++) {
140 amountSizeR += A->nR[i];
141 amountSizeS += A->nS[i];
142 if (A->nR[i] > maxSizeR) maxSizeR = A->nR[i];
143 if (A->nS[i] > maxSizeS) maxSizeS = A->nS[i];
146 case BUTTERFLY_BLOCKING_2:
147 for (i = 0; i < A->steps; i++) {
148 amountSizeR += A->nR[i];
149 amountSizeS += A->nS[i];
150 if (A->nR[i] > maxSizeR) maxSizeR = A->nR[i];
151 if (A->nS[i] > maxSizeS) maxSizeS = A->nS[i];
154 case NOEMPTYSTEPRING:
155 for (i = 0; i < A->steps; i++) {
156 amountSizeR += A->nR[i];
157 amountSizeS += A->nS[i];
158 if (A->nR[i] > maxSizeR) maxSizeR = A->nR[i];
159 if (A->nS[i] > maxSizeS) maxSizeS = A->nS[i];
164 for (i = 0; i < A->steps; i++) {
165 amountSizeR += A->nR[i];
166 amountSizeS += A->nS[i];
167 if (A->nR[i] > maxSizeR) maxSizeR = A->nR[i];
168 if (A->nS[i] > maxSizeS) maxSizeS = A->nS[i];
172 for (i = 0; i < A->steps; i++) {
173 amountSizeR += A->nR[i];
174 amountSizeS += A->nS[i];
175 if (A->nR[i] > maxSizeR) maxSizeR = A->nR[i];
176 if (A->nS[i] > maxSizeS) maxSizeS = A->nS[i];
180 for (i = 0; i < A->steps; i++) {
181 amountSizeR += A->nR[i];
182 amountSizeS += A->nS[i];
183 if (A->nR[i] > maxSizeR) maxSizeR = A->nR[i];
184 if (A->nS[i] > maxSizeS) maxSizeS = A->nS[i];
188 for (i = 0; i < A->steps; i++) {
189 amountSizeR += A->nR[i];
190 amountSizeS += A->nS[i];
194 amountSizeR = A->com_count;
195 amountSizeS = A->com_count;
196 maxSizeR = A->com_count;
197 maxSizeS = A->com_count;
201 if (A->flag != ALLREDUCE && A->flag != ALLTOALLV) {
204 t = (
double *) malloc(A->steps *
sizeof(
double));
206 for (i = 0; i < A->steps; i++) t[i] = A->nS[i];
208 MPI_Reduce(t, amountSizeByStep, A->steps, MPI_DOUBLE, MPI_SUM, 0, comm);
213 stepSum = minStep = maxStep = amountSizeByStep[0];
214 printf(
"\n[MEMORY]Step n°%4d, message size : %e", 0,
215 amountSizeByStep[0]);
216 for (i = 1; i < A->steps; i++) {
217 printf(
"\n[MEMORY]Step n°%4d, message size : %e", i,
218 amountSizeByStep[i]);
219 if (minStep > amountSizeByStep[i])
220 minStep = amountSizeByStep[i];
221 else if (maxStep < amountSizeByStep[i])
222 maxStep = amountSizeByStep[i];
223 stepSum += amountSizeByStep[i];
225 stepAvg = stepSum / A->steps;
232 MPI_Reduce(s, r, 4, MPI_DOUBLE, MPI_SUM, 0, comm);
234 for (i = 0; i < 4; i++) r[i] /= size;
235 MPI_Reduce(s, &r[4], 4, MPI_DOUBLE, MPI_MIN, 0, comm);
236 MPI_Reduce(s, &r[8], 4, MPI_DOUBLE, MPI_MAX, 0, comm);
238 printf(
"\n[MEMORY]Step average : %e\t[%e,%e]", stepAvg,
240 printf(
"\n[MEMORY]Amount of data received : %e\t[%e,%e]", r[0], r[4],
242 printf(
"\n[MEMORY]Amount of data sent : %e\t[%e,%e]", r[1], r[5],
244 printf(
"\n[MEMORY]Message size received : %e\t[%e,%e]", r[2], r[6],
246 printf(
"\n[MEMORY]Message size sent : %e\t[%e,%e]\n", r[3], r[7],
251 free(amountSizeByStep);
266 case BUTTERFLY_BLOCKING_1:
272 case BUTTERFLY_BLOCKING_2:
278 case NOEMPTYSTEPRING:
334 free(A->com_indices);
342 case BUTTERFLY_BLOCKING_1:
343 free(A->com_indices);
349 case BUTTERFLY_BLOCKING_2:
350 free(A->com_indices);
356 case NOEMPTYSTEPRING:
388 free(A->com_indices);
418 MPI_Comm_rank(mat->comm, &rank);
425 sprintf(fn,
"%s_%d.dat", filename, rank);
429 printf(
"cannot open file %s", fn);
432 while (feof(in) == 0 && i < (mat->m * mat->nnz)) {
434 fscanf(in,
"%d %lf", &(mat->indices[i]), &(mat->values[i]));
435 }
else if (mat->nnz == 2) {
436 fscanf(in,
"%d %lf %d %lf", &(mat->indices[i]), &(mat->values[i]),
437 &(mat->indices[i + 1]), &(mat->values[i + 1]));
443 if (i != mat->m * mat->nnz) { printf(
"WARNNING data size doesn't fit\n"); }
466 MPI_Comm_rank(mat->comm, &rank);
468 sprintf(fn,
"%s_%d.dat", filename, rank);
470 out = fopen(fn,
"w");
472 printf(
"cannot open file %s", fn);
475 for (i = 0; i < (mat->nnz * mat->m); i += mat->nnz) {
476 for (j = 0; j < mat->nnz; j++) {
477 fprintf(out,
"%d ", mat->indices[i + j]);
478 fprintf(out,
"%f ", mat->values[i + j]);
499 tmp_indices = (
int *) malloc(
500 (int64_t) (A->m) * A->nnz
502 memcpy(tmp_indices, A->indices,
503 (int64_t) (A->m) * A->nnz *
sizeof(
int));
507 A->lcount =
ssort(tmp_indices, A->m * A->nnz,
510 A->lindices = (
int *) malloc(A->lcount *
sizeof(
int));
511 memcpy(A->lindices, tmp_indices,
512 A->lcount *
sizeof(
int));
515 sindex(A->lindices, A->lcount, A->indices, A->nnz * A->m);
518 if (A->lindices[0] < 0) { A->trash_pix = 1; }
535 MPI_Comm_size(A->comm, &size);
536 if ((A->flag == BUTTERFLY || A->flag == BUTTERFLY_BLOCKING_1
537 || A->flag == BUTTERFLY_BLOCKING_2)
542 A->steps =
log_2(size);
543 A->S = (
int **) malloc(
544 A->steps *
sizeof(
int *));
545 A->R = (
int **) malloc(
546 A->steps *
sizeof(
int *));
547 A->nS = (
int *) malloc(
548 A->steps *
sizeof(
int));
549 A->nR = (
int *) malloc(
550 A->steps *
sizeof(
int));
552 A->lcount - (A->nnz) * (A->trash_pix), A->R, A->nR,
553 A->S, A->nS, &(A->com_indices), &(A->com_count),
558 case BUTTERFLY_BLOCKING_1:
559 A->steps =
log_2(size);
560 A->S = (
int **) malloc(
561 A->steps *
sizeof(
int *));
562 A->R = (
int **) malloc(
563 A->steps *
sizeof(
int *));
564 A->nS = (
int *) malloc(
565 A->steps *
sizeof(
int));
566 A->nR = (
int *) malloc(
567 A->steps *
sizeof(
int));
569 A->lcount - (A->nnz) * (A->trash_pix), A->R, A->nR,
570 A->S, A->nS, &(A->com_indices), &(A->com_count),
573 case BUTTERFLY_BLOCKING_2:
574 A->steps =
log_2(size);
575 A->S = (
int **) malloc(
576 A->steps *
sizeof(
int *));
577 A->R = (
int **) malloc(
578 A->steps *
sizeof(
int *));
579 A->nS = (
int *) malloc(
580 A->steps *
sizeof(
int));
581 A->nR = (
int *) malloc(
582 A->steps *
sizeof(
int));
584 A->lcount - (A->nnz) * (A->trash_pix), A->R, A->nR,
585 A->S, A->nS, &(A->com_indices), &(A->com_count),
588 case NOEMPTYSTEPRING:
590 A->S = (
int **) malloc(
591 A->steps *
sizeof(
int *));
592 A->R = (
int **) malloc(
593 A->steps *
sizeof(
int *));
594 A->nS = (
int *) malloc(
595 A->steps *
sizeof(
int));
596 A->nR = (
int *) malloc(
597 A->steps *
sizeof(
int));
598 ring_init(A->lindices + (A->nnz) * (A->trash_pix),
599 A->lcount - (A->nnz) * (A->trash_pix), A->R, A->nR, A->S,
600 A->nS, A->steps, A->comm);
601 A->com_count = A->lcount - (A->nnz) * (A->trash_pix);
602 A->com_indices = A->lindices + (A->nnz) * (A->trash_pix);
607 A->S = (
int **) malloc(
608 A->steps *
sizeof(
int *));
609 A->R = (
int **) malloc(
610 A->steps *
sizeof(
int *));
611 A->nS = (
int *) malloc(
612 A->steps *
sizeof(
int));
613 A->nR = (
int *) malloc(
614 A->steps *
sizeof(
int));
615 ring_init(A->lindices + (A->nnz) * (A->trash_pix),
616 A->lcount - (A->nnz) * (A->trash_pix), A->R, A->nR, A->S,
617 A->nS, A->steps, A->comm);
618 A->com_count = A->lcount - (A->nnz) * (A->trash_pix);
619 A->com_indices = A->lindices + (A->nnz) * (A->trash_pix);
623 A->S = (
int **) malloc(
624 A->steps *
sizeof(
int *));
625 A->R = (
int **) malloc(
626 A->steps *
sizeof(
int *));
627 A->nS = (
int *) malloc(
628 A->steps *
sizeof(
int));
629 A->nR = (
int *) malloc(
630 A->steps *
sizeof(
int));
631 ring_init(A->lindices + (A->nnz) * (A->trash_pix),
632 A->lcount - (A->nnz) * (A->trash_pix), A->R, A->nR, A->S,
633 A->nS, A->steps, A->comm);
634 A->com_count = A->lcount - (A->nnz) * (A->trash_pix);
635 A->com_indices = A->lindices + (A->nnz) * (A->trash_pix);
639 A->S = (
int **) malloc(
640 A->steps *
sizeof(
int *));
641 A->R = (
int **) malloc(
642 A->steps *
sizeof(
int *));
643 A->nS = (
int *) malloc(
644 A->steps *
sizeof(
int));
645 A->nR = (
int *) malloc(
646 A->steps *
sizeof(
int));
647 ring_init(A->lindices + (A->nnz) * (A->trash_pix),
648 A->lcount - (A->nnz) * (A->trash_pix), A->R, A->nR, A->S,
649 A->nS, A->steps, A->comm);
650 A->com_count = A->lcount - (A->nnz) * (A->trash_pix);
651 A->com_indices = A->lindices + (A->nnz) * (A->trash_pix);
655 A->S = (
int **) malloc(
656 A->steps *
sizeof(
int *));
657 A->R = (
int **) malloc(
658 A->steps *
sizeof(
int *));
659 A->nS = (
int *) malloc(
660 A->steps *
sizeof(
int));
661 A->nR = (
int *) malloc(
662 A->steps *
sizeof(
int));
663 ring_init(A->lindices + (A->nnz) * (A->trash_pix),
664 A->lcount - (A->nnz) * (A->trash_pix), A->R, A->nR, A->S,
665 A->nS, A->steps, A->comm);
666 A->com_count = A->lcount - (A->nnz) * (A->trash_pix);
667 A->com_indices = A->lindices + (A->nnz) * (A->trash_pix);
670 MPI_Allreduce(&(A->lindices[A->lcount - 1]), &max, 1, MPI_INT,
673 MPI_Allreduce(&(A->lindices[(A->nnz) * (A->trash_pix)]), &min, 1,
676 A->com_count = (max - min + 1);
678 (
int *) malloc((A->lcount - (A->nnz) * (A->trash_pix))
680 i = (A->nnz) * (A->trash_pix);
682 while (j < A->com_count
684 if (min + j < A->lindices[i]) {
687 A->com_indices[i - (A->nnz) * (A->trash_pix)] = j;
708 for (i = 0; i < A->m; i++) y[i] = 0.0;
712 for (i = 0; i < A->m * A->nnz; i += A->nnz) {
713 if (A->indices[i] != 0) {
714 for (j = 0; j < A->nnz; j++) {
715 y[e] += A->values[i + j] * x[A->indices[i + j] - (A->nnz)];
721 for (i = 0; i < A->m * A->nnz; i += A->nnz) {
722 for (j = 0; j < A->nnz; j++) {
723 y[e] += A->values[i + j] * x[A->indices[i + j]];
745 int i, j, e, rank, size;
746 int *rbuf, rbufcount;
747 double *rbufvalues, *lvalues;
749 MPI_Request s_request, r_request;
752 MPI_Comm_rank(A->comm, &rank);
753 MPI_Comm_size(A->comm, &size);
754 lvalues = (
double *) malloc(
755 A->lcount *
sizeof(
double));
756 for (i = 0; i < A->lcount; i++)
760 for (i = 0; i < A->m; i++) {
761 for (j = 0; j < A->nnz; j++) {
762 lvalues[A->indices[i * A->nnz + j]] +=
763 (A->values[i * A->nnz + j]) * y[i];
768 (A->lcount) *
sizeof(
double));
770 &(A->lcount), &(rbufcount), 1, MPI_INT, MPI_MAX,
773 rbuf = (
int *) malloc(rbufcount *
sizeof(
int));
774 rbufvalues = (
double *) malloc(rbufcount *
sizeof(
double));
777 for (p = 1; p < size;
779 rp = (size + rank - p) % size;
780 sp = (rank + p) % size;
781 MPI_Send(&(A->lcount), 1, MPI_INT, sp, 0, A->comm);
782 MPI_Recv(&rbufcount, 1, MPI_INT, rp, 0, A->comm, &status);
784 MPI_Irecv(rbuf, rbufcount, MPI_INT, rp, tag, A->comm,
786 MPI_Isend(A->lindices, A->lcount, MPI_INT, sp, tag, A->comm,
788 MPI_Wait(&r_request, &status);
789 MPI_Wait(&s_request, &status);
791 MPI_Irecv(rbufvalues, rbufcount, MPI_DOUBLE, rp, tag, A->comm,
793 MPI_Isend(lvalues, A->lcount, MPI_DOUBLE, sp, tag, A->comm, &s_request);
795 MPI_Wait(&r_request, &status);
796 m2m_sum(rbufvalues, rbuf, rbufcount, x, A->lindices,
798 MPI_Wait(&s_request, &status);
827 for (i = 0; i < A->lcount - A->nnz; i++) x[i] = 0.0;
830 for (i = 0; i < A->m * A->nnz; i += A->nnz) {
831 if (A->indices[i] != 0) {
833 for (j = 0; j < A->nnz; j++) {
834 x[A->indices[i + j] - (A->nnz)] +=
835 A->values[i + j] * y[e];
842 for (i = 0; i < A->lcount; i++) x[i] = 0.0;
845 for (i = 0; i < A->m * A->nnz; i += A->nnz) {
847 for (j = 0; j < A->nnz; j++) {
848 x[A->indices[i + j]] += A->values[i + j] * y[e];
867 int MatInfo(Mat *mat,
int verbose,
char *filename) {
872 int nnzline, sparsity, maxstep, maxsize, sumline, total;
877 MPI_Comm_rank(mat->comm, &rank);
878 MPI_Comm_size(mat->comm, &size);
880 if (rank == master) {
881 sprintf(fn,
"%s_%s", filename,
"info.txt");
882 out = fopen(fn,
"w");
884 printf(
"cannot open file %s\n", fn);
887 printf(
"open file %s ...", fn);
888 fprintf(out,
"flag %d\n",
891 fprintf(out,
"rows %d\n ", mat->m);
892 fprintf(out,
"nnz %d\n", mat->nnz);
912 for (i = 0; i < mat->steps; i++) {
913 sumline += mat->nS[i];
914 if (mat->nS[i] == 0) {
918 MPI_Reduce(&nnzline, &sparsity, 1, MPI_INT, MPI_SUM, 0,
920 MPI_Reduce(&sumline, &total, 1, MPI_INT, MPI_SUM, 0, mat->comm);
921 if (rank == master) {
922 fprintf(out,
"sparsity %d\n", sparsity);
923 fprintf(out,
"total %d\n", total);
927 for (i = 0; i < mat->steps; i++) {
928 MPI_Reduce(&(mat->nS[i]), &maxstep, 1, MPI_INT, MPI_MAX, 0,
932 if (rank == master) {
933 fprintf(out,
"maxsize %d\n ", maxsize);
999 if (rank == master) {
1001 printf(
"close %s\n", fn);
1010 int nSmax, nRmax, nStot, nRtot;
1012 lvalues = (
double *) malloc(
1013 (A->lcount - (A->nnz) * (A->trash_pix))
1016 (A->lcount - (A->nnz) * (A->trash_pix))
1023 for (k = 0; k < A->steps;
1025 if (A->nR[k] > nRmax) nRmax = A->nR[k];
1026 for (k = 0; k < A->steps; k++)
1027 if (A->nS[k] > nSmax) nSmax = A->nS[k];
1028 com_val = (
double *) malloc(A->com_count *
sizeof(
double));
1029 for (i = 0; i < A->com_count; i++) com_val[i] = 0.0;
1030 m2m(lvalues, A->lindices + (A->nnz) * (A->trash_pix),
1031 A->lcount - (A->nnz) * (A->trash_pix), com_val, A->com_indices,
1035 m2m(com_val, A->com_indices, A->com_count, x,
1036 A->lindices + (A->nnz) * (A->trash_pix),
1037 A->lcount - (A->nnz) * (A->trash_pix));
1042 case BUTTERFLY_BLOCKING_1:
1043 for (k = 0; k < A->steps;
1045 if (A->nR[k] > nRmax) nRmax = A->nR[k];
1046 for (k = 0; k < A->steps; k++)
1047 if (A->nS[k] > nSmax) nSmax = A->nS[k];
1048 com_val = (
double *) malloc(A->com_count *
sizeof(
double));
1049 for (i = 0; i < A->com_count; i++) com_val[i] = 0.0;
1050 m2m(lvalues, A->lindices + (A->nnz) * (A->trash_pix),
1051 A->lcount - (A->nnz) * (A->trash_pix), com_val, A->com_indices,
1054 nSmax, com_val, A->steps, A->comm);
1055 m2m(com_val, A->com_indices, A->com_count, x,
1056 A->lindices + (A->nnz) * (A->trash_pix),
1057 A->lcount - (A->nnz) * (A->trash_pix));
1060 case BUTTERFLY_BLOCKING_2:
1061 for (k = 0; k < A->steps;
1063 if (A->nR[k] > nRmax) nRmax = A->nR[k];
1064 for (k = 0; k < A->steps; k++)
1065 if (A->nS[k] > nSmax) nSmax = A->nS[k];
1066 com_val = (
double *) malloc(A->com_count *
sizeof(
double));
1067 for (i = 0; i < A->com_count; i++) com_val[i] = 0.0;
1068 m2m(lvalues, A->lindices + (A->nnz) * (A->trash_pix),
1069 A->lcount - (A->nnz) * (A->trash_pix), com_val, A->com_indices,
1072 nSmax, com_val, A->steps, A->comm);
1073 m2m(com_val, A->com_indices, A->com_count, x,
1074 A->lindices + (A->nnz) * (A->trash_pix),
1075 A->lcount - (A->nnz) * (A->trash_pix));
1078 case NOEMPTYSTEPRING:
1079 for (k = 1; k < A->steps;
1081 if (A->nR[k] > nRmax) nRmax = A->nR[k];
1084 lvalues, x, A->steps, A->comm);
1088 for (k = 1; k < A->steps;
1090 if (A->nR[k] > nRmax) nRmax = A->nR[k];
1092 ring_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, lvalues, x,
1100 for (k = 1; k < A->steps; k++)
1101 if (A->nR[k] != 0) ne++;
1106 com_val = (
double *) malloc(A->com_count *
sizeof(
double));
1107 out_val = (
double *) malloc(A->com_count *
sizeof(
double));
1108 for (i = 0; i < A->com_count; i++) {
1112 s2m(com_val, lvalues, A->com_indices,
1113 A->lcount - (A->nnz) * (A->trash_pix));
1117 MPI_Allreduce(com_val, out_val, A->com_count, MPI_DOUBLE, MPI_SUM,
1122 m2s(out_val, x, A->com_indices,
1123 A->lcount - (A->nnz) * (A->trash_pix));
1130 for (k = 0; k < A->steps; k++) {
void m2s(double *mapval, double *submapval, int *subset, int count)
int m2m_sum(double *vA1, int *A1, int n1, double *vA2, int *A2, int n2)
int m2m(double *vA1, int *A1, int n1, double *vA2, int *A2, int n2)
void s2m(double *mapval, double *submapval, int *subset, int count)
Assign values from the submap values array into the corresponding entries of the map values array.
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme.
int butterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm)
Initialize tables for the butterfly-like communication scheme. This routine sets up the tables needed for the b...
int butterfly_blocking_1instr_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme.
int sindex(int *T, int nT, int *A, int nA)
int ssort(int *indices, int count, int flag)
int TrMatVecProd(Mat *A, double *y, double *x, int pflag)
int MatInit(Mat *A, int m, int nnz, int *indices, double *values, int flag #ifdef W_MPI, MPI_Comm comm #endif)
int TrMatVecProd_Naive(Mat *A, double *y, double *x, int pflag)
int MatLoad(Mat *mat, char *filename)
int MatComShape(Mat *A, int flag, MPI_Comm comm)
int MatSave(Mat *mat, char *filename)
int greedyreduce(Mat *A, double *x)
int MatInfo(Mat *mat, int verbose, char *filename)
Print information about a matrix. Useful function to check, debug, or benchmark. It prints matrix array...
void MatSetIndices(Mat *A, int m, int nnz, int *indices)
void MatSetValues(Mat *A, int m, int nnz, double *values)
int MatLocalShape(Mat *A, int sflag)
int MatVecProd(Mat *A, double *x, double *y, int pflag)
int ring_noempty_step_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like communication scheme.
int alltoallv_reduce(int **R, int *nR, int nRtot, int **S, int *nS, int nStot, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using an MPI-Alltoallv call.
int ring_nonblocking_reduce(int **R, int *nR, int **S, int *nS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking communication sch...
int ring_noempty_reduce(int **R, int *nR, int nneR, int **S, int *nS, int nneS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking no-empty communic...
int ring_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int steps, MPI_Comm comm)
Initialize tables for ring-like communication scheme.
int ring_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like communication scheme.