29 int CMatInit(CMat *A,
int r,
int *m,
int *nnz,
int **indices,
double **values,
36 int M, k, *tmp_indices;
40 A->disp = (
int *) malloc((A->r + 1) *
sizeof(int));
43 for (k = 1; k <= A->r; k++) {
44 A->disp[k] = A->disp[k - 1] + A->m[k - 1] * A->nnz[k - 1];
60 tmp_indices = (
int *) malloc(
63 for (k = 0; k < A->r; k++) {
64 memcpy(tmp_indices + A->disp[k], A->indices[k],
65 A->m[k] * A->nnz[k] *
sizeof(
int));
68 A->lcount =
ssort(tmp_indices, A->disp[A->r],
70 A->lindices = (
int *) malloc((A->lcount) *
sizeof(int));
71 memcpy(A->lindices, tmp_indices,
76 for (k = 0; k < A->r; k++) {
77 sindex(A->lindices, A->lcount, A->indices[k],
101 if (A->flag != NONE) {
108 if (A->nS) free(A->nS);
120 MPI_Comm_size(mat->comm, &size);
121 if (flag == BUTTERFLY) {
124 mat->steps =
log_2(size);
129 }
else if (flag == NONE) {
136 mat->S = (
int **) malloc(mat->steps
138 mat->R = (
int **) malloc(mat->steps
140 mat->nS = (
int *) malloc(mat->steps
142 mat->nR = (
int *) malloc(
143 mat->steps *
sizeof(
int));
145 if (mat->flag == BUTTERFLY) {
146 butterfly_init(mat->lindices, mat->lcount, mat->R, mat->nR, mat->S,
147 mat->nS, &(mat->com_indices), &(mat->com_count),
148 mat->steps, mat->comm);
150 ring_init(mat->lindices, mat->lcount, mat->R, mat->nR, mat->S, mat->nS,
151 mat->steps, mat->comm);
152 mat->com_count = mat->lcount;
153 mat->com_indices = mat->lindices;
161 int CMatVecProd(CMat *A,
double *xvalues,
double *yvalues,
int pflag) {
164 for (i = 0; i < A->disp[A->r]; i++) yvalues[i] = 0.0;
166 for (k = 0; k < A->r; k++) {
167 for (i = 0; i < A->m[k]; i += A->nnz[k]) {
168 for (j = 0; j < A->nnz[k]; j++) {
170 A->values[k][i + j] * xvalues[A->indices[k][i + j]];
179 int CTrMatVecProd(CMat *A,
double *in_values,
double *out_values,
int pflag) {
185 lvalues = (
double *) malloc(
188 for (i = 0; i < A->lcount; i++) lvalues[i] = 0.0;
191 for (k = 0; k < A->r; k++) {
192 for (i = 0; i < A->m[k]; i += A->nnz[k]) {
193 for (j = 0; j < A->nnz[k]; j++) {
194 lvalues[A->indices[k][i + j]] +=
195 A->values[k][i + j] * in_values[l];
200 memcpy(out_values, lvalues,
207 if (A->flag == BUTTERFLY) {
208 for (k = 0; k < A->steps; k++)
209 if (A->nR[k] > nRmax) nRmax = A->nR[k];
210 for (k = 0; k < A->steps; k++)
211 if (A->nS[k] > nSmax) nSmax = A->nS[k];
214 com_val = (
double *) malloc(A->com_count *
sizeof(
double));
215 for (i = 0; i < A->com_count; i++) { com_val[i] = 0.0; }
216 m2m(lvalues, A->lindices, A->lcount, com_val, A->com_indices,
220 m2m(com_val, A->com_indices, A->com_count, out_values, A->lindices,
223 }
else if (A->flag == RING) {
224 for (k = 1; k < A->steps; k++)
225 if (A->nR[k] > nRmax) nRmax = A->nR[k];
228 ring_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, lvalues, out_values,
230 }
else if (A->flag == NONBLOCKING) {
233 }
else if (A->flag == NOEMPTY) {
235 for (k = 1; k < A->steps; k++)
236 if (A->nR[k] != 0) ne++;
238 out_values, A->steps, A->comm);
int m2m(double *vA1, int *A1, int n1, double *vA2, int *A2, int n2)
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme.
int butterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm)
Initialize tables for butterfly-like communication scheme. This routine sets up the needed tables for the b...
int sindex(int *T, int nT, int *A, int nA)
int ssort(int *indices, int count, int flag)
int CMatComShape(CMat *mat, int flag)
int CMatVecProd(CMat *A, double *xvalues, double *yvalues, int pflag)
int CMatInit(CMat *A, int r, int *m, int *nnz, int **indices, double **values, int flag #ifdef W_MPI, MPI_Comm comm #endif)
int CTrMatVecProd(CMat *A, double *in_values, double *out_values, int pflag)
int ring_nonblocking_reduce(int **R, int *nR, int **S, int *nS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking communication scheme.
int ring_noempty_reduce(int **R, int *nR, int nneR, int **S, int *nS, int nneS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking no-empty communication scheme.
int ring_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int steps, MPI_Comm comm)
Initialize tables for ring-like communication scheme.
int ring_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like communication scheme.