/* NOTE(review): this chunk is an extraction artifact.  The numbers embedded
 * in the text (29, 36, 40, ...) are the ORIGINAL file's line numbers; most
 * statements between them are missing from this view.  The comments below
 * describe only what the visible fragments establish — do not treat this
 * span as compilable code. */
/* CMatInit: build a CMat out of A->r blocks.  Per the signature listing at
 * the end of this chunk: CMatInit(CMat *A, int r, int *m, int *nnz,
 * int **indices, double **values, int flag, [MPI_Comm comm under W_MPI]). */
29 int CMatInit(CMat *A, 
int r, 
int *m, 
int *nnz, 
int **indices, 
double **values,

   36     int M, k, *tmp_indices;

/* disp[] is a prefix-sum table over the blocks: disp[k] = sum_{j<k} m[j]*nnz[j],
 * so disp[A->r] is the total number of stored (index,value) entries.
 * NOTE(review): disp[0] = 0 is presumably assigned on a line not visible
 * here (between orig. lines 40 and 43) — confirm against the full source. */
   40     A->disp = (
int *) malloc((A->r + 1) * 
sizeof(int)); 

   43     for (k = 1; k <= A->r; k++) {

   44         A->disp[k] = A->disp[k - 1] + A->m[k - 1] * A->nnz[k - 1];

/* Flatten every block's index table into one scratch array, each block at
 * its disp[k] offset. */
   60     tmp_indices = (
int *) malloc(

   63     for (k = 0; k < A->r; k++) {

   64         memcpy(tmp_indices + A->disp[k], A->indices[k],

   65                A->m[k] * A->nnz[k] * 
sizeof(
int)); 

/* ssort(indices, count, flag) — judging by how its return value is used,
 * it sorts the flat array and returns the number of distinct local indices
 * (lcount). */
   68     A->lcount   = 
ssort(tmp_indices, A->disp[A->r],

   70     A->lindices = (
int *) malloc((A->lcount) * 
sizeof(int)); 

   71     memcpy(A->lindices, tmp_indices,

/* sindex(T, nT, A, nA): relabel each block's global indices as positions
 * into the sorted local table lindices (see signature listing below). */
   76     for (k = 0; k < A->r; k++) {

   77         sindex(A->lindices, A->lcount, A->indices[k],

/* NOTE(review): orig. lines 101/108 are far past the fragments above; they
 * likely belong to communication setup/teardown for A->flag != NONE.  The
 * enclosing control flow is not visible in this chunk. */
  101     if (A->flag != NONE) { 

  108         if (A->nS) free(A->nS);
 
/* CMatComShape fragment (orig. lines 120-153): pick the communication
 * scheme (BUTTERFLY / RING / NONE / ...) and allocate the per-step
 * send/receive tables.  Signature per the listing at the end of this
 * chunk: CMatComShape(CMat *mat, int flag).  Many intervening lines are
 * missing from this extraction. */
  120     MPI_Comm_size(mat->comm, &size);

/* Butterfly exchanges pair processes over log2(size) stages. */
  121     if (flag == BUTTERFLY) {

  124             mat->steps = 
log_2(size);

  129     } 
else if (flag == NONE) {

/* S/R: per-step lists of indices to send/receive; nS/nR: their lengths.
 * All four arrays have mat->steps entries. */
  136     mat->S  = (
int **) malloc(mat->steps

  138     mat->R  = (
int **) malloc(mat->steps

  140     mat->nS = (
int *) malloc(mat->steps

  142     mat->nR = (
int *) malloc(

  143             mat->steps * 
sizeof(
int)); 

/* butterfly_init additionally builds the merged communication index set
 * (com_indices/com_count); the ring-family schemes reuse the local index
 * set directly (orig. lines 152-153). */
  145     if (mat->flag == BUTTERFLY) {

  146         butterfly_init(mat->lindices, mat->lcount, mat->R, mat->nR, mat->S,

  147                        mat->nS, &(mat->com_indices), &(mat->com_count),

  148                        mat->steps, mat->comm);

  150         ring_init(mat->lindices, mat->lcount, mat->R, mat->nR, mat->S, mat->nS,

  151                   mat->steps, mat->comm);

  152         mat->com_count   = mat->lcount;

/* NOTE(review): com_indices aliases lindices here — no separate free must
 * be done for it in the ring case; verify the destructor honors this. */
  153         mat->com_indices = mat->lindices;
 
/* CMatVecProd fragment (orig. lines 161-170): local product y = A x.
 * pflag appears unused in the visible lines.  The left-hand side of the
 * accumulation (orig. line 169) is missing from this extraction, so the
 * exact y indexing cannot be confirmed here. */
  161 int CMatVecProd(CMat *A, 
double *xvalues, 
double *yvalues, 
int pflag) {

/* Zero the output.  NOTE(review): the bound disp[A->r] equals
 * sum_k m[k]*nnz[k] (see CMatInit) — confirm yvalues really has that many
 * elements rather than one element per row. */
  164     for (i = 0; i < A->disp[A->r]; i++) yvalues[i] = 0.0;

  166     for (k = 0; k < A->r; k++) {                   

/* NOTE(review): the loop bound i < A->m[k] with stride nnz[k] covers only
 * m[k]/nnz[k] groups, whereas disp[] counts m[k]*nnz[k] entries per block.
 * One of the two conventions is inconsistent with the other — possibly an
 * extraction garble of "i < A->m[k] * A->nnz[k]".  Verify in full source. */
  167         for (i = 0; i < A->m[k]; i += A->nnz[k]) { 

  168             for (j = 0; j < A->nnz[k]; j++) {      

  170                         A->values[k][i + j] * xvalues[A->indices[k][i + j]];
 
/* CTrMatVecProd fragment (orig. lines 179-238): transpose product
 * out = A^T in, accumulated first into a local buffer (lvalues, one slot
 * per local index), then reduced across processes using the scheme chosen
 * in CMatComShape (BUTTERFLY / RING / NONBLOCKING / NOEMPTY).  Many lines
 * are missing from this extraction — comments describe only what is
 * visible. */
  179 int CTrMatVecProd(CMat *A, 
double *in_values, 
double *out_values, 
int pflag) {

  185     lvalues = (
double *) malloc(

  188     for (i = 0; i < A->lcount; i++) lvalues[i] = 0.0;

/* Scatter-accumulate: each stored entry adds value * input into the slot of
 * its (relabeled) local index. */
  191     for (k = 0; k < A->r; k++) {                   

  192         for (i = 0; i < A->m[k]; i += A->nnz[k]) { 

  193             for (j = 0; j < A->nnz[k]; j++) {      

  194                 lvalues[A->indices[k][i + j]] +=

/* NOTE(review): the index variable `l` is declared/updated on lines not
 * visible here (presumably one input element per row group) — confirm. */
  195                         A->values[k][i + j] * in_values[l];

  200     memcpy(out_values, lvalues,

/* BUTTERFLY: size the exchange buffers by the largest per-step receive and
 * send counts, then map local values onto the merged communication index
 * set (m2m), reduce, and map back. */
  207     if (A->flag == BUTTERFLY) {        

  208         for (k = 0; k < A->steps; k++) 

  209             if (A->nR[k] > nRmax) nRmax = A->nR[k];

  210         for (k = 0; k < A->steps; k++)

  211             if (A->nS[k] > nSmax) nSmax = A->nS[k];

  214         com_val = (
double *) malloc(A->com_count * 
sizeof(
double));

  215         for (i = 0; i < A->com_count; i++) { com_val[i] = 0.0; }

/* m2m(vA1, A1, n1, vA2, A2, n2): copy values between two index-mapped
 * vectors (signature in the listing at the end of this chunk). */
  216         m2m(lvalues, A->lindices, A->lcount, com_val, A->com_indices,

  220         m2m(com_val, A->com_indices, A->com_count, out_values, A->lindices,

/* RING: note the max-scan starts at k = 1 (step 0 is the process itself),
 * unlike the butterfly branch which starts at 0.
 * NOTE(review): nSmax is passed to ring_reduce but its update loop is not
 * visible in this extraction (orig. lines 226-227?) — confirm it exists. */
  223     } 
else if (A->flag == RING) {

  224         for (k = 1; k < A->steps; k++) 

  225             if (A->nR[k] > nRmax) nRmax = A->nR[k];

  228         ring_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, lvalues, out_values,

  230     } 
else if (A->flag == NONBLOCKING) {

/* NOEMPTY: count the steps with a non-empty receive list (ne) before
 * calling ring_noempty_reduce (call split across missing lines). */
  233     } 
else if (A->flag == NOEMPTY) {

  235         for (k = 1; k < A->steps; k++)

  236             if (A->nR[k] != 0) ne++;

  238                             out_values, A->steps, A->comm);
 
int m2m(double *vA1, int *A1, int n1, double *vA2, int *A2, int n2)
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme.
int butterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm)
Initialize tables for the butterfly-like communication scheme. This routine sets up the needed tables for the b...
int sindex(int *T, int nT, int *A, int nA)
int ssort(int *indices, int count, int flag)
int CMatComShape(CMat *mat, int flag)
int CMatVecProd(CMat *A, double *xvalues, double *yvalues, int pflag)
int CMatInit(CMat *A, int r, int *m, int *nnz, int **indices, double **values, int flag #ifdef W_MPI, MPI_Comm comm #endif)
int CTrMatVecProd(CMat *A, double *in_values, double *out_values, int pflag)
int ring_nonblocking_reduce(int **R, int *nR, int **S, int *nS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking communication scheme.
int ring_noempty_reduce(int **R, int *nR, int nneR, int **S, int *nS, int nneS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking no-empty communication scheme.
int ring_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int steps, MPI_Comm comm)
Initialize tables for ring-like communication scheme.
int ring_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like communication scheme.