/**
 * MIDAPACK - MIcrowave Data Analysis PACKage 1.1b
 * Parallel software tools for high-performance CMB data analysis.
 *
 * mapmatc.c — coarse-grained pointing-matrix operations.
 */
21 #ifdef W_MPI
22 #include <mpi.h>
23 #endif
24 #include "mapmatc.h"
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 int CMatInit(CMat *A, int r, int *m, int *nnz, int **indices, double **values,
30  int flag
31 #ifdef W_MPI
32  ,
33  MPI_Comm comm
34 #endif
35 ) {
36  int M, k, *tmp_indices;
37  A->r = r; // set number of local rows
38  A->m = m; //
39  A->nnz = nnz; //
40  A->disp = (int *) malloc((A->r + 1) * sizeof(int)); // allocate disp array
41  A->disp[0] = 0;
42  // printf(" %d\t%d\t%d\n", A->m[0], A->nnz[0], A->disp[0]);
43  for (k = 1; k <= A->r; k++) {
44  A->disp[k] = A->disp[k - 1] + A->m[k - 1] * A->nnz[k - 1];
45  // if(k!=A->r)
46  // printf(" %d\t%d\t", A->m[k], A->nnz[k]);
47  // printf(" %d\n", A->disp[k]);
48  }
49  A->indices = indices; //
50  A->values = values;
51  /*int i, j;
52  for(k=0; k<A->r; k++){
53  for(i=0; i<A->m[k]*A->nnz[k]; i+=A->nnz[k]){
54  for(j=0; j<A->nnz[k]; j++){
55  printf(" %d ", A->indices[k][i+j]);
56  }
57  }
58  printf("\n");
59  }*/
60  tmp_indices = (int *) malloc(
61  A->disp[A->r]
62  * sizeof(int)); // allocate a tmp copy of indices tab to sort
63  for (k = 0; k < A->r; k++) {
64  memcpy(tmp_indices + A->disp[k], A->indices[k],
65  A->m[k] * A->nnz[k] * sizeof(int)); // copy
66  }
67 
68  A->lcount = ssort(tmp_indices, A->disp[A->r],
69  0); // sequential sort tmp_indices (flag:3=counting sort)
70  A->lindices = (int *) malloc((A->lcount) * sizeof(int)); //
71  memcpy(A->lindices, tmp_indices,
72  (A->lcount)
73  * sizeof(int)); // copy tmp_indices into lindices and free
74  free(tmp_indices);
75 
76  for (k = 0; k < A->r; k++) {
77  sindex(A->lindices, A->lcount, A->indices[k],
78  A->nnz[k]
79  * A->m[k]); // transform indices tab in local indices tab
80  }
81  /*for(k=0; k<A->r; k++){
82  for(i=0; i<A->m[k]*A->nnz[k]; i+=A->nnz[k]){
83  for(j=0; j<A->nnz[k]; j++){
84  printf(" %d ", A->indices[k][i+j]);
85  }
86  }
87  printf("\n");
88  }*/
89  // printf("cmat init 0\n");
90 #ifdef W_MPI
91  A->comm = comm; // link communivcator
92  return CMatComShape(A, flag); // build communication scheme
93 #endif
94 }
95 
96 
97 int CMatFree(CMat *A) {
98  free(A->disp);
99  free(A->lindices);
100 #ifdef W_MPI
101  if (A->flag != NONE) { // if necessary free communication tab
102  if (A->R) //
103  free(A->R); //
104  if (A->nR) //
105  free(A->nR); //
106  if (A->S) //
107  free(A->S); //
108  if (A->nS) free(A->nS);
109  }
110 #endif
111  return 0;
112 }
113 
114 
#ifdef W_MPI
/**
 * Build the communication scheme for a coarse pointing matrix.
 *
 * Chooses the effective scheme (BUTTERFLY falls back to RING when the
 * communicator size is not a power of two), allocates the per-step
 * send/receive map tables, and initializes them via butterfly_init or
 * ring_init. For non-butterfly schemes the communicated map is the local
 * index map itself.
 *
 * @param mat  matrix whose lindices/lcount/comm are already set
 * @param flag requested scheme (BUTTERFLY, RING, NONBLOCKING, NOEMPTY, NONE)
 * @return 0
 */
int CMatComShape(CMat *mat, int flag) {
    int ntasks;

    MPI_Comm_size(mat->comm, &ntasks);
    mat->flag = flag;

    // nothing to build when no communication is requested
    if (flag == NONE) return 0;

    if (flag == BUTTERFLY && is_pow_2(ntasks) == 0) {
        // butterfly needs a power-of-two communicator size
        mat->steps = log_2(ntasks);
    } else {
        if (flag == BUTTERFLY) mat->flag = RING; // fall back to ring
        mat->steps = ntasks;
    }

    // per-step sending/receiving maps and their sizes
    mat->S = (int **) malloc(mat->steps * sizeof(int *));
    mat->R = (int **) malloc(mat->steps * sizeof(int *));
    mat->nS = (int *) malloc(mat->steps * sizeof(int));
    mat->nR = (int *) malloc(mat->steps * sizeof(int));

    if (mat->flag == BUTTERFLY) {
        butterfly_init(mat->lindices, mat->lcount, mat->R, mat->nR, mat->S,
                       mat->nS, &(mat->com_indices), &(mat->com_count),
                       mat->steps, mat->comm);
    } else {
        ring_init(mat->lindices, mat->lcount, mat->R, mat->nR, mat->S, mat->nS,
                  mat->steps, mat->comm);
        mat->com_count = mat->lcount;
        mat->com_indices = mat->lindices;
    }
    return 0;
}
#endif
159 
160 
/**
 * Apply a coarse pointing matrix to a vector: y = A * x.
 *
 * @param A       coarse matrix built by CMatInit (indices are local)
 * @param xvalues input vector, indexed by A's local indices
 * @param yvalues output vector; zeroed here, then accumulated into
 * @param pflag   unused
 * @return 0
 */
int CMatVecProd(CMat *A, double *xvalues, double *yvalues, int pflag) {
    int i, j, k;
    int l; /* running output row index across all coarse levels */
    /* NOTE(review): this zeroes disp[r] = sum_k m[k]*nnz[k] entries, but l
       below advances only once per row — confirm the intended size of
       yvalues before relying on either bound. */
    for (i = 0; i < A->disp[A->r]; i++) yvalues[i] = 0.0;
    l = 0;
    for (k = 0; k < A->r; k++) { // coarse levels
        /* NOTE(review): bound is m[k] with stride nnz[k]; the commented-out
           debug loops in CMatInit iterate i < m[k]*nnz[k] with the same
           stride. Whether m[k] counts rows or flattened values cannot be
           confirmed from this file — verify against callers. */
        for (i = 0; i < A->m[k]; i += A->nnz[k]) { // rows
            for (j = 0; j < A->nnz[k]; j++) { // non-zero per row
                yvalues[l] +=
                        A->values[k][i + j] * xvalues[A->indices[k][i + j]];
            }
            l++;
        }
    }
    return 0;
}
177 
178 
/**
 * Apply the transpose of a coarse pointing matrix: out = A^T * in.
 *
 * Local products are accumulated into lvalues (indexed by local map
 * positions); in MPI builds the per-process partial maps are then summed
 * across processes using the scheme selected in CMatComShape.
 *
 * @param A          coarse matrix (in MPI builds its communication scheme
 *                   must already be initialized)
 * @param in_values  input vector, one entry per matrix row
 * @param out_values output of size A->lcount; fully overwritten
 * @param pflag      unused
 * @return 0 on success, 1 (MPI builds) when A->flag names no known scheme
 */
int CTrMatVecProd(CMat *A, double *in_values, double *out_values, int pflag) {
    int i, j, k;
    int l; /* running input row index across all coarse levels */
    int nSmax, nRmax; /* max send/receive buffer sizes over all steps */
    double *lvalues;

    /* local accumulation map, one slot per distinct local index */
    lvalues = (double *) malloc(
            A->lcount
            * sizeof(double)); /*<allocate and set to 0.0 local values*/
    for (i = 0; i < A->lcount; i++) lvalues[i] = 0.0;

    l = 0;
    for (k = 0; k < A->r; k++) { // coarse levels
        /* NOTE(review): same loop bound question as CMatVecProd — bound m[k]
           with stride nnz[k] vs. the m[k]*nnz[k] bound used in CMatInit's
           commented-out debug loops; confirm before changing. */
        for (i = 0; i < A->m[k]; i += A->nnz[k]) { // rows
            for (j = 0; j < A->nnz[k]; j++) { // non-zero per row
                lvalues[A->indices[k][i + j]] +=
                        A->values[k][i + j] * in_values[l];
            }
            l++;
        }
    }
    /* seed the result with the local partial sums; MPI reductions below
       combine them with the other processes' contributions */
    memcpy(out_values, lvalues,
           (A->lcount)
           * sizeof(double)); /*<copy local values into result values*/
#ifdef W_MPI
    nRmax = 0;
    nSmax = 0;

    if (A->flag == BUTTERFLY) { /*<branch butterfly*/
        for (k = 0; k < A->steps; k++) /*compute max communication buffer size*/
            if (A->nR[k] > nRmax) nRmax = A->nR[k];
        for (k = 0; k < A->steps; k++)
            if (A->nS[k] > nSmax) nSmax = A->nS[k];

        /* map local values onto the butterfly's communicated index set,
           reduce across processes, then map back to the local index set */
        double *com_val;
        com_val = (double *) malloc(A->com_count * sizeof(double));
        for (i = 0; i < A->com_count; i++) { com_val[i] = 0.0; }
        m2m(lvalues, A->lindices, A->lcount, com_val, A->com_indices,
            A->com_count);
        butterfly_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, com_val,
                         A->steps, A->comm);
        m2m(com_val, A->com_indices, A->com_count, out_values, A->lindices,
            A->lcount);
        free(com_val);
    } else if (A->flag == RING) {
        /* NOTE(review): max-size scan starts at k=1, presumably because step
           0 is the self-exchange — confirm against ring_init. */
        for (k = 1; k < A->steps; k++) /*compute max communication buffer size*/
            if (A->nR[k] > nRmax) nRmax = A->nR[k];

        nSmax = nRmax;
        ring_reduce(A->R, A->nR, nRmax, A->S, A->nS, nSmax, lvalues, out_values,
                    A->steps, A->comm);
    } else if (A->flag == NONBLOCKING) {
        ring_nonblocking_reduce(A->R, A->nR, A->S, A->nS, lvalues, out_values,
                                A->steps, A->comm);
    } else if (A->flag == NOEMPTY) {
        /* count the steps that actually exchange data (skipping step 0) */
        int ne = 0;
        for (k = 1; k < A->steps; k++)
            if (A->nR[k] != 0) ne++;
        ring_noempty_reduce(A->R, A->nR, ne, A->S, A->nS, ne, lvalues,
                            out_values, A->steps, A->comm);
    } else {
        return 1; /* unknown scheme; NOTE(review): lvalues leaks on this path */
    }
#endif
    free(lvalues);
    return 0;
}
int m2m(double *vA1, int *A1, int n1, double *vA2, int *A2, int n2)
Definition: alm.c:97
int is_pow_2(int n)
Definition: bitop.c:13
int log_2(int n)
Definition: bitop.c:26
int butterfly_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a butterfly-like communication scheme.
Definition: butterfly.c:236
int butterfly_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int **com_indices, int *com_count, int steps, MPI_Comm comm)
Initialize tables for butterfly-like communication scheme This routine set up needed tables for the b...
Definition: butterfly.c:52
int sindex(int *T, int nT, int *A, int nA)
Definition: cindex.c:34
int ssort(int *indices, int count, int flag)
Definition: csort.c:172
int CMatComShape(CMat *mat, int flag)
Definition: mapmatc.c:116
int CMatVecProd(CMat *A, double *xvalues, double *yvalues, int pflag)
Definition: mapmatc.c:161
int CMatFree(CMat *A)
Definition: mapmatc.c:97
int CMatInit(CMat *A, int r, int *m, int *nnz, int **indices, double **values, int flag #ifdef W_MPI, MPI_Comm comm #endif)
Definition: mapmatc.c:29
int CTrMatVecProd(CMat *A, double *in_values, double *out_values, int pflag)
Definition: mapmatc.c:179
int ring_nonblocking_reduce(int **R, int *nR, int **S, int *nS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking communication sch...
Definition: ring.c:234
int ring_noempty_reduce(int **R, int *nR, int nneR, int **S, int *nS, int nneS, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like non-blocking no-empty communic...
Definition: ring.c:298
int ring_init(int *indices, int count, int **R, int *nR, int **S, int *nS, int steps, MPI_Comm comm)
Initialize tables for ring-like communication scheme.
Definition: ring.c:48
int ring_reduce(int **R, int *nR, int nRmax, int **S, int *nS, int nSmax, double *val, double *res_val, int steps, MPI_Comm comm)
Perform a sparse sum reduction (or mapped reduction) using a ring-like communication scheme.
Definition: ring.c:109