MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
toeplitz_nofft.c
Go to the documentation of this file.
1 
72 #include "toeplitz.h"
73 
74 #define max(a, b) \
75  ({ \
76  __typeof__(a) _a = (a); \
77  __typeof__(b) _b = (b); \
78  _a > _b ? _a : _b; \
79  })
80 
81 #define min(a, b) \
82  ({ \
83  __typeof__(a) _a = (a); \
84  __typeof__(b) _b = (b); \
85  _a < _b ? _a : _b; \
86  })
87 
88 extern int PRINT_RANK;
89 
90 // r1.1 - Frederic Dauvergne (APC)
91 // basic product without fft use.
92 // stmm_simple_core is not used by the API. This is similar to stmm_core by
93 // using a sliding windows algorithm with differents parameters.
94 
95 
96 //=========================================================================
98 
104 int stmm_simple_basic(double **V, int n, int m, double *T, int lambda,
105  double **TV) {
106 
107  int j_first, j_last;
108  int i, j, k, Tid;
109  int n_thread;
110  int idx;
111 
112  int flag_nocomputeedges = 1;
113  int offset_edges = 0;
114 
115  int distcorrmin = lambda - 1;
116 
117  if (flag_nocomputeedges == 1) offset_edges = distcorrmin;
118 
119 
120  for (k = 0; k < m; k++) {
121 
122 #pragma omp parallel for shared(k, lambda, n) private(i, j, j_first, j_last, \
123  Tid)
124  for (i = 0 + offset_edges; i < n - offset_edges; i++) {
125 
126  (*TV)[i + k * n] = 0;
127  j_first = max(i - (lambda - 1), 0);
128  j_last = min(i + lambda, n);
129 
130  for (j = j_first; j < j_last; j++) {
131  Tid = abs(j - i);
132  (*TV)[i + k * n] += T[Tid] * (*V)[j + k * n];
133  } // End j loop
134 
135  } // End i loop
136  } // End k loop
137 
138  return 0;
139 }
140 
141 
142 //=========================================================================
143 
146 
156 int stmm_simple_core(double **V, int n, int m, double *T, int blocksize,
157  int lambda, int nfft, int flag_offset) {
158 
159  // routine variable
160  int status;
161  int i, j, k, p; // loop index
162  int currentsize;
163  int distcorrmin = lambda - 1;
164  int blocksize_eff =
165  blocksize
166  - 2 * distcorrmin; // just a good part after removing the overlaps
167  int nbloc; // a number of subblock of slide/overlap algorithm
168 
169  if (flag_offset == 1)
170  nbloc = ceil((1.0 * (n - 2 * distcorrmin)) / blocksize_eff);
171  else
172  nbloc = ceil((1.0 * n) / blocksize_eff);
173 
174 
175  double *V_bloc, *TV_bloc;
176  V_bloc = (double *) calloc(blocksize * m, sizeof(double));
177  TV_bloc = (double *) calloc(blocksize * m, sizeof(double));
178  if ((V_bloc == 0) || (TV_bloc == 0))
179  return print_error_message(2, __FILE__, __LINE__);
180 
181  int offset = 0;
182  if (flag_offset == 1) offset = distcorrmin;
183 
184  int iV = 0; //"-distcorrmin+offset"; //first index in V
185  int iTV = offset; // first index in TV
186 
187  //"k=0";
188  // first subblock separately as it requires some padding. prepare the block
189  // of the data vector with the overlaps on both sides
190  currentsize = min(blocksize - distcorrmin + offset, n - iV);
191  // note: if flag_offset=0, pad first distcorrmin elements with zeros (for
192  // the first subblock only)
193  // and if flag_offset=1 there is no padding with zeros.
194  copy_block(n, m, *V, blocksize, m, V_bloc, 0, 0, currentsize, m,
195  distcorrmin - offset, 0, 1.0, 0);
196 
197  // do block computation
198  status = stmm_simple_basic(&V_bloc, blocksize, m, T, lambda, &TV_bloc);
199 
200  if (status != 0) {
201  printf("Error in stmm_core.");
202  return print_error_message(7, __FILE__, __LINE__);
203  }
204 
205  // now copy first the new chunk of the data matrix **before** overwriting
206  // the input due to overlaps !
207  iV = blocksize_eff - distcorrmin + offset;
208 
209  if (nbloc > 1) {
210  currentsize = min(blocksize, n - iV); // not to overshoot
211 
212  int flag_reset =
213  (currentsize
214  != blocksize); // with flag_reset=1, always "memset" the block.
215  copy_block(n, m, *V, blocksize, m, V_bloc, iV, 0, currentsize, m, 0, 0,
216  1.0, flag_reset);
217  }
218 
219  // and now store the ouput back in V
220  currentsize = min(blocksize_eff, n - iTV); // to trim the extra rows
221  copy_block(blocksize, m, TV_bloc, n, m, *V, distcorrmin, 0, currentsize, m,
222  iTV, 0, 1.0, 0);
223 
224 
225  iTV += blocksize_eff;
226  // now continue with all the other subblocks
227  for (k = 1; k < nbloc; k++) {
228 
229  // do bloc computation
230  status = stmm_simple_basic(&V_bloc, blocksize, m, T, lambda, &TV_bloc);
231  if (status != 0) break;
232 
233  iV += blocksize_eff;
234  // copy first the next subblock to process
235  if (k != nbloc - 1) {
236  currentsize = min(blocksize, n - iV); // not to overshoot
237 
238  int flag_resetk =
239  (currentsize != blocksize); // with flag_reset=1, always
240  // "memset" the block.
241  copy_block(n, m, *V, blocksize, m, V_bloc, iV, 0, currentsize, m, 0,
242  0, 1.0, flag_resetk);
243  }
244 
245  // and then store the output in V
246  currentsize = min(blocksize_eff, n - iTV); // not to overshoot
247  copy_block(blocksize, m, TV_bloc, n, m, *V, distcorrmin, 0, currentsize,
248  m, iTV, 0, 1.0, 0);
249  iTV += blocksize_eff;
250 
251  } // end bloc computation
252 
253 
254  free(V_bloc);
255  free(TV_bloc);
256 
257  return status;
258 }
int print_error_message(int error_number, char const *file, int line)
Prints error message corresponding to an error number.
Definition: toeplitz.c:127
int copy_block(int ninrow, int nincol, double *Vin, int noutrow, int noutcol, double *Vout, int inrow, int incol, int nblockrow, int nblockcol, int outrow, int outcol, double norm, int set_zero_flag)
Definition: toeplitz.c:514
int PRINT_RANK
Definition: toeplitz.c:117
int stmm_simple_core(double **V, int n, int m, double *T, int blocksize, int lambda, int nfft, int flag_offset)
int stmm_simple_basic(double **V, int n, int m, double *T, int lambda, double **TV)
Perform the product of a Toeplitz matrix by a matrix without using FFT's.