MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
toeplitz_gappy.c
Go to the documentation of this file.
1 /*
2 @file toeplitz_gappy.c version 1.1b, July 2012
3 @brief Gappy routines used to compute the Toeplitz product when gaps are defined
4 @author Frederic Dauvergne
5 **
6 ** Project: Midapack library, ANR MIDAS'09 - Toeplitz Algebra module
7 ** Purpose: Provide Toeplitz algebra tools suitable for Cosmic Microwave
8 Background (CMB)
9 ** data analysis.
10 **
11 ***************************************************************************
12 @note Copyright (c) 2010-2012 APC CNRS Université Paris Diderot
13 @note
14 @note This program is free software; you can redistribute it and/or modify it
15 under the terms
16 @note of the GNU Lesser General Public License as published by the Free Software
17 Foundation;
18 @note either version 3 of the License, or (at your option) any later version.
19 This program is
20 @note distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
21 without even
22 @note the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
23 PURPOSE. See the GNU
24 @note Lesser General Public License for more details.
25 @note
26 @note You should have received a copy of the GNU Lesser General Public License
27 along with this
28 @note program; if not, see http://www.gnu.org/licenses/lgpl.html
29 @note
30 @note For more information about ANR MIDAS'09 project see :
31 @note http://www.apc.univ-paris7.fr/APC_CS/Recherche/Adamis/MIDAS09/index.html
32 @note
33 @note ACKNOWLEDGMENT: This work has been supported in part by the French
34 National Research
35 @note Agency (ANR) through COSINUS program (project MIDAS no. ANR-09-COSI-009).
36 ***************************************************************************
37 ** Log: toeplitz*.c
38 **
39 ** Revision 1.0b 2012/05/07 Frederic Dauvergne (APC)
40 ** Official release 1.0beta. The first installement of the library is the
41 Toeplitz algebra
42 ** module.
43 **
44 ** Revision 1.1b 2012/07/- Frederic Dauvergne (APC)
45 ** - mpi_stbmm allows now rowi-wise order per process datas and no-blocking
46 communications.
47 ** - OMP improvment for optimal cpu time.
48 ** - bug fixed for OMP in the stmm_basic routine.
49 ** - distcorrmin is used to communicate only lambda-1 datas when it is needed.
50 ** - new reshaping routines using transformation functions in stmm. Thus, only
51 one copy
52 ** at most is needed.
53 ** - tpltz_init improvement using define_nfft and define_blocksize routines.
54 ** - add Block struture to define each Toeplitz block.
55 ** - add Flag structure and preprocessing parameters to define the computational
56 strategy.
57 **
58 **
59 ***************************************************************************
60 **
61 */
62 
63 #include "toeplitz.h"
64 
65 #define max(a, b) \
66  ({ \
67  __typeof__(a) _a = (a); \
68  __typeof__(b) _b = (b); \
69  _a > _b ? _a : _b; \
70  })
71 
72 #define min(a, b) \
73  ({ \
74  __typeof__(a) _a = (a); \
75  __typeof__(b) _b = (b); \
76  _a < _b ? _a : _b; \
77  })
78 
79 // r1.1 - Frederic Dauvergne (APC)
80 // this is the gappy routines used when gaps are defined
81 
82 //====================================================================
83 #ifdef W_MPI
91 
119 int mpi_gstbmm(double **V, int nrow, int m, int m_rowwise, Block *tpltzblocks,
120  int nb_blocks_local, int nb_blocks_all, int id0p,
121  int local_V_size, int64_t *id0gap, int *lgap, int ngap,
122  Flag flag_stgy, MPI_Comm comm) {
123 
124  // MPI parameters
125  int rank; // process rank
126  int size; // process number
127 
128  MPI_Comm_rank(comm, &rank);
129  MPI_Comm_size(comm, &size);
130 
131  int i, j, k; // some indexes
132 
133  int flag_skip_build_gappy_blocks = flag_stgy.flag_skip_build_gappy_blocks;
134 
135  FILE *file;
136  file = stdout;
137  PRINT_RANK = rank;
138 
139  // put zeros at the gaps location
140  reset_gaps(V, id0p, local_V_size, m, nrow, m_rowwise, id0gap, lgap, ngap);
141 
142  // allocation for the gappy structure of the diagonal block Toeplitz matrix
143  int nb_blocks_gappy;
144 
145  int nb_blockgappy_max;
146  int Tgappysize_max;
147 
148  Block *tpltzblocks_gappy;
149 
150  // some computation usefull to determine the max size possible for the gappy
151  // variables
152  int Tsize = 0;
153  int lambdamax = 0;
154 
155  if (VERBOSE)
156  fprintf(file, "[%d] flag_skip_build_gappy_blocks=%d\n", rank,
157  flag_skip_build_gappy_blocks);
158 
159  if (flag_skip_build_gappy_blocks == 1) { // no build gappy blocks strategy,
160  // just put zeros at gaps location
161 
162  // compute the product using only the input Toeplitz blocks structure
163  // with zeros at the gaps location
164  mpi_stbmm(V, nrow, m, m_rowwise, tpltzblocks, nb_blocks_local,
165  nb_blocks_all, id0p, local_V_size, flag_stgy, MPI_COMM_WORLD);
166  } else { // build gappy blocks strategy
167 
168  for (Tsize = i = 0; i < nb_blocks_local; i++)
169  Tsize += tpltzblocks[i].lambda;
170 
171  for (i = 0; i < nb_blocks_local; i++) {
172  if (tpltzblocks[i].lambda > lambdamax)
173  lambdamax = tpltzblocks[i].lambda;
174  }
175 
176  // compute max size possible for the gappy variables
177  nb_blockgappy_max = nb_blocks_local + ngap;
178  Tgappysize_max = Tsize + lambdamax * ngap;
179 
180  // allocation of the gappy variables with max size possible
181  tpltzblocks_gappy = (Block *) calloc(nb_blockgappy_max, sizeof(Block));
182 
183  // build gappy Toeplitz block structure considering significant gaps
184  // locations, meaning we skip the gaps lower than the minimum
185  // correlation distance. You can also use the flag_param_distmin_fixed
186  // parameter which allows you to skip the gap lower than these value.
187  // Indeed, sometimes it's better to just put somes zeros than to
188  // consider two separates blocks. ps: This criteria could be dependant
189  // of the local lambda in futur impovements.
190  int flag_param_distmin_fixed = flag_stgy.flag_param_distmin_fixed;
191  build_gappy_blocks(nrow, m, tpltzblocks, nb_blocks_local, nb_blocks_all,
192  id0gap, lgap, ngap, tpltzblocks_gappy,
193  &nb_blocks_gappy, flag_param_distmin_fixed);
194 
195  if (VERBOSE) {
196  fprintf(file, "[%d] nb_blocks_gappy=%d\n", rank, nb_blocks_gappy);
197  for (i = 0; i < nb_blocks_gappy; i++)
198  fprintf(file, "[%d] idvgappy[%d]=%ld ; ngappy[%d]=%d\n", rank,
199  i, tpltzblocks_gappy[i].idv, i, tpltzblocks_gappy[i].n);
200  }
201  // ps: we could reallocate the gappy variables to their real size. Not
202  // sure it's worth it.
203 
204  // compute the product using the freshly created gappy Toeplitz blocks
205  // structure
206  mpi_stbmm(V, nrow, m, m_rowwise, tpltzblocks_gappy, nb_blocks_local,
207  nb_blocks_all, id0p, local_V_size, flag_stgy, MPI_COMM_WORLD);
208 
209  } // end flag_skip_build_gappy_blocks==1
210 
211  // put zeros on V for the gaps location again. Indeed, some gaps are just
212  // replaced by zeros in input, it's created some fakes results we need to
213  // clear after the computation.
214  reset_gaps(V, id0p, local_V_size, m, nrow, m_rowwise, id0gap, lgap, ngap);
215 
216  return 0;
217 }
218 
219 //====================================================================
221 
226 // put zeros on V for the gaps location
227 int reset_gaps(double **V, int id0, int local_V_size, int m, int nrow,
228  int m_rowwise, const int64_t *id0gap, const int *lgap,
229  int ngap) {
230  int i, j, k, l;
231 
232  for (j = 0; j < m; j++) {
233 
234 #pragma omp parallel for private(i) schedule(dynamic, 1)
235  for (k = 0; k < ngap; k++)
236  for (i = 0; i < lgap[k]; i++)
237  if (id0gap[k] + i + j * nrow >= id0
238  && id0gap[k] + i + j * nrow < id0 + local_V_size) {
239  for (l = 0; l < m_rowwise; l++)
240  (*V)[id0gap[k] + i + j * nrow - id0
241  + l * local_V_size] = 0.;
242  }
243  }
244 
245  return 0;
246 }
247 
248 #endif
249 
250 //====================================================================
253 
274 int build_gappy_blocks(int nrow, int m, Block *tpltzblocks, int nb_blocks_local,
275  int nb_blocks_all, const int64_t *id0gap,
276  const int *lgap, int ngap, Block *tpltzblocks_gappy,
277  int *nb_blocks_gappy_final,
278  int flag_param_distmin_fixed) {
279 
280  int i, j, k;
281  int id, ib;
282  int idtmp;
283  int igapfirstblock, igaplastblock;
284 
285  int param_distmin = 0;
286  if (flag_param_distmin_fixed != 0) param_distmin = flag_param_distmin_fixed;
287 
288  int lambdaShft;
289 
290  int igaplastblock_prev = -1;
291  int lambdaShftgappy = 0;
292  int offset_id = 0;
293  // int offset_id_gappy=0;
294 
295  int flag_igapfirstinside, flag_igaplastinside;
296  int nbloc = nb_blocks_local;
297  int nblocks_gappy = 0;
298 
299  int idvtmp_firstblock;
300 
301  int nb_blockgappy_max = nb_blocks_local + ngap;
302  int Tgappysize_max;
303 
304  int ngappy_tmp;
305  int lgap_tmp;
306 
307  int flag_gapok = 0;
308 
309  int distcorr_min;
310 
311  int Tgappysize = 0;
312  int k_prev = -1;
313 
314  for (k = 0; k < ngap; k++) {
315 
316  // find block for the gap begining
317  for (igapfirstblock = -1; igapfirstblock == -1;) {
318  idtmp = id0gap[k];
319 
320  for (ib = 0; ib < nbloc; ib++) {
321  if (tpltzblocks[ib].n != 0
322  && idtmp % nrow < tpltzblocks[ib].idv + tpltzblocks[ib].n)
323  break;
324  }
325 
326  if (ib < nbloc && tpltzblocks[ib].idv <= idtmp) {
327  igapfirstblock = ib; // the block contained the id0gap
328  flag_igapfirstinside = 1;
329  } else if (ib < nbloc && tpltzblocks[ib].idv > idtmp) {
330  igapfirstblock = ib; // first block after the id0gap
331  flag_igapfirstinside = 0;
332  } else { // ib=nbloc
333  igapfirstblock = -2; // no block defined
334  flag_igapfirstinside = 0;
335  }
336  }
337 
338  // find block for the end of the gap - reverse way
339  for (igaplastblock = -1; igaplastblock == -1;) {
340  idtmp = id0gap[k] + lgap[k] - 1;
341 
342  for (ib = nbloc - 1; ib >= 0; ib--) {
343  if (tpltzblocks[ib].n != 0 && tpltzblocks[ib].idv <= idtmp)
344  break;
345  }
346 
347  if (ib >= 0 && idtmp < tpltzblocks[ib].idv + tpltzblocks[ib].n) {
348  igaplastblock = ib;
349  flag_igaplastinside = 1;
350  } else if (ib >= 0
351  && tpltzblocks[ib].idv + tpltzblocks[ib].n <= idtmp) {
352  igaplastblock = ib;
353  flag_igaplastinside = 0;
354  } else { // ib=-1
355  igaplastblock = -2; // no block defined.
356  flag_igaplastinside = 0;
357  }
358  }
359 
360  if (igapfirstblock == igaplastblock)
361  distcorr_min = tpltzblocks[igapfirstblock].lambda
362  - 1; // update for lambda-1
363  else
364  distcorr_min = 0;
365 
366  // igapfirstblock != -2 && igaplastblock != -2 not really need but it's
367  // a shortcut
368  if (lgap[k] > max(distcorr_min, param_distmin) && igapfirstblock != -2
369  && igaplastblock != -2) {
370 
371  idvtmp_firstblock = max(tpltzblocks[igapfirstblock].idv,
372  id0gap[k_prev] + lgap[k_prev]);
373 
374  // test if the gap is ok for block reduce/split
375  if (igapfirstblock != igaplastblock) {
376 
377  flag_gapok = 1; // reduce the gap in each block. no pb if we add
378  // max() inside the ifs.
379  } else if (id0gap[k] - idvtmp_firstblock
380  >= tpltzblocks[igapfirstblock].lambda
381  && tpltzblocks[igaplastblock].idv
382  + tpltzblocks[igaplastblock].n
383  - (id0gap[k] + lgap[k])
384  >= tpltzblocks[igaplastblock].lambda) {
385 
386  flag_gapok = 1;
387  } else if (igapfirstblock == igaplastblock) {
388 
389  int ngappyleft_tmp = id0gap[k] - idvtmp_firstblock;
390  int leftadd = max(0, tpltzblocks[igapfirstblock].lambda
391  - ngappyleft_tmp);
392  int ngappyright_tmp = tpltzblocks[igaplastblock].idv
393  + tpltzblocks[igaplastblock].n
394  - (id0gap[k] + lgap[k]);
395  int rightadd = max(0, tpltzblocks[igapfirstblock].lambda
396  - ngappyright_tmp);
397  int restgap = lgap[k] - (leftadd + rightadd);
398 
399  // flag_gapok = (restgap>=0);
400  flag_gapok = (restgap >= max(0, param_distmin));
401  } else {
402  flag_gapok = 0;
403  }
404 
405  // create gappy blocks if criteria is fullfill
406  if (flag_gapok == 1) {
407 
408  // copy the begining blocks
409  for (id = igaplastblock_prev + 1; id < igapfirstblock; id++) {
410 
411  tpltzblocks_gappy[nblocks_gappy].T_block =
412  tpltzblocks[id].T_block;
413  tpltzblocks_gappy[nblocks_gappy].lambda =
414  tpltzblocks[id].lambda;
415  tpltzblocks_gappy[nblocks_gappy].n = tpltzblocks[id].n;
416  tpltzblocks_gappy[nblocks_gappy].idv = tpltzblocks[id].idv;
417 
418  nblocks_gappy = nblocks_gappy + 1;
419  }
420 
421  // clear last blockgappy if same block again - outside the "if"
422  // for border cases with n[]==0
423  if (igaplastblock_prev == igapfirstblock && k != 0) {
424  nblocks_gappy = nblocks_gappy - 1;
425  // idvtmp_firstblock = id0gap[k-1]+lgap[k-1]; //always
426  // exist because igaplastblock_prev!=-1
427  // so not first turn - it's replace "idv[igapfirstblock]"
428  }
429 
430  // reduce first block if defined
431  if (flag_igapfirstinside == 1
432  && (id0gap[k] - idvtmp_firstblock)
433  > 0) { // check if inside and not on the border -
434  // meaning n[] not zero
435 
436  tpltzblocks_gappy[nblocks_gappy].T_block =
437  tpltzblocks[igapfirstblock].T_block;
438  tpltzblocks_gappy[nblocks_gappy].lambda =
439  tpltzblocks[id].lambda;
440  tpltzblocks_gappy[nblocks_gappy].n =
441  id0gap[k] - idvtmp_firstblock;
442  tpltzblocks_gappy[nblocks_gappy].n =
443  max(tpltzblocks_gappy[nblocks_gappy].n,
444  tpltzblocks[igapfirstblock].lambda);
445 
446  tpltzblocks_gappy[nblocks_gappy].idv = idvtmp_firstblock;
447  nblocks_gappy = nblocks_gappy + 1;
448  }
449 
450  // reduce last block if defined
451  if (flag_igaplastinside == 1
452  && (tpltzblocks[igaplastblock].idv
453  + tpltzblocks[igaplastblock].n - (id0gap[k] + lgap[k]))
454  > 0) { // check if inside and not on the border -
455  // meaning n[] not zero
456 
457  tpltzblocks_gappy[nblocks_gappy].T_block =
458  tpltzblocks[igaplastblock].T_block;
459  tpltzblocks_gappy[nblocks_gappy].lambda =
460  tpltzblocks[id].lambda;
461  tpltzblocks_gappy[nblocks_gappy].n =
462  tpltzblocks[igaplastblock].idv
463  + tpltzblocks[igaplastblock].n
464  - (id0gap[k] + lgap[k]);
465  int rightadd0 = max(
466  0, tpltzblocks[igapfirstblock].lambda
467  - tpltzblocks_gappy[nblocks_gappy].n);
468 
469  tpltzblocks_gappy[nblocks_gappy].n =
470  max(tpltzblocks_gappy[nblocks_gappy].n,
471  tpltzblocks[igaplastblock].lambda);
472 
473  tpltzblocks_gappy[nblocks_gappy].idv =
474  id0gap[k] + lgap[k] - rightadd0;
475 
476  nblocks_gappy = nblocks_gappy + 1;
477  lambdaShftgappy =
478  lambdaShftgappy + tpltzblocks[igaplastblock].lambda;
479  }
480 
481  igaplastblock_prev = igaplastblock;
482  k_prev = k;
483 
484  } // end if (flag_gapok)
485  } // end if (lgap[k]>param_distmin)
486  } // end gap loop
487 
488  // now continu to copy the rest of the block left
489  for (id = igaplastblock_prev + 1; id < nb_blocks_local; id++) {
490 
491  tpltzblocks_gappy[nblocks_gappy].T_block = tpltzblocks[id].T_block;
492  tpltzblocks_gappy[nblocks_gappy].lambda = tpltzblocks[id].lambda;
493  tpltzblocks_gappy[nblocks_gappy].n = tpltzblocks[id].n;
494  tpltzblocks_gappy[nblocks_gappy].idv = tpltzblocks[id].idv;
495  nblocks_gappy = nblocks_gappy + 1;
496  lambdaShftgappy = lambdaShftgappy + tpltzblocks[id].lambda;
497  }
498 
499  *nb_blocks_gappy_final = nblocks_gappy; // just for output
500 
501  return 0;
502 }
int PRINT_RANK
Definition: toeplitz.c:117
int VERBOSE
Verbose mode.
Definition: toeplitz.c:113
int mpi_stbmm(double **V, int64_t nrow, int m, int m_rowwise, Block *tpltzblocks, int nb_blocks_local, int nb_blocks_all, int64_t idp, int local_V_size, Flag flag_stgy, MPI_Comm comm)
int mpi_gstbmm(double **V, int nrow, int m, int m_rowwise, Block *tpltzblocks, int nb_blocks_local, int nb_blocks_all, int id0p, int local_V_size, int64_t *id0gap, int *lgap, int ngap, Flag flag_stgy, MPI_Comm comm)
int reset_gaps(double **V, int id0, int local_V_size, int m, int nrow, int m_rowwise, const int64_t *id0gap, const int *lgap, int ngap)
Set the data to zeros at the gaps location.
int build_gappy_blocks(int nrow, int m, Block *tpltzblocks, int nb_blocks_local, int nb_blocks_all, const int64_t *id0gap, const int *lgap, int ngap, Block *tpltzblocks_gappy, int *nb_blocks_gappy_final, int flag_param_distmin_fixed)