// midapack/toeplitz__nofft_8c_source.html

 #include "toeplitz.h"


 // max(a, b): the larger of the two arguments (b when they do not compare
 // greater). Each argument is evaluated exactly once. Relies on the GNU
 // statement-expression and __typeof__ extensions.
 #define max(a, b)                                                              \
     ({                                                                         \
         __typeof__(a) max_lhs_ = (a);                                          \
         __typeof__(b) max_rhs_ = (b);                                          \
         max_rhs_ < max_lhs_ ? max_lhs_ : max_rhs_;                             \
     })


 // min(a, b): the smaller of the two arguments (b when they do not compare
 // less). Each argument is evaluated exactly once. Relies on the GNU
 // statement-expression and __typeof__ extensions.
 #define min(a, b)                                                              \
     ({                                                                         \
         __typeof__(a) min_lhs_ = (a);                                          \
         __typeof__(b) min_rhs_ = (b);                                          \
         min_rhs_ > min_lhs_ ? min_lhs_ : min_rhs_;                             \
     })


 extern int PRINT_RANK;


 // r1.1 - Frederic Dauvergne (APC)
 // Basic product without using FFTs.
 // stmm_simple_core is not used by the API. It is similar to stmm_core: it
 // uses a sliding-window algorithm, but with different parameters.


 //=========================================================================


 // Product of a banded symmetric Toeplitz matrix by a matrix, without FFTs.
 //
 // For every column k in [0, m) and every interior row i, computes
 //     (*TV)[i + k*n] = sum_j T[|j - i|] * (*V)[j + k*n],   |j - i| < lambda
 // Columns are stored one after another with stride n.
 //
 // Only rows i in [lambda-1, n-(lambda-1)) are written; the lambda-1 rows at
 // each edge of every column of *TV are left untouched.
 //
 //   V      pointer to the input data (n*m doubles), read only
 //   n      number of rows per column
 //   m      number of columns
 //   T      first lambda coefficients of the Toeplitz band (T[0] = diagonal)
 //   lambda half bandwidth; if lambda < 1 the band is empty and every row of
 //          *TV is set to zero
 //   TV     pointer to the output buffer (n*m doubles); must not alias *V
 //
 // Always returns 0.
 int stmm_simple_basic(double **V, int n, int m, double *T, int lambda,
                       double **TV) {

     const int distcorrmin = lambda - 1;

     // Edge rows are never computed. The offset is clamped at zero so that a
     // non-positive lambda cannot produce a negative starting row.
     const int offset_edges = distcorrmin > 0 ? distcorrmin : 0;

     const double *in  = *V;
     double       *out = *TV;

     for (int k = 0; k < m; k++) {

         // Column start computed in size_t so that k*n cannot overflow int.
         const size_t col = (size_t) k * (size_t) n;

         // Loop variables are declared inside the loops, hence private to
         // each thread; everything else is read-only shared data.
 #pragma omp parallel for
         for (int i = offset_edges; i < n - offset_edges; i++) {

             // Band limits for row i, clamped to the valid row range.
             int j_first = i - distcorrmin;
             int j_last  = i + lambda;
             if (j_first < 0) j_first = 0;
             if (j_last > n) j_last = n;

             double acc = 0;
             for (int j = j_first; j < j_last; j++) {
                 acc += T[abs(j - i)] * in[col + j];
             }

             out[col + i] = acc;
         }
     }

     return 0;
 }


 //=========================================================================


 // Sliding-window (overlap) driver around stmm_simple_basic: multiplies the
 // data in *V by the banded Toeplitz matrix described by T, in place, working
 // on subblocks of `blocksize` rows.
 //
 //   V           pointer to the n-by-m data; overwritten with the result
 //   n, m        number of rows / columns of the data
 //   T           Toeplitz band coefficients, passed through to
 //               stmm_simple_basic
 //   blocksize   number of rows of one subblock, overlaps included; must be
 //               larger than 2*(lambda-1), since blocksize - 2*(lambda-1) is
 //               used as a divisor below
 //   lambda      half bandwidth; lambda-1 rows overlap on each side
 //   nfft        not read by this routine
 //   flag_offset 1: the first subblock is copied without zero padding and
 //               output starts at row lambda-1; otherwise the first
 //               lambda-1 rows of the first subblock are zero padded
 //
 // Returns 0 on success, the value of print_error_message() when allocation
 // or the first subblock product fails, or the non-zero status of a later
 // stmm_simple_basic call. The two work buffers are released on every path.
 int stmm_simple_core(double **V, int n, int m, double *T, int blocksize,
                      int lambda, int nfft, int flag_offset) {

     (void) nfft; // kept in the signature for callers; unused here

     // routine variable
     int status;
     int k; // subblock index
     int currentsize;
     int distcorrmin = lambda - 1;
     int blocksize_eff =
             blocksize
             - 2 * distcorrmin; // just a good part after removing the overlaps
     int nbloc; // a number of subblock of slide/overlap algorithm

     if (flag_offset == 1)
         nbloc = ceil((1.0 * (n - 2 * distcorrmin)) / blocksize_eff);
     else
         nbloc = ceil((1.0 * n) / blocksize_eff);

     // Element count computed in size_t to avoid signed overflow in int.
     size_t  nelem   = (size_t) blocksize * (size_t) m;
     double *V_bloc  = (double *) calloc(nelem, sizeof *V_bloc);
     double *TV_bloc = (double *) calloc(nelem, sizeof *TV_bloc);
     if ((V_bloc == NULL) || (TV_bloc == NULL)) {
         // One of the two may have succeeded; free(NULL) is a no-op.
         free(V_bloc);
         free(TV_bloc);
         return print_error_message(2, __FILE__, __LINE__);
     }

     int offset = 0;
     if (flag_offset == 1) offset = distcorrmin;

     int iV  = 0;      //"-distcorrmin+offset";  //first index in V
     int iTV = offset; // first index in TV

     //"k=0";
     // first subblock separately as it requires some padding. prepare the block
     // of the data vector with the overlaps on both sides
     currentsize = min(blocksize - distcorrmin + offset, n - iV);
     // note: if flag_offset=0, pad first distcorrmin elements with zeros (for
     // the first subblock only)
     //  and if flag_offset=1 there is no padding with zeros.
     copy_block(n, m, *V, blocksize, m, V_bloc, 0, 0, currentsize, m,
                distcorrmin - offset, 0, 1.0, 0);

     // do block computation
     status = stmm_simple_basic(&V_bloc, blocksize, m, T, lambda, &TV_bloc);

     if (status != 0) {
         printf("Error in stmm_core.");
         // Release the work buffers before bailing out.
         free(V_bloc);
         free(TV_bloc);
         return print_error_message(7, __FILE__, __LINE__);
     }

     // now copy first the new chunk of the data matrix **before** overwriting
     // the input due to overlaps !
     iV = blocksize_eff - distcorrmin + offset;

     if (nbloc > 1) {
         currentsize = min(blocksize, n - iV); // not to overshoot

         int flag_reset =
                 (currentsize
                  != blocksize); // with flag_reset=1, always "memset" the block.
         copy_block(n, m, *V, blocksize, m, V_bloc, iV, 0, currentsize, m, 0, 0,
                    1.0, flag_reset);
     }

     // and now store the output back in V
     currentsize = min(blocksize_eff, n - iTV); // to trim the extra rows
     copy_block(blocksize, m, TV_bloc, n, m, *V, distcorrmin, 0, currentsize, m,
                iTV, 0, 1.0, 0);

     iTV += blocksize_eff;

     // now continue with all the other subblocks
     for (k = 1; k < nbloc; k++) {

         // do bloc computation
         status = stmm_simple_basic(&V_bloc, blocksize, m, T, lambda, &TV_bloc);
         if (status != 0) break;

         iV += blocksize_eff;
         // copy first the next subblock to process
         if (k != nbloc - 1) {
             currentsize = min(blocksize, n - iV); // not to overshoot

             int flag_resetk =
                     (currentsize != blocksize); // with flag_reset=1, always
                                                 // "memset" the block.
             copy_block(n, m, *V, blocksize, m, V_bloc, iV, 0, currentsize, m, 0,
                        0, 1.0, flag_resetk);
         }

         // and then store the output in V
         currentsize = min(blocksize_eff, n - iTV); // not to overshoot
         copy_block(blocksize, m, TV_bloc, n, m, *V, distcorrmin, 0, currentsize,
                    m, iTV, 0, 1.0, 0);
         iTV += blocksize_eff;

     } // end bloc computation

     free(V_bloc);
     free(TV_bloc);

     return status;
 }

/*
 * Cross-references (from the generated documentation):
 *
 * print_error_message
 *   int print_error_message(int error_number, char const *file, int line)
 *   Prints error message corresponding to an error number.
 *   Definition: toeplitz.c:127
 *
 * copy_block
 *   int copy_block(int ninrow, int nincol, double *Vin, int noutrow,
 *                  int noutcol, double *Vout, int inrow, int incol,
 *                  int nblockrow, int nblockcol, int outrow, int outcol,
 *                  double norm, int set_zero_flag)
 *   Definition: toeplitz.c:514
 *
 * PRINT_RANK
 *   int PRINT_RANK
 *   Definition: toeplitz.c:117
 *
 * stmm_simple_core
 *   int stmm_simple_core(double **V, int n, int m, double *T, int blocksize,
 *                        int lambda, int nfft, int flag_offset)
 *   Definition: toeplitz_nofft.c:156
 *
 * stmm_simple_basic
 *   int stmm_simple_basic(double **V, int n, int m, double *T, int lambda,
 *                         double **TV)
 *   Perform the product of a Toeplitz matrix by a matrix without using FFT's.
 *   Definition: toeplitz_nofft.c:104
 */