MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
csort.c
Go to the documentation of this file.
1 
21 #include "mapmat.h"
22 #include <stdlib.h>
23 #include <string.h>
24 // int per page size
25 #define PAGE 1024
26 
/**
 * Sort an integer array in ascending order using insertion sort.
 * O(n^2) worst case; efficient for small or nearly-sorted inputs.
 *
 * @param indices array to sort in place
 * @param count   number of elements
 */
void insertion_sort(int *indices, int count) {
    int pos, scan, val;
    for (pos = 1; pos < count; pos++) {
        val = indices[pos];
        /* shift larger elements one slot right, then drop val in place */
        for (scan = pos - 1; scan >= 0 && indices[scan] > val; scan--) {
            indices[scan + 1] = indices[scan];
        }
        indices[scan + 1] = val;
    }
}
45 
/**
 * Sort indices[left..right] (inclusive) in ascending order with quicksort.
 * Uses the leftmost element as pivot and a Hoare-style partition.
 *
 * @param indices array to sort in place
 * @param left    first index of the range
 * @param right   last index of the range
 */
void quick_sort(int *indices, int left, int right) {
    int pivot_val, lo, hi, swap;

    if (left >= right) return; /* 0 or 1 element: nothing to do */

    pivot_val = indices[left];
    lo = left + 1;
    hi = right;
    while (lo <= hi) {
        /* advance lo past elements <= pivot, retreat hi past elements > pivot */
        while (lo <= right && indices[lo] <= pivot_val) lo++;
        while (hi > left && indices[hi] > pivot_val) hi--;
        if (lo < hi) {
            swap = indices[lo];
            indices[lo] = indices[hi];
            indices[hi] = swap;
            lo++;
            hi--;
        }
    }
    /* place the pivot at its final position (hi) */
    indices[left] = indices[hi];
    indices[hi] = pivot_val;

    quick_sort(indices, left, hi - 1);
    quick_sort(indices, hi + 1, right);
}
82 
/**
 * Sort an integer array in ascending order using bubble sort.
 * Each pass bubbles the largest remaining element to the end. O(n^2).
 *
 * @param indices array to sort in place
 * @param count   number of elements
 */
void bubble_sort(int *indices, int count) {
    int pass, idx, swap;
    for (pass = count - 1; pass > 0; pass--) {
        for (idx = 0; idx < pass; idx++) {
            if (indices[idx] > indices[idx + 1]) {
                swap = indices[idx];
                indices[idx] = indices[idx + 1];
                indices[idx + 1] = swap;
            }
        }
    }
}
100 
/**
 * Sort an integer array in ascending order and remove duplicates,
 * using an occurrence bitmap over the value range [min, max].
 *
 * NOTE(review): allocates (max - min + 1) ints — only suitable when the
 * value range is small relative to memory; range overflow for extreme
 * min/max is not guarded here.
 *
 * @param indices array to sort/dedup in place
 * @param count   number of elements
 * @return number of distinct values written back to indices,
 *         0 if count <= 0, or -1 on allocation failure
 */
int counting_sort(int *indices, int count) {
    int *buf;
    int i, j;
    int min, max;

    /* bug fix: original read indices[0] unconditionally (UB for count <= 0) */
    if (count <= 0) return 0;

    min = indices[0];
    max = indices[0];
    for (i = 1; i < count; i++) {
        if (indices[i] > max) {
            max = indices[i];
        } else if (indices[i] < min) {
            min = indices[i];
        }
    }

    /* one flag per value in [min, max]; calloc zero-initializes */
    buf = (int *) calloc(max - min + 1, sizeof(int));
    if (buf == NULL) return -1; /* bug fix: original dereferenced NULL on OOM */

    for (i = 0; i < count; i++) { buf[indices[i] - min] = 1; }

    /* sweep the range in order: emits sorted, deduplicated values */
    j = 0;
    for (i = 0; i < (max - min + 1); i++) {
        if (buf[i]) {
            indices[j] = min + i;
            j++;
        }
    }
    free(buf);
    return j;
}
136 
/**
 * Sort an integer array in ascending order using Shell sort
 * with the classic gap sequence n/2, n/4, ..., 1.
 *
 * @param a array to sort in place
 * @param n number of elements
 */
void shell_sort(int *a, int n) {
    int gap, start, idx, swap;
    for (gap = n / 2; gap > 0; gap /= 2) {
        /* gapped insertion sort for the current gap */
        for (start = gap; start < n; start++) {
            for (idx = start - gap; idx >= 0; idx -= gap) {
                if (a[idx + gap] >= a[idx]) break;
                swap = a[idx];
                a[idx] = a[idx + gap];
                a[idx + gap] = swap;
            }
        }
    }
}
156 
157 
/**
 * Sort an integer array in ascending order and remove duplicates in place.
 *
 * @param indices array to sort/dedup in place
 * @param count   number of elements
 * @param flag    algorithm selector: 0 quick, 1 bubble, 2 insertion,
 *                3 counting (which dedups on its own), 4 shell;
 *                any other value falls back to quick sort
 * @return number of distinct values kept at the front of indices
 */
int ssort(int *indices, int count, int flag) {
    int i, n;
    int *ptr_i, *ptr_o;

    /* bug fix: original returned 1 for an empty array */
    if (count <= 0) return 0;

    switch (flag) {
        case 0:
            quick_sort(indices, 0, count - 1);
            break;
        case 1:
            bubble_sort(indices, count);
            break;
        case 2:
            insertion_sort(indices, count);
            break;
        case 3:
            /* counting_sort already returns the deduplicated count */
            return counting_sort(indices, count);
        case 4:
            shell_sort(indices, count);
            break;
        default:
            /* bug fix: original silently skipped sorting for unknown flags,
               then deduplicated an unsorted array (corrupting the data) */
            quick_sort(indices, 0, count - 1);
            break;
    }

    /* compact consecutive duplicates of the now-sorted array */
    ptr_i = indices;
    ptr_o = indices;
    n = 1;
    for (i = 0; i < count - 1; i++) {
        ptr_i++;
        if (*ptr_i != *ptr_o) {
            ptr_o++;
            n++;
            *ptr_o = *ptr_i;
        }
    }
    return n;
}
205 
206 // optimized version is faster than the other implementation but there is a bug
207 // !!!
208 #ifdef W_OPENMP
209 #include <omp.h>
210 int omp_psort_opt(int *A, int nA, int flag) {
211  int i;
212  int *count, *disp;
213  int q, r;
214  int p, l;
215  int tid, nths;
216  int *buf;
217  int *ptr_i, *ptr_o;
218  int n, k, d;
219 
220 #pragma omp parallel shared(nths)
221  { //---fork---just to get the number of threads
222  nths = omp_get_num_threads();
223  } //---join---
224 
225  p = nA / PAGE; // number of full pages
226  q = p / nths; // full pages per thread
227  l = p % nths; // full pages left
228  r = nA % PAGE; // number of elements the last page not full
229 
230  count = (int *) malloc(nths * sizeof(int));
231  disp = (int *) malloc(nths * sizeof(int));
232 
233  for (i = 0; i < nths; i++) {
234  count[i] = q * PAGE;
235  if (i < l) count[i] += PAGE;
236  if (i == l) count[i] += r;
237  }
238 
239  disp[0] = 0;
240  for (i = 0; i < nths - 1; i++) { disp[i + 1] = disp[i] + count[i]; }
241 
242 #pragma omp parallel private(tid, n, k, d, buf) shared(nths, A, nA, disp, count)
243  { //---fork---1st step, sort on local chunk
244  tid = omp_get_thread_num();
245 
246  buf = (int *) malloc(nA * sizeof(int));
247  // buf = (int *) malloc(count[tid]*sizeof(int));
248  memcpy(buf, A + disp[tid], count[tid] * sizeof(int));
249 
250  n = ssort(buf, count[tid], flag);
251  count[tid] = n;
252 
253  memcpy(A + disp[tid], buf, n * sizeof(int));
254 
255  nths = omp_get_num_threads();
256 
257 
258  k = nths;
259  d = 1;
260  while (k > 1) {
261 #pragma omp barrier
262  if (tid % (2 * d) == 0 && tid + d < nths) {
263  set_or(A + disp[tid], count[tid], A + disp[tid + d],
264  count[tid + d], buf);
265  count[tid] = n;
266  memcpy(A + disp[tid], buf, n * sizeof(int));
267  d *= 2;
268  k /= 2;
269  }
270  }
271  free(buf);
272  } //---join---
273 
274  nA = count[0];
275  // printf("\nA :");
276  // for(i=0; i<nA; i++){
277  // printf(" %d",A[i]);
278  // }
279  free(count);
280  free(disp);
281  return nA;
282 }
283 
/**
 * Parallel sort+dedup of A[0..nA-1] using OpenMP threads.
 * Each thread sorts/dedups an even chunk with ssort(), then chunks are
 * merged pairwise up a binary tree with card_or()/set_or().
 *
 * Bug fix (review): the tree level (d, k) was advanced only by the merging
 * threads, so non-participating threads never left the while loop and the
 * per-level omp barrier was not reached by the whole team (deadlock).
 * Every thread now advances d and k each iteration.
 *
 * @param A    array to sort/dedup in place; result is in A[0..return-1]
 * @param nA   number of elements
 * @param flag sequential-sort selector forwarded to ssort()
 * @return number of distinct values
 */
int omp_psort(int *A, int nA, int flag) {
    int i;
    int *count, *disp;
    int q, r;
    int tid, nths;
    int *buf;
    int n, k, d;

#pragma omp parallel private(tid) shared(nths)
    { //---fork---just to get the number of threads
        nths = omp_get_num_threads();
    } //---join---

    q = nA / nths;
    r = nA % nths;

    count = (int *) malloc(nths * sizeof(int));
    disp = (int *) malloc(nths * sizeof(int));

    /* the first r threads take one extra element */
    for (i = 0; i < nths; i++) {
        count[i] = (i < r) ? q + 1 : q;
    }

    disp[0] = 0;
    for (i = 0; i < nths - 1; i++) { disp[i + 1] = disp[i] + count[i]; }

#pragma omp parallel private(tid, n) shared(A, disp, count)
    { //---fork---1st step: sort and dedup the local chunk
        tid = omp_get_thread_num();
        n = ssort(A + disp[tid], count[tid], flag);
        count[tid] = n;
    } //---join---

    buf = (int *) malloc(nA * sizeof(int));

#pragma omp parallel private(tid, n, k, d) shared(nths, nA, A, disp, count, buf)
    { //---fork---2nd step: binary-tree merge; at level d thread tid merges
      //            with thread tid+d when tid is a multiple of 2d
        tid = omp_get_thread_num();
        nths = omp_get_num_threads();
        k = nths;
        d = 1;
        while (k > 1) {
            if (tid % (2 * d) == 0 && tid + d < nths) {
                n = card_or(A + disp[tid], count[tid], A + disp[tid + d],
                            count[tid + d]);
                set_or(A + disp[tid], count[tid], A + disp[tid + d],
                       count[tid + d], buf + disp[tid]);
                count[tid] = n;
                memcpy(A + disp[tid], buf + disp[tid], n * sizeof(int));
            }
            /* bug fix: ALL threads advance the level so barriers match */
            d *= 2;
            k /= 2;
#pragma omp barrier
        }
    } //---join---

    nA = count[0];
    free(buf);
    free(count);
    free(disp);
    return nA;
}
371 #endif
372 
373 
/**
 * Check whether an array is sorted in non-decreasing order.
 *
 * @param indices array to check
 * @param count   number of elements
 * @return 0 if sorted (non-decreasing), 1 if an out-of-order pair exists
 */
int sorted(int *indices, int count) {
    int i;
    /* bug fix: the original bound (count - 2) skipped the last adjacent
       pair, so e.g. {1,2,3,2} was reported as sorted */
    for (i = 0; i < count - 1; i++) {
        if (indices[i] > indices[i + 1]) return 1;
    }
    return 0;
}
387 
/**
 * Check whether an array is strictly increasing.
 *
 * @param indices array to check
 * @param count   number of elements
 * @return 0 if strictly increasing, 1 if any pair is equal or decreasing
 */
int monotony(int *indices, int count) {
    int i;
    /* bug fix: the original bound (count - 2) skipped the last adjacent
       pair, so e.g. {1,2,2} was reported as strictly increasing */
    for (i = 0; i < count - 1; i++) {
        if (indices[i] >= indices[i + 1]) return 1;
    }
    return 0;
}
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2)
Definition: als.c:138
int card_or(int *A1, int n1, int *A2, int n2)
Definition: als.c:72
int ssort(int *indices, int count, int flag)
Definition: csort.c:172
int omp_psort(int *A, int nA, int flag)
Definition: csort.c:302
int omp_psort_opt(int *A, int nA, int flag)
Definition: csort.c:210
int counting_sort(int *indices, int count)
Definition: csort.c:111
int sorted(int *indices, int count)
Definition: csort.c:376
void shell_sort(int *a, int n)
Definition: csort.c:141
int monotony(int *indices, int count)
Definition: csort.c:388
void insertion_sort(int *indices, int count)
Definition: csort.c:32
void quick_sort(int *indices, int left, int right)
Definition: csort.c:55
void bubble_sort(int *indices, int count)
Definition: csort.c:88