MIDAPACK - MIcrowave Data Analysis PACKage  1.1b
Parallel software tools for high performance CMB DA analysis
csort.c
Go to the documentation of this file.
1 
21 #include "mapmat.h"
22 #include <stdlib.h>
23 #include <string.h>
24 // int per page size
25 #define PAGE 1024
26 
/**
 * Sort an integer array in ascending order using insertion sort.
 * O(n^2) worst case; efficient for small or nearly-sorted inputs.
 *
 * @param indices array to sort in place
 * @param count   number of elements
 */
void insertion_sort(int *indices, int count) {
    int pos, scan, val;
    for (pos = 1; pos < count; pos++) {
        val = indices[pos];
        /* shift larger elements one slot right, then drop val in place */
        for (scan = pos - 1; scan >= 0 && indices[scan] > val; scan--) {
            indices[scan + 1] = indices[scan];
        }
        indices[scan + 1] = val;
    }
}
45 
/**
 * Sort indices[left..right] (inclusive) in ascending order with quicksort.
 * Uses the leftmost element as pivot and a Hoare-style partition.
 *
 * @param indices array to sort in place
 * @param left    first index of the range
 * @param right   last index of the range
 */
void quick_sort(int *indices, int left, int right) {
    int pivot_val, lo, hi, swap;

    if (left >= right) return; /* 0 or 1 element: nothing to do */

    pivot_val = indices[left];
    lo = left + 1;
    hi = right;
    while (lo <= hi) {
        /* advance lo past elements <= pivot, retreat hi past elements > pivot */
        while (lo <= right && indices[lo] <= pivot_val) lo++;
        while (hi > left && indices[hi] > pivot_val) hi--;
        if (lo < hi) {
            swap = indices[lo];
            indices[lo] = indices[hi];
            indices[hi] = swap;
            lo++;
            hi--;
        }
    }
    /* place the pivot at its final position (hi) */
    indices[left] = indices[hi];
    indices[hi] = pivot_val;

    quick_sort(indices, left, hi - 1);
    quick_sort(indices, hi + 1, right);
}
82 
/**
 * Sort an integer array in ascending order using bubble sort.
 * Each pass bubbles the largest remaining element to the end. O(n^2).
 *
 * @param indices array to sort in place
 * @param count   number of elements
 */
void bubble_sort(int *indices, int count) {
    int pass, idx, swap;
    for (pass = count - 1; pass > 0; pass--) {
        for (idx = 0; idx < pass; idx++) {
            if (indices[idx] > indices[idx + 1]) {
                swap = indices[idx];
                indices[idx] = indices[idx + 1];
                indices[idx + 1] = swap;
            }
        }
    }
}
100 
/**
 * Sort an integer array in ascending order and remove duplicates,
 * using an occurrence bitmap over the value range [min, max].
 *
 * NOTE(review): allocates (max - min + 1) ints — only suitable when the
 * value range is small relative to memory; range overflow for extreme
 * min/max is not guarded here.
 *
 * @param indices array to sort/dedup in place
 * @param count   number of elements
 * @return number of distinct values written back to indices,
 *         0 if count <= 0, or -1 on allocation failure
 */
int counting_sort(int *indices, int count) {
    int *buf;
    int i, j;
    int min, max;

    /* bug fix: original read indices[0] unconditionally (UB for count <= 0) */
    if (count <= 0) return 0;

    min = indices[0];
    max = indices[0];
    for (i = 1; i < count; i++) {
        if (indices[i] > max) {
            max = indices[i];
        } else if (indices[i] < min) {
            min = indices[i];
        }
    }

    /* one flag per value in [min, max]; calloc zero-initializes */
    buf = (int *) calloc(max - min + 1, sizeof(int));
    if (buf == NULL) return -1; /* bug fix: original dereferenced NULL on OOM */

    for (i = 0; i < count; i++) { buf[indices[i] - min] = 1; }

    /* sweep the range in order: emits sorted, deduplicated values */
    j = 0;
    for (i = 0; i < (max - min + 1); i++) {
        if (buf[i]) {
            indices[j] = min + i;
            j++;
        }
    }
    free(buf);
    return j;
}
136 
/**
 * Sort an integer array in ascending order using Shell sort
 * with the classic gap sequence n/2, n/4, ..., 1.
 *
 * @param a array to sort in place
 * @param n number of elements
 */
void shell_sort(int *a, int n) {
    int gap, start, idx, swap;
    for (gap = n / 2; gap > 0; gap /= 2) {
        /* gapped insertion sort for the current gap */
        for (start = gap; start < n; start++) {
            for (idx = start - gap; idx >= 0; idx -= gap) {
                if (a[idx + gap] >= a[idx]) break;
                swap = a[idx];
                a[idx] = a[idx + gap];
                a[idx + gap] = swap;
            }
        }
    }
}
156 
157 
/**
 * Sort an integer array in ascending order and remove duplicates in place.
 *
 * @param indices array to sort/dedup in place
 * @param count   number of elements
 * @param flag    algorithm selector: 0 quick, 1 bubble, 2 insertion,
 *                3 counting (which dedups on its own), 4 shell;
 *                any other value falls back to quick sort
 * @return number of distinct values kept at the front of indices
 */
int ssort(int *indices, int count, int flag) {
    int i, n;
    int *ptr_i, *ptr_o;

    /* bug fix: original returned 1 for an empty array */
    if (count <= 0) return 0;

    switch (flag) {
        case 0:
            quick_sort(indices, 0, count - 1);
            break;
        case 1:
            bubble_sort(indices, count);
            break;
        case 2:
            insertion_sort(indices, count);
            break;
        case 3:
            /* counting_sort already returns the deduplicated count */
            return counting_sort(indices, count);
        case 4:
            shell_sort(indices, count);
            break;
        default:
            /* bug fix: original silently skipped sorting for unknown flags,
               then deduplicated an unsorted array (corrupting the data) */
            quick_sort(indices, 0, count - 1);
            break;
    }

    /* compact consecutive duplicates of the now-sorted array */
    ptr_i = indices;
    ptr_o = indices;
    n = 1;
    for (i = 0; i < count - 1; i++) {
        ptr_i++;
        if (*ptr_i != *ptr_o) {
            ptr_o++;
            n++;
            *ptr_o = *ptr_i;
        }
    }
    return n;
}
205 
206 // optimized version is faster than the other implementation but there is a bug
207 // !!!
208 #ifdef W_OPENMP
209 #include <omp.h>
210 int omp_psort_opt(int *A, int nA, int flag) {
211  int i;
212  int *count, *disp;
213  int q, r;
214  int p, l;
215  int tid, nths;
216  int *buf;
217  int *ptr_i, *ptr_o;
218  int n, k, d;
219 
220 #pragma omp parallel shared(nths)
221  { //---fork---just to get the number of threads
222  nths = omp_get_num_threads();
223  } //---join---
224 
225  p = nA / PAGE; // number of full pages
226  q = p / nths; // full pages per thread
227  l = p % nths; // full pages left
228  r = nA % PAGE; // number of elements the last page not full
229 
230  count = (int *) malloc(nths * sizeof(int));
231  disp = (int *) malloc(nths * sizeof(int));
232 
233  for (i = 0; i < nths; i++) {
234  count[i] = q * PAGE;
235  if (i < l) count[i] += PAGE;
236  if (i == l) count[i] += r;
237  }
238 
239  disp[0] = 0;
240  for (i = 0; i < nths - 1; i++) { disp[i + 1] = disp[i] + count[i]; }
241 
242 #pragma omp parallel private(tid, n, k, d, buf) shared(nths, A, nA, disp, count)
243  { //---fork---1st step, sort on local chunk
244  tid = omp_get_thread_num();
245 
246  buf = (int *) malloc(nA * sizeof(int));
247  // buf = (int *) malloc(count[tid]*sizeof(int));
248  memcpy(buf, A + disp[tid], count[tid] * sizeof(int));
249 
250  n = ssort(buf, count[tid], flag);
251  count[tid] = n;
252 
253  memcpy(A + disp[tid], buf, n * sizeof(int));
254 
255  nths = omp_get_num_threads();
256 
257 
258  k = nths;
259  d = 1;
260  while (k > 1) {
261 #pragma omp barrier
262  if (tid % (2 * d) == 0 && tid + d < nths) {
263  set_or(A + disp[tid], count[tid], A + disp[tid + d],
264  count[tid + d], buf);
265  count[tid] = n;
266  memcpy(A + disp[tid], buf, n * sizeof(int));
267  d *= 2;
268  k /= 2;
269  }
270  }
271  free(buf);
272  } //---join---
273 
274  nA = count[0];
275  // printf("\nA :");
276  // for(i=0; i<nA; i++){
277  // printf(" %d",A[i]);
278  // }
279  free(count);
280  free(disp);
281  return nA;
282 }
283 
/**
 * Parallel sort+dedup of A[0..nA-1] using OpenMP threads.
 * Each thread sorts/dedups an even chunk with ssort(), then chunks are
 * merged pairwise up a binary tree with card_or()/set_or().
 *
 * Bug fix (review): the tree level (d, k) was advanced only by the merging
 * threads, so non-participating threads never left the while loop and the
 * per-level omp barrier was not reached by the whole team (deadlock).
 * Every thread now advances d and k each iteration.
 *
 * @param A    array to sort/dedup in place; result is in A[0..return-1]
 * @param nA   number of elements
 * @param flag sequential-sort selector forwarded to ssort()
 * @return number of distinct values
 */
int omp_psort(int *A, int nA, int flag) {
    int i;
    int *count, *disp;
    int q, r;
    int tid, nths;
    int *buf;
    int n, k, d;

#pragma omp parallel private(tid) shared(nths)
    { //---fork---just to get the number of threads
        nths = omp_get_num_threads();
    } //---join---

    q = nA / nths;
    r = nA % nths;

    count = (int *) malloc(nths * sizeof(int));
    disp = (int *) malloc(nths * sizeof(int));

    /* the first r threads take one extra element */
    for (i = 0; i < nths; i++) {
        count[i] = (i < r) ? q + 1 : q;
    }

    disp[0] = 0;
    for (i = 0; i < nths - 1; i++) { disp[i + 1] = disp[i] + count[i]; }

#pragma omp parallel private(tid, n) shared(A, disp, count)
    { //---fork---1st step: sort and dedup the local chunk
        tid = omp_get_thread_num();
        n = ssort(A + disp[tid], count[tid], flag);
        count[tid] = n;
    } //---join---

    buf = (int *) malloc(nA * sizeof(int));

#pragma omp parallel private(tid, n, k, d) shared(nths, nA, A, disp, count, buf)
    { //---fork---2nd step: binary-tree merge; at level d thread tid merges
      //            with thread tid+d when tid is a multiple of 2d
        tid = omp_get_thread_num();
        nths = omp_get_num_threads();
        k = nths;
        d = 1;
        while (k > 1) {
            if (tid % (2 * d) == 0 && tid + d < nths) {
                n = card_or(A + disp[tid], count[tid], A + disp[tid + d],
                            count[tid + d]);
                set_or(A + disp[tid], count[tid], A + disp[tid + d],
                       count[tid + d], buf + disp[tid]);
                count[tid] = n;
                memcpy(A + disp[tid], buf + disp[tid], n * sizeof(int));
            }
            /* bug fix: ALL threads advance the level so barriers match */
            d *= 2;
            k /= 2;
#pragma omp barrier
        }
    } //---join---

    nA = count[0];
    free(buf);
    free(count);
    free(disp);
    return nA;
}
371 #endif
372 
373 
/**
 * Check whether an array is sorted in non-decreasing order.
 *
 * @param indices array to check
 * @param count   number of elements
 * @return 0 if sorted (non-decreasing), 1 if an out-of-order pair exists
 */
int sorted(int *indices, int count) {
    int i;
    /* bug fix: the original bound (count - 2) skipped the last adjacent
       pair, so e.g. {1,2,3,2} was reported as sorted */
    for (i = 0; i < count - 1; i++) {
        if (indices[i] > indices[i + 1]) return 1;
    }
    return 0;
}
387 
/**
 * Check whether an array is strictly increasing.
 *
 * @param indices array to check
 * @param count   number of elements
 * @return 0 if strictly increasing, 1 if any pair is equal or decreasing
 */
int monotony(int *indices, int count) {
    int i;
    /* bug fix: the original bound (count - 2) skipped the last adjacent
       pair, so e.g. {1,2,2} was reported as strictly increasing */
    for (i = 0; i < count - 1; i++) {
        if (indices[i] >= indices[i + 1]) return 1;
    }
    return 0;
}
int set_or(int *A1, int n1, int *A2, int n2, int *A1orA2)
Definition: als.c:138
int card_or(int *A1, int n1, int *A2, int n2)
Definition: als.c:72
int ssort(int *indices, int count, int flag)
Definition: csort.c:172
int omp_psort(int *A, int nA, int flag)
Definition: csort.c:302
int omp_psort_opt(int *A, int nA, int flag)
Definition: csort.c:210
int counting_sort(int *indices, int count)
Definition: csort.c:111
int sorted(int *indices, int count)
Definition: csort.c:376
void shell_sort(int *a, int n)
Definition: csort.c:141
int monotony(int *indices, int count)
Definition: csort.c:388
void insertion_sort(int *indices, int count)
Definition: csort.c:32
void quick_sort(int *indices, int left, int right)
Definition: csort.c:55
void bubble_sort(int *indices, int count)
Definition: csort.c:88