sort

sort algorithms for any type of array
git clone https://noulin.net/git/sort.git
Log | Files | Refs | README | LICENSE

main.c (28748B)


      1 #! /usr/bin/env sheepy
      2 /* or direct path to sheepy: #! /usr/local/bin/sheepy */
      3 
      4 /* Libsheepy documentation: http://spartatek.se/libsheepy/ */
      5 #include "libsheepyObject.h"
      6 #include "sort.h"
      7 
      8 int argc; char **argv;
      9 
     10 
     11 /**
     12  * the advanced examples are in the trials function
     13  * (called from main)
     14  *
     15  * - float, double array
     16  * - pointer array
     17  * - struct array
     18  * - i32, i64, u64 keys
     19  */
     20 
     21 
     22 /* enable/disable logging */
     23 /* #undef pLog */
     24 /* #define pLog(...) */
     25 
     26 /**
     27  * tests
     28  * these functions have limited input
     29  */
     30 
     31 void myswap(int *a, int *b)
     32 {
     33     int temp = *a;
     34     *a = *b;
     35     *b = temp;
     36 }
     37 
     38 void flashSort_i32ArrayM(int *arr, size_t size, size_t m) {
     39   // Phase 1
     40   int min = arr[0];
     41   int max = arr[0];
     42   rangeFrom(i, 1, size) {
     43     min = MIN(arr[i], min);
     44     max = MAX(arr[i], max);
     45   }
     46 
     47   /* logVarG(min); */
     48   /* logVarG(max); */
     49 
     50   if (min == max)
     51     ret;
     52 
     53   // Divide to m class
     54   float c;
     55   if (max > 0 and min < 0)
     56     c = (float)(m - 1) / ((u32)max + (u32)(-min));
     57   else
     58     c = (float)(m - 1) / (max - min);
     59   /* logVarG(m); */
     60   /* logVarG((double)c); */
     61 
     62   size_t *L  = callocArray(L, m + 1);
     63   range(i, size) {
     64     size_t k;
     65     /* #define flashsortSubKMacro(num)\ */
     66     /*   if (num > 0 and min < 0) {\ */
     67     /*     k = c * ((u32)(num) + (u32)(-min)) + 1;\ */
     68     /*     #<{(|logVarG(num);\ */
     69     /*     logVarG(min);\ */
     70     /*     logVarG((u32)(num) + (u32)(-min));\ */
     71     /*     logVarG(k);|)}>#\ */
     72     /*   }\ */
     73     /*   else {\ */
     74     /*     k = c * ((num) - min) + 1;\ */
     75     /*     #<{(|logVarG(num);\ */
     76     /*     logVarG(min);\ */
     77     /*     logVarG((num) -min);\ */
     78     /*     logVarG(k);|)}>#\ */
     79     /*   } */
     80     flashsortSubKMacro(arr[i]);
     81     // L[k]: the number of class K
     82     L[k]++;
     83   }
     84   rangeFrom(k, 2, m+1) {
     85     // L[k]: upper bound of class K
     86     L[k] += L[k - 1];
     87   }
     88 
     89   // Phase 2
     90   // number of move
     91   size_t move = 0;
     92   size_t j    = 0;
     93   size_t k    = m;
     94   // Only move size - 1 step
     95   while (move < size - 1)
     96   {
     97     // arr[j] is in right class
     98     while (j >= L[k])
     99     {
    100       j++;
    101       flashsortSubKMacro(arr[j]);
    102     }
    103     // temp to hold the value
    104     int flash = arr[j];
    105     // stop when arr[j] is changed
    106     while (j < L[k])
    107     {
    108       flashsortSubKMacro(flash);
    109       myswap(&flash, &arr[--L[k]]);
    110       move++;
    111     }
    112   }
    113 
    114   // Phase 3
    115   // Use Insertion sort for every class
    116   // Don't sort the m class,
    117   // Because the m class is all max
    118   rangeFrom(k, 1, m) {
    119     rangeFrom(i, L[k] + 1, L[k + 1]) {
    120       int key = arr[i];
    121       i64 j   = i - 1;
    122       while (j >= 0 and arr[j] > key)
    123       {
    124         arr[j + 1] = arr[j];
    125         j--;
    126       }
    127       arr[j + 1] = key;
    128     }
    129   }
    130   free(L);
    131 }
    132 
    133 typ int (*flashSortGetFt)(const void *elemPtr);
    134 
    135 void flashSort_i32AM(void *arr, size_t size, size_t m, size_t elemSize, flashSortGetFt flashSortGet) {
    136   // Phase 1
    137   #define elemAddr(idx) (arr + (elemSize * (idx)))
    138   #define eGet(idx) flashSortGet(elemAddr(idx))
    139   int min = eGet(0);
    140   int max = eGet(0);
    141   rangeFrom(i, 1, size) {
    142     min = MIN(eGet(i), min);
    143     max = MAX(eGet(i), max);
    144   }
    145 
    146   if (min == max)
    147     ret;
    148 
    149   // Divide to m class
    150   float c;
    151   if (max > 0 and min < 0)
    152     c = (float)(m - 1) / ((u32)max + (u32)(-min));
    153   else
    154     c = (float)(m - 1) / (max - min);
    155 
    156   // stack allocated has no effect on speed
    157   size_t *L  = callocArray(L, m + 1);
    158   range(i, size) {
    159     size_t k;
    160     flashsortSubKMacro(eGet(i));
    161     // L[k]: the number of class K
    162     L[k]++;
    163   }
    164   rangeFrom(k, 2, m+1) {
    165     // L[k]: upper bound of class K
    166     L[k] += L[k - 1];
    167   }
    168 
    169   // Phase 2
    170   #define cpe(dst, src) memcpy(dst, src, elemSize);
    171   // number of move
    172   size_t move = 0;
    173   size_t j    = 0;
    174   size_t k    = m;
    175   // Only move size - 1 step
    176   while (move < size - 1)
    177   {
    178     // eGet(j) is in right class
    179     while (j >= L[k])
    180     {
    181       j++;
    182       flashsortSubKMacro(eGet(j));
    183     }
    184     // temp to hold the value
    185     int  flash = eGet(j);
    186     char flashElem[elemSize];
    187     cpe(flashElem, elemAddr(j));
    188 
    189     // stop when eGet(j) is changed
    190     while (j < L[k])
    191     {
    192       flashsortSubKMacro(flash);
    193       char tmpElem[elemSize];
    194       cpe(tmpElem, flashElem);
    195       cpe(flashElem, elemAddr(--L[k]));
    196       cpe(elemAddr(L[k]), tmpElem);
    197       flash = flashSortGet(flashElem);
    198       move++;
    199     }
    200   }
    201 
    202   // Phase 3
    203   // Use Insertion sort for every class
    204   // Don't sort the m class,
    205   // Because the m class is all max
    206   rangeFrom(k, 1, m) {
    207     rangeFrom(i, L[k] + 1, L[k + 1]) {
    208       int key = eGet(i);
    209       char keyElem[elemSize];
    210       cpe(keyElem, elemAddr(i));
    211 
    212       i64 j   = i - 1;
    213       while (j >= 0 and eGet(j) > key)
    214       {
    215         cpe(elemAddr(j+1), elemAddr(j));
    216         j--;
    217       }
    218       cpe(elemAddr(j+1), keyElem);
    219     }
    220   }
    221   free(L);
    222 }
    223 
    224 void flashSort_i32Array(int *arr, size_t size) {
    225   flashSort_i32ArrayM(arr, size, 0.1 * size);
    226 }
    227 
    228 void flashSort_i32A(void *arr, size_t size, size_t elemSize, flashSortGetFt flashSortGet) {
    229   flashSort_i32ArrayM(arr, size, 0.1 * size);
    230 }
    231 
    232 
    233 
    234 //radix bsort
    235 
    236 //#define RADIX_LEVEL 3
    237 #define RADIX_LEVEL 40
    238 
    239 internal inline void shellsort(u32 *arr, size_t size) {
    240   u32 tmp; // tmp record
    241 
    242   rangeFrom(i, 3, size) {
    243     tmp = arr[i];
    244     size_t j;
    245     for(j = i ; j >= 3 && arr[j-3] > tmp; j-=3) {
    246       arr[j] = arr[j-3];
    247     }
    248     arr[j] = tmp;
    249   }
    250 
    251   rangeFrom(i, 1, size) {
    252     tmp = arr[i];
    253     size_t j;
    254     for(j = i ; j >= 1 && arr[j-1] > tmp; j--) {
    255       arr[j] = arr[j-1];
    256     }
    257     arr[j] = tmp;
    258   }
    259 }
    260 
    261 void radixyOrig(u32 *arr, size_t size, size_t digit, size_t stackSize,  size_t cutoff) {
    262   #define stop 256
    263   u8 *a = (u8*) arr;
    264   size_t counts[stop];
    265   size_t offsets[stop];
    266   size_t starts[stop];
    267   size_t ends[stop];
    268   size_t digt       = sizeof(u32) - digit - 1;
    269 
    270   zeroBuf(counts, sizeof(counts));
    271 
    272   // compute counts
    273   range(x, size) {
    274     u8 c = a[x * sizeof(u32) + digt];
    275     counts[c]++;
    276   }
    277 
    278   // compute offsets
    279   offsets[0] = 0;
    280   starts[0]  = 0;
    281   rangeFrom(x, 1, stop) {
    282     offsets[x] = starts[x] = ends[x-1] = offsets[x-1] + counts[x-1];
    283   }
    284   ends[stop-1] = size;
    285 
    286 
    287   // loop on digit values
    288   range(x, stop) {
    289     // move records according to digit value
    290     while(offsets[x] < ends[x]) {
    291       u8 target = a[offsets[x] * sizeof(u32) + digt];
    292       if (target == x) {
    293         // this record already placed in the correct bucket
    294         offsets[x]++;
    295       }
    296       else {
    297         size_t stackIdx = 0;
    298         size_t stack[stackSize];
    299         stack[stackIdx++] = offsets[x];
    300         while(target != x && stackIdx < stackSize) {
    301           stack[stackIdx] = offsets[target]++;
    302           target          = a[stack[stackIdx] * sizeof(u32) + digt];
    303           stackIdx++;
    304         }
    305         if (stackIdx != stackSize) {
    306           // found an element to store a offsets[x]
    307           offsets[x]++;
    308         }
    309         stackIdx--;
    310         u32 tmp = arr[stack[stackIdx]];
    311         while(stackIdx) {
    312           arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
    313           stackIdx--;
    314         }
    315         arr[stack[0]] = tmp;
    316       }
    317     }
    318   }
    319 
    320   // cutoff to shellsort
    321   if (digit < cutoff) {
    322     // loop on digit values to sort the buckets
    323     range(x, stop) {
    324       if (counts[x] > RADIX_LEVEL) {
    325         radixyOrig(&arr[starts[x]], counts[x], digit+1, stackSize, cutoff);
    326       }
    327       else {
    328         if (counts[x] < 2) continue;
    329         shellsort(&arr[starts[x]], counts[x]);
    330       }
    331     }
    332   }
    333   else {
    334     range(x, stop) {
    335       if (counts[x] > 1) {
    336         shellsort(&arr[starts[x]], counts[x]);
    337       }
    338     }
    339   }
    340   #undef stop
    341 }
    342 
    343 void radixy(u32 *arr, size_t size, size_t digit, size_t stackSize,  size_t cutoff);
    344 
    345 typ struct {u32 *arr; size_t size; size_t digit; size_t stackSize;  size_t cutoff;} radixyArgst;
    346 
    347 typ struct {u32 *arr; size_t size;} shellsortArgst;
    348 
    349 void radixyThread(void *args) {
    350   cast(radixyArgst*, p, args);
    351   radixy(p->arr, p->size, p->digit, p->stackSize, p->cutoff);
    352 }
    353 
    354 void shellsortThread(void *args) {
    355   cast(shellsortArgst *, p, args);
    356   shellsort(p->arr, p->size);
    357 }
    358 
    359 // parallel radix sort for u32 array
    360 void radixy(u32 *arr, size_t size, size_t digit, size_t stackSize,  size_t cutoff) {
    361   #define stop 256
    362   u8 *a = (u8*) arr;
    363   size_t counts[stop];
    364   size_t offsets[stop];
    365   size_t starts[stop];
    366   size_t ends[stop];
    367   size_t digt       = sizeof(u32) - digit - 1;
    368   radixyArgst rdxArgs[stop];
    369   size_t rdxArgsIdx = 0;
    370 
    371   zeroBuf(counts, sizeof(counts));
    372 
    373   // compute counts
    374   range(x, size) {
    375     u8 c = a[x * sizeof(u32) + digt];
    376     counts[c]++;
    377   }
    378 
    379   // compute offsets
    380   offsets[0] = 0;
    381   starts[0]  = 0;
    382   rangeFrom(x, 1, stop) {
    383     offsets[x] = starts[x] = ends[x-1] = offsets[x-1] + counts[x-1];
    384   }
    385   ends[stop-1] = size;
    386 
    387 
    388   // loop on digit values
    389   range(x, stop) {
    390     // move records according to digit value
    391     while(offsets[x] < ends[x]) {
    392       u8 target = a[offsets[x] * sizeof(u32) + digt];
    393       if (target == x) {
    394         // this record already placed in the correct bucket
    395         offsets[x]++;
    396       }
    397       else {
    398         size_t stackIdx = 0;
    399         size_t stack[stackSize];
    400         stack[stackIdx++] = offsets[x];
    401         while(target != x && stackIdx < stackSize) {
    402           stack[stackIdx] = offsets[target]++;
    403           target          = a[stack[stackIdx] * sizeof(u32) + digt];
    404           stackIdx++;
    405         }
    406         if (stackIdx != stackSize) {
    407           // found an element to store a offsets[x]
    408           offsets[x]++;
    409         }
    410         stackIdx--;
    411         u32 tmp = arr[stack[stackIdx]];
    412         while(stackIdx) {
    413           arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
    414           stackIdx--;
    415         }
    416         arr[stack[0]] = tmp;
    417       }
    418     }
    419   }
    420 
    421   // cutoff to shellsort
    422   if (digit < cutoff) {
    423     // loop on digit values to sort the buckets
    424     range(x, stop) {
    425       if (counts[x] > RADIX_LEVEL) {
    426         if (!digit) {
    427           rdxArgs[rdxArgsIdx] = (radixyArgst){.arr=&arr[starts[x]], .size=counts[x], .digit=digit+1, .stackSize=stackSize, .cutoff=cutoff};
    428           tpoolAdd(radixyThread, (void*)&rdxArgs[rdxArgsIdx]);
    429           rdxArgsIdx++;
    430         }
    431         else
    432           radixy(&arr[starts[x]], counts[x], digit+1, stackSize, cutoff);
    433       }
    434       else {
    435         if (counts[x] < 2) continue;
    436         /* ssArgs[ssArgsIdx] = (shellsortArgst){.arr=&arr[starts[x]], .size=counts[x]}; */
    437         /* tpoolAdd(shellsortThread, (void*)&ssArgs[ssArgsIdx]); */
    438         /* ssArgsIdx++; */
    439         shellsort(&arr[starts[x]], counts[x]);
    440       }
    441     }
    442   }
    443   else {
    444     range(x, stop) {
    445       if (counts[x] > 1) {
    446         shellsort(&arr[starts[x]], counts[x]);
    447       }
    448     }
    449   }
    450   #undef stop
    451 
    452   if (!digit) {
    453     tpoolWait;
    454   }
    455 }
    456 
    457 void radixyMacro(u32 *arr, size_t size, size_t digit, size_t stackSize,  size_t cutoff) {
    458   #define stop 256
    459   u32 *ar[sizeof(u32)];
    460   size_t sizes[sizeof(u32)];
    461   u8 *a[sizeof(u32)];
    462   size_t counts[sizeof(u32)][stop];
    463   size_t offsets[sizeof(u32)][stop];
    464   size_t starts[sizeof(u32)][stop];
    465   size_t ends[sizeof(u32)][stop];
    466   jmp_buf jumpBuffers[sizeof(u32)];
    467 
    468   zeroBuf(counts, sizeof(counts));
    469 
    470   ar[0]    = arr;
    471   sizes[0] = size;
    472 
    473   radixStart:;
    474   a[0]              = (u8*) ar[0];
    475   size_t digt       = sizeof(u32) - digit - 1;
    476   // compute counts
    477   range(x, sizes[0]) {
    478     u8 c = a[0][x * sizeof(u32) + digt];
    479     counts[0][c]++;
    480   }
    481 
    482   // compute offsets
    483   offsets[0][0] = 0;
    484   starts[0][0]  = 0;
    485   rangeFrom(x, 1, stop) {
    486     offsets[0][x] = starts[0][x] = ends[0][x-1] = offsets[0][x-1] + counts[0][x-1];
    487   }
    488   ends[0][stop-1] = sizes[0];
    489 
    490 
    491   // loop on digit values
    492   range(x, stop) {
    493     // move records according to digit value
    494     while(offsets[0][x] < ends[0][x]) {
    495       u8 target = a[0][offsets[0][x] * sizeof(u32) + digt];
    496       if (target == x) {
    497         // this record already placed in the correct bucket
    498         offsets[0][x]++;
    499       }
    500       else {
    501         size_t stackIdx = 0;
    502         size_t stack[stackSize];
    503         stack[stackIdx++] = offsets[0][x];
    504         while(target != x && stackIdx < stackSize) {
    505           stack[stackIdx] = offsets[0][target]++;
    506           target          = a[0][stack[stackIdx] * sizeof(u32) + digt];
    507           stackIdx++;
    508         }
    509         if (stackIdx != stackSize) {
    510           // found an element to store a offsets[0][x]
    511           offsets[0][x]++;
    512         }
    513         stackIdx--;
    514         u32 tmp = ar[0][stack[stackIdx]];
    515         while(stackIdx) {
    516           ar[0][stack[stackIdx]] = ar[0][stack[stackIdx-1]];
    517           stackIdx--;
    518         }
    519         ar[0][stack[0]] = tmp;
    520       }
    521     }
    522   }
    523 
    524   // cutoff to shellsort
    525   if (digit < cutoff) {
    526     // loop on digit values to sort the buckets
    527     range(x, stop) {
    528       if (counts[0][x] > RADIX_LEVEL) {
    529         radixy(&ar[0][starts[0][x]], counts[0][x], digit+1, stackSize, cutoff);
    530       }
    531       else {
    532         if (counts[0][x] < 2) continue;
    533         shellsort(&ar[0][starts[0][x]], counts[0][x]);
    534       }
    535     }
    536   }
    537   else {
    538     range(x, stop) {
    539       if (counts[0][x] > 1) {
    540         shellsort(&ar[0][starts[0][x]], counts[0][x]);
    541       }
    542     }
    543   }
    544   #undef stop
    545 }
    546 
    547 
    548 // radixy from strings
    549 internal inline void shellsortS(char **arr, size_t size) {
    550   char *tmp; // tmp record
    551 
    552   rangeFrom(i, 3, size) {
    553     tmp = arr[i];
    554     size_t j;
    555     for(j = i ; j >= 3 && strcmp(arr[j-3],tmp) > 0; j-=3) {
    556       arr[j] = arr[j-3];
    557     }
    558     arr[j] = tmp;
    559   }
    560 
    561   rangeFrom(i, 1, size) {
    562     tmp = arr[i];
    563     size_t j;
    564     for(j = i ; j >= 1 && strcmp(arr[j-1],tmp) > 0; j--) {
    565       arr[j] = arr[j-1];
    566     }
    567     arr[j] = tmp;
    568   }
    569 }
    570 
    571 /**
    572  * radixyS doesn't accept empty strings
    573  * it segfaults when there is an empty string in the array
    574  */
    575 void radixySOrig(char **arr, size_t size, size_t digit, u8 charStart, u16 charStop, size_t stackSize,  size_t cutoff) {
    576   size_t counts[charStop+1];
    577   size_t offsets[charStop+1];
    578   size_t starts[charStop+1];
    579   size_t ends[charStop+1];
    580   size_t digt       = digit;
    581 
    582   zeroBuf(counts, sizeof(counts));
    583 
    584   // compute counts
    585   range(x, size) {
    586     u8 c = *(arr[x]+digt);
    587     counts[c]++;
    588   }
    589 
    590   // compute offsets
    591   offsets[0] = 0;
    592   starts[0]  = 0;
    593   offsets[charStart] = starts[charStart] = ends[0] = offsets[0] + counts[0];
    594   rangeFrom(x, charStart+1, charStop+1) {
    595     offsets[x] = starts[x] = ends[x-1] = offsets[x-1] + counts[x-1];
    596   }
    597   ends[charStop] = size;
    598 
    599   // sort 0 end of string
    600   // move records according to digit value
    601   while(offsets[0] < ends[0]) {
    602     u8 target = *(arr[offsets[0]] + digt);
    603     if (target == 0) {
    604       // this record already placed in the correct bucket
    605       offsets[0]++;
    606     }
    607     else {
    608       size_t stackIdx = 0;
    609       size_t stack[stackSize];
    610       stack[stackIdx++] = offsets[0];
    611       while(target != 0 && stackIdx < stackSize) {
    612         stack[stackIdx] = offsets[target]++;
    613         target          = *(arr[stack[stackIdx]] + digt);
    614         stackIdx++;
    615       }
    616       if (stackIdx != stackSize) {
    617         // found an element to store a offsets[0]
    618         offsets[0]++;
    619       }
    620       stackIdx--;
    621       char *tmp = arr[stack[stackIdx]];
    622       while(stackIdx) {
    623         arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
    624         stackIdx--;
    625       }
    626       arr[stack[0]] = tmp;
    627     }
    628   }
    629   // loop on digit values
    630   rangeFrom(x, charStart, charStop+1) {
    631     // move records according to digit value
    632     while(offsets[x] < ends[x]) {
    633       u8 target = *(arr[offsets[x]] + digt);
    634       if (target == x) {
    635         // this record already placed in the correct bucket
    636         offsets[x]++;
    637       }
    638       else {
    639         size_t stackIdx = 0;
    640         size_t stack[stackSize];
    641         stack[stackIdx++] = offsets[x];
    642         while(target != x && stackIdx < stackSize) {
    643           stack[stackIdx] = offsets[target]++;
    644           target          = *(arr[stack[stackIdx]] + digt);
    645           stackIdx++;
    646         }
    647         if (stackIdx != stackSize) {
    648           // found an element to store a offsets[x]
    649           offsets[x]++;
    650         }
    651         stackIdx--;
    652         char *tmp = arr[stack[stackIdx]];
    653         while(stackIdx) {
    654           arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
    655           stackIdx--;
    656         }
    657         arr[stack[0]] = tmp;
    658       }
    659     }
    660   }
    661 
    662   // cutoff to shellsort
    663   if (digit < cutoff) {
    664     // sort 0 end of string
    665     if (counts[0] > RADIX_LEVEL) {
    666       radixySOrig(&arr[starts[0]], counts[0], digit+1, charStart, charStop, stackSize, cutoff);
    667     }
    668     else {
    669       if (counts[0] > 1) shellsortS(&arr[starts[0]], counts[0]);
    670     }
    671     // loop on digit values to sort the buckets
    672     rangeFrom(x, charStart, charStop+1) {
    673       if (counts[x] > RADIX_LEVEL) {
    674         radixySOrig(&arr[starts[x]], counts[x], digit+1, charStart, charStop, stackSize, cutoff);
    675       }
    676       else {
    677         if (counts[x] < 2) continue;
    678         shellsortS(&arr[starts[x]], counts[x]);
    679       }
    680     }
    681   }
    682   else {
    683     if (counts[0] > 1) {
    684       shellsortS(&arr[starts[0]], counts[0]);
    685     }
    686     rangeFrom(x, charStart, charStop+1) {
    687       if (counts[x] > 1) {
    688         shellsortS(&arr[starts[x]], counts[x]);
    689       }
    690     }
    691   }
    692 }
    693 
    694 void radixyS(char **arr, size_t size, size_t digit, u8 charStart, u16 charStop, size_t stackSize,  size_t cutoff);
    695 
    696 typ struct {char **arr; size_t size; size_t digit; u8 charStart; u16 charStop; size_t stackSize;  size_t cutoff;} radixySArgst;
    697 
    698 void radixySThread(void *args) {
    699   cast(radixySArgst*, p, args);
    700   radixyS(p->arr, p->size, p->digit, p->charStart, p->charStop, p->stackSize, p->cutoff);
    701 }
    702 
    703 // parallel radix sort for string (char*) arrays
    704 void radixyS(char **arr, size_t size, size_t digit, u8 charStart, u16 charStop, size_t stackSize,  size_t cutoff) {
    705   size_t counts[charStop+1];
    706   size_t offsets[charStop+1];
    707   size_t starts[charStop+1];
    708   size_t ends[charStop+1];
    709   size_t digt       = digit;
    710   radixySArgst rdxArgs[charStop+1];
    711   size_t rdxArgsIdx = 0;
    712 
    713   zeroBuf(counts, sizeof(counts));
    714 
    715   // compute counts
    716   range(x, size) {
    717     u8 c = *(arr[x]+digt);
    718     counts[c]++;
    719   }
    720 
    721   // compute offsets
    722   offsets[0] = 0;
    723   starts[0]  = 0;
    724   offsets[charStart] = starts[charStart] = ends[0] = offsets[0] + counts[0];
    725   rangeFrom(x, charStart+1, charStop+1) {
    726     offsets[x] = starts[x] = ends[x-1] = offsets[x-1] + counts[x-1];
    727   }
    728   ends[charStop] = size;
    729 
    730   // sort 0 end of string
    731   // move records according to digit value
    732   while(offsets[0] < ends[0]) {
    733     u8 target = *(arr[offsets[0]] + digt);
    734     if (target == 0) {
    735       // this record already placed in the correct bucket
    736       offsets[0]++;
    737     }
    738     else {
    739       size_t stackIdx = 0;
    740       size_t stack[stackSize];
    741       stack[stackIdx++] = offsets[0];
    742       while(target != 0 && stackIdx < stackSize) {
    743         stack[stackIdx] = offsets[target]++;
    744         target          = *(arr[stack[stackIdx]] + digt);
    745         stackIdx++;
    746       }
    747       if (stackIdx != stackSize) {
    748         // found an element to store a offsets[0]
    749         offsets[0]++;
    750       }
    751       stackIdx--;
    752       char *tmp = arr[stack[stackIdx]];
    753       while(stackIdx) {
    754         arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
    755         stackIdx--;
    756       }
    757       arr[stack[0]] = tmp;
    758     }
    759   }
    760   // loop on digit values
    761   rangeFrom(x, charStart, charStop+1) {
    762     // move records according to digit value
    763     while(offsets[x] < ends[x]) {
    764       u8 target = *(arr[offsets[x]] + digt);
    765       if (target == x) {
    766         // this record already placed in the correct bucket
    767         offsets[x]++;
    768       }
    769       else {
    770         size_t stackIdx = 0;
    771         size_t stack[stackSize];
    772         stack[stackIdx++] = offsets[x];
    773         while(target != x && stackIdx < stackSize) {
    774           stack[stackIdx] = offsets[target]++;
    775           target          = *(arr[stack[stackIdx]] + digt);
    776           stackIdx++;
    777         }
    778         if (stackIdx != stackSize) {
    779           // found an element to store a offsets[x]
    780           offsets[x]++;
    781         }
    782         stackIdx--;
    783         char *tmp = arr[stack[stackIdx]];
    784         while(stackIdx) {
    785           arr[stack[stackIdx]] = arr[stack[stackIdx-1]];
    786           stackIdx--;
    787         }
    788         arr[stack[0]] = tmp;
    789       }
    790     }
    791   }
    792 
    793   // cutoff to shellsort
    794   if (digit < cutoff) {
    795     // sort 0 end of string
    796     if (counts[0] > RADIX_LEVEL) {
    797       if (!digit) {
    798         rdxArgs[rdxArgsIdx] = (radixySArgst){.arr=&arr[starts[0]], .size=counts[0], .digit=digit+1, .charStart=charStart, .charStop=charStop, .stackSize=stackSize, .cutoff=cutoff};
    799         tpoolAdd(radixySThread, (void*)&rdxArgs[rdxArgsIdx]);
    800         rdxArgsIdx++;
    801       }
    802       else
    803         radixyS(&arr[starts[0]], counts[0], digit+1, charStart, charStop, stackSize, cutoff);
    804     }
    805     else {
    806       if (counts[0] > 1) shellsortS(&arr[starts[0]], counts[0]);
    807     }
    808     // loop on digit values to sort the buckets
    809     rangeFrom(x, charStart, charStop+1) {
    810       if (counts[x] > RADIX_LEVEL) {
    811         if (!digit) {
    812           rdxArgs[rdxArgsIdx] = (radixySArgst){.arr=&arr[starts[x]], .size=counts[x], .digit=digit+1, .charStart=charStart, .charStop=charStop, .stackSize=stackSize, .cutoff=cutoff};
    813           tpoolAdd(radixySThread, (void*)&rdxArgs[rdxArgsIdx]);
    814           rdxArgsIdx++;
    815         }
    816         else
    817           radixyS(&arr[starts[x]], counts[x], digit+1, charStart, charStop, stackSize, cutoff);
    818       }
    819       else {
    820         if (counts[x] < 2) continue;
    821         shellsortS(&arr[starts[x]], counts[x]);
    822       }
    823     }
    824   }
    825   else {
    826     if (counts[0] > 1) {
    827       shellsortS(&arr[starts[0]], counts[0]);
    828     }
    829     rangeFrom(x, charStart, charStop+1) {
    830       if (counts[x] > 1) {
    831         shellsortS(&arr[starts[x]], counts[x]);
    832       }
    833     }
    834   }
    835 
    836   if (!digit) {
    837     tpoolWait;
    838   }
    839 }
    840 
    841 
    842 // u32 array
    843 #define radixsortAt(ptr) *(ptr)
    844 radixyUintDef(/*scope*/,radi/*name*/,u32/*elemType*/,6/*stackSize*/,4/*cutoff*/,40/*RADIX_LEVEL*/);
    845 #undef radixsortAt
    846 //#define radi
    847 
    848 
    849 // u64 array
    850 typ struct {u64 a; int b;} rdxElemt;
    851 #define radixsortAt(ptr) (ptr)->a
    852 radixyUintThreadDef(/*scope*/,radi64/*name*/,rdxElemt/*elemType*/,6/*stackSize*/,4/*cutoff*/,40/*RADIX_LEVEL*/);
    853 #undef radixsortAt
    854 
    855 // array with string keys
    856 typ struct {char *a; int b;} skElemt;
    857 
    858 #define radixsortAt(ptr) (ptr)->a
    859 radixyStringThreadDef(/*scope*/,radiS/*name*/,skElemt/*elemType*/,6/*stackSize*/,4/*cutoff*/, 40/*RADIX_LEVEL*/);
    860 #undef radixsortAt
    861 
    862 
    863 // end radix sort
    864 
    865 void trials(void) {
    866 
    867   typ struct {i32 a; int b;} elemt;
    868 
    869   //#define sz 600000
    870   #define sz 60
    871   //goto rnd;
    872 
    873   // radix sort struct array
    874   rdxElemt radA[sz];
    875   skElemt radS[sz];
    876 
    877 
    878 
    879   srandom(10);
    880   range(i, ARRAY_SIZE(radA)) {
    881     radA[i].a = (u64)random() << 27;
    882     radA[i].b = i;
    883     radS[i].a = randomS(random()%15+1);
    884     radS[i].b = i;
    885     /* printf("(%lu,%d) ", radA[i].a, radA[i].b); */
    886     printf("(%s,%d) ", radS[i].a, radS[i].b);
    887   }
    888   put;put;
    889 
    890   radi64(radA, sz);
    891   //radiS(radS, sz);
    892   if (!radiSSafe(radS, sz)) {
    893     logE("Empty strings are not allowed");
    894   }
    895 
    896   //XSUCCESS;
    897 
    898   range(i, ARRAY_SIZE(radA)) {
    899     /* printf("(%lu,%d) ", radA[i].a, radA[i].b); */
    900     printf("(%s,%d) ", radS[i].a, radS[i].b);
    901   }
    902   put;
    903   XSUCCESS;
    904 
    905   // --------------------------------
    906   // float key array
    907   //
    908   // intel skylake cpu: it takes less time sorting float array than sorting int array
    909 
    910   float  fa[sz];
    911   double da[sz];
    912 
    913   srandom(10);
    914   range(i, ARRAY_SIZE(fa)) {
    915     fa[i] = (float)((random() - 0x4fffffff) * 2) / 10000000000;
    916     da[i] = (double)((random() - 0x4fffffff) * 2) / 10000000000;
    917     /* printf("%f ", fa[i]); */
    918     /* printf("%f ", da[i]); */
    919   }
    920 
    921   stopwatchStart;
    922 
    923   #define flashsortGet(ptr) (*(ptr))
    924   flashsortM(fa, ARRAY_SIZE(fa), 0.2 * ARRAY_SIZE(fa));
    925   /* flashsortM(da, ARRAY_SIZE(da), 0.2 * ARRAY_SIZE(da)); */
    926   #undef flashsortGet
    927 
    928   stopwatchLogUs;
    929 
    930   //XSUCCESS;
    931 
    932   range(i, ARRAY_SIZE(fa)) {
    933       printf("%f ", fa[i]);
    934       /* printf("%f ", da[i]); */
    935   }
    936   put;
    937 
    938   XSUCCESS;
    939 
    940   // --------------------------------
    941   // all int keys
    942 
    943   elemt arr[sz];
    944 
    945   range(i, ARRAY_SIZE(arr)) {
    946     arr[i].b = i;
    947   }
    948 
    949   srandom(10);
    950   range(i, ARRAY_SIZE(arr)) {
    951     //arr[i].a = ((int)randomWordFromHW());
    952     //arr[i].a = absV((int)randomWordFromHW()) % (ARRAY_SIZE(arr) *2);
    953     //arr[i].a = random() % (ARRAY_SIZE(arr) *2);
    954     //arr[i].a = random();
    955     arr[i].a = (random() - 0x4fffffff) * 2;
    956     //printf("%d ", arr[i]);
    957     //printf("%lu ", arr[i]);
    958     //printf("%d %d\n", arr[i].a, arr[i].b);
    959   }
    960 
    961 
    962   goto arrr;
    963 
    964 
    965 
    966   // pointer array (move pointer to struct, to be used when the elements have a lot of data)
    967 
    968   elemt *parr[sz];
    969 
    970   range(i, ARRAY_SIZE(arr)) {
    971     parr[i] = &arr[i];
    972   }
    973 
    974   stopwatchStart;
    975 
    976   #define flashsortGet(ptr) (*(ptr))->a
    977   flashsortM(parr, ARRAY_SIZE(parr), 0.1 * ARRAY_SIZE(parr));
    978   #undef flashsortGet
    979 
    980   stopwatchLogUs;
    981 
    982   /* range(i, ARRAY_SIZE(arr)) { */
    983   /*     printf("%d %d\n", parr[i]->a,parr[i]->b); */
    984   /* } */
    985   /* put; */
    986 
    987   XSUCCESS;
    988 
    989 
    990 
    991 
    992 
    993   // struct int array (move data)
    994 
    995 
    996 arrr:;
    997 
    998   /* int get(const void *e) { */
    999   /*   ret ((elemt*)e)->a; */
   1000   /* } */
   1001 
   1002   stopwatchStart;
   1003 
   1004   //flashSort_i32AM(arr, ARRAY_SIZE(arr), 0.14 * ARRAY_SIZE(arr), sizeof(elemt), get);
   1005   #define flashsortGet(ptr) (ptr)->a
   1006   flashsortM(arr, ARRAY_SIZE(arr), 0.2 * ARRAY_SIZE(arr));
   1007   //flashsort(arr, ARRAY_SIZE(arr));
   1008   #undef flashsortGet
   1009 
   1010   int cmpArr(const void *a, const void *b) {
   1011     ret CMP(((elemt*)a)->a, ((elemt*)b)->a);
   1012     //wrong overflow - ret ( ((elemt*)a)->a - ((elemt*)b)->a );
   1013   }
   1014 
   1015   //qsort(arr, ARRAY_SIZE(arr), sizeof(elemt), cmpArr);
   1016 
   1017   stopwatchLogUs;
   1018 
   1019   range(i, ARRAY_SIZE(arr)) {
   1020       printf("%d ", arr[i].a);
   1021       /* printf("%lu ", arr[i].a); */
   1022       /* printf("%d %d\n", arr[i].a, arr[i].b); */
   1023   }
   1024   put;
   1025 
   1026   XSUCCESS;
   1027 
   1028 
   1029 
   1030   // int array
   1031 
   1032 rnd:;
   1033   int rnd[sz];
   1034   u32 urnd[sz];
   1035   i64 rnd64[sz];
   1036   u64 urnd64[sz];
   1037   char *list[sz+1];
   1038   list[sz] = NULL;
   1039   char **lst = list;
   1040 
   1041   srandom(10);
   1042   range(i, ARRAY_SIZE(rnd)) {
   1043     //rnd[i] = ((int)randomWordFromHW());
   1044     //rnd[i] = absV((int)randomWordFromHW()) % (ARRAY_SIZE(rnd) *2);
   1045     //rnd[i] = random() % (ARRAY_SIZE(rnd) *2);
   1046     rnd[i]    = (random() - 0x4fffffff) * 2;
   1047     //urnd[i]   = (random() * 2);
   1048     urnd[i]   = random();
   1049     // string representing a number
   1050     /* sprintf((char*)&urnd[i], "%d", ARRAY_SIZE(rnd)-i-1); */
   1051     /* sprintf((char*)&urnd[i], "%c", '0'+ARRAY_SIZE(rnd)-i-1); */
   1052     rnd64[i]  = random() << 33 + random();
   1053     urnd64[i] = random() << 33 + random();
   1054     /* list[i]   = intToS(random()); */
   1055     list[i]   = randomS(random()%15+1);
   1056     //printf("%d ", rnd[i]);
   1057     //printf("%u ", urnd[i]);
   1058     //printf("%08x ", urnd[i]);
   1059     //printf("%ld ", rnd64[i]);
   1060   }
   1061 
   1062   /* logVarG(lst); */
   1063   put;put
   1064 
   1065   stopwatchStart;
   1066 
   1067   /* range(i, 100000) { */
   1068   /*   flashSort_i32Array(rnd, ARRAY_SIZE(rnd)); */
   1069   /* } */
   1070 
   1071   //flashSort_i32Array(rnd, ARRAY_SIZE(rnd));
   1072   //flashSort_i32ArrayM(rnd, ARRAY_SIZE(rnd), 0.2 * ARRAY_SIZE(rnd));
   1073   #define flashsortGet(ptr) (*(ptr))
   1074   //flashsortM(rnd, ARRAY_SIZE(rnd), 0.2 * ARRAY_SIZE(rnd));
   1075   //flashsortM(urnd, ARRAY_SIZE(urnd), 0.2 * ARRAY_SIZE(urnd));
   1076   //flashsortM(rnd64, ARRAY_SIZE(rnd64), 0.2 * ARRAY_SIZE(rnd64));
   1077   //flashsortM(urnd64, ARRAY_SIZE(urnd64), 0.2 * ARRAY_SIZE(urnd64));
   1078   //flashsort(rnd, ARRAY_SIZE(rnd));
   1079   //int *A = rnd;
   1080   //flashsort(A, ARRAY_SIZE(rnd));
   1081 
   1082   //RadixSort(urnd, sz);
   1083   //radixSort(urnd, sz);
   1084   //radixy(urnd, sz, 0 /*digit*/, 6 /*stackSize*/, 4 /*cutoff*/);
   1085   radi(urnd, sz);
   1086   //radixyS(lst, sz, 0 /*digit*/, 44 /*start*/, 'z' /*stop*/, 5 /*stackSize*/, 4 /*cutoff*/);
   1087   //radixyStrTODO(urnd, sz, 0 /* digit */);
   1088   //sortG(&lst);
   1089 
   1090   int cmp(const void *a, const void *b) {
   1091     ret CMP(*(int*)a, *(int*)b);
   1092     //ret ( *(int*)a - *(int*)b );
   1093   }
   1094   //qsort(rnd, ARRAY_SIZE(rnd), sizeof(int), cmp);
   1095   int ucmp(const void *a, const void *b) {
   1096     ret CMP(*(u32*)a, *(u32*)b);
   1097     //ret ( *(u32*)a - *(u32*)b );
   1098   }
   1099   //qsort(urnd, ARRAY_SIZE(urnd), sizeof(int), ucmp);
   1100 
   1101   stopwatchLogUs;
   1102 
   1103   // check
   1104   range(i, sz-1) {
   1105     if (urnd[i+1] < urnd[i]) {
   1106       logVarG(i);
   1107       logVarG(urnd[i]);
   1108       logVarG(urnd[i+1]);
   1109       logE("wrong order");
   1110       /* XFAILURE; */
   1111     }
   1112   }
   1113 
   1114   /* // check */
   1115   /* range(i, sz-1) { */
   1116   /*   if (strcmp(lst[i+1], lst[i]) < 0) { */
   1117   /*     logVarG(i); */
   1118   /*     logVarG(lst[i]); */
   1119   /*     logVarG(lst[i+1]); */
   1120   /*     logE("wrong order"); */
   1121   /*   } */
   1122   /* } */
   1123   //XSUCCESS;
   1124 
   1125   range(i, ARRAY_SIZE(rnd)) {
   1126       /* printf("%d ", rnd[i]); */
   1127       /* printf("%ld ", rnd64[i]); */
   1128       printf("%u ", urnd[i]);
   1129       /* printf("%08x ", urnd[i]); */
   1130       /* printf("%s ", &urnd[i]); */
   1131       /* printf("%lu ", urnd64[i]); */
   1132   }
   1133   put;
   1134 
   1135   /* logVarG(lst); */
   1136   // TODO free lst strings
   1137 
   1138   /* typeof(convertToUnsignedType(rnd[0])) a = 1; */
   1139   /* typeof(convertToUnsignedType(rnd[0])) b = -1; */
   1140   /* toUnsignedType((rnd[0])) a = 1; */
   1141   /* toUnsignedType((rnd[0])) b = -1; */
   1142   /* typeof((rnd[0])) a = 1; */
   1143   /* typeof((rnd[0])) b = -1; */
   1144   /* logVarG(MAX(a,b)); */
   1145 }
   1146 
   1147 int main(int ARGC, char** ARGV) {
   1148 
   1149   argc = ARGC; argv = ARGV;
   1150 
   1151   initLibsheepy(ARGV[0]);
   1152   setLogMode(LOG_FUNC);
   1153   setLogSymbols(LOG_UTF8);
   1154   setStackLimit(-1);
   1155 
   1156   trials();
   1157 
   1158 }
   1159 // vim: set expandtab ts=2 sw=2: