sort

sort algorithms for any type of array
git clone https://noulin.net/git/sort.git
Log | Files | Refs | README | LICENSE

commit da2c7d659930bdd5972109eec9d9d307876a3992
parent e92efe666aaa983d2b0b860d3e185a86dd8f2788
Author: Remy Noulin <loader2x@gmail.com>
Date:   Sun, 13 Jan 2019 18:27:59 +0100

add flashsort

README.md   |  59 +++++++-
flashsort.h | 191 +++++++++++++++++++++++++
main.c      | 455 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
package.yml |  32 +++++
sort.h      |   3 +
5 files changed, 738 insertions(+), 2 deletions(-)

Diffstat:
MREADME.md | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Aflashsort.h | 191+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amain.c | 455+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apackage.yml | 32++++++++++++++++++++++++++++++++
Asort.h | 3+++
5 files changed, 738 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md @@ -1,2 +1,57 @@ -# sort -sort algorithms for any type of array +# Sheepy +This is a sheepy package for [sheepy](https://github.com/RemyNoulin/sheepy) and using [libsheepy](https://github.com/RemyNoulin/libsheepy) + +# Sort package + +Sorting algorithms for any type of array: + +- flashsort: sorts numbers (not strings) and uses extra memory + +## Usage + +Install with spm: `spm install sort` + +Include header file: +- `#include "shpPackages/sort/sort.h"` + +Usage examples are on the top of the headers and in `main.c`. + +## flashsort + +`flashsort.h` is a type safe flashsort implementation. + +Flashsort is in the classification sort category (no string compare) and sorts numbers (int, uint, float, ...). + +Struct and pointer elements in the input array are supported + + +The performance depends on how uniform is the distribution, the more uniform the better (high variance). + +Measuring the performance with the random arrays in `main.c`, flashsort is 50% faster than qsort from glibc. + + +An array of size m * sizeof(size_t) is allocated the heap and then freed. + + +Example: + +```c +// declare an array +typ struct { + i32 key; + i32 value; + } elemt; + +elemt array[1000]; + +// define flashsortGet +#define flashsortGet(ptr) (ptr)->key + +// call flashsort +flashsort(array, ARRAY_SIZE(array)); + +// or call flashsortM to be able to set the number of classes +// flashsortM(array, ARRAY_SIZE(array), 0.43 * ARRAY_SIZE(array)); + +#undef flashsortGet +``` diff --git a/flashsort.h b/flashsort.h @@ -0,0 +1,191 @@ +#pragma once + +/** + * \file + * + * type safe flashsort implementation + * + * flashsort is in the classification sort category (no string compare) and sorts numbers (int, uint, float, ...). + * + * struct and pointer elements in the input array are supported + * + * + * the performance depends on how uniform is the distribution, + * the more uniform the better (high variance). + * + * adjust the m parameter (number of classes) to change the performance/memory usage ratio. + * + * when the variance is high, a m/size ratio of 0.1 gives good performance and low memory usage. + * + * an array of size m * sizeof(size_t) is allocated the heap and then freed + * + * + * Example: + * + * // declare an array + * typ struct { + * i32 key; + * i32 value; + * } elemt; + * + * elemt array[1000]; + * + * // define flashsortGet + * #define flashsortGet(ptr) (ptr)->key + * + * // call flashsort + * flashsort(array, ARRAY_SIZE(array)); + * + * // or call flashsortM to be able to set the number of classes + * // flashsortM(array, ARRAY_SIZE(array), 0.43 * ARRAY_SIZE(array)); + * + * #undef flashsortGet + */ + + +/** + * flashsort with predefined m/size ratio for high variance distributions + * + * the flashsortGet macro or function has to be defined before calling flashsortM + * flashsortGet takes an address in parameter and returns a key + * + * \param + * arr array to sort + * \param + * size number of elements in the array + */ +#define flashsort(arr, size) flashsortM(arr, size, size / 16 /* m classes */) + + + + + +// ******** +// internal helper macros +#define flashsortSubKMacro(num)\ + if (num > 0 and min < 0)\ + k = c * ((toUnsignedType(num))(num) + (toUnsignedType(num))(-min)) + 1;\ + else\ + k = c * ((num) - min) + 1 +// end internal helper macros + + +// ******** +/** + * flashsort with predefined m/size ratio for high variance distributions + * + * the flashsortGet macro or function has to be defined before calling flashsortM + * flashsortGet takes an address in parameter and returns a key + * + * \param + * arr array to sort, this parameter is evaluated multiple times, + * it cannot be a value returned by a function that changes during the flashsort execution + * \param + * size number of elements in the array + * \param + * m number of classes + */ +#define flashsortM(arr, asize, mc) do {\ + size_t UNIQVAR(size) = asize;\ + size_t UNIQVAR(m) = mc;\ + if (UNIQVAR(m) < 2) {\ + logE("number of class m is too low: %zu", UNIQVAR(m));\ + break;\ + }\ + /* Phase 1*/\ + var min = flashsortGet(&arr[0]);\ + var max = flashsortGet(&arr[0]);\ + /* added { to isolate variable definition in range */\ + {rangeFrom(i, 0, UNIQVAR(size)) {\ + min = MIN(flashsortGet(&arr[i]), min);\ + max = MAX(flashsortGet(&arr[i]), max);\ + /*logVarG(i);\ + logI("v %u %d", flashsortGet(&arr[i]), flashsortGet(&arr[i)]);\ + logI("min %u max %u", min, max);\ + logVarG(flashsortGet(&arr[i)]);\ + logVarG(arr[i]);\ + logVarG(flashsortGet(&arr[i]));*/\ + }}\ + \ + /*logVarG(min);\ + logVarG(max);*/\ + \ + if (min == max)\ + break;\ + \ + /* Divide to UNIQVAR(m) class */\ + /* c could be float instead of double, but the performance is better with double (intel skylake cpu) */\ + double c;\ + /* avoid overflow */\ + if (max > 0 and min < 0)\ + c = (double)(UNIQVAR(m) - 1) / ((toUnsignedType(max))max + (toUnsignedType(min))(-min));\ + else\ + c = (double)(UNIQVAR(m) - 1) / (max - min);\ + /*logVarG(UNIQVAR(m));\ + logVarG((double)c);*/\ + \ + size_t *L = callocArray(L, UNIQVAR(m) + 1);\ + /* added { to isolate variable definition in range */\ + {range(i, UNIQVAR(size)) {\ + size_t k;\ + flashsortSubKMacro(flashsortGet(&arr[i]));\ + /* L[k]: the number of class K */\ + L[k]++;\ + }}\ + /* added { to isolate variable definition in range */\ + {rangeFrom(k, 2, UNIQVAR(m)+1) {\ + /* L[k]: upper bound of class K */\ + L[k] += L[k - 1];\ + }}\ + \ + /* Phase 2 */\ + /* number of move */\ + size_t move = 0;\ + size_t j = 0;\ + size_t k = UNIQVAR(m);\ + /* Only move UNIQVAR(size) - 1 step */\ + while (move < UNIQVAR(size) - 1)\ + {\ + /* flashsortGet(&arr[j]) is in right class */\ + while (j >= L[k])\ + {\ + j++;\ + flashsortSubKMacro(flashsortGet(&arr[j]));\ + }\ + /* temp to hold the value */\ + var flash = flashsortGet(&arr[j]);\ + var flashEl = arr[j];\ + /* stop when flashsortGet(&arr[j]) is changed */\ + while (j < L[k])\ + {\ + flashsortSubKMacro(flash);\ + /* swap */\ + var tmpEl = flashEl;\ + flashEl = arr[--L[k]];\ + arr[L[k]] = tmpEl;\ + flash = flashsortGet(&flashEl);\ + move++;\ + }\ + }\ + \ + /* Phase 3 + Use Insertion sort for every class + Don't sort the UNIQVAR(m) class, + Because the UNIQVAR(m) class is all max */\ + rangeFrom(k, 1, UNIQVAR(m)) {\ + for (size_t i = L[k] + 1; i < L[k + 1]; i++) {\ + var key = flashsortGet(&arr[i]);\ + var keyEl = arr[i];\ + i64 j = i - 1;\ + while (j >= 0 and flashsortGet(&arr[j]) > key)\ + {\ + arr[j+1] = arr[j];\ + j--;\ + }\ + arr[j+1] = keyEl;\ + }\ + }\ + free(L);\ + }while(0) + +// vim: set expandtab ts=2 sw=2: diff --git a/main.c b/main.c @@ -0,0 +1,455 @@ +#! /usr/bin/env sheepy +/* or direct path to sheepy: #! /usr/local/bin/sheepy */ + +/* Libsheepy documentation: http://spartatek.se/libsheepy/ */ +#include "libsheepyObject.h" +#include "sort.h" + +int argc; char **argv; + + +/** + * the advanced examples are in the trials function + * (called from main) + * + * - float, double array + * - pointer array + * - struct array + * - i32, i64, u64 keys + */ + + +/* enable/disable logging */ +/* #undef pLog */ +/* #define pLog(...) */ + +/** + * tests + * these functions have limited input + */ + +void myswap(int *a, int *b) +{ + int temp = *a; + *a = *b; + *b = temp; +} + +void flashSort_i32ArrayM(int *arr, size_t size, size_t m) { + // Phase 1 + int min = arr[0]; + int max = arr[0]; + rangeFrom(i, 1, size) { + min = MIN(arr[i], min); + max = MAX(arr[i], max); + } + + /* logVarG(min); */ + /* logVarG(max); */ + + if (min == max) + ret; + + // Divide to m class + float c; + if (max > 0 and min < 0) + c = (float)(m - 1) / ((u32)max + (u32)(-min)); + else + c = (float)(m - 1) / (max - min); + /* logVarG(m); */ + /* logVarG((double)c); */ + + size_t *L = callocArray(L, m + 1); + range(i, size) { + size_t k; + /* #define flashsortSubKMacro(num)\ */ + /* if (num > 0 and min < 0) {\ */ + /* k = c * ((u32)(num) + (u32)(-min)) + 1;\ */ + /* #<{(|logVarG(num);\ */ + /* logVarG(min);\ */ + /* logVarG((u32)(num) + (u32)(-min));\ */ + /* logVarG(k);|)}>#\ */ + /* }\ */ + /* else {\ */ + /* k = c * ((num) - min) + 1;\ */ + /* #<{(|logVarG(num);\ */ + /* logVarG(min);\ */ + /* logVarG((num) -min);\ */ + /* logVarG(k);|)}>#\ */ + /* } */ + flashsortSubKMacro(arr[i]); + // L[k]: the number of class K + L[k]++; + } + rangeFrom(k, 2, m+1) { + // L[k]: upper bound of class K + L[k] += L[k - 1]; + } + + // Phase 2 + // number of move + size_t move = 0; + size_t j = 0; + size_t k = m; + // Only move size - 1 step + while (move < size - 1) + { + // arr[j] is in right class + while (j >= L[k]) + { + j++; + flashsortSubKMacro(arr[j]); + } + // temp to hold the value + int flash = arr[j]; + // stop when arr[j] is changed + while (j < L[k]) + { + flashsortSubKMacro(flash); + myswap(&flash, &arr[--L[k]]); + move++; + } + } + + // Phase 3 + // Use Insertion sort for every class + // Don't sort the m class, + // Because the m class is all max + rangeFrom(k, 1, m) { + rangeFrom(i, L[k] + 1, L[k + 1]) { + int key = arr[i]; + i64 j = i - 1; + while (j >= 0 and arr[j] > key) + { + arr[j + 1] = arr[j]; + j--; + } + arr[j + 1] = key; + } + } + free(L); +} + +typ int (*flashSortGetFt)(const void *elemPtr); + +void flashSort_i32AM(void *arr, size_t size, size_t m, size_t elemSize, flashSortGetFt flashSortGet) { + // Phase 1 + #define elemAddr(idx) (arr + (elemSize * (idx))) + #define eGet(idx) flashSortGet(elemAddr(idx)) + int min = eGet(0); + int max = eGet(0); + rangeFrom(i, 1, size) { + min = MIN(eGet(i), min); + max = MAX(eGet(i), max); + } + + if (min == max) + ret; + + // Divide to m class + float c; + if (max > 0 and min < 0) + c = (float)(m - 1) / ((u32)max + (u32)(-min)); + else + c = (float)(m - 1) / (max - min); + + // stack allocated has no effect on speed + size_t *L = callocArray(L, m + 1); + range(i, size) { + size_t k; + flashsortSubKMacro(eGet(i)); + // L[k]: the number of class K + L[k]++; + } + rangeFrom(k, 2, m+1) { + // L[k]: upper bound of class K + L[k] += L[k - 1]; + } + + // Phase 2 + #define cpe(dst, src) memcpy(dst, src, elemSize); + // number of move + size_t move = 0; + size_t j = 0; + size_t k = m; + // Only move size - 1 step + while (move < size - 1) + { + // eGet(j) is in right class + while (j >= L[k]) + { + j++; + flashsortSubKMacro(eGet(j)); + } + // temp to hold the value + int flash = eGet(j); + char flashElem[elemSize]; + cpe(flashElem, elemAddr(j)); + + // stop when eGet(j) is changed + while (j < L[k]) + { + flashsortSubKMacro(flash); + char tmpElem[elemSize]; + cpe(tmpElem, flashElem); + cpe(flashElem, elemAddr(--L[k])); + cpe(elemAddr(L[k]), tmpElem); + flash = flashSortGet(flashElem); + move++; + } + } + + // Phase 3 + // Use Insertion sort for every class + // Don't sort the m class, + // Because the m class is all max + rangeFrom(k, 1, m) { + rangeFrom(i, L[k] + 1, L[k + 1]) { + int key = eGet(i); + char keyElem[elemSize]; + cpe(keyElem, elemAddr(i)); + + i64 j = i - 1; + while (j >= 0 and eGet(j) > key) + { + cpe(elemAddr(j+1), elemAddr(j)); + j--; + } + cpe(elemAddr(j+1), keyElem); + } + } + free(L); +} + +void flashSort_i32Array(int *arr, size_t size) { + flashSort_i32ArrayM(arr, size, 0.1 * size); +} + +void flashSort_i32A(void *arr, size_t size, size_t elemSize, flashSortGetFt flashSortGet) { + flashSort_i32ArrayM(arr, size, 0.1 * size); +} + + + + + + +void trials(void) { + + typ struct {i32 a; int b;} elemt; + + //#define sz 600000 + #define sz 20 + //goto rnd; + + // -------------------------------- + // float key array + // + // intel skylake cpu: it takes less time sorting float array than sorting int array + + float fa[sz]; + double da[sz]; + + srandom(10); + range(i, ARRAY_SIZE(fa)) { + fa[i] = (float)((random() - 0x4fffffff) * 2) / 10000000000; + da[i] = (double)((random() - 0x4fffffff) * 2) / 10000000000; + /* printf("%f ", fa[i]); */ + /* printf("%f ", da[i]); */ + } + + stopwatchStart; + + #define flashsortGet(ptr) (*(ptr)) + flashsortM(fa, ARRAY_SIZE(fa), 0.2 * ARRAY_SIZE(fa)); + /* flashsortM(da, ARRAY_SIZE(da), 0.2 * ARRAY_SIZE(da)); */ + #undef flashsortGet + + stopwatchLogUs; + + //XSUCCESS; + + range(i, ARRAY_SIZE(fa)) { + printf("%f ", fa[i]); + /* printf("%f ", da[i]); */ + } + put; + + XSUCCESS; + + // -------------------------------- + // all int keys + + elemt arr[sz]; + + range(i, ARRAY_SIZE(arr)) { + arr[i].b = i; + } + + srandom(10); + range(i, ARRAY_SIZE(arr)) { + //arr[i].a = ((int)randomWordFromHW()); + //arr[i].a = absV((int)randomWordFromHW()) % (ARRAY_SIZE(arr) *2); + //arr[i].a = random() % (ARRAY_SIZE(arr) *2); + //arr[i].a = random(); + arr[i].a = (random() - 0x4fffffff) * 2; + //printf("%d ", arr[i]); + //printf("%lu ", arr[i]); + //printf("%d %d\n", arr[i].a, arr[i].b); + } + + + goto arrr; + + + + // pointer array (move pointer to struct, to be used when the elements have a lot of data) + + elemt *parr[sz]; + + range(i, ARRAY_SIZE(arr)) { + parr[i] = &arr[i]; + } + + stopwatchStart; + + #define flashsortGet(ptr) (*(ptr))->a + flashsortM(parr, ARRAY_SIZE(parr), 0.1 * ARRAY_SIZE(parr)); + #undef flashsortGet + + stopwatchLogUs; + + /* range(i, ARRAY_SIZE(arr)) { */ + /* printf("%d %d\n", parr[i]->a,parr[i]->b); */ + /* } */ + /* put; */ + + XSUCCESS; + + + + + + // struct int array (move data) + + +arrr:; + + /* int get(const void *e) { */ + /* ret ((elemt*)e)->a; */ + /* } */ + + stopwatchStart; + + //flashSort_i32AM(arr, ARRAY_SIZE(arr), 0.14 * ARRAY_SIZE(arr), sizeof(elemt), get); + #define flashsortGet(ptr) (ptr)->a + flashsortM(arr, ARRAY_SIZE(arr), 0.2 * ARRAY_SIZE(arr)); + //flashsort(arr, ARRAY_SIZE(arr)); + #undef flashsortGet + + int cmpArr(const void *a, const void *b) { + ret CMP(((elemt*)a)->a, ((elemt*)b)->a); + //wrong overflow - ret ( ((elemt*)a)->a - ((elemt*)b)->a ); + } + + //qsort(arr, ARRAY_SIZE(arr), sizeof(elemt), cmpArr); + + stopwatchLogUs; + + range(i, ARRAY_SIZE(arr)) { + printf("%d ", arr[i].a); + /* printf("%lu ", arr[i].a); */ + /* printf("%d %d\n", arr[i].a, arr[i].b); */ + } + put; + + XSUCCESS; + + + + // int array + +rnd:; + int rnd[sz]; + u32 urnd[sz]; + i64 rnd64[sz]; + u64 urnd64[sz]; + + srandom(10); + range(i, ARRAY_SIZE(rnd)) { + //rnd[i] = ((int)randomWordFromHW()); + //rnd[i] = absV((int)randomWordFromHW()) % (ARRAY_SIZE(rnd) *2); + //rnd[i] = random() % (ARRAY_SIZE(rnd) *2); + rnd[i] = (random() - 0x4fffffff) * 2; + urnd[i] = random() * 2; + rnd64[i] = random() << 33 + random(); + urnd64[i] = random() << 33 + random(); + //printf("%d ", rnd[i]); + //printf("%u ", urnd[i]); + //printf("%ld ", rnd64[i]); + } + + put;put + + stopwatchStart; + + /* range(i, 100000) { */ + /* flashSort_i32Array(rnd, ARRAY_SIZE(rnd)); */ + /* } */ + + //flashSort_i32Array(rnd, ARRAY_SIZE(rnd)); + //flashSort_i32ArrayM(rnd, ARRAY_SIZE(rnd), 0.2 * ARRAY_SIZE(rnd)); + #define flashsortGet(ptr) (*(ptr)) + //flashsortM(rnd, ARRAY_SIZE(rnd), 0.2 * ARRAY_SIZE(rnd)); + //flashsortM(urnd, ARRAY_SIZE(urnd), 0.2 * ARRAY_SIZE(urnd)); + //flashsortM(rnd64, ARRAY_SIZE(rnd64), 0.2 * ARRAY_SIZE(rnd64)); + flashsortM(urnd64, ARRAY_SIZE(urnd64), 0.2 * ARRAY_SIZE(urnd64)); + //flashsort(rnd, ARRAY_SIZE(rnd)); + //int *A = rnd; + //flashsort(A, ARRAY_SIZE(rnd)); + + int cmp(const void *a, const void *b) { + ret CMP(*(int*)a, *(int*)b); + //ret ( *(int*)a - *(int*)b ); + } + //qsort(rnd, ARRAY_SIZE(rnd), sizeof(int), cmp); + int ucmp(const void *a, const void *b) { + ret CMP(*(u32*)a, *(u32*)b); + //ret ( *(u32*)a - *(u32*)b ); + } + //qsort(urnd, ARRAY_SIZE(urnd), sizeof(int), ucmp); + + stopwatchLogUs; + + //XSUCCESS; + + range(i, ARRAY_SIZE(rnd)) { + /* printf("%d ", rnd[i]); */ + /* printf("%u ", urnd[i]); */ + /* printf("%ld ", rnd64[i]); */ + printf("%lu ", urnd64[i]); + } + put; + + /* typeof(convertToUnsignedType(rnd[0])) a = 1; */ + /* typeof(convertToUnsignedType(rnd[0])) b = -1; */ + /* toUnsignedType((rnd[0])) a = 1; */ + /* toUnsignedType((rnd[0])) b = -1; */ + /* typeof((rnd[0])) a = 1; */ + /* typeof((rnd[0])) b = -1; */ + /* logVarG(MAX(a,b)); */ +} + +int main(int ARGC, char** ARGV) { + + argc = ARGC; argv = ARGV; + + initLibsheepy(ARGV[0]); + setLogMode(LOG_FUNC); + setLogSymbols(LOG_UTF8); + setStackLimit(-1); + + trials(); + +} +// vim: set expandtab ts=2 sw=2: diff --git a/package.yml b/package.yml @@ -0,0 +1,32 @@ +--- + name: sort + version: 0.0.1 + description: "sorting algorithms for any type of array: flashsort" + bin: ./sort.h + cflags: -O3 -std=gnu11 -fPIC -pipe + #lflags: -lpcre + repository: + type: git + url: git+https://github.com/RemyNoulin/sort.git + keywords: + - sorting + - library + author: Remy + license: MIT + bugs: + url: https://github.com/RemyNoulin/sort/issues + homepage: https://github.com/RemyNoulin/sort#readme + #compileHelp: # text displayed when there is a compilation error + #dependencies: + # md4c: + # Test configuration: + #testBin: ./testSort.c + #testCflags: -ggdb -std=gnu11 -fPIC -pipe -fprofile-arcs -ftest-coverage -Wall -Wextra + #testLflags: -lcheck_pic -lrt -lm -lsubunit -fprofile-arcs -ftest-coverage -rdynamic + # Memcheck configuration: + #memcheckBin: ./memcheckSort.c + #memcheckCmd: valgrind --leak-check=full --show-leak-kinds=all + #memcheckCflags: -ggdb -std=gnu11 -fPIC -pipe + #memcheckLflags: -rdynamic + #documentationCmd: # command for generating the documentation with spm doc + private: false # true for private package diff --git a/sort.h b/sort.h @@ -0,0 +1,3 @@ +#pragma once + +#include "flashsort.h"