blockFile

Save data chunked into blocks in a file, for creating databases
git clone https://noulin.net/git/blockFile.git
Log | Files | Refs | LICENSE

commit 622a5af225700c73d0626cacc5af13079516630e
parent a622c489a90fe75d8271549c3e441f977c8187a5
Author: Remy Noulin <loader2x@gmail.com>
Date:   Sat, 19 May 2018 23:31:29 +0200

blockFile package

blockFile.c              | 652 +++++++++++++++++++++++++++++++++++++++++++++++
blockFile.h              | 123 +++++++++
blockFileInternal.h      | 106 ++++++++
main.c                   |  71 ++++++
memTest.c.template       |  23 ++
package.yml              |  29 +++
runMemtest.c             | 112 ++++++++
testBlockFile.c          | 227 +++++++++++++++++
testBlockFile.sh         |   5 +
testBlockFileMem.c       | 202 +++++++++++++++
testBlockFileMem.sh      |   2 +
valgrindSuppressions.cfg |  15 ++
12 files changed, 1567 insertions(+)

Diffstat:
AblockFile.c | 652+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AblockFile.h | 123+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AblockFileInternal.h | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amain.c | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AmemTest.c.template | 23+++++++++++++++++++++++
Apackage.yml | 29+++++++++++++++++++++++++++++
ArunMemtest.c | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AtestBlockFile.c | 227+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AtestBlockFile.sh | 5+++++
AtestBlockFileMem.c | 202+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AtestBlockFileMem.sh | 2++
AvalgrindSuppressions.cfg | 15+++++++++++++++
12 files changed, 1567 insertions(+), 0 deletions(-)

diff --git a/blockFile.c b/blockFile.c @@ -0,0 +1,652 @@ + + +/* Add class methods and modify the base functions (free, duplicate, ...) where there are the TODOs (TODO)*/ + +#include "libsheepyObject.h" +#include "blockFile.h" +#include "blockFileInternal.h" +#include "shpPackages/lz4/lz4.h" + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +void initiateBlockFile(blockFilet *self); +void registerMethodsBlockFile(blockFileFunctionst *f); +void initiateAllocateBlockFile(blockFilet **self); +void finalizeBlockFile(void); +blockFilet* allocBlockFile(char *filename); +internal void freeBlockFile(blockFilet *self); +internal void terminateBlockFile(blockFilet **self); +internal char* toStringBlockFile(blockFilet *self); +internal blockFilet* duplicateBlockFile(blockFilet *self); +internal void smashBlockFile(blockFilet **self); +internal void finishBlockFile(blockFilet **self); +internal bool openBlockFile(blockFilet *self, const char *filename); +internal void closeBlockFile(blockFilet *self); +internal void deleteBlockFile(blockFilet *self); +internal void deleteFBlockFile(char *filename); +internal u64 addBlockBlockFile(blockFilet *self, void* buf, i64 len, flagsBlockFilet flags); +internal u64 addBlockFile(blockFilet *self, void* buf, i64 len); +internal bufBlockFilet getBlockFile(blockFilet *self, u64 block); +internal bool removeBlockFile(blockFilet *self, u64 block); +internal bool loadBlockFile(blockFilet *self, void *closure, loadFBlockFileFt callback); +/* TODO add prototypes */ + +void initiateBlockFile(blockFilet *self) { + + self->type = "blockFile"; + if (!blockFileF) { + blockFileF = malloc(sizeof(blockFileFunctionst)); + registerMethodsBlockFile(blockFileF); + pErrorNot0(atexit(finalizeBlockFile)); + } + self->f = blockFileF; + self->file = malloc(sizeof(privateBlockFilet)); + // no database opened: no name + fil.name = NULL; + /* TODO Initialize object data */ +} + +void registerMethodsBlockFile(blockFileFunctionst *f) { + + f->free = 
freeBlockFile; + f->terminate = terminateBlockFile; + f->toString = toStringBlockFile; + f->duplicate = duplicateBlockFile; + f->smash = smashBlockFile; + f->finish = finishBlockFile; + f->open = openBlockFile; + f->close = closeBlockFile; + f->delete = deleteBlockFile; + f->deleteF = deleteFBlockFile; + f->addBlock = addBlockBlockFile; + f->add = addBlockFile; + f->get = getBlockFile; + f->remove = removeBlockFile; + f->load = loadBlockFile; + /* TODO add class functions */ +} + +void initiateAllocateBlockFile(blockFilet **self) { + + if (self) { + (*self) = malloc(sizeof(blockFilet)); + if (*self) { + initiateBlockFile(*self); + } + } +} + +void finalizeBlockFile(void) { + + if (blockFileF) { + free(blockFileF); + blockFileF = NULL; + } +} + +/** + * allocate a blockFile object and open filename + * + * \return + * the new object, or NULL when the allocation or the open fails + */ +blockFilet* allocBlockFile(char* filename) { + blockFilet *r = NULL; + + initiateAllocateBlockFile(&r); + // allocation failed, there is no object to open + if (!r) return NULL; + + if (!openBlockFile(r, filename)) { + terminateBlockFile(&r); + } + return(r); +} + + +/** + * close the file when it is open and free the private data + * the object itself is not freed + */ +internal void freeBlockFile(blockFilet *self) { + + // nothing to free: no object or private data already freed + if (!self || !self->file) return; + + if (fil.name) { + closeBlockFile(self); + } + free(self->file); + // a second call to free is harmless + self->file = NULL; + /* TODO free internal data (not the structure holding the function pointers) */ + return; +} + +/** + * free the private data and the object, *self is set to NULL + */ +internal void terminateBlockFile(blockFilet **self) { + + if (!self || !*self) return; + + freeBlockFile(*self); + free(*self); + *self = NULL; +} + + +internal char* toStringBlockFile(blockFilet UNUSED *self) { + + /* TODO convert object data to string */ + return(strdup("TODO - blockFile")); +} + +internal blockFilet* duplicateBlockFile(blockFilet UNUSED *self) { + + createAllocateBlockFile(dup); + /* TODO COPY data */ + return(dup); +} + +internal void smashBlockFile(blockFilet **self) { + + finishBlockFile(self); +} + +internal void finishBlockFile(blockFilet **self) { + + free(*self); + *self = NULL; +} + +internal bool openBlockFile(blockFilet *self, const char *filename) { + if (!filename || isBlankS(filename)) + return false; + + bool dbExists = true; + + // init + fil.name = strdup(filename); + fil.blockSize = BLOCKSIZE; + fil.freeName = 
catS(filename, "x"); + fil.defaultFlags = COMPRESSED; + + // check if db file exists + if (!fileExists(filename)) { + dbExists = false; + + // save info in block 0 + // the struct and the block are zeroed so that no uninitialized stack bytes are written to the file + block0 b0; + memset(&b0, 0, sizeof b0); + b0.blockSize = fil.blockSize; + b0.blockIndexSize = 0; + + char blockBuf[BLOCKSIZE] = {0}; + + memcpy(blockBuf, &b0, sizeof b0); + + writeFile(filename, blockBuf, fil.blockSize); + } + fil.f = fopen(filename, "r+"); + if (!fil.f) { + // the database can't be opened: report, release the names and fail + pFuncError; + freeManyS(fil.name, fil.freeName); + fil.name = NULL; + fil.freeName = NULL; + return false; + } + fil.fmode = READWRITE; + + // TODO read block 0 for configuration: blockSize, blockIndexSize 32/64 bits + + fil.count = fileSize(filename) / fil.blockSize; + + // initialize freeF + if (!fileExists(fil.freeName) || !dbExists) { + dArrayInit(&fil.freeBlocks); + // create empty freeF file + fil.freeF = fopen(fil.freeName, "w"); + if (fil.freeF) fclose(fil.freeF); + } + else { + // load freeBlocks + dArrayInit(&fil.freeBlocks); + dArrayReadFilename(&fil.freeBlocks, fil.freeName); + } + + // open freeF - read mode to keep the content (fixes the duplicate test) + // this file is freopen in write mode later + fil.freeF = fopen(fil.freeName, "r"); + if (!fil.freeF) { + // the free block file can't be opened: report, undo the open and fail + pFuncError; + dArrayFree(&fil.freeBlocks); + fclose(fil.f); + fil.f = NULL; + freeManyS(fil.name, fil.freeName); + fil.name = NULL; + fil.freeName = NULL; + return false; + } + + return true; +} + +/** + * save the free block list and close the files + * nothing is done when no file is open + */ +internal void closeBlockFile(blockFilet *self) { + // never opened or already closed + if (!fil.name) return; + + // save free blocks + // when freopen fails the stream is already closed and must not be used again + FILE *freeF = freopen(NULL, "w", fil.freeF); + if (freeF) { + dArrayWrite(&fil.freeBlocks, freeF); + } + else { + pFuncError; + } + // free dArray + dArrayFree(&fil.freeBlocks); + + // close FILES + fclose(fil.f); + if (freeF) fclose(freeF); + freeManyS(fil.name, fil.freeName); + + // reset the state so that close, free and terminate don't release these resources twice + fil.f = NULL; + fil.freeF = NULL; + fil.name = NULL; + fil.freeName = NULL; +} + +/** + * close the file and remove the database file and the free block file + * nothing is done when no file is open + */ +internal void deleteBlockFile(blockFilet *self) { + // no database opened: no files to remove + if (!fil.name) return; + + // the names are copied because close frees them + char *s1 = strdup(fil.name); + char *s2 = strdup(fil.freeName); + + // close file + closeBlockFile(self); + + // remove file and freeF + rmAll(s1); + rmAll(s2); + freeManyS(s1, s2); +} + +internal void deleteFBlockFile(char *filename) { + if (!filename || isBlankS(filename)) + return; + + char *s = catS(filename, "x"); + + // remove file and freeF + rmAll(filename); + rmAll(s); + free(s); +} + +/** + * set file mode + * read/write or append + */ +internal void setFMode(blockFilet *self, fmodet fmode) { + if ((fil.fmode == APPEND) and (fmode == READWRITE)) { + 
freopen(NULL, "r+", fil.f); + fil.fmode = READWRITE; + } + if ((fil.fmode == READWRITE) and (fmode == APPEND)) { + freopen(NULL, "a", fil.f); + fil.fmode = APPEND; + } +} + +/** + * write one block of blockSize bytes to the file + * the block popped from the free block list is reused, when the list is empty the block is appended + * + * \return + * index of the written block + */ +internal uI insertBlock(blockFilet *self, void *blockData) { + if (!dArrayCount(&fil.freeBlocks)) { + // append new block + //logI("no free blocks"); + setFMode(self, APPEND); + fwrite(blockData, 1, fil.blockSize, fil.f); + fil.count++; + //logVarG(fil.count-1); + return fil.count-1; + } + else { + // reuse free block + uI block = dArrayPop(&fil.freeBlocks); + setFMode(self, READWRITE); + fseek(fil.f, block * fil.blockSize, SEEK_SET); + fwrite(blockData, 1, fil.blockSize, fil.f); + //logVarG(block); + return block; + } +} + +/** + * index of the block following the block about to be inserted + * call before insertBlock: the result is the index that the second insertBlock call from now will use + */ +internal uI nextBlock(blockFilet *self) { + if (!dArrayCount(&fil.freeBlocks)) { + return fil.count+1; + } + else if (dArrayCount(&fil.freeBlocks) == 1) { + return fil.count; + } + else { + return dArrayAt(&fil.freeBlocks, dArrayLastIndex(&fil.freeBlocks)-1); + } +} + +/** + * store len bytes from buf in a chain of blocks + * with the COMPRESSED flag, data of 192 bytes or more is compressed with lz4 before it is stored + * + * \return + * index of the first block of the chain + * 0 when the compression fails or when len is 0 or negative (block 0 is reserved) + */ +internal u64 addBlockBlockFile(blockFilet *self, void* buf, i64 len, flagsBlockFilet flags) { + uI firstBlock = 0; + size_t freeB = dArrayCount(&fil.freeBlocks); + + //logVarG(len); + + // compress with lz4 + uint32_t decompressedSize; + if (flags == COMPRESSED) { + if (len < 192) { + // too small to have benefits from compression + flags = NOT_COMPRESSED; + //AT; + } + else { + decompressedSize = len; + const char *uncompressed = buf; + + int max_dst_size = LZ4_compressBound(decompressedSize); + + char* compressed_data = malloc(max_dst_size); + if (compressed_data == NULL) { + shEPrintfS("Failed to allocate memory for *compressed_data."); + return 0; + } + + len = LZ4_compress_default(uncompressed, compressed_data, decompressedSize, max_dst_size); + + if (len < 0) { + // len holds an int returned by lz4, the cast matches the %d conversion + shEPrintfS("A negative result from LZ4_compress_default indicates a failure trying to compress the data. 
Value returned %d\n", (int)len); + free(compressed_data); + return 0; + } + if (len == 0) { + shEPrintfS("A result of 0 means compression worked, but was stopped because the destination buffer couldn't hold all the information."); + free(compressed_data); + return 0; + } + + buf = compressed_data; + } + } + + // header sizes + u64 firstHeaderSize; + if (flags == COMPRESSED) { + firstHeaderSize = sizeof(struct firstBlockHeaderCompressed); + } + else { + firstHeaderSize = sizeof(struct firstBlockHeader); + } + u64 headerSize = sizeof(struct blockheader); + i64 firstDataSize = fil.blockSize - firstHeaderSize; + i64 dataSize = fil.blockSize - headerSize; + + // create block chain + // user data in first block: blockSize - firstHeaderSize + // user data in other blocks: blockSize - headerSize + + char blockBuf[fil.blockSize]; + blockt *header = (blockt *)blockBuf; + bool isFirstBlock = true; + char *offset = buf; + u64 currentDataSize; + + // zero the block so that the unused header bits and the bytes after the data are not uninitialized stack data in the file + memset(blockBuf, 0, sizeof blockBuf); + + //loghex(buf, len); + + while(len > 0) { + if (isFirstBlock) { + isFirstBlock = false; + header->chain = 1; + header->z = flags == COMPRESSED ? 
1 : 0; + header->dataSize = len; + + if (flags == COMPRESSED) { + header->decompressedSize = decompressedSize; + } + + if (len <= firstDataSize) { + header->nextBlock = 0; + memcpy(blockBuf+firstHeaderSize, offset, len); + firstBlock = insertBlock(self, blockBuf); + //len = 0; + break; + } + else { + // more blocks to save + header->nextBlock = nextBlock(self); + currentDataSize = firstDataSize; + memcpy(blockBuf+firstHeaderSize, offset, firstDataSize); + firstBlock = insertBlock(self, blockBuf); + } + } + else { + // other blocks + header->chain = 0; + if (len <= dataSize) { + header->nextBlock = 0; + memcpy(blockBuf+headerSize, offset, len); + insertBlock(self, blockBuf); + //len = 0; + break; + } + else { + // more blocks to save + header->nextBlock = nextBlock(self); + currentDataSize = dataSize; + memcpy(blockBuf+headerSize, offset, currentDataSize); + insertBlock(self, blockBuf); + } + } + //logVarG(header->nextBlock); + len -= currentDataSize; + offset += currentDataSize; + } + //logVarG(header->nextBlock); + + if (flags == COMPRESSED) { + // free the compressed buffer + free(buf); + } + + // save free blocks if necessary + if (freeB) { + // there were free block when we started, save free blocks + freopen(NULL, "w", fil.freeF); + dArrayWrite(&fil.freeBlocks, fil.freeF); + } + return firstBlock; +} + +internal u64 addBlockFile(blockFilet *self, void* buf, i64 len) { + return addBlockBlockFile(self, buf, len, fil.defaultFlags); +} + +/** + * \return + * bufBlockFilet {data, len}, data has to be freed + */ +internal bufBlockFilet getBlockFile(blockFilet *self, u64 block) { + char blockBuf[fil.blockSize]; + blockt *header = (blockt *)blockBuf; + bufBlockFilet result = {NULL, 0}; + + // block 0 is reserved + if (!block) return result; + + // check if block is free + range(i, (size_t)(dArrayCount(&fil.freeBlocks))) { + if (block == dArrayAt(&fil.freeBlocks, i)) { + return result; + } + } + + setFMode(self, READWRITE); + fseek(fil.f, block * fil.blockSize, 
SEEK_SET); + fread(blockBuf, 1, fil.blockSize, fil.f); + + if (header->chain != 1) { + // error not first block + return result; + } + + // header sizes + u64 firstHeaderSize; + if (header->z) { + firstHeaderSize = sizeof(struct firstBlockHeaderCompressed); + } + else { + firstHeaderSize = sizeof(struct firstBlockHeader); + } + u64 headerSize = sizeof(struct blockheader); + u64 firstDataSize = fil.blockSize - firstHeaderSize; + u64 dataSize = fil.blockSize - headerSize; + + u64 currentDataSize; + + // decompress + uint32_t decompressedSize = 0; + if (header->z) { + decompressedSize = header->decompressedSize; + } + + u64 len = header->dataSize; + const u64 compressedSize = header->dataSize; + void *r = malloc(len); + if (!r) { + return result; + } + + // set result length: dataSize or decompressedSize + if (header->z) { + result.len = decompressedSize; + } + else { + result.len = len; + } + + /* first block already loaded */ + char *offset = r; + + bool isFirstBlock = true; + while(len > 0) { + if (isFirstBlock) { + isFirstBlock = false; + if (len <= firstDataSize) { + if (header->nextBlock) { + // error this block should be the last block + goto error; + } + // all the data is in the first block + memcpy(offset, blockBuf+firstHeaderSize, len); + //len = 0; + break; + } + else { + block = header->nextBlock; + if (!block) { + // error there should be a next block + goto error; + } + currentDataSize = firstDataSize; + memcpy(offset, blockBuf+firstHeaderSize, currentDataSize); + } + } + else { + fseek(fil.f, block * fil.blockSize, SEEK_SET); + fread(blockBuf, 1, fil.blockSize, fil.f); + + if (header->chain != 0) { + // error not a block in a chain + goto error; + } + + if (len <= dataSize) { + if (header->nextBlock) { + // error this block should be the last block + goto error; + } + // all the data is in the first block + memcpy(offset, blockBuf+headerSize, len); + //len = 0; + break; + } + else { + block = header->nextBlock; + if (!block) { + // error there should 
be a next block + goto error; + } + currentDataSize = dataSize; + memcpy(offset, blockBuf+headerSize, currentDataSize); + } + } + //logVarG(header->nextBlock); + len -= currentDataSize; + offset += currentDataSize; + } + + //loghex(r, compressedSize); + + // decompress + if (decompressedSize) { + // the data is compressed + char *compressed_data = r; + char* regen_buffer = malloc(decompressedSize); + if (regen_buffer == NULL) { + free(r); + shEPrintfS("Failed to allocate memory for *regen_buffer."); + return result; + } + + const int decompressed_size = LZ4_decompress_safe(compressed_data, regen_buffer, compressedSize, decompressedSize); + free(r); + + if (decompressed_size < 0) { + free(regen_buffer); + shEPrintfS("A negative result from LZ4_decompress_safe indicates a failure trying to decompress the data. Value returned %d\n", decompressed_size); + return result; + } + if (decompressed_size == 0) { + free(regen_buffer); + shEPrintfS("I'm not sure this function can ever return 0. Documentation in lz4.h doesn't indicate so."); + return result; + } + + r = regen_buffer; + } + + result.data = r; + return result; + +error: + //logVarG(header->nextBlock); + free(r); + return result; +} + +internal bool removeBlockFile(blockFilet *self, u64 block) { + char blockBuf[sizeof(blockt)]; + blockt *header = (blockt *)blockBuf; + + // block 0 is reserved + if (!block) return false; + + // check if block is already free + range(i, (size_t)(dArrayCount(&fil.freeBlocks))) { + if (block == dArrayAt(&fil.freeBlocks, i)) { + return false; + } + } + + setFMode(self, READWRITE); + fseek(fil.f, block * fil.blockSize, SEEK_SET); + fread(blockBuf, 1, sizeof(blockt), fil.f); + + if (header->chain != 1) { + // error not first block + return false; + } + + // delete first block + dArrayAppend(&fil.freeBlocks, block); + + while(header->nextBlock) { + block = header->nextBlock; + fseek(fil.f, block * fil.blockSize, SEEK_SET); + fread(blockBuf, 1, sizeof(blockt), fil.f); + if (header->chain != 
0) { + // error not other block + return false; + } + // delete block + dArrayAppend(&fil.freeBlocks, block); + } + + // save free blocks + freopen(NULL, "w", fil.freeF); + dArrayWrite(&fil.freeBlocks, fil.freeF); + + return true; +} + +internal bool loadBlockFile(blockFilet *self, void *closure, loadFBlockFileFt callback) { + char blockBuf[sizeof(blockt)]; + blockt *header = (blockt *)blockBuf; + + setFMode(self, READWRITE); + + rangeFrom(i, 1, fil.count) { + // check if block i is free + range(j, (size_t)(dArrayCount(&fil.freeBlocks))) { + if (i == dArrayAt(&fil.freeBlocks, j)) goto cont; + } + + fseek(fil.f, i * fil.blockSize, SEEK_SET); + fread(blockBuf, 1, sizeof(blockt), fil.f); + + if (header->chain != 1) { + // not first block, skip + goto cont; + } + + bufBlockFilet data = getBlockFile(self, i); + if (!callback(closure, i, data)) return false; + cont:; + } + return true; +} + +/* TODO add method implementations */ diff --git a/blockFile.h b/blockFile.h @@ -0,0 +1,123 @@ +#pragma once + +/* Add class methods and class data where there are the TODOs (TODO)*/ + +#define closeBFO(obj) (obj)->f->close(obj) +#define addBFO(obj, buffer, len) (obj)->f->add(obj, buffer, len) +#define getBFO(obj, block) (obj)->f->get(obj, block) +#define removeBFO(obj, block) (obj)->f->remove(obj, block) +#define loadBFO(obj, closure, callback) (obj)->f->load(obj, closure, callback) + +/* TODO add generics: #define amethodG(obj) (obj)->f->amethod(obj) */ + +/* Class blockFile */ +typedef struct blockFile blockFilet; + +/* for object inheriting blockFile, cast to blockFile to be able to use this class functions and generics*/ +#define cBlockFile(self) ( (blockFilet*) self ) + +typedef enum {NOT_COMPRESSED, COMPRESSED} flagsBlockFilet; + +typedef struct { + void *data; + u64 len; +} bufBlockFilet; + +typedef void (*freeBlockFileFt) (blockFilet *self); +typedef void (*terminateBlockFileFt) (blockFilet **self); +typedef char* (*toStringBlockFileFt) (blockFilet *self); +typedef 
blockFilet* (*duplicateBlockFileFt) (blockFilet *self); +typedef void (*smashBlockFileFt) (blockFilet **self); + +/** + * free blockFile + */ +typedef void (*finishBlockFileFt) (blockFilet **self); + +/* TODO add function typedef with pattern: functionNameClassTempleFt */ + +typedef bool (*openBlockFileFt) (blockFilet *self, const char *filename); +typedef void (*closeBlockFileFt) (blockFilet *self); +typedef void (*deleteBlockFileFt) (blockFilet *self); +typedef void (*deleteFBlockFileFt) (char *filename); +typedef u64 (*addBlockBlockFileFt) (blockFilet *self, void* buf, i64 len, flagsBlockFilet flags); +typedef u64 (*addBlockFileFt) (blockFilet *self, void* buf, i64 len); +typedef bufBlockFilet (*getBlockFileFt) (blockFilet *self, u64 block); +typedef bool (*removeBlockFileFt) (blockFilet *self, u64 block); + +/** + * callback parameter for the load function + */ +typedef bool (*loadFBlockFileFt)(void *closure, u64 block, bufBlockFilet data); + +/** + * load + */ +typedef bool (*loadBlockFileFt) (blockFilet *self, void *closure, loadFBlockFileFt callback); + +/** + * class functions + * allocated once for all objects + * + * freed with finalizeBlockFile + */ + +/** + * use this define in child classes and add the new function after this class functions + * + * in this define, add the methods after <finishBlockFileFt finish;> + * + * Example: + * #define RINGFUNCTIONST \n * BLOCKFILEFUNCTIONST; \n * setSizeRingFt setSize + */ +#define BLOCKFILEFUNCTIONST \ + openBlockFileFt open;\ + closeBlockFileFt close;\ + deleteBlockFileFt delete;\ + deleteFBlockFileFt deleteF;\ + addBlockBlockFileFt addBlock;\ + addBlockFileFt add;\ + getBlockFileFt get;\ + removeBlockFileFt remove;\ + loadBlockFileFt load; + /* TODO ADD METHODS AFTER <finishBlockFileFt finish;> HERE */ + +typedef struct { + freeBlockFileFt free; + terminateBlockFileFt terminate; + toStringBlockFileFt toString; + duplicateBlockFileFt duplicate; + smashBlockFileFt smash; + finishBlockFileFt finish; + 
BLOCKFILEFUNCTIONST; +} blockFileFunctionst; + +/** + * class + */ + +typedef struct privateBlockFile privateBlockFilet; + +struct blockFile { + const char *type; + blockFileFunctionst *f; + + privateBlockFilet *file; + /* TODO add class data */ +}; + +/* blockFile */ + +#define createBlockFile(obj) blockFilet obj; initiateBlockFile(&obj) +#define createAllocateBlockFile(obj) blockFilet *obj; initiateAllocateBlockFile(&obj) + +void initiateBlockFile(blockFilet *self); +void initiateAllocateBlockFile(blockFilet **self); +void finalizeBlockFile(void); + +/* initialize class methods, call registerMethodsBlockFile from classes inheriting this class */ +void registerMethodsBlockFile(blockFileFunctionst *f); + +blockFilet* allocBlockFile(char* filename); + +/* end class blockFile*/ diff --git a/blockFileInternal.h b/blockFileInternal.h @@ -0,0 +1,106 @@ +#pragma once + +static blockFileFunctionst *blockFileF = NULL; + +/* TODO declare structs for private data and add a void pointer to the private data in the class declaration */ + +// file format +// block 0 reserved > first 8 bytes: blockSize, next byte: block index size 32bits (0) or 64bits (1) +// block 1 data +// block 2 data +// ... 
+ +typedef struct { + u64 blockSize; + uint8_t blockIndexSize:1; // 0 32bits - 1 64bits TODO 32 only for now +} block0; + +// block format +// byte 0: +// bit 0: 0 block in a chain - 1 first block in chain +// bit 1: 0 not compressed - 1 compressed with lz4 +// dataSize u64 - size of the document, only first block +// nextBlock u32 or u64 - 0 means no next block +// (blCount u64 - how many block for this document) +// decompressedSize u32 - only when document is compressed, only first block +// data +// +// First block +// 1 or 3 +// dataSize +// nextBlock +// decompressedSize +// data +// +// Other blocks +// 0 +// nextBlock +// data + +#define PACKED __attribute__((__packed__)) + +typedef struct PACKED { + u8 chain:1; // 0 block in a chain - 1 first block in chain + u8 z:1; // 0 not compressed - 1 compressed with lz4 + u64 nextBlock; + // end of header in chain block + u64 dataSize; + // end of header in not compressed document + uint32_t decompressedSize; +} blockt; + +struct PACKED firstBlockHeader { + u8 chain:1; // 0 block in a chain - 1 first block in chain + u8 z:1; // 0 not compressed - 1 compressed with lz4 + u64 nextBlock; + u64 dataSize; +}; + +struct PACKED firstBlockHeaderCompressed { + u8 chain:1; // 0 block in a chain - 1 first block in chain + u8 z:1; // 0 not compressed - 1 compressed with lz4 + u64 nextBlock; + u64 dataSize; + uint32_t decompressedSize; +}; + +struct PACKED blockheader { + u8 chain:1; // 0 block in a chain - 1 first block in chain + u8 z:1; // 0 not compressed - 1 compressed with lz4 + u64 nextBlock; +}; + + + +// API +// open open file +// load load all documents in file +// add add data, return first block +// get get data by block +// remove remove data +// delete delete file +// close close file + +#define uI u32 + +dArrayT(freeBlocks, uI); + +typedef enum {READWRITE, APPEND} fmodet; + +struct privateBlockFile { + u64 count; // block count + u64 blockSize; + freeBlocks freeBlocks; + const char *name; + char *freeName; + FILE 
*f; + fmodet fmode; + FILE *freeF; + flagsBlockFilet defaultFlags; +}; + +// save freeBlocks in a file + +#define fil (*(self->file)) +#define BLOCKSIZE 64 + diff --git a/main.c b/main.c @@ -0,0 +1,71 @@ +#! /usr/bin/env sheepy + +#include "libsheepyObject.h" +#include "blockFile.h" + +int argc; char **argv; + +// TEST + +bool loadFunc(void *c, u64 b, bufBlockFilet data) { + cast(char*, s, data.data); + logVarG(b); + logVarG(s); + logVarG(data.len); + return true; +} + +// END TEST + +int main(int ARGC, char** ARGV) { + + argc = ARGC; argv = ARGV; + + initLibsheepy(argv[0]); + + char *filename = "file.db"; + + /* createBlockFile(bf); */ + /* bf.f->deleteF(filename); */ + /* freeO(&bf); */ + + blockFilet *file = allocBlockFile("file.db"); + + loadBFO(file, NULL, loadFunc); + XSUCCESS; + + // add data to a block: addBlock(file, void*, length): u64 block + // add data in many blocks: addData(file, void*, length): u64[], end with 0 + // save index of first block in first u64 of the block + // save next block index in second u64 of the block, when next block index is 0, the chain is finished + // use free block first, when there are no free blocks + // update freeF + logVarG(addBFO(file, "qweqwe", 6)); + char *s1 = "111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111"; + char *s2 = "222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 cfp command printing current path in front of given parameters searchReplace command searches strings in files and replaces them with given searchReplace command searches strings in files and replaces them with"; + addBFO(file, s1, strlen(s1)); + logVarG(removeBFO(file, 1)); + logVarG(addBFO(file, s2, strlen(s2))); + + // get block + + // remove data: remove(file, block): bool + // read block header + // find first block + // add blocks to freeF + + // load: load(file, 
callback) + // read block 1, find first block + // add read blocks to a list + // find next chain of blocks + // > recreate index, or save the index + + // delete block file: delete(file) + //delete(file); + + // close: close(file) + closeBFO(file); + + // delete file: deleteF(filename) + //deleteF(filename); +} diff --git a/memTest.c.template b/memTest.c.template @@ -0,0 +1,23 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define ck_assert_str_eq(a,b) a;b; +#define ck_assert_str_ne(a,b) a;b; +#define ck_assert_ptr_eq(a,b) a;b; +#define ck_assert_ptr_ne(a,b) a;b; +#define ck_assert_uint_eq(a,b) a;b; +#define ck_assert_uint_ne(a,b) a;b; +#define ck_assert_int_eq(a,b) a;b; +#define ck_assert_int_ne(a,b) a;b; +#define ck_assert(a) a; + +__tests + +int main(int n, char**v) { + +initLibsheepy(v[0]); +setLogMode(LOG_VERBOSE); + +__calls +} diff --git a/package.yml b/package.yml @@ -0,0 +1,29 @@ +--- + name: blockFile + version: 0.0.5 + description: "Save data in chunked in blocks in a file for creating databases" + bin: ./blockFile.c + #cflags: -DA -ggdb -std=gnu11 -fPIC -pipe + #lflags: -lpcre + repository: + type: git + url: git+https://github.com/RemyNoulin/blockFile.git + keywords: + - utility + author: Remy + license: MIT + bugs: + url: https://github.com/RemyNoulin/blockFile/issues + homepage: https://github.com/RemyNoulin/blockFile + dependencies: + lz4: + # Test configuration: + testBin: ./testBlockFile.c + #testCflags: -ggdb -std=gnu11 -fPIC -pipe -fprofile-arcs -ftest-coverage -Wall -Wextra + #testLflags: -lcheck_pic -lrt -lm -lsubunit -fprofile-arcs -ftest-coverage -rdynamic + # Memcheck configuration: + memcheckBin: ./testBlockFileMem.c + memcheckCmd: valgrind --leak-check=full --show-leak-kinds=all --suppressions=valgrindSuppressions.cfg + #memcheckCmd: valgrind --leak-check=full --show-leak-kinds=all --gen-suppressions=all + #memcheckCflags: -ggdb -std=gnu11 -fPIC -pipe + #memcheckLflags: -rdynamic diff --git a/runMemtest.c 
b/runMemtest.c @@ -0,0 +1,112 @@ +#! /usr/bin/env sheepy +/* or direct path to sheepy: #! /usr/local/bin/sheepy */ + +// +// in unit test file, add line: +// //START MEM TEST ANCHOR +// +// + +#include "libsheepyObject.h" + +#define internal static + +#include <stdlib.h> +#include <stdio.h> + +int argc; char **argv; + +enum {START, TEST, TESTEND, SEARCH}; + +int main(int ARGC, char** ARGV) { + char **list = NULL; + char **tests = NULL; + char **functions = NULL; + char **result = NULL; + + argc = ARGC; argv = ARGV;;// + + initLibsheepy(argv[0]); + + if (argc < 3) { + printf("Give a parameter: unit test c file and template file"); + printf("\n"); + XFAILURE; + } + + // get function list from argv[1] + list = readText(argv[1]); + + int status = START;; + forEachCharP(list, e) { + if (status != START) { + if (findS(*e, "#include")) { + listPushS(&tests, *e); + continue; + } + if (findS(*e, "START_TEST(")) { + char **l = split(*e, "("); + char **l2 = split(l[1], ")");; + iAppendS(&l2[0], "();"); + listPushS(&functions, l2[0]); + listFreeManyS(l,l2); + iReplaceManyS(e, "START_TEST(", "void ", ")", "(void) {"); + status = TEST; + } + if (findS(*e, "END_TEST")) { + iReplaceS(e, "END_TEST", "}",0); + status = TESTEND; + } + if (status == SEARCH) { + char *s = sliceS(*e, 0, 5);; + if (strEq(s, "Suite")) { + break; + } + free(s); + listPushS(&tests, *e); + continue; + } + if ((status == TEST) || (status == TESTEND)) { + listPushS(&tests, *e); + if (status == TESTEND) { + status = SEARCH; + } + } + } + else if (findS(*e, "START MEM TEST ANCHOR")) { + status = SEARCH; + } + } + + listFreeS(list); + + //listPrintS(tests); + //listPrintS(functions); + + // read template + char **template = readText(argv[2]); + + // process template + forEachCharP(template, e) { + if (findS(*e, "__tests")) { + listAppendS(&result, tests); + } + else if (findS(*e, "__calls")) { + listAppendS(&result, functions); + } + else { + listPushS(&result, *e); + } + } + + // save result + char *fileName = 
sliceS(argv[1], 0, -2); + iAppendS(&fileName, "Mem.c"); + printf("%s\n", fileName); + writeText(fileName, result); + + free(fileName); + listFreeManyS(tests, functions, result, template); + + XSUCCESS; +} diff --git a/testBlockFile.c b/testBlockFile.c @@ -0,0 +1,227 @@ +#! /usr/bin/env sheepy +/* or direct path to sheepy: #! /usr/local/bin/sheepy */ + +/** \file + * Each test must be independent and self-contained. NOTE(review): getT, removeT and loadT appear to depend on the file.db left behind by the tests that run before them - confirm. + */ + +#include <check.h> + +//START MEM TEST ANCHOR + +#include "libsheepyObject.h" +#include "blockFile.h" + +int argc; char **argv; + +blockFilet bf; + +START_TEST(basetT) + /* base object functions: initiate/free, initiateAllocate/terminate, alloc, toString and duplicate */ + initiateBlockFile(&bf); + freeO(&bf); + + blockFilet *rg = NULL; + initiateAllocateBlockFile(&rg); + terminateO(rg); + + rg = allocBlockFile("file.db"); + + char *s = toStringO(rg); + + ck_assert_str_eq(s, "TODO - blockFile"); + free(s); + + blockFilet *rgDup = duplicateO(rg); + terminateO(rgDup); + + terminateO(rg); + +END_TEST + +char *s1 = "111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111"; + +char *s2 = "222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 cfp command printing current path in front of given parameters searchReplace command searches strings in files and replaces them with given searchReplace command searches strings in files and replaces them with"; + +char *s3 = "get block emove data: remove(file, block): bool read block header find first block add blocks to freeF load: load(file, callback) read block 1, find first block add read blocks to a list find next chain of blocks > recreate index, or save the index delete block file: delete(file) close: close(file) delete file: deleteF(filename) add data to a block: addBlock(file, void*, length): u64 block add data in many blocks: addData(file, void*, length): u64[], end with 0"; + +START_TEST(addT) + /* open, delete and reopen file.db, then add three blocks and check the returned block numbers */ + initiateBlockFile(&bf); + + bf.f->open(&bf, 
"file.db"); + bf.f->delete(&bf); + bf.f->open(&bf, "file.db"); + + u64 r; + + r = addBFO(&bf, "qweqwe", 7); + ck_assert_int_eq(r, 1); + + // add compressed block + r = addBFO(&bf, s2, strlen(s2)+1); + ck_assert_int_eq(r, 2); + + // add big block + r = addBFO(&bf, s1, strlen(s1)+1); + ck_assert_int_eq(r, 5); + + freeO(&bf); + + // no free blocks + ck_assert_int_eq(fileSize("file.dbx"), 0); + +END_TEST + +START_TEST(getT) + /* read blocks 5, 2 and 1 back and check data and length; the blocks are not added here, they are expected to be in file.db already */ + initiateBlockFile(&bf); + + bf.f->open(&bf, "file.db"); + + bufBlockFilet res; + + res = getBFO(&bf, 5); + ck_assert_str_eq((char*)res.data, s1); + ck_assert_int_eq(res.len, strlen(s1)+1); + free(res.data); + + res = getBFO(&bf, 2); + ck_assert_str_eq((char*)res.data, s2); + ck_assert_int_eq(res.len, strlen(s2)+1); + free(res.data); + + res = getBFO(&bf, 1); + ck_assert_str_eq((char*)res.data, "qweqwe"); + ck_assert_int_eq(res.len, 7); + free(res.data); + + freeO(&bf); + + // no free blocks + ck_assert_int_eq(fileSize("file.dbx"), 0); + +END_TEST + +START_TEST(removeT) + /* remove blocks that this test did not add itself, add new ones and check the returned status and block numbers */ + initiateBlockFile(&bf); + + bf.f->open(&bf, "file.db"); + + bufBlockFilet res; + bool r; + u64 R; + + // remove block 1 + r = removeBFO(&bf, 1); + ck_assert(r); + + // remove block 1 again, fails + r = removeBFO(&bf, 1); + ck_assert(!r); + + // get a deleted block + res = getBFO(&bf, 1); + ck_assert_ptr_eq(res.data, NULL); + ck_assert_int_eq(res.len, 0); + + // add block + R = addBFO(&bf, "asdasd", 7); + ck_assert_int_eq(R, 1); + + // remove block 5 + r = removeBFO(&bf, 5); + ck_assert(r); + + // add block + R = addBFO(&bf, "zxczxc", 7); + ck_assert_int_eq(R, 7); + + // add another big block + R = addBFO(&bf, s3, strlen(s3)+1); + ck_assert_int_eq(R, 6); + + // remove block 2 + r = removeBFO(&bf, 2); + ck_assert(r); + + // remove block 2 again, fails + r = removeBFO(&bf, 2); + ck_assert(!r); + + freeO(&bf); + + // 3 free blocks + ck_assert_int_eq(fileSize("file.dbx"), 24); + +END_TEST +/* callback handed to loadBFO in loadT: frees the block buffer it receives and returns true */ +bool loadFunc(void UNUSED *c, u64 UNUSED b, bufBlockFilet data) { + /* cast(char*, s, data.data); */ + 
/* logVarG(b); */ + /* logVarG(s); */ + /* logVarG(data.len); */ + free(data.data); + return true; +} + +START_TEST(loadT) + /* run loadBFO over file.db with loadFunc as callback; the return value of loadBFO is not checked */ + initiateBlockFile(&bf); + + bf.f->open(&bf, "file.db"); + + loadBFO(&bf, NULL, loadFunc); + /* bufBlockFilet res; */ + /* bool r; */ + /* u64 R; */ + + freeO(&bf); + + // 3 free blocks + ck_assert_int_eq(fileSize("file.dbx"), 24); + +END_TEST + +Suite * blockFileSuite(void) { + Suite *s; + TCase *tc_core; + /* build the blockFile suite with one Core test case holding the five tests */ + s = suite_create("blockFile"); + + /* Core test case */ + tc_core = tcase_create("Core"); + + + tcase_add_test(tc_core, basetT); + tcase_add_test(tc_core, addT); + tcase_add_test(tc_core, getT); + tcase_add_test(tc_core, removeT); + tcase_add_test(tc_core, loadT); + + suite_add_tcase(s, tc_core); + + return s; +} + +int main(int ARGC, char** ARGV) { + /* run the suite and turn the number of failed tests into the process exit status */ + argc = ARGC; argv = ARGV; + + //don't initialize libsheepy, it conflicts with libcheck - initLibsheepy(ARGV[0]); + setLogMode(LOG_VERBOSE); + + int number_failed; + Suite *s; + SRunner *sr; + + s = blockFileSuite(); + sr = srunner_create(s); + + srunner_run_all(sr, CK_NORMAL); + number_failed = srunner_ntests_failed(sr); + srunner_free(sr); + + exit((number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/testBlockFile.sh b/testBlockFile.sh @@ -0,0 +1,5 @@ +spm test +cd ~/.sheepy/build/home/remy/git/sw/sheepyPackages/blockFile +gcov -b blockFile.c +cd - +cp ~/.sheepy/build/home/remy/git/sw/sheepyPackages/blockFile/blockFile.c.gcov . 
diff --git a/testBlockFileMem.c b/testBlockFileMem.c @@ -0,0 +1,202 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +/* NOTE(review): this file looks generated from testBlockFile.c by runMemtest.c (see testBlockFileMem.sh) - confirm before editing by hand. The ck_assert macros below only evaluate their arguments, nothing is compared. */ +#define ck_assert_str_eq(a,b) a;b; +#define ck_assert_str_ne(a,b) a;b; +#define ck_assert_ptr_eq(a,b) a;b; +#define ck_assert_ptr_ne(a,b) a;b; +#define ck_assert_uint_eq(a,b) a;b; +#define ck_assert_uint_ne(a,b) a;b; +#define ck_assert_int_eq(a,b) a;b; +#define ck_assert_int_ne(a,b) a;b; +#define ck_assert(a) a; + + +#include "libsheepyObject.h" +#include "blockFile.h" + +int argc; char **argv; + +blockFilet bf; + +void basetT(void) { + /* base object functions: initiate/free, initiateAllocate/terminate, alloc, toString and duplicate */ + initiateBlockFile(&bf); + freeO(&bf); + + blockFilet *rg = NULL; + initiateAllocateBlockFile(&rg); + terminateO(rg); + + rg = allocBlockFile("file.db"); + + char *s = toStringO(rg); + + ck_assert_str_eq(s, "TODO - blockFile"); + free(s); + + blockFilet *rgDup = duplicateO(rg); + terminateO(rgDup); + + terminateO(rg); + +} + +char *s1 = "111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111"; + +char *s2 = "222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 cfp command printing current path in front of given parameters searchReplace command searches strings in files and replaces them with given searchReplace command searches strings in files and replaces them with"; + +char *s3 = "get block emove data: remove(file, block): bool read block header find first block add blocks to freeF load: load(file, callback) read block 1, find first block add read blocks to a list find next chain of blocks > recreate index, or save the index delete block file: delete(file) close: close(file) delete file: deleteF(filename) add data to a block: addBlock(file, void*, length): u64 block add data in many blocks: addData(file, void*, length): u64[], end with 0"; + +void addT(void) { + /* open, delete and reopen file.db, then add three blocks */ + initiateBlockFile(&bf); + + bf.f->open(&bf, "file.db"); + 
bf.f->delete(&bf); + bf.f->open(&bf, "file.db"); + + u64 r; + + r = addBFO(&bf, "qweqwe", 7); + ck_assert_int_eq(r, 1); + + // add compressed block + r = addBFO(&bf, s2, strlen(s2)+1); + ck_assert_int_eq(r, 2); + + // add big block + r = addBFO(&bf, s1, strlen(s1)+1); + ck_assert_int_eq(r, 5); + + freeO(&bf); + + // no free blocks + ck_assert_int_eq(fileSize("file.dbx"), 0); + +} + +void getT(void) { + /* read blocks 5, 2 and 1 back and free the returned buffers; the blocks are not added here */ + initiateBlockFile(&bf); + + bf.f->open(&bf, "file.db"); + + bufBlockFilet res; + + res = getBFO(&bf, 5); + ck_assert_str_eq((char*)res.data, s1); + ck_assert_int_eq(res.len, strlen(s1)+1); + free(res.data); + + res = getBFO(&bf, 2); + ck_assert_str_eq((char*)res.data, s2); + ck_assert_int_eq(res.len, strlen(s2)+1); + free(res.data); + + res = getBFO(&bf, 1); + ck_assert_str_eq((char*)res.data, "qweqwe"); + ck_assert_int_eq(res.len, 7); + free(res.data); + + freeO(&bf); + + // no free blocks + ck_assert_int_eq(fileSize("file.dbx"), 0); + +} + +void removeT(void) { + /* remove blocks that this function did not add itself and add new ones */ + initiateBlockFile(&bf); + + bf.f->open(&bf, "file.db"); + + bufBlockFilet res; + bool r; + u64 R; + + // remove block 1 + r = removeBFO(&bf, 1); + ck_assert(r); + + // remove block 1 again, fails + r = removeBFO(&bf, 1); + ck_assert(!r); + + // get a deleted block + res = getBFO(&bf, 1); + ck_assert_ptr_eq(res.data, NULL); + ck_assert_int_eq(res.len, 0); + + // add block + R = addBFO(&bf, "asdasd", 7); + ck_assert_int_eq(R, 1); + + // remove block 5 + r = removeBFO(&bf, 5); + ck_assert(r); + + // add block + R = addBFO(&bf, "zxczxc", 7); + ck_assert_int_eq(R, 7); + + // add another big block + R = addBFO(&bf, s3, strlen(s3)+1); + ck_assert_int_eq(R, 6); + + // remove block 2 + r = removeBFO(&bf, 2); + ck_assert(r); + + // remove block 2 again, fails + r = removeBFO(&bf, 2); + ck_assert(!r); + + freeO(&bf); + + // 3 free blocks + ck_assert_int_eq(fileSize("file.dbx"), 24); + +} +/* callback handed to loadBFO in loadT: frees the block buffer it receives and returns true */ +bool loadFunc(void UNUSED *c, u64 UNUSED b, bufBlockFilet data) { + /* cast(char*, s, data.data); */ + /* logVarG(b); */ + /* logVarG(s); 
*/ + /* logVarG(data.len); */ + free(data.data); + return true; +} + +void loadT(void) { + /* run loadBFO over file.db with loadFunc as callback; the return value of loadBFO is not checked */ + initiateBlockFile(&bf); + + bf.f->open(&bf, "file.db"); + + loadBFO(&bf, NULL, loadFunc); + /* bufBlockFilet res; */ + /* bool r; */ + /* u64 R; */ + + freeO(&bf); + + // 3 free blocks + ck_assert_int_eq(fileSize("file.dbx"), 24); + +} + + +int main(int n, char**v) { +/* initialize libsheepy, then call the tests one after the other in a fixed order */ +initLibsheepy(v[0]); +setLogMode(LOG_VERBOSE); + +basetT(); +addT(); +getT(); +removeT(); +loadT(); +} diff --git a/testBlockFileMem.sh b/testBlockFileMem.sh @@ -0,0 +1,2 @@ +./runMemtest.c testBlockFile.c memTest.c.template +spm memcheck diff --git a/valgrindSuppressions.cfg b/valgrindSuppressions.cfg @@ -0,0 +1,15 @@ +{ + <insert_a_suppression_name_here> + Memcheck:Param + write(buf) + obj:/lib/x86_64-linux-gnu/libc-2.24.so + fun:_IO_file_write@@GLIBC_2.2.5 + fun:new_do_write + fun:_IO_do_write@@GLIBC_2.2.5 + fun:_IO_file_close_it@@GLIBC_2.2.5 + fun:fclose@@GLIBC_2.2.5 + fun:writeFile + fun:openBlockFile + fun:addT + fun:main +}