From f576ef7e0ec4317f0ff2f632ae8c2312dc916673 Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Wed, 10 Feb 2016 11:11:16 +0100 Subject: Rough implementation of new permdb format. Added regression tests and benchmarks. Chunks not implemented yet. --- c_src/Makefile | 2 +- c_src/permdb.c | 637 +++++++++++++++++++++++++++++++++++++++++++++++++++------ c_src/permdb.h | 1 + 3 files changed, 572 insertions(+), 68 deletions(-) (limited to 'c_src') diff --git a/c_src/Makefile b/c_src/Makefile index 72a47d8..d45e601 100644 --- a/c_src/Makefile +++ b/c_src/Makefile @@ -1,6 +1,6 @@ CC = gcc CFLAGS = -Wall -Werror -std=gnu99 -LDFLAGS = +LDFLAGS = -lnettle PORTS = fsynchelper hsmhelper permdbport diff --git a/c_src/permdb.c b/c_src/permdb.c index 7a58885..f629e85 100644 --- a/c_src/permdb.c +++ b/c_src/permdb.c @@ -14,23 +14,25 @@ #include #include #include +#include #include "erlport.h" #include "permdb.h" #include "hash.h" +#define INDEX_COMMIT_TRAILER_SIZE (8 + SHA256_DIGEST_SIZE + 8) + static const int bitsperlevel = 2; static const int keylen = 32; -static const char *nodemagic = "\x8a\x44"; -static const char *datamagic = "\xcb\x0e"; - typedef struct { int fd; + char *name; node_offset datasize; node_offset lastcommit; node_offset filesize; char *writebuffer; uint64_t writebufferalloc; + struct sha256_ctx commit_checksum_context; } buffered_file; struct permdb_object { @@ -43,8 +45,21 @@ struct permdb_object { static const node_object nullnode = {{0, 0, 0, 0}}; +static const node_object errornode = {{NODE_ENTRY_ERROR_NODE, + NODE_ENTRY_ERROR_NODE, + NODE_ENTRY_ERROR_NODE, + NODE_ENTRY_ERROR_NODE}}; + +int +calc_padding(int offset, int alignment) +{ + int misalign = offset % alignment; + if (misalign == 0) { + return 0; + } + return alignment - misalign; +} -static const char indexfile_header[16] = "PERMDB IDX FILE "; #if 0 static void @@ -61,6 +76,8 @@ static void writebuffer_add(buffered_file *file, const void *data, uint64_t length); static int writebuffer_flush(buffered_file *file); +static uint64_t +writebuffer_length(buffered_file *file); struct nodecache { node_object value; @@ -96,7 +113,7 @@ hashnode(void *node_v) return hash; } -#if DEBUG_CACHE +#if DEBUG_CACHE || 1 static void print_hex(const void *data, int length) { @@ -111,6 +128,7 @@ print_hex(const void *data, int length) #define DEBUG_CACHE 0 #define DEBUG_WRITE 0 #define DEBUG_READ 0 +#define DEBUG_PORT 0 static node_object get_node_from_cache(permdb_object *state, const char *key) @@ -128,7 +146,7 @@ get_node_from_cache(permdb_object *state, const char *key) #if DEBUG_CACHE fprintf(stderr, "found nothing in cache\n"); #endif - return nullnode; + return errornode; } #if DEBUG_CACHE fprintf(stderr, "got cache key %s: ", node->key); @@ -153,7 +171,7 @@ get_node_from_dirtynodes(permdb_object *state, const char *key) #if DEBUG_CACHE fprintf(stderr, "found nothing\n"); #endif - return nullnode; + return errornode; } #if DEBUG_CACHE fprintf(stderr, "got key %s: ", node->key); @@ -215,6 +233,277 @@ delete_all_dirty_nodes(permdb_object *state) hashtabcleantab(state->dirtynodes, true_cond, NULL); } +static const uint64_t index_file_cookie = 0xb7e16b02ba8a6d1b; +static const uint64_t index_commit_cookie = 0x2fb1778c74a402e4; +static const uint64_t index_node_cookie = 0x2e0f555ad73210d1; + +static const uint8_t data_file_cookie[] = {0xd5, 0x35, 0x51, 0xba, 0x53, 0x9a, 0x42, 0x52}; + +static const uint8_t data_entry_cookie[] = {0xe7, 0xc1, 0xcd, 0xc2, 0xba, 0x3d, 0xc7, 0x7c}; + +static const uint8_t data_commit_start_cookie[] = {0x75, 0xc2, 0xe4, 0xb3, 0xd5, 0xf6, 0x43, 0xa1}; +static const uint8_t data_commit_end_cookie[] = {0x2b, 0x05, 0xee, 0xd6, 0x1b, 0x5a, 0xf5, 0x50}; + +int +committree(permdb_object *state); + +node_offset +indexfile_add_header(buffered_file *file) +{ + writebuffer_add(file, &index_file_cookie, sizeof(index_file_cookie)); + uint64_t length = writebuffer_length(file); + writebuffer_flush(file); + return length; +} + +node_offset +datafile_add_header(buffered_file *file) +{ + fprintf(stderr, "adding header to %s\n", file->name); + uint32_t parameters[3]; + writebuffer_add(file, &data_file_cookie, sizeof(data_file_cookie)); + parameters[0] = htonl(4096); + parameters[1] = htonl(2); + parameters[2] = htonl(32); + writebuffer_add(file, parameters, sizeof(parameters)); + uint64_t length = writebuffer_length(file); + writebuffer_flush(file); + return length; +} + +void +initial_node(permdb_object *state) +{ + char *key = ""; + struct nodecache *node = malloc(sizeof(struct nodecache) + strlen(key) + 1); + strcpy(node->key, key); + node->value = nullnode; + hashtabaddreplace(state->dirtynodes, node); +} + +int +initial_commit(permdb_object *state) +{ + initial_node(state); + return committree(state); +} + +static void +set_error(permdb_object *state, const char * __restrict, ...) __attribute__ ((__format__ (__printf__, 2, 3))); + +unsigned char * +read_from_file(buffered_file *file, size_t length, off_t offset) +{ + unsigned char *buffer = malloc(length); + if (buffer == NULL) { + return NULL; + } + ssize_t ret = pread(file->fd, buffer, length, (off_t) offset); + if (ret != length) { + free(buffer); + return NULL; + } + return buffer; +} + + +struct commit_info { + node_offset start; + node_offset length; + uint8_t checksum[SHA256_DIGEST_SIZE]; +}; + +int +validate_checksum(struct commit_info *commit, buffered_file *file) +{ + //fprintf(stderr, "validate_checksum: read from file: length %llu start %llu\n", commit->length, commit->start); + unsigned char *checksumdata = read_from_file(file, commit->length, commit->start); + + if (checksumdata == NULL) { + return -1; + } + + uint8_t checksum[SHA256_DIGEST_SIZE]; + + struct sha256_ctx commit_checksum_context; + sha256_init(&commit_checksum_context); + sha256_update(&commit_checksum_context, commit->length, checksumdata); + sha256_digest(&commit_checksum_context, SHA256_DIGEST_SIZE, checksum); + + if (memcmp(checksum, commit->checksum, SHA256_DIGEST_SIZE) == 0) { + free(checksumdata); + return 0; + } + + free(checksumdata); + + return -1; +} + +int +verify_index_commit(buffered_file *file, node_offset offset) +{ + //fprintf(stderr, "verifying index file: commit verification\n"); + offset -= INDEX_COMMIT_TRAILER_SIZE; + unsigned char *data = read_from_file(file, INDEX_COMMIT_TRAILER_SIZE, offset); + + if (memcmp(data + sizeof(uint64_t) + SHA256_DIGEST_SIZE, &index_commit_cookie, sizeof(index_commit_cookie)) != 0) { + fprintf(stderr, "verifying index file: incorrect commit cookie\n"); + return -1; + } + struct commit_info commit; + uint64_t length; + memcpy(&length, data, sizeof(uint64_t)); + commit.length = length; + commit.start = offset - commit.length; + memcpy(commit.checksum, data + sizeof(uint64_t), SHA256_DIGEST_SIZE); + + return validate_checksum(&commit, file); +} + +int +indexfile_verify_file(buffered_file *file) +{ + //fprintf(stderr, "verifying index file\n"); + unsigned char *header = read_from_file(file, sizeof(index_file_cookie), 0); + if (memcmp(header, &index_file_cookie, sizeof(index_file_cookie)) != 0) { + free(header); + fprintf(stderr, "verifying index file: incorrect file cookie\n"); + return -1; + } + free(header); + if (verify_index_commit(file, file->filesize) < 0) { + fprintf(stderr, "verifying index file: commit verification failed\n"); + return -1; + } + return 0; +} + +struct commit_info * +read_data_commit_backward(buffered_file *file, node_offset *offset); + +int +datafile_verify_file(buffered_file *file) +{ + unsigned char *header = read_from_file(file, sizeof(data_file_cookie), 0); + if (memcmp(header, &data_file_cookie, sizeof(data_file_cookie)) != 0) { + free(header); + return -1; + } + free(header); + + node_offset offset = file->lastcommit; + + //fprintf(stderr, "verifying commit: %llu\n", offset); + struct commit_info *data_commit = read_data_commit_backward(file, &offset); + + if (data_commit == NULL || validate_checksum(data_commit, file) < 0) { + //fprintf(stderr, "commit broken: %llu\n", offset); + return -1; + } + + return 0; +} + + +static uint32_t +readnet32(void *ptr); + +static unsigned char * +readdatakeyandlen(permdb_object *state, node_offset offset, size_t *datalen); + +struct commit_info * +read_data_commit(buffered_file *file, node_offset *offset) +{ + unsigned char *data = read_from_file(file, sizeof(uint32_t) + SHA256_DIGEST_SIZE + sizeof(data_commit_end_cookie), *offset); + if (memcmp(data + sizeof(uint32_t) + SHA256_DIGEST_SIZE, data_commit_end_cookie, sizeof(data_commit_end_cookie)) != 0) { + return NULL; + } + *offset += sizeof(uint32_t); + struct commit_info *commit = malloc(sizeof(struct commit_info)); + //fprintf(stderr, "read commit: %llu\n", *offset); + //print_hex(data, sizeof(uint32_t) + SHA256_DIGEST_SIZE); + commit->length = readnet32(data); + commit->start = *offset - commit->length; + memcpy(&commit->checksum, data + sizeof(uint32_t), SHA256_DIGEST_SIZE); + *offset += SHA256_DIGEST_SIZE + sizeof(data_commit_end_cookie); + return commit; +} + + +struct commit_info * +read_data_commit_forward(buffered_file *file, node_offset *offset) +{ + int padding = calc_padding(*offset, 4); + *offset += sizeof(data_commit_start_cookie) + padding; + return read_data_commit(file, offset); +} + +struct commit_info * +read_data_commit_backward(buffered_file *file, node_offset *offset) +{ + *offset -= sizeof(uint32_t) + SHA256_DIGEST_SIZE + sizeof(data_commit_end_cookie); + return read_data_commit(file, offset); +} + +int +addindex(permdb_object *state, const unsigned char *key, unsigned int keylength, node_offset dataoffset); + +int +rebuild_index_file(permdb_object *state) +{ + state->indexfile.filesize = 0; + state->indexfile.datasize = 0; + state->indexfile.lastcommit = state->indexfile.datasize; + sha256_init(&state->indexfile.commit_checksum_context); + ftruncate(state->indexfile.fd, 0); + + state->indexfile.lastcommit = indexfile_add_header(&state->indexfile); + + initial_node(state); + + node_offset offset = sizeof(data_file_cookie) + sizeof(uint32_t) * 3; + while (1) { + unsigned char *cookie = read_from_file(&state->datafile, sizeof(data_entry_cookie), offset); + if (cookie == NULL) { + break; + } + if (memcmp(&data_entry_cookie, cookie, sizeof(data_entry_cookie)) == 0) { + size_t datalen; + unsigned char *datakey = readdatakeyandlen(state, offset, &datalen); + //fprintf(stderr, "entry %llu: %zu\n", offset, datalen); + int result = addindex(state, datakey, keylen, offset); + offset += sizeof(data_entry_cookie) + keylen + sizeof(uint32_t) + datalen; + + free(datakey); + + if (result != 1) { + free(cookie); + return -1; + } + } else if (memcmp(&data_commit_start_cookie, cookie, sizeof(data_commit_start_cookie)) == 0) { + struct commit_info *data_commit = read_data_commit_forward(&state->datafile, &offset); + //fprintf(stderr, "verifying commit: %llu %p\n", offset, data_commit); + + if (data_commit == NULL || validate_checksum(data_commit, &state->datafile) < 0) { + fprintf(stderr, "commit broken: %llu\n", offset); + free(cookie); + return -1; + } + + //fprintf(stderr, "commit %llu\n", offset); + } else { + //fprintf(stderr, "error %llu\n", offset); + //print_hex(cookie, sizeof(data_entry_cookie)); + free(cookie); + return -1; + } + free(cookie); + } + fprintf(stderr, "index file rebuilt\n"); + return committree(state); +} + permdb_object * permdb_alloc(const char *dbpath) { @@ -244,7 +533,9 @@ permdb_alloc(const char *dbpath) permdb_object *state = malloc(sizeof(permdb_object)); state->datafile.fd = fd; + state->datafile.name = "datafile"; state->indexfile.fd = idxfd; + state->indexfile.name = "indexfile"; state->nodecache = hashtabnewf(1000000, comparenodes, hashnode, HASHTAB_GROW); state->dirtynodes = hashtabnewf(1000000, comparenodes, hashnode, HASHTAB_GROW); off_t datafile_filesize = lseek(fd, 0, SEEK_END); @@ -257,6 +548,7 @@ permdb_alloc(const char *dbpath) state->datafile.lastcommit = state->datafile.datasize; state->datafile.writebufferalloc = 1024*1024; state->datafile.writebuffer = calloc(state->datafile.writebufferalloc, 1); + sha256_init(&state->datafile.commit_checksum_context); off_t indexfile_filesize = lseek(idxfd, 0, SEEK_END); if (indexfile_filesize < 0) { warn("lseek %s", idxpath); @@ -267,22 +559,32 @@ permdb_alloc(const char *dbpath) state->indexfile.lastcommit = state->indexfile.datasize; state->indexfile.writebufferalloc = 1024*1024; state->indexfile.writebuffer = calloc(state->indexfile.writebufferalloc, 1); + sha256_init(&state->indexfile.commit_checksum_context); state->error = NULL; if (state->datafile.filesize == 0 && state->indexfile.filesize == 0) { #if DEBUG_WRITE fprintf(stderr, "writing header\n"); #endif - writebuffer_add(&state->indexfile, indexfile_header, sizeof(indexfile_header)); - writebuffer_flush(&state->indexfile); - state->indexfile.lastcommit = sizeof(indexfile_header); + state->indexfile.lastcommit = indexfile_add_header(&state->indexfile); + state->datafile.lastcommit = datafile_add_header(&state->datafile); + initial_commit(state); } else if (state->datafile.filesize > 0 && state->indexfile.filesize == 0) { - /* - * non-empty data file and empty index file means that - * the index should be rebuilt, but this is not - * supported yet - */ - warnx("non-empty data file but empty index"); - return NULL; + if (rebuild_index_file(state) < 0) { + warnx("index file rebuilding failed"); + return NULL; + } + } + if (datafile_verify_file(&state->datafile) < 0) { + warnx("data file verification failed"); + return NULL; + } + if (indexfile_verify_file(&state->indexfile) < 0) { + warnx("index file verification failed, rebuilding"); + + if (rebuild_index_file(state) < 0) { + warnx("index file rebuilding failed"); + return NULL; + } } return state; } @@ -311,10 +613,18 @@ writebuffer_flush_nosync(buffered_file *file); static void writebuffer_add(buffered_file *file, const void *data, uint64_t length) { + sha256_update(&file->commit_checksum_context, length, data); +#if DEBUG_WRITE + fprintf(stderr, "adding data to %s: ", file->name); + print_hex(data, length); +#endif uint64_t needspace = length + writebuffer_length(file); if (needspace > file->writebufferalloc) { - writebuffer_flush_nosync(file); + int ret = writebuffer_flush_nosync(file); + if (ret < 0) { + err(1, "writebuffer_flush_nosync failed"); + } needspace = length + writebuffer_length(file); @@ -359,6 +669,10 @@ writebuffer_flush(buffered_file *file) } ret = fsync(file->fd); + sha256_init(&file->commit_checksum_context); +#if DEBUG_WRITE + fprintf(stderr, "clearing data for %s\n", file->name); +#endif return ret; } @@ -389,9 +703,9 @@ keypart(const unsigned char *key, unsigned int level) static char * packnode(node_object node) { - char *data = malloc(strlen(nodemagic) + sizeof(node_object)); - memcpy(data, nodemagic, 2); - memcpy(data+2, &node, sizeof(node_object)); + char *data = malloc(sizeof(index_node_cookie) + sizeof(node_object)); + memcpy(data, &index_node_cookie, sizeof(index_node_cookie)); + memcpy(data+sizeof(index_node_cookie), &node, sizeof(node_object)); return data; } @@ -419,9 +733,6 @@ get_entry_in_node(node_object node, unsigned char n) } static void -set_error(permdb_object *state, const char * __restrict, ...) __attribute__ ((__format__ (__printf__, 2, 3))); - -static void set_error(permdb_object *state, const char *format, ...) { va_list args; @@ -445,18 +756,20 @@ set_error(permdb_object *state, const char *format, ...) static node_object unpacknode(permdb_object *state, const char *data, size_t datalen) { - if (memcmp(nodemagic, data, 2) != 0) { - set_error(state, "incorrect magic %02x%02x\n", (unsigned char)data[0], (unsigned char)data[1]); - return nullnode; + if (memcmp(&index_node_cookie, data, sizeof(index_node_cookie)) != 0) { + print_hex(data, sizeof(index_node_cookie)); + print_hex(&index_node_cookie, sizeof(index_node_cookie)); + set_error(state, "incorrect magic (node) %02x%02x\n", (unsigned char)data[0], (unsigned char)data[1]); + return errornode; } - if (datalen != sizeof(node_object) + 2) { - return nullnode; + if (datalen != sizeof(node_object) + sizeof(index_node_cookie)) { + return errornode; } node_object node; - memcpy(&node, data + 2, sizeof(node)); + memcpy(&node, data + sizeof(index_node_cookie), sizeof(node)); return node; } @@ -495,9 +808,12 @@ read_internal_data(permdb_object *state, node_offset offset, size_t length) } static int -isnullnode(node_object node) +iserrornode(node_object node) { - return node.data[0] == 0 && node.data[1] == 0 && node.data[2] == 0 && node.data[3] == 0; + return node.data[0] == NODE_ENTRY_ERROR_NODE && + node.data[1] == NODE_ENTRY_ERROR_NODE && + node.data[2] == NODE_ENTRY_ERROR_NODE && + node.data[3] == NODE_ENTRY_ERROR_NODE; } node_object @@ -508,36 +824,41 @@ readnode(permdb_object *state, node_offset offset, const char *cachekey) #endif if (cachekey) { node_object dirtynode = get_node_from_dirtynodes(state, cachekey); - if (!isnullnode(dirtynode)) { + if (!iserrornode(dirtynode)) { +#if DEBUG_READ + fprintf(stderr, "reading node: found node in dirty nodes\n"); +#endif return dirtynode; } if (offset == NODE_ENTRY_DIRTY_NODE) { set_error(state, "referring to dirty node at key %s, but node not in dirtynodes\n", cachekey); - return nullnode; - } - - if (state->indexfile.lastcommit == 16) { - return nullnode; +#if DEBUG_READ + fprintf(stderr, "reading node: referring to dirty node at key %s, but node not in dirtynodes\n", cachekey); +#endif + return errornode; } node_object cachednode = get_node_from_cache(state, cachekey); - if (!isnullnode(cachednode)) { + if (!iserrornode(cachednode)) { +#if DEBUG_READ + fprintf(stderr, "reading node: found node in cache\n"); +#endif return cachednode; } } - size_t length = strlen(nodemagic) + sizeof(node_object); + size_t length = sizeof(index_node_cookie) + sizeof(node_object); char *buffer = malloc(length); if (buffer == NULL) { - return nullnode; + return errornode; } ssize_t ret = pread(state->indexfile.fd, buffer, length, (off_t) offset); if (ret != length) { free(buffer); set_error(state, "node not present at %llu: length %zd\n", (long long unsigned int) offset, ret); - return nullnode; + return errornode; } @@ -548,6 +869,10 @@ readnode(permdb_object *state, node_offset offset, const char *cachekey) put_node_in_cache(state, cachekey, result); } +#if DEBUG_READ + fprintf(stderr, "reading node: success\n"); +#endif + return result; } @@ -596,20 +921,26 @@ getpath(permdb_object *state, const unsigned char *key, struct nodelist *nodes) { unsigned int level = 0; - node_offset rootoffset = state->indexfile.lastcommit - (strlen(nodemagic) + sizeof(node_object)); +#if 0 + if (state->indexfile.lastcommit < (sizeof(index_node_cookie) + sizeof(node_object) + INDEX_COMMIT_TRAILER_SIZE)) { + fprintf(stderr, "No commit exists (lastcommit %llu)\n", (long long unsigned int) state->indexfile.lastcommit); + return -1; + } +#endif + + node_offset rootoffset = state->indexfile.lastcommit - (sizeof(index_node_cookie) + sizeof(node_object) + INDEX_COMMIT_TRAILER_SIZE); node_object node = readnode(state, rootoffset, ""); - if (isnullnode(node)) { + if (iserrornode(node)) { fprintf(stderr, "cannot find root node at offset %llu (lastcommit %llu)\n", (long long unsigned int) rootoffset, (long long unsigned int) state->indexfile.lastcommit); - if (nodes->pos >= nodes->len) { - fprintf(stderr, "tried to write after end of allocated list\n"); - return -1; - } - add_entry_to_nodelist(nodes, nullnode); - return 0; + return -1; } +#if DEBUG_READ + fprintf(stderr, "getpath: got node\n"); +#endif + while (1) { unsigned char kb = keybits(key, level); node_entry entry = get_entry_in_node(node, kb); @@ -619,13 +950,19 @@ getpath(permdb_object *state, const unsigned char *key, struct nodelist *nodes) } add_entry_to_nodelist(nodes, node); if (entry == 0 || isdata(entry)) { +#if DEBUG_READ + fprintf(stderr, "getpath: return node\n"); +#endif return (char) kb; } level++; char *kp = keypart(key, level); node = readnode(state, entryoffset(entry), kp); - if (isnullnode(node)) { + if (iserrornode(node)) { free(kp); +#if DEBUG_READ + fprintf(stderr, "getpath: not found\n"); +#endif return -1; } free(kp); @@ -638,13 +975,20 @@ getpathlastnode(permdb_object *state, const unsigned char *key) { unsigned int level = 0; - node_offset rootoffset = state->indexfile.lastcommit - (strlen(nodemagic) + sizeof(node_object)); + node_offset rootoffset = state->indexfile.lastcommit - (sizeof(index_node_cookie) + sizeof(node_object) + INDEX_COMMIT_TRAILER_SIZE); node_object node = readnode(state, rootoffset, ""); - if (isnullnode(node)) { - return 0; + if (iserrornode(node)) { +#if DEBUG_READ + fprintf(stderr, "getpathlastnode: no node\n"); +#endif + return NODE_ENTRY_ERROR_NODE; } +#if DEBUG_READ + fprintf(stderr, "getpathlastnode: got node\n"); +#endif + unsigned char kb; while (1) { kb = keybits(key, level); @@ -674,7 +1018,7 @@ writenode(permdb_object *state, node_object node, const char *cachekey) #if DEBUG_WRITE fprintf(stderr, "writing node: offset %llu\n", offset); #endif - writebuffer_add(&state->indexfile, data, strlen(nodemagic) + sizeof(node_object)); + writebuffer_add(&state->indexfile, data, sizeof(index_node_cookie) + sizeof(node_object)); free(data); @@ -709,16 +1053,16 @@ memsub(void *src, size_t offset, size_t length) static unsigned char * readdatakey(permdb_object *state, node_offset offset) { - unsigned char *data = read_internal_data(state, offset, strlen(datamagic) + keylen); + unsigned char *data = read_internal_data(state, offset, sizeof(data_entry_cookie) + keylen); if (data == NULL) { return NULL; } - if (memcmp(datamagic, data, strlen(datamagic)) != 0) { + if (memcmp(&data_entry_cookie, data, sizeof(data_entry_cookie)) != 0) { free(data); - set_error(state, "incorrect magic %02x %02x\n", (unsigned char)data[0], (unsigned char)data[1]); + set_error(state, "incorrect magic (entry) %02x%02x\n", (unsigned char)data[0], (unsigned char)data[1]); return NULL; } - unsigned char *result = memsub(data, strlen(datamagic), keylen); + unsigned char *result = memsub(data, sizeof(data_entry_cookie), keylen); free(data); return result; } @@ -736,17 +1080,17 @@ readnet32(void *ptr) static unsigned char * readdatakeyandlen(permdb_object *state, node_offset offset, size_t *datalen) { - unsigned char *data = read_internal_data(state, offset, strlen(datamagic) + keylen + 4); + unsigned char *data = read_internal_data(state, offset, sizeof(data_entry_cookie) + keylen + 4); if (data == NULL) { return NULL; } - if (memcmp(datamagic, data, strlen(datamagic)) != 0) { + if (memcmp(&data_entry_cookie, data, sizeof(data_entry_cookie)) != 0) { free(data); - set_error(state, "incorrect magic %02x %02x\n", (unsigned char)data[0], (unsigned char)data[1]); + set_error(state, "incorrect magic (entry) %02x%02x\n", (unsigned char)data[0], (unsigned char)data[1]); return NULL; } - unsigned char *result = memsub(data, strlen(datamagic), keylen); - *datalen = readnet32(data+strlen(datamagic)+keylen); + unsigned char *result = memsub(data, sizeof(data_entry_cookie), keylen); + *datalen = readnet32(data+sizeof(data_entry_cookie)+keylen); free(data); return result; } @@ -754,7 +1098,7 @@ readdatakeyandlen(permdb_object *state, node_offset offset, size_t *datalen) static unsigned char * readdata(permdb_object *state, node_offset offset, size_t datalen) { - return read_internal_data(state, offset + strlen(datamagic) + keylen + 4, datalen); + return read_internal_data(state, offset + sizeof(data_entry_cookie) + keylen + 4, datalen); } @@ -767,7 +1111,7 @@ writedata(permdb_object *state, const unsigned char *key, const unsigned char *d #if DEBUG_WRITE fprintf(stderr, "writing data: offset %llu\n", offset); #endif - writebuffer_add(&state->datafile, datamagic, strlen(datamagic)); + writebuffer_add(&state->datafile, &data_entry_cookie, sizeof(data_entry_cookie)); writebuffer_add(&state->datafile, key, keylen); writebuffer_add(&state->datafile, &coded_datalength, 4); writebuffer_add(&state->datafile, data, datalength); @@ -776,6 +1120,95 @@ writedata(permdb_object *state, const unsigned char *key, const unsigned char *d } int +addindex(permdb_object *state, const unsigned char *key, unsigned int keylength, node_offset dataoffset) +{ + struct nodelist nodes; + init_nodelist(&nodes); + + char kb = getpath(state, key, &nodes); + + if (kb == -1) { + free_nodelist(&nodes); + return -1; + } + + unsigned int foundlevel = nodes.pos - 1; + + if (foundlevel >= nodes.len) { + fprintf(stderr, "tried to read after end of allocated list\n"); + free_nodelist(&nodes); + return 0; + } + node_object lastnode = nodes.nodes[foundlevel]; + if (get_entry_in_node(lastnode, (unsigned char) kb) == 0) { + addentry(&lastnode, keybits(key, foundlevel), buildentry(1, dataoffset)); + } else { + node_offset olddataoffset = entryoffset(get_entry_in_node(lastnode, (unsigned char) kb)); + unsigned char *olddatakey = readdatakey(state, olddataoffset); + if (olddatakey == NULL) { + free_nodelist(&nodes); + return -1; + } + if (memcmp(olddatakey, key, keylen) == 0) { + free_nodelist(&nodes); + free(olddatakey); + return 0; + } + unsigned int level = foundlevel + 1; + while (keybits(key, level) == keybits(olddatakey, level)) { + level++; + } + node_object leafnode = nullnode; + addentry(&leafnode, keybits(key, level), buildentry(1, dataoffset)); + addentry(&leafnode, keybits(olddatakey, level), buildentry(1, olddataoffset)); + free(olddatakey); + { + char *cachekey = keypart(key, level); + put_node_in_dirtynodes(state, cachekey, leafnode); + free(cachekey); + } + level--; + while (level > foundlevel) { + node_object node = nullnode; + addentry(&node, keybits(key, level), NODE_ENTRY_DIRTY_NODE); + char *cachekey = keypart(key, level); + put_node_in_dirtynodes(state, cachekey, node); + free(cachekey); + level--; + } + overwriteentry(&lastnode, keybits(key, foundlevel), NODE_ENTRY_DIRTY_NODE); + } + + int level = (int) foundlevel; + + { + char *cachekey = keypart(key, (unsigned int) level); + put_node_in_dirtynodes(state, cachekey, lastnode); + free(cachekey); + } + + level--; + while (level >= 0) { + if (level >= (int) nodes.len) { + fprintf(stderr, "tried to read after end of allocated list\n"); + free_nodelist(&nodes); + return 0; + } + node_object node = nodes.nodes[level]; + overwriteentry(&node, keybits(key, (unsigned int) level), NODE_ENTRY_DIRTY_NODE); + char *cachekey = keypart(key, (unsigned int) level); + put_node_in_dirtynodes(state, cachekey, node); + free(cachekey); + level--; + } + + free_nodelist(&nodes); + + return 1; +} + + +int addvalue(permdb_object *state, const unsigned char *key, unsigned int keylength, const unsigned char *data, size_t datalength) { struct nodelist nodes; @@ -870,9 +1303,16 @@ getvalue(permdb_object *state, const unsigned char *key, size_t keylength, size_ { node_entry entry = getpathlastnode(state, key); if (entry == 0) { +#if DEBUG_READ + fprintf(stderr, "getvalue: no node\n"); +#endif return NULL; } +#if DEBUG_READ + fprintf(stderr, "getvalue: got node\n"); +#endif + node_offset olddataoffset = entryoffset(entry); unsigned char *datakey = readdatakeyandlen(state, olddataoffset, datalen); @@ -972,6 +1412,9 @@ committree(permdb_object *state) unsigned int ncommits = ndirtynodes; unsigned int i; +#if DEBUG_WRITE + fprintf(stderr, "committing %d dirty nodes at offset %llu\n", ncommits, state->indexfile.datasize); +#endif for (i = 0; i < ncommits; i++) { get_node_from_dirtynodes(state, ""); char *key = commitlist[i]; @@ -997,11 +1440,50 @@ committree(permdb_object *state) free(commitlist); +#if DEBUG_WRITE + fprintf(stderr, "writing data commit trailer at offset %llu\n", state->datafile.datasize); +#endif + + int data_commit_padding_size = calc_padding(state->datafile.datasize, 4); + int data_commit_trailer_size = SHA256_DIGEST_SIZE + 8; + unsigned char *data_commit_trailer = malloc(data_commit_trailer_size); + uint8_t padding[4] = {0, 0, 0, 0}; + writebuffer_add(&state->datafile, data_commit_start_cookie, 8); + writebuffer_add(&state->datafile, padding, data_commit_padding_size); + uint32_t data_commit_length = htonl(state->datafile.datasize - state->datafile.lastcommit + sizeof(uint32_t)); + writebuffer_add(&state->datafile, &data_commit_length, sizeof(uint32_t)); + sha256_digest(&state->datafile.commit_checksum_context, SHA256_DIGEST_SIZE, data_commit_trailer); + memcpy(data_commit_trailer + SHA256_DIGEST_SIZE, &data_commit_end_cookie, sizeof(data_commit_end_cookie)); + writebuffer_add(&state->datafile, data_commit_trailer, data_commit_trailer_size); + +#if DEBUG_WRITE + fprintf(stderr, "finished writing data commit trailer at offset %llu\n", state->datafile.datasize); +#endif + free(data_commit_trailer); + if (writebuffer_flush(&state->datafile) == -1) { set_error(state, "data file flushing failed\n"); return -1; } + state->datafile.lastcommit = state->datafile.datasize; + +#if DEBUG_WRITE + fprintf(stderr, "writing index commit trailer at offset %llu\n", state->indexfile.datasize); +#endif + + uint64_t index_commit_length = state->indexfile.datasize - state->indexfile.lastcommit; + unsigned char *index_commit_trailer = malloc(INDEX_COMMIT_TRAILER_SIZE); + memcpy(index_commit_trailer, &index_commit_length, 8); + sha256_digest(&state->indexfile.commit_checksum_context, SHA256_DIGEST_SIZE, index_commit_trailer + 8); + memcpy(index_commit_trailer + 8 + SHA256_DIGEST_SIZE, &index_commit_cookie, 8); + writebuffer_add(&state->indexfile, index_commit_trailer, INDEX_COMMIT_TRAILER_SIZE); + +#if DEBUG_WRITE + fprintf(stderr, "finished writing index commit trailer at offset %llu\n", state->indexfile.datasize); +#endif + free(index_commit_trailer); + if (writebuffer_flush(&state->indexfile) == -1) { set_error(state, "index file flushing failed\n"); return -1; @@ -1025,6 +1507,9 @@ portloop(permdb_object *state) ssize_t len; while ((len = read_command(buf, sizeof(buf)-1, 4)) > 0) { if (buf[0] == 0) { +#if DEBUG_PORT + fprintf(stderr, "get\n"); +#endif if (len != keylen+1) { write_reply(NULL, 0, 4); continue; @@ -1039,8 +1524,14 @@ portloop(permdb_object *state) } else { write_reply(result, datalen, 4); } +#if DEBUG_PORT + fprintf(stderr, "get reply\n"); +#endif free(result); } else if (buf[0] == 1) { +#if DEBUG_PORT + fprintf(stderr, "add\n"); +#endif if (len < (keylen + 1)) { write_reply(NULL, 0, 4); fprintf(stderr, "invalid addvalue command, length was %zd\n", len); @@ -1059,7 +1550,13 @@ portloop(permdb_object *state) unsigned char result_byte = (unsigned char) result; write_reply(&result_byte, 1, 4); } +#if DEBUG_PORT + fprintf(stderr, "add reply\n"); +#endif } else if (buf[0] == 2) { +#if DEBUG_PORT + fprintf(stderr, "commit\n"); +#endif if (len != 1) { write_reply(NULL, 0, 4); fprintf(stderr, "invalid commit command, length was %zd\n", len); @@ -1074,8 +1571,14 @@ portloop(permdb_object *state) } else { unsigned char result_byte = (unsigned char) result; write_reply(&result_byte, 1, 4); - } + } +#if DEBUG_PORT + fprintf(stderr, "commit reply\n"); +#endif } else { +#if DEBUG_PORT + fprintf(stderr, "unknown command\n"); +#endif write_reply(NULL, 0, 4); } } diff --git a/c_src/permdb.h b/c_src/permdb.h index ded6754..7394143 100644 --- a/c_src/permdb.h +++ b/c_src/permdb.h @@ -18,6 +18,7 @@ typedef struct node_object { #define NODE_ENTRY_DIRTY_NODE 0x7FFFFFFFFFFFFFFFULL #define NODE_ENTRY_ISDATA 0x8000000000000000ULL #define NODE_ENTRY_OFFSET_MASK 0x7FFFFFFFFFFFFFFFULL +#define NODE_ENTRY_ERROR_NODE 0xFFFFFFFFFFFFFFFFULL struct permdb_object; -- cgit v1.1