summaryrefslogtreecommitdiff
path: root/c_src/permdb.c
diff options
context:
space:
mode:
authorMagnus Ahltorp <map@kth.se>2015-09-28 16:42:04 +0200
committerLinus Nordberg <linus@nordu.net>2015-11-11 13:32:37 +0100
commit9b2e72510b33547794207043714f52e16239b3f5 (patch)
tree5e9461b694a4c9c25108e8843122aa42454a1a6f /c_src/permdb.c
parent90760d10d14c11ee4c99826163c206bbf20a77f6 (diff)
Added permdb
Diffstat (limited to 'c_src/permdb.c')
-rw-r--r--c_src/permdb.c1032
1 files changed, 1032 insertions, 0 deletions
diff --git a/c_src/permdb.c b/c_src/permdb.c
new file mode 100644
index 0000000..ccac2f5
--- /dev/null
+++ b/c_src/permdb.c
@@ -0,0 +1,1032 @@
+/*
+ * Copyright (c) 2015, NORDUnet A/S.
+ * See LICENSE for licensing information.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <err.h>
+#include "erlport.h"
+#include "permdb.h"
+#include "hash.h"
+
+const int bytesperentry = 8;
+const int bitsperlevel = 2;
+const int keylen = 32;
+
+const char *nodemagic = "\x8a\x44";
+const char *datamagic = "\xcb\x0e";
+
+typedef struct {
+ int fd;
+ node_offset datasize;
+ node_offset lastcommit;
+ node_offset filesize;
+ void *writebuffer;
+ uint64_t writebufferalloc;
+} buffered_file;
+
+struct permdb_object {
+ Hashtab *nodecache;
+ Hashtab *dirtynodes;
+ buffered_file datafile;
+ buffered_file indexfile;
+ char *error;
+};
+
+node_object nullnode = {{0, 0, 0, 0}};
+
+
+char indexfile_header[16] = "PERMDB IDX FILE ";
+
+void
+print_entry(node_object node)
+{
+ for (int i = 0; i < entriespernode; i++) {
+ fprintf(stderr, " %016llx", node.data[i]);
+ }
+ fprintf(stderr, "\n");
+}
+
+static void
+writebuffer_add(buffered_file *file, const void *data, long length);
+static int
+writebuffer_flush(buffered_file *file);
+
+struct nodecache {
+ node_object value;
+ char key[];
+};
+
+int
+comparenodes(void *a_v, void *b_v)
+{
+ struct nodecache *a = (struct nodecache *)a_v;
+ struct nodecache *b = (struct nodecache *)b_v;
+
+ //fprintf(stderr, "comparing %s and %s\n", a->key, b->key);
+
+ return strcmp(a->key, b->key);
+}
+
+unsigned
+hashnode(void *node_v)
+{
+ struct nodecache *node = (struct nodecache *)node_v;
+
+ unsigned hash = 0;
+
+ for (int i = 0; node->key[i]; i++) {
+ unsigned oldhighest = hash >> 30;
+ hash <<= 2;
+ hash |= oldhighest;
+ hash ^= (node->key[i] & 0x3);
+ }
+
+ //fprintf(stderr, "hashing (%p) %s to %u\n", node, node->key, hash);
+ return hash;
+}
+
+#if DEBUG_CACHE
+static void
+print_hex(const void *data, int length)
+{
+ int i;
+ for (i = 0; i < length; i++) {
+ fprintf(stderr, "%02X ", ((unsigned char *)data)[i]);
+ }
+ fprintf(stderr, "\n");
+}
+#endif
+
+#define DEBUG_CACHE 0
+#define DEBUG_WRITE 0
+#define DEBUG_READ 0
+
+node_object
+get_node_from_cache(permdb_object *state, const char *key)
+{
+#if DEBUG_CACHE
+ fprintf(stderr, "getting cache key %s, ", key);
+#endif
+ struct nodecache *lookupnode = malloc(sizeof(struct nodecache) + strlen(key) + 1);
+ strcpy(lookupnode->key, key);
+ //fprintf(stderr, "sending in lookup node %p: ", lookupnode);
+ //print_hex(lookupnode, sizeof(struct nodecache) + strlen(key) + 1);
+ struct nodecache *node = (struct nodecache *)hashtabsearch(state->nodecache, lookupnode);
+ free(lookupnode);
+ if (node == NULL) {
+#if DEBUG_CACHE
+ fprintf(stderr, "found nothing in cache\n");
+#endif
+ return nullnode;
+ }
+#if DEBUG_CACHE
+ fprintf(stderr, "got cache key %s: ", node->key);
+ print_hex(&node->value, sizeof(struct nodecache));
+#endif
+ return node->value;
+}
+
+node_object
+get_node_from_dirtynodes(permdb_object *state, const char *key)
+{
+#if DEBUG_CACHE
+ fprintf(stderr, "getting key %s, ", key);
+#endif
+ struct nodecache *lookupnode = malloc(sizeof(struct nodecache) + strlen(key) + 1);
+ strcpy(lookupnode->key, key);
+ //fprintf(stderr, "sending in lookup node %p: ", lookupnode);
+ //print_hex(lookupnode, sizeof(struct nodecache) + strlen(key) + 1);
+ struct nodecache *node = (struct nodecache *)hashtabsearch(state->dirtynodes, lookupnode);
+ free(lookupnode);
+ if (node == NULL) {
+#if DEBUG_CACHE
+ fprintf(stderr, "found nothing\n");
+#endif
+ return nullnode;
+ }
+#if DEBUG_CACHE
+ fprintf(stderr, "got key %s: ", node->key);
+ print_hex(&node->value, sizeof(struct nodecache));
+#endif
+ return node->value;
+}
+
+void
+put_node_in_cache(permdb_object *state, const char *key, node_object value)
+{
+#if DEBUG_CACHE
+ fprintf(stderr, "putting cache key %s: ", key);
+ print_hex(&value, sizeof(node_object));
+#endif
+ struct nodecache *node = malloc(sizeof(struct nodecache) + strlen(key) + 1);
+ strcpy(node->key, key);
+ node->value = value;
+ hashtabaddreplace(state->nodecache, node);
+}
+
+void
+put_node_in_dirtynodes(permdb_object *state, const char *key, node_object value)
+{
+#if DEBUG_CACHE
+ fprintf(stderr, "putting key %s: ", key);
+ print_hex(&value, sizeof(node_object));
+#endif
+ struct nodecache *node = malloc(sizeof(struct nodecache) + strlen(key) + 1);
+ strcpy(node->key, key);
+ node->value = value;
+ hashtabaddreplace(state->dirtynodes, node);
+}
+
+static int
+true_cond(void *ptr, void *arg)
+{
+ return TRUE;
+}
+
+int
+free_hash_node(void *ptr, void *arg)
+{
+ free(ptr);
+ return 0;
+}
+
+void
+delete_all_nodes_in_cache(permdb_object *state)
+{
+ hashtabforeach(state->nodecache, free_hash_node, NULL);
+ hashtabcleantab(state->nodecache, true_cond, NULL);
+}
+
+static void
+delete_all_dirty_nodes(permdb_object *state)
+{
+ hashtabforeach(state->dirtynodes, free_hash_node, NULL);
+ hashtabcleantab(state->dirtynodes, true_cond, NULL);
+}
+
+permdb_object *
+permdb_alloc(const char *dbpath)
+{
+ char *idxpath = NULL;
+ if (asprintf(&idxpath, "%s.idx", dbpath) == -1) {
+ return NULL;
+ }
+
+ int fd;
+ int idxfd;
+
+ int flags = O_RDWR|O_CREAT;
+
+ fd = open(dbpath, flags, 0666);
+ if (fd == -1) {
+ warn("open %s", dbpath);
+ return NULL;
+ }
+
+ idxfd = open(idxpath, flags, 0666);
+ if (idxfd == -1) {
+ warn("open %s", idxpath);
+ return NULL;
+ }
+
+ free(idxpath);
+
+ permdb_object *state = malloc(sizeof(permdb_object));
+ state->datafile.fd = fd;
+ state->indexfile.fd = idxfd;
+ state->nodecache = hashtabnewf(1000000, comparenodes, hashnode, HASHTAB_GROW);
+ state->dirtynodes = hashtabnewf(1000000, comparenodes, hashnode, HASHTAB_GROW);
+ state->datafile.filesize = lseek(fd, 0, SEEK_END);
+ state->datafile.datasize = state->datafile.filesize;
+ state->datafile.lastcommit = state->datafile.datasize;
+ state->datafile.writebufferalloc = 1024*1024;
+ state->datafile.writebuffer = calloc(state->datafile.writebufferalloc, 1);
+ state->indexfile.filesize = lseek(idxfd, 0, SEEK_END);
+ state->indexfile.datasize = state->indexfile.filesize;
+ state->indexfile.lastcommit = state->indexfile.datasize;
+ state->indexfile.writebufferalloc = 1024*1024;
+ state->indexfile.writebuffer = calloc(state->indexfile.writebufferalloc, 1);
+ state->error = NULL;
+ if (state->datafile.filesize == 0 && state->indexfile.filesize == 0) {
+#if DEBUG_WRITE
+ fprintf(stderr, "writing header\n");
+#endif
+ writebuffer_add(&state->indexfile, indexfile_header, sizeof(indexfile_header));
+ writebuffer_flush(&state->indexfile);
+ state->indexfile.lastcommit = sizeof(indexfile_header);
+ } else if (state->datafile.filesize > 0 && state->indexfile.filesize == 0) {
+ /*
+ * non-empty data file and empty index file means that
+ * the index should be rebuilt, but this is not
+ * supported yet
+ */
+ warnx("non-empty data file but empty index");
+ return NULL;
+ }
+ return state;
+}
+
+void
+permdb_free(permdb_object *state)
+{
+ free(state->datafile.writebuffer);
+ free(state->indexfile.writebuffer);
+ delete_all_nodes_in_cache(state);
+ delete_all_dirty_nodes(state);
+ hashtabrelease(state->dirtynodes);
+ hashtabrelease(state->nodecache);
+ free(state);
+}
+
+long
+writebuffer_length(buffered_file *file)
+{
+ return file->datasize - file->filesize;
+}
+
+static int
+writebuffer_flush_nosync(buffered_file *file);
+
+static void
+writebuffer_add(buffered_file *file, const void *data, long length)
+{
+ long needspace = length + writebuffer_length(file);
+
+ if (needspace > file->writebufferalloc) {
+ writebuffer_flush_nosync(file);
+
+ needspace = length + writebuffer_length(file);
+
+ if (needspace > file->writebufferalloc) {
+
+ long newsize = file->writebufferalloc * 2;
+ if (needspace > newsize) {
+ newsize = needspace;
+ }
+ file->writebuffer = realloc(file->writebuffer, newsize);
+ memset(file->writebuffer + file->writebufferalloc, 0, newsize - file->writebufferalloc);
+ file->writebufferalloc = newsize;
+ }
+ }
+
+ memcpy(file->writebuffer + writebuffer_length(file), data, length);
+ file->datasize += length;
+}
+
+static int
+writebuffer_flush_nosync(buffered_file *file)
+{
+ int ret;
+
+ int length = writebuffer_length(file);
+ ret = write(file->fd, file->writebuffer, length);
+ if (ret != length) {
+ return -1;
+ }
+ file->filesize += ret;
+ return 0;
+}
+
+static int
+writebuffer_flush(buffered_file *file)
+{
+ int ret;
+
+ ret = writebuffer_flush_nosync(file);
+ if (ret) {
+ return ret;
+ }
+
+ ret = fsync(file->fd);
+ return ret;
+}
+
+static unsigned char
+keybits(const char *key, unsigned int level)
+{
+ unsigned char b = key[level/4];
+
+ return (b >> (6-(level*2) % 8)) & 0x3;
+}
+
+char *
+keypart(const char *key, unsigned int level)
+{
+ char *result = malloc(level+1);
+
+ int i;
+ for (i = 0; i < level; i++) {
+ unsigned char b = keybits(key, i);
+ result[i] = b + '0';
+ }
+
+ result[level] = 0;
+
+ return result;
+}
+
+char *
+packnode(node_object node)
+{
+ char *data = malloc(strlen(nodemagic) + sizeof(node_object));
+ memcpy(data, nodemagic, 2);
+ memcpy(data+2, &node, sizeof(node_object));
+
+ return data;
+}
+
+void
+addentry(node_object *node, unsigned char n, node_entry entry)
+{
+ assert(node->data[n] == 0);
+ node->data[n] = entry;
+}
+
+void
+overwriteentry(node_object *node, unsigned char n, node_entry entry)
+{
+ assert(node->data[n] != 0);
+ node->data[n] = entry;
+}
+
+node_entry
+get_entry_in_node(node_object node, unsigned char n)
+{
+ return node.data[n];
+}
+
+static void
+set_error(permdb_object *state, const char * __restrict, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
+
+static void
+set_error(permdb_object *state, const char *format, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, format);
+ char *formatted = NULL;
+ ret = vasprintf (&formatted, format, args);
+ if (state->error) {
+ free(state->error);
+ }
+ fprintf(stderr, "error: %s\n", formatted);
+ state->error = formatted;
+ va_end(args);
+}
+
+static node_object
+unpacknode(permdb_object *state, const char *data, int datalen)
+{
+ if (memcmp(nodemagic, data, 2) != 0) {
+ set_error(state, "incorrect magic %02x%02x\n", (unsigned char)data[0], (unsigned char)data[1]);
+ return nullnode;
+ }
+
+ if (datalen != sizeof(node_object) + 2) {
+ return nullnode;
+ }
+
+ node_object *node = (node_object *)(data + 2);
+
+ return *node;
+}
+
+char *
+read_internal_data(permdb_object *state, node_offset offset, unsigned int length)
+{
+ char *result = malloc(length);
+ buffered_file *file = &state->datafile;
+#if DEBUG_READ
+ fprintf(stderr, "reading data: offset %llu\n", offset);
+#endif
+
+ if (offset >= file->filesize) {
+ node_offset writebufferoffset = offset - file->filesize;
+ if (offset + length > file->datasize) {
+ set_error(state, "pread: not enough data for offset %llu and length %u\n", offset, length);
+ return NULL;
+ }
+ memcpy(result, file->writebuffer + writebufferoffset, length);
+ } else {
+ if (offset + length > file->filesize) {
+ set_error(state, "pread: trying to read over file/writebuffer boundary for offset %llu and length %u\n", offset, length);
+ return NULL;
+ }
+
+ int ret = pread(file->fd, result, length, offset);
+ if (ret != length) {
+ free(result);
+ set_error(state, "short pread: %u (wanted %u) at offset %llu\n", ret, length, offset);
+ return NULL;
+ }
+ }
+
+ return result;
+}
+
+int
+isnullnode(node_object node)
+{
+ return node.data[0] == 0 && node.data[1] == 0 && node.data[2] == 0 && node.data[3] == 0;
+}
+
+node_object
+readnode(permdb_object *state, node_offset offset, const char *cachekey)
+{
+#if DEBUG_READ
+ fprintf(stderr, "reading node: offset %llu cachekey '%s'\n", offset, cachekey ? cachekey : "none");
+#endif
+ if (cachekey) {
+ node_object dirtynode = get_node_from_dirtynodes(state, cachekey);
+ if (!isnullnode(dirtynode)) {
+ return dirtynode;
+ }
+
+ if (offset == NODE_ENTRY_DIRTY_NODE) {
+ set_error(state, "referring to dirty node at key %s, but node not in dirtynodes\n", cachekey);
+ return nullnode;
+ }
+
+ if (state->indexfile.lastcommit == 16) {
+ return nullnode;
+ }
+
+ node_object cachednode = get_node_from_cache(state, cachekey);
+ if (!isnullnode(cachednode)) {
+ return cachednode;
+ }
+ }
+
+ int length = strlen(nodemagic) + sizeof(node_object);
+
+ char *buffer = malloc(length);
+ int ret = pread(state->indexfile.fd, buffer, length, offset);
+ if (ret != length) {
+ free(buffer);
+ set_error(state, "node not present at %llu: length %d\n", offset, ret);
+ return nullnode;
+ }
+
+
+ node_object result = unpacknode(state, buffer, length);
+ free(buffer);
+ return result;
+}
+
+
+int
+isdata(node_entry entry)
+{
+ return (entry & NODE_ENTRY_ISDATA) == NODE_ENTRY_ISDATA;
+}
+
+node_offset
+entryoffset(node_entry entry)
+{
+ return entry & NODE_ENTRY_OFFSET_MASK;
+}
+
+struct nodelist {
+ node_object *nodes;
+ int len;
+ int pos;
+};
+
+void
+add_entry_to_nodelist(struct nodelist *list, node_object node)
+{
+ list->nodes[list->pos] = node;
+ list->pos++;
+}
+
+void
+init_nodelist(struct nodelist *list)
+{
+ list->len = keylen * 8 / bitsperlevel;
+ list->nodes = malloc(sizeof(node_object) * list->len);
+ list->pos = 0;
+}
+
+void
+free_nodelist(struct nodelist *list)
+{
+ free(list->nodes);
+}
+
+static char
+getpath(permdb_object *state, const char *key, struct nodelist *nodes)
+{
+ unsigned int level = 0;
+
+ node_offset rootoffset = state->indexfile.lastcommit - (strlen(nodemagic) + sizeof(node_object));
+
+ node_object node = readnode(state, rootoffset, "");
+
+ if (isnullnode(node)) {
+ fprintf(stderr, "cannot find root node at offset %llu (lastcommit %llu)\n", rootoffset, state->indexfile.lastcommit);
+ if (nodes->pos >= nodes->len) {
+ fprintf(stderr, "tried to write after end of allocated list\n");
+ return -1;
+ }
+ add_entry_to_nodelist(nodes, nullnode);
+ return 0;
+ }
+
+ while (1) {
+ unsigned char kb = keybits(key, level);
+ node_entry entry = get_entry_in_node(node, kb);
+ if (nodes->pos >= nodes->len) {
+ fprintf(stderr, "tried to write after end of allocated list: level %d\n", level);
+ return -1;
+ }
+ add_entry_to_nodelist(nodes, node);
+ if (entry == 0 || isdata(entry)) {
+ return kb;
+ }
+ level++;
+ char *kp = keypart(key, level);
+ node = readnode(state, entryoffset(entry), kp);
+ if (isnullnode(node)) {
+ free(kp);
+ return -1;
+ }
+ free(kp);
+ }
+
+}
+
+static node_entry
+getpathlastnode(permdb_object *state, const char *key)
+{
+ unsigned int level = 0;
+
+ node_offset rootoffset = state->indexfile.lastcommit - (strlen(nodemagic) + sizeof(node_object));
+ node_object node = readnode(state, rootoffset, "");
+
+ if (isnullnode(node)) {
+ return 0;
+ }
+
+ unsigned char kb;
+ while (1) {
+ kb = keybits(key, level);
+ node_entry entry = get_entry_in_node(node, kb);
+ if (entry == 0 || isdata(entry)) {
+ break;
+ }
+ level++;
+ char *kp = keypart(key, level);
+ node = readnode(state, entryoffset(entry), kp);
+ free(kp);
+ }
+
+ node_entry entry = get_entry_in_node(node, kb);
+ return entry;
+}
+
+static long
+writenode(permdb_object *state, node_object node, const char *cachekey)
+{
+ node_offset offset = state->indexfile.datasize;
+
+ put_node_in_cache(state, cachekey, node);
+
+ char *data = packnode(node);
+
+#if DEBUG_WRITE
+ fprintf(stderr, "writing node: offset %llu\n", offset);
+#endif
+ writebuffer_add(&state->indexfile, data, strlen(nodemagic) + sizeof(node_object));
+
+ free(data);
+
+ return offset;
+}
+
+node_offset
+datasize(permdb_object *state)
+{
+ return state->indexfile.datasize;
+}
+
+
+node_entry
+buildentry(int isdata, node_offset offset)
+{
+ if (isdata) {
+ return NODE_ENTRY_ISDATA | offset;
+ } else {
+ return offset;
+ }
+}
+
+static void *
+memsub(void *src, int offset, int length)
+{
+ void *result = malloc(length);
+ memcpy(result, src + offset, length);
+ return result;
+}
+
+static char *
+readdatakey(permdb_object *state, node_offset offset)
+{
+ char *data = read_internal_data(state, offset, strlen(datamagic) + keylen);
+ if (data == NULL) {
+ return NULL;
+ }
+ if (memcmp(datamagic, data, strlen(datamagic)) != 0) {
+ free(data);
+ set_error(state, "incorrect magic %02x %02x\n", (unsigned char)data[0], (unsigned char)data[1]);
+ return NULL;
+ }
+ char *result = memsub(data, strlen(datamagic), keylen);
+ free(data);
+ return result;
+}
+
+static char *
+readdatakeyandlen(permdb_object *state, node_offset offset, unsigned int *datalen)
+{
+ char *data = read_internal_data(state, offset, strlen(datamagic) + keylen + 4);
+ if (data == NULL) {
+ return NULL;
+ }
+ if (memcmp(datamagic, data, strlen(datamagic)) != 0) {
+ free(data);
+ set_error(state, "incorrect magic %02x %02x\n", (unsigned char)data[0], (unsigned char)data[1]);
+ return NULL;
+ }
+ char *result = memsub(data, strlen(datamagic), keylen);
+ *datalen = ntohl(*(uint32_t *)(data+strlen(datamagic)+keylen));
+ free(data);
+ return result;
+}
+
+static char *
+readdata(permdb_object *state, node_offset offset, unsigned int datalen)
+{
+ return read_internal_data(state, offset + strlen(datamagic) + keylen + 4, datalen);
+}
+
+
+static node_offset
+writedata(permdb_object *state, const char *key, const char *data, unsigned int datalength)
+{
+ uint32_t coded_datalength = htonl(datalength);
+ node_offset offset = state->datafile.datasize;
+
+#if DEBUG_WRITE
+ fprintf(stderr, "writing data: offset %llu\n", offset);
+#endif
+ writebuffer_add(&state->datafile, datamagic, strlen(datamagic));
+ writebuffer_add(&state->datafile, key, keylen);
+ writebuffer_add(&state->datafile, &coded_datalength, 4);
+ writebuffer_add(&state->datafile, data, datalength);
+
+ return offset;
+}
+
+int
+addvalue(permdb_object *state, const char *key, unsigned int keylength, const char *data, unsigned int datalength)
+{
+ struct nodelist nodes;
+ init_nodelist(&nodes);
+
+ char kb = getpath(state, key, &nodes);
+
+ if (kb == -1) {
+ free_nodelist(&nodes);
+ return -1;
+ }
+
+ int foundlevel = nodes.pos - 1;
+
+ if (foundlevel >= nodes.len) {
+ fprintf(stderr, "tried to read after end of allocated list\n");
+ free_nodelist(&nodes);
+ return 0;
+ }
+ node_object lastnode = nodes.nodes[foundlevel];
+ if (get_entry_in_node(lastnode, kb) == 0) {
+ node_offset dataoffset = writedata(state, key, data, datalength);
+ addentry(&lastnode, keybits(key, foundlevel), buildentry(1, dataoffset));
+ } else {
+ node_offset olddataoffset = entryoffset(get_entry_in_node(lastnode, kb));
+ char *olddatakey = readdatakey(state, olddataoffset);
+ if (olddatakey == NULL) {
+ free_nodelist(&nodes);
+ return -1;
+ }
+ if (memcmp(olddatakey, key, keylen) == 0) {
+ free_nodelist(&nodes);
+ free(olddatakey);
+ return 0;
+ }
+ node_offset dataoffset = writedata(state, key, data, datalength);
+ int level = foundlevel + 1;
+ while (keybits(key, level) == keybits(olddatakey, level)) {
+ level++;
+ }
+ node_object leafnode = nullnode;
+ addentry(&leafnode, keybits(key, level), buildentry(1, dataoffset));
+ addentry(&leafnode, keybits(olddatakey, level), buildentry(1, olddataoffset));
+ free(olddatakey);
+ char *cachekey = keypart(key, level);
+ put_node_in_dirtynodes(state, cachekey, leafnode);
+ free(cachekey);
+ level--;
+ while (level > foundlevel) {
+ node_object node = nullnode;
+ addentry(&node, keybits(key, level), NODE_ENTRY_DIRTY_NODE);
+ char *cachekey = keypart(key, level);
+ put_node_in_dirtynodes(state, cachekey, node);
+ free(cachekey);
+ level--;
+ }
+ overwriteentry(&lastnode, keybits(key, foundlevel), NODE_ENTRY_DIRTY_NODE);
+ }
+
+ int level = foundlevel;
+
+ char *cachekey = keypart(key, level);
+ put_node_in_dirtynodes(state, cachekey, lastnode);
+ free(cachekey);
+
+ level--;
+ while (level >= 0) {
+ if (level >= nodes.len) {
+ fprintf(stderr, "tried to read after end of allocated list\n");
+ free_nodelist(&nodes);
+ return 0;
+ }
+ node_object node = nodes.nodes[level];
+ overwriteentry(&node, keybits(key, level), NODE_ENTRY_DIRTY_NODE);
+ char *cachekey = keypart(key, level);
+ put_node_in_dirtynodes(state, cachekey, node);
+ free(cachekey);
+ level--;
+ }
+
+ free_nodelist(&nodes);
+
+ return 1;
+}
+
+char *
+getvalue(permdb_object *state, const char *key, int keylen, unsigned int *datalen)
+{
+ node_entry entry = getpathlastnode(state, key);
+ if (entry == 0) {
+ return NULL;
+ }
+
+ node_offset olddataoffset = entryoffset(entry);
+
+ char *datakey = readdatakeyandlen(state, olddataoffset, datalen);
+ if (datakey == NULL) {
+ return NULL;
+ }
+ if (memcmp(datakey, key, keylen) != 0) {
+ free(datakey);
+ return NULL;
+ }
+ free(datakey);
+ return readdata(state, olddataoffset, *datalen);
+}
+
+struct keylist {
+ char **keys;
+ int pos;
+};
+
+int
+add_entry_to_keylist(void *ptr, void *arg)
+{
+ struct keylist *list = (struct keylist *) arg;
+ struct nodecache *node = (struct nodecache *)ptr;
+
+ list->keys[list->pos] = strdup(node->key);
+ list->pos++;
+ return 0;
+}
+
+int
+string_length_comparison(const void *a_v, const void *b_v) {
+ char **a = (char **) a_v;
+ char **b = (char **) b_v;
+ return strlen(*b) - strlen(*a);
+}
+
+char **
+sorted(permdb_object *state, int ndirtynodes)
+{
+ struct keylist keylist;
+ keylist.keys = malloc(sizeof(char *) * ndirtynodes);
+ keylist.pos = 0;
+ hashtabforeach(state->dirtynodes, add_entry_to_keylist, &keylist);
+
+#if 0
+ fprintf(stderr, "before sort\n");
+ for (int i = 0; i < ndirtynodes; i++) {
+ fprintf(stderr, " %s\n", keylist.keys[i]);
+ }
+#endif
+
+ qsort(keylist.keys, ndirtynodes, sizeof(char *), string_length_comparison);
+#if 0
+ fprintf(stderr, "after sort\n");
+ for (int i = 0; i < ndirtynodes; i++) {
+ fprintf(stderr, " %p %s\n", keylist.keys[i], keylist.keys[i]);
+ }
+#endif
+ return keylist.keys;
+}
+
+int
+count_entry(void *ptr, void *arg)
+{
+ int *flag = (int *)arg;
+ *flag += 1;
+ return 0;
+}
+
+int
+committree(permdb_object *state)
+{
+ get_node_from_dirtynodes(state, "");
+ int ndirtynodes = 0;
+ hashtabforeach(state->dirtynodes, count_entry, &ndirtynodes);
+ if (ndirtynodes == 0) {
+ return 0;
+ }
+
+ char **commitlist = sorted(state, ndirtynodes);
+
+ char *lastobject = commitlist[ndirtynodes - 1];
+ if (lastobject[0] != '\0') {
+ fprintf(stderr, "sorted list doesn't end with root node\n");
+ free(commitlist);
+ return -1;
+ }
+
+ int ncommits = ndirtynodes;
+ int i;
+ for (i = 0; i < ncommits; i++) {
+ get_node_from_dirtynodes(state, "");
+ char *key = commitlist[i];
+ node_object node = get_node_from_dirtynodes(state, key);
+ assert(get_entry_in_node(node, 0) != NODE_ENTRY_DIRTY_NODE);
+ assert(get_entry_in_node(node, 1) != NODE_ENTRY_DIRTY_NODE);
+ assert(get_entry_in_node(node, 2) != NODE_ENTRY_DIRTY_NODE);
+ assert(get_entry_in_node(node, 3) != NODE_ENTRY_DIRTY_NODE);
+ node_offset offset = writenode(state, node, key);
+ int keylength = strlen(key);
+ if (keylength != 0) {
+ get_node_from_dirtynodes(state, "");
+ char *parent = strdup(key);
+ parent[keylength - 1] = '\0';
+ int entrynumber = key[keylength - 1] - '0';
+ node_object parentnode = get_node_from_dirtynodes(state, parent);
+ overwriteentry(&parentnode, entrynumber, offset);
+ put_node_in_dirtynodes(state, parent, parentnode);
+ free(parent);
+ }
+ free(key);
+ }
+
+ free(commitlist);
+
+ if (writebuffer_flush(&state->datafile) == -1) {
+ set_error(state, "data file flushing failed\n");
+ return -1;
+ }
+
+ if (writebuffer_flush(&state->indexfile) == -1) {
+ set_error(state, "index file flushing failed\n");
+ return -1;
+ }
+
+ state->indexfile.lastcommit = state->indexfile.datasize;
+#if DEBUG_WRITE
+ fprintf(stderr, "setting lastcommit to %llu\n", state->indexfile.lastcommit);
+#endif
+
+ delete_all_dirty_nodes(state);
+
+ return 0;
+}
+
+
+void
+portloop(permdb_object *state)
+{
+ char buf[65536];
+ ssize_t len;
+ while ((len = read_command((unsigned char *)buf, sizeof(buf)-1, 4)) > 0) {
+ if (buf[0] == 0) {
+ if (len != keylen+1) {
+ write_reply(NULL, 0, 4);
+ continue;
+ }
+ assert(len == keylen+1);
+
+ unsigned int datalen;
+ char *result = getvalue(state, (buf+1), keylen, &datalen);
+
+ if (result == NULL) {
+ write_reply(NULL, 0, 4);
+ } else {
+ write_reply((unsigned char *)result, datalen, 4);
+ }
+ free(result);
+ } else if (buf[0] == 1) {
+ unsigned int datalen = len - keylen - 1;
+ char *key = buf + 1;
+ char *data = key + keylen;
+
+ int result = addvalue(state, key, keylen, data, datalen);
+
+ if (result < 0) {
+ write_reply(NULL, 0, 4);
+ } else {
+ unsigned char result_byte = result;
+ write_reply(&result_byte, 1, 4);
+ }
+ } else if (buf[0] == 2) {
+ if (len != 1) {
+ write_reply(NULL, 0, 4);
+ fprintf(stderr, "invalid commit command, length was %ld\n", len);
+ continue;
+ }
+
+ int result = committree(state);
+
+ if (result < 0) {
+ write_reply(NULL, 0, 4);
+ continue;
+ } else {
+ unsigned char result_byte = result;
+ write_reply(&result_byte, 1, 4);
+ }
+ } else {
+ write_reply(NULL, 0, 4);
+ }
+ }
+ if (len == -1) {
+ fprintf(stderr, "read command failed\n");
+ }
+ permdb_free(state);
+}
+