diff options
author | Stef Walter <stefw@gnome.org> | 2013-03-20 09:33:04 +0100 |
---|---|---|
committer | Stef Walter <stefw@gnome.org> | 2013-03-20 10:54:00 +0100 |
commit | f45942a4fc3e1c5219e9b5201b82203337ee7280 (patch) | |
tree | f83313676d1c8de9dbc48d161e16c13264bc8049 /common | |
parent | 1dc227b4fce16fcc721276925492f4ba4db00b4f (diff) |
hash: Add the murmur2 hash and start using it
Add implementation of the murmur2 hash function, and start using
it for our dictionaries. Our implementation is incremental
like our other hash functions.
Also remove p11_oid_hash() which wasn't being used.
In addition fix several tests whose success was based on the
way that the dictionary hashed. This was a hidden testing bug.
Diffstat (limited to 'common')
-rw-r--r-- | common/attrs.c | 11 | ||||
-rw-r--r-- | common/dict.c | 11 | ||||
-rw-r--r-- | common/hash.c | 126 | ||||
-rw-r--r-- | common/hash.h | 7 | ||||
-rw-r--r-- | common/oid.c | 17 | ||||
-rw-r--r-- | common/oid.h | 2 | ||||
-rw-r--r-- | common/tests/test-hash.c | 71 |
7 files changed, 215 insertions, 30 deletions
diff --git a/common/attrs.c b/common/attrs.c index 5539789..cce1aaf 100644 --- a/common/attrs.c +++ b/common/attrs.c @@ -40,12 +40,14 @@ #include "compat.h" #include "constants.h" #include "debug.h" +#include "hash.h" #include "pkcs11.h" #include "pkcs11x.h" #include <assert.h> #include <stdarg.h> #include <stdio.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> @@ -481,11 +483,12 @@ unsigned int p11_attr_hash (const void *data) { const CK_ATTRIBUTE *attr = data; - unsigned int hash = (unsigned int)attr->type; - const char *p, *end; + uint32_t hash; - for (p = attr->pValue, end = p + attr->ulValueLen ; p != NULL && p != end; p++) - hash = (hash << 5) - hash + *p; + p11_hash_murmur2 (&hash, + &attr->type, sizeof (attr->type), + attr->pValue, (size_t)attr->ulValueLen, + NULL); return hash; } diff --git a/common/dict.c b/common/dict.c index df8c7ac..409e3a4 100644 --- a/common/dict.c +++ b/common/dict.c @@ -35,10 +35,12 @@ #include "debug.h" #include "dict.h" +#include "hash.h" #include <sys/types.h> #include <assert.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> @@ -326,13 +328,8 @@ p11_dict_size (p11_dict *dict) unsigned int p11_dict_str_hash (const void *string) { - const char *p = string; - unsigned int hash = *p; - - if (hash) - for (p += 1; *p != '\0'; p++) - hash = (hash << 5) - hash + *p; - + uint32_t hash; + p11_hash_murmur2 (&hash, string, strlen (string), NULL); return hash; } diff --git a/common/hash.c b/common/hash.c index 59548fa..2041f1f 100644 --- a/common/hash.c +++ b/common/hash.c @@ -540,3 +540,129 @@ p11_hash_md5 (unsigned char *hash, md5_final (&md5, hash); md5_invalidate (&md5); } + +/* + * MurmurHash.c + * MYUtilities + * + * This file created by Jens Alfke on 3/17/08. + * Algorithm & source code by Austin Appleby, released to public domain. + * <http://murmurhash.googlepages.com/> + * Downloaded 3/16/2008. + * Modified slightly by Jens Alfke (use standard uint32_t and size_t types; + * change 'm' and 'r' to #defines for better C compatibility.) + * + */ + +/*----------------------------------------------------------------------------- + * MurmurHash2, by Austin Appleby + * + * Note - This code makes a few assumptions about how your machine behaves - + * + * 1. We can read a 4-byte value from any address without crashing + * 2. sizeof(int) == 4 **Jens: I fixed this by changing 'unsigned int' to 'uint32_t'** + * + * And it has a few limitations - + * + * 1. It will not work incrementally. + * 2. It will not produce the same results on little-endian and big-endian + * machines. + */ + +void +p11_hash_murmur2 (void *hash, + const void *input, + size_t len, + ...) +{ + /* + * 'm' and 'r' are mixing constants generated offline. + * They're not really 'magic', they just happen to work well. + * seed is arbitrarily chosen + */ + + #define m 0x5bd1e995 + #define r 24 + #define seed 42 + + const unsigned char * data = input; + unsigned char overflow[4]; + va_list va; + uint32_t h; + + /* Initialize the hash based on the length */ + va_start (va, len); + h = len; + while (va_arg (va, const void *)) + h += va_arg (va, size_t); + h ^= seed; + va_end (va); + + /* Mix 4 bytes at a time into the hash */ + va_start (va, len); + for (;;) { + uint32_t k; + + if (len >= 4) { + k = *(uint32_t *)data; + data += 4; + len -= 4; + + } else { + size_t num = len; + memcpy (overflow, data, len); + + while (num < 4) { + size_t part; + + data = va_arg (va, const void *); + if (!data) + break; + + /* Combine uint32 from old and new */ + len = va_arg (va, size_t); + part = 4 - num; + if (part > len) + part = len; + memcpy (overflow + num, data, part); + data += part; + len -= part; + num += part; + } + + if (num < 4) { + len = num; + break; + } + + k = *(uint32_t *)overflow; + } + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + } + va_end (va); + + /* Handle the last few bytes of the input array */ + switch(len) { + case 3: h ^= overflow[2] << 16; + case 2: h ^= overflow[1] << 8; + case 1: h ^= overflow[0]; + h *= m; + }; + + /* + * Do a few final mixes of the hash to ensure the last few + * bytes are well-incorporated. + */ + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + assert (sizeof (h) == P11_HASH_MURMUR2_LEN); + memcpy (hash, &h, sizeof (h)); +} diff --git a/common/hash.h b/common/hash.h index f4dfce1..b06438d 100644 --- a/common/hash.h +++ b/common/hash.h @@ -57,4 +57,11 @@ void p11_hash_sha1 (unsigned char *hash, size_t length, ...) GNUC_NULL_TERMINATED; +#define P11_HASH_MURMUR2_LEN 4 + +void p11_hash_murmur2 (void *hash, + const void *input, + size_t length, + ...) GNUC_NULL_TERMINATED; + #endif /* P11_HASH_H_ */ diff --git a/common/oid.c b/common/oid.c index 0c876ec..b4b0bf6 100644 --- a/common/oid.c +++ b/common/oid.c @@ -60,23 +60,6 @@ p11_oid_simple (const unsigned char *oid, (size_t)oid[1] == len - 2); /* matches length */ } -unsigned int -p11_oid_hash (const void *oid) -{ - const unsigned char *v = oid; - unsigned int hash; - int len; - int i; - - len = p11_oid_length (v); - hash = v[0]; - - for (i = 1; i < len; i++) - hash = (hash << 5) - hash + v[i]; - - return hash; -} - bool p11_oid_equal (const void *oid_one, const void *oid_two) diff --git a/common/oid.h b/common/oid.h index 96b7a27..dee6b10 100644 --- a/common/oid.h +++ b/common/oid.h @@ -40,8 +40,6 @@ bool p11_oid_simple (const unsigned char *oid, int len); -unsigned int p11_oid_hash (const void *oid); - bool p11_oid_equal (const void *oid_one, const void *oid_two); diff --git a/common/tests/test-hash.c b/common/tests/test-hash.c index 5e32c85..f57988e 100644 --- a/common/tests/test-hash.c +++ b/common/tests/test-hash.c @@ -35,6 +35,8 @@ #include "config.h" #include "CuTest.h" +#include <assert.h> +#include <stdint.h> #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -128,6 +130,73 @@ test_md5 (CuTest *cu) } } +static void +test_murmur2 (CuTest *cu) +{ + struct { + const char *input; + const char *input2; + int hash; + } fixtures[] = { + { "one", NULL, 1910179066 }, + { "two", NULL, 396151652 }, + { "four", NULL, -2034170174 }, + { "seven", NULL, -588341181 }, + /* Note that these are identical output */ + { "eleven", NULL, -37856894 }, + { "ele", "ven", -37856894 }, + { NULL }, + }; + + uint32_t first; + uint32_t second; + int i; + + assert (sizeof (first) == P11_HASH_MURMUR2_LEN); + for (i = 0; fixtures[i].input != NULL; i++) { + p11_hash_murmur2 ((unsigned char *)&first, + fixtures[i].input, + strlen (fixtures[i].input), + fixtures[i].input2, + fixtures[i].input2 ? strlen (fixtures[i].input2) : 0, + NULL); + + p11_hash_murmur2 ((unsigned char *)&second, + fixtures[i].input, + strlen (fixtures[i].input), + fixtures[i].input2, + fixtures[i].input2 ? strlen (fixtures[i].input2) : 0, + NULL); + + CuAssertIntEquals (cu, fixtures[i].hash, first); + CuAssertIntEquals (cu, fixtures[i].hash, second); + } +} + +static void +test_murmur2_incr (CuTest *cu) +{ + uint32_t first, second; + + p11_hash_murmur2 ((unsigned char *)&first, + "this is the long input!", 23, + NULL); + + p11_hash_murmur2 ((unsigned char *)&second, + "this", 4, + " ", 1, + "is ", 3, + "the long ", 9, + "in", 2, + "p", 1, + "u", 1, + "t", 1, + "!", 1, + NULL); + + CuAssertIntEquals (cu, first, second); +} + int main (void) { @@ -138,6 +207,8 @@ main (void) SUITE_ADD_TEST (suite, test_sha1); SUITE_ADD_TEST (suite, test_sha1_long); SUITE_ADD_TEST (suite, test_md5); + SUITE_ADD_TEST (suite, test_murmur2); + SUITE_ADD_TEST (suite, test_murmur2_incr); CuSuiteRun (suite); CuSuiteSummary (suite, output); |