%%% Copyright (c) 2014, NORDUnet A/S. %%% See LICENSE for licensing information. %%% %%% Implementation of a history tree as described in Efficient Data %%% Structures for Tamper-Evident Logging [0]. This implementation %%% follows RFC 6962 and differs from [0] only in how non-full trees %%% are handled. %%% %%% [0] https://www.usenix.org/event/sec09/tech/full_papers/crosby.pdf %%% %%% Useful terminology: %%% E -- Entry, stored in leaf nodes. %%% I(i,r) -- Inner node with index i, on layer r. %%% A(v)(i,r) -- Hash of I(i,r) in tree of version v. %%% d -- Depth of tree. %%% %%% Nodes are identified by their index i and layer r. %%% I(i,r) has left child I(i,r-1) and right child I(i+2^(r-1),r-1). %%% %%% A version-n tree stores n+1 entries. %%% In a version-n tree, I(i,r) is frozen when n >= i + 2^r - 1. %%% %%% Data types: %%% history_tree %%% version (integer) %%% head (reference to a frozen or warm node) %%% frozen nodes (ets tables handled by ts) %%% warm nodes (ets table handled by ts) %%% %%% Interface: %%% create tree (-> tree) %%% add entry (data -> tree) %%% get hash of leaf or inner node (i, r -> hash) %%% get inclusion proof for leaf i in version-n tree (i, n -> hashes) %%% get consistency proof between trees of version n and m (n, m -> hashes) %%% -module(ht). -include("$CTROOT/plop/include/plop.hrl"). -include_lib("eunit/include/eunit.hrl"). -import(lists, [foreach/2, foldl/3, reverse/1]). -export_type([history_tree/0, inner/0]). -export([new/0, new/1, destroy/1, size/1, add/2]). -export([get_hash/3, get_incl/3, get_cons/3]). -export([tree_hash/1, tree_hash/2]). %%%%%%%%%%%%%%%%%%%% %% Public interface. %%%%%%%%%%%%%%%%%%%% %% Data types. -record(history_tree, {version :: integer(), store :: ts:tree_store()}). -type history_tree() :: #history_tree{}. -record(inner, {index :: non_neg_integer(), % 27bit integer => max 134e6 entries layer :: non_neg_integer(), % 5bit integer hash :: binary()}). -type inner() :: #inner{}. %%%%%%%%%%%%%%%%%%%% %% Public interface. size(#history_tree{version = V}) -> V + 1. -spec new() -> history_tree(). new() -> #history_tree{version = -1, store = ts:new()}. -spec new(non_neg_integer()) -> history_tree(). new(Version) -> foldl(fun(#mtl{entry = E}, Tree) -> D = (E#timestamped_entry.entry)#plop_entry.data, add(Tree, D) % Return value -> Tree in next invocation. end, new(), db:get_by_index_sorted(0, Version)). destroy(Tree) -> ts:delete(Tree#history_tree.store). %% @doc Which layers need to change when creating a version-n tree? %% Layer 0 is where the new leaf is added and always needs %% updating. Apart from 0, the layers with a number corresponding to %% the "positions of the bits set in n" are the ones that need to be %% touched. This assumes a somewhat unorthodox bit numbering where the %% least significant bit has number 1 (rather than 0). %% %% For example: %% A version 2 (0010) tree has had changes to layer 2 %% A version 5 (0101) tree has had changes to layers 1 and 3 %% A version 8 (1000) tree has had changes to layer 4 -spec add(history_tree(), binary()) -> history_tree(). add(#history_tree{version = V, store = Store}, Entry) -> NewVersion = V + 1, LeafIndex = NewVersion, LeafHash = mkleafhash(Entry), LayerMap = reverse([B || <> <= binary:encode_unsigned(NewVersion)]), #history_tree{version = NewVersion, store = update(ts:store(Store, {LeafIndex, 0}, LeafHash), 1, LayerMap, #inner{index = LeafIndex, layer = 0, hash = LeafHash})}. -spec tree_hash(history_tree()) -> binary(). tree_hash(Tree) -> tree_hash(Tree, Tree#history_tree.version). -spec tree_hash(history_tree(), integer()) -> binary(). tree_hash(_, -1) -> hash(""); tree_hash(Tree, Version) -> get_hash(Tree, Version, {0, depth(Tree) - 1}). -spec get_hash(history_tree(), non_neg_integer(), tuple()) -> binary(). get_hash(#history_tree{store = S}, _Version, IR) -> %% TODO: Use Version! {{_I, _R}, Hash} = ts:retrieve(S, IR), Hash. %-spec get_incl/3 get_incl(_, _, _) -> nyi. %-spec get_cons/3 get_cons(_, _, _) -> nyi. %% Private. %% -spec head(history_tree()) -> inner(). %% head(#history_tree{store = Store}) -> %% {{I, R}, Hash} = ts:retrieve(Store, {0, 0}), %% #inner{index = I, layer = R, hash = Hash}. %% @doc Return position of highest bit set, counting from the least %% significant bit, starting at 1. bitpos_first_set(N) -> L = [Bit || <> <= binary:encode_unsigned(N)], length(L) - ffs(L, 0). ffs([], Acc) -> Acc; ffs([H|T], Acc) -> case H of 0 -> ffs(T, Acc + 1); _ -> Acc end. depth(#history_tree{version = -1}) -> 0; depth(#history_tree{version = V}) -> bitpos_first_set(V) + 1. -spec update(ts:tree_store(), pos_integer(), list(), inner()) -> ts:tree_store(). update(Store, _, [], _) -> Store; update(Store, CurrentLayer, [UpdateThisLayerP|LayerMap], Child = #inner{index = ChildIndex, layer = ChildLayer, hash = ChildHash}) -> case UpdateThisLayerP == 1 of true -> %% Create/update the parent of ChildHash at ChildIndex. %% %% Parent index is equal to child index with its r lowest %% bits stripped off, where r is current layer. %% %% Other child is found by comparing index of parent and %% known child. If they're the same, the known child is %% the left child and its sibling is found on the same %% layer. If they differ, the known child is the right %% child and the left child is to be found one layer below %% the parent, same index. Index = strip_low_bits(ChildIndex, CurrentLayer), Hash = case Index == ChildIndex of true -> {_SibIR, SibHash} = ts:retrieve(Store, {Index + (1 bsl ChildLayer) - 1, ChildLayer}), mkinnerhash(ChildHash, SibHash); _ -> {_SibIR, SibHash} = ts:retrieve(Store, {Index, CurrentLayer - 1}), mkinnerhash(SibHash, ChildHash) end, update(ts:store(Store, {Index, CurrentLayer}, Hash), CurrentLayer + 1, LayerMap, #inner{index = Index, layer = CurrentLayer, hash = Hash}); _ -> update(Store, CurrentLayer + 1, LayerMap, Child) end. strip_low_bits(N, Nbits) -> (N bsr Nbits) bsl Nbits. hash(Data) -> crypto:hash(sha256, Data). mkleafhash(Data) -> hash([<<"\x00">>, Data]). mkinnerhash(Hash1, Hash2) -> hash([<<"\x01">>, Hash1, Hash2]). %%%%%%%%%%%%%%%%%%%% %% Testing ht. -define(TEST_VECTOR_LEAVES, ["", "\x00", "\x10", " !", "01", "@ABC", "PQRSTUVW", "`abcdefghijklmno"]). -define(TEST_VECTOR_HASHES, ["6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", "fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125", "aeb6bcfe274b70a14fb067a5e5578264db0fa9b51af5e0ba159158f329e06e77", "d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7", "4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4", "76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c", "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]). %% @doc Build trees using add/2 and mth/2 and compare the resulting %% tree hashes. add_test() -> lists:foreach( fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X), ?assertEqual(mth(L), tree_hash(mktree(L))) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). %%%%%%%%%%%%%%%%%%%% %% Testing the test helpers. mth_test() -> lists:foreach( fun(X) -> ?assertEqual( mth(lists:sublist(?TEST_VECTOR_LEAVES, X)), hex:hexstr_to_bin(lists:nth(X, ?TEST_VECTOR_HASHES))) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). %%%%%%%%%%%%%%%%%%%% %% Test helpers. mktree(L) -> mktree(new(), L). mktree(Tree, []) -> Tree; mktree(Tree, [H|T]) -> mktree(add(Tree, H), T). %% @doc Return the Merkle Tree Head for the leaves in L. Implements %% the algorithm in section 2.1 of RFC 6962. Used for testing. -spec mth(list()) -> binary(). mth([]) -> hash(<<"">>); mth(L) -> case length(L) of 1 -> hash([<<"\x00">>, L]); _ -> Split = 1 bsl (bitpos_first_set(length(L) - 1) - 1), {L1, L2} = lists:split(Split, L), % TODO: PERF hash([<<"\x01">>, mth(L1), mth(L2)]) end.