%%% Copyright (c) 2014, NORDUnet A/S. %%% See LICENSE for licensing information. %%% %%% Implementation of a history tree as described in Efficient Data %%% Structures for Tamper-Evident Logging [0]. This implementation %%% follows RFC 6962 and differs from [0] only in how non-full trees %%% are handled. %%% %%% [0] https://www.usenix.org/event/sec09/tech/full_papers/crosby.pdf %%% %%% Hashes of inner nodes and leaves are stored in arrays, one per %%% layer with layer 0 being where the leaves are. The total number of %%% arrays is equal to the depth of the tree. The depth of the tree is %%% ceil(lg2(number of leaves)). %%% Let {r,i} denote the hash with index i on layer r. The first leaf %%% is {0,0}, second is {0,1} and n:th is {0,n-1}. %%% The parent of {r,i} is {r+1,floor(i/2)} (not strictly true because %%% of "placeholder nodes", see update_parent/4). %%% The sibling of {r,i} is {r,i+1} when i is even and {r,i-1} when i %%% is odd. -module(ht). -behaviour(gen_server). -export([size/0, add/1, tree_hash/0, tree_hash/1]). -export([get_incl/2, get_cons/2]). -export([start_link/0, start_link/1, stop/0]). -export([init/1, handle_call/3, terminate/2, handle_cast/2, handle_info/2, code_change/3]). -include("$CTROOT/plop/include/plop.hrl"). -include_lib("eunit/include/eunit.hrl"). -import(lists, [foreach/2, foldl/3, reverse/1]). %% Data types. -record(tree, {version :: integer(), evaluated :: integer(), store :: ts:tree_store()}). -type tree() :: #tree{}. %%%%%%%%%%%%%%%%%%%% %% Public interface. start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). start_link(NEntries) -> gen_server:start_link({local, ?MODULE}, ?MODULE, [NEntries], []). stop() -> gen_server:call(?MODULE, stop). size() -> gen_server:call(?MODULE, size). add(Entry) -> gen_server:call(?MODULE, {add, Entry}). tree_hash() -> gen_server:call(?MODULE, tree_hash). tree_hash(Version) -> gen_server:call(?MODULE, {tree_hash, Version}). get_incl(I, V) -> gen_server:call(?MODULE, {inclusion, I, V}). get_cons(V1, V2) -> gen_server:call(?MODULE, {consistency, V1, V2}). %% gen_server callbacks init([]) -> {ok, new()}; init(Args) -> {ok, new(Args)}. handle_cast(_Request, State) -> {noreply, State}. handle_info(_Info, State) -> {noreply, State}. code_change(_OldVersion, State, _Extra) -> {ok, State}. terminate(_Reason, _State) -> ok. handle_call(stop, _From, State) -> {stop, normal, stopped, State}; handle_call(size, _From, State) -> {reply, State#tree.version + 1, State}; handle_call({add, Entry}, _From, State) -> {reply, ok, add(State, Entry)}; handle_call(tree_hash, _From, State) -> {NewState, Hash} = tree_hash(State, State#tree.version), {reply, Hash, NewState}; handle_call({tree_hash, Version}, _From, State) -> {NewState, Hash} = tree_hash(State, Version), {reply, Hash, NewState}; handle_call({inclusion, _Index, _Version}, _From, State) -> {reply, nyi, State}; handle_call({consistency, _Version1, _Version2}, _From, State) -> {reply, nyi, State}. %%%%%%%%%%%%%%%%%%%% %% Private. -spec get_hash(tree(), non_neg_integer(), tuple()) -> {tree(), binary()}. get_hash(Tree, Version, IR) -> NewTree = update(Tree, Version), Hash = ts:retrieve_hash(NewTree#tree.store, IR), {NewTree, Hash}. %% FIXME: rename to tree_head or maybe just head? -spec tree_hash(tree(), integer()) -> {tree(), binary()}. tree_hash(Tree, -1) -> {Tree, hash(<<"">>)}; tree_hash(Tree = #tree{version = V}, Version) when Version == V -> get_hash(Tree, Version, {0, depth(Tree) - 1}); tree_hash(Tree = #tree{version = V}, Version) when Version > V -> {Tree, enotimetravel}; tree_hash(Tree, Version) -> old_version_tree_head(update(Tree, Version), Version). -spec old_version_tree_head(tree(), non_neg_integer()) -> {tree(), binary()}. old_version_tree_head(Tree, Version) -> true = Tree#tree.evaluated >= Version, % ASSERTION %% Go up the tree from the rightmost leaf (index=Version) until a %% left node is found. (There is always one -- the head is a left %% node.) {FirstLeftR, FirstLeftI} = first_left_node(0, Version), %% Walk up the tree from this lowest left node up to and including %% the last right node, rehashing as we go. Calculate the parent %% hash of that node and its sibling. Return that hash. {NewTree, LeftHash} = get_hash(Tree, Version, {FirstLeftI, FirstLeftR}), last_right_node_rehash(NewTree, Version, FirstLeftR, FirstLeftI, LeftHash). -spec last_right_node_rehash(tree(), non_neg_integer(), non_neg_integer(), non_neg_integer(), binary()) -> {tree(), binary()}. last_right_node_rehash(Tree, _Version, _Layer, 0, RightNodeHash) -> {Tree, RightNodeHash}; last_right_node_rehash(Tree, Version, Layer, Index, RightNodeHash) -> {NewTree, Hash} = case right_node_p(Index) of true -> {T2, LHash} = get_hash(Tree, Version, {Index - 1, Layer}), {T2, mkinnerhash(LHash, RightNodeHash)}; false -> {Tree, RightNodeHash} end, last_right_node_rehash(NewTree, Version, Layer + 1, parent(Index), Hash). -spec first_left_node(non_neg_integer(), non_neg_integer()) -> {non_neg_integer(), non_neg_integer()}. first_left_node(Layer, Index) -> case right_node_p(Index) of true -> first_left_node(Layer + 1, parent(Index)); false -> {Layer, Index} end. %% @doc Add an entry but don't update the tree. -spec add(tree(), binary()) -> tree(). add(Tree = #tree{version = V, store = Store}, Entry) -> NewVersion = V + 1, LeafIndex = NewVersion, LeafHash = mkleafhash(Entry), Tree#tree{version = NewVersion, store = ts:store(Store, {LeafIndex, 0}, LeafHash)}. -spec new() -> tree(). new() -> #tree{version = -1, evaluated = -1, store = ts:new()}. -spec new([non_neg_integer()]) -> tree(). new([Version]) when is_integer(Version) -> foldl(fun(#mtl{entry = E}, Tree) -> D = (E#timestamped_entry.entry)#plop_entry.data, add(Tree, D) % Return value -> Tree in next invocation. end, new(), db:get_by_index_sorted(0, Version)); new([List]) when is_list(List) -> foldl(fun(D, Tree) -> add(Tree, D) % Return value -> Tree in next invocation. end, new(), List). %% @doc Calculate hashes in Tree up to and including node with index %% equal to Version. Update Tree.evaluated to reflect the new state. -spec update(tree(), non_neg_integer()) -> tree(). update(Tree, 0) -> %% A version 0 tree needs no updating. Tree; update(Tree = #tree{evaluated = E}, V) when E >= V -> %% Evaluated enough already. Nothing to do. Tree; update(Tree = #tree{version = MaxV}, V) when V > MaxV -> %% Asking for more than we've got. Do as much as possible. update(Tree, MaxV); update(Tree = #tree{evaluated = Evaluated}, Version) -> NewTree = update_layer(Tree, 0, Evaluated + 1, Version), NewTree#tree{evaluated = Version}. %% @doc Update the tree wrt the leaves ICur..ILast. -spec update_layer(tree(), non_neg_integer(), non_neg_integer(), non_neg_integer()) -> tree(). update_layer(Tree, _Layer, _ICur, 0) -> % Done Tree; update_layer(Tree, Layer, ICur, ILast) -> %% Update parents on next upper layer, starting with a left %% child <= ICur and ending with ILast. Recurse with next layer. NewStore = update_parent(Tree#tree.store, Layer, strip_bits_bottom(ICur, 1), ILast), update_layer(Tree#tree{store = NewStore}, Layer + 1, parent(ICur), parent(ILast)). %% @doc Update parents of I..ILast, on Layer+1. I has to be a left child. -spec update_parent(ts:tree_store(), non_neg_integer(), non_neg_integer(), non_neg_integer()) -> ts:tree_store(). update_parent(S, Layer, I, ILast) when I >= ILast -> %% We're done updating parents. If ILast is a left child, copy it %% to where its parent would've been were it a right child. This %% is a "placeholder node" which simplifies when creating %% incomplete ("non-frozen") trees. case right_node_p(ILast) of true -> S; _ -> ts:append(S, Layer + 1, ts:retrieve_hash(S, {ILast, Layer})) end; update_parent(S, Layer, I, ILast) -> false = right_node_p(I), % ASSERTION %% Make an inner node hash of I and sibling. Store it as %% parent. Recurse with next pair of leaves. update_parent(ts:store(S, {parent(I), Layer + 1}, mkinnerhash(ts:retrieve_hash(S, {I, Layer}), ts:retrieve_hash(S, {I + 1, Layer}))), Layer, I + 2, ILast). %% @doc Parent of {i, r} is at {i/2, r+1} (unless it's a "placeholder"). parent(I) -> I bsr 1. -spec right_node_p(integer()) -> boolean(). right_node_p(Index) -> case Index band 1 of 1 -> true; _ -> false end. strip_bits_bottom(N, Nbits) -> (N bsr Nbits) bsl Nbits. %% @doc Return position of highest bit set, counting from the least %% significant bit, starting at 1. bitpos_first_set(N) -> L = [Bit || <> <= binary:encode_unsigned(N)], length(L) - ffs(L, 0). ffs([], Acc) -> Acc; ffs([H|T], Acc) -> case H of 0 -> ffs(T, Acc + 1); _ -> Acc end. depth(#tree{version = -1}) -> 0; depth(#tree{version = V}) -> bitpos_first_set(V) + 1. -spec mkleafhash(binary()) -> binary(). mkleafhash(Data) -> hash([<<"\x00">>, Data]). -spec mkinnerhash(binary(), binary()) -> binary(). mkinnerhash(Hash1, Hash2) -> hash([<<"\x01">>, Hash1, Hash2]). -spec hash(binary()) -> binary() | iolist(). hash(Data) -> crypto:hash(sha256, Data). %%%%%%%%%%%%%%%%%%%% %% Testing ht. -define(TEST_VECTOR_LEAVES, ["", "\x00", "\x10", " !", "01", "@ABC", "PQRSTUVW", "`abcdefghijklmno"]). -define(TEST_VECTOR_HASHES, ["6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", "fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125", "aeb6bcfe274b70a14fb067a5e5578264db0fa9b51af5e0ba159158f329e06e77", "d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7", "4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4", "76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c", "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]). %% FIXME: Don't start and stop the server manually all the time. EUnit %% can help. test_init(L) -> stop(), {ok, _Pid} = start_link(L). %% @doc Build tree using add/2 and mth/2 and compare the resulting %% tree hashes. %% FIXME: Move outside. add_test() -> lists:foreach( fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X), test_init(L), ?assertEqual(mth(L), tree_hash()) end, random_entries(length(?TEST_VECTOR_LEAVES))). %% FIXME: Move outside. old_versions_test() -> test_init(?TEST_VECTOR_LEAVES), ?assertEqual(mth(?TEST_VECTOR_LEAVES), tree_hash()), lists:foreach( fun(X) -> ?assertEqual(mth(lists:sublist(?TEST_VECTOR_LEAVES, X)), tree_hash(X - 1)) end, random_entries(length(?TEST_VECTOR_LEAVES))). %%%%%%%%%%%%%%%%%%%% %% Testing the test helpers. mth_test() -> lists:foreach( fun(X) -> ?assertEqual( mth(lists:sublist(?TEST_VECTOR_LEAVES, X)), hex:hexstr_to_bin(lists:nth(X, ?TEST_VECTOR_HASHES))) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). %%%%%%%%%%%%%%%%%%%% %% Test helpers. random_entries(N) -> [V || {_, V} <- lists:sort( [{random:uniform(N), E} || E <- lists:seq(1, N)])]. %% @doc Return the Merkle Tree Head for the leaves in L. Implements %% the algorithm in section 2.1 of RFC 6962. Used for testing. -spec mth(list()) -> binary(). mth([]) -> hash(<<"">>); mth(L) -> case length(L) of 1 -> hash([<<"\x00">>, L]); _ -> Split = 1 bsl (bitpos_first_set(length(L) - 1) - 1), {L1, L2} = lists:split(Split, L), % TODO: PERF hash([<<"\x01">>, mth(L1), mth(L2)]) end.