%%% Copyright (c) 2014, NORDUnet A/S. %%% See LICENSE for licensing information. %%% %%% Implementation of a history tree similar to what is described in %%% Efficient Data Structures for Tamper-Evident Logging [0]. This %%% implementation follows RFC 6962 and differs from [0] only in how %%% non-full trees are handled. %%% %%% Useful terminology: %%% C -- Commitment. %%% X -- Event, stored in leaf nodes. %%% I(i,r) -- Interior node with index i, on layer r. %%% A(v)(i,r) -- Hash of I(i,r) in tree of version v. %%% d -- Depth of tree. %%% %%% Nodes are identified by their index i and layer r. %%% I(i,r) has left child I(i,r-1) and right child I(i+2^(r-1),r-1). %%% %%% A version-n tree stores n+1 events. %%% In a version-n tree, I(i,r) is frozen when n >= i + 2^r - 1. %%% %%% [0] https://www.usenix.org/event/sec09/tech/full_papers/crosby.pdf %%% -module('ht'). -include_lib("eunit/include/eunit.hrl"). -record(leaf, {hash :: binary()}). % hash of data -record(inner, {child0 :: #leaf{} | #inner{}, % left child child1 :: #leaf{} | #inner{}, % right child hash :: binary()}). % hash of children's hashes -record(head, {version :: non_neg_integer(), % number of leaves tree :: tree()}). % the tree -type head() :: #head{}. -type tree() :: inner() | leaf() | undefined. -type leaf() :: #leaf{}. -type inner() :: #inner{}. -export_type([head/0, tree/0, inner/0, leaf/0]). -export([create/0, append/2, tree_hash/1, size/1, audit_path/2, consistency_proof/2]). %%%%%%%%%%%%%%%%%%%% %% Public interface. %% @doc Return an empty tree. -spec create() -> head(). create() -> mkhead(0, undefined). %% @doc Return the merkle tree hash of a tree. -spec tree_hash(head()) -> binary(); (tree()) -> binary(). tree_hash(#head{tree=T}) -> case T of undefined -> hashfun(<<>>); #inner{hash=H} -> H; #leaf{hash=H} -> H end. %% @doc Return the size of a tree, i.e. its number of leaves. -spec size(head()) -> non_neg_integer(). size(Head) -> tree_version(Head). %% @doc Append a leaf to a tree. %% %% Walk down the tree in Head, stop at The Right Place and make Leaf %% the right sibling of that place. To find the right place, let d be %% the depth of the tree, then go down the tree on the right hand side %% to level l, where l is the position of the first set bit in d, %% looking at d from the least significant bit. l=0 is where the leaves %% are and l=d-1 is the root. %% %% The depth of the tree is found by walking down the right path. It %% would be better if we inserted the leaf and calculated the nodes on %% the way up instead of walking down the tree again. Worst case this %% is lg2(N) iterations, i.e. 24 steps for N=16e10. %% %% Example: N=3 (011) => l=0, the rightmost leaf. %% Example: N=4 (100) => l=2, the root (soon not to be root). %% Example: N=5 (101) => l=0, the rightmost leaf. %% Example: N=6 (110) => l=1, the last and rightmost inner node. %% %% NOTE: Adding a tree requires some more work. One thing is that if %% the tree to append doesn't fit in right-hand subtree, it has to be %% added in smaller pieces. %% -spec append(head(), leaf() | iolist() | binary()) -> head(). append(#head{version = 0}, Leaf) when is_record(Leaf, leaf) -> mkhead(1, Leaf); append(#head{version = N, tree = Tree}, Leaf) when is_record(Leaf, leaf) -> Level = fls(N), RBD = rightbranchdepth(Tree), %io:format("N=~p, Level=~p, RBD=~p~n", [N, Level, RBD]), #head{version = N + 1, tree = append_at(Tree, Leaf, RBD-Level-1)}; append(Head, Data) -> append(Head, mkleaf(Data)). -spec append_at(tree(), tree(), pos_integer()) -> tree(). append_at(Dest, Newtree, _) when is_record(Dest, leaf) -> mkinner(Dest, Newtree); append_at(Dest, Newtree, 0) when is_record(Dest, inner) -> mkinner(Dest, Newtree); append_at(Dest, Newtree, Depth) when is_record(Dest, inner) -> mkinner(Dest#inner.child0, append_at(Dest#inner.child1, Newtree, Depth - 1)). %% @doc Return an audit path. %% %% An audit path is a list of those hashes needed to calculate the %% tree hash for Head given the knowledge of the nth hash in the %% tree. An audit path proves existance of a given leaf in a given %% tree. %% %% Walk down the tree to N and build the list in Acc by adding each %% sibling. -spec audit_path(head(), non_neg_integer()) -> list(). audit_path(#head{version = Version, tree = Tree}, N) -> L = [Bit || <> <= ui2b(N)], {_, Path} = lists:split(length(L) - ffs(Version) -1, L), audit_path(Tree, Path, []). audit_path(Tree, _, Acc) when is_record(Tree, leaf) -> Acc; audit_path(_, [], Acc) -> Acc; audit_path(#inner{child0 = Left, child1 = Right}, [PathH|PathT], Acc) -> case PathH of 0 -> % Take a left. audit_path(Left, PathT, [gethash(Right) | Acc]); _ -> % Take a right. audit_path(Right, PathT, [gethash(Left) | Acc]) end. %% @doc Return a consistency proof. %% %% A consistency proof is the shortest list of nodes required to %% verify that the tree built from the first n leaves of a given tree %% is a subset of that tree. Consistency proofs prove the append-only %% property of a tree. -spec consistency_proof(head(), non_neg_integer()) -> list(). consistency_proof(Head, N) -> [fixme, Head, N]. %%%%%%%%%%%%%%%%%%%% %% Private functions. %% @doc Return version number of a tree. %% %% The version number is the number of leaves in tree. Note that this %% is set off by one (higher) compared to the history tree version as %% explained by Crosby and Wallach. -spec tree_version(head()) -> non_neg_integer(). tree_version(#head{version=Ver}) -> Ver. -spec mkhead(non_neg_integer(), tree()) -> head(); (head(), list()) -> head(). mkhead(Version, Tree) when is_integer(Version) -> #head{version=Version, tree=Tree}; mkhead(Head, []) -> Head; mkhead(Head, [H|T]) -> append(mkhead(Head, T), mkleaf(H)). -spec hashfun(iolist() | binary()) -> binary(). hashfun(Data) -> crypto:hash(sha256, Data). -spec mkleaf(iolist() | binary()) -> leaf(). mkleaf(Data) -> #leaf{hash = hashfun([<<"\x00">>, Data])}. -spec mkinner(tree(), tree()) -> inner(). mkinner(Leaf, Tree) -> #inner{child0 = Leaf, child1 = Tree, hash = mkhash(Leaf, Tree)}. -spec mkhash(tree(), tree()) -> binary(). mkhash(Tree0, Tree1) -> hashfun([<<"\x01">>, hash(Tree0), hash(Tree1)]). -spec hash(tree()) -> binary(). hash(#leaf{hash=Hash}) -> Hash; hash(#inner{child0=Child0, child1=Child1}) -> mkhash(Child0, Child1). gethash(#leaf{hash = Hash}) -> Hash; gethash(#inner{hash = Hash}) -> Hash. %% @doc Unsigned integer -> binary. -spec ui2b(pos_integer()) -> binary(). ui2b(Unsigned) -> binary:encode_unsigned(Unsigned). %% @doc Find first set bit in V, starting counting at zero from the %% least significant bit. ffs(V) when is_integer(V) -> L = [Bit || <> <= ui2b(V)], length(L) - ffs(L, 0) - 1. ffs([], Acc) -> Acc; ffs([H|T], Acc) -> case H of 0 -> ffs(T, Acc+1); _ -> Acc end. fls(V) when is_integer(V) -> L = lists:reverse([Bit || <> <= ui2b(V)]), ffs(L, 0). rightbranchdepth(Tree) -> 1 + rightbranchdepth(Tree, 0). -spec rightbranchdepth(tree(), non_neg_integer()) -> non_neg_integer(). rightbranchdepth(Tree, Acc) when is_record(Tree, leaf) -> Acc; rightbranchdepth(Tree, Acc) -> rightbranchdepth(Tree#inner.child1, Acc + 1). %%%%%%%%%%%%%%%%%%%% %% Internal tests. -define(TEST_VECTOR_TREES, [[<<"a foo is a bar">>, <<148,242,40,0,3,172,180,106,111,230,146,161,32,40,128,38,103,8,194, 102,72,68, 126,70,108,47,8,216,208,146,178,107>>]]). %% Test vectors from certificate-transparency/src/python/ct/crypto/merkle_test.py. -define(EMPTY_HASH, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"). -define(TEST_VECTOR_LEAVES, ["", "\x00", "\x10", " !", "01", "@ABC", "PQRSTUVW", "`abcdefghijklmno"]). -define(TEST_VECTOR_HASHES, ["6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", "fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125", "aeb6bcfe274b70a14fb067a5e5578264db0fa9b51af5e0ba159158f329e06e77", "d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7", "4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4", "76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c", "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]). %% Own test vectors. -define(TEST_VECTOR_LEAVES2, ["d0", "d1", "d2", "d3", "d4", "d5", "d6"]). -define(TEST_VECTOR_HASHES2, [ {a, "C67F9FFE68E0761021341DD516428F42FBDEA633731CBDADA03BEA6B84C652F7"}, {b, "49B717E4D6ECDD82F6F6648CF8F86FDF4A912600A4557398E1733186FA952C1D"}, {c, "F366DF4718EF75064317794FF5300E0963E96DD93FE24203118055FA5A00BE13"}, {d, "5E0C4E1130DFA84D27437BA073EB817E1896643D42EA100A0940F8752D496783"}, {e, "39298BE94337336FC5515E7A34DE6EF23C9A1BFF66378B71918AE2D105D684C8"}, {f, "6D1BB6BBB111AF4A1E9EC0B9FB2613CC2BCB394141CEE8C2CD462B5AD3803D78"}, {g, "46C78708413A23175F51FAF1C22604BCCB44482D553B45943B189130EA8221C8"}, {h, "C59E9A6D9575777BA3BDBD3E3086516196CF87EC9760861362ABA5CD0F78DF1D"}, {i, "A4F2A847CCE0DCE0519B1D6B83E4CA15166193DBB0C8F864E736665EDBDE1994"}, {j, "D750CA922FABC5422EEC469D4370779B61D5488186CB871EEEA299D8113D20BC"}, {k, "8DF3870B33FAE650E81938994F98EB4551B143B86C95D3DAE4E6444E00715016"}, {l, "3CF05FF16D26C024828E93B3A14C5656E5ABCBC5E6F0BCE2CF8A169720599674"} ]). smap_get(Key, SimpleMap) -> case lists:keyfind(Key, 1, SimpleMap) of {_, Val} -> Val; Notfound -> Notfound end. basic_helpers_test_() -> [test_bitcount()]. basic_tree_test_() -> TVT1 = lists:nth(1, ?TEST_VECTOR_TREES), [?_assertEqual(#head{version = 0, tree = undefined}, create()), ?_assertEqual(#head{version = 1, tree = #leaf{hash = lists:last(TVT1)}}, create(hd(TVT1)))]. empty_hash_test_() -> [?_assertEqual(hex:hexstr_to_bin(?EMPTY_HASH), mth([]))]. mth_test() -> lists:foreach( fun(X) -> ?assertEqual( mth(lists:sublist(?TEST_VECTOR_LEAVES, X)), hex:hexstr_to_bin(lists:nth(X, ?TEST_VECTOR_HASHES))) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). new_tree_test_() -> TVT1 = lists:nth(1, ?TEST_VECTOR_TREES), V0 = create(), V1 = append(V0, hd(TVT1)), [?_assertEqual(0, V0#head.version), ?_assertEqual(undefined, V0#head.tree), ?_assertEqual(lists:last(TVT1), (V1#head.tree)#leaf.hash), ?_assertEqual(1, V1#head.version)]. %% @doc Build trees using append/2 and mth/2 and compare the resulting %% tree hashes. append_test() -> lists:foreach( fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X), ?assertEqual( mth(L), gethash((mkhead(L))#head.tree)) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). mkhead_from_list_test() -> L = ["a", "b"], ?assertEqual(append(mkhead(1, mkleaf(lists:nth(1, L))), mkleaf(lists:nth(2, L))), mkhead(L)). append_eq_mth_test() -> Count = 300, L = [<> || X <- lists:seq(0, Count)], ?assertEqual(gethash((mkhead(L))#head.tree), mth(L)). path_test_() -> Leaves = ?TEST_VECTOR_LEAVES2, H = ?TEST_VECTOR_HASHES2, %%Tree = lists:foldl(fun(E, T) -> ht:append(T, E) end, ht:create(), Leaves), [?_assertEqual([smap_get(b, H), smap_get(h, H), smap_get(l, H)], lists:map(fun hex:bin_to_hexstr/1, path(0, Leaves))), ?_assertEqual([smap_get(c, H), smap_get(g, H), smap_get(l, H)], lists:map(fun hex:bin_to_hexstr/1, path(3, Leaves))), ?_assertEqual([smap_get(f, H), smap_get(j, H), smap_get(k, H)], lists:map(fun hex:bin_to_hexstr/1, path(4, Leaves))), ?_assertEqual([smap_get(i, H), smap_get(k, H)], lists:map(fun hex:bin_to_hexstr/1, path(6, Leaves)))]. audit_path_test_() -> Tree = lists:foldl(fun(E, T) -> ht:append(T, E) end, ht:create(), ?TEST_VECTOR_LEAVES2), H = ?TEST_VECTOR_HASHES2, [?_assertEqual([smap_get(b, H), smap_get(h, H), smap_get(l, H)], lists:map(fun hex:bin_to_hexstr/1, audit_path(Tree, 0))), ?_assertEqual([smap_get(c, H), smap_get(g, H), smap_get(l, H)], lists:map(fun hex:bin_to_hexstr/1, audit_path(Tree, 3))), ?_assertEqual([smap_get(f, H), smap_get(j, H), smap_get(k, H)], lists:map(fun hex:bin_to_hexstr/1, audit_path(Tree, 4))), ?_assertEqual([smap_get(i, H), smap_get(k, H)], lists:map(fun hex:bin_to_hexstr/1, audit_path(Tree, 6)))]. consistency_proof_test_() -> H = mkhead([]), M = 0, [?_assertEqual([niy, H, M], proof(H, M))]. %%%%%%%%%%%%%%%%%%%% %% Test helpers. %% @doc Calculate a Merkle Tree Hash from an ordered list as specified %% in RFC 6962. %% %% K, the split point, is the number of leaves comprising the largest %% possible full tree. %% %% The way we calculate K is to let N be the number of entries, find %% the most significant bit in N-1 and raise two to that number. This %% is K. test_bitcount() -> L = [1, 2, 3, 255, 256, 511, 512, 32767, 32768, 65535, 65536, 2147483648, 4294967296, 8589934591, 8589934592, 18446744073709551616], [[?_assertEqual(lists:nth(1, tv_bitcount(X)), ffs(X)) || X <- L], [?_assertEqual(lists:nth(2, tv_bitcount(X)), fls(X)) || X <- L]]. tv_bitcount(1) -> [0, 0]; tv_bitcount(2) -> [1, 1]; tv_bitcount(3) -> [1, 0]; tv_bitcount(255) -> [7, 0]; tv_bitcount(256) -> [8, 8]; tv_bitcount(511) -> [8, 0]; tv_bitcount(512) -> [9, 9]; tv_bitcount(32767) -> [14, 0]; tv_bitcount(32768) -> [15, 15]; tv_bitcount(65535) -> [15, 0]; tv_bitcount(65536) -> [16, 16]; tv_bitcount(2147483648) -> [31, 31]; tv_bitcount(4294967296) -> [32, 32]; tv_bitcount(8589934591) -> [32, 0]; tv_bitcount(8589934592) -> [33, 33]; tv_bitcount(18446744073709551616) -> [64, 64]. %% @doc Return the Merkle Tree Head for the leaves in L. Implements %% the algorithm in section 2.1 of RFC 6962. Used for testing. -spec mth(list()) -> binary(). mth([]) -> hashfun(<<"">>); mth(L) -> case length(L) of 1 -> hashfun([<<"\x00">>, L]); _ -> Split = 1 bsl ffs(length(L) - 1), {L1, L2} = lists:split(Split, L), % TODO: PERF hashfun([<<"\x01">>, mth(L1), mth(L2)]) end. %% @doc Return the Merkle Audit Path for leaf m in L. Implements the %% algorithm in section 2.1.1 of RFC 6962. Used for testing. -spec path(integer(), list()) -> list(). path(0, L) when length(L) == 1 -> []; path(M, L) -> K = 1 bsl ffs(length(L) - 1), {L1, L2} = lists:split(K, L), R = case M < K of true -> [path(M, L1), mth(L2)]; _ -> [path(M - K, L2), mth(L1)] end, lists:flatten(R). %% @doc Return the Merkle Consistency Proof between the first m leaves %% of L and L. Implements the algorithm in section 2.1.2 of RFC %% 6962. Used for testing. -spec proof(integer(), list()) -> list(). proof(M, L) -> [niy, M, L]. -spec create(iolist() | binary()) -> head(). create(D) -> mkhead(1, mkleaf(D)). -spec mkhead(list()) -> head(). mkhead([]) -> mkhead(1, mkleaf([])); mkhead(L) when is_list(L) -> mkhead(create(hd(L)), lists:reverse(tl(L))). %% For looking at a tree in a REPL. %% hashes_hex(Head) -> %% lists:map(fun hex:bin_to_hexstr/1, hashes(Head)). %% hashes(Head) when is_record(Head, head) -> %% lists:flatten(hashes(Head#head.tree)); %% hashes(#inner{child0 = L, child1 = R, hash = H}) -> %% [H | [hashes(L), hashes(R)]]; %% hashes(#leaf{hash = H}) -> %% H.