-module('ht'). -include_lib("eunit/include/eunit.hrl"). -record(leaf, {hash :: binary()}). % hash of data -record(inner, {child0 :: #leaf{} | #inner{}, % left child child1 :: #leaf{} | #inner{}, % right child hash :: binary()}). % hash of children's hashes -record(head, {version :: non_neg_integer(), % number of leafs tree :: tree()}). % the tree -type head() :: #head{}. -type tree() :: inner() | leaf() | undefined. -type leaf() :: #leaf{}. -type inner() :: #inner{}. -export_type([head/0, tree/0, inner/0, leaf/0]). -export([create/0, append/2, tree_hash/1, size/1, audit_path/2, consistency_proof/2]). %% Public interface. -spec create() -> head(). create() -> mkhead(0, undefined). -spec tree_hash(head()) -> binary(); (tree()) -> binary(). tree_hash(#head{tree=T}) -> case T of undefined -> hashfun(<<>>); #inner{hash=H} -> H; #leaf{hash=H} -> H end. -spec size(head()) -> non_neg_integer(). size(Head) -> tree_version(Head). %% @doc Append Leaf to Head. %% %% Walk down the tree in Head, stop at The Right Place and make Leaf %% the right sibling of that place. To find the right place, let d be %% the depth of the tree, then go down the tree on the right side to %% level l, where l is the position of the first set bit in d, looking %% at d "from the right". l=0 is where the leafs are and l=d-1 is the %% root. %% %% The depth of the tree is found by walking down the right path. It %% would be better if we inserted the leaf and calculated the nodes on %% the way up instead of walking down the tree again. Worst case this %% is lg2(N) iterations, i.e. 24 steps for N=16e10. %% %% Example: N=3 (011) => l=0, the rightmost leaf %% Example: N=4 (100) => l=2, the root (soon not to be root). %% Example: N=5 (101) => l=0, the rightmost leaf. %% Example: N=6 (110) => l=1, the last and rightmost inner node. -spec append(head(), leaf() | iolist() | binary()) -> head(). append(#head{version = 0}, Leaf) when is_record(Leaf, leaf) -> mkhead(1, Leaf); append(Head, Leaf) when is_record(Leaf, leaf) -> N = Head#head.version, %Depth = depth(Head), Level = fls(N), RBD = rightbranchdepth(Head#head.tree), %io:format("N=~p, Depth=~p, Level=~p, RBD=~p~n", [N, Depth, Level, RBD]), #head{version = N + 1, tree = append(Head#head.tree, Leaf, RBD-Level-1)}; append(Head, Data) -> append(Head, mkleaf(Data)). -spec append(tree(), tree(), pos_integer()) -> tree(). append(Dest, Newtree, _) when is_record(Dest, leaf) -> mkinner(Dest, Newtree); append(Dest, Newtree, 0) when is_record(Dest, inner) -> mkinner(Dest, Newtree); append(Dest, Newtree, Depth) when is_record(Dest, inner) -> mkinner(Dest#inner.child0, append(Dest#inner.child1, Newtree, Depth - 1)). %% @doc Return an audit path, i.e a list of those hashes needed to %% calculate the tree hash for Head given the knowledge of the nth %% hash in the tree. Audit paths prove existance of a given leaf in a %% given tree. -spec audit_path(head(), non_neg_integer()) -> list(). audit_path(Head, N) -> [fixme, Head, N]. %% @doc Return a consistency proof, i.e. the shortest list of nodes %% required to verify that the tree built from the first n leaves of a %% given tree is a subset of that tree. Consistency proofs prove the %% append-only property of a tree. -spec consistency_proof(head(), non_neg_integer()) -> list(). consistency_proof(Head, N) -> [fixme, Head, N]. %%%%%%%%%%%%%%%%%%%% %% Private functions. %% @doc Tree version number, i.e. number of leafs in tree. Note that %% this is set off by one (one higher) compared with the history tree %% version as explained by Crosby and Wallach. -spec tree_version(head()) -> non_neg_integer(). tree_version(#head{version=Ver}) -> Ver. -spec mkhead(non_neg_integer(), tree()) -> head(); (head(), list()) -> head(). mkhead(Version, Tree) when is_integer(Version) -> #head{version=Version, tree=Tree}; mkhead(Head, []) -> Head; mkhead(Head, [H|T]) -> append(mkhead(Head, T), mkleaf(H)). -spec hashfun(iolist() | binary()) -> binary(). hashfun(Data) -> crypto:hash(sha256, Data). -spec mkleaf(iolist() | binary()) -> leaf(). mkleaf(Data) -> #leaf{hash = hashfun([<<"\x00">>, Data])}. -spec mkinner(tree(), tree()) -> inner(). mkinner(Leaf, Tree) -> #inner{child0 = Leaf, child1 = Tree, hash = mkhash(Leaf, Tree)}. %% TODO: merge mkhash/2 and gethash? if so, use it in mkleaf/1. -spec mkhash(tree(), tree()) -> binary(). mkhash(Tree0, Tree1) -> hashfun([<<"\x01">>, gethash(Tree0), gethash(Tree1)]). -spec gethash(tree()) -> binary(). gethash(#leaf{hash=Hash}) -> Hash; gethash(#inner{child0=Child0, child1=Child1}) -> mkhash(Child0, Child1). %% @doc Unsigned integer -> binary. %% In R16, we can use integer_to_binary/1. -spec ui2b(pos_integer()) -> binary(). ui2b(Unsigned) -> binary:encode_unsigned(Unsigned). %% @doc Find first set bit in V, starting counting at zero from the %% least significant bit. ffs(V) when is_integer(V) -> L = [Bit || <> <= ui2b(V)], length(L) - ffs(L, 0) - 1. ffs([], Acc) -> Acc; ffs([H|T], Acc) -> case H of 0 -> ffs(T, Acc+1); _ -> Acc end. fls(V) when is_integer(V) -> L = lists:reverse([Bit || <> <= ui2b(V)]), ffs(L, 0). rightbranchdepth(Tree) -> 1 + rightbranchdepth(Tree, 0). -spec rightbranchdepth(tree(), non_neg_integer()) -> non_neg_integer(). rightbranchdepth(Tree, Acc) when is_record(Tree, leaf) -> Acc; rightbranchdepth(Tree, Acc) -> rightbranchdepth(Tree#inner.child1, Acc + 1). %%%%%%%%%%%%%%%%%%%% %% Internal tests. -define(TEST_VECTOR_TREES, [[<<"a foo is a bar">>, <<148,242,40,0,3,172,180,106,111,230,146,161,32,40,128,38,103,8,194, 102,72,68, 126,70,108,47,8,216,208,146,178,107>>]]). %% Test vectors from certificate-transparency/src/python/ct/crypto/merkle_test.py. -define(EMPTY_HASH, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"). -define(TEST_VECTOR_LEAVES, ["", "\x00", "\x10", " !", "01", "@ABC", "PQRSTUVW", "`abcdefghijklmno"]). -define(TEST_VECTOR_HASHES, ["6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d", "fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125", "aeb6bcfe274b70a14fb067a5e5578264db0fa9b51af5e0ba159158f329e06e77", "d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7", "4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4", "76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef", "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c", "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]). basic_helpers_test_() -> [test_bitcount()]. basic_tree_test_() -> TVT1 = lists:nth(1, ?TEST_VECTOR_TREES), [?_assertEqual(#head{version = 0, tree = undefined}, create()), ?_assertEqual(#head{version = 1, tree = #leaf{hash = lists:last(TVT1)}}, create(hd(TVT1)))]. empty_hash_test_() -> [?_assertEqual(hex:hexstr_to_bin(?EMPTY_HASH), mth([]))]. mth_test() -> lists:foreach( fun(X) -> ?assertEqual( mth(lists:sublist(?TEST_VECTOR_LEAVES, X)), hex:hexstr_to_bin(lists:nth(X, ?TEST_VECTOR_HASHES))) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). new_tree_test_() -> TVT1 = lists:nth(1, ?TEST_VECTOR_TREES), V0 = create(), V1 = append(V0, hd(TVT1)), [?_assertEqual(0, V0#head.version), ?_assertEqual(undefined, V0#head.tree), ?_assertEqual(lists:last(TVT1), (V1#head.tree)#leaf.hash), ?_assertEqual(1, V1#head.version)]. %% @doc Build trees using append/2 and mth/2 and compare the resulting %% tree hashes. append_test() -> lists:foreach( fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X), io:format("~p~n", [L]), ?assertEqual( mth(L), gethash((mkhead(L))#head.tree)) end, lists:seq(1, length(?TEST_VECTOR_LEAVES))). mkhead_from_list_test() -> L = ["a", "b"], ?assertEqual(append(mkhead(1, mkleaf(lists:nth(1, L))), mkleaf(lists:nth(2, L))), mkhead(L)). append_eq_mth_test() -> Count = 300, L = [<> || X <- lists:seq(0, Count)], ?assertEqual(gethash((mkhead(L))#head.tree), mth(L)). audit_path_test_() -> H = mkhead([]), M = 0, [?_assertEqual([niy, H, M], path(H, M))]. consistency_proof_test_() -> H = mkhead([]), M = 0, [?_assertEqual([niy, H, M], proof(H, M))]. %% Test helpers. %% @doc Calculate a Merkle Tree Hash from an ordered list as specified %% in RFC 6962. %% %% K, the split point, is the number of leafs comprising the largest %% possible full tree. %% %% The way we calculate K is to let N be the number of entries, find %% the most significant bit in N-1 and raise two to that number. This %% is K. test_bitcount() -> L = [1, 2, 3, 255, 256, 511, 512, 32767, 32768, 65535, 65536, 2147483648, 4294967296, 8589934591, 8589934592, 18446744073709551616], [[?_assertEqual(lists:nth(1, tv_bitcount(X)), ffs(X)) || X <- L], [?_assertEqual(lists:nth(2, tv_bitcount(X)), fls(X)) || X <- L]]. tv_bitcount(1) -> [0, 0]; tv_bitcount(2) -> [1, 1]; tv_bitcount(3) -> [1, 0]; tv_bitcount(255) -> [7, 0]; tv_bitcount(256) -> [8, 8]; tv_bitcount(511) -> [8, 0]; tv_bitcount(512) -> [9, 9]; tv_bitcount(32767) -> [14, 0]; tv_bitcount(32768) -> [15, 15]; tv_bitcount(65535) -> [15, 0]; tv_bitcount(65536) -> [16, 16]; tv_bitcount(2147483648) -> [31, 31]; tv_bitcount(4294967296) -> [32, 32]; tv_bitcount(8589934591) -> [32, 0]; tv_bitcount(8589934592) -> [33, 33]; tv_bitcount(18446744073709551616) -> [64, 64]. %% @doc Return the Merkle Tree Head for the leafs in L. Implements %% the algorithm in section 2.1 of RFC 6962. Used for testing. -spec mth(list()) -> binary(). mth([]) -> hashfun(<<"">>); mth(L) -> case length(L) of 1 -> hashfun([<<"\x00">>, L]); _ -> Split = 1 bsl ffs(length(L) - 1), {L1, L2} = lists:split(Split, L), % TODO: PERF hashfun([<<"\x01">>, mth(L1), mth(L2)]) end. %% @doc Return the Merkle Audit Path for leaf m in L. Implements the %% algorithm in section 2.1.1 of RFC 6962. Used for testing. -spec path(integer(), list()) -> list(). path(M, L) -> [niy, M, L]. %% @doc Return the Merkle Consistency Proof between the first m leaves %% of L and L. Implements the algorithm in section 2.1.2 of RFC %% 6962. Used for testing. -spec proof(integer(), list()) -> list(). proof(M, L) -> [niy, M, L]. -spec create(iolist() | binary()) -> head(). create(D) -> mkhead(1, mkleaf(D)). -spec mkhead(list()) -> head(). mkhead([]) -> mkhead(1, mkleaf([])); mkhead(L) when is_list(L) -> mkhead(create(hd(L)), lists:reverse(tl(L))).