-module('ht').
-include_lib("eunit/include/eunit.hrl").

-record(leaf, {hash :: binary()}).              % hash of data
-record(inner, {child0 :: #leaf{} | #inner{},   % left child
                child1 :: #leaf{} | #inner{},   % right child
                hash :: binary()}).        % hash of children's hashes
-record(head, {version :: non_neg_integer(),    % number of leafs
               tree :: tree()}).                % the tree

-type head() :: #head{}.
-type tree() :: inner() | leaf() | undefined.
-type leaf() :: #leaf{}.
-type inner() :: #inner{}.

-export_type([head/0, tree/0, inner/0, leaf/0]).
-export([create/0, append/2, tree_hash/1, size/1, audit_path/2,
         consistency_proof/2]).

%% Public interface.
-spec create() -> head().
create() ->
    mkhead(0, undefined).

-spec tree_hash(head()) -> binary();
               (tree()) -> binary().
tree_hash(#head{tree=T}) ->
    case T of
        undefined -> hashfun(<<>>);
        #inner{hash=H} -> H;
        #leaf{hash=H} -> H
    end.

-spec size(head()) -> non_neg_integer().
size(Head) ->
    tree_version(Head).

%% @doc Append Leaf to Head.
%%
%% Walk down the tree in Head, stop at The Right Place and make Leaf
%% the right sibling of that place. To find the right place, let d be
%% the depth of the tree, then go down the tree on the right side to
%% level l, where l is the position of the first set bit in d, looking
%% at d "from the right". l=0 is where the leafs are and l=d-1 is the
%% root.
%%
%% The depth of the tree is found by walking down the right path. It
%% would be better if we inserted the leaf and calculated the nodes on
%% the way up instead of walking down the tree again. Worst case this
%% is lg2(N) iterations, i.e. 24 steps for N=16e10.
%%
%% Example: N=3 (011) => l=0, the rightmost leaf
%% Example: N=4 (100) => l=2, the root (soon not to be root).
%% Example: N=5 (101) => l=0, the rightmost leaf.
%% Example: N=6 (110) => l=1, the last and rightmost inner node.
-spec append(head(), leaf() | iolist() | binary()) -> head().
append(#head{version = 0}, Leaf) when is_record(Leaf, leaf) ->
    mkhead(1, Leaf);
append(Head, Leaf) when is_record(Leaf, leaf) ->
    N = Head#head.version,
    %Depth = depth(Head),
    Level = fls(N),
    RBD = rightbranchdepth(Head#head.tree),
    %io:format("N=~p, Depth=~p, Level=~p, RBD=~p~n", [N, Depth, Level, RBD]),
    #head{version = N + 1, tree = append(Head#head.tree, Leaf, RBD-Level-1)};
append(Head, Data) ->
    append(Head, mkleaf(Data)).

-spec append(tree(), tree(), pos_integer()) -> tree().
append(Dest, Newtree, _) when is_record(Dest, leaf) ->
    mkinner(Dest, Newtree);
append(Dest, Newtree, 0) when is_record(Dest, inner) ->
    mkinner(Dest, Newtree);
append(Dest, Newtree, Depth) when is_record(Dest, inner) ->
    mkinner(Dest#inner.child0, append(Dest#inner.child1, Newtree, Depth - 1)).

%% @doc Return an audit path, i.e a list of those hashes needed to
%% calculate the tree hash for Head given the knowledge of the nth
%% hash in the tree. Audit paths prove existance of a given leaf in a
%% given tree.
-spec audit_path(head(), non_neg_integer()) -> list().
audit_path(Head, N) ->
    [fixme, Head, N].

%% @doc Return a consistency proof, i.e. the shortest list of nodes
%% required to verify that the tree built from the first n leaves of a
%% given tree is a subset of that tree. Consistency proofs prove the
%% append-only property of a tree.
-spec consistency_proof(head(), non_neg_integer()) -> list().
consistency_proof(Head, N) ->
    [fixme, Head, N].

%%%%%%%%%%%%%%%%%%%%
%% Private functions.

%% @doc Tree version number, i.e. number of leafs in tree. Note that
%% this is set off by one (one higher) compared with the history tree
%% version as explained by Crosby and Wallach.
-spec tree_version(head()) -> non_neg_integer().
tree_version(#head{version=Ver}) ->
    Ver.

-spec mkhead(non_neg_integer(), tree()) -> head();
            (head(), list()) -> head().
mkhead(Version, Tree) when is_integer(Version) ->
    #head{version=Version, tree=Tree};
mkhead(Head, []) ->
    Head;
mkhead(Head, [H|T]) ->
    append(mkhead(Head, T), mkleaf(H)).

-spec hashfun(iolist() | binary()) -> binary().
hashfun(Data) ->
    crypto:hash(sha256, Data).

-spec mkleaf(iolist() | binary()) -> leaf().
mkleaf(Data) ->
    #leaf{hash = hashfun([<<"\x00">>, Data])}.

-spec mkinner(tree(), tree()) -> inner().
mkinner(Leaf, Tree) ->
    #inner{child0 = Leaf,
           child1 = Tree,
           hash = mkhash(Leaf, Tree)}.

%% TODO: merge mkhash/2 and gethash? if so, use it in mkleaf/1.
-spec mkhash(tree(), tree()) -> binary().
mkhash(Tree0, Tree1) ->
    hashfun([<<"\x01">>, gethash(Tree0), gethash(Tree1)]).

-spec gethash(tree()) -> binary().
gethash(#leaf{hash=Hash}) ->
    Hash;
gethash(#inner{child0=Child0, child1=Child1}) ->
    mkhash(Child0, Child1).

%% @doc Unsigned integer -> binary.
%% In R16, we can use integer_to_binary/1.
-spec ui2b(pos_integer()) -> binary().
ui2b(Unsigned) ->
    binary:encode_unsigned(Unsigned).

%% @doc Find first set bit in V, starting counting at zero from the
%% least significant bit.
ffs(V) when is_integer(V) ->
    L = [Bit || <<Bit:1>> <= ui2b(V)],
    length(L) - ffs(L, 0) - 1.
ffs([], Acc) ->
    Acc;
ffs([H|T], Acc) ->
    case H of
        0 -> ffs(T, Acc+1);
        _ -> Acc
    end.
fls(V) when is_integer(V) ->
    L = lists:reverse([Bit || <<Bit:1>> <= ui2b(V)]),
    ffs(L, 0).

rightbranchdepth(Tree) ->
    1 + rightbranchdepth(Tree, 0).
-spec rightbranchdepth(tree(), non_neg_integer()) -> non_neg_integer().
rightbranchdepth(Tree, Acc) when is_record(Tree, leaf) ->
    Acc;
rightbranchdepth(Tree, Acc) ->
    rightbranchdepth(Tree#inner.child1, Acc + 1).

%%%%%%%%%%%%%%%%%%%%
%% Internal tests.
-define(TEST_VECTOR_TREES,
        [[<<"a foo is a bar">>,
          <<148,242,40,0,3,172,180,106,111,230,146,161,32,40,128,38,103,8,194,
           102,72,68, 126,70,108,47,8,216,208,146,178,107>>]]).
%% Test vectors from certificate-transparency/src/python/ct/crypto/merkle_test.py.
-define(EMPTY_HASH,
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855").
-define(TEST_VECTOR_LEAVES,
        ["", "\x00", "\x10", " !", "01", "@ABC", "PQRSTUVW", "`abcdefghijklmno"]).
-define(TEST_VECTOR_HASHES,
        ["6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d",
         "fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125",
         "aeb6bcfe274b70a14fb067a5e5578264db0fa9b51af5e0ba159158f329e06e77",
         "d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7",
         "4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4",
         "76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef",
         "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c",
         "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]).

basic_helpers_test_() ->
    [test_bitcount()].

basic_tree_test_() ->
    TVT1 = lists:nth(1, ?TEST_VECTOR_TREES),
    [?_assertEqual(#head{version = 0, tree = undefined},
                   create()),
     ?_assertEqual(#head{version = 1,
                         tree = #leaf{hash = lists:last(TVT1)}},
                   create(hd(TVT1)))].

empty_hash_test_() ->
    [?_assertEqual(hex:hexstr_to_bin(?EMPTY_HASH), mth([]))].

mth_test() ->
    lists:foreach(
      fun(X) -> ?assertEqual(
		   mth(lists:sublist(?TEST_VECTOR_LEAVES, X)),
		   hex:hexstr_to_bin(lists:nth(X, ?TEST_VECTOR_HASHES)))
      end,
      lists:seq(1, length(?TEST_VECTOR_LEAVES))).

new_tree_test_() ->
    TVT1 = lists:nth(1, ?TEST_VECTOR_TREES),
    V0 = create(),
    V1 = append(V0, hd(TVT1)),
    [?_assertEqual(0, V0#head.version),
     ?_assertEqual(undefined, V0#head.tree),
     ?_assertEqual(lists:last(TVT1), (V1#head.tree)#leaf.hash),
     ?_assertEqual(1, V1#head.version)].

%% @doc Build trees using append/2 and mth/2 and compare the resulting
%% tree hashes.
append_test() ->
    lists:foreach(
      fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X),
                io:format("~p~n", [L]),
                ?assertEqual(
		   mth(L),
		   gethash((mkhead(L))#head.tree))
      end,
      lists:seq(1, length(?TEST_VECTOR_LEAVES))).

mkhead_from_list_test() ->
    L = ["a", "b"],
    ?assertEqual(append(mkhead(1, mkleaf(lists:nth(1, L))),
                      mkleaf(lists:nth(2, L))),
                 mkhead(L)).

append_eq_mth_test() ->
    Count = 300,
    L = [<<X:16>> || X <- lists:seq(0, Count)],
    ?assertEqual(gethash((mkhead(L))#head.tree), mth(L)).

audit_path_test_() ->
    H = mkhead([]),
    M = 0,
    [?_assertEqual([niy, H, M], path(H, M))].

consistency_proof_test_() ->
    H = mkhead([]),
    M = 0,
    [?_assertEqual([niy, H, M], proof(H, M))].

%% Test helpers.
%% @doc Calculate a Merkle Tree Hash from an ordered list as specified
%% in RFC 6962.
%%
%% K, the split point, is the number of leafs comprising the largest
%% possible full tree. 
%%
%% The way we calculate K is to let N be the number of entries, find
%% the most significant bit in N-1 and raise two to that number. This
%% is K.
test_bitcount() ->
    L = [1, 2, 3, 255, 256, 511, 512, 32767, 32768, 65535, 65536,
         2147483648, 4294967296, 8589934591, 8589934592, 18446744073709551616],
    [[?_assertEqual(lists:nth(1, tv_bitcount(X)), ffs(X)) || X <- L],
     [?_assertEqual(lists:nth(2, tv_bitcount(X)), fls(X)) || X <- L]].

tv_bitcount(1) -> [0, 0];
tv_bitcount(2) -> [1, 1];
tv_bitcount(3) -> [1, 0];
tv_bitcount(255) -> [7, 0];
tv_bitcount(256) -> [8, 8];
tv_bitcount(511) -> [8, 0];
tv_bitcount(512) -> [9, 9];
tv_bitcount(32767) -> [14, 0];
tv_bitcount(32768) -> [15, 15];
tv_bitcount(65535) -> [15, 0];
tv_bitcount(65536) -> [16, 16];
tv_bitcount(2147483648) -> [31, 31];
tv_bitcount(4294967296) -> [32, 32];
tv_bitcount(8589934591) -> [32, 0];
tv_bitcount(8589934592) -> [33, 33];
tv_bitcount(18446744073709551616) -> [64, 64].

%% @doc Return the Merkle Tree Head for the leafs in L. Implements
%% the algorithm in section 2.1 of RFC 6962. Used for testing.
-spec mth(list()) -> binary().
mth([]) ->
    hashfun(<<"">>);
mth(L) ->
    case length(L) of
        1 -> hashfun([<<"\x00">>, L]);
        _ -> Split = 1 bsl ffs(length(L) - 1),
             {L1, L2} = lists:split(Split, L),  % TODO: PERF
             hashfun([<<"\x01">>, mth(L1), mth(L2)])
    end.

%% @doc Return the Merkle Audit Path for leaf m in L. Implements the
%% algorithm in section 2.1.1 of RFC 6962. Used for testing.
-spec path(integer(), list()) -> list().
path(M, L) ->
    [niy, M, L].

%% @doc Return the Merkle Consistency Proof between the first m leaves
%% of L and L. Implements the algorithm in section 2.1.2 of RFC
%% 6962. Used for testing.
-spec proof(integer(), list()) -> list().
proof(M, L) ->
    [niy, M, L].

-spec create(iolist() | binary()) -> head().
create(D) ->
    mkhead(1, mkleaf(D)).

-spec mkhead(list()) -> head().
mkhead([]) ->
    mkhead(1, mkleaf([]));
mkhead(L) when is_list(L) ->
    mkhead(create(hd(L)), lists:reverse(tl(L))).