summaryrefslogtreecommitdiff
path: root/src/ht.erl
blob: 3af71f83f60c95bcc3379064aaa95047e4660769 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
%%% Copyright (c) 2014, NORDUnet A/S.
%%% See LICENSE for licensing information.
%%%
%%% Implementation of a history tree as described in Efficient Data
%%% Structures for Tamper-Evident Logging [0]. This implementation
%%% follows RFC 6962 and differs from [0] only in how non-full trees
%%% are handled.
%%%
%%% [0] https://www.usenix.org/event/sec09/tech/full_papers/crosby.pdf
%%%
%%% Useful terminology:
%%% E -- Entry, stored in leaf nodes.
%%% I(i,r) -- Inner node with index i, on layer r.
%%% A(v)(i,r) -- Hash of I(i,r) in tree of version v.
%%% d -- Depth of tree.
%%%
%%% Nodes are identified by their index i and layer r.
%%% I(i,r) has left child I(i,r-1) and right child I(i+2^(r-1),r-1).
%%%
%%% A version-n tree stores n+1 entries.
%%% In a version-n tree, I(i,r) is frozen when n >= i + 2^r - 1.

%%%
%%% Data types:
%%% history_tree
%%%   version (integer)
%%%   head (reference to a frozen or warm node)
%%%   frozen nodes (ets tables handled by ts)
%%%   warm nodes (ets table handled by ts)
%%%
%%% Interface:
%%% create tree (-> tree)
%%% add entry (data -> tree)
%%% get hash of leaf or inner node (i, r -> hash)
%%% get inclusion proof for leaf i in version-n tree (i, n -> hashes)
%%% get consistency proof between trees of version n and m (n, m -> hashes)
%%%
-module(ht).
-include("$CTROOT/plop/include/plop.hrl").
-include_lib("eunit/include/eunit.hrl").
-import(lists, [foreach/2, foldl/3, reverse/1]).

-export_type([history_tree/0, inner/0]).
-export([new/0, new/1, destroy/1, size/1, add/2]).
-export([get_hash/3, get_incl/3, get_cons/3]).
-export([tree_hash/1, tree_hash/2]).

%%%%%%%%%%%%%%%%%%%%
%% Public interface.

%%%%%%%%%%%%%%%%%%%%
%% Data types.
-record(history_tree, {version :: integer(),
                       store :: ts:tree_store()}).
-type history_tree() :: #history_tree{}.

-record(inner, {index :: non_neg_integer(), % 27bit integer => max 134e6 entries
                layer :: non_neg_integer(), % 5bit integer
                hash :: binary()}).
-type inner() :: #inner{}.

%%%%%%%%%%%%%%%%%%%%
%% Public interface.
size(#history_tree{version = V}) ->
    V + 1.

-spec new() -> history_tree().
new() ->
    #history_tree{version = -1, store = ts:new()}.

-spec new(integer()) -> history_tree().
new(Version) ->
    foldl(fun(#mtl{entry = E}, Tree) ->
                  D = (E#timestamped_entry.entry)#plop_entry.data,
                  add(Tree, D) % Return value -> Tree in next invocation.
          end, new(), db:get_by_index_sorted(0, Version)).

destroy(Tree) ->
    ts:delete(Tree#history_tree.store).

%% @doc Which layers need to change when creating a version-n tree?
%% Layer 0 is where the new leaf is added and always needs
%% updating. Apart from 0, the layers with a number corresponding to
%% the "positions of the bits set in n" are the ones that need to be
%% touched. This assumes a somewhat unorthodox bit numbering where the
%% least significant bit has number 1 (rather than 0).
%%
%% For example:
%% A version 2 (0010) tree has had changes to layer 2
%% A version 5 (0101) tree has had changes to layers 1 and 3
%% A version 8 (1000) tree has had changes to layer 4
-spec add(history_tree(), binary()) -> history_tree().
add(#history_tree{version = V, store = Store}, Entry) ->
    NewVersion = V + 1,
    LeafIndex = NewVersion,
    LeafHash = mkleafhash(Entry),
    LayerMap = reverse([B || <<B:1>> <= binary:encode_unsigned(NewVersion)]),
    #history_tree{version = NewVersion,
                      store = update(ts:store(Store, {LeafIndex, 0}, LeafHash),
                                     1, LayerMap,
                                     #inner{index = LeafIndex, layer = 0,
                                            hash = LeafHash})}.

tree_hash(Tree) ->
    tree_hash(Tree, Tree#history_tree.version).
tree_hash(_, -1) ->
    hash("");
tree_hash(Tree, Version) ->
    get_hash(Tree, Version, {0, depth(Tree) - 1}).

-spec get_hash(history_tree(), non_neg_integer(), tuple()) -> binary().
get_hash(#history_tree{store = S}, _Version, IR) ->
    %% TODO: Use Version!
    {{_I, _R}, Hash} = ts:retrieve(S, IR),
    Hash.

%-spec get_incl/3
get_incl(_, _, _) ->
    nyi.
%-spec get_cons/3
get_cons(_, _, _) ->
    nyi.

%% Private.
%% -spec head(history_tree()) -> inner().
%% head(#history_tree{store = Store}) ->
%%     {{I, R}, Hash} = ts:retrieve(Store, {0, 0}),
%%     #inner{index = I, layer = R, hash = Hash}.

%% @doc Return position of highest bit set, counting from the least
%% significant bit, starting at 1.
bitpos_first_set(N) ->
    L = [Bit || <<Bit:1>> <= binary:encode_unsigned(N)],
    length(L) - ffs(L, 0).
ffs([], Acc) ->
    Acc;
ffs([H|T], Acc) ->
    case H of
        0 -> ffs(T, Acc + 1);
        _ -> Acc
    end.

depth(#history_tree{version = -1}) ->
    0;
depth(#history_tree{version = V}) ->
    bitpos_first_set(V) + 1.

-spec update(ts:tree_store(), pos_integer(), list(), inner()) -> ts:tree_store().
update(Store, _, [], _) ->
    Store;
update(Store,
       CurrentLayer, [UpdateThisLayerP|LayerMap],
       Child = #inner{index = ChildIndex,
                      layer = ChildLayer,
                      hash = ChildHash}) ->
    case UpdateThisLayerP ==  1 of
        true ->
            %% Create/update the parent of ChildHash at ChildIndex.
            %%
            %% Parent index is equal to child index with its r lowest
            %% bits stripped off, where r is current layer.
            %%
            %% Other child is found by comparing index of parent and
            %% known child. If they're the same, the known child is
            %% the left child and its sibling is found on the same
            %% layer. If they differ, the known child is the right
            %% child and the left child is to be found one layer below
            %% the parent, same index.

            Index = strip_low_bits(ChildIndex, CurrentLayer),
            Hash =
                case Index == ChildIndex of
                    true ->
                        {_SibIR, SibHash} =
                            ts:retrieve(Store, {Index + (1 bsl ChildLayer) - 1,
                                                ChildLayer}),
                        mkinnerhash(ChildHash, SibHash);
                    _ ->
                        {_SibIR, SibHash} =
                            ts:retrieve(Store, {Index, CurrentLayer - 1}),
                        mkinnerhash(SibHash, ChildHash)
                end,
            update(ts:store(Store, {Index, CurrentLayer}, Hash),
                   CurrentLayer + 1, LayerMap,
                   #inner{index = Index, layer = CurrentLayer, hash = Hash});
        _ ->
            update(Store, CurrentLayer + 1, LayerMap, Child)
    end.

strip_low_bits(N, Nbits) ->
    (N bsr Nbits) bsl Nbits.

hash(Data) ->
    crypto:hash(sha256, Data).

mkleafhash(Data) ->
    hash([<<"\x00">>, Data]).

mkinnerhash(Hash1, Hash2) ->
    hash([<<"\x01">>, Hash1, Hash2]).

%%%%%%%%%%%%%%%%%%%%
%% Testing ht.
-define(TEST_VECTOR_LEAVES,
        ["", "\x00", "\x10", " !", "01", "@ABC", "PQRSTUVW", "`abcdefghijklmno"]).
-define(TEST_VECTOR_HASHES,
        ["6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d",
         "fac54203e7cc696cf0dfcb42c92a1d9dbaf70ad9e621f4bd8d98662f00e3c125",
         "aeb6bcfe274b70a14fb067a5e5578264db0fa9b51af5e0ba159158f329e06e77",
         "d37ee418976dd95753c1c73862b9398fa2a2cf9b4ff0fdfe8b30cd95209614b7",
         "4e3bbb1f7b478dcfe71fb631631519a3bca12c9aefca1612bfce4c13a86264d4",
         "76e67dadbcdf1e10e1b74ddc608abd2f98dfb16fbce75277b5232a127f2087ef",
         "ddb89be403809e325750d3d263cd78929c2942b7942a34b77e122c9594a74c8c",
         "5dc9da79a70659a9ad559cb701ded9a2ab9d823aad2f4960cfe370eff4604328"]).

%% @doc Build trees using add/2 and mth/2 and compare the resulting
%% tree hashes.
add_test() ->
    lists:foreach(
      fun(X) -> L = lists:sublist(?TEST_VECTOR_LEAVES, X),
                ?assertEqual(mth(L), tree_hash(mktree(L)))
      end,
      lists:seq(1, length(?TEST_VECTOR_LEAVES))).

%%%%%%%%%%%%%%%%%%%%
%% Testing the test helpers.
mth_test() ->
    lists:foreach(
      fun(X) -> ?assertEqual(
		   mth(lists:sublist(?TEST_VECTOR_LEAVES, X)),
		   hex:hexstr_to_bin(lists:nth(X, ?TEST_VECTOR_HASHES)))
      end,
      lists:seq(1, length(?TEST_VECTOR_LEAVES))).

%%%%%%%%%%%%%%%%%%%%
%% Test helpers.

mktree(L) ->
    mktree(new(), L).
mktree(Tree, []) ->
    Tree;
mktree(Tree, [H|T]) ->
    mktree(add(Tree, H), T).

%% @doc Return the Merkle Tree Head for the leaves in L. Implements
%% the algorithm in section 2.1 of RFC 6962. Used for testing.
-spec mth(list()) -> binary().
mth([]) ->
    hash(<<"">>);
mth(L) ->
    case length(L) of
        1 -> hash([<<"\x00">>, L]);
        _ -> Split = 1 bsl (bitpos_first_set(length(L) - 1) - 1),
             {L1, L2} = lists:split(Split, L),  % TODO: PERF
             hash([<<"\x01">>, mth(L1), mth(L2)])
    end.