From 3eabea607e7da4a5e3fa42668f470301d38509c5 Mon Sep 17 00:00:00 2001 From: Linus Nordberg Date: Tue, 16 Sep 2014 16:08:50 +0200 Subject: Rewrite ts to use a list of lists and change its API. We want to get rid of maps because they're a bit too new for some distributions. Replacing the arrays with lists is not necessary and arguably not even the right move -- they're about twice as costly RAM wise and the CPU cost for accesses are O(n). This cleans up the implementation though so let's keep it as a reference implementation. Changes to ht include poping potential placeholders in parent layer before adding and swapping IR -> RI all over, for consistency. --- src/ht.erl | 51 +++++++++++------------- src/ts.erl | 131 +++++++++++++++++++++++++++++++++---------------------------- 2 files changed, 94 insertions(+), 88 deletions(-) (limited to 'src') diff --git a/src/ht.erl b/src/ht.erl index 44fb2b4..4150721 100644 --- a/src/ht.erl +++ b/src/ht.erl @@ -127,7 +127,7 @@ consistency(Tree, V1, V2) -> UpdTree = update(Tree, V2), First = case StartIndex of 0 -> []; - _ -> [get_hash(UpdTree, {StartIndex, StartLayer})] + _ -> [get_hash(UpdTree, {StartLayer, StartIndex})] end, %% Get path from first left child to head of V2. {_, Path} = path(UpdTree, StartLayer, StartIndex, V2), @@ -164,23 +164,23 @@ path(Tree, Layer, I, ILast, Version, Acc) -> [old_version_tree_head(Tree, Version, Layer) | Acc]; Sib when Sib < ILast -> %% Just use sibling. - [get_hash(Tree, {Sib, Layer}) | Acc]; + [get_hash(Tree, {Layer, Sib}) | Acc]; _ -> %% Sibling is larger than ILast so doesn't exist. Acc end). -%% @doc updates the tree and returns new tree plus hash for IR -spec get_hash(tree(), tuple()) -> binary(). -get_hash(Tree, IR) -> - ts:retrieve_hash(Tree#tree.store, IR). +get_hash(Tree, {R, I}) -> + true = Tree#tree.evaluated >= I, % ASSERTION + ts:retrieve(Tree#tree.store, {R, I}). -spec head(tree(), integer()) -> {tree(), binary()}. head(Tree, -1) -> {Tree, hash(<<"">>)}; head(Tree = #tree{version = V}, Version) when Version == V -> NewTree = update(Tree), - {NewTree, get_hash(NewTree, {0, depth(Tree) - 1})}; + {NewTree, get_hash(NewTree, {depth(Tree) - 1, 0})}; head(Tree = #tree{version = V}, Version) when Version > V -> {Tree, enotimetravel}; head(Tree, Version) -> @@ -203,7 +203,7 @@ old_version_tree_head(Tree, Version, BreakAtLayer) -> %% the last right node, rehashing as we go. Calculate the parent %% hash of that node and its sibling. Return that hash. last_right_node_rehash(Tree, Version, FirstLeftR, FirstLeftI, - get_hash(Tree, {FirstLeftI, FirstLeftR}), + get_hash(Tree, {FirstLeftR, FirstLeftI}), BreakAtLayer). -spec last_right_node_rehash(tree(), non_neg_integer(), non_neg_integer(), @@ -221,7 +221,7 @@ last_right_node_rehash(Tree, Version, Layer, Index, RightNodeHash, BAL) -> case right_node_p(Index) of true -> %% Rehash parent using sibling. - mkinnerhash(get_hash(Tree, {Index - 1, Layer}), RightNodeHash); + mkinnerhash(get_hash(Tree, {Layer, Index - 1}), RightNodeHash); false -> %% Just use the incoming hash. RightNodeHash @@ -246,11 +246,8 @@ first_left_node(Layer, Index, BAL) -> %% @doc Add an entry but don't update the tree. -spec add(tree(), binary()) -> tree(). add(Tree = #tree{version = V, store = Store}, Entry) -> - NewVersion = V + 1, - LeafIndex = NewVersion, - LeafHash = mkleafhash(Entry), - Tree#tree{version = NewVersion, - store = ts:store(Store, {LeafIndex, 0}, LeafHash)}. + Tree#tree{version = V + 1, + store = ts:add(Store, 0, mkleafhash(Entry))}. %% @doc Return a new tree. -spec new(list()) -> tree(). @@ -296,10 +293,16 @@ update(Tree = #tree{evaluated = Evaluated}, Version) -> non_neg_integer()) -> tree(). update_layer(Tree, _Layer, _ICur, 0) -> % Done Tree; -update_layer(Tree, Layer, ICur, ILast) -> +update_layer(Tree = #tree{store = S}, Layer, ICur, ILast) -> + %% Before updating parent layer, delete potential placeholder. + Store = case ts:count(S, Layer + 1) == parent(ICur) + 1 of + true -> ts:delete(S, Layer + 1); + false -> S + end, + %% Update parents on next upper layer, starting with a left %% child <= ICur and ending with ILast. Recurse with next layer. - NewStore = update_parent(Tree#tree.store, Layer, + NewStore = update_parent(Store, Layer, strip_bits_bottom(ICur, 1), ILast), update_layer(Tree#tree{store = NewStore}, Layer + 1, parent(ICur), parent(ILast)). @@ -314,24 +317,18 @@ update_parent(S, Layer, I, ILast) when I >= ILast -> %% ("non-frozen") trees. case right_node_p(ILast) of true -> S; - _ -> ts:append(S, Layer + 1, ts:retrieve_hash(S, {ILast, Layer})) + _ -> ts:add(S, Layer + 1, ts:retrieve(S, {Layer, ILast})) end; update_parent(S, Layer, I, ILast) -> false = right_node_p(I), % ASSERTION %% Make an inner node hash of I and its sibling. Store it as %% parent. Recurse with next pair of leaves. - %% TODO: This is the only place where we store rather than - %% append. Consider changing this to an append. This would make it - %% possible for ts to use other data structures for the storage - %% (like a binary per layer). If so, don't forget to truncate - %% Layer+1 before appending if there's already a parent for I - %% there. - update_parent(ts:store(S, {parent(I), Layer + 1}, - mkinnerhash(ts:retrieve_hash(S, {I, Layer}), - ts:retrieve_hash(S, {I + 1, Layer}))), + update_parent(ts:add(S, Layer + 1, + mkinnerhash(ts:retrieve(S, {Layer, I}), + ts:retrieve(S, {Layer, I + 1}))), Layer, I + 2, ILast). -%% @doc Parent of {i, r} is at {i/2, r+1} (unless it's a "placeholder"). +%% @doc Parent of {r, i} is at {r+1, i/2} (unless it's a placeholder). parent(I) -> I bsr 1. @@ -398,7 +395,7 @@ print_layer(Tree, HashOutputLen, Layer, ILast) -> fun(I) -> io:format("~s ", [string:substr( hex:bin_to_hexstr( - get_hash(Tree, {I, Layer})), 1, HashOutputLen)]) + get_hash(Tree, {I, Layer})), HashOutputLen, 1)]) end, lists:seq(0, ILast)), io:format("~n"). diff --git a/src/ts.erl b/src/ts.erl index e60a114..cf71ff5 100644 --- a/src/ts.erl +++ b/src/ts.erl @@ -7,78 +7,87 @@ -module(ts). -include_lib("eunit/include/eunit.hrl"). -export_type([tree_store/0]). --export([new/0, store/3, append/3, truncate/2, retrieve/2, retrieve_hash/2, delete/2]). +-export([new/0, add/3, delete/2, retrieve/2, count/2]). -%% FIXME: swap IR -> RI for consistency with ht - -%% FIXME: Keep the arrays in an array instead of in a map? Or maybe an -%% array of binaries? Hashes do have fixed lenght. --record(tree_store, {arrays :: map()}). % Map of arrays, indexed by layer. +%% FIXME: Keep the entries in binaries instead of lists? Hashes do +%% have fixed lenght. +-record(tree_store, {layers :: list()}). % orddict of lists, keyed on layer. -type tree_store() :: #tree_store{}. +%%%%%%%%%%%%%%%%%%%% +%% Public. new() -> - #tree_store{arrays = #{}}. + #tree_store{layers = orddict:new()}. --spec store(tree_store(), tuple(), binary()) -> tree_store(). -store(S = #tree_store{arrays = Arrays}, {I, R}, Hash) -> - {A, NewArrays} = get_array(R, Arrays), - Index = case I of - append -> array:size(A); - N -> N - end, - S#tree_store{arrays = maps:put(R, array:set(Index, Hash, A), NewArrays)}. +-spec add(tree_store(), non_neg_integer(), binary()) -> tree_store(). +add(S = #tree_store{layers = Layers}, Layer, Entry) -> + {NewLayers, List} = layer(Layers, rw, Layer), + NewList = [Entry | List], + S#tree_store{layers = orddict:store(Layer, NewList, NewLayers)}. --spec retrieve(tree_store(), tuple()) -> {tuple(), binary()}. -retrieve(#tree_store{arrays = Arrays}, {I, R}) -> - {{I, R}, - case maps:get(R, Arrays, undefined) of - undefined -> undefined; - A -> array:get(I, A) - end}. +-spec delete(tree_store(), non_neg_integer()) -> tree_store(). +delete(S = #tree_store{layers = Layers}, Layer) -> + List = layer(Layers, ro, Layer), + [_ | NewList] = List, + S#tree_store{layers = orddict:store(Layer, NewList, Layers)}. --spec retrieve_hash(tree_store(), tuple()) -> binary(). -retrieve_hash(S, IR) -> - element(2, retrieve(S, IR)). - --spec delete(tree_store(), tuple()) -> tree_store(). -delete(S, {I, R}) -> - {A, NewArrays} = get_array(R, S#tree_store.arrays), - Index = case I of - last -> array:size(A) - 1; - N -> N - end, - S#tree_store{arrays = maps:put(R, - array:reset(Index, A), - NewArrays)}. -append(S, R, Hash) -> - store(S, {append, R}, Hash). +-spec retrieve(tree_store(), tuple()) -> binary() | undefined. +retrieve(#tree_store{layers = Layers}, {Layer, Index}) -> + List = layer(Layers, ro, Layer), + Len = length(List), + case Index < Len of + true -> lists:nth(Len - Index, List); + false -> undefined + end. -truncate(S, R) -> - delete(S, {last, R}). +-spec count(tree_store(), non_neg_integer()) -> non_neg_integer(). +count(#tree_store{layers = Layers}, Layer) -> + length(layer(Layers, ro, Layer)). -%% Private -get_array(Layer, Arrays) -> - case maps:is_key(Layer, Arrays) of - true -> {maps:get(Layer, Arrays), Arrays} ; - false -> - NewArray = array:new(), - {NewArray, maps:put(Layer, NewArray, Arrays)} +%%%%%%%%%%%%%%%%%%%% +%% Private. +-spec layer(list(), rw | ro, non_neg_integer()) -> list() | {list(), list()}. +layer(Layers, Access, Layer) -> + case Access of + rw -> + case orddict:find(Layer, Layers) of + error -> {orddict:store(Layer, [], Layers), []}; + {ok, List} -> {Layers, List} + end; + ro -> + case orddict:find(Layer, Layers) of + error -> []; + {ok, List} -> List + end end. %%%%%%%%%%%%%%%%%%%% %% Testing ts. --define(TEST_VECTOR, {{{0,0}, <<00>>}, - {{0,1}, <<01>>}, - {{1,0}, <<10>>}}). -store_test_() -> - T = ?TEST_VECTOR, +add_retrieve_test_() -> + S = new(), + S0 = add(S, 0, <<00>>), + S1 = add(S0, 0, <<01>>), + S2 = add(S1, 1, <<10>>), + [?_assertEqual(2, count(S2, 0)), + ?_assertEqual(1, count(S2, 1)), + ?_assertEqual(<<10>>, retrieve(S2, {1,0})), + ?_assertEqual(<<00>>, retrieve(S2, {0,0})), + ?_assertEqual(<<01>>, retrieve(S2, {0,1})), + ?_assertEqual(<<00>>, retrieve(S2, {0,0})), + ?_assertEqual(undefined, retrieve(S2, {3,0})), + ?_assertEqual(undefined, retrieve(S1, {1,0}))]. + +delete_test_() -> S = new(), - S0 = store(S, {0,0}, <<00>>), - S1 = store(S0, {0,1}, <<01>>), - S2 = store(S1, {1,0}, <<10>>), - [?_assertEqual(retrieve(S2, {1,0}), element(3, T)), - ?_assertEqual(retrieve(S2, {0,1}), element(2, T)), - ?_assertEqual(retrieve(S2, {0,0}), element(1, T)), - ?_assertEqual(retrieve_hash(S2, {3,0}), undefined), - ?_assertEqual(retrieve_hash(S1, {1,0}), undefined) - ]. + S0 = add(S, 0, <<00>>), + S1 = add(S0, 0, <<01>>), + S2 = add(S1, 1, <<10>>), + S3 = delete(S2, 0), + S4 = add(S3, 0, <<99>>), + [?_assertEqual(2, count(S2, 0)), + ?_assertEqual(1, count(S3, 0)), + ?_assertEqual(2, count(S4, 0)), + ?_assertEqual(retrieve(S2, {0,1}), <<01>>), + ?_assertEqual(retrieve(S3, {0,1}), undefined), + ?_assertEqual(retrieve(S3, {0,0}), <<00>>), + ?_assertEqual(retrieve(S4, {0,1}), <<99>>)]. -- cgit v1.1