#!/usr/bin/env escript
%% -*- erlang -*-
%%! -pa ebin -pa ../lager/ebin -pa ../lager/deps/goldrush/ebin

%% Patch metadata this script was carried in (kept for provenance):
%%   From f576ef7e0ec4317f0ff2f632ae8c2312dc916673 Mon Sep 17 00:00:00 2001
%%   From: Magnus Ahltorp
%%   Date: Wed, 10 Feb 2016 11:11:16 +0100
%%   Subject: Rough implementation of new permdb format.
%%
%%   Added regression tests and benchmarks. Chunks not implemented yet.
%%   ---
%%   test/listpermdb.erl  | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
%%   test/permdbbench.erl | 119 ++++++++++++++++++++++++++++++++++++++++++++++++
%%   test/permdbtest.erl  | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++
%%   3 files changed, 397 insertions(+)
%%   create mode 100755 test/listpermdb.erl
%%   create mode 100755 test/permdbbench.erl
%%   create mode 100755 test/permdbtest.erl
%%   (limited to 'test')
%%
%%   diff --git a/test/listpermdb.erl b/test/listpermdb.erl
%%   new file mode 100755
%%   index 0000000..04acbfa
%%   --- /dev/null
%%   +++ b/test/listpermdb.erl
%%   @@ -0,0 +1,133 @@

-mode(compile).

-include_lib("kernel/include/file.hrl").

%% 64-bit marker values matched by the print* functions of this script.
-define(DATA_COMMIT_START_COOKIE, 16#75c2e4b3d5f643a1).
-define(DATA_COMMIT_END_COOKIE, 16#2b05eed61b5af550).
-define(INDEX_COMMIT_END_COOKIE, 16#2fb1778c74a402e4).
-define(DATA_FILE_COOKIE, 16#d53551ba539a4252).
-define(INDEX_FILE_COOKIE, 16#b7e16b02ba8a6d1b).
-define(DATA_ENTRY_COOKIE, 16#e7c1cdc2ba3dc77c).
-define(INDEX_NODE_COOKIE, 16#2e0f555ad73210d1).

%% @doc Open Filename as a raw binary file and return the handle.
%% Crashes with badmatch if the file cannot be opened.
%% The file is opened read-only: this script only lists the contents,
%% so it must neither need write permission nor be able to modify the
%% database it inspects.
openfile(Filename) ->
    {ok, File} = file:open(Filename, [read, binary, raw]),
    File.

%% Dead code kept from the original; the binary pattern on the fourth
%% line was lost when the patch was extracted.
%% getroot(File) ->
%%     {ok, RootNodeBinary} = file:read(File, ?NODESIZE),
%%     Nodemagic = ?NODEMAGIC,
%%     <> = RootNodeBinary,
%%             ets:insert(State#state.cachename, {root, Node}),
%%             Node;
%%         [{root, Node}] ->
%%             Node
%%     end.

%% @doc Print the file header found at the start of Data and then every
%% commit that follows it.
%%
%% Returns whatever printcommit/3 returns for a recognised header, or
%% `error' when the first byte does not start a known header.
%%
%% NOTE(review): the bit-syntax patterns of all three clause heads were
%% stripped from the source this was recovered from. They are
%% reconstructed from the variables each clause uses, the cookie macros
%% defined above and the start offsets handed to printcommit/3 (8 for
%% an index file, 8+4*3 for a data file). Confirm the field order and
%% widths against the permdb on-disk format.
printfile(<<?INDEX_FILE_COOKIE:64, Rest/binary>>) ->
    io:format("index file header~n", []),
    printcommit(Rest, index, 8);
printfile(<<?DATA_FILE_COOKIE:64, Blocksize:32, Q:32, Keylength:32, Rest/binary>>) ->
    io:format("data file header: blocksize ~p q ~p keylength ~p~n", [Blocksize, Q, Keylength]),
    printcommit(Rest, data, 8+4*3);
printfile(<<Unknown:8, _/binary>>) ->
    io:format("unknown byte: ~p at file start~n", [Unknown]),
    error.

%% @doc Print every 64-bit entry of an index node's child area.
%% An entry whose top bit is 0 is printed as "child", one whose top bit
%% is 1 as "offset"; the remaining 63 bits are the printed number.
%% Returns `ok' once the binary is exhausted.
%%
%% NOTE(review): the head of the second clause and the subject of its
%% `case' were stripped from the source this was recovered from. Only
%% the two 64-bit patterns survived, so the per-entry split is done
%% directly in the clause heads here.
printnode(<<>>) ->
    ok;
printnode(<<0:1, Offset:63, Rest/binary>>) ->
    io:format(" child ~p~n", [Offset]),
    printnode(Rest);
printnode(<<1:1, Offset:63, Rest/binary>>) ->
    io:format(" offset ~p~n", [Offset]),
    printnode(Rest).

%% @doc Print all commits contained in Data, which starts at
%% FileOffset in a file of type FileType (`index' or `data').
%%
%% Each commit is listed by printindex/2 or printdata/2, then its
%% recorded length and SHA-256 checksum are verified; a mismatch dumps
%% diagnostics and terminates with exit(assert).
%%
%% Returns `ok' when the data is exhausted, or `error' when the scanner
%% hit bytes it does not recognise (previously that result fell through
%% the `case' and crashed with case_clause).
printcommit(Data, FileType, FileOffset) ->
    case scan_commit(FileType, Data, FileOffset) of
        {{CommitLength, CommitChecksum, Rest}, FileOffset2, FileOffset3} ->
            io:format("------- commit ~p bytes at ~p-~p -------~n", [CommitLength, FileOffset, FileOffset3]),
            ok = check_commit(Data, CommitLength, CommitChecksum, FileOffset2 - FileOffset),
            printcommit(Rest, FileType, FileOffset3);
        {ok, FileOffset2} ->
            %% Report where scanning actually stopped, not where this
            %% (possibly non-empty, uncommitted) stretch began.
            io:format("ending at offset ~p~n", [FileOffset2]),
            ok;
        error ->
            error
    end.

%% Run the scanner that matches the file type.
scan_commit(index, Data, FileOffset) ->
    printindex(Data, FileOffset);
scan_commit(data, Data, FileOffset) ->
    printdata(Data, FileOffset).

%% Verify the checksum first and the length second, as the original
%% did. The covered range is clamped to the available data so that a
%% corrupt length field yields the diagnostic dump instead of a badarg
%% from binary:part/3.
check_commit(Data, CommitLength, CommitChecksum, ScannedLength) ->
    Covered = binary:part(Data, 0, min(CommitLength, byte_size(Data))),
    CalculatedChecksum = crypto:hash(sha256, Covered),
    if
        CommitChecksum =/= CalculatedChecksum ->
            bad_commit("incorrect checksum~n", CommitLength, CommitChecksum,
                       ScannedLength, CalculatedChecksum, Covered);
        CommitLength =/= ScannedLength ->
            bad_commit("length and offset mismatch~n", CommitLength, CommitChecksum,
                       ScannedLength, CalculatedChecksum, Covered);
        true ->
            ok
    end.

%% Dump everything known about a commit that failed verification, then
%% terminate the script.
bad_commit(Headline, CommitLength, CommitChecksum, ScannedLength, CalculatedChecksum, Covered) ->
    io:format(Headline, []),
    io:format("commit: length ~p checksum ~p~n", [CommitLength, CommitChecksum]),
    io:format("calculated length ~p checksum ~p~n", [ScannedLength, CalculatedChecksum]),
    io:format("checksummed data: ~p~n", [Covered]),
    exit(assert).

%% @doc List index nodes from the start of Data until a commit trailer,
%% the end of the data or an unrecognised byte is reached.
%%
%% Returns
%%   {ok, Offset}  when Data is exhausted,
%%   {{CommitLength, CommitChecksum, Rest}, Offset, OffsetAfterTrailer}
%%                 when a commit trailer is found, or
%%   `error'       on an unrecognised byte.
%%
%% NOTE(review): the bit-syntax patterns of the node, commit and
%% unknown-byte clauses, and the split of the child area, were stripped
%% from the source this was recovered from. They are reconstructed from
%% the offset arithmetic (8 for the node cookie, 8 + 32 + 8 for the
%% trailer) and by analogy with the data-file trailer pattern, which
%% did survive. Confirm against the permdb on-disk format.
printindex(<<>>, FileOffset) ->
    {ok, FileOffset};
printindex(<<?INDEX_NODE_COOKIE:64, Rest/binary>>, FileOffset) ->
    FileOffset2 = FileOffset + 8,
    %% Fixed fan-out of 4 children, 8 bytes each (the original also
    %% bound an unused Q = 2 here).
    NChildren = 4,
    ChildrenLength = NChildren * 8,
    <<Children:ChildrenLength/binary, Rest2/binary>> = Rest,
    io:format("node ~p~n", [FileOffset]),
    printnode(Children),
    printindex(Rest2, FileOffset2 + ChildrenLength);
printindex(<<CommitLength:64, CommitChecksum:32/binary, ?INDEX_COMMIT_END_COOKIE:64, Rest/binary>>, FileOffset) ->
    {{CommitLength, CommitChecksum, Rest}, FileOffset, FileOffset + 8 + 32 + 8};
printindex(<<Unknown:8, _/binary>>, FileOffset) ->
    io:format("unknown byte: ~p at ~p~n", [Unknown, FileOffset]),
    error.

%% @doc List data entries from the start of Data until a commit record,
%% the end of the data or an unrecognised byte is reached. Return
%% values have the same shape as those of printindex/2.
%%
%% NOTE(review): the entry, commit-start and unknown-byte clause heads
%% and the value split were stripped from the source this was recovered
%% from; they are reconstructed from the offset arithmetic
%% (8 + 32 + 4 per entry header, 8 for the commit start cookie). The
%% commit trailer pattern is original.
printdata(<<>>, FileOffset) ->
    {ok, FileOffset};
printdata(<<?DATA_ENTRY_COOKIE:64, Key:32/binary, Length:32, Rest/binary>>, FileOffset) ->
    FileOffset2 = FileOffset + 8 + 32 + 4,
    io:format("data ~p key ~p length ~p~n", [FileOffset, Key, Length]),
    <<_Value:Length/binary, Rest2/binary>> = Rest,
    printdata(Rest2, FileOffset2 + Length);
printdata(<<?DATA_COMMIT_START_COOKIE:64, Rest2/binary>>, FileOffset) ->
    %% Number of zero bytes needed to reach the next multiple of 4.
    Padding = (4 - FileOffset rem 4) rem 4,
    io:format("printdata: ~p ~p~n", [Padding, FileOffset]),
    <<0:Padding/unit:8, CommitLength:32, CommitChecksum:32/binary, ?DATA_COMMIT_END_COOKIE:64, Rest3/binary>> = Rest2,
    {{CommitLength, CommitChecksum, Rest3}, FileOffset + 8 + Padding + 4, FileOffset + 8 + Padding + 4 + 32 + 8};
printdata(<<Unknown:8, _/binary>>, FileOffset) ->
    io:format("unknown byte: ~p at ~p~n", [Unknown, FileOffset]),
    error.

%% @doc Script entry point: read the whole file named on the command
%% line and print its contents. Crashes (badmatch) if the file cannot
%% be read or printfile/1 does not return `ok'.
main([Filename]) ->
    {ok, FileInfo} = file:read_file_info(Filename),
    File = openfile(Filename),
    ReadResult = file:read(File, FileInfo#file_info.size),
    %% Close before asserting on the read so the handle is not leaked.
    ok = file:close(File),
    {ok, Data} = ReadResult,
    io:format("read ~p bytes~n", [byte_size(Data)]),
    ok = printfile(Data),
    ok;
main(_) ->
    io:format(standard_error, "usage: listpermdb.erl <file>~n", []),
    halt(1).

%% Patch metadata for the next file (kept for provenance):
%%   diff --git a/test/permdbbench.erl b/test/permdbbench.erl
%%   new file mode 100755
%%   index 0000000..ade9c34
%%   --- /dev/null
%%   +++ b/test/permdbbench.erl
%%   @@ -0,0 +1,119 @@
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -pa ebin -pa ../lager/ebin -pa ../lager/deps/goldrush/ebin

-mode(compile).

%% @doc Generate Size {Key, ValueSeed} pairs, both SHA-256 digests
%% derived from the entry number 0..Size-1.
%%
%% NOTE(review): the two binaries fed to crypto:hash/2 were stripped
%% from the source this was recovered from. The expressions below are
%% an assumption (entry number plus a 0/1 discriminator) - confirm
%% against the upstream repository before comparing results with
%% databases written by the original script.
gentestdata(Size) ->
    [{crypto:hash(sha256, <<E:32, 0:32>>), crypto:hash(sha256, <<E:32, 1:32>>)} || E <- lists:seq(0, Size-1)].

%% @doc Format a duration given in microseconds as seconds with two
%% decimals; returns the io_lib:format/2 result.
timeprint(Time) ->
    io_lib:format("~.2fs", [Time/1000000]).

%% @doc Start permdb under the name `testdb' on the database Filename.
testinit(Filename) ->
    permdb:start_link(testdb, Filename).

%% @doc Stop the `testdb' permdb instance.
teststop() ->
    permdb:stop(testdb).

%% @doc Build a Size-byte value by repeating VSeed and appending a
%% prefix of it for the remainder. The arithmetic assumes a 32-byte
%% VSeed, which is what gentestdata/1 produces (SHA-256 digests).
%%
%% NOTE(review): the result expression was stripped from the source
%% this was recovered from; concatenating A and B is the only use that
%% gives both bindings a purpose.
constructdata(VSeed, Size) ->
    A = binary:copy(VSeed, Size div 32),
    B = binary:part(VSeed, 0, Size rem 32),
    <<A/binary, B/binary>>.

%% @doc Fetch every key of the list from `testdb' and compare the
%% result with the value expected for its seed (`noentry' seeds expect
%% `noentry'). Prints the mismatch and exits with `mismatch' on the
%% first difference; returns `none' when all entries matched.
%% The Port argument is unused.
getvalue_loop([], _Port, _Datasize) ->
    none;
getvalue_loop([{K, VSeed}|Rest], Port, Datasize) ->
    V = expected_value(VSeed, Datasize),
    case permdb:getvalue(testdb, K) of
        V ->
            getvalue_loop(Rest, Port, Datasize);
        VOther ->
            io:format("expected: ~p got: ~p~nkey: ~p~n", [V, VOther, K]),
            exit(mismatch)
    end.

%% Value permdb:getvalue/2 is expected to return for a seed.
expected_value(noentry, _Datasize) ->
    noentry;
expected_value(VSeed, Datasize) ->
    constructdata(VSeed, Datasize).

%% @doc Add every {Key, Seed} pair of the list to `testdb'. Exits with
%% `mismatch' as soon as permdb:addvalue/3 returns anything but `ok';
%% returns `none' when all entries were added.
%% The Port argument is unused.
addvalue_loop([], _Port, _Datasize) ->
    none;
addvalue_loop([{K, VSeed}|Rest], Port, Datasize) ->
    V = constructdata(VSeed, Datasize),
    case permdb:addvalue(testdb, K, V) of
        ok ->
            addvalue_loop(Rest, Port, Datasize);
        Other ->
            %% The only accepted result is `ok'; the message used to
            %% claim "0 or 1".
            io:format("expected: ok got: ~p~n", [Other]),
            exit(mismatch)
    end.

%% @doc Verify that all of TestData can be read back; returns `ok'.
testget(_Filename, TestData, Datasize) ->
    getvalue_loop(TestData, none, Datasize),
    ok.

%% @doc Add all of TestData and commit. Returns `ok' when
%% permdb:commit/1 answers <<0>>, otherwise prints the answer and exits
%% with `mismatch'.
testadd(_Filename, TestData, Datasize) ->
    addvalue_loop(TestData, none, Datasize),
    case permdb:commit(testdb) of
        <<0>> ->
            ok;
        Other ->
            io:format("commit expected: 0 got: ~p~n", [Other]),
            exit(mismatch)
    end.

%% @doc Stop the database, then wait 100 ms before returning.
stop() ->
    teststop(),
    receive
    after
        100 ->
            ok
    end.

%% @doc Time Fun (which must return `ok') and print throughput figures
%% for Size entries, labelled with Verb.
runbench(Fun, Size, Verb) ->
    {Time2, ok} = timer:tc(Fun),
    %% timer:tc/1 may report 0 microseconds; avoid dividing by zero.
    Rate = Size*1000000/max(Time2, 1),
    io:format("~s ~p entries: ~s ~.1f entries/s (~.2f microseconds)~n", [Verb, Size, timeprint(Time2), Rate, Time2/Size]).

%% @doc Split List into consecutive sublists of at most N elements.
%% The last sublist holds the remainder when the length of List is not
%% a multiple of N (the original crashed in lists:nthtail/2 in that
%% case). N must be a positive integer for a non-empty list.
chunk([], _N) ->
    [];
chunk(List, N) when is_integer(N), N > 0 ->
    First = lists:sublist(List, N),
    %% Drop exactly what was taken; First is shorter than N only at the
    %% tail of the list.
    Rest = lists:nthtail(length(First), List),
    [First | chunk(Rest, N)].

%% @doc Benchmark entry point. Generates 20000 entries of 1000 bytes,
%% then times: adding them in committed batches of 1000, reading them
%% back, reading them again after a restart, and - after deleting the
%% ".idx" file - a single read followed by a full read.
main([]) ->
    {ok, Cwd} = file:get_cwd(),
    code:add_path(Cwd ++ "/ebin"),
    Size = 20000,
    Datasize = 1000,
    ChunkSize = 1000,
    Filename = "testpermdb",
    %% Best effort: the files need not exist.
    file:delete(Filename),
    file:delete(Filename ++ ".idx"),
    {Time1, TestData} = timer:tc(fun () -> gentestdata(Size) end),
    TestDataLists = chunk(TestData, ChunkSize),
    io:format("Init with ~p entries: ~s~n", [Size, timeprint(Time1)]),
    GetAll = fun () -> testget(Filename, TestData, Datasize) end,
    AddAll = fun () ->
                     lists:foreach(fun (E) ->
                                           testadd(Filename, E, Datasize)
                                   end, TestDataLists)
             end,

    testinit(Filename),
    runbench(AddAll, Size, "Add"),
    runbench(GetAll, Size, "Get"),
    stop(),

    testinit(Filename),
    runbench(GetAll, Size, "Get"),
    stop(),

    file:delete(Filename ++ ".idx"),

    testinit(Filename),
    %% Only the first entry is fetched here; the figures are still
    %% reported per Size entries under the label "Rebuild".
    runbench(fun () -> testget(Filename, [hd(TestData)], Datasize) end, Size, "Rebuild"),
    runbench(GetAll, Size, "Get"),
    stop(),

    ok.

%% Patch metadata for the next file (kept for provenance):
%%   diff --git a/test/permdbtest.erl b/test/permdbtest.erl
%%   new file mode 100755
%%   index 0000000..1c43861
%%   --- /dev/null
%%   +++ b/test/permdbtest.erl
%%   @@ -0,0 +1,145 @@
#!/usr/bin/env escript
%% -*- erlang -*-
%%! -pa ebin -pa ../lager/ebin -pa ../lager/deps/goldrush/ebin

-mode(compile).

%% @doc Generate Size {Key, ValueSeed} pairs, both SHA-256 digests
%% derived from the entry number 0..Size-1.
%%
%% NOTE(review): the two binaries fed to crypto:hash/2 were stripped
%% from the source this was recovered from. The expressions below are
%% an assumption (entry number plus a 0/1 discriminator) - confirm
%% against the upstream repository.
gentestdata(Size) ->
    [{crypto:hash(sha256, <<E:32, 0:32>>), crypto:hash(sha256, <<E:32, 1:32>>)} || E <- lists:seq(0, Size-1)].

%% @doc Generate Size {Key, noentry} pairs, i.e. keys that are expected
%% to be absent from the database.
%%
%% NOTE(review): the hashed binary was stripped as well; it is assumed
%% to be the key expression of gentestdata/1 - confirm.
genemptytestdata(Size) ->
    [{crypto:hash(sha256, <<E:32, 0:32>>), noentry} || E <- lists:seq(0, Size-1)].

%% @doc Format a duration given in microseconds as seconds with two
%% decimals; returns the io_lib:format/2 result.
timeprint(Time) ->
    io_lib:format("~.2fs", [Time/1000000]).

%% @doc Start permdb under the name `testdb' on the database Filename.
testinit(Filename) ->
    permdb:start_link(testdb, Filename).

%% @doc Stop the `testdb' permdb instance.
teststop() ->
    permdb:stop(testdb).

%% @doc Build a Size-byte value by repeating VSeed and appending a
%% prefix of it for the remainder. The arithmetic assumes a 32-byte
%% VSeed, which is what gentestdata/1 produces (SHA-256 digests).
%%
%% NOTE(review): the result expression was stripped from the source
%% this was recovered from; concatenating A and B is the only use that
%% gives both bindings a purpose.
constructdata(VSeed, Size) ->
    A = binary:copy(VSeed, Size div 32),
    B = binary:part(VSeed, 0, Size rem 32),
    <<A/binary, B/binary>>.

%% @doc Fetch every key of the list from `testdb' and compare the
%% result with the value expected for its seed: `noentry' for a
%% `noentry' seed, otherwise constructdata(Seed, Datasize). Prints the
%% mismatch and exits with `mismatch' on the first difference; returns
%% `none' when all entries matched. The Port argument is unused.
getvalue_loop([], _Port, _Datasize) ->
    none;
getvalue_loop([{K, VSeed}|Rest], Port, Datasize) ->
    V = if
            VSeed =:= noentry ->
                noentry;
            true ->
                constructdata(VSeed, Datasize)
        end,
    case permdb:getvalue(testdb, K) of
        V ->
            getvalue_loop(Rest, Port, Datasize);
        VOther ->
            io:format("expected: ~p got: ~p~nkey: ~p~n", [V, VOther, K]),
            exit(mismatch)
    end.

%% @doc Add every {Key, Seed} pair of the list to `testdb'. Exits with
%% `mismatch' as soon as permdb:addvalue/3 returns anything but `ok';
%% returns `none' when all entries were added.
%% The Port argument is unused.
addvalue_loop([], _Port, _Datasize) ->
    none;
addvalue_loop([{K, VSeed}|Rest], Port, Datasize) ->
    case permdb:addvalue(testdb, K, constructdata(VSeed, Datasize)) of
        ok ->
            addvalue_loop(Rest, Port, Datasize);
        Other ->
            %% The only accepted result is `ok'; the message used to
            %% claim "0 or 1".
            io:format("expected: ok got: ~p~n", [Other]),
            exit(mismatch)
    end.

%% @doc Verify that all of TestData can be read back; returns `ok'.
testget(_Filename, TestData, Datasize) ->
    getvalue_loop(TestData, none, Datasize),
    ok.

%% @doc Add all of TestData and commit. Returns `ok' when
%% permdb:commit/1 answers <<0>>, otherwise prints the answer and exits
%% with `mismatch'.
testadd(_Filename, TestData, Datasize) ->
    addvalue_loop(TestData, none, Datasize),
    case permdb:commit(testdb) of
        <<0>> ->
            ok;
        Other ->
            io:format("commit expected: 0 got: ~p~n", [Other]),
            exit(mismatch)
    end.

%% @doc Stop the database, then wait 100 ms before returning.
stop() ->
    teststop(),
    receive
    after
        100 ->
            ok
    end.

%% @doc Regression test entry point.
%%
%% Phase 1 adds 10 entries of 99 bytes and reads them back: directly,
%% after a restart, after deleting the ".idx" file, after one more
%% restart, and after overwriting one byte 120 bytes before the end of
%% the ".idx" file.
%%
%% Phase 2 starts from empty files, checks that all keys are reported
%% as `noentry', adds growing prefixes of the test data in four
%% separate commits and reads the last one back before and after a
%% restart.
%%
%% Any failed expectation crashes the script (badmatch or
%% exit(mismatch)).
main([]) ->
    {ok, Cwd} = file:get_cwd(),
    code:add_path(Cwd ++ "/ebin"),
    Size = 10,
    Datasize = 99,
    Filename = "testpermdb",
    IndexFile = Filename ++ ".idx",
    remove_db(Filename),
    {Time1, TestData} = timer:tc(fun () -> gentestdata(Size) end),
    EmptyTestData = genemptytestdata(Size),
    io:format("Init with ~p entries: ~s~n", [Size, timeprint(Time1)]),
    Testget = fun () ->
                      timed("Get", Size, fun () -> testget(Filename, TestData, Datasize) end)
              end,

    testinit(Filename),
    timed("Add", Size, fun () -> testadd(Filename, TestData, Datasize) end),
    Testget(),
    stop(),

    %% Plain restart.
    restart_and_run(Filename, Testget),

    %% Restart without an index file, then once more with whatever the
    %% previous run left behind.
    file:delete(IndexFile),
    restart_and_run(Filename, Testget),
    restart_and_run(Filename, Testget),

    %% Restart with a damaged index file.
    damage_index(IndexFile),
    restart_and_run(Filename, Testget),

    io:format("------------------------------------------------------------~n", []),
    remove_db(Filename),
    testinit(Filename),
    timed("Get", Size, fun () -> testget(Filename, EmptyTestData, Datasize) end),
    testadd(Filename, gentestdata(1), Datasize),
    testadd(Filename, gentestdata(1+2), Datasize),
    testadd(Filename, gentestdata(1+2+3), Datasize),
    testadd(Filename, gentestdata(1+2+3+4), Datasize),
    testget(Filename, gentestdata(1+2+3+4), Datasize),
    stop(),

    testinit(Filename),
    testget(Filename, gentestdata(1+2+3+4), Datasize),
    stop(),

    ok.

%% Delete the data file and its index; both deletions are best effort
%% because the files need not exist.
remove_db(Filename) ->
    file:delete(Filename),
    file:delete(Filename ++ ".idx").

%% Start the database, run Fun, and stop the database again.
restart_and_run(Filename, Fun) ->
    testinit(Filename),
    Fun(),
    stop().

%% Time Fun (which must return `ok') and print throughput figures for
%% Size entries, labelled with Verb.
timed(Verb, Size, Fun) ->
    {Time2, ok} = timer:tc(Fun),
    %% timer:tc/1 may report 0 microseconds; avoid dividing by zero.
    Rate = Size*1000000/max(Time2, 1),
    io:format("~s ~p entries: ~s ~.1f entries/s (~.2f microseconds)~n", [Verb, Size, timeprint(Time2), Rate, Time2/Size]).

%% Overwrite the byte 120 bytes before the end of IndexFile with a zero
%% byte. The handle is closed even when positioning or writing fails.
damage_index(IndexFile) ->
    {ok, File} = file:open(IndexFile, [read, write, binary]),
    try
        {ok, _Position} = file:position(File, {eof, -120}),
        ok = file:write(File, <<0>>)
    after
        file:close(File)
    end.

%% Patch footer (kept for provenance):
%%   -- cgit v1.1