From 80c8ef847d996af04ec677a79555d640733641f2 Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Sun, 19 Oct 2014 01:37:29 +0200 Subject: db:get_by_leaf_hash(): Return notfound instead of crashing when no entry could be found. db:get_by_entry_hash(): Don't fetch index, isn't used and might not exist. index:add(): Allow writes at existing indices. --- src/index.erl | 64 +++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 26 deletions(-) (limited to 'src/index.erl') diff --git a/src/index.erl b/src/index.erl index 7871215..5169fbb 100644 --- a/src/index.erl +++ b/src/index.erl @@ -1,17 +1,18 @@ %%% Copyright (c) 2014, NORDUnet A/S. %%% See LICENSE for licensing information. -%% Implements an interface to a file pair (basename and basename.chksum) -%% that stores an ordered list of fixed-size entries. Entries can be -%% added at the end and are retrieved by index. The list can also be -%% truncated. +%% Implements an interface to a file pair (basename and +%% basename.chksum) that stores an ordered list of fixed-size entries. +%% Entries can be added at the end and are retrieved by index. Entries +%% can also be added at already existing indices, but then the +%% contents must be the same. %% -%% Writes(add, truncate, addlast) need to be serialized. +%% Writes(add, addlast) need to be serialized. %% TODO: Checksums -module(index). --export([get/2, add/3, addlast/2, truncate/2]). +-export([get/2, add/3, addlast/2]). -define(ENTRYSIZE, 32). -define(ENTRYSIZEINFILE, (?ENTRYSIZE*2+1)). 
@@ -21,27 +22,38 @@ add(Basepath, Index, Entry) when is_binary(Entry), size(Entry) == ?ENTRYSIZE -> case file:open(Basepath, [read, write, binary]) of {ok, File} -> {ok, Position} = file:position(File, eof), - case Index of - last when Position rem ?ENTRYSIZEINFILE == 0 -> - ok; - Index when is_integer(Index), - Index * ?ENTRYSIZEINFILE == Position -> - ok - end, + Mode = case Index of + last when Position rem ?ENTRYSIZEINFILE == 0 -> + write; + Index when is_integer(Index), + Index * ?ENTRYSIZEINFILE == Position -> + write; + Index when is_integer(Index), + Index * ?ENTRYSIZEINFILE < Position -> + read; + _ -> + util:exit_with_error(invalid, writefile, + "Index not valid") + end, EntryText = hex:bin_to_hexstr(Entry) ++ "\n", - ok = file:write(File, EntryText), - ok = file:close(File), - util:fsync([Basepath, filename:dirname(Basepath)]); - {error, Error} -> - util:exit_with_error(Error, writefile, - "Error opening file for writing") - end. - -truncate(Basepath, Index) -> - case file:open(Basepath, [read, write, binary]) of - {ok, File} -> - {ok, _Position} = file:position(File, Index * ?ENTRYSIZEINFILE), - ok = file:truncate(File), + case Mode of + write -> + ok = file:write(File, EntryText); + read -> + {ok, _Position} = + file:position(File, {bof, Index * ?ENTRYSIZEINFILE}), + {ok, OldEntryText} = file:read(File, ?ENTRYSIZEINFILE), + %% check that the written content is the same as + %% the old content + case binary_to_list(OldEntryText) of + EntryText -> + ok; + _ -> + util:exit_with_error(invalid, writefile, + "Written content not the" ++ + " same as old content") + end + end, ok = file:close(File), util:fsync([Basepath, filename:dirname(Basepath)]); {error, Error} -> -- cgit v1.1 From 2483f0cf09ccc4cf73558c7a85bbb51a72d29c3a Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Sat, 25 Oct 2014 15:22:09 +0200 Subject: Optimize db:get_by_indices by not fetching entry and implementing index:getrange --- src/index.erl | 41 ++++++++++++++++++++++++----------------- 
1 file changed, 24 insertions(+), 17 deletions(-) (limited to 'src/index.erl') diff --git a/src/index.erl b/src/index.erl index 5169fbb..bbc9a10 100644 --- a/src/index.erl +++ b/src/index.erl @@ -12,7 +12,7 @@ %% TODO: Checksums -module(index). --export([get/2, add/3, addlast/2]). +-export([get/2, getrange/3, add/3, addlast/2]). -define(ENTRYSIZE, 32). -define(ENTRYSIZEINFILE, (?ENTRYSIZE*2+1)). @@ -66,31 +66,38 @@ add(Basepath, Index, Entry) when is_binary(Entry), size(Entry) == ?ENTRYSIZE -> addlast(Basepath, Entry) -> add(Basepath, last, Entry). -%% From lib/stdlib/src/lists.erl. For supporting < R17. --spec droplast(nonempty_list()) -> list(). -droplast([_T]) -> []; -droplast([H|T]) -> [H|droplast(T)]. +decodedata(Binary) -> + lists:reverse(decodedata(Binary, [])). -decodedata(EntryText) when length(EntryText) == ?ENTRYSIZEINFILE -> - case [lists:last(EntryText)] of - "\n" -> - hex:hexstr_to_bin(droplast(EntryText)); - _ -> - util:exit_with_error(badformat, readindex, - "Index line not ending with linefeed") - end. +decodedata(<<>>, Acc) -> + Acc; +decodedata(<<Entry:?ENTRYSIZE/binary-unit:16, "\n", Rest/binary>>, Acc) -> + decodedata(Rest, [mochihex:to_bin(binary_to_list(Entry)) | Acc]); +decodedata(<<_:?ENTRYSIZE/binary-unit:16, _>>, _Acc) -> + util:exit_with_error(badformat, readindex, + "Index line not ending with linefeed"). -spec get(string(), integer()) -> binary(). get(Basepath, Index) -> + case getrange(Basepath, Index, Index) of + noentry -> + noentry; + [Entry] -> + Entry + end. + +-spec getrange(string(), integer(), integer()) -> [binary()]. 
+getrange(Basepath, Start, End) when Start =< End -> case file:open(Basepath, [read, binary]) of {ok, File} -> {ok, Filesize} = file:position(File, eof), if - Index * ?ENTRYSIZEINFILE + ?ENTRYSIZEINFILE =< Filesize -> + End * ?ENTRYSIZEINFILE + ?ENTRYSIZEINFILE =< Filesize -> {ok, _Position} = file:position(File, - Index * ?ENTRYSIZEINFILE), - {ok, EntryText} = file:read(File, ?ENTRYSIZEINFILE), - Entry = decodedata(binary_to_list(EntryText)), + Start * ?ENTRYSIZEINFILE), + {ok, EntryText} = + file:read(File, ?ENTRYSIZEINFILE * (End - Start + 1)), + Entry = decodedata(EntryText), file:close(File), Entry; true -> -- cgit v1.1 From ebc9d5bac1a69ba25044a73674b8e9ea18217f60 Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Sat, 25 Oct 2014 23:56:41 +0200 Subject: Optimize fetchnewentries --- src/index.erl | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/index.erl') diff --git a/src/index.erl b/src/index.erl index bbc9a10..c1e0352 100644 --- a/src/index.erl +++ b/src/index.erl @@ -77,6 +77,17 @@ decodedata(<<_:?ENTRYSIZE/binary-unit:16, _>>, _Acc) -> util:exit_with_error(badformat, readindex, "Index line not ending with linefeed"). +-spec size(string()) -> integer(). +size(Basepath) -> + case file:open(Basepath, [read, binary]) of + {ok, File} -> + {ok, Filesize} = file:position(File, eof), + Filesize mod ?ENTRYSIZEINFILE; + {error, Error} -> + util:exit_with_error(Error, readfile, + "Error opening file for reading") + end. + -spec get(string(), integer()) -> binary(). 
get(Basepath, Index) -> case getrange(Basepath, Index, Index) of -- cgit v1.1 From fb3b9591cc81158824db13818cf6320d5f4a0f7b Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Mon, 27 Oct 2014 01:28:32 +0100 Subject: Fix mistake in ebc9d5ba (Optimize fetchnewentries) --- src/index.erl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/index.erl') diff --git a/src/index.erl b/src/index.erl index c1e0352..96195e3 100644 --- a/src/index.erl +++ b/src/index.erl @@ -12,7 +12,7 @@ %% TODO: Checksums -module(index). --export([get/2, getrange/3, add/3, addlast/2]). +-export([get/2, getrange/3, add/3, addlast/2, indexsize/1]). -define(ENTRYSIZE, 32). -define(ENTRYSIZEINFILE, (?ENTRYSIZE*2+1)). @@ -77,18 +77,19 @@ decodedata(<<_:?ENTRYSIZE/binary-unit:16, _>>, _Acc) -> util:exit_with_error(badformat, readindex, "Index line not ending with linefeed"). --spec size(string()) -> integer(). -size(Basepath) -> +-spec indexsize(string()) -> integer(). +indexsize(Basepath) -> case file:open(Basepath, [read, binary]) of {ok, File} -> {ok, Filesize} = file:position(File, eof), - Filesize mod ?ENTRYSIZEINFILE; + lager:debug("file ~p size ~p", [Basepath, Filesize]), + Filesize div ?ENTRYSIZEINFILE; {error, Error} -> util:exit_with_error(Error, readfile, "Error opening file for reading") end. --spec get(string(), integer()) -> binary(). +-spec get(string(), integer()) -> binary() | noentry. get(Basepath, Index) -> case getrange(Basepath, Index, Index) of noentry -> @@ -99,6 +100,7 @@ get(Basepath, Index) -> -spec getrange(string(), integer(), integer()) -> [binary()]. 
getrange(Basepath, Start, End) when Start =< End -> + lager:debug("path ~p start ~p end ~p", [Basepath, Start, End]), case file:open(Basepath, [read, binary]) of {ok, File} -> {ok, Filesize} = file:position(File, eof), @@ -109,6 +111,7 @@ getrange(Basepath, Start, End) when Start =< End -> {ok, EntryText} = file:read(File, ?ENTRYSIZEINFILE * (End - Start + 1)), Entry = decodedata(EntryText), + lager:debug("entries ~p", [length(Entry)]), file:close(File), Entry; true -> -- cgit v1.1