summary | refs | log | tree | commit | diff
path: root/src/r3_hex_tarball.erl
diff options
context:
space:
mode:
author Bryan Paxton <39971740+starbelly@users.noreply.github.com> 2019-03-07 10:54:01 -0600
committer Tristan Sloughter <t@crashfast.com> 2019-03-07 09:54:01 -0700
commit 209c02ec57c2cc3207ee0174c3af3675b8dc8f79 (patch)
tree e07b25a27a2c4cc438db75d8d066179b4e57f3f7 /src/r3_hex_tarball.erl
parent 9e2e54afd74104ecb05c55e033803f41932eb940 (diff)
Fix fetching of private packages from orgs on hex repos (#2020)
- vendor in hex_core at v0.5.0
- Change where repo_name should be the org and not the parent
- Changed rebar_utils:url_append_path/2 to not explicitly add a '?', this is returned in the Query chunk by http_uri:parse/1 (e.g., "?foo=bar")
- update organization_merging test to expect the sub-repo as the repo_name
- Add tests for rebar_utils:url_append_path/2
- Stop referencing/setting "organization" in config and use new organization settings (api_repository and repo_organization)
- Do not set (assume) the read key is valid for any/every repo
- Set repo_organization and api_repository to org
- Update tests to check for new config opts
Diffstat (limited to 'src/r3_hex_tarball.erl')
-rw-r--r-- src/r3_hex_tarball.erl 507
1 files changed, 507 insertions, 0 deletions
diff --git a/src/r3_hex_tarball.erl b/src/r3_hex_tarball.erl
new file mode 100644
index 0000000..dd2c77a
--- /dev/null
+++ b/src/r3_hex_tarball.erl
@@ -0,0 +1,507 @@
+%% Vendored from hex_core v0.5.0, do not edit manually
+
+-module(r3_hex_tarball).
+-export([create/2, create_docs/1, unpack/2, format_checksum/1, format_error/1]).
+-ifdef(TEST).
+-export([do_decode_metadata/1, gzip/1, normalize_requirements/1]).
+-endif.
+%% Tarball format version that create/2 stores in the outer VERSION entry.
+-define(VERSION, <<"3">>).
+%% Upper bound in bytes (8 MiB) for the size of a tarball.
+-define(TARBALL_MAX_SIZE, 8 * 1024 * 1024).
+%% Upper bound in bytes (64 MiB) for the size of the uncompressed contents.
+-define(TARBALL_MAX_UNCOMPRESSED_SIZE, 64 * 1024 * 1024).
+%% Top-level file names paired with the build tool they indicate; used by
+%% guess_build_tools/1 when the metadata does not list any build tools.
+-define(BUILD_TOOL_FILES, [
+ {<<"mix.exs">>, <<"mix">>},
+ {<<"rebar.config">>, <<"rebar3">>},
+ {<<"rebar">>, <<"rebar3">>},
+ {<<"Makefile">>, <<"make">>},
+ {<<"Makefile.win">>, <<"make">>}
+]).
+-include_lib("kernel/include/file.hrl").
+
+%% Raw (not base16-encoded) SHA-256 digest, as produced by checksum/1 and
+%% checksum/3.
+-type checksum() :: binary().
+%% NOTE(review): unpack/2 in `memory' mode returns the value produced by
+%% r3_hex_erl_tar:extract/2, which its doc example shows as a list of
+%% `{Filename, Binary}' pairs - confirm whether a map is really intended here.
+-type contents() :: #{filename() => binary()}.
+-type filename() :: string().
+%% NOTE(review): add_files/2 only accepts lists, and add_file/2 also accepts
+%% `{filename(), binary()}' entries - this type looks out of sync; confirm.
+-type files() :: [filename() | {filename(), filename()}] | contents().
+-type metadata() :: map().
+%% Tarball bytes held in memory.
+-type tarball() :: binary().
+
+%%====================================================================
+%% API functions
+%%====================================================================
+
+%% @doc
+%% Creates a package tarball.
+%%
+%% The metadata is encoded as Erlang terms, `Files' are packed into a gzipped
+%% inner tarball, and a SHA-256 checksum is computed over the version, the
+%% encoded metadata and the compressed contents. All four parts are then
+%% wrapped into the (uncompressed) outer tarball.
+%%
+%% Returns `{error, {tarball, too_big}}' when the outer tarball is larger than
+%% `?TARBALL_MAX_SIZE' or the uncompressed contents are larger than
+%% `?TARBALL_MAX_UNCOMPRESSED_SIZE'.
+%%
+%% Examples:
+%%
+%% ```
+%% > Metadata = #{<<"name">> => <<"foo">>, <<"version">> => <<"1.0.0">>},
+%% > Files = [{"src/foo.erl", <<"-module(foo).">>}],
+%% > {ok, {Tarball, Checksum}} = r3_hex_tarball:create(Metadata, Files).
+%% > Tarball.
+%% <<86,69,...>>
+%% > Checksum.
+%% <<40,32,...>>
+%% '''
+%% @end
+-spec create(metadata(), files()) ->
+    {ok, {tarball(), checksum()}} | {error, {tarball, too_big}}.
+create(Metadata, Files) ->
+    MetadataBinary = encode_metadata(Metadata),
+    ContentsTarball = create_memory_tarball(Files),
+    ContentsTarballCompressed = gzip(ContentsTarball),
+    Checksum = checksum(?VERSION, MetadataBinary, ContentsTarballCompressed),
+    ChecksumBase16 = encode_base16(Checksum),
+
+    OuterFiles = [
+        {"VERSION", ?VERSION},
+        {"CHECKSUM", ChecksumBase16},
+        {"metadata.config", MetadataBinary},
+        {"contents.tar.gz", ContentsTarballCompressed}
+    ],
+
+    Tarball = create_memory_tarball(OuterFiles),
+
+    %% The compressed limit applies to the outer tarball, the uncompressed
+    %% limit to the inner contents before gzip.
+    TooBig = byte_size(Tarball) > ?TARBALL_MAX_SIZE
+        orelse byte_size(ContentsTarball) > ?TARBALL_MAX_UNCOMPRESSED_SIZE,
+
+    case TooBig of
+        true ->
+            {error, {tarball, too_big}};
+
+        false ->
+            {ok, {Tarball, Checksum}}
+    end.
+
+%% @doc
+%% Creates a docs tarball.
+%%
+%% `Files' are packed into a tarball which is then gzipped; the returned
+%% checksum is the SHA-256 digest of the gzipped tarball.
+%%
+%% Returns `{error, {tarball, too_big}}' when the gzipped tarball is larger
+%% than `?TARBALL_MAX_SIZE' or the uncompressed tarball is larger than
+%% `?TARBALL_MAX_UNCOMPRESSED_SIZE'.
+%%
+%% Examples:
+%%
+%% ```
+%% > Files = [{"doc/index.html", <<"Docs">>}],
+%% > {ok, {Tarball, Checksum}} = r3_hex_tarball:create_docs(Files).
+%% > Tarball.
+%% %%=> <<86,69,...>>
+%% > Checksum.
+%% %%=> <<40,32,...>>
+%% '''
+%% @end
+-spec create_docs(files()) ->
+    {ok, {tarball(), checksum()}} | {error, {tarball, too_big}}.
+create_docs(Files) ->
+    UncompressedTarball = create_memory_tarball(Files),
+    Tarball = gzip(UncompressedTarball),
+    Checksum = checksum(Tarball),
+
+    TooBig = byte_size(Tarball) > ?TARBALL_MAX_SIZE
+        orelse byte_size(UncompressedTarball) > ?TARBALL_MAX_UNCOMPRESSED_SIZE,
+
+    case TooBig of
+        true ->
+            {error, {tarball, too_big}};
+
+        false ->
+            {ok, {Tarball, Checksum}}
+    end.
+
+%% @doc
+%% Unpacks a package tarball.
+%%
+%% Tarballs larger than `?TARBALL_MAX_SIZE' are rejected up front with
+%% `{error, {tarball, too_big}}'. Otherwise the outer tarball is extracted in
+%% memory and its entries are validated before the inner contents are
+%% unpacked.
+%%
+%% With `memory' as the second argument the inner contents are returned under
+%% the `contents' key as a list of `{Filename, Binary}' pairs. With a
+%% directory path they are extracted into that directory and no `contents'
+%% key is returned.
+%%
+%% Examples:
+%%
+%% ```
+%% > r3_hex_tarball:unpack(Tarball, memory).
+%% {ok,#{checksum => <<...>>,
+%% contents => [{"src/foo.erl",<<"-module(foo).">>}],
+%% metadata => #{<<"name">> => <<"foo">>, ...}}}
+%%
+%% > r3_hex_tarball:unpack(Tarball, "path/to/unpack").
+%% {ok,#{checksum => <<...>>,
+%% metadata => #{<<"name">> => <<"foo">>, ...}}}
+%% '''
+%% @end
+-spec unpack(tarball(), memory) ->
+                {ok, #{checksum => checksum(),
+                       metadata => metadata(),
+                       contents => [{filename(), binary()}]}} |
+                {error, term()};
+            (tarball(), filename()) ->
+                {ok, #{checksum => checksum(), metadata => metadata()}} |
+                {error, term()}.
+unpack(Tarball, _) when byte_size(Tarball) > ?TARBALL_MAX_SIZE ->
+    {error, {tarball, too_big}};
+
+unpack(Tarball, Output) ->
+    case r3_hex_erl_tar:extract({binary, Tarball}, [memory]) of
+        {ok, []} ->
+            {error, {tarball, empty}};
+
+        {ok, FileList} ->
+            %% Entries are looked up by name from here on, hence the map.
+            do_unpack(maps:from_list(FileList), Output);
+
+        {error, Reason} ->
+            {error, {tarball, Reason}}
+    end.
+
+%% @doc
+%% Renders a raw checksum as an upper-case base16 (hexadecimal) binary.
+-spec format_checksum(checksum()) -> binary().
+format_checksum(RawChecksum) ->
+    encode_base16(RawChecksum).
+
+%% @doc
+%% Converts an error reason term to a human-readable error message string.
+%%
+%% Accepts the reasons returned by `create/2', `create_docs/1' and
+%% `unpack/2'. Checksum mismatches are accepted both bare and wrapped in
+%% `{tarball, _}', the latter being the shape produced while unpacking.
+-spec format_error(term()) -> string().
+format_error({tarball, empty}) -> "empty tarball";
+format_error({tarball, too_big}) -> "tarball is too big";
+format_error({tarball, {missing_files, Files}}) -> io_lib:format("missing files: ~p", [Files]);
+format_error({tarball, {invalid_files, Files}}) -> io_lib:format("invalid files: ~p", [Files]);
+format_error({tarball, {bad_version, Vsn}}) -> io_lib:format("unsupported version: ~p", [Vsn]);
+format_error({tarball, invalid_checksum}) -> "invalid tarball checksum";
+%% Must come before the generic {tarball, Reason} clause, which would hand the
+%% mismatch tuple to the tar module's formatter.
+format_error({tarball, {checksum_mismatch, ExpectedChecksum, ActualChecksum}}) ->
+    format_error({checksum_mismatch, ExpectedChecksum, ActualChecksum});
+format_error({tarball, Reason}) -> "tarball error, " ++ r3_hex_erl_tar:format_error(Reason);
+format_error({inner_tarball, Reason}) -> "inner tarball error, " ++ r3_hex_erl_tar:format_error(Reason);
+format_error({metadata, invalid_terms}) -> "error reading package metadata: invalid terms";
+format_error({metadata, not_key_value}) -> "error reading package metadata: not in key-value format";
+format_error({metadata, Reason}) -> "error reading package metadata: " ++ r3_safe_erl_term:format_error(Reason);
+
+format_error({checksum_mismatch, ExpectedChecksum, ActualChecksum}) ->
+    io_lib:format(
+        "tarball checksum mismatch~n~n" ++
+        "Expected (base16-encoded): ~s~n" ++
+        "Actual (base16-encoded): ~s",
+        [encode_base16(ExpectedChecksum), encode_base16(ActualChecksum)]).
+
+%%====================================================================
+%% Internal functions
+%%====================================================================
+
+%% SHA-256 digest over the concatenation of version, metadata and contents.
+checksum(Vsn, Meta, Contents) ->
+    crypto:hash(sha256, <<Vsn/binary, Meta/binary, Contents/binary>>).
+
+%% SHA-256 digest of a single binary.
+checksum(Bin) ->
+    crypto:hash(sha256, <<Bin/binary>>).
+
+%% Serializes the metadata map: each key/value pair is binarified,
+%% pretty-printed as an Erlang term and terminated with ".\n"; the pieces are
+%% joined into one binary.
+encode_metadata(Meta) ->
+    iolist_to_binary([encode_metadata_pair(Pair) || Pair <- maps:to_list(Meta)]).
+
+%% Encodes a single metadata pair as a UTF-8 binary ending in ".\n".
+encode_metadata_pair(Pair) ->
+    Printed = io_lib_pretty:print(binarify(Pair), [{encoding, utf8}]),
+    unicode:characters_to_binary([Printed, ".\n"]).
+
+%% Runs the outer tarball entries through the validation steps in order and
+%% hands the result to finish_unpack/1. Every step after the first passes an
+%% `{error, _}' from an earlier step through untouched.
+do_unpack(Files, Output) ->
+    Initial = #{
+        checksum => undefined,
+        contents => undefined,
+        files => Files,
+        metadata => undefined,
+        output => Output
+    },
+    Steps = [
+        fun check_files/1,
+        fun check_version/1,
+        fun check_checksum/1,
+        fun decode_metadata/1
+    ],
+    finish_unpack(lists:foldl(fun(Step, Acc) -> Step(Acc) end, Initial, Steps)).
+
+%% Final unpack step: extracts the inner contents and builds the result map.
+%% In directory mode the raw metadata is also written next to the extracted
+%% files; in memory mode the contents are added to the result instead.
+finish_unpack({error, _} = Failure) ->
+    Failure;
+finish_unpack(#{metadata := Metadata, files := Files, output := Output}) ->
+    _ = maps:get("VERSION", Files),
+    Checksum = decode_base16(maps:get("CHECKSUM", Files)),
+    InnerTarball = maps:get("contents.tar.gz", Files),
+    Result = #{checksum => Checksum, metadata => Metadata},
+    case unpack_tarball(InnerTarball, Output) of
+        ok ->
+            copy_metadata_config(Output, maps:get("metadata.config", Files)),
+            {ok, Result};
+
+        {ok, Contents} ->
+            {ok, Result#{contents => Contents}};
+
+        {error, Reason} ->
+            {error, {inner_tarball, Reason}}
+    end.
+
+%% Writes the raw metadata into the output directory as `hex_metadata.config';
+%% crashes if the file cannot be written.
+copy_metadata_config(Output, MetadataBinary) ->
+    Target = filename:join(Output, "hex_metadata.config"),
+    ok = file:write_file(Target, MetadataBinary).
+
+%% Verifies that the outer tarball holds exactly the four expected entries.
+check_files(#{files := Files} = State) ->
+    Required = ["VERSION", "CHECKSUM", "metadata.config", "contents.tar.gz"],
+    files_check_result(diff_keys(Files, Required, []), State).
+
+%% Translates the diff_keys/3 outcome into the state or a tarball error.
+files_check_result(ok, State) ->
+    State;
+files_check_result({error, {missing_keys, Missing}}, _State) ->
+    {error, {tarball, {missing_files, Missing}}};
+files_check_result({error, {unknown_keys, Unknown}}, _State) ->
+    {error, {tarball, {invalid_files, Unknown}}}.
+
+%% Accepts only tarballs whose VERSION entry equals the supported version.
+check_version({error, _} = Failure) ->
+    Failure;
+check_version(#{files := #{"VERSION" := ?VERSION}} = State) ->
+    State;
+check_version(#{files := Files}) ->
+    {error, {tarball, {bad_version, maps:get("VERSION", Files)}}}.
+
+%% Validates the CHECKSUM entry against the digest computed from the VERSION,
+%% metadata.config and contents.tar.gz entries.
+%%
+%% Returns the state with `checksum' set on success,
+%% `{error, {tarball, invalid_checksum}}' when the entry is not valid base16
+%% or does not decode to 32 bytes, and
+%% `{error, {tarball, {checksum_mismatch, Expected, Actual}}}' otherwise.
+check_checksum({error, _} = Error) ->
+    Error;
+check_checksum(#{files := Files} = State) ->
+    ChecksumBase16 = maps:get("CHECKSUM", Files),
+
+    %% The entry comes from the tarball being unpacked, so a non-hex character
+    %% (function_clause in unhex/1) is reported instead of crashing the caller.
+    try decode_base16(ChecksumBase16) of
+        ExpectedChecksum when byte_size(ExpectedChecksum) /= 32 ->
+            {error, {tarball, invalid_checksum}};
+
+        ExpectedChecksum ->
+            compare_checksums(ExpectedChecksum, Files, State)
+    catch
+        error:function_clause ->
+            {error, {tarball, invalid_checksum}}
+    end.
+
+%% Computes the actual digest and compares it with the expected one.
+compare_checksums(ExpectedChecksum, Files, State) ->
+    Version = maps:get("VERSION", Files),
+    MetadataBinary = maps:get("metadata.config", Files),
+    ContentsBinary = maps:get("contents.tar.gz", Files),
+
+    case checksum(Version, MetadataBinary, ContentsBinary) of
+        ExpectedChecksum ->
+            maps:put(checksum, ExpectedChecksum, State);
+
+        ActualChecksum ->
+            {error, {tarball, {checksum_mismatch, ExpectedChecksum, ActualChecksum}}}
+    end.
+
+%% Decodes and normalizes the metadata.config entry, storing the result under
+%% `metadata'. Anything that is not a map coming back from the decoder (an
+%% error tuple) is returned as is.
+decode_metadata({error, _} = Failure) ->
+    Failure;
+decode_metadata(#{files := #{"metadata.config" := Binary}} = State) when is_binary(Binary) ->
+    case do_decode_metadata(Binary) of
+        Decoded when is_map(Decoded) ->
+            State#{metadata => normalize_metadata(Decoded)};
+        NotAMap ->
+            NotAMap
+    end.
+
+%% Parses the metadata binary into a map. Returns `{error, {metadata, _}}'
+%% when tokenizing fails, when the tokens are not acceptable terms, or when
+%% the terms are not key/value pairs.
+do_decode_metadata(Binary) when is_binary(Binary) ->
+    {ok, Chars} = characters_to_list(Binary),
+
+    case r3_safe_erl_term:string(Chars) of
+        {ok, Tokens, _EndLine} ->
+            tokens_to_metadata(Tokens);
+
+        {error, {_ErrorLine, r3_safe_erl_term, Reason}, _EndLine} ->
+            {error, {metadata, Reason}}
+    end.
+
+%% Turns scanned tokens into a map, mapping the two expected failures to
+%% metadata errors.
+tokens_to_metadata(Tokens) ->
+    try
+        maps:from_list(r3_safe_erl_term:terms(Tokens))
+    catch
+        error:function_clause ->
+            {error, {metadata, invalid_terms}};
+
+        error:badarg ->
+            {error, {metadata, not_key_value}}
+    end.
+
+%% Converts a binary to a list of code points.
+%%
+%% The binary is first read as UTF-8. If that fails, either because it holds
+%% invalid bytes (`error') or because it ends in the middle of a multi-byte
+%% sequence (`incomplete'), it is re-read as Latin-1, where every byte maps to
+%% one character.
+%%
+%% Returns `{ok, List}'; a non-list result of the Latin-1 conversion is
+%% returned unchanged.
+characters_to_list(Binary) ->
+    case unicode:characters_to_list(Binary) of
+        List when is_list(List) ->
+            {ok, List};
+        {Failure, _, _} when Failure =:= error; Failure =:= incomplete ->
+            case unicode:characters_to_list(Binary, latin1) of
+                Latin1 when is_list(Latin1) -> {ok, Latin1};
+                Other -> Other
+            end
+    end.
+
+%% Normalizes decoded metadata: requirements, links and extra are converted
+%% to maps where possible, then build tools are guessed if not listed.
+normalize_metadata(Metadata) ->
+    Updates = [
+        {<<"requirements">>, fun normalize_requirements/1},
+        {<<"links">>, fun try_into_map/1},
+        {<<"extra">>, fun try_into_map/1}
+    ],
+    Apply = fun({Key, Fun}, Acc) -> maybe_update_with(Key, Fun, Acc) end,
+    guess_build_tools(lists:foldl(Apply, Metadata, Updates)).
+
+%% Converts requirements to a map keyed by name. A non-empty list whose first
+%% element is itself a list is treated as the legacy format (one property
+%% list per requirement); anything else goes through try_into_map/2.
+normalize_requirements([First | _] = Requirements) when is_list(First) ->
+    maps:from_list([normalize_legacy_requirement(R) || R <- Requirements]);
+normalize_requirements(Requirements) ->
+    try_into_map(fun normalize_normal_requirement/1, Requirements).
+
+%% Keeps the requirement name and converts its properties to a map when they
+%% form a list of pairs.
+normalize_normal_requirement({Name, Props}) ->
+    {Name, try_into_map(Props)}.
+
+%% Turns a legacy requirement (property list containing a <<"name">> entry)
+%% into `{Name, PropsWithoutName}'. Crashes when the name is missing.
+normalize_legacy_requirement(Requirement) ->
+    Props = maps:from_list(Requirement),
+    {maps:get(<<"name">>, Props), maps:remove(<<"name">>, Props)}.
+
+%% Fills in <<"build_tools">> when the metadata does not already carry a list
+%% for it: top-level entries of <<"files">> are matched against
+%% ?BUILD_TOOL_FILES and the matching tools are stored sorted and without
+%% duplicates. Metadata without <<"files">> is returned unchanged.
+guess_build_tools(#{<<"build_tools">> := Tools} = Metadata) when is_list(Tools) ->
+    Metadata;
+guess_build_tools(#{<<"files">> := Filenames} = Metadata) ->
+    TopLevel = [Name || Name <- Filenames, filename:dirname(binary_to_list(Name)) == "."],
+    Detected = [Tool || {Marker, Tool} <- ?BUILD_TOOL_FILES, lists:member(Marker, TopLevel)],
+    Metadata#{<<"build_tools">> => lists:usort(Detected)};
+guess_build_tools(Metadata) ->
+    Metadata.
+
+%%====================================================================
+%% Tar Helpers
+%%====================================================================
+
+%% Extracts the gzipped inner tarball, either into memory or into the
+%% directory `Output'. After a successful extraction to disk the mtime of
+%% every extracted path is updated.
+unpack_tarball(ContentsBinary, memory) ->
+    r3_hex_erl_tar:extract({binary, ContentsBinary}, [memory, compressed]);
+unpack_tarball(ContentsBinary, Output) ->
+    Extracted = r3_hex_erl_tar:extract({binary, ContentsBinary}, [{cwd, Output}, compressed]),
+    touch_extracted(Extracted, Output).
+
+%% Updates mtimes below `Output' when extraction succeeded; any other result
+%% is passed through.
+touch_extracted(ok, Output) ->
+    Touch = fun(Relative) -> try_updating_mtime(filename:join(Output, Relative)) end,
+    lists:foreach(Touch, filelib:wildcard("**", Output)),
+    ok;
+touch_extracted(Other, _Output) ->
+    Other.
+
+%% Best effort: sets the mtime of Path to the current universal time and
+%% ignores the outcome, so bad symlinks fail silently.
+try_updating_mtime(Path) ->
+    Now = calendar:universal_time(),
+    Info = #file_info{mtime = Now},
+    _ = file:write_file_info(Path, Info, [{time, universal}]),
+    ok.
+
+%% Builds a tarball from `Files' and returns it as a binary.
+%%
+%% The archive is written to a temporary file in the current working
+%% directory and read back. The temporary file is removed on every exit path,
+%% including failures while adding files, closing the archive or reading it.
+create_memory_tarball(Files) ->
+    Path = tmp_path(),
+
+    try
+        {ok, Tar} = r3_hex_erl_tar:open(Path, [write]),
+
+        try
+            add_files(Tar, Files)
+        after
+            ok = r3_hex_erl_tar:close(Tar)
+        end,
+        {ok, Tarball} = file:read_file(Path),
+        Tarball
+    after
+        %% Best effort: a failed delete must not mask the original error.
+        _ = file:delete(Path)
+    end.
+
+%% Relative file name "tmp_" followed by the base16 encoding of 32 random
+%% bytes.
+tmp_path() ->
+    Suffix = encode_base16(crypto:strong_rand_bytes(32)),
+    "tmp_" ++ binary_to_list(Suffix).
+
+%% Adds every entry of the list to the open tar archive, in order.
+add_files(Tar, Files) when is_list(Files) ->
+    [add_file(Tar, Entry) || Entry <- Files].
+
+%% Adds one entry to the open tar archive. An entry is either
+%% `{Name, Binary}' (in-memory contents), a bare path (archived under its own
+%% name) or `{Name, SourcePath}' (file on disk archived as Name).
+add_file(Tar, {Name, Contents}) when is_list(Name), is_binary(Contents) ->
+    ok = r3_hex_erl_tar:add(Tar, Contents, Name, tar_opts());
+add_file(Tar, Name) when is_list(Name) ->
+    add_file(Tar, {Name, Name});
+add_file(Tar, {Name, Source}) when is_list(Name), is_list(Source) ->
+    {ok, #file_info{type = Type, mode = Mode}} = file:read_link_info(Source, []),
+    add_path(Type, Mode, Tar, Name, Source).
+
+%% Symlinks are added as links, directories only when empty, and everything
+%% else is read and added with its file mode.
+add_path(symlink, _Mode, Tar, Name, Source) ->
+    ok = r3_hex_erl_tar:add(Tar, {Name, Source}, tar_opts());
+add_path(directory, _Mode, Tar, Name, Source) ->
+    case file:list_dir(Source) of
+        {ok, []} ->
+            r3_hex_erl_tar:add(Tar, {Name, Source}, tar_opts());
+
+        {ok, _} ->
+            ok
+    end;
+add_path(_Type, Mode, Tar, Name, Source) ->
+    {ok, Contents} = file:read_file(Source),
+    ok = r3_hex_erl_tar:add(Tar, Contents, Name, Mode, tar_opts()).
+
+%% Fixed tar entry options: all timestamps are 2000-01-01 00:00:00 expressed
+%% in seconds since 1970-01-01, and uid/gid are 0.
+tar_opts() ->
+    Midnight = fun(Date) -> calendar:datetime_to_gregorian_seconds({Date, {0, 0, 0}}) end,
+    Epoch = Midnight({2000, 1, 1}) - Midnight({1970, 1, 1}),
+    [{atime, Epoch}, {mtime, Epoch}, {ctime, Epoch}, {uid, 0}, {gid, 0}].
+
+%% Reproducible gzip by not setting mtime and OS
+%%
+%% From https://tools.ietf.org/html/rfc1952
+%%
+%% +---+---+---+---+---+---+---+---+---+---+
+%% |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
+%% +---+---+---+---+---+---+---+---+---+---+
+%%
+%% +=======================+
+%% |...compressed blocks...| (more-->)
+%% +=======================+
+%%
+%% +---+---+---+---+---+---+---+---+
+%% | CRC32 | ISIZE |
+%% +---+---+---+---+---+---+---+---+
+%% Wraps the raw deflate stream in a gzip header whose FLG, MTIME, XFL and OS
+%% fields are all zero, followed by the little-endian CRC32 and size trailer.
+gzip(Uncompressed) ->
+    Deflated = gzip_no_header(Uncompressed),
+    Header = <<31, 139, 8, 0, 0:32, 0, 0>>,
+    Trailer = <<(erlang:crc32(Uncompressed)):32/little, (byte_size(Uncompressed)):32/little>>,
+    iolist_to_binary([Header, Deflated, Trailer]).
+
+%% Deflates the input with window bits -15 (no zlib or gzip wrapper) and
+%% returns the result as one binary. The zlib stream is always closed.
+gzip_no_header(Uncompressed) ->
+    Stream = zlib:open(),
+
+    try
+        zlib:deflateInit(Stream, default, deflated, -15, 8, default),
+        Chunks = zlib:deflate(Stream, Uncompressed, finish),
+        zlib:deflateEnd(Stream),
+        iolist_to_binary(Chunks)
+    after
+        zlib:close(Stream)
+    end.
+
+%%====================================================================
+%% Helpers
+%%====================================================================
+
+%% Recursively converts a term for metadata encoding: atoms other than
+%% `undefined', `true' and `false' become UTF-8 binaries, maps become lists of
+%% pairs, and lists and pairs are converted element-wise. Binaries and numbers
+%% are kept as they are.
+binarify(Term) when is_binary(Term); is_number(Term) ->
+    Term;
+binarify(undefined) ->
+    undefined;
+binarify(Bool) when is_boolean(Bool) ->
+    Bool;
+binarify(Atom) when is_atom(Atom) ->
+    atom_to_binary(Atom, utf8);
+binarify(List) when is_list(List) ->
+    [binarify(Element) || Element <- List];
+binarify({Key, Value}) ->
+    {binarify(Key), binarify(Value)};
+binarify(Map) when is_map(Map) ->
+    lists:map(fun binarify/1, maps:to_list(Map)).
+
+%% Compares the keys of Map with the required and optional key lists.
+%% Returns `ok' when all required keys are present and no other keys besides
+%% the optional ones exist. Unknown keys are reported in preference to
+%% missing ones.
+diff_keys(Map, RequiredKeys, OptionalKeys) ->
+    Present = maps:keys(Map),
+    Missing = RequiredKeys -- Present,
+
+    case Present -- (RequiredKeys ++ OptionalKeys) of
+        [_ | _] = Unknown ->
+            {error, {unknown_keys, Unknown}};
+
+        [] when Missing =:= [] ->
+            ok;
+
+        [] ->
+            {error, {missing_keys, Missing}}
+    end.
+
+%% Replaces the value stored under Key with Fun(Value) when the key exists;
+%% returns the map unchanged otherwise.
+maybe_update_with(Key, Fun, Map) ->
+    case maps:is_key(Key, Map) of
+        true -> maps:update(Key, Fun(maps:get(Key, Map)), Map);
+        false -> Map
+    end.
+
+%% Converts a list of 2-tuples into a map; any other input is returned as is.
+try_into_map(Input) ->
+    try_into_map(fun(Pair) -> Pair end, Input).
+
+%% Same as try_into_map/1, but applies Fun to every pair before building the
+%% map. The input is only converted when every element is a 2-tuple.
+try_into_map(Fun, Input) when is_list(Input) ->
+    IsPair = fun(E) -> is_tuple(E) andalso (tuple_size(E) == 2) end,
+    case lists:all(IsPair, Input) of
+        true -> maps:from_list([Fun(Pair) || Pair <- Input]);
+        false -> Input
+    end;
+try_into_map(_Fun, Input) ->
+    Input.
+
+%% Encodes a binary of any length as upper-case base16 (two hexadecimal
+%% digits per byte). For 32-byte input this yields the same 64-character
+%% result as before; shorter, longer and empty binaries are now accepted
+%% as well.
+encode_base16(Binary) when is_binary(Binary) ->
+    << <<(hex_digit(Nibble))>> || <<Nibble:4>> <= Binary >>.
+
+%% Maps a 4-bit value to its upper-case hexadecimal character.
+hex_digit(Nibble) when Nibble < 10 -> $0 + Nibble;
+hex_digit(Nibble) -> $A + Nibble - 10.
+
+%% Based on https://github.com/goj/base16/blob/master/src/base16.erl
+%% (C) 2012, Erlang Solutions Ltd.
+
+%% Decodes base16 text two characters at a time into bytes. A trailing
+%% unpaired character is ignored; a non-hex character crashes in unhex/1.
+decode_base16(Base16) ->
+    << <<(unhex(Hi) * 16 + unhex(Lo))>> || <<Hi, Lo>> <= Base16 >>.
+
+%% Value (0..15) of one hexadecimal digit character, accepting both cases.
+%% Any other character raises function_clause.
+unhex(Digit) when Digit >= $0, Digit =< $9 ->
+    Digit - $0;
+unhex(Digit) when Digit >= $a, Digit =< $f ->
+    Digit - $a + 10;
+unhex(Digit) when Digit >= $A, Digit =< $F ->
+    Digit - $A + 10.