From 209c02ec57c2cc3207ee0174c3af3675b8dc8f79 Mon Sep 17 00:00:00 2001 From: Bryan Paxton <39971740+starbelly@users.noreply.github.com> Date: Thu, 7 Mar 2019 10:54:01 -0600 Subject: Fix fetching of private packages from orgs on hex repos (#2020) - vendor in hex_core at v0.5.0 - Change where repo_name should be the org and not the parent - Changed rebar_utils:url_append_path/2 to not explicitly add a '?', this is returned in the Query chunk by http_uri:parse/1 (e.g., "?foo=bar") - update organization_merging test to expect the sub-repo as the repo_name - Add tests for rebar_utils:url_append_path/2 - Stop referencing/setting "organization" in config and use new organization settings (api_repository and repo_organization) - Do not set (assume) the read key is valid for any/every repo - Set repo_organization and api_repository to org - Update tests to check for new config opts --- src/r3_hex_tarball.erl | 507 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 507 insertions(+) create mode 100644 src/r3_hex_tarball.erl (limited to 'src/r3_hex_tarball.erl') diff --git a/src/r3_hex_tarball.erl b/src/r3_hex_tarball.erl new file mode 100644 index 0000000..dd2c77a --- /dev/null +++ b/src/r3_hex_tarball.erl @@ -0,0 +1,507 @@ +%% Vendored from hex_core v0.5.0, do not edit manually + +-module(r3_hex_tarball). +-export([create/2, create_docs/1, unpack/2, format_checksum/1, format_error/1]). +-ifdef(TEST). +-export([do_decode_metadata/1, gzip/1, normalize_requirements/1]). +-endif. +-define(VERSION, <<"3">>). +-define(TARBALL_MAX_SIZE, 8 * 1024 * 1024). +-define(TARBALL_MAX_UNCOMPRESSED_SIZE, 64 * 1024 * 1024). +-define(BUILD_TOOL_FILES, [ + {<<"mix.exs">>, <<"mix">>}, + {<<"rebar.config">>, <<"rebar3">>}, + {<<"rebar">>, <<"rebar3">>}, + {<<"Makefile">>, <<"make">>}, + {<<"Makefile.win">>, <<"make">>} +]). +-include_lib("kernel/include/file.hrl"). + +-type checksum() :: binary(). +-type contents() :: #{filename() => binary()}. +-type filename() :: string(). 
-type files() :: [filename() | {filename(), filename() | binary()}] | contents().
-type metadata() :: map().
-type tarball() :: binary().

%%====================================================================
%% API functions
%%====================================================================

%% @doc
%% Creates a package tarball.
%%
%% Each entry of `Files' is a path, a `{NameInTarball, PathOnDisk}' pair, or a
%% `{NameInTarball, Contents}' pair carrying the file contents as a binary.
%%
%% Returns `{error, {tarball, too_big}}' when the resulting tarball is larger
%% than `?TARBALL_MAX_SIZE' or when the uncompressed contents tarball is larger
%% than `?TARBALL_MAX_UNCOMPRESSED_SIZE'.
%%
%% Examples:
%%
%% ```
%% > Metadata = #{<<"name">> => <<"foo">>, <<"version">> => <<"1.0.0">>},
%% > Files = [{"src/foo.erl", <<"-module(foo).">>}],
%% > {ok, {Tarball, Checksum}} = r3_hex_tarball:create(Metadata, Files).
%% > Tarball.
%% <<86,69,...>>
%% > Checksum.
%% <<40,32,...>>
%% '''
%% @end
-spec create(metadata(), files()) ->
    {ok, {tarball(), checksum()}} | {error, {tarball, too_big}}.
create(Metadata, Files) ->
    MetadataBinary = encode_metadata(Metadata),
    ContentsTarball = create_memory_tarball(Files),
    ContentsTarballCompressed = gzip(ContentsTarball),
    %% The checksum covers the version, the metadata and the compressed contents.
    Checksum = checksum(?VERSION, MetadataBinary, ContentsTarballCompressed),
    ChecksumBase16 = encode_base16(Checksum),

    OuterFiles = [
        {"VERSION", ?VERSION},
        {"CHECKSUM", ChecksumBase16},
        {"metadata.config", MetadataBinary},
        {"contents.tar.gz", ContentsTarballCompressed}
    ],

    Tarball = create_memory_tarball(OuterFiles),

    %% The limits apply to the outer tarball and to the uncompressed inner one.
    TooBig =
        (byte_size(Tarball) > ?TARBALL_MAX_SIZE) orelse
            (byte_size(ContentsTarball) > ?TARBALL_MAX_UNCOMPRESSED_SIZE),

    case TooBig of
        true ->
            {error, {tarball, too_big}};

        false ->
            %% The raw (not base16-encoded) checksum is returned to the caller.
            {ok, {Tarball, Checksum}}
    end.

%% @doc
%% Creates a docs tarball.
%%
%% The result is a gzip-compressed tarball of `Files'. Returns
%% `{error, {tarball, too_big}}' when the compressed tarball is larger than
%% `?TARBALL_MAX_SIZE' or the uncompressed tarball is larger than
%% `?TARBALL_MAX_UNCOMPRESSED_SIZE'.
%%
%% Examples:
%%
%% ```
%% > Files = [{"doc/index.html", <<"Docs">>}],
%% > {ok, {Tarball, Checksum}} = r3_hex_tarball:create_docs(Files).
%% > Tarball.
%% <<31,139,...>>
%% > Checksum.
%% <<40,32,...>>
%% '''
%% @end
-spec create_docs(files()) ->
    {ok, {tarball(), checksum()}} | {error, {tarball, too_big}}.
create_docs(Files) ->
    UncompressedTarball = create_memory_tarball(Files),
    Tarball = gzip(UncompressedTarball),

    %% Both the compressed and the uncompressed size are limited.
    TooBig =
        (byte_size(Tarball) > ?TARBALL_MAX_SIZE) orelse
            (byte_size(UncompressedTarball) > ?TARBALL_MAX_UNCOMPRESSED_SIZE),

    case TooBig of
        true ->
            {error, {tarball, too_big}};

        false ->
            %% For docs the checksum is computed over the compressed tarball.
            {ok, {Tarball, checksum(Tarball)}}
    end.

%% @doc
%% Unpacks a package tarball.
%%
%% With `memory' as the second argument the package contents are returned as a
%% list of `{Filename, Binary}' pairs. With a directory path the contents are
%% extracted into that directory and are not part of the result.
%%
%% Tarballs larger than `?TARBALL_MAX_SIZE' are rejected with
%% `{error, {tarball, too_big}}' before anything is extracted.
%%
%% Examples:
%%
%% ```
%% > r3_hex_tarball:unpack(Tarball, memory).
%% {ok,#{checksum => <<...>>,
%%       contents => [{"src/foo.erl",<<"-module(foo).">>}],
%%       metadata => #{<<"name">> => <<"foo">>, ...}}}
%%
%% > r3_hex_tarball:unpack(Tarball, "path/to/unpack").
%% {ok,#{checksum => <<...>>,
%%       metadata => #{<<"name">> => <<"foo">>, ...}}}
%% '''
%% @end
-spec unpack(tarball(), memory) ->
                {ok, #{checksum => checksum(),
                       metadata => metadata(),
                       contents => [{filename(), binary()}]}} |
                {error, term()};
            (tarball(), filename()) ->
                {ok, #{checksum => checksum(), metadata => metadata()}} |
                {error, term()}.
unpack(Tarball, _) when byte_size(Tarball) > ?TARBALL_MAX_SIZE ->
    {error, {tarball, too_big}};

unpack(Tarball, Output) ->
    %% The outer tarball is always extracted to memory; only the inner
    %% contents tarball is written to Output when a directory is given.
    case r3_hex_erl_tar:extract({binary, Tarball}, [memory]) of
        {ok, []} ->
            {error, {tarball, empty}};

        {ok, FileList} ->
            do_unpack(maps:from_list(FileList), Output);

        {error, Reason} ->
            {error, {tarball, Reason}}
    end.

%% @doc
%% Returns base16-encoded representation of checksum.
-spec format_checksum(checksum()) -> binary().
format_checksum(Checksum) ->
    encode_base16(Checksum).

%% @doc
%% Converts an error reason term to a human-readable error message string.
-spec format_error(term()) -> string().
format_error({tarball, empty}) -> "empty tarball";
format_error({tarball, too_big}) -> "tarball is too big";
format_error({tarball, {missing_files, Files}}) -> io_lib:format("missing files: ~p", [Files]);
format_error({tarball, {invalid_files, Files}}) -> io_lib:format("invalid files: ~p", [Files]);
format_error({tarball, {bad_version, Vsn}}) -> io_lib:format("unsupported version: ~p", [Vsn]);
format_error({tarball, invalid_checksum}) -> "invalid tarball checksum";
%% Checksum mismatches are reported wrapped in a `tarball' tuple; they must be
%% matched before the generic `{tarball, Reason}' clause, which would otherwise
%% hand the tuple to the tar module's formatter.
format_error({tarball, {checksum_mismatch, _, _} = Mismatch}) -> format_error(Mismatch);
format_error({tarball, Reason}) -> "tarball error, " ++ r3_hex_erl_tar:format_error(Reason);
format_error({inner_tarball, Reason}) -> "inner tarball error, " ++ r3_hex_erl_tar:format_error(Reason);
format_error({metadata, invalid_terms}) -> "error reading package metadata: invalid terms";
format_error({metadata, not_key_value}) -> "error reading package metadata: not in key-value format";
format_error({metadata, Reason}) -> "error reading package metadata: " ++ r3_safe_erl_term:format_error(Reason);

%% Unwrapped form, kept for callers that pass the mismatch tuple directly.
format_error({checksum_mismatch, ExpectedChecksum, ActualChecksum}) ->
    io_lib:format(
        "tarball checksum mismatch~n~n" ++
        "Expected (base16-encoded): ~s~n" ++
        "Actual   (base16-encoded): ~s",
        [encode_base16(ExpectedChecksum), encode_base16(ActualChecksum)]).

%%====================================================================
%% Internal functions
%%====================================================================

%% SHA-256 over the concatenation of version, metadata and compressed contents.
checksum(Version, MetadataBinary, ContentsBinary) ->
    Blob = <<Version/binary, MetadataBinary/binary, ContentsBinary/binary>>,
    crypto:hash(sha256, Blob).

%% SHA-256 over a single binary (used for docs tarballs).
checksum(ContentsBinary) ->
    Blob = <<ContentsBinary/binary>>,
    crypto:hash(sha256, Blob).

%% Serializes the metadata map as a sequence of `{Key, Value}.' terms, one per
%% top-level key, with atoms and nested maps converted by binarify/1.
encode_metadata(Meta) ->
    Data = lists:map(
        fun(MetaPair) ->
            String = io_lib_pretty:print(binarify(MetaPair), [{encoding, utf8}]),
            unicode:characters_to_binary([String, ".\n"])
        end, maps:to_list(Meta)),
    iolist_to_binary(Data).
%% Runs the validation stages over the extracted outer files and then unpacks
%% the inner contents. Every stage after the first passes an `{error, _}'
%% through untouched, so the first failure is what finish_unpack/1 returns.
do_unpack(Files, Output) ->
    Initial = #{
        checksum => undefined,
        contents => undefined,
        files => Files,
        metadata => undefined,
        output => Output
    },
    Stages = [
        fun check_files/1,
        fun check_version/1,
        fun check_checksum/1,
        fun decode_metadata/1
    ],
    Final = lists:foldl(fun(Stage, Acc) -> Stage(Acc) end, Initial, Stages),
    finish_unpack(Final).

%% Extracts the inner contents tarball, either to memory or to `Output'.
finish_unpack({error, _} = Failure) ->
    Failure;
finish_unpack(#{metadata := Metadata, files := Files, output := Output}) ->
    _Version = maps:get("VERSION", Files),
    Checksum = decode_base16(maps:get("CHECKSUM", Files)),
    Result = #{checksum => Checksum, metadata => Metadata},
    case unpack_tarball(maps:get("contents.tar.gz", Files), Output) of
        ok ->
            %% Extracted to disk: also store the raw metadata next to the files.
            copy_metadata_config(Output, maps:get("metadata.config", Files)),
            {ok, Result};

        {ok, Contents} ->
            %% Extracted to memory: hand the contents back to the caller.
            {ok, Result#{contents => Contents}};

        {error, Reason} ->
            {error, {inner_tarball, Reason}}
    end.

%% Writes the package metadata as `hex_metadata.config' inside `Output'.
copy_metadata_config(Output, MetadataBinary) ->
    Target = filename:join(Output, "hex_metadata.config"),
    ok = file:write_file(Target, MetadataBinary).

%% Verifies the outer tarball holds exactly the four expected entries.
check_files(#{files := Files} = State) ->
    Expected = ["VERSION", "CHECKSUM", "metadata.config", "contents.tar.gz"],
    files_check_result(diff_keys(Files, Expected, []), State).

files_check_result(ok, State) ->
    State;
files_check_result({error, {missing_keys, Names}}, _State) ->
    {error, {tarball, {missing_files, Names}}};
files_check_result({error, {unknown_keys, Names}}, _State) ->
    {error, {tarball, {invalid_files, Names}}}.

%% Only tarball version "3" is accepted.
check_version({error, _} = Failure) ->
    Failure;
check_version(#{files := Files} = State) ->
    Version = maps:get("VERSION", Files),
    case Version =:= <<"3">> of
        true ->
            State;

        false ->
            {error, {tarball, {bad_version, Version}}}
    end.
%% Compares the checksum stored in the tarball with the one computed from its
%% VERSION, metadata.config and contents.tar.gz entries. On success the decoded
%% checksum is stored in the state under `checksum'.
check_checksum({error, _} = Error) ->
    Error;
check_checksum(#{files := Files} = State) ->
    case try_decode_base16(maps:get("CHECKSUM", Files)) of
        {ok, ExpectedChecksum} when byte_size(ExpectedChecksum) =:= 32 ->
            Version = maps:get("VERSION", Files),
            MetadataBinary = maps:get("metadata.config", Files),
            ContentsBinary = maps:get("contents.tar.gz", Files),
            ActualChecksum = checksum(Version, MetadataBinary, ContentsBinary),

            case ExpectedChecksum =:= ActualChecksum of
                true ->
                    maps:put(checksum, ExpectedChecksum, State);

                false ->
                    {error, {tarball, {checksum_mismatch, ExpectedChecksum, ActualChecksum}}}
            end;

        _ ->
            %% Wrong length, or not base16 at all.
            {error, {tarball, invalid_checksum}}
    end.

%% The CHECKSUM entry comes from the tarball and is untrusted: a character
%% outside [0-9a-fA-F] makes the decoder fail with function_clause, which is
%% reported here as `error' instead of crashing the caller.
try_decode_base16(Base16) ->
    try decode_base16(Base16) of
        Decoded -> {ok, Decoded}
    catch
        error:function_clause -> error
    end.

%% Parses metadata.config and stores the normalized map under `metadata'.
decode_metadata({error, _} = Error) ->
    Error;
decode_metadata(#{files := #{"metadata.config" := Binary}} = State) when is_binary(Binary) ->
    case do_decode_metadata(Binary) of
        #{} = Metadata -> maps:put(metadata, normalize_metadata(Metadata), State);
        Other -> Other
    end.

%% Returns the metadata as a map, or `{error, {metadata, Reason}}' when the
%% binary cannot be tokenized or does not consist of `{Key, Value}' terms.
do_decode_metadata(Binary) when is_binary(Binary) ->
    {ok, String} = characters_to_list(Binary),

    case r3_safe_erl_term:string(String) of
        {ok, Tokens, _Line} ->
            try
                Terms = r3_safe_erl_term:terms(Tokens),
                maps:from_list(Terms)
            catch
                error:function_clause ->
                    {error, {metadata, invalid_terms}};

                error:badarg ->
                    {error, {metadata, not_key_value}}
            end;

        {error, {_Line, r3_safe_erl_term, Reason}, _Line2} ->
            {error, {metadata, Reason}}
    end.

%% Decodes the binary as UTF-8, falling back to latin1 when that fails.
%% unicode:characters_to_list/1 reports invalid input as `{error, _, _}' and
%% input that ends in the middle of a character as `{incomplete, _, _}';
%% both take the latin1 fallback.
characters_to_list(Binary) ->
    case unicode:characters_to_list(Binary) of
        List when is_list(List) ->
            {ok, List};
        {error, _, _} ->
            latin1_to_list(Binary);
        {incomplete, _, _} ->
            latin1_to_list(Binary)
    end.

latin1_to_list(Binary) ->
    case unicode:characters_to_list(Binary, latin1) of
        List when is_list(List) -> {ok, List};
        Other -> Other
    end.
%% Converts the key-value lists under "requirements", "links" and "extra" into
%% maps where possible, then fills in "build_tools" if it is missing.
normalize_metadata(Metadata) ->
    Updates = [
        {<<"requirements">>, fun normalize_requirements/1},
        {<<"links">>, fun try_into_map/1},
        {<<"extra">>, fun try_into_map/1}
    ],
    Normalized = lists:foldl(
        fun({Key, Fun}, Acc) -> maybe_update_with(Key, Fun, Acc) end,
        Metadata,
        Updates),
    guess_build_tools(Normalized).

%% A non-empty list whose first element is itself a list is treated as the
%% legacy format (each requirement is a proplist including its name).
normalize_requirements([First | _] = Requirements) when is_list(First) ->
    maps:from_list([normalize_legacy_requirement(Req) || Req <- Requirements]);
normalize_requirements(Requirements) ->
    try_into_map(fun normalize_normal_requirement/1, Requirements).

normalize_normal_requirement({Name, Requirement}) ->
    {Name, try_into_map(Requirement)}.

%% Pulls the "name" entry out of a legacy requirement and keys the rest by it.
normalize_legacy_requirement(Requirement) ->
    Props = maps:from_list(Requirement),
    {maps:get(<<"name">>, Props), maps:without([<<"name">>], Props)}.

%% Leaves an existing "build_tools" list alone; otherwise derives it from the
%% top-level entries of "files" that appear in ?BUILD_TOOL_FILES.
guess_build_tools(#{<<"build_tools">> := BuildTools} = Metadata) when is_list(BuildTools) ->
    Metadata;
guess_build_tools(#{<<"files">> := Filenames} = Metadata) ->
    TopLevel = [Name || Name <- Filenames, filename:dirname(binary_to_list(Name)) == "."],
    Detected = [Tool || {File, Tool} <- ?BUILD_TOOL_FILES, lists:member(File, TopLevel)],
    maps:put(<<"build_tools">>, lists:usort(Detected), Metadata);
guess_build_tools(Metadata) ->
    Metadata.

%%====================================================================
%% Tar Helpers
%%====================================================================

%% Extracts the compressed contents tarball to memory or into `Output'.
unpack_tarball(ContentsBinary, memory) ->
    r3_hex_erl_tar:extract({binary, ContentsBinary}, [memory, compressed]);
unpack_tarball(ContentsBinary, Output) ->
    Source = {binary, ContentsBinary},
    case r3_hex_erl_tar:extract(Source, [{cwd, Output}, compressed]) of
        ok ->
            %% Reset the mtime of everything found below Output.
            Extracted = filelib:wildcard("**", Output),
            lists:foreach(
                fun(Path) -> try_updating_mtime(filename:join(Output, Path)) end,
                Extracted),
            ok;
        Failure ->
            Failure
    end.
%% Sets the mtime of `Path' to the current universal time.
%% let it silently fail for bad symlinks
try_updating_mtime(Path) ->
    Time = calendar:universal_time(),
    _ = file:write_file_info(Path, #file_info{mtime=Time}, [{time, universal}]),
    ok.

%% Builds a tarball from `Files' through a temporary file and returns it as a
%% binary. The temporary file is removed even when adding files or reading the
%% result back fails.
create_memory_tarball(Files) ->
    Path = tmp_path(),
    {ok, Tar} = r3_hex_erl_tar:open(Path, [write]),

    try
        ok = write_and_close(Tar, Files),
        {ok, Tarball} = file:read_file(Path),
        Tarball
    after
        %% Best-effort cleanup: asserting here could mask the original error.
        _ = file:delete(Path)
    end.

%% Adds all files to the open tar handle and always closes it afterwards.
write_and_close(Tar, Files) ->
    try
        add_files(Tar, Files)
    after
        ok = r3_hex_erl_tar:close(Tar)
    end.

%% Random file name, relative to the current working directory.
tmp_path() ->
    "tmp_" ++ binary_to_list(encode_base16(crypto:strong_rand_bytes(32))).

add_files(Tar, Files) when is_list(Files) ->
    lists:foreach(fun(File) -> add_file(Tar, File) end, Files).

%% Accepted entries:
%%   {Filename, Contents}    - in-memory binary stored under Filename
%%   Filename                - file on disk stored under its own path
%%   {Filename, AbsFilename} - file on disk stored under Filename
add_file(Tar, {Filename, Contents}) when is_list(Filename), is_binary(Contents) ->
    ok = r3_hex_erl_tar:add(Tar, Contents, Filename, tar_opts());
add_file(Tar, Filename) when is_list(Filename) ->
    add_file(Tar, {Filename, Filename});
add_file(Tar, {Filename, AbsFilename}) when is_list(Filename), is_list(AbsFilename) ->
    {ok, FileInfo} = file:read_link_info(AbsFilename, []),

    case FileInfo#file_info.type of
        symlink ->
            ok = r3_hex_erl_tar:add(Tar, {Filename, AbsFilename}, tar_opts());
        directory ->
            %% Only empty directories get an entry of their own.
            case file:list_dir(AbsFilename) of
                {ok, []} ->
                    r3_hex_erl_tar:add(Tar, {Filename, AbsFilename}, tar_opts());

                {ok, _} ->
                    ok
            end;
        _ ->
            %% Regular file: read it and keep its permission bits.
            Mode = FileInfo#file_info.mode,
            {ok, Contents} = file:read_file(AbsFilename),
            ok = r3_hex_erl_tar:add(Tar, Contents, Filename, Mode, tar_opts())
    end.

%% Constant tar entry options: all timestamps are 2000-01-01T00:00:00 expressed
%% in seconds since 1970-01-01, and uid/gid are 0.
tar_opts() ->
    NixEpoch = calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}),
    Y2kEpoch = calendar:datetime_to_gregorian_seconds({{2000, 1, 1}, {0, 0, 0}}),
    Epoch = Y2kEpoch - NixEpoch,
    [{atime, Epoch}, {mtime, Epoch}, {ctime, Epoch}, {uid, 0}, {gid, 0}].
%% Reproducible gzip by not setting mtime and OS
%%
%% From https://tools.ietf.org/html/rfc1952
%%
%%   +---+---+---+---+---+---+---+---+---+---+
%%   |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
%%   +---+---+---+---+---+---+---+---+---+---+
%%
%%   +=======================+
%%   |...compressed blocks...| (more-->)
%%   +=======================+
%%
%%   +---+---+---+---+---+---+---+---+
%%   |     CRC32     |     ISIZE     |
%%   +---+---+---+---+---+---+---+---+
gzip(Uncompressed) ->
    Compressed = gzip_no_header(Uncompressed),
    %% ID1 = 31, ID2 = 139, CM = 8 (deflate); FLG, MTIME, XFL and OS are all 0.
    Header = <<31, 139, 8, 0, 0, 0, 0, 0, 0, 0>>,
    Crc = erlang:crc32(Uncompressed),
    Size = byte_size(Uncompressed),
    %% Both trailer fields are 32-bit little-endian; a 32-bit segment keeps
    %% only the low 32 bits of Size.
    Trailer = <<Crc:32/little, Size:32/little>>,
    iolist_to_binary([Header, Compressed, Trailer]).

%% Raw deflate stream: the negative window bits (-15) make zlib emit neither a
%% zlib nor a gzip wrapper, so gzip/1 can add its own fixed header and trailer.
gzip_no_header(Uncompressed) ->
    Zstream = zlib:open(),

    try
        zlib:deflateInit(Zstream, default, deflated, -15, 8, default),
        Compressed = zlib:deflate(Zstream, Uncompressed, finish),
        zlib:deflateEnd(Zstream),
        iolist_to_binary(Compressed)
    after
        zlib:close(Zstream)
    end.

%%====================================================================
%% Helpers
%%====================================================================

%% Recursively converts atoms to binaries and maps to key-value lists.
%% Binaries, numbers, booleans and `undefined' are returned unchanged.
binarify(Binary) when is_binary(Binary) -> Binary;
binarify(Number) when is_number(Number) -> Number;
binarify(Atom) when Atom == undefined orelse is_boolean(Atom) -> Atom;
binarify(Atom) when is_atom(Atom) -> atom_to_binary(Atom, utf8);
binarify(List) when is_list(List) ->
    [binarify(E) || E <- List];
binarify({Key, Value}) ->
    {binarify(Key), binarify(Value)};
binarify(Map) when is_map(Map) ->
    [binarify(Pair) || Pair <- maps:to_list(Map)].

%% Checks the keys of `Map' against the required and optional keys.
%% Unknown keys are reported in preference to missing ones.
diff_keys(Map, RequiredKeys, OptionalKeys) ->
    Keys = maps:keys(Map),
    MissingKeys = RequiredKeys -- Keys,
    UnknownKeys = Keys -- (RequiredKeys ++ OptionalKeys),

    case {MissingKeys, UnknownKeys} of
        {[], []} ->
            ok;

        {_, [_ | _]} ->
            {error, {unknown_keys, UnknownKeys}};

        _ ->
            {error, {missing_keys, MissingKeys}}
    end.
%% Applies `Fun' to the value stored under `Key'; a map without `Key' is
%% returned unchanged.
maybe_update_with(Key, Fun, Map) ->
    case maps:find(Key, Map) of
        {ok, Value} -> maps:put(Key, Fun(Value), Map);
        error -> Map
    end.

try_into_map(List) ->
    try_into_map(fun(X) -> X end, List).

%% Converts a list of 2-tuples into a map, passing every element through `Fun'
%% first. Any other input is returned as is.
try_into_map(Fun, Input) ->
    case is_list(Input) andalso lists:all(fun(E) -> is_tuple(E) andalso (tuple_size(E) == 2) end, Input) of
        true -> maps:from_list(lists:map(Fun, Input));
        false -> Input
    end.

%% Encodes a 32-byte binary as 64 upper-case hexadecimal characters.
%% The width is fixed: the binary is read as one 256-bit integer and printed
%% zero-padded to 64 digits, so any other input size fails the match.
encode_base16(Binary) ->
    <<X:256/big-unsigned-integer>> = Binary,
    iolist_to_binary(io_lib:format("~64.16.0B", [X])).

%% Based on https://github.com/goj/base16/blob/master/src/base16.erl
%% (C) 2012, Erlang Solutions Ltd.

%% Decodes pairs of hexadecimal characters (either case) into bytes.
decode_base16(Base16) ->
    << <<((unhex(H) bsl 4) + unhex(L))>> || <<H, L>> <= Base16 >>.

unhex(D) when $0 =< D andalso D =< $9 ->
    D - $0;
unhex(D) when $a =< D andalso D =< $f ->
    10 + D - $a;
unhex(D) when $A =< D andalso D =< $F ->
    10 + D - $A.