From 09284a8af93e742798ca61d64fb8699e18e53207 Mon Sep 17 00:00:00 2001 From: Fred Hebert Date: Fri, 13 Oct 2017 10:09:46 -0400 Subject: Avoid guessing on utf8 decoding of app files Rather than trying one method and then the other, allow the caller to specify the encoding of the expected file. All other schemes are risky and won't work well. Roll back the function's default interface to the binary format in case any plugin used it for non-unicode content, preserving backwards compat. --- src/rebar_file_utils.erl | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/rebar_file_utils.erl') diff --git a/src/rebar_file_utils.erl b/src/rebar_file_utils.erl index 4a783f2..b0755ed 100644 --- a/src/rebar_file_utils.erl +++ b/src/rebar_file_utils.erl @@ -35,6 +35,7 @@ mv/2, delete_each/1, write_file_if_contents_differ/2, + write_file_if_contents_differ/3, system_tmpdir/0, system_tmpdir/1, reset_dir/1, @@ -334,15 +335,19 @@ delete_each([File | Rest]) -> ?FAIL end. +%% @doc backwards compat layer to pre-utf8 support write_file_if_contents_differ(Filename, Bytes) -> - %% first try to convert directly to binaries, - %% but if it fails, we likely contain unicode and - %% need special treatment - ToWrite = try - iolist_to_binary(Bytes) - catch - error:badarg -> unicode:characters_to_binary(Bytes) - end, + write_file_if_contents_differ(Filename, Bytes, raw). + +%% @doc let the user pick the encoding required; there are no good +%% heuristics for data encoding +write_file_if_contents_differ(Filename, Bytes, raw) -> + write_file_if_contents_differ_(Filename, iolist_to_binary(Bytes)); +write_file_if_contents_differ(Filename, Bytes, utf8) -> + write_file_if_contents_differ_(Filename, unicode:characters_to_binary(Bytes, utf8)). + +%% @private compare raw strings and check contents +write_file_if_contents_differ_(Filename, ToWrite) -> case file:read_file(Filename) of {ok, ToWrite} -> ok; -- cgit v1.1