diff options
author | Andrew Thompson <andrew@hijacked.us> | 2014-03-05 13:39:55 -0500 |
---|---|---|
committer | Andrew Thompson <andrew@hijacked.us> | 2014-03-05 13:39:55 -0500 |
commit | 62b006227cc1c71102ff8a608b3b11e77a06fced (patch) | |
tree | 63994d7dad6a762b94a702eff009c6cc51bc5371 | |
parent | 6e24cd6ac7f8ba9b125c4898e29fa1e9a207f7d2 (diff) | |
parent | b6908421b7a83a2c2972c3263f32d438c3dc3cec (diff) |
Merge pull request #129 from tuncer/erlc-speedup-v5
Speed up the compilation process v5
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | THANKS | 1 | ||||
-rw-r--r-- | dialyzer_reference | 2 | ||||
-rw-r--r-- | src/rebar_deps.erl | 2 | ||||
-rw-r--r-- | src/rebar_erlc_compiler.erl | 467 | ||||
-rw-r--r-- | src/rebar_utils.erl | 6 | ||||
-rw-r--r-- | src/rebar_xref.erl | 2 |
8 files changed, 363 insertions, 120 deletions
@@ -4,9 +4,9 @@ rebar *.orig .*.swp rt.work -.hgignore .test dialyzer_warnings rebar.cmd .eunit deps +.rebar/* @@ -8,6 +8,7 @@ all: clean: @rm -rf rebar ebin/*.beam inttest/rt.work rt.work .eunit + @rm -f .rebarinfo distclean: clean @rm -f dialyzer_warnings @@ -120,3 +120,4 @@ Pedram Nimreezi Sylvain Benner Oliver Ferrigni Dave Thomas +Evgeniy Khramtsov diff --git a/dialyzer_reference b/dialyzer_reference index 678c38e..7fbe609 100644 --- a/dialyzer_reference +++ b/dialyzer_reference @@ -1,3 +1,3 @@ rebar_eunit.erl:434: Call to missing or unexported function eunit_test:function_wrapper/2 -rebar_utils.erl:163: Call to missing or unexported function escript:foldl/3 +rebar_utils.erl:164: Call to missing or unexported function escript:foldl/3 diff --git a/src/rebar_deps.erl b/src/rebar_deps.erl index 2e305d5..43bde04 100644 --- a/src/rebar_deps.erl +++ b/src/rebar_deps.erl @@ -304,7 +304,7 @@ get_deps_dir(Config) -> get_deps_dir(Config, ""). get_deps_dir(Config, App) -> - BaseDir = rebar_config:get_xconf(Config, base_dir, []), + BaseDir = rebar_utils:base_dir(Config), DepsDir = get_shared_deps_dir(Config, "deps"), {true, filename:join([BaseDir, DepsDir, App])}. diff --git a/src/rebar_erlc_compiler.erl b/src/rebar_erlc_compiler.erl index dbefa4a..a1740b0 100644 --- a/src/rebar_erlc_compiler.erl +++ b/src/rebar_erlc_compiler.erl @@ -36,6 +36,17 @@ -include("rebar.hrl"). -include_lib("stdlib/include/erl_compile.hrl"). +-define(ERLCINFO_VSN, 1). +-define(ERLCINFO_FILE, "erlcinfo"). +-type erlc_info_v() :: {digraph:vertex(), term()} | 'false'. +-type erlc_info_e() :: {digraph:vertex(), digraph:vertex()}. +-type erlc_info() :: {list(erlc_info_v()), list(erlc_info_e())}. +-record(erlcinfo, + { + vsn = ?ERLCINFO_VSN :: pos_integer(), + info = {[], []} :: erlc_info() + }). + %% =================================================================== %% Public API %% =================================================================== @@ -90,7 +101,7 @@ compile(Config, _AppFile) -> doterl_compile(Config, "ebin"). -spec clean(rebar_config:config(), file:filename()) -> 'ok'. -clean(_Config, _AppFile) -> +clean(Config, _AppFile) -> MibFiles = rebar_utils:find_files("mibs", "^.*\\.mib\$"), MIBs = [filename:rootname(filename:basename(MIB)) || MIB <- MibFiles], rebar_file_utils:delete_each( @@ -103,6 +114,9 @@ clean(_Config, _AppFile) -> [ binary_to_list(iolist_to_binary(re:replace(F, "\\.[x|y]rl$", ".erl"))) || F <- YrlFiles ]), + %% Delete the build graph, if any + rebar_file_utils:rm_rf(erlcinfo_file(Config)), + %% Erlang compilation is recursive, so it's possible that we have a nested %% directory structure in ebin with .beam files within. As such, we want %% to scan whatever is left in the ebin/ directory for sub-dirs which @@ -260,7 +274,7 @@ doterl_compile(Config, OutDir) -> doterl_compile(Config, OutDir, []). doterl_compile(Config, OutDir, MoreSources) -> - FirstErls = rebar_config:get_list(Config, erl_first_files, []), + ErlFirstFiles = rebar_config:get_list(Config, erl_first_files, []), ErlOpts = rebar_utils:erl_opts(Config), ?DEBUG("erl_opts ~p~n", [ErlOpts]), %% Support the src_dirs option allowing multiple directories to @@ -268,114 +282,270 @@ doterl_compile(Config, OutDir, MoreSources) -> %% eunit tests be separated from the core application source. SrcDirs = rebar_utils:src_dirs(proplists:append_values(src_dirs, ErlOpts)), RestErls = [Source || Source <- gather_src(SrcDirs, []) ++ MoreSources, - not lists:member(Source, FirstErls)], - - %% Split RestErls so that parse_transforms and behaviours are instead added - %% to erl_first_files, parse transforms first. - %% This should probably be somewhat combined with inspect_epp - [ParseTransforms, Behaviours, OtherErls] = - lists:foldl(fun(F, [A, B, C]) -> - case compile_priority(F) of - parse_transform -> - [[F | A], B, C]; - behaviour -> - [A, [F | B], C]; - callback -> - [A, [F | B], C]; - _ -> - [A, B, [F | C]] - end - end, [[], [], []], RestErls), - - NewFirstErls = FirstErls ++ ParseTransforms ++ Behaviours, - + not lists:member(Source, ErlFirstFiles)], %% Make sure that ebin/ exists and is on the path ok = filelib:ensure_dir(filename:join("ebin", "dummy.beam")), CurrPath = code:get_path(), true = code:add_path(filename:absname("ebin")), OutDir1 = proplists:get_value(outdir, ErlOpts, OutDir), - rebar_base_compiler:run(Config, NewFirstErls, OtherErls, - fun(S, C) -> - internal_erl_compile(C, S, OutDir1, ErlOpts) - end), + G = init_erlcinfo(Config, RestErls), + %% Split RestErls so that files which are depended on are treated + %% like erl_first_files. + {OtherFirstErls, OtherErls} = + lists:partition( + fun(F) -> + Children = get_children(G, F), + log_files(?FMT("Files dependent on ~s", [F]), Children), + + case erls(Children) of + [] -> + %% There are no files dependent on this file. + false; + _ -> + %% There are some files dependent on the file. + %% Thus the file has higher priority + %% and should be compiled in the first place. + true + end + end, RestErls), + %% Dependencies of OtherFirstErls that must be compiled first. + OtherFirstErlsDeps = lists:flatmap( + fun(Erl) -> erls(get_parents(G, Erl)) end, + OtherFirstErls), + %% NOTE: In case the way we retrieve OtherFirstErlsDeps or merge + %% it with OtherFirstErls does not result in the correct compile + %% priorities, or the method in use proves to be too slow for + %% certain projects, consider using a more elaborate method (maybe + %% digraph_utils) or alternatively getting and compiling the .erl + %% parents of an individual Source in internal_erl_compile. By not + %% handling this in internal_erl_compile, we also avoid extra + %% needs_compile/2 calls. + FirstErls = ErlFirstFiles ++ uo_merge(OtherFirstErlsDeps, OtherFirstErls), + ?DEBUG("Files to compile first: ~p~n", [FirstErls]), + rebar_base_compiler:run( + Config, FirstErls, OtherErls, + fun(S, C) -> + internal_erl_compile(C, S, OutDir1, ErlOpts, G) + end), true = code:set_path(CurrPath), ok. +%% +%% Return all .erl files from a list of files +%% +erls(Files) -> + [Erl || Erl <- Files, filename:extension(Erl) =:= ".erl"]. + +%% +%% Return a list without duplicates while preserving order +%% +ulist(L) -> + ulist(L, []). + +ulist([H|T], Acc) -> + case lists:member(H, T) of + true -> + ulist(T, Acc); + false -> + ulist(T, [H|Acc]) + end; +ulist([], Acc) -> + lists:reverse(Acc). + +%% +%% Merge two lists without duplicates while preserving order +%% +uo_merge(L1, L2) -> + lists:foldl(fun(E, Acc) -> u_add_element(E, Acc) end, ulist(L1), L2). + +u_add_element(Elem, [Elem|_]=Set) -> Set; +u_add_element(Elem, [E1|Set]) -> [E1|u_add_element(Elem, Set)]; +u_add_element(Elem, []) -> [Elem]. + -spec include_path(file:filename(), rebar_config:config()) -> [file:filename(), ...]. include_path(Source, Config) -> ErlOpts = rebar_config:get(Config, erl_opts, []), - ["include", filename:dirname(Source)] - ++ proplists:get_all_values(i, ErlOpts). - --spec inspect(file:filename(), - [file:filename(), ...]) -> {string(), [string()]}. -inspect(Source, IncludePath) -> - ModuleDefault = filename:basename(Source, ".erl"), - case epp:open(Source, IncludePath) of - {ok, Epp} -> - inspect_epp(Epp, Source, ModuleDefault, []); - {error, Reason} -> - ?DEBUG("Failed to inspect ~s: ~p\n", [Source, Reason]), - {ModuleDefault, []} - end. - --spec inspect_epp(pid(), file:filename(), file:filename(), - [string()]) -> {string(), [string()]}. -inspect_epp(Epp, Source, Module, Includes) -> - case epp:parse_erl_form(Epp) of - {ok, {attribute, _, module, ModInfo}} -> - ActualModuleStr = - case ModInfo of - %% Typical module name, single atom - ActualModule when is_atom(ActualModule) -> - atom_to_list(ActualModule); - %% Packag-ized module name, list of atoms - ActualModule when is_list(ActualModule) -> - string:join([atom_to_list(P) || - P <- ActualModule], "."); - %% Parameterized module name, single atom - {ActualModule, _} when is_atom(ActualModule) -> - atom_to_list(ActualModule); - %% Parameterized and packagized module name, list of atoms - {ActualModule, _} when is_list(ActualModule) -> - string:join([atom_to_list(P) || - P <- ActualModule], ".") - end, - inspect_epp(Epp, Source, ActualModuleStr, Includes); - {ok, {attribute, 1, file, {Module, 1}}} -> - inspect_epp(Epp, Source, Module, Includes); - {ok, {attribute, 1, file, {Source, 1}}} -> - inspect_epp(Epp, Source, Module, Includes); - {ok, {attribute, 1, file, {IncFile, 1}}} -> - inspect_epp(Epp, Source, Module, [IncFile | Includes]); - {eof, _} -> - epp:close(Epp), - {Module, Includes}; - _ -> - inspect_epp(Epp, Source, Module, Includes) - end. + lists:usort(["include", filename:dirname(Source)] + ++ proplists:get_all_values(i, ErlOpts)). -spec needs_compile(file:filename(), file:filename(), [string()]) -> boolean(). -needs_compile(Source, Target, Hrls) -> +needs_compile(Source, Target, Parents) -> TargetLastMod = filelib:last_modified(Target), lists:any(fun(I) -> TargetLastMod < filelib:last_modified(I) end, - [Source] ++ Hrls). + [Source] ++ Parents). + +check_erlcinfo(_Config, #erlcinfo{vsn=?ERLCINFO_VSN}) -> + ok; +check_erlcinfo(Config, #erlcinfo{vsn=Vsn}) -> + ?ABORT("~s file version is incompatible. expected: ~b got: ~b~n", + [erlcinfo_file(Config), ?ERLCINFO_VSN, Vsn]); +check_erlcinfo(Config, _) -> + ?ABORT("~s file is invalid. Please delete before next run.~n", + [erlcinfo_file(Config)]). + +erlcinfo_file(Config) -> + filename:join([rebar_utils:base_dir(Config), ".rebar", ?ERLCINFO_FILE]). + +init_erlcinfo(Config, Erls) -> + G = restore_erlcinfo(Config), + %% Get a unique list of dirs based on the source files' locations. + %% This is used for finding files in sub dirs of the configured + %% src_dirs. For example, src/sub_dir/foo.erl. + Dirs = sets:to_list(lists:foldl( + fun(Erl, Acc) -> + Dir = filename:dirname(Erl), + sets:add_element(Dir, Acc) + end, sets:new(), Erls)), + Updates = [update_erlcinfo(G, Erl, include_path(Erl, Config) ++ Dirs) + || Erl <- Erls], + Modified = lists:member(modified, Updates), + ok = store_erlcinfo(G, Config, Modified), + G. + +update_erlcinfo(G, Source, Dirs) -> + case digraph:vertex(G, Source) of + {_, LastUpdated} -> + LastModified = filelib:last_modified(Source), + if LastModified == 0 -> + %% The file doesn't exist anymore, + %% erase it from the graph. + %% All the edges will be erased automatically. + digraph:del_vertex(G, Source), + modified; + LastUpdated < LastModified -> + modify_erlcinfo(G, Source, Dirs); + modified; + true -> + unmodified + end; + false -> + modify_erlcinfo(G, Source, Dirs), + modified + end. + +modify_erlcinfo(G, Source, Dirs) -> + {ok, Fd} = file:open(Source, [read]), + Incls = parse_attrs(Fd, []), + AbsIncls = expand_file_names(Incls, Dirs), + ok = file:close(Fd), + LastUpdated = {date(), time()}, + digraph:add_vertex(G, Source, LastUpdated), + lists:foreach( + fun(Incl) -> + update_erlcinfo(G, Incl, Dirs), + digraph:add_edge(G, Source, Incl) + end, AbsIncls). + +restore_erlcinfo(Config) -> + File = erlcinfo_file(Config), + G = digraph:new(), + case file:read_file(File) of + {ok, Data} -> + try binary_to_term(Data) of + Erlcinfo -> + ok = check_erlcinfo(Config, Erlcinfo), + #erlcinfo{info=ErlcInfo} = Erlcinfo, + {Vs, Es} = ErlcInfo, + lists:foreach( + fun({V, LastUpdated}) -> + digraph:add_vertex(G, V, LastUpdated) + end, Vs), + lists:foreach( + fun({V1, V2}) -> + digraph:add_edge(G, V1, V2) + end, Es) + catch + error:badarg -> + ?ERROR( + "Failed (binary_to_term) to restore rebar info file." + " Discard file.~n", []), + ok + end; + _Err -> + ok + end, + G. + +store_erlcinfo(_G, _Config, _Modified = false) -> + ok; +store_erlcinfo(G, Config, _Modified) -> + Vs = lists:map( + fun(V) -> + digraph:vertex(G, V) + end, digraph:vertices(G)), + Es = lists:flatmap( + fun({V, _}) -> + lists:map( + fun(E) -> + {_, V1, V2, _} = digraph:edge(G, E), + {V1, V2} + end, digraph:out_edges(G, V)) + end, Vs), + File = erlcinfo_file(Config), + ok = filelib:ensure_dir(File), + Data = term_to_binary(#erlcinfo{info={Vs, Es}}, [{compressed, 9}]), + file:write_file(File, Data). + +%% NOTE: If, for example, one of the entries in Files, refers to +%% gen_server.erl, that entry will be dropped. It is dropped because +%% such an entry usually refers to the beam file, and we don't pass a +%% list of OTP src dirs for finding gen_server.erl's full path. Also, +%% if gen_server.erl was modified, it's not rebar's task to compile a +%% new version of the beam file. Therefore, it's reasonable to drop +%% such entries. Also see process_attr(behaviour, Form, Includes). +-spec expand_file_names([file:filename()], + [file:filename()]) -> [file:filename()]. +expand_file_names(Files, Dirs) -> + %% We check if Files exist by itself or within the directories + %% listed in Dirs. + %% Return the list of files matched. + lists:flatmap( + fun(Incl) -> + case filelib:is_regular(Incl) of + true -> + [Incl]; + false -> + lists:flatmap( + fun(Dir) -> + FullPath = filename:join(Dir, Incl), + case filelib:is_regular(FullPath) of + true -> + [FullPath]; + false -> + [] + end + end, Dirs) + end + end, Files). + +-spec get_parents(digraph(), file:filename()) -> [file:filename()]. +get_parents(G, Source) -> + %% Return all files which the Source depends upon. + digraph_utils:reachable_neighbours([Source], G). + +-spec get_children(digraph(), file:filename()) -> [file:filename()]. +get_children(G, Source) -> + %% Return all files dependent on the Source. + digraph_utils:reaching_neighbours([Source], G). -spec internal_erl_compile(rebar_config:config(), file:filename(), - file:filename(), list()) -> 'ok' | 'skipped'. -internal_erl_compile(Config, Source, Outdir, ErlOpts) -> + file:filename(), list(), + digraph()) -> 'ok' | 'skipped'. +internal_erl_compile(Config, Source, OutDir, ErlOpts, G) -> %% Determine the target name and includes list by inspecting the source file - {Module, Hrls} = inspect(Source, include_path(Source, Config)), + Module = filename:basename(Source, ".erl"), + Parents = get_parents(G, Source), + log_files(?FMT("~s depends on", [Source]), Parents), %% Construct the target filename - Target = filename:join([Outdir | string:tokens(Module, ".")]) ++ ".beam", + Target = filename:join([OutDir | string:tokens(Module, ".")]) ++ ".beam", ok = filelib:ensure_dir(Target), %% If the file needs compilation, based on last mod date of includes or %% the target - case needs_compile(Source, Target, Hrls) of + case needs_compile(Source, Target, Parents) of true -> Opts = [{outdir, filename:dirname(Target)}] ++ ErlOpts ++ [{i, "include"}, return], @@ -463,40 +633,97 @@ delete_dir(Dir, Subdirs) -> lists:foreach(fun(D) -> delete_dir(D, dirs(D)) end, Subdirs), file:del_dir(Dir). --spec compile_priority(file:filename()) -> 'normal' | 'behaviour' | - 'callback' | - 'parse_transform'. -compile_priority(File) -> - case epp_dodger:parse_file(File) of - {error, _} -> - normal; % couldn't parse the file, default priority - {ok, Trees} -> - F2 = fun({tree,arity_qualifier,_, - {arity_qualifier,{tree,atom,_,behaviour_info}, - {tree,integer,_,1}}}, _) -> - behaviour; - ({tree,arity_qualifier,_, - {arity_qualifier,{tree,atom,_,parse_transform}, - {tree,integer,_,2}}}, _) -> - parse_transform; - (_, Acc) -> - Acc - end, - - F = fun({tree, attribute, _, - {attribute, {tree, atom, _, export}, - [{tree, list, _, {list, List, none}}]}}, Acc) -> - lists:foldl(F2, Acc, List); - ({tree, attribute, _, - {attribute, {tree, atom, _, callback},_}}, _Acc) -> - callback; - (_, Acc) -> - Acc - end, +parse_attrs(Fd, Includes) -> + case io:parse_erl_form(Fd, "") of + {ok, Form, _Line} -> + case erl_syntax:type(Form) of + attribute -> + NewIncludes = process_attr(Form, Includes), + parse_attrs(Fd, NewIncludes); + _ -> + parse_attrs(Fd, Includes) + end; + {eof, _} -> + Includes; + _Err -> + parse_attrs(Fd, Includes) + end. - lists:foldl(F, normal, Trees) +process_attr(Form, Includes) -> + try + AttrName = erl_syntax:atom_value(erl_syntax:attribute_name(Form)), + process_attr(AttrName, Form, Includes) + catch _:_ -> + %% TODO: We should probably try to be more specific here + %% and not suppress all errors. + Includes end. +process_attr(import, Form, Includes) -> + case erl_syntax_lib:analyze_import_attribute(Form) of + {Mod, _Funs} -> + [atom_to_list(Mod) ++ ".erl"|Includes]; + Mod -> + [atom_to_list(Mod) ++ ".erl"|Includes] + end; +process_attr(file, Form, Includes) -> + {File, _} = erl_syntax_lib:analyze_file_attribute(Form), + [File|Includes]; +process_attr(include, Form, Includes) -> + [FileNode] = erl_syntax:attribute_arguments(Form), + File = erl_syntax:string_value(FileNode), + [File|Includes]; +process_attr(include_lib, Form, Includes) -> + [FileNode] = erl_syntax:attribute_arguments(Form), + RawFile = erl_syntax:string_value(FileNode), + File = maybe_expand_include_lib_path(RawFile), + [File|Includes]; +process_attr(behaviour, Form, Includes) -> + [FileNode] = erl_syntax:attribute_arguments(Form), + File = erl_syntax:atom_name(FileNode) ++ ".erl", + [File|Includes]; +process_attr(compile, Form, Includes) -> + [Arg] = erl_syntax:attribute_arguments(Form), + case erl_syntax:concrete(Arg) of + {parse_transform, Mod} -> + [atom_to_list(Mod) ++ ".erl"|Includes]; + {core_transform, Mod} -> + [atom_to_list(Mod) ++ ".erl"|Includes]; + L when is_list(L) -> + lists:foldl( + fun({parse_transform, M}, Acc) -> + [atom_to_list(M) ++ ".erl"|Acc]; + ({core_transform, M}, Acc) -> + [atom_to_list(M) ++ ".erl"|Acc]; + (_, Acc) -> + Acc + end, Includes, L) + end. + +%% Given the filename from an include_lib attribute, if the path +%% exists, return unmodified, or else get the absolute ERL_LIBS +%% path. +maybe_expand_include_lib_path(File) -> + case filelib:is_regular(File) of + true -> + File; + false -> + expand_include_lib_path(File) + end. + +%% Given a path like "stdlib/include/erl_compile.hrl", return +%% "OTP_INSTALL_DIR/lib/erlang/lib/stdlib-x.y.z/include/erl_compile.hrl". +%% Usually a simple [Lib, SubDir, File1] = filename:split(File) should +%% work, but to not crash when an unusual include_lib path is used, +%% utilize more elaborate logic. +expand_include_lib_path(File) -> + File1 = filename:basename(File), + Split = filename:split(filename:dirname(File)), + Lib = hd(Split), + SubDir = filename:join(tl(Split)), + Dir = code:lib_dir(list_to_atom(Lib), list_to_atom(SubDir)), + filename:join(Dir, File1). + %% %% Ensure all files in a list are present and abort if one is missing %% @@ -509,3 +736,13 @@ check_file(File) -> false -> ?ABORT("File ~p is missing, aborting\n", [File]); true -> File end. + +%% Print prefix followed by list of files. If the list is empty, print +%% on the same line, otherwise use a separate line. +log_files(Prefix, Files) -> + case Files of + [] -> + ?DEBUG("~s: ~p~n", [Prefix, Files]); + _ -> + ?DEBUG("~s:~n~p~n", [Prefix, Files]) + end. diff --git a/src/rebar_utils.erl b/src/rebar_utils.erl index 618427f..2d227b6 100644 --- a/src/rebar_utils.erl +++ b/src/rebar_utils.erl @@ -52,6 +52,7 @@ erl_opts/1, src_dirs/1, ebin_dir/0, + base_dir/1, processing_base_dir/1, processing_base_dir/2]). -include("rebar.hrl"). @@ -307,12 +308,15 @@ src_dirs(SrcDirs) -> ebin_dir() -> filename:join(get_cwd(), "ebin"). +base_dir(Config) -> + rebar_config:get_xconf(Config, base_dir). + processing_base_dir(Config) -> Cwd = rebar_utils:get_cwd(), processing_base_dir(Config, Cwd). processing_base_dir(Config, Dir) -> - Dir =:= rebar_config:get_xconf(Config, base_dir). + Dir =:= base_dir(Config). %% ==================================================================== %% Internal functions diff --git a/src/rebar_xref.erl b/src/rebar_xref.erl index 91fb60d..16e8cc4 100644 --- a/src/rebar_xref.erl +++ b/src/rebar_xref.erl @@ -146,7 +146,7 @@ code_path(Config) -> %% functions, even though those functions are present as part %% of compilation. H/t to @dluna. Long term we should tie more %% properly into the overall compile code path if possible. - BaseDir = rebar_config:get_xconf(Config, base_dir), + BaseDir = rebar_utils:base_dir(Config), [P || P <- code:get_path() ++ rebar_config:get(Config, xref_extra_paths, []) ++ [filename:join(BaseDir, filename:join(SubDir, "ebin")) |