%%% Copyright (c) 2014-2016, NORDUnet A/S.
%%% See LICENSE for licensing information.

-module(x509).
-export([normalise_chain/2, cert_string/1, read_pemfiles_from_dir/1,
         self_signed/1, detox/2]).
-include_lib("public_key/include/public_key.hrl").
-include_lib("eunit/include/eunit.hrl").
-import(lists, [nth/2, filter/2]).

-type reason() :: {chain_too_long |
                   root_unknown |
                   signature_mismatch |
                   encoding_invalid}.

-define(MAX_CHAIN_LENGTH, 10).
-define(LEAF_POISON_OID, {1,3,6,1,4,1,11129,2,4,3}).
-define(LEAF_POISON_VAL, [5,0]).
-define(CA_POISON_OID, {1,3,6,1,4,1,11129,2,4,4}).

-spec normalise_chain([binary()], [binary()]) -> {ok, [binary()]} |
                                                 {error, reason()}.
normalise_chain(AcceptableRootCerts, CertChain) ->
    case normalise_chain(AcceptableRootCerts, CertChain, ?MAX_CHAIN_LENGTH) of
        {false, Reason} ->
            {error, Reason};
        {true, Root} ->
            {ok, CertChain ++ Root}
    end.

-spec cert_string(binary()) -> string().
cert_string(Der) ->
    mochihex:to_hex(crypto:hash(sha, Der)).

-spec read_pemfiles_from_dir(file:filename()) -> [binary()].
%% @doc Reading certificates from files. Flattening the result -- all
%% certs in all files are returned in a single list.
read_pemfiles_from_dir(Dir) ->
    case file:list_dir(Dir) of
        {error, enoent} ->
            lager:error("directory does not exist: ~p", [Dir]),
            [];
        {error, Reason} ->
            lager:error("unable to read directory ~p: ~p", [Dir, Reason]),
            [];
        {ok, Filenames} ->
            Files = lists:filter(
                      fun(F) -> string:equal(".pem", filename:extension(F)) end,
                      Filenames),
            ders_from_pemfiles(Dir, Files)
    end.

-spec self_signed([binary()]) -> [binary()].
%% @doc Return a list of certs in L that are self signed.
self_signed(L) ->
    lists:filter(fun(Cert) -> signed_by_p(Cert, Cert) end, L).

-spec detox(binary(), [binary()]) -> {binary(), binary()}.
%% @doc Return a detoxed LeafDer and its issuer.
detox(LeafDer, ChainDer) ->
    detox_precert(LeafDer, nth(1, ChainDer), nth(2, ChainDer)).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Private functions.

-spec normalise_chain([binary()], [binary()], integer()) ->
                             {false, reason()} | {true, [binary()]}.
%% @doc Verify that the leaf cert or precert has a valid chain back to
%% an acceptable root cert. The order of certificates in the second
%% argument is: leaf cert in head, chain in tail. Order of first
%% argument is irrelevant.
%%
%% Return {false, Reason} or {true, ListWithRoot}. Note that
%% ListWithRoot allways contain exactly one element -- a CA cert from
%% first argument (AcceptableRootCerts) signing the root of the
%% chain. FIXME: Any point in returning this as a list?
normalise_chain(_, _, MaxChainLength) when MaxChainLength =< 0 ->
    %% Chain too long.
    {false, chain_too_long};
normalise_chain(AcceptableRootCerts, [TopCert], MaxChainLength) ->
    %% Check root of chain.
    case lists:member(TopCert, AcceptableRootCerts) of
        true ->
            %% Top cert is part of chain.
            {true, [TopCert]};
        false when MaxChainLength =< 1 ->
            %% Chain too long.
            {false, chain_too_long};
        false ->
            %% Top cert _might_ be signed by a cert in truststore.
            case signer(TopCert, AcceptableRootCerts) of
                notfound -> {false, root_unknown};
                Root -> {true, [Root]}
            end
    end;
normalise_chain(AcceptableRootCerts, [BottomCert|Rest], MaxChainLength) ->
    case signed_by_p(BottomCert, hd(Rest)) of
        true -> normalise_chain(AcceptableRootCerts, Rest, MaxChainLength - 1);
        false -> {false, signature_mismatch}
    end.

-spec signer(binary(), [binary()]) -> notfound | binary().
%% @doc Return first cert in list signing Cert, or notfound. NOTE:
%% This is potentially expensive. It'd be more efficient to search for
%% Cert.issuer in a list of Issuer.subject's. If so, maybe make the
%% matching somewhat fuzzy unless that too is expensive.
signer(_Cert, []) ->
    notfound;
signer(Cert, [H|T]) ->
    case signed_by_p(Cert, H) of
        true ->
            H;
        false ->
            signer(Cert, T)
    end.

-spec encoded_tbs_cert(binary()) -> binary().
%% Code from pubkey_cert:encoded_tbs_cert/1.
encoded_tbs_cert(DerCert) ->
    {ok, PKIXCert} =
	'OTP-PUB-KEY':decode_TBSCert_exclusive(DerCert),
    {'Certificate', {'Certificate_tbsCertificate', EncodedTBSCert}, _, _} =
        PKIXCert,
    EncodedTBSCert.

-spec decode_cert(binary()) -> #'Certificate'{} | error.
decode_cert(Der) ->
    case (catch public_key:pkix_decode_cert(Der, plain)) of
        #'Certificate'{} = Cert ->
            Cert;
        {'EXIT', Reason} ->
            lager:info("invalid certificate: ~p: ~p", [cert_string(Der), Reason]),
            dump_unparsable_cert(Der),
            error;
        Unknown ->
            lager:info("unknown error decoding cert: ~p: ~p",
                       [cert_string(Der), Unknown]),
            error
    end.

parsable_cert_p(Der) ->
    case decode_cert(Der) of
        error ->
            false;
        _ ->
            true
    end.

%% @doc Is Cert signed by Issuer? Only verify that the signature
%% matches and don't check things like Cert.issuer == Issuer.subject.
-spec signed_by_p(binary(), binary()) -> boolean().
signed_by_p(SubjectDer, IssuerDer) ->
    SubjectCert = decode_cert(SubjectDer),
    IssuerCert = decode_cert(IssuerDer),

    case {SubjectCert, IssuerCert} of
        {#'Certificate'{},
         #'Certificate'{tbsCertificate =
                            #'TBSCertificate'{subjectPublicKeyInfo =
                                                  IssuerSPKI}}} ->
            %% Dig out digest, digest type and signature from subject cert and
            %% verify signature.
            case extract_verify_data(decode_cert(SubjectDer), SubjectDer) of
                error ->
                    false;
                {ok, SubjectData} ->
                    verify_sig(IssuerSPKI, SubjectData)
            end;
        _ ->
            false
    end.

verify_sig(IssuerSPKI, {DigestOrPlainText, DigestType, Signature}) ->
    {Alg, Params, Key0} = catlfish_compat:unpack_issuer(IssuerSPKI),
    KeyType = pubkey_cert_records:supportedPublicKeyAlgorithms(Alg),
    IssuerKey =
        case KeyType of
            'RSAPublicKey' ->
                public_key:der_decode(KeyType, Key0);
            'ECPoint' ->
                Point = #'ECPoint'{point = Key0},
                ECParams = public_key:der_decode('EcpkParameters', Params),
                {Point, ECParams};
            _ ->                               % FIXME: 'DSAPublicKey'
                lager:error("NIY: Issuer key type ~p", [KeyType]),
                false
        end,
    %% Verify the signature.
    public_key:verify(DigestOrPlainText, DigestType, Signature, IssuerKey).

-spec extract_verify_data(#'Certificate'{}, binary()) -> {ok, tuple()} | error.
%% @doc Return DER encoded TBScertificate, digest type and signature.
%% Code from pubkey_cert:extract_verify_data/2.
extract_verify_data(Cert, DerCert) ->
    PlainText = encoded_tbs_cert(DerCert),
    Sig = catlfish_compat:unpack_signature(Cert#'Certificate'.signature),
    SigAlgRecord = Cert#'Certificate'.signatureAlgorithm,
    SigAlg = SigAlgRecord#'AlgorithmIdentifier'.algorithm,
    try
        {DigestType, _} = public_key:pkix_sign_types(SigAlg),
        {ok, {PlainText, DigestType, Sig}}
    catch
        error:function_clause ->
            lager:debug("~p: signature algorithm not supported: ~p",
                        [cert_string(DerCert), SigAlg]),
            error
    end.

%% Precerts according to RFC6962.
%%
%% Submitted precerts have a special critical poison extension -- OID
%% 1.3.6.1.4.1.11129.2.4.3, whose extnValue OCTET STRING contains
%% ASN.1 NULL data (0x05 0x00).
%%
%% They are signed with either the CA cert that will sign the final
%% cert or a Precertificate Signing Certificate directly signed by the
%% CA cert that will sign the final cert. A Precertificate Signing
%% Certificate has CA:true and Extended Key Usage: Certificate
%% Transparency, OID 1.3.6.1.4.1.11129.2.4.4.
%%
%% PreCert in SignedCertificateTimestamp does _not_ contain the poison
%% extension, nor does it have an issuer which is a Precertificate
%% Signing Certificate. This means that we have to 1) remove the
%% poison extension and 2) potentially change issuer and Authority Key
%% Identifier. See RFC6962 Section 3.2.
%%
%% Changes in draft-ietf-trans-rfc6962-bis-??: TODO.

-spec detox_precert(binary(), binary(), binary()) -> {binary(), binary()}.
%% @doc Return {DetoxedLeaf, IssuerPubKeyHash} where i) DetoxedLeaf is
%% the tbsCertificate w/o poison and adjusted issuer and authkeyid;
%% and ii) IssuerPubKeyHash is the hash over issuing cert's public
%% key.
detox_precert(LeafDer, ParentDer, GrandParentDer) ->
    Leaf = public_key:pkix_decode_cert(LeafDer, plain),
    Parent = public_key:pkix_decode_cert(ParentDer, plain),
    GrandParent = public_key:pkix_decode_cert(GrandParentDer, plain),
    DetoxedLeafTBS = remove_poison_ext(Leaf),

    %% If parent is a precert signing cert, change issuer and
    %% potential authority key id to refer to grandparent.
    {C, IssuerKeyHash} =
        case is_precert_signer(Parent) of
            true -> 
                {set_issuer_and_authkeyid(DetoxedLeafTBS, Parent),
                 extract_pub_key(GrandParent)};
            false -> 
                {DetoxedLeafTBS, extract_pub_key(Parent)}
        end,
    {public_key:pkix_encode('TBSCertificate', C, plain),
     crypto:hash(sha256, public_key:pkix_encode(
                           'SubjectPublicKeyInfo', IssuerKeyHash, plain))}.

-spec extract_pub_key(#'Certificate'{}) -> #'SubjectPublicKeyInfo'{}.
extract_pub_key(#'Certificate'{
                   tbsCertificate = #'TBSCertificate'{
                                       subjectPublicKeyInfo = SPKI}}) ->
    SPKI.    

-spec set_issuer_and_authkeyid(#'TBSCertificate'{}, #'Certificate'{}) ->
                                      #'TBSCertificate'{}.
%% @doc Return Cert with issuer and AuthorityKeyIdentifier from Parent.
set_issuer_and_authkeyid(TBSCert,
                         #'Certificate'{
                            tbsCertificate =
                                #'TBSCertificate'{
                                   issuer = ParentIssuer,
                                   extensions = ParentExtensions}}) ->
    case pubkey_cert:select_extension(?'id-ce-authorityKeyIdentifier',
                                      ParentExtensions) of
        undefined ->
            lager:debug("setting issuer only", []),
            TBSCert#'TBSCertificate'{issuer = ParentIssuer};
        ParentAuthKeyExt ->
            NewExtensions =
                lists:map(
                  fun(E) ->
                          case E of
                              #'Extension'{extnID =
                                               ?'id-ce-authorityKeyIdentifier'} ->
                                  lager:debug("swapping auth key id to ~p",
                                              [ParentAuthKeyExt]),
                                  ParentAuthKeyExt;
                              _ -> E
                          end
                  end,
                  TBSCert#'TBSCertificate'.extensions),
            lager:debug("setting issuer and auth key id", []),
            TBSCert#'TBSCertificate'{issuer = ParentIssuer,
                                     extensions = NewExtensions}
    end.

-spec is_precert_signer(#'Certificate'{}) -> boolean().
is_precert_signer(#'Certificate'{tbsCertificate = TBSCert}) ->
    Extensions = pubkey_cert:extensions_list(TBSCert#'TBSCertificate'.extensions),
    %% NOTE: It's OK to look at only the first extension found since
    %% "A certificate MUST NOT include more than one instance of a
    %% particular extension." --RFC5280 Sect 4.2
    case pubkey_cert:select_extension(?'id-ce-extKeyUsage', Extensions) of
        #'Extension'{extnValue = Val} ->
            case 'OTP-PUB-KEY':decode('ExtKeyUsageSyntax', Val) of
                %% NOTE: We require that the poisoned OID is the
                %% _only_ extkeyusage present. RFC6962 Sect 3.1 is not
                %% really clear.
                {ok, [?CA_POISON_OID]} -> is_ca(TBSCert);
                _ -> false
            end;
        _ -> false
    end.

-spec is_ca(#'TBSCertificate'{}) -> boolean().
is_ca(#'TBSCertificate'{extensions = Extensions}) ->
    case pubkey_cert:select_extension(?'id-ce-basicConstraints', Extensions) of
        #'Extension'{critical = true, extnValue = Val} ->
            case 'OTP-PUB-KEY':decode('BasicConstraints', Val) of
                {ok, {'BasicConstraints', true, _}} -> true;
                _ -> false
            end;
        _ -> false
    end.

-spec remove_poison_ext(#'Certificate'{}) -> #'TBSCertificate'{}.
remove_poison_ext(#'Certificate'{tbsCertificate = TBSCert}) ->
    Extensions =
        filter(fun(E) -> not poisoned_leaf_p(E) end,
               pubkey_cert:extensions_list(TBSCert#'TBSCertificate'.extensions)),
    TBSCert#'TBSCertificate'{extensions = Extensions}.

poisoned_leaf_p(#'Extension'{extnID = ?LEAF_POISON_OID,
                             critical = true,
                             extnValue = ExtnValue}) ->
    ExtnValue =:= catlfish_compat:poison_val(?LEAF_POISON_VAL);
poisoned_leaf_p(_) ->
    false.

%%%% PEM files.
-spec ders_from_pemfiles(string(), [string()]) -> [binary()].
ders_from_pemfiles(Dir, Filenames) ->
    lists:flatten(
      [ders_from_pemfile(filename:join(Dir, X)) || X <- Filenames]).

-spec ders_from_pemfile(string()) -> [binary()].
ders_from_pemfile(Filename) ->
    lager:debug("reading PEM from ~s", [Filename]),
    PemBins = pems_from_file(Filename),
    Pems = case (catch public_key:pem_decode(PemBins)) of
               {'EXIT', Reason} ->
                   lager:info("~p: invalid PEM-encoding: ~p", [Filename, Reason]),
                   [];
               P -> P
           end,
    [der_from_pem(X) || X <- Pems].

-spec der_from_pem(binary()) -> binary().
der_from_pem(Pem) ->
    case Pem of
        {_Type, Der, not_encrypted} ->
            case parsable_cert_p(Der) of
                true ->
                    Der;
                false ->
                    dump_unparsable_cert(Der),
                    []
            end;
        Fail ->
            lager:info("ignoring PEM-encoded data: ~p~n", [Fail]),
            []
    end.

-spec pems_from_file(file:filename()) -> binary().
pems_from_file(Filename) ->
    {ok, Pems} = file:read_file(Filename),
    Pems.

-spec dump_unparsable_cert(binary()) -> ok | {error, atom()} | not_logged.
dump_unparsable_cert(Der) ->
    case application:get_env(catlfish, rejected_certs_path) of
        {ok, Directory} ->
            {NowMegaSec, NowSec, NowMicroSec} = plop_compat:timestamp(),
            Filename =
                filename:join(Directory,
                              io_lib:format("~p:~p.~p",
                                            [cert_string(Der),
                                             NowMegaSec * 1000 * 1000 + NowSec,
                                             NowMicroSec])),
            lager:info("dumping cert to ~p~n", [Filename]),
            file:write_file(Filename, Der);
        _ ->
            not_logged
    end.

%%%%%%%%%%%%%%%%%%%%
%% Testing private functions.
-include("x509_test.hrl").
sign_test_() ->
    {setup,
     fun() -> ok end,
     fun(_) -> ok end,
     fun(_) -> [?_assertMatch(true, signed_by_p(?C0, ?C1))] end}.

valid_cert_test_() ->
    {setup,
     fun() -> {read_pemfiles_from_dir("test/testdata/known_roots"),
               read_certs("test/testdata/chains")} end,
     fun(_) -> ok end,
     fun({KnownRoots, Chains}) ->
             [
              %% Self-signed but verified against itself so pass.
              %% Note that this certificate is rejected by the
              %% stricter OTP-PKIX.asn1 specification generating
              %% #'OTPCertificate'{}. The error is
              %% {invalid_choice_tag,{22,<<"US">>}}}} in
              %% 'OTP-PUB-KEY':Func('OTP-X520countryname', Value0).
              ?_assertMatch({true, _}, normalise_chain(nth(1, Chains),
                                                       nth(1, Chains), 10)),
              %% Self-signed so fail.
              ?_assertMatch({false, root_unknown},
                            normalise_chain(KnownRoots,
                                            nth(2, Chains), 10)),
              %% Leaf signed by known CA, pass.
              ?_assertMatch({true, _}, normalise_chain(KnownRoots,
                                                       nth(3, Chains), 10)),
              %% Proper 3-depth chain with root in KnownRoots, pass.
              %% Bug CATLFISH-19 --> [info] rejecting "3ee62cb678014c14d22ebf96f44cc899adea72f1": chain_broken
              %% leaf sha1: 3ee62cb678014c14d22ebf96f44cc899adea72f1
              %% leaf Subject: C=KR, O=Government of Korea, OU=Group of Server, OU=\xEA\xB5\x90\xEC\x9C\xA1\xEA\xB3\xBC\xED\x95\x99\xEA\xB8\xB0\xEC\x88\xA0\xEB\xB6\x80, CN=www.berea.ac.kr, CN=haksa.bits.ac.kr
              ?_assertMatch({true, _}, normalise_chain(KnownRoots,
                                                       nth(4, Chains), 3)),
              %% Verify against self, pass.
              %% Bug CATLFISH-??, can't handle issuer keytype ECPoint.
              %% Issuer sha1: 6969562e4080f424a1e7199f14baf3ee58ab6abb
              ?_assertMatch(true, signed_by_p(hd(nth(5, Chains)),
                                              hd(nth(5, Chains)))),
              %% Unsupported signature algorithm MD2-RSA, fail.
              %% Signature Algorithm: md2WithRSAEncryption
              %% CA cert with sha1 96974cd6b663a7184526b1d648ad815cf51e801a
              ?_assertMatch(false, signed_by_p(hd(nth(6, Chains)),
                                               hd(nth(6, Chains)))),

              %% Supposedly problematic chains from Google Aviator, fatal.
              %% 00459972: asn1: syntax error: sequence truncated
              ?_assertMatch({true, _}, normalise_chain(nth(7, Chains),
                                                       nth(7, Chains), 10)),
              %% 1402673: x509: RSA modulus is not a positive number
              ?_assertMatch({true, _}, normalise_chain(nth(8, Chains),
                                                       nth(8, Chains), 10)),
              %% 1345105: asn1: syntax error: IA5String contains invalid character
              ?_assertMatch({true, _}, normalise_chain(nth(9, Chains),
                                                       nth(9, Chains), 10)),
              %% 1557693: asn1: structure error: integer too large
              ?_assertMatch({true, _}, normalise_chain(nth(10, Chains),
                                                       nth(10, Chains), 10)),

              %% Supposedly problematic chains from Google Aviator, non-fatal.
              %% 16800: x509: negative serial number
              %% a.pem
              ?_assertMatch({true, _}, normalise_chain(nth(11, Chains),
                                                       nth(11, Chains), 10)),
              %% 22487: x509: unhandled critical extension ([2 5 29 32])
              %% b.pem
              ?_assertMatch({true, _}, normalise_chain(nth(12, Chains),
                                                       nth(12, Chains), 10)),
              %% 5198: x509: certificate contained IP address of length 8
              %% c.pem
              ?_assertMatch({true, _}, normalise_chain(nth(13, Chains),
                                                       nth(13, Chains), 10))
              ] end}.

chain_test_() ->
    {setup,
     fun() -> {?C0, ?C1} end,
     fun(_) -> ok end,
     fun({C0, C1}) -> chain_test(C0, C1) end}.

chain_test(C0, C1) ->
    [
     %% Root not in chain but in trust store.
     ?_assertEqual({true, [C1]}, normalise_chain([C1], [C0], 10)),
     ?_assertEqual({true, [C1]}, normalise_chain([C1], [C0], 2)),
     %% Chain too long.
     ?_assertMatch({false, chain_too_long}, normalise_chain([C1], [C0], 1)),
     %% Root in chain and in trust store.
     ?_assertEqual({true, [C1]}, normalise_chain([C1], [C0, C1], 2)),
     %% Chain too long.
     ?_assertMatch({false, chain_too_long}, normalise_chain([C1], [C0, C1], 1)),
     %% Root not in trust store.
     ?_assertMatch({false, root_unknown}, normalise_chain([], [C0, C1], 10)),
     %% Selfsigned. Actually OK.
     ?_assertMatch({true, [C0]}, normalise_chain([C0], [C0], 10)),
     ?_assertMatch({true, [C0]}, normalise_chain([C0], [C0], 1)),
     %% Max chain length 0 is not OK.
     ?_assertMatch({false, chain_too_long}, normalise_chain([C0], [C0], 0))
    ].

%%-spec read_certs(file:filename()) -> [string:string()].
-spec read_certs(file:filename()) -> [[binary()]].
read_certs(Dir) ->
    {ok, Fnames} = file:list_dir(Dir),
    PemBins =
        [Pems || {ok, Pems} <-
                     [file:read_file(filename:join(Dir, F)) ||
                         F <- lists:sort(
                                lists:filter(
                                  fun(FN) -> string:equal(
                                               ".pem", filename:extension(FN))
                                  end,
                                  Fnames))]],
    PemEntries = [public_key:pem_decode(P) || P <- PemBins],
    lists:map(fun(L) -> [Der || {'Certificate', Der, not_encrypted} <- L] end,
              PemEntries).