From 4ee7e18aa4a31d605bc751a514698641aa9fae4c Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Thu, 19 Feb 2015 13:39:19 +0100 Subject: fetchallcerts.py: handle precerts submitcert.py: handle .zip files fetchallcerts.py: Always calculate full tree fetchallcerts.py: Cache level 16 hashes fetchallcerts.py: Save STH --- tools/certtools.py | 93 ++++++++++++++++++++++++++++++-- tools/fetchallcerts.py | 142 +++++++++++++++++++++++++++++++++++++++++-------- tools/submitcert.py | 51 +++++++++++++----- 3 files changed, 249 insertions(+), 37 deletions(-) mode change 100644 => 100755 tools/fetchallcerts.py diff --git a/tools/certtools.py b/tools/certtools.py index e1ca57a..6a144c9 100644 --- a/tools/certtools.py +++ b/tools/certtools.py @@ -11,6 +11,8 @@ import sys import hashlib import ecdsa import datetime +import cStringIO +import zipfile publickeys = { "https://ct.googleapis.com/pilot/": @@ -44,11 +46,14 @@ def get_cert_info(s): def get_pemlike(filename, marker): + return get_pemlike_from_file(open(filename), marker) + +def get_pemlike_from_file(f, marker): entries = [] entry = "" inentry = False - for line in open(filename): + for line in f: line = line.strip() if line == "-----BEGIN " + marker + "-----": entry = "" @@ -63,6 +68,10 @@ def get_pemlike(filename, marker): def get_certs_from_file(certfile): return get_pemlike(certfile, "CERTIFICATE") +def get_certs_from_string(s): + f = cStringIO.StringIO(s) + return get_pemlike_from_file(f, "CERTIFICATE") + def get_eckey_from_file(keyfile): keys = get_pemlike(keyfile, "EC PRIVATE KEY") assert len(keys) == 1 @@ -138,6 +147,10 @@ def get_entries(baseurl, start, end): print "ERROR:", e.read() sys.exit(1) +def extract_precertificate(precert_chain_entry): + (precert, certchain) = unpack_tls_array(precert_chain_entry, 3) + return (precert, certchain) + def decode_certificate_chain(packed_certchain): (unpacked_certchain, rest) = unpack_tls_array(packed_certchain, 3) assert len(rest) == 0 @@ -235,8 +248,13 @@ def unpack_mtl(merkle_tree_leaf): leaf_type = merkle_tree_leaf[1:2] timestamped_entry = merkle_tree_leaf[2:] (timestamp, entry_type) = struct.unpack(">QH", timestamped_entry[0:10]) - (leafcert, rest_entry) = unpack_tls_array(timestamped_entry[10:], 3) - return (leafcert, timestamp) + if entry_type == 0: + issuer_key_hash = None + (leafcert, rest_entry) = unpack_tls_array(timestamped_entry[10:], 3) + elif entry_type == 1: + issuer_key_hash = timestamped_entry[10:42] + (leafcert, rest_entry) = unpack_tls_array(timestamped_entry[42:], 3) + return (leafcert, timestamp, issuer_key_hash) def get_leaf_hash(merkle_tree_leaf): leaf_hash = hashlib.sha256() @@ -284,3 +302,72 @@ def build_merkle_tree(layer0): current_layer = next_merkle_layer(current_layer) layers.append(current_layer) return layers + +def print_inclusion_proof(proof): + audit_path = proof[u'audit_path'] + n = proof[u'leaf_index'] + level = 0 + for s in audit_path: + entry = base64.b16encode(base64.b64decode(s)) + n ^= 1 + print level, n, entry + n >>= 1 + level += 1 + +def get_one_cert(store, i): + filename = i / 10000 + zf = zipfile.ZipFile("%s/%04d.zip" % (store, i / 10000)) + cert = zf.read("%08d" % i) + zf.close() + return cert + +def get_hash_from_certfile(cert): + for line in cert.split("\n"): + if line.startswith("-----"): + return None + if line.startswith("Leafhash: "): + return base64.b16decode(line[len("Leafhash: "):]) + return None + +def get_proof(store, tree_size, n): + hash = get_hash_from_certfile(get_one_cert(store, n)) + return get_proof_by_hash(args.baseurl, hash, tree_size) + +def get_certs_from_zipfiles(zipfiles, firstleaf, lastleaf): + for i in range(firstleaf, lastleaf + 1): + try: + yield zipfiles[i / 10000].read("%08d" % i) + except KeyError: + return + +def get_merkle_hash_64k(store, blocknumber, write_to_cache=False): + hashfilename = "%s/%04x.64khash" % (store, blocknumber) + try: + hash = base64.b16decode(open(hashfilename).read()) + assert len(hash) == 32 + return ("hash", hash) + except IOError: + pass + firstleaf = blocknumber * 65536 + lastleaf = firstleaf + 65535 + firstfile = firstleaf / 10000 + lastfile = lastleaf / 10000 + zipfiles = {} + for i in range(firstfile, lastfile + 1): + try: + zipfiles[i] = zipfile.ZipFile("%s/%04d.zip" % (store, i)) + except IOError: + break + certs = get_certs_from_zipfiles(zipfiles, firstleaf, lastleaf) + layer0 = [get_hash_from_certfile(cert) for cert in certs] + tree = build_merkle_tree(layer0) + calculated_hash = tree[-1][0] + for zf in zipfiles.values(): + zf.close() + if len(layer0) != 65536: + return ("incomplete", (len(layer0), calculated_hash)) + if write_to_cache: + f = open(hashfilename, "w") + f.write(base64.b16encode(calculated_hash)) + f.close() + return ("hash", calculated_hash) diff --git a/tools/fetchallcerts.py b/tools/fetchallcerts.py old mode 100644 new mode 100755 index 2276e68..866bb43 --- a/tools/fetchallcerts.py +++ b/tools/fetchallcerts.py @@ -14,20 +14,25 @@ import struct import hashlib import itertools from certtools import * +import zipfile +import os +import time parser = argparse.ArgumentParser(description='') parser.add_argument('baseurl', help="Base URL for CT server") parser.add_argument('--store', default=None, metavar="dir", help='Store certificates in directory dir') -parser.add_argument('--start', default=0, metavar="n", type=int, help='Start at index n') -parser.add_argument('--verify', action='store_true', help='Verify STH') +parser.add_argument('--write-sth', action='store_true', help='Write STH') args = parser.parse_args() def extract_original_entry(entry): leaf_input = base64.decodestring(entry["leaf_input"]) - (leaf_cert, timestamp) = unpack_mtl(leaf_input) + (leaf_cert, timestamp, issuer_key_hash) = unpack_mtl(leaf_input) extra_data = base64.decodestring(entry["extra_data"]) + if issuer_key_hash != None: + (precert, extra_data) = extract_precertificate(extra_data) + leaf_cert = precert certchain = decode_certificate_chain(extra_data) - return [leaf_cert] + certchain + return ([leaf_cert] + certchain, timestamp, issuer_key_hash) def get_entries_wrapper(baseurl, start, end): fetched_entries = 0 @@ -45,36 +50,129 @@ def print_layer(layer): print base64.b16encode(entry) sth = get_sth(args.baseurl) +check_sth_signature(args.baseurl, sth) tree_size = sth["tree_size"] root_hash = base64.decodestring(sth["sha256_root_hash"]) +try: + if args.store: + oldsth = json.load(open(args.store + "/currentsth")) + else: + oldsth = None +except IOError: + oldsth = None + +sth_timestamp = datetime.datetime.fromtimestamp(sth["timestamp"]/1000) +since_timestamp = time.time() - sth["timestamp"]/1000 + +print "Log last updated %s, %d seconds ago" % (sth_timestamp.ctime(), since_timestamp) + print "tree size", tree_size print "root hash", base64.b16encode(root_hash) -entries = get_entries_wrapper(args.baseurl, args.start, tree_size - 1) +if oldsth: + if oldsth["tree_size"] == tree_size: + print "Tree size has not changed" + if oldsth["sha256_root_hash"] != sth["sha256_root_hash"]: + print "Root hash is different even though tree size is the same." + print "Log has violated the append-only property." + print "Old hash:", oldsth["sha256_root_hash"] + print "New hash:", sth["sha256_root_hash"] + sys.exit(1) + if oldsth["timestamp"] == sth["timestamp"]: + print "Timestamp has not changed" + else: + print "Tree size changed, old tree size was", oldsth["tree_size"] -if args.verify: +merkle_64klayer = [] + +if args.store: + ncerts = None + for blocknumber in range(0, (tree_size / 65536) + 1): + (resulttype, result) = get_merkle_hash_64k(args.store, blocknumber, write_to_cache=True) + if resulttype == "incomplete": + (incompletelength, hash) = result + ncerts = blocknumber * 65536 + incompletelength + break + assert resulttype == "hash" + hash = result + merkle_64klayer.append(hash) + print blocknumber * 65536, + sys.stdout.flush() + print + print "ncerts", ncerts +else: + ncerts = 0 + +entries = get_entries_wrapper(args.baseurl, ncerts, tree_size - 1) + +if not args.store: layer0 = [get_leaf_hash(base64.decodestring(entry["leaf_input"])) for entry in entries] tree = build_merkle_tree(layer0) calculated_root_hash = tree[-1][0] - print "calculated root hash", base64.b16encode(calculated_root_hash) - - if calculated_root_hash != root_hash: - print "fetched root hash and calculated root hash different, aborting" - sys.exit(1) - -elif args.store: - for entry, i in itertools.izip(entries, itertools.count(args.start)): +else: + currentfilename = None + zf = None + for entry, i in itertools.izip(entries, itertools.count(ncerts)): try: - chain = extract_original_entry(entry) - f = open(args.store + "/" + ("%08d" % i), "w") + (chain, timestamp, issuer_key_hash) = extract_original_entry(entry) + zipfilename = args.store + "/" + ("%04d.zip" % (i / 10000)) + if zipfilename != currentfilename: + if zf: + zf.close() + zf = zipfile.ZipFile(zipfilename, "a", + compression=zipfile.ZIP_DEFLATED) + currentfilename = zipfilename + s = "" + s += "Timestamp: %s\n" % timestamp + leaf_input = base64.decodestring(entry["leaf_input"]) + leaf_hash = get_leaf_hash(leaf_input) + s += "Leafhash: %s\n" % base64.b16encode(leaf_hash) + if issuer_key_hash: + s += "-----BEGIN PRECERTIFICATE-----\n" + s += base64.encodestring(chain[0]).rstrip() + "\n" + s += "-----END PRECERTIFICATE-----\n" + s += "\n" + chain = chain[1:] for cert in chain: - print >> f, "-----BEGIN CERTIFICATE-----" - print >> f, base64.encodestring(cert).rstrip() - print >> f, "-----END CERTIFICATE-----" - print >> f, "" - except AssertionError: - print "error for cert", i + s += "-----BEGIN CERTIFICATE-----\n" + s += base64.encodestring(cert).rstrip() + "\n" + s += "-----END CERTIFICATE-----\n" + s += "\n" + zf.writestr("%08d" % i, s) + except AssertionError, e: + print "error for cert", i, e + if zf: + zf.close() + + for blocknumber in range(ncerts / 65536, (tree_size / 65536) + 1): + (resulttype, result) = get_merkle_hash_64k(args.store, blocknumber, write_to_cache=True) + if resulttype == "incomplete": + (incompletelength, hash) = result + ncerts = blocknumber * 65536 + incompletelength + merkle_64klayer.append(hash) + break + assert resulttype == "hash" + hash = result + merkle_64klayer.append(hash) + print blocknumber * 65536, base64.b16encode(hash) + + tree = build_merkle_tree(merkle_64klayer) + + calculated_root_hash = tree[-1][0] + + assert ncerts == tree_size + +print "calculated root hash", base64.b16encode(calculated_root_hash) + +if calculated_root_hash != root_hash: + print "fetched root hash and calculated root hash different" + sys.exit(1) + +if args.store and args.write_sth: + f = open(args.store + "/currentsth", "w") + f.write(json.dumps(sth)) + f.close() diff --git a/tools/submitcert.py b/tools/submitcert.py index 1b87b53..04b6ebe 100755 --- a/tools/submitcert.py +++ b/tools/submitcert.py @@ -15,6 +15,7 @@ from certtools import * import os import signal import select +import zipfile from multiprocessing import Pool @@ -29,13 +30,13 @@ if certfilepath[-1] == "/": else: certfiles = [certfilepath] -def submitcert(certfile): +def submitcert((certfile, cert)): timing = timing_point() - certs = get_certs_from_file(certfile) + certchain = get_certs_from_string(cert) timing_point(timing, "readcerts") try: - result = add_chain(baseurl, {"chain":map(base64.b64encode, certs)}) + result = add_chain(baseurl, {"chain":map(base64.b64encode, certchain)}) except SystemExit: print "EXIT:", certfile select.select([], [], [], 1.0) @@ -49,7 +50,7 @@ def submitcert(certfile): try: if check_sig: - check_sct_signature(baseurl, certs[0], result) + check_sct_signature(baseurl, certchain[0], result) timing_point(timing, "checksig") except AssertionError, e: print "ERROR:", certfile, e @@ -63,7 +64,7 @@ def submitcert(certfile): if lookup_in_log: - merkle_tree_leaf = pack_mtl(result["timestamp"], certs[0]) + merkle_tree_leaf = pack_mtl(result["timestamp"], certchain[0]) leaf_hash = get_leaf_hash(merkle_tree_leaf) @@ -84,14 +85,14 @@ def submitcert(certfile): certchain = decode_certificate_chain(base64.decodestring(extra_data)) - submittedcertchain = certs[1:] + submittedcertchain = certchain[1:] for (submittedcert, fetchedcert, i) in zip(submittedcertchain, certchain, itertools.count(1)): print "cert", i, "in chain is the same:", submittedcert == fetchedcert if len(certchain) == len(submittedcertchain) + 1: - last_issuer = get_cert_info(certs[-1])["issuer"] + last_issuer = get_cert_info(certchain[-1])["issuer"] root_subject = get_cert_info(certchain[-1])["subject"] print "issuer of last cert in submitted chain and " \ "subject of last cert in fetched chain is the same:", \ @@ -105,20 +106,46 @@ def submitcert(certfile): timing_point(timing, "lookup") return timing["deltatimes"] +def get_ncerts(certfiles): + n = 0 + for certfile in certfiles: + if certfile.endswith(".zip"): + zf = zipfile.ZipFile(certfile) + n += len(zf.namelist()) + zf.close() + else: + n += 1 + return n + +def get_all_certificates(certfiles): + for certfile in certfiles: + if certfile.endswith(".zip"): + zf = zipfile.ZipFile(certfile) + for name in zf.namelist(): + yield (name, zf.read(name)) + zf.close() + else: + yield (certfile, open(certfile).read()) + p = Pool(16, lambda: signal.signal(signal.SIGINT, signal.SIG_IGN)) nsubmitted = 0 lastprinted = 0 -starttime = datetime.datetime.now() -print len(certfiles), "certs" +ncerts = get_ncerts(certfiles) -submitcert(certfiles[0]) +print ncerts, "certs" + +certs = get_all_certificates(certfiles) + +submitcert(certs.next()) nsubmitted += 1 select.select([], [], [], 3.0) +starttime = datetime.datetime.now() + try: - for timing in p.imap_unordered(submitcert, certfiles[1:]): + for timing in p.imap_unordered(submitcert, certs): if timing == None: print "error" print "submitted", nsubmitted @@ -129,7 +156,7 @@ try: deltatime = datetime.datetime.now() - starttime deltatime_f = deltatime.seconds + deltatime.microseconds / 1000000.0 rate = nsubmitted / deltatime_f - if nsubmitted > lastprinted + len(certfiles) / 10: + if nsubmitted > lastprinted + ncerts / 10: print nsubmitted, "rate %.1f" % rate lastprinted = nsubmitted #print timing, "rate %.1f" % rate -- cgit v1.1