diff options
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/merge.py | 47 |
1 files changed, 28 insertions, 19 deletions
diff --git a/tools/merge.py b/tools/merge.py index 004b98b..974bab0 100755 --- a/tools/merge.py +++ b/tools/merge.py @@ -64,16 +64,16 @@ def get_new_entries(baseurl): print "ERROR: fetchnewentries", e.read() sys.exit(1) -def get_entry(baseurl, hash): +def get_entries(baseurl, hashes): try: - params = urllib.urlencode({"hash":base64.b64encode(hash)}) + params = urllib.urlencode({"hash":[base64.b64encode(hash) for hash in hashes]}, doseq=True) result = urllib2.urlopen(baseurl + "ct/storage/getentry?" + params).read() parsed_result = json.loads(result) if parsed_result.get(u"result") == u"ok": - entries = parsed_result[u"entries"] - assert len(entries) == 1 - assert base64.b64decode(entries[0]["hash"]) == hash - return base64.b64decode(entries[0]["entry"]) + entries = dict([(base64.b64decode(entry["hash"]), base64.b64decode(entry["entry"])) for entry in parsed_result[u"entries"]]) + assert len(entries) == len(hashes) + assert set(entries.keys()) == set(hashes) + return entries print "ERROR: getentry", parsed_result sys.exit(1) except urllib2.HTTPError, e: @@ -152,6 +152,9 @@ def get_missingentries(baseurl): print "ERROR: missingentries", e.read() sys.exit(1) +def chunks(l, n): + return [l[i:i+n] for i in range(0, len(l), n)] + timing = timing_point() logorder = get_logorder() @@ -162,11 +165,13 @@ certsinlog = set(logorder) new_entries_per_node = {} new_entries = set() +entries_to_fetch = {} for storagenode in storagenodes: print "getting new entries from", storagenode new_entries_per_node[storagenode] = get_new_entries(storagenode) new_entries.update(set(new_entries_per_node[storagenode])) + entries_to_fetch[storagenode] = [] timing_point(timing, "get new entries") @@ -177,20 +182,24 @@ print "adding", len(new_entries), "entries" if args.nomerge: sys.exit(0) -added_entries = 0 for hash in new_entries: - if hash not in certsinlog: - entry = None - for storagenode in storagenodes: - if hash in new_entries_per_node[storagenode]: - entry = get_entry(storagenode, hash) - break - assert entry != None - write_chain(hash, entry) - add_to_logorder(hash) - logorder.append(hash) - certsinlog.add(hash) - added_entries += 1 + for storagenode in storagenodes: + if hash in new_entries_per_node[storagenode]: + entries_to_fetch[storagenode].append(hash) + break + + +added_entries = 0 +for storagenode in storagenodes: + for chunk in chunks(entries_to_fetch[storagenode], 10): + entries = get_entries(storagenode, chunk) + for hash in chunk: + entry = entries[hash] + write_chain(hash, entry) + add_to_logorder(hash) + logorder.append(hash) + certsinlog.add(hash) + added_entries += 1 timing_point(timing, "add entries") print "added", added_entries, "entries" |