-rwxr-xr-x | monitor/josef_experimental.py |   1 |
-rwxr-xr-x | monitor/josef_mover.py        | 221 |
2 files changed, 110 insertions, 112 deletions
diff --git a/monitor/josef_experimental.py b/monitor/josef_experimental.py
index 580ee11..eab57a1 100755
--- a/monitor/josef_experimental.py
+++ b/monitor/josef_experimental.py
@@ -30,6 +30,7 @@ def check_inclusion_by_submission(first, last, source, dests):
         try:
             entries = []
             while len(entries) <= last - first:
+                print "Getting " + str(first + len(entries)) + " to " + str(last)
                 entries += get_entries(s_log["url"], first + len(entries), last)["entries"]
                 # print "Fetched entries up to " + str(len(first + len(entries)))
diff --git a/monitor/josef_mover.py b/monitor/josef_mover.py
index d3532a1..a531e3e 100755
--- a/monitor/josef_mover.py
+++ b/monitor/josef_mover.py
@@ -125,151 +125,148 @@ def move_entry(first, last, source, dest):
                 print "FAILED!\n"
         print s_log["name"] + "[" + str(first + i) + "] found in " + str(len(inclusions)) + " logs: ", inclusions
-def check_inclusion_by_submission(first, last, source, dest, logfile):
-    # print entries
-    for s_log in source:
-        try:
-            entries = []
-            while len(entries) < last - first:
-                print "Getting " + str(first + len(entries)) + " to " + str(last)
-                entries += get_entries(s_log["url"], first + len(entries), last)["entries"]
-                # print "Fetched entries up to " + str(len(first + len(entries)))
-        except:
-            print "Failed to get entries from " + s_log["name"]
-
-        for i in range(len(entries)):
-            item = entries[i]
-            inclusions = []
-            for d_log in dest:
-                try:
-                    entry = extract_original_entry(item)
-                    if entry[2]:
-                        precert = True
-                    else:
-                        precert = False
-                    submission = []
-
-                    for e in entry[0]:
-                        submission.append(base64.b64encode(e))
-
-                    if entry[2]:
-                        res = add_prechain(d_log["url"], {"chain" : submission})
-                    else:
-                        res = add_chain(d_log["url"], {"chain" : submission})
-                    # print_reply(res, entry)
-
-                    if not is_new_timestamp(res["timestamp"]):
-                        inclusions.append(d_log["name"])
-
-                except KeyboardInterrupt:
-                    sys.exit()
-                except:
-                    pass
-            s = s_log["name"] + "[" + str(first + i) + "] found in " + str(len(inclusions)) + " logs: " + str(inclusions)
-            print s
-            # logfile = OUTPUT_DIR + s_log["name"] + "_overlap.log"
-            if logfile:
-                log(logfile, s)
-            time.sleep(1) # to ease up om rate-limiting...
 def check_overlap(source, dests):
-    process_count = 1
-    processes = []
-    for tmp_log in source:
-        sth = get_sth(tmp_log["url"])
-
+    PROCESS_COUNT = 100
-        first = 6125
+    for log in source:
+        print "Checkong overlap from " + log["name"]
+        sth = get_sth(log["url"])
+        first = 0
         last = int(sth["tree_size"])
-        # last = 8
-        print "last:",last
         # split into tasks
-        chunk_size = (last - first)/process_count
+        processes = []
+        chunk_size = (last - first)/PROCESS_COUNT
         print "chunk_size:", chunk_size
-        for i in range(process_count):
-            if i + 1 == process_count:
+
+        for i in range(PROCESS_COUNT):
+            if i + 1 == PROCESS_COUNT:
                 tmp_start = first + (i * chunk_size)
                 tmp_last = last - 1
             else:
                 tmp_start = first + (i * chunk_size)
                 tmp_last = first + ((i+1) * chunk_size - 1)
-            # execute in parallell
-            filename = OUTPUT_DIR + tmp_log["name"] + "_overlap_" + str(i) + ".tmp"
-            p = multiprocessing.Process(target=check_inclusion_by_submission, \
-                args=(tmp_start, tmp_last, [tmp_log], dests, filename))
+
+            filename = OUTPUT_DIR + log["name"] + "_overlap_" + str(i) + ".tmp"
+            p = multiprocessing.Process(target=check_stress_submission, \
+                args=(tmp_start, tmp_last, [log], dests, filename))
             p.start()
             processes.append(p)
         for p in processes:
             p.join()
-        # print "Done!"
         # merge results
-        with open(OUTPUT_DIR + tmp_log["name"] + "_overlap.log", "wb") as outfile:
-            for i in range(process_count):
-                filename = OUTPUT_DIR + tmp_log["name"] + "_overlap_" + str(i) + ".tmp"
+        with open(OUTPUT_DIR + log["name"] + "_overlap.log", "wb") as outfile:
+            for i in range(PROCESS_COUNT):
+                filename = OUTPUT_DIR + log["name"] + "_overlap_" + str(i) + ".tmp"
                 # print filename
-                with open(filename, "rb") as infile:
-                    outfile.write(infile.read())
-                    infile.close()
-                os.remove(filename)
-            outfile.close()
+                if os.path.exists(filename):
+                    with open(filename, "rb") as infile:
+                        outfile.write(infile.read())
+                        infile.close()
+                    os.remove(filename)
+            outfile.close()
+
+
+def check_submission_inner(d_log, item, inclusions):
+    entry = extract_original_entry(item)
+    if entry[2]:
+        precert = True
+    else:
+        precert = False
+    submission = []
+
+    for e in entry[0]:
+        submission.append(base64.b64encode(e))
+
+    if entry[2]:
+        res = add_prechain(d_log["url"], {"chain" : submission})
+    else:
+        res = add_chain(d_log["url"], {"chain" : submission})
+    if not is_new_timestamp(res["timestamp"]):
+        inclusions.append(d_log["name"])
+
+def check_submission_outer(first, last, s_log, dest, logfile=None):
+    MAX_CHUNK_SIZE = 50
+    MAX_RETRIES = 2
+    idx = 0
+    while first + idx < last:
+        if first + idx + MAX_CHUNK_SIZE < last:
+            tmp_last = first + idx + MAX_CHUNK_SIZE - 1
+        else:
+            tmp_last = last
+
+        retries = 0
+        while retries <= MAX_RETRIES:
+            try:
+                # print "Getting " + str(first + idx) + " to " + str(tmp_last)
+                entries = get_entries(s_log["url"], first + idx, tmp_last)["entries"]
+                break
+            except Exception, e:
+                time.sleep(1)
+                # if retries == MAX_RETRIES:
+                # print "FAILED!", Exception, e
+                # else:
+                # print "Failed, retrying...(" + str(retries) + ")"
+                retries += 1
-def check_stress_submission(first, last, source, dest, logfile):
-    # print entries
-    for s_log in source:
-        try:
-            entries = []
-            while len(entries) < last - first:
-                # print "Getting " + str(first + len(entries)) + " to " + str(last)
-                entries += get_entries(s_log["url"], first + len(entries), last)["entries"]
-                # print "Fetched entries up to " + str(len(first + len(entries)))
-        except:
-            print "Failed to get entries from " + s_log["name"]
         for i in range(len(entries)):
             item = entries[i]
             inclusions = []
             for d_log in dest:
-                try:
-                    entry = extract_original_entry(item)
-                    if entry[2]:
-                        precert = True
-                    else:
-                        precert = False
-                    submission = []
-
-                    for e in entry[0]:
-                        submission.append(base64.b64encode(e))
-
-                    if entry[2]:
-                        res = add_prechain(d_log["url"], {"chain" : submission})
-                    else:
-                        res = add_chain(d_log["url"], {"chain" : submission})
-                    # print_reply(res, entry)
-                    # print res
+                retries = 0
+                while retries <= MAX_RETRIES:
+                    try:
+                        check_submission_inner(d_log, item,inclusions)
+                        break
+                    except KeyboardInterrupt:
+                        sys.exit()
+                    except Exception, e:
+                        time.sleep(1) # Wait a bit before retrying...
+                        # if retries == MAX_RETRIES:
+                        # print "FAILED!", Exception, e
+                        # else:
+                        # print "Failed, retrying...(" + str(retries) + ")"
+                        retries += 1
+
+            s = s_log["name"] + "[" + str(first + idx + i) + "] found in " + str(len(inclusions)) + " logs: " + str(inclusions)
+            print s
+            if logfile:
+                log(logfile, s)
+        idx += len(entries)
-                    if not is_new_timestamp(res["timestamp"]):
-                        inclusions.append(d_log["name"])
+def check_stress_submission(first, last, source, dest, filename=None):
+    MAX_RETRIES = 2
-                except KeyboardInterrupt:
-                    sys.exit()
-                except Exception, e:
-                    print Exception, e
+    for s_log in source:
+        retries = 0
+        while retries <= MAX_RETRIES:
+            try:
+                check_submission_outer(first, last, s_log, dest, filename)
+                break
+            except KeyboardInterrupt:
+                sys.exit()
+            except Exception, e:
+                time.sleep(1) # Wait a bit before retrying...
+                if retries == MAX_RETRIES:
+                    print "FAILED!", Exception, e
+                else:
+                    print "Failed, retrying...(" + str(retries) + ")"
+                retries += 1
-            s = s_log["name"] + "[" + str(first + i) + "] found in " + str(len(inclusions)) + " logs: " + str(inclusions)
-            print s
 def stress_test(dest):
-    NR_OF_ENTRIES = 8
-    ENTRY_OFFSET = 1010
-    PROCESS_COUNT = 200
+    NR_OF_ENTRIES = 5
+    ENTRY_OFFSET = 5030
+    PROCESS_COUNT = 10
     for log in dest:
         print "Stress testing " + log["name"]
@@ -278,7 +275,7 @@ def stress_test(dest):
         for i in range(PROCESS_COUNT):
             # execute in parallell
             p = multiprocessing.Process(target=check_stress_submission, \
-                args=(ENTRY_OFFSET, ENTRY_OFFSET + NR_OF_ENTRIES, [log], [log], None))
+                args=(ENTRY_OFFSET, ENTRY_OFFSET + NR_OF_ENTRIES, [log], [log]))
             p.start()
             processes.append(p)
@@ -298,8 +295,8 @@ if __name__ == "__main__":
     source = [CTLOGS[2]]
     dests = CTLOGS
-    stress_test([CTLOGS[2]])
-    # check_overlap(source, dests)
+    # stress_test([CTLOGS[0]])
+    check_overlap(source, dests)