-rwxr-xr-x | monitor/josef_logreader.py | 35
-rwxr-xr-x | monitor/josef_mover.py     | 62
2 files changed, 66 insertions, 31 deletions
diff --git a/monitor/josef_logreader.py b/monitor/josef_logreader.py
index c8feb79..3d28146 100755
--- a/monitor/josef_logreader.py
+++ b/monitor/josef_logreader.py
@@ -131,7 +131,7 @@ def print_overlap(l):
         print e + ", " + str(metadata[e]) + " (" + str(int(100 * float(metadata[e])/float(total))) + "%)"
 
 def write_history_data(l):
-    print log
+    logfile = OUTPUT_DIR + l[:-4] + "_history.txt"
     log = read_loglog(l)
     data = {}
     start_date = datetime.date(2015, 9, 29)
@@ -156,15 +156,18 @@ def write_history_data(l):
         data_list.append((item, data[item]))
 
     data_list.sort(key=lambda tup: tup[0])
 
-    # fill out missing data points and print
-    prev_val = 0
-    for item in data_list:
-        if item[1] > prev_val:
-            val = item[1]
-        else:
-            val = prev_val
-        prev_val = val
-        print item[0], val
+    # fill out missing data points and write to file
+    with open(logfile, 'a') as f:
+        prev_val = 0
+        for item in data_list:
+            if item[1] > prev_val:
+                val = item[1]
+            else:
+                val = prev_val
+            prev_val = val
+            # print item[0], val
+            f.write(str(item[0]) + " " + str(val) + "\n")
+        f.close()
@@ -180,14 +183,14 @@ if __name__ == "__main__":
         if log == "monitor.log":
             pass
         elif log[- len(OVERLAP_STR):] == OVERLAP_STR:
-            # print_overlap(log)
+            print_overlap(log)
             pass
         else:
-            # print log
-            # print_log_stats(log)
-            # print_average_age(log)
-            # print_errors(log)
-            # print ""
+            # print log
+            # print_log_stats(log)
+            # print_average_age(log)
+            # print_errors(log)
+            # print ""
             write_history_data(log)
diff --git a/monitor/josef_mover.py b/monitor/josef_mover.py
index 5bf5a7a..63a155f 100755
--- a/monitor/josef_mover.py
+++ b/monitor/josef_mover.py
@@ -6,6 +6,7 @@ import time
 import datetime
 import os
 import json
+import multiprocessing
 
 from precerttools import cleanprecert
 from monitor_conf import *
@@ -123,7 +124,7 @@ def move_entry(first, last, source, dest):
                 print "FAILED!\n"
         print s_log["name"] + "[" + str(first + i) + "] found in " + str(len(inclusions)) + " logs: ", inclusions
 
-def check_inclusion_by_submission(first, last, source, dest):
+def check_inclusion_by_submission(first, last, source, dest, logfile):
     # print entries
     for s_log in source:
         try:
@@ -135,13 +136,10 @@ def check_inclusion_by_submission(first, last, source, dest):
         except:
            print "Failed to get entries from " + s_log["name"]
 
-        # print "\n\nSource: " + s_log["name"] + "\n"
        for i in range(len(entries)):
-        # for item in entries:
            item = entries[i]
            inclusions = []
            for d_log in dests:
-                # print "Log: " + d_log["name"]
                try:
                    entry = extract_original_entry(item)
                    if entry[2]:
@@ -162,17 +160,18 @@ def check_inclusion_by_submission(first, last, source, dest):
                    if not is_new_timestamp(res["timestamp"]):
                        inclusions.append(d_log["name"])
 
-                    # time.sleep(5)
                except KeyboardInterrupt:
                    sys.exit()
                except:
-                    # print "FAILED!\n"
                    pass
            s = s_log["name"] + "[" + str(first + i) + "] found in " + str(len(inclusions)) + " logs: " + str(inclusions)
            print s
 
-            logfile = OUTPUT_DIR + s_log["name"] + "_overlap.log"
+            # logfile = OUTPUT_DIR + s_log["name"] + "_overlap.log"
            log(logfile, s)
 
+
+
+
 def log(fn, string):
     logfile = fn
     s = time_str() + " " + string
@@ -180,21 +179,54 @@ def log(fn, string):
         f.write(s + "\n")
         f.close()
 
+
 if __name__ == "__main__":
-    source = [CTLOGS[7]]
+    source = [CTLOGS[3]]
     dests = CTLOGS
-    # source = ctlogs
-    # dests = ctlogs
+    process_count = 4
+    processes = []
 
     for tmp_log in source:
         sth = get_sth(tmp_log["url"])
-        first = 1654
-        last = int(sth["tree_size"]) - 1
-        # print last
+        first = 0
+        last = int(sth["tree_size"])
+        last = 8
+        print "last:",last
+
+        # split into tasks
+        chunk_size = last/process_count
+        print "chunk_size:", chunk_size
+        for i in range(process_count):
+            if i + 1 == process_count:
+                tmp_start = i * chunk_size
+                tmp_last = last - 1
+            else:
+                tmp_start = i * chunk_size
+                tmp_last = (i+1) * chunk_size - 1
+
+            # execute in parallell
+            filename = OUTPUT_DIR + tmp_log["name"] + "_overlap_" + str(i) + ".tmp"
+            p = multiprocessing.Process(target=check_inclusion_by_submission, \
+                args=(tmp_start, tmp_last, [tmp_log], dests, filename))
+            p.start()
+            processes.append(p)
+
+        for p in processes:
+            p.join()
+        # print "Done!"
+
+        # merge results
+        with open(OUTPUT_DIR + tmp_log["name"] + "_overlap.log", "wb") as outfile:
+            for i in range(process_count):
+                filename = OUTPUT_DIR + tmp_log["name"] + "_overlap_" + str(i) + ".tmp"
+                # print filename
+                with open(filename, "rb") as infile:
+                    outfile.write(infile.read())
+                    infile.close()
+                os.remove(filename)
+            outfile.close()
 
-        check_inclusion_by_submission(first, last, [tmp_log], dests)
-        # check_inclusion_all(first,last,source, dests)
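
The josef_mover.py change parallelizes check_inclusion_by_submission by splitting the entry range across worker processes, letting each one append to its own .tmp file, and concatenating the pieces afterwards. Below is a minimal, self-contained sketch of that split/merge pattern under assumed names: worker and run_parallel are illustrative stand-ins, not functions from the repository, and the real code passes CT log descriptors rather than a bare index range.

    # Sketch of the split/merge pattern used in the new __main__ block:
    # divide an inclusive index range over N processes, have each process
    # append results to its own temporary file, then merge the files in order.
    import multiprocessing
    import os

    def worker(first, last, out_path):
        # Stand-in for check_inclusion_by_submission: handle entries
        # [first, last] inclusive and append one line per entry.
        with open(out_path, "a") as f:
            for i in range(first, last + 1):
                f.write("entry %d checked\n" % i)

    def run_parallel(last, process_count, out_prefix):
        chunk_size = last // process_count
        processes = []
        for i in range(process_count):
            start = i * chunk_size
            # the final worker absorbs the remainder of the integer division
            end = last - 1 if i + 1 == process_count else (i + 1) * chunk_size - 1
            p = multiprocessing.Process(target=worker,
                                        args=(start, end, "%s_%d.tmp" % (out_prefix, i)))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        # merge the per-process files in index order, then remove them
        with open(out_prefix + ".log", "wb") as outfile:
            for i in range(process_count):
                part = "%s_%d.tmp" % (out_prefix, i)
                with open(part, "rb") as infile:
                    outfile.write(infile.read())
                os.remove(part)

    if __name__ == "__main__":
        run_parallel(last=8, process_count=4, out_prefix="overlap")

As in the diff, only the last worker picks up the remainder of the integer division, so chunks are roughly but not exactly equal, and the final log stays ordered because the temporary files are merged by worker index.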