diff options
Diffstat (limited to 'monitor')
-rwxr-xr-x | monitor/josef_logreader.py | 65 | ||||
-rwxr-xr-x | monitor/josef_mover.py | 12 |
2 files changed, 70 insertions, 7 deletions
diff --git a/monitor/josef_logreader.py b/monitor/josef_logreader.py index 2b3a473..868ab00 100755 --- a/monitor/josef_logreader.py +++ b/monitor/josef_logreader.py @@ -99,11 +99,21 @@ def print_errors(l): def print_overlap(l): log = read_loglog(l) + source = log[0].split("[")[0].split()[-1] + print "\nOverlaps in " + source + ":" entry_count = {} + metadata = {"unique":0, "valid":0} total = 0 for line in log: # line = item[TIME_LEN:] logs = json.loads(line.split("logs: ")[-1][:-1].replace("'", '"')) + if not source in logs: # don't check entries that were submitted after measurement start + break + if len(logs) == 1: + metadata["unique"] += 1 + if "pilot" in logs or "rocketeer" in logs or "aviator" in logs: + metadata["valid"] += 1 + for l in logs: if l in entry_count: entry_count[l] += 1 @@ -113,7 +123,54 @@ def print_overlap(l): # print entry_count for e in entry_count: - print e + ", " + str(entry_count[e]) + " (" + str(int(100 * float(entry_count[e])/float(total))) + "%)" + if e != "plausible": + print e + ", " + str(entry_count[e]) + " (" + str(int(100 * float(entry_count[e])/float(total))) + "%)" + + print "" + for e in metadata: + print e + ", " + str(metadata[e]) + " (" + str(int(100 * float(metadata[e])/float(total))) + "%)" + +def write_history_data(l): + log = read_loglog(l) + data = {} + start_date = datetime.date(2015, 9, 29) + end_date = datetime.date(2015, 10, 20) + + # initialize with all dates + for single_date in daterange(start_date, end_date): + data[single_date.strftime("%Y-%m-%d")] = 0 + + # parse log data + for line in log: + date = line[:TIME_LEN -10] + try: + size = line.split("Size: ")[1].split(",")[0] + data[date] = size + except: + pass + + # put in list and sort + data_list = [] + for item in data: + data_list.append((item, data[item])) + data_list.sort(key=lambda tup: tup[0]) + + # fill out missing data points and print + prev_val = 0 + for item in data_list: + if item[1] > prev_val: + val = item[1] + else: + val = prev_val + prev_val = val + print item[0], val + + + +def daterange(start_date, end_date): + for n in range(int ((end_date - start_date).days)): + yield start_date + datetime.timedelta(n) + if __name__ == "__main__": OVERLAP_STR = "_overlap.log" @@ -122,13 +179,15 @@ if __name__ == "__main__": if log == "monitor.log": pass elif log[- len(OVERLAP_STR):] == OVERLAP_STR: - print_overlap(log) - # else: + # print_overlap(log) + pass + else: # print log # print_log_stats(log) # print_average_age(log) # print_errors(log) # print "" + write_history_data(log) diff --git a/monitor/josef_mover.py b/monitor/josef_mover.py index ac79448..5bf5a7a 100755 --- a/monitor/josef_mover.py +++ b/monitor/josef_mover.py @@ -127,9 +127,13 @@ def check_inclusion_by_submission(first, last, source, dest): # print entries for s_log in source: try: - entries = get_entries(s_log["url"], first, last)["entries"] + entries = [] + while len(entries) < last - first: + print "Getting " + str(first + len(entries)) + " to " + str(last) + entries += get_entries(s_log["url"], first + len(entries), last)["entries"] + # print "Fetched entries up to " + str(len(first + len(entries))) except: - log("Failed to get entries from " + s_log["name"]) + print "Failed to get entries from " + s_log["name"] # print "\n\nSource: " + s_log["name"] + "\n" for i in range(len(entries)): @@ -177,7 +181,7 @@ def log(fn, string): f.close() if __name__ == "__main__": - source = [CTLOGS[9]] + source = [CTLOGS[7]] dests = CTLOGS # source = ctlogs # dests = ctlogs @@ -185,7 +189,7 @@ if __name__ == "__main__": sth = get_sth(tmp_log["url"]) - first = 0 + first = 1654 last = int(sth["tree_size"]) - 1 # print last |