diff options
| author | josef <josef.gson@gmail.com> | 2015-10-20 10:16:50 +0200 | 
|---|---|---|
| committer | josef <josef.gson@gmail.com> | 2015-10-20 10:16:50 +0200 | 
| commit | cd218c70e3eabaeb5b9a9b6d26adb6a93929713b (patch) | |
| tree | 41e20d8a8593bc4a8310d7829a8c2b0700633229 | |
| parent | 833f64c2621b8cef3ec350d530541498d7173dbc (diff) | |
reading log size history from log files
| -rwxr-xr-x | monitor/josef_logreader.py | 65 |
| -rwxr-xr-x | monitor/josef_mover.py | 12 | 
2 files changed, 70 insertions, 7 deletions
| diff --git a/monitor/josef_logreader.py b/monitor/josef_logreader.py index 2b3a473..868ab00 100755 --- a/monitor/josef_logreader.py +++ b/monitor/josef_logreader.py @@ -99,11 +99,21 @@ def print_errors(l):  def print_overlap(l):      log = read_loglog(l) +    source = log[0].split("[")[0].split()[-1] +    print "\nOverlaps in " + source + ":"      entry_count = {} +    metadata = {"unique":0, "valid":0}      total = 0      for line in log:          # line = item[TIME_LEN:]          logs = json.loads(line.split("logs: ")[-1][:-1].replace("'", '"')) +        if not source in logs: # don't check entries that were submitted after measurement start +            break +        if len(logs) == 1: +            metadata["unique"] += 1 +        if "pilot" in logs or "rocketeer" in logs or "aviator" in logs: +            metadata["valid"] += 1 +          for l in logs:              if l in entry_count:                  entry_count[l] += 1 @@ -113,7 +123,54 @@ def print_overlap(l):      # print entry_count      for e in entry_count: -        print e + ", " + str(entry_count[e]) + " (" + str(int(100 * float(entry_count[e])/float(total))) + "%)" +        if e != "plausible": +            print e + ", " + str(entry_count[e]) + " (" + str(int(100 * float(entry_count[e])/float(total))) + "%)" + +    print "" +    for e in metadata: +        print e + ", " + str(metadata[e]) + " (" + str(int(100 * float(metadata[e])/float(total))) + "%)" + +def write_history_data(l): +    log = read_loglog(l) +    data = {} +    start_date = datetime.date(2015, 9, 29) +    end_date = datetime.date(2015, 10, 20) + +    # initialize with all dates +    for single_date in daterange(start_date, end_date): +        data[single_date.strftime("%Y-%m-%d")] = 0 + +    # parse log data +    for line in log: +        date = line[:TIME_LEN -10] +        try: +            size = line.split("Size: ")[1].split(",")[0] +            data[date] = size +        except: +            pass + +    # put in list and sort 
+    data_list = [] +    for item in data: +        data_list.append((item, data[item])) +    data_list.sort(key=lambda tup: tup[0]) + +    # fill out missing data points and print +    prev_val = 0 +    for item in data_list: +        if item[1] > prev_val: +            val = item[1] +        else: +            val = prev_val +        prev_val = val +        print item[0], val + + + +def daterange(start_date, end_date): +    for n in range(int ((end_date - start_date).days)): +        yield start_date + datetime.timedelta(n) +  if __name__ == "__main__":      OVERLAP_STR = "_overlap.log" @@ -122,13 +179,15 @@ if __name__ == "__main__":          if log == "monitor.log":              pass          elif log[- len(OVERLAP_STR):] == OVERLAP_STR: -            print_overlap(log) -        # else: +            # print_overlap(log) +            pass +        else:          #     print log          #     print_log_stats(log)          #     print_average_age(log)          #     print_errors(log)          #     print "" +            write_history_data(log) diff --git a/monitor/josef_mover.py b/monitor/josef_mover.py index ac79448..5bf5a7a 100755 --- a/monitor/josef_mover.py +++ b/monitor/josef_mover.py @@ -127,9 +127,13 @@ def check_inclusion_by_submission(first, last, source, dest):      # print entries      for s_log in source:          try: -            entries = get_entries(s_log["url"], first, last)["entries"] +            entries = [] +            while len(entries) < last - first: +                print "Getting " + str(first + len(entries)) + " to " + str(last) +                entries += get_entries(s_log["url"], first + len(entries), last)["entries"] +                # print "Fetched entries up to " + str(len(first + len(entries)))          except: -            log("Failed to get entries from " + s_log["name"]) +            print "Failed to get entries from " + s_log["name"]          # print "\n\nSource: " + s_log["name"] + "\n"          for i in range(len(entries)): 
@@ -177,7 +181,7 @@ def log(fn, string):          f.close()  if __name__ == "__main__": -    source = [CTLOGS[9]] +    source = [CTLOGS[7]]      dests = CTLOGS      # source = ctlogs      # dests = ctlogs @@ -185,7 +189,7 @@ if __name__ == "__main__":          sth = get_sth(tmp_log["url"]) -        first = 0 +        first = 1654          last = int(sth["tree_size"]) - 1          # print last | 
