summaryrefslogtreecommitdiff
path: root/tools/josef_experimental_auditor.py
blob: e8e7b523619362462e4c8ec91a57722504acd33a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
#!/usr/bin/python
# -*- coding: utf-8 -*-     

import time
import datetime
import base64
import argparse
import errno
from certtools import *

# Exit statuses handed to sys.exit() by this script (Nagios plugin style).
NAGIOS_OK = 0
NAGIOS_WARN = 1
NAGIOS_CRIT = 2
NAGIOS_UNKNOWN = 3

# Default file used to persist the last seen STHs between runs (--cur-sth).
DEFAULT_CUR_FILE = 'all-sth.json'

# Base URLs of the logs to check.  Commented-out entries are skipped.
# NOTE: --audit4 always operates on the first active entry (base_urls[0]).
base_urls = [
    # "https://plausible.ct.nordu.net/",
    # "https://ct1.digicert-ct.com/log/",
    # "https://ct.izenpe.com/",
    # "https://log.certly.io/",
    # "https://ct.googleapis.com/aviator/",
    # "https://ct.googleapis.com/pilot/",
    # "https://ct.googleapis.com/rocketeer/",
    "https://ctlog.api.venafi.com/",
    "https://ct.ws.symantec.com/",
]

# Command line interface; every mode flag is handled in main().
parser = argparse.ArgumentParser(description="")
parser.add_argument('--audit', action='store_true', help="run lightweight auditor verifying consistency in STH")
parser.add_argument('--monitor', action='store_true', help="run full monitoring for all logs")
parser.add_argument('--audit2', action='store_true', help="run medium-weight auditor verifying consistency in STH and inclusion proofs of new entries")
parser.add_argument('--audit3', action='store_true', help="continuously run medium-weight auditor verifying consistency in STH and inclusion proofs of new entries")
parser.add_argument('--audit4', action='store_true', help="run one check on one server")
parser.add_argument('--build-sth', action='store_true', help="get all entries and construct STH")
parser.add_argument('--verify-index', default=None, help="Verify a specific index in all logs" )
parser.add_argument('--host', default=None, help="Base URL for CT log")
parser.add_argument('--roots', action='store_true', help="Check accepted root certificates for all logs" )
parser.add_argument('--cur-sth',
                    metavar='file',
                    default=DEFAULT_CUR_FILE,
                    help="File containing current STH (default=%s)" % DEFAULT_CUR_FILE)

# Passed to print_timings() only from commented-out code at the bottom of
# the file; nothing in the visible code fills it in.
timings = {}
# Human-readable error lines collected by the checks.  A non-empty list makes
# the script print them and exit with NAGIOS_WARN.
errors = []

# Substrings that check_domain() looks for in certificate subjects.
monitored_domains = [
    # "google.com",
    # "preishelden.de",
    # "liu.se",
    # "nordu.net",
    # "symantec.com",
    # "sunet.se",
    # ".se",
]

class UTC(datetime.tzinfo):
    """Fixed-offset tzinfo for UTC: zero offset from UTC and no DST."""

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return datetime.timedelta(0)

    def tzname(self, dt):
        """Return the zone name so that strftime('%Z') works."""
        return "UTC"

    def dst(self, dt):
        """Return the DST adjustment, which is always zero."""
        return datetime.timedelta(0)

def reduce_layer(layer):
    """Hash adjacent pairs of `layer` and return the list of parent nodes.

    The paired nodes are removed from `layer` in place, so afterwards it
    holds either nothing or the single unpaired trailing node.  Callers
    (reduce_tree) rely on that leftover staying behind.

    Pairs are read by index and removed with a single slice deletion rather
    than repeated pop(0), which is quadratic for long layers.
    """
    pair_count = len(layer) // 2
    parents = [internal_hash((layer[2 * i], layer[2 * i + 1]))
               for i in range(pair_count)]
    del layer[:2 * pair_count]
    return parents

def reduce_tree(entries, layers):
    """Add leaf hashes to an incremental tree and collapse complete pairs.

    `layers` is a list of lists; layers[0] receives `entries` and every
    layer that ends up with more than one node is reduced into the layer
    above it via reduce_layer().  `layers` is modified in place and also
    returned.

    With no entries and no layers at all, the result is a single layer
    holding the SHA-256 digest of the empty string.  The original test for
    this case used `layers is []`, which is never true, so an empty
    `layers` used to fail with IndexError instead.
    """
    import hashlib  # local import: not imported by name at file level

    if not layers:
        if not entries:
            return [[hashlib.sha256().digest()]]
        # Start a fresh bottom layer so the entries have somewhere to go.
        layers.append([])

    layer_idx = 0
    layers[layer_idx] += entries

    while len(layers[layer_idx]) > 1:
        if len(layers) == layer_idx + 1:
            layers.append([])

        layers[layer_idx + 1] += reduce_layer(layers[layer_idx])
        layer_idx += 1
    return layers

def reduce_subtree_to_root(layers):
    """Fold an incremental tree (as built by reduce_tree) into one layer.

    Working from the bottom, each layer is merged into the one above it:
    an empty upper layer simply takes over the lower one, otherwise the
    lower layer is passed through next_merkle_layer() and appended.  The
    merged lower layer is then dropped.

    NOTE: `layers` is consumed in place; after the call it has a single
    layer left.  Returns that layer, passed through next_merkle_layer()
    once more if it still holds more than one node.
    """
    while len(layers) > 1:
        bottom, above = layers[0], layers[1]
        if above:
            # In-place extend: `above` is the very list stored in layers[1].
            above += next_merkle_layer(bottom)
        else:
            layers[1] = bottom
        del layers[0]

    remaining = layers[0]
    if len(remaining) > 1:
        return next_merkle_layer(remaining)
    return remaining

def fetch_all_sth():
    """Fetch the current STH from every log in base_urls.

    Returns a dict mapping each base URL to its STH, or to None when the
    fetch failed.  Fetch failures and signature check failures are printed
    and appended to the global `errors` list.

    NOTE(review): an STH whose signature check fails is still returned.
    The bare excepts are kept as they were; they also catch SystemExit,
    which get_proof_by_index() in this file uses to report HTTP errors --
    presumably the imported helpers do the same, confirm before narrowing.
    """
    results = {}
    for log_url in base_urls:
        # Step 1: retrieve the STH.
        try:
            results[log_url] = get_sth(log_url)
        except:
            results[log_url] = None
            message = time.strftime('%H:%M:%S') + " ERROR: Failed to retrieve STH from " + log_url
            print(message)
            errors.append(message)
            continue

        # Step 2: check its signature (no explicit key is supplied).
        try:
            check_sth_signature(log_url, results[log_url], None)
        except:
            message = time.strftime('%H:%M:%S') + " ERROR: Could not verify signature from " + log_url
            print(message)
            errors.append(message)

    return results

def verify_progress(old, new):
    """Sanity-check each new STH against the previous one and the clock.

    old -- dict of previously stored STHs keyed by log URL, or None.
    new -- dict of freshly fetched STHs keyed by log URL (values may be
           None for logs that could not be fetched).

    Appends to the global `errors` list when:
      * the tree size is unchanged but the root hash differs,
      * the new tree is smaller than the old one,
      * the new STH timestamp is more than 6h, 12h or 24h old.

    Each log is checked inside its own try block, so a malformed STH for
    one log no longer stops the checks for the logs after it.
    """
    print("Verifying progress")
    for url in new:
        try:
            new_sth = new[url]
            # A log that was added since the last run has no old STH.
            old_sth = old.get(url) if old else None

            if new_sth and old_sth:
                if new_sth["tree_size"] == old_sth["tree_size"]:
                    if old_sth["sha256_root_hash"] != new_sth["sha256_root_hash"]:
                        errors.append(time.strftime('%H:%M:%S') + " CRITICAL: root hash is different for same tree size in " + url)
                elif new_sth["tree_size"] < old_sth["tree_size"]:
                    errors.append(time.strftime('%H:%M:%S') + " CRITICAL: new tree smaller than previous tree (%d < %d)" %
                                  (new_sth["tree_size"], old_sth["tree_size"]) + " in " + url)

            if new_sth:
                # STH timestamps are divided by 1000 to get seconds.
                sth_seconds = new_sth["timestamp"] // 1000
                age = time.time() - sth_seconds
                sth_time = datetime.datetime.fromtimestamp(sth_seconds, UTC()).strftime("%Y-%m-%d %H:%M:%S")
                if age > 24 * 3600:
                    errors.append(time.strftime('%H:%M:%S') + " CRITICAL: %s is older than 24h: %s UTC" % (url, sth_time))
                elif age > 12 * 3600:
                    errors.append(time.strftime('%H:%M:%S') + " WARNING: %s is older than 12h: %s UTC" % (url, sth_time))
                elif age > 6 * 3600:
                    errors.append(time.strftime('%H:%M:%S') + " WARNING: %s is older than 6h: %s UTC" % (url, sth_time))
        except:
            print(time.strftime('%H:%M:%S') + " ERROR: Failed to verify progress for " + url)


def verify_consistency(old, new):
    """Check a consistency proof between the old and new STH of each log.

    old, new -- dicts of STHs keyed by log URL; None values are skipped, as
                are logs whose tree size did not change.

    A proof whose recomputed old or new root does not match the STH is
    printed and appended to the global `errors` list.  The tree size is an
    integer, so it is converted with str() before being concatenated; the
    original concatenation raised TypeError, which the except clause
    swallowed, so such failures were never recorded in `errors`.

    Any other exception for a log is printed and the loop moves on.
    """
    for url in old:
        try:
            old_sth = old[url]
            if not old_sth:
                continue
            new_sth = new[url]
            if not new_sth:
                continue

            old_size = old_sth["tree_size"]
            new_size = new_sth["tree_size"]
            if old_size == new_size:
                continue

            decoded_consistency_proof = [
                base64.b64decode(item)
                for item in get_consistency_proof(url, old_size, new_size)
            ]
            # res[0] / res[1] are compared with the old / new root below.
            res = verify_consistency_proof(decoded_consistency_proof, old_size, new_size,
                                           old_sth["sha256_root_hash"])

            if old_sth["sha256_root_hash"] != str(base64.b64encode(res[0])):
                print(time.strftime('%H:%M:%S') + " Verification of old hash failed! "
                      + old_sth["sha256_root_hash"] + " " + str(base64.b64encode(res[0])))
                errors.append(time.strftime('%H:%M:%S') + " ERROR: Failed to verify consistency for "
                              + url + ", tree size " + str(old_size))
            elif new_sth["sha256_root_hash"] != str(base64.b64encode(res[1])):
                print(time.strftime('%H:%M:%S') + " Verification of new hash failed! "
                      + new_sth["sha256_root_hash"] + " " + str(base64.b64encode(res[1])))
                errors.append(time.strftime('%H:%M:%S') + " ERROR: Failed to verify consistency for "
                              + url + ", tree size " + str(new_size))
            else:
                print(time.strftime("%H:%M:%S") + " New STH from " + url + ", timestamp: "
                      + str(new_sth["timestamp"]) + ", size: " + str(new_size) + "...OK.")

        except:
            print("ERROR: Could not verify consistency for " + url)

def verify_inclusion_all(old, new):
    """Verify an inclusion proof for every entry added between two STHs.

    old, new -- dicts of STHs keyed by log URL; logs with a None STH on
                either side, or with an unchanged tree size, are skipped.

    The new entries are downloaded in as many get_entries() batches as it
    takes, then each one is checked with verify_inclusion_by_hash().  All
    entries are checked even after a failure.  A failure (or any
    exception) is printed and appended to the global `errors` list.

    An empty batch is treated as an error; without that check the download
    loop would request the same range forever.
    """
    for url in old:
        try:
            if not (old[url] and new[url]):
                continue
            old_size = old[url]["tree_size"]
            new_size = new[url]["tree_size"]
            if old_size == new_size:
                continue

            entries = []
            while len(entries) + old_size != new_size:
                batch = get_entries(url, str(int(old_size) + len(entries)), new_size - 1)["entries"]
                if not batch:
                    raise ValueError("no entries returned by " + url)
                entries += batch
                print("Got " + str(len(entries)) + " entries...")

            success = True
            for entry in entries:
                leaf_hash = get_leaf_hash(base64.b64decode(entry["leaf_input"]))
                if not verify_inclusion_by_hash(url, leaf_hash):
                    success = False

            if success:
                print(time.strftime("%H:%M:%S") + " Verifying inclusion for " + str(len(entries)) + " new entries in " + url + " ...OK")
            else:
                print(time.strftime('%H:%M:%S') + " ERROR: Failed to prove inclusion of all new entries in " + url)
                errors.append(time.strftime('%H:%M:%S') + " ERROR: Failed to prove inclusion of all new entries in " + url)
        except:
            print(time.strftime('%H:%M:%S') + " ERROR: Failed to prove inclusion of all new entries in " + url)
            errors.append(time.strftime('%H:%M:%S') + " ERROR: Failed to prove inclusion of all new entries in " + url)

def check_domain(raw_entry):
    """Print a line for every monitored domain found in an entry's subject.

    raw_entry -- a log entry as returned by get_entries(); it is unpacked
                 with extract_original_entry() and the first certificate is
                 described with get_cert_info().

    A domain from `monitored_domains` matches when it is a substring of the
    subject.  The text after the first "CN=" is shown in parentheses; when
    the subject has no "CN=" the whole subject is shown instead of failing
    with IndexError (which used to abort the caller's download loop).
    """
    orig_entry = extract_original_entry(raw_entry)
    cert_info = get_cert_info(orig_entry[0][0])
    subject = cert_info["subject"]
    for md in monitored_domains:
        if md in subject:
            parts = subject.split("CN=")
            shown_name = parts[1] if len(parts) > 1 else subject
            print(md + " (" + shown_name + ") certified by " + cert_info["issuer"])


def fetch_and_increment_subtree(old_sth, new_sth_in, subtree, base_url):
    """Extend an incremental tree with the entries added since the old STH.

    old_sth, new_sth_in -- dicts of STHs keyed by log URL.
    subtree             -- layers as produced by reduce_tree(); updated in
                           place with the new leaf hashes.
    base_url            -- the log to fetch from.

    Entries from old tree_size up to new tree_size - 1 are fetched in
    batches, passed to check_domain(), hashed and folded into `subtree`.
    On any exception a message is printed and whatever has been built so
    far is returned.  An empty batch counts as such a failure; otherwise
    the loop would never advance.
    """
    try:
        idx = old_sth[base_url]["tree_size"]
        target_size = new_sth_in[base_url]["tree_size"]

        while idx < target_size:
            pre_size = idx
            entries = get_entries(base_url, idx, target_size - 1)["entries"]
            if not entries:
                raise ValueError("no entries returned by " + base_url)

            new_leafs = []
            for item in entries:
                check_domain(item)
                new_leafs.append(get_leaf_hash(base64.b64decode(item["leaf_input"])))
            idx += len(new_leafs)
            print(time.strftime('%H:%M:%S') + " Got entries " + str(pre_size) + " to "
                  + str(idx - 1) + " (" + str(len(new_leafs)) + " entries) from " + base_url)
            subtree = reduce_tree(new_leafs, subtree)

    except:
        print("Failed to build subtree :(")

    return subtree

def fetch_and_build_subtree(old_sth, base_url):
    """Download every entry covered by a log's STH and build its tree.

    old_sth  -- dict of STHs keyed by log URL.
    base_url -- the log to fetch from.

    Entries 0 .. tree_size - 1 are fetched in batches, passed to
    check_domain(), hashed and folded into layers with reduce_tree().
    Returns the layers; on any exception a message is printed and the
    layers built so far are returned (an empty tree, [[]], if nothing was
    built).

    `subtree` is initialised before the try block so that an early failure
    can no longer end in UnboundLocalError at the return statement, and an
    empty batch is treated as a failure instead of looping forever.
    """
    subtree = [[]]
    try:
        sth = old_sth[base_url]
        idx = 0

        while idx < sth["tree_size"]:
            pre_size = idx
            entries = get_entries(base_url, idx, sth["tree_size"] - 1)["entries"]
            if not entries:
                raise ValueError("no entries returned by " + base_url)

            new_leafs = []
            for item in entries:
                check_domain(item)
                new_leafs.append(get_leaf_hash(base64.b64decode(item["leaf_input"])))
            idx += len(new_leafs)
            # idx is one past the last entry received, hence idx - 1.
            print(time.strftime('%H:%M:%S') + " Got entries " + str(pre_size) + " to " + str(idx - 1) + " from " + base_url)
            subtree = reduce_tree(new_leafs, subtree)

    except:
        print("Failed to build subtree :(")

    return subtree


def verify_subtree(old_sth, subtree, base_url):
    """Compare the root of a locally built tree with the log's STH root.

    old_sth  -- dict of STHs keyed by log URL.
    subtree  -- layers as produced by reduce_tree().
    base_url -- the log being verified.

    The root is computed on a copy of the layers: reduce_subtree_to_root()
    consumes its argument, and monitor mode keeps extending the same
    `subtree` after each verification, so reducing it in place corrupted
    every later check.

    A mismatch, or any exception, is printed and appended to the global
    `errors` list.  The tree size is an integer and is converted with
    str(); the original concatenation raised TypeError, so a root mismatch
    was reported as "Failed to build STH" instead.
    """
    try:
        sth = old_sth[base_url]
        working_copy = [list(layer) for layer in subtree]
        root = base64.b64encode(reduce_subtree_to_root(working_copy)[0])

        if root == sth["sha256_root_hash"]:
            print(time.strftime('%H:%M:%S') + " Verifying root hashes for " + base_url + "...OK.")
        else:
            print(time.strftime('%H:%M:%S') + " ERROR: Failed to verify root hashes! STH root: "
                  + sth["sha256_root_hash"] + ", Tree root: " + root)
            errors.append(time.strftime('%H:%M:%S') + " ERROR: Failed to verify root hash for "
                          + base_url + ", tree size " + str(sth["tree_size"]))
    except:
        print(time.strftime('%H:%M:%S') + " ERROR: Failed to build STH for " + base_url)
        errors.append(time.strftime('%H:%M:%S') + " ERROR: Failed to build STH for " + base_url)

def verify_inclusion_by_hash(base_url, leaf_hash):
    """Check one leaf's inclusion proof against the log's current STH.

    A fresh STH is fetched on every call, the proof for `leaf_hash` is
    requested for that tree size, and the root recomputed from the proof
    is compared with the STH root.

    Returns True on a match.  On a mismatch or any exception the problem
    is printed, appended to the global `errors` list, and False returned.
    """
    try:
        current = get_sth(base_url)
        tree_size = current["tree_size"]
        proof = get_proof_by_hash(base_url, leaf_hash, tree_size)

        audit_path = [base64.b64decode(node) for node in proof["audit_path"]]
        computed_root = base64.b64encode(
            verify_inclusion_proof(audit_path, proof["leaf_index"], tree_size, leaf_hash))

        if current["sha256_root_hash"] != computed_root:
            message = time.strftime('%H:%M:%S') + " ERROR: Could not prove inclusion for entry " + str(proof["leaf_index"]) + " in " + base_url
            print(message)
            errors.append(message)
            return False
        return True
    except:
        message = time.strftime('%H:%M:%S') + " ERROR: Could not prove inclusion for hashed entry in " + base_url
        print(message)
        errors.append(message)
        return False

def verify_inclusion_by_index(base_url, index):
    """Check the inclusion proof of the entry at `index` in one log.

    A fresh STH is fetched, the entry and its proof are requested with
    get_proof_by_index(), and the root recomputed from the proof is
    compared with the STH root.  The outcome is printed; a mismatch or any
    exception is also appended to the global `errors` list.  Returns None.
    """
    failure = " ERROR: Could not prove inclusion for entry " + str(index) + " in " + base_url
    try:
        current = get_sth(base_url)
        tree_size = current["tree_size"]
        proof = get_proof_by_index(base_url, index, tree_size)

        audit_path = [base64.b64decode(node) for node in proof["audit_path"]]
        leaf_hash = get_leaf_hash(base64.b64decode(proof["leaf_input"]))
        computed_root = base64.b64encode(
            verify_inclusion_proof(audit_path, index, tree_size, leaf_hash))

        if current["sha256_root_hash"] == computed_root:
            print(time.strftime('%H:%M:%S') + " Verifying inclusion for entry " + str(index) + " in " + base_url + "...OK.")
        else:
            message = time.strftime('%H:%M:%S') + failure
            print(message)
            errors.append(message)
    except:
        message = time.strftime('%H:%M:%S') + failure
        print(message)
        errors.append(message)

def get_proof_by_index(baseurl, index, tree_size):
    """Fetch an entry and its inclusion proof via get-entry-and-proof.

    baseurl   -- base URL of the log (expected to end with a slash, since
                 the path is appended directly).
    index     -- leaf index to request.
    tree_size -- tree size the proof should be based on.

    Returns the decoded JSON response.  On an HTTP error the response body
    is printed and the process exits via sys.exit(); the status is now 1
    rather than 0 so that an uncaught exit is not mistaken for success.
    """
    try:
        params = urllib.urlencode({"leaf_index": index,
                                   "tree_size": tree_size})
        result = urlopen(baseurl + "ct/v1/get-entry-and-proof?" + params).read()
        return json.loads(result)
    except urllib2.HTTPError as e:
        print("ERROR: %s" % (e.read(),))
        sys.exit(1)

def get_all_roots(base_url):
    """Print the subjects of the self-issued certificates a log accepts.

    The certificate list is fetched from ct/v1/get-roots.  A certificate
    is printed when its subject equals its issuer.  Each certificate is
    decoded and passed to get_cert_info() once; the original did both
    three times per certificate.
    """
    result = urlopen(base_url + "ct/v1/get-roots").read()
    certs = json.loads(result)["certificates"]
    print(time.strftime('%H:%M:%S') + " Received " + str(len(certs)) + " certs from " + base_url)

    for accepted_cert in certs:
        cert_info = get_cert_info(base64.decodestring(accepted_cert))
        if cert_info["subject"] == cert_info["issuer"]:
            print(cert_info["subject"])

def print_errors(errors):
    """Print how many error lines were collected, then each line."""
    print("Encountered %d errors:" % len(errors))
    for line in errors:
        print(line)

def print_timings(timings):
    """Print, per key, when it was last seen and its longest STH gap.

    Each value is a dict with "last" (divided by 1000 before being turned
    into a local-time date) and "longest" (a duration in seconds, shown as
    whole hours and minutes).
    """
    for log_name in timings:
        record = timings[log_name]
        minutes, _seconds = divmod(record["longest"], 60)
        hours, minutes = divmod(minutes, 60)
        last_seen = datetime.datetime.fromtimestamp(int(record["last"]) // 1000)
        print(log_name + " last seen " + last_seen.strftime('%Y-%m-%d %H:%M:%S')
              + " longest between two STH: " + str(int(hours)) + "h " + str(int(minutes)) + "m ")

def read_sth(fn):
    """Load previously stored STHs from the JSON file `fn`.

    Returns the decoded JSON, or None when the file does not exist.  Any
    other IOError is re-raised unchanged.  The file is closed as soon as
    it has been read.
    """
    try:
        f = open(fn)
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        raise
    with f:
        return json.loads(f.read())

def write_file(fn, sth):
    """Store `sth` as JSON in `fn` by way of a temporary file.

    The JSON is written to fn + ".new", which is then handed to mv_file()
    to take the place of `fn`.  The temporary file is closed explicitly
    before the move so that its contents are flushed first.
    """
    tempname = fn + ".new"
    with open(tempname, 'w') as f:
        f.write(json.dumps(sth))
    mv_file(tempname, fn)

def main(args):
    """Run every mode selected on the command line.

    args -- the namespace produced by `parser`.

    Modes run in this order: --verify-index, --roots, --build-sth, --audit,
    --audit3 (loops forever), --audit2, --audit4 (always exits the
    process), --monitor (loops forever).  Problems are collected in the
    global `errors` list for the caller to report.

    Changes from the original behaviour, all on paths that used to crash:
      * STHs are fetched whenever a mode needs them, so combining --audit4
        with another mode no longer fails with an unbound `sth`.
      * --monitor skips logs whose STH could not be fetched, both at start
        and while polling, instead of raising TypeError on None.
    """
    needs_sth = (args.build_sth or args.audit or args.audit2
                 or args.audit3 or args.monitor)

    # print time.strftime("%H:%M:%S") + " Starting..."
    if not (needs_sth or args.verify_index is not None or args.roots or args.audit4):
        print(time.strftime('%H:%M:%S') + " Nothing to do.")
        return

    # As before, the STHs are fetched for every run except a pure --audit4
    # one, which fetches a single STH itself further down.
    if needs_sth or not args.audit4:
        sth = fetch_all_sth()

    if args.verify_index is not None:
        for url in base_urls:
            verify_inclusion_by_index(url, int(args.verify_index))

    if args.roots:
        print(time.strftime('%H:%M:%S') + " Getting accepted Root Certs from all logs...")
        for url in base_urls:
            get_all_roots(url)

    if args.build_sth:
        print(time.strftime('%H:%M:%S') + " Building trees from entries. This may take a while, go get coffee or something...")
        for base_url in base_urls:
            subtree = fetch_and_build_subtree(sth, base_url)
            verify_subtree(sth, subtree, base_url)

    if args.audit:
        print(time.strftime('%H:%M:%S') + " Running auditor1 for " + str(len(base_urls)) + " logs...")
        old_sth = read_sth(args.cur_sth)
        if old_sth:
            verify_consistency(old_sth, sth)
        else:
            print("No old sth found...")
        write_file(args.cur_sth, sth)

    if args.audit3:
        print(time.strftime('%H:%M:%S') + " Running auditor3 for " + str(len(base_urls)) + " logs...")
        # Never returns: polls every 30 seconds.
        while True:
            time.sleep(30)
            new_sth = fetch_all_sth()
            verify_consistency(sth, new_sth)
            verify_inclusion_all(sth, new_sth)
            sth = new_sth

    if args.audit2:
        print(time.strftime('%H:%M:%S') + " Running auditor2 for " + str(len(base_urls)) + " logs...")
        old_sth = read_sth(args.cur_sth)
        verify_progress(old_sth, sth)
        if old_sth:
            print("Verifying consistency...")
            verify_consistency(old_sth, sth)
            print("Verifying inclusion...")
            verify_inclusion_all(old_sth, sth)
        write_file(args.cur_sth, sth)

    # Experimental for plausible + nagios
    if args.audit4:
        base_url = base_urls[0]
        old_sth = read_sth("plausible-sth.json")
        print("Running auditor4 for " + base_url)
        try:
            tmp_sth = get_sth(base_url)
        except:
            error_str = time.strftime('%H:%M:%S') + " ERROR: Failed to retrieve STH from " + base_url
            print(error_str)
            errors.append(error_str)
            sys.exit(NAGIOS_WARN)

        # Check signature on the STH
        try:
            check_sth_signature(base_url, tmp_sth, None)
            write_file("plausible-sth.json", tmp_sth)
        except:
            error_str = time.strftime('%H:%M:%S') + " ERROR: Could not verify signature from " + base_url
            print(error_str)
            errors.append(error_str)
            sys.exit(NAGIOS_CRIT)
        sys.exit(NAGIOS_OK)

    if args.monitor:
        all_subtrees = {}
        print(time.strftime('%H:%M:%S') + " Building trees from entries. This may take a while, go get coffee or something...")
        for url in base_urls:
            if not sth[url]:
                # fetch_all_sth() has already recorded the failure.
                print(time.strftime('%H:%M:%S') + " Skipping " + url + ": no STH available")
                continue
            # Start 10 entries behind the current STH (and with a dummy
            # root), so the first poll sees a size difference for this log.
            sth[url]["tree_size"] -= 10
            sth[url]["sha256_root_hash"] = "na"
            all_subtrees[url] = fetch_and_build_subtree(sth, url)
            # verify_subtree(sth, all_subtrees[url], url)

        if not all_subtrees:
            # Nothing to monitor; let the caller report the errors.
            return

        # Never returns: polls every 30 seconds.
        while True:
            time.sleep(30)
            new_sth = fetch_all_sth()
            for url in base_urls:
                if url not in all_subtrees:
                    continue
                if not new_sth[url]:
                    # Fetch failed this round: keep the last good STH so
                    # the next round resumes from the right tree size.
                    new_sth[url] = sth[url]
                    continue
                if sth[url]["tree_size"] != new_sth[url]["tree_size"]:
                    all_subtrees[url] = fetch_and_increment_subtree(sth, new_sth, all_subtrees[url], url)
                    verify_subtree(new_sth, all_subtrees[url], url)
            sth = new_sth


if __name__ == '__main__':
    # Script entry point: run the selected modes, then report.  Any
    # collected error yields NAGIOS_WARN, otherwise NAGIOS_OK.
    # (A try/except/finally wrapper that also called print_timings() was
    # disabled in the original and is still not in use.)
    main(parser.parse_args())
    if errors:
        print_errors(errors)
        sys.exit(NAGIOS_WARN)
    print(time.strftime('%H:%M:%S') + " Everything OK.")
    sys.exit(NAGIOS_OK)