diff options
-rw-r--r-- | src/README.md | 62 | ||||
-rwxr-xr-x | src/db.py | 171 | ||||
-rwxr-xr-x | src/store.py | 62 | ||||
-rwxr-xr-x | src/wsgi.py | 120 |
4 files changed, 415 insertions, 0 deletions
diff --git a/src/README.md b/src/README.md new file mode 100644 index 0000000..dc5b771 --- /dev/null +++ b/src/README.md @@ -0,0 +1,62 @@ +# soc_collector -- Gathering vulnerability information and presenting it + +## The oneliner + +The collector answers the fundamental question constantly posed by all +SOC personnel ever: Can I go for lunch now? + +## The elevator pitch + +You're working as a Security Operations Center engineer and your job +is to, one, know when any part of your infrastructure is vulnerable +and, two, if it is, do something smart about it. + +The collector compiles data from vulnerability scanners and stores it +in a database. You query the collector for the current vulnerability +status of your network infrastructure. + +Without a summary of your vulnerability status and ability to quickly +deepen your knowledge of a given system, your chances of ever eating +lunch with a clear conscience are slim. + +## The user interface + +TODO + +## The gory^Wtechnical details + +TODO + +## The name + +The "soc" part means Security Operations Center. + +The "collector" part is correct but misleading since `soc_collector` +also processes and presents. + +## The license + +This code is licensed under the 2-Clause BSD License, see LICENSE for +the full text. + +## How to test it out + +The collector has been tested on Debian 10 (Buster). It should be +possible to host it on other Unix systems as well. + +On a Debian system, first install the dependencies. + + sudo apt install python3 python3-pip python3-leveldb + pip3 install falcon + +Start the demo HTTP server. + + python3 src/wsgi.py + +Submit some data. + + echo '[{"a":"räksmörgås","domain":"foo.se"},{"bar":"Baz"}]' | curl -s -u admin:admin --data-binary @- http://localhost:8000/sc/v0/add + +Get the same data back. 
+ + curl -s -u foo.se: http://localhost:8000/sc/v0/get | json_pp -json_opt utf8,pretty diff --git a/src/db.py b/src/db.py new file mode 100755 index 0000000..4702def --- /dev/null +++ b/src/db.py @@ -0,0 +1,171 @@ +#! /usr/bin/env python3 + +# A database storing dictionaries, keyed on a timestamp. + +# key = 8 octets timestamp | 1 octet version +# struct.pack('!dB', time.time(), 0) + +# value = A dict which will be stored as a JSON object encoded in +# UTF-8. Note that dict keys of type integer or float will become +# strings while values will keep their type. + +# Note that there's a (slim) chance that you'd stomp on the previous +# value if you're too quick with generating the timestamps, ie +# invoking time.time() several times quickly enough. + +from store import KVStore +import json +from pathlib import PurePath +import time +import struct +import os + +class DictDB(KVStore): + VERSION = 0 + + # TODO: implement indexes + # TODO: implement search(dict key) for indexed fields + + def __init__(self, name, basedir='.'): + super().__init__(name, basedir) + self._ts = time.time() + self._index = {} + + def unique_key(self): + ts = time.time() + while ts == self._ts: + ts = time.time() + self._ts = ts + return struct.pack('!dB', ts, DictDB.VERSION) + + def index_add(self, path): + name = PurePath(path).name + self._index[name] = DBIndex(path) + + def add(self, data, batch_write=False): + if type(data) is list: + ret = [] + if batch_write: # Supposedly makes the update atomic. 
+ batch = self.batch() + for e in data: + ret += [self.add(e)] + if batch_write: + self.write(batch) + return ret + else: + key = self.unique_key() + json_data = json.dumps(data).encode('UTF-8') + self.put(key, json_data) + return key + + def get(self, key): + enc = super().get(key) # name collision, specify super class + ret = json.loads(enc.decode('UTF-8')) + return ret + + def slice(self, key_from=None, key_to=None): + ret = [] + for key_raw, val_raw in list(self.range_iter(key_from=key_from, key_to=key_to)): + (key, ver) = struct.unpack('!dB', key_raw) + if ver == DictDB.VERSION: + val = json.loads(val_raw.decode('UTF-8')) + ret.append((key, val)) + return ret + + # Search top level dict for objects with a name matching DICT_NAME + # and optionally value DICT_VAL. + def search(self, dict_name, dict_val=None): + res = [] + for key, obj in self.slice(): + if dict_name in obj: + if dict_val is None: + res.append((key, obj)) + elif dict_val == obj[dict_name]: + res.append((key, obj)) + return res + + def timestamp_from_key(key): + ts, _ = struct.unpack('!dB', key) + return ts + +class DBIndex(KVStore): + def __init__(self, name, basedir='.'): + super().__init__(name, basedir) + +# def update_or_create(self, key, val): +# curval = [] +# try: +# curval = self._db.Get(key) +# except KeyError: +# self._db.Put(key, [val]) +# return + +# if curval is list: +# self._db.Put(key, curval + [val]) +# else: +# self._db.Put(key, json.dumps([curval, val])) + + # def index_full(self, name): + # kv = self._index[name]) + # for key, val in self.range_iter(): + # valdict = json.loads(val) + # field = valdict.get(name) + # if field: + # ix = kv.get(key) + # if ix: + # kv.put(ix + [key]) + # else: + # kv.put([key]) + +if __name__ == '__main__': + DBDIR = 'test_db.db' + # TODO: rm -r DBDIR + db = DictDB('db', basedir = DBDIR) + #ix = DBIndex('foo', basedir = DBDIR) + + key = db.add({'foo': 'Bar'}) + assert(db.get(key) == {'foo': 'Bar'}) + + key = db.add({0: 'Foo'}) # NOTE: int keys 
become strings + assert(db.get(key) == {'0': 'Foo'}) + + d = {'4711': 'Large number', '7': 'Small number', '0': 'Bar'} + key = db.add(d) + res = db.get(key) + assert(dict_eq(d, res)) + + key = db.add({'an int': 0}) + assert(db.get(key) == {'an int': 0}) + + key = db.add({'a float': 1.1}) + assert(db.get(key) == {'a float': 1.1}) + + # TODO: verify slice() too + for key, val in db.slice(): + print(key, val) + + res = db.search('an int') + assert(dict_eq(res[0][1], {'an int': 0})) + + res = db.search('0') + assert(dict_eq(res[0][1], {'0': 'Foo'})) + assert(dict_eq(res[1][1], d)) + + res = db.search('7', dict_val = 'Small number') # FIXME: verify better -- do we hit only '7' here, f.ex.? + assert('7' in res[0][1]) + + res = db.search('7', dict_val = 'not matching') + assert(not res) + + N = 10 * 1000 # 10k takes ~0.2s. + data = [{str(x): x} for x in range(N)] + keys = db.add(data, batch_write = False) + assert(len(keys) == N) + for k in range(len(keys)): + assert(db.get(keys[k]) == data[k]) + + +from operator import itemgetter +def dict_eq(a, b): + sort_on_key = lambda d: sorted(d.items(), key=itemgetter(0)) + return sort_on_key(a) == sort_on_key(b) diff --git a/src/store.py b/src/store.py new file mode 100755 index 0000000..bc2c4ff --- /dev/null +++ b/src/store.py @@ -0,0 +1,62 @@ +#! /usr/bin/env python3 + +from leveldb import LevelDB, WriteBatch +import time +from pathlib import PurePath +import os + +class KVStore: + """Wraps a Python wrapper for LevelDB in case we want to change wrapper + """ + def __init__(self, name, basedir='.'): + os.makedirs(basedir, exist_ok = True) + path = str(PurePath(basedir).joinpath(name)) + self._db = LevelDB(path) + + def get(self, key): + try: + val = self._db.Get(key) + except KeyError: + val = None # You can thus not store None! 
+ return val + + def put(self, key, val): + self._db.Put(key, val) + + def delete(self, key): + self._db.Delete(key) + + def range_iter(self, key_from=None, key_to=None): + return self._db.RangeIter(key_from=key_from, key_to=key_to) + + def batch(self): + return WriteBatch() + + def write(self, batch): + self._db.Write(batch, sync=True) + + def timestamp_asc(): + return str(time.time()).encode('ascii') + + +def test_store(name): + # TODO: rm -r name + db = KVStore(name) + + ts0 = KVStore.timestamp_asc() + db.put(ts0, b'Bar') + assert(db.get(ts0) == b'Bar') + + ts1 = KVStore.timestamp_asc() + db.put(ts1, b'Foo') + assert(db.get(ts1) == b'Foo') + + assert(list(db.range_iter()) == [(ts0, b'Bar'), (ts1, b'Foo')]) + + db.delete(ts0) + assert(db.get(ts0) is None) + + return db + +if __name__ == '__main__': + test_store('test_store.db') diff --git a/src/wsgi.py b/src/wsgi.py new file mode 100755 index 0000000..1eda9de --- /dev/null +++ b/src/wsgi.py @@ -0,0 +1,120 @@ +#! /usr/bin/env python3 + +import sys +from wsgiref.simple_server import make_server +import falcon +import json +from db import DictDB +import time +from base64 import b64decode + +class CollectorResource(): + def __init__(self, db): + self._db = db + + def parse_error(data): + return "I want valid JSON but got this:\n{}\n".format(data) + + def user_authn(self, auth_header, authfun): + if not auth_header: + return None # Fail. + BAlit, b64 = auth_header.split() + if BAlit != "Basic": + return None # Fail + userbytes, pwbytes = b64decode(b64).split(b':') + try: + user = userbytes.decode('ascii') + except: + return None # Fail + if authfun(user, pwbytes): + return user # Success. + return None # Fail. 
+ + +class EPGet(CollectorResource): + def on_get(self, req, resp): + resp.status = falcon.HTTP_200 + resp.content_type = falcon.MEDIA_JSON + out = [] + + userid = self.user_authn(req.auth, lambda user,_pw: user is not None) + if not userid: + resp.status = falcon.HTTP_401 + resp.text = 'Invalid user or password\n' + return + + out = [{time.ctime(key): dict} for (key, dict) in self._db.search('domain', dict_val=userid)] + + resp.text = json.dumps(out) + '\n' + + +class EPAdd(CollectorResource): + def on_post(self, req, resp): + resp.status = falcon.HTTP_200 + resp.content_type = falcon.MEDIA_TEXT + self._indata = [] + + if self.user_authn(req.auth, + lambda u,p: u == 'admin' and p == b'admin') is None: + resp.status = falcon.HTTP_401 + resp.text = 'Invalid user or password\n' + return + + # TODO: can we do json.load(req.bounded_stream, + # cls=customDecoder) where our decoder calls JSONDecoder after + # decoding UTF-8? + + # NOTE: Reading the whole body in one go instead of streaming + # it nicely. + rawin = req.bounded_stream.read() + try: + decodedin = rawin.decode('UTF-8') + except: + resp.status = falcon.HTTP_400 + resp.text = 'Need UTF-8\n' + return + + try: + keys = json.loads(decodedin, object_hook=self.handle_data) + except TypeError: + print('DEBUG: type error') + resp.status = falcon.HTTP_400 + resp.text = CollectorResource.parse_error(decodedin) + return + except json.decoder.JSONDecodeError: + print('DEBUG: json decode error') + resp.status = falcon.HTTP_400 + resp.text = CollectorResource.parse_error(decodedin) + return + + resp.text = '' + for key in keys: + resp.text += repr(key) + '\n' + + def handle_data(self, data): + return self._db.add(data) # return key + + +def init(url_res_map, addr = '', port = 8000): + app = falcon.App() + for url, res in url_res_map: + app.add_route(url, res) + + return make_server(addr, port, app) + + +def main(): + # Simple demo. 
+ # Try adding some observations, basic auth admin:admin, and + # include {"domain": "foo.se"} in some of them. + # Try retrieving all observations for user 'foo.se' (basic auth + # foo.se:whatever). + + db = DictDB('wsgi_demo.db') + httpd = init([('/sc/v0/add', EPAdd(db)), + ('/sc/v0/get', EPGet(db))]) + print('Serving on port 8000...') + httpd.serve_forever() + +if __name__ == '__main__': + sys.exit(main()) |