summary refs log tree commit diff
path: root/src/db
diff options
context:
space:
mode:
Diffstat (limited to 'src/db')
-rwxr-xr-x src/db/dictionary.py 11
-rw-r--r-- src/db/schema.py 154
-rw-r--r-- src/db/sql.py 6
3 files changed, 119 insertions, 52 deletions
diff --git a/src/db/dictionary.py b/src/db/dictionary.py
index 15f8f88..f0f5fe9 100755
--- a/src/db/dictionary.py
+++ b/src/db/dictionary.py
@@ -11,7 +11,8 @@ import os
import sys
import time
-from db import couch, index
+from db import couch
+from db.schema import as_index_list, validate_collector_data
class DictDB():
@@ -46,7 +47,7 @@ class DictDB():
print("Creating database and indexes.")
self.couchdb = self.server.create(self.database)
- for i in index.as_list():
+ for i in as_index_list():
self.couchdb.index(i)
self._ts = time.time()
@@ -71,9 +72,15 @@ class DictDB():
if type(data) is list:
for item in data:
+ error = validate_collector_data(item)
+ if error != "":
+ return error
item['_id'] = str(self.unique_key())
ret = self.couchdb.save_bulk(data)
else:
+ error = validate_collector_data(data)
+ if error != "":
+ return error
data['_id'] = str(self.unique_key())
ret = self.couchdb.save(data)
diff --git a/src/db/schema.py b/src/db/schema.py
index 37da5aa..9bdf130 100644
--- a/src/db/schema.py
+++ b/src/db/schema.py
@@ -1,75 +1,135 @@
import json
+import sys
+import traceback
import jsonschema
+# fmt:off
+# NOTE: Commented out properties are left intentionally, so it is easier to see
+# what properties are optional.
schema = {
"$schema": "http://json-schema.org/schema#",
"type": "object",
"properties": {
- "document_version": {
- "type": "integer"
- },
- "ip": {
- "type": "string"
- },
- "port": {
- "type": "integer"
- },
- "whois_description": {
- "type": "string"
- },
- "asn": {
- "type": "string"
- },
- "asn_country_code": {
- "type": "string"
- },
- "ptr": {
- "type": "string"
- },
- "abuse_mail": {
- "type": "string"
- },
- "domain": {
- "type": "string"
- },
- "timestamp_in_utc": {
- "type": "string"
+ "document_version": {"type": "integer"},
+ "ip": {"type": "string"},
+ "port": {"type": "integer"},
+ "whois_description": {"type": "string"},
+ "asn": {"type": "string"},
+ "asn_country_code": {"type": "string"},
+ "ptr": {"type": "string"},
+ "abuse_mail": {"type": "string"},
+ "domain": {"type": "string"},
+ "timestamp_in_utc": {"type": "string"},
+ "display_name": {"type": "string"},
+ "description": {"type": "string"},
+ "custom_data": {
+ "type": "object",
+ "patternProperties": {
+ ".*": {
+ "type": "object",
+ "properties": {
+ "display_name": {"type": "string"},
+ "data": {"type": ["string", "boolean", "integer"]},
+ "description": {"type": "string"},
+ },
+ "required": [
+ "display_name",
+ "data",
+ # "description"
+ ]
+ },
+ },
},
- "user_presentation": {
+ "result": {
"type": "object",
- "properties": {
- "description": {
- "type": "string"
+ "patternProperties": {
+ ".*": {
+ "type": "object",
+ "properties": {
+ "display_name": {"type": "string"},
+ "vulnerable": {"type": "boolean"},
+ "investigation_needed": {"type": "boolean"},
+ "reliability": {"type": "integer"},
+ "description": {"type": "string"},
+ },
+ "oneOf": [
+ {
+ "required": [
+ "display_name",
+ "vulnerable",
+ # "reliability", # TODO: reliability is required if vulnerable = true
+ # "description",
+ ]
+ },
+ {
+ "required": [
+ "display_name",
+ "investigation_needed",
+ # "reliability", # TODO: reliability is required if investigation_needed = true
+ # "description",
+ ]
+ },
+ ]
},
- "data": {
- "type": "object"
- }
- }
- }
+ },
+ },
},
"required": [
"document_version",
"ip",
"port",
+ "whois_description",
+ "asn",
+ "asn_country_code",
+ "ptr",
+ "abuse_mail",
+ "domain",
"timestamp_in_utc",
- "user_presentation"
- ]
+ "display_name",
+ # "description",
+ # "custom_data",
+ "result",
+ ],
}
+# fmt:on
+
+
+def get_index_keys():
+ keys = list()
+ for key in schema["properties"]:
+ keys.append(key)
+ return keys
+
+
+def as_index_list():
+ index_list = list()
+ for key in schema["properties"]:
+ name = f"{key}-json-index"
+ index = {
+ "index": {
+ "fields": [
+ key,
+ ]
+ },
+ "name": name,
+ "type": "json"
+ }
+ index_list.append(index)
+
+ return index_list
def validate_collector_data(json_blob):
try:
jsonschema.validate(json_blob, schema)
except jsonschema.exceptions.ValidationError as e:
- print(f'Validation failed with error: {e}')
- return False
-
- return True
+ return f"Validation failed with error: {e.message}"
+ return ""
-if __name__ == '__main__':
- with open('example_data.json') as fd:
+if __name__ == "__main__":
+ with open(sys.argv[1]) as fd:
json_data = json.loads(fd.read())
- validate_collector_data(json_data)
+ print(validate_collector_data(json_data))
diff --git a/src/db/sql.py b/src/db/sql.py
index fc20e36..c47a69c 100644
--- a/src/db/sql.py
+++ b/src/db/sql.py
@@ -1,9 +1,9 @@
import datetime
+import os
import sys
from contextlib import contextmanager
-import os
-from sqlalchemy import (Boolean, Column, Date, Integer, Serial, String, Text,
+from sqlalchemy import (Boolean, Column, Date, Integer, String, Text,
create_engine, text)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
@@ -15,7 +15,7 @@ metadata = Base.metadata
class Log(Base):
__tablename__ = "log"
- id = Column(Serial, primary_key=True)
+ id = Column(Integer, primary_key=True)
timestamp = Column(Date, nullable=False,
default=datetime.datetime.utcnow)
username = Column(Text, nullable=False)