summary refs log tree commit diff
path: root/src/db/schema.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/db/schema.py')
-rw-r--r-- src/db/schema.py 135
1 files changed, 135 insertions, 0 deletions
diff --git a/src/db/schema.py b/src/db/schema.py
new file mode 100644
index 0000000..9bdf130
--- /dev/null
+++ b/src/db/schema.py
@@ -0,0 +1,135 @@
+import json
+import sys
+import traceback
+
+import jsonschema
+
+# fmt:off
+# NOTE: Properties that are commented out are kept on purpose: they are the
+# optional ones, and leaving them visible in the "required" lists makes it
+# easy to see at a glance what may be omitted.
+
+# Shape of each value stored under "custom_data" (the key itself is free-form).
+_custom_data_entry = {
+    "type": "object",
+    "properties": {
+        "display_name": {"type": "string"},
+        "data": {"type": ["string", "boolean", "integer"]},
+        "description": {"type": "string"},
+    },
+    "required": [
+        "display_name",
+        "data",
+        # "description"
+    ],
+}
+
+# Shape of each value stored under "result" (the key itself is free-form).
+# "oneOf" accepts an entry only when exactly one alternative matches, so an
+# entry must carry either "vulnerable" or "investigation_needed", never both.
+_result_entry = {
+    "type": "object",
+    "properties": {
+        "display_name": {"type": "string"},
+        "vulnerable": {"type": "boolean"},
+        "investigation_needed": {"type": "boolean"},
+        "reliability": {"type": "integer"},
+        "description": {"type": "string"},
+    },
+    "oneOf": [
+        {
+            "required": [
+                "display_name",
+                "vulnerable",
+                # "reliability",  # TODO: reliability is required if vulnerable = true
+                # "description",
+            ],
+        },
+        {
+            "required": [
+                "display_name",
+                "investigation_needed",
+                # "reliability",  # TODO: reliability is required if investigation_needed = true
+                # "description",
+            ],
+        },
+    ],
+}
+
+# JSON Schema that collector data is validated against. The order of the
+# top-level "properties" is significant: the helpers below iterate over it.
+schema = {
+    "$schema": "http://json-schema.org/schema#",
+    "type": "object",
+    "properties": {
+        "document_version": {"type": "integer"},
+        "ip": {"type": "string"},
+        "port": {"type": "integer"},
+        "whois_description": {"type": "string"},
+        "asn": {"type": "string"},
+        "asn_country_code": {"type": "string"},
+        "ptr": {"type": "string"},
+        "abuse_mail": {"type": "string"},
+        "domain": {"type": "string"},
+        "timestamp_in_utc": {"type": "string"},
+        "display_name": {"type": "string"},
+        "description": {"type": "string"},
+        "custom_data": {
+            "type": "object",
+            # ".*" matches every key, so each member must be a valid entry.
+            "patternProperties": {".*": _custom_data_entry},
+        },
+        "result": {
+            "type": "object",
+            # ".*" matches every key, so each member must be a valid entry.
+            "patternProperties": {".*": _result_entry},
+        },
+    },
+    "required": [
+        "document_version",
+        "ip",
+        "port",
+        "whois_description",
+        "asn",
+        "asn_country_code",
+        "ptr",
+        "abuse_mail",
+        "domain",
+        "timestamp_in_utc",
+        "display_name",
+        # "description",
+        # "custom_data",
+        "result",
+    ],
+}
+# fmt:on
+
+
+def get_index_keys():
+    """Return the names of the top-level properties declared in ``schema``.
+
+    Returns:
+        A new list of property names, in the order in which they appear in
+        ``schema["properties"]``. Mutating the list does not affect the schema.
+    """
+    # Iterating a dict yields its keys, so list() copies them in one step.
+    return list(schema["properties"])
+
+
+def as_index_list():
+    """Build one single-field JSON index definition per top-level schema property.
+
+    Returns:
+        A list of dicts, one per key of ``schema["properties"]`` and in the
+        same order. Each dict has the form
+        ``{"index": {"fields": [key]}, "name": f"{key}-json-index", "type": "json"}``.
+    """
+    # NOTE(review): the shape resembles a CouchDB/Mango JSON index definition;
+    # confirm against the code that consumes this list.
+    return [
+        {
+            "index": {"fields": [key]},
+            "name": f"{key}-json-index",
+            "type": "json",
+        }
+        for key in schema["properties"]
+    ]
+
+
+def validate_collector_data(json_blob):
+    """Check already-parsed collector data against ``schema``.
+
+    Args:
+        json_blob: The decoded JSON value to validate.
+
+    Returns:
+        An empty string when ``json_blob`` conforms to ``schema``; otherwise
+        a message built from the ``ValidationError`` raised by
+        ``jsonschema.validate``.
+    """
+    outcome = ""
+    try:
+        jsonschema.validate(json_blob, schema)
+    except jsonschema.exceptions.ValidationError as error:
+        # Only the validator's message is reported, not the full error object.
+        outcome = f"Validation failed with error: {error.message}"
+    return outcome
+
+
+if __name__ == "__main__":
+    # Command-line use: validate the JSON document at the path given as the
+    # first argument and print the outcome (an empty line means it is valid).
+    if len(sys.argv) < 2:
+        # Exit with a usage hint instead of an IndexError traceback.
+        sys.exit(f"usage: {sys.argv[0]} <collector-data.json>")
+
+    # Binary mode lets json.load() detect the UTF-8/16/32 encoding itself
+    # instead of depending on the platform's default text encoding.
+    with open(sys.argv[1], "rb") as fd:
+        json_data = json.load(fd)
+
+    print(validate_collector_data(json_data))