From 83f25ea5cafb075474b7379eac0aa612b710e1a6 Mon Sep 17 00:00:00 2001 From: Ernst Widerberg Date: Mon, 11 Apr 2022 16:14:47 +0200 Subject: Update example_data + schema --- src/schema.py | 109 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/schema.py b/src/schema.py index 37da5aa..7e106d1 100644 --- a/src/schema.py +++ b/src/schema.py @@ -1,75 +1,98 @@ import json +import sys import jsonschema +# fmt:off +# NOTE: Commented out properties are left intentionally, so it is easier to see +# what properties are optional. schema = { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": { - "document_version": { - "type": "integer" - }, - "ip": { - "type": "string" - }, - "port": { - "type": "integer" - }, - "whois_description": { - "type": "string" - }, - "asn": { - "type": "string" - }, - "asn_country_code": { - "type": "string" - }, - "ptr": { - "type": "string" - }, - "abuse_mail": { - "type": "string" - }, - "domain": { - "type": "string" - }, - "timestamp_in_utc": { - "type": "string" + "document_version": {"type": "integer"}, + "ip": {"type": "string"}, + "port": {"type": "integer"}, + "whois_description": {"type": "string"}, + "asn": {"type": "string"}, + "asn_country_code": {"type": "string"}, + "ptr": {"type": "string"}, + "abuse_mail": {"type": "string"}, + "domain": {"type": "string"}, + "timestamp_in_utc": {"type": "string"}, + "system_name": {"type": "string"}, + "description": {"type": "string"}, + "custom_data": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "display_name": {"type": "string"}, + "data": {"type": ["string", "boolean", "integer"]}, + "description": {"type": "string"}, + }, + "required": [ + "display_name", + "data", + # "description" + ] + }, + }, }, - "user_presentation": { + "result": { "type": "object", - "properties": { - "description": { - "type": "string" + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "display_name": {"type": "string"}, + "vulnerable": {"type": "boolean"}, + "reliability": {"type": "integer"}, + "description": {"type": "string"}, + }, + "required": [ + "display_name", + "vulnerable", + # "reliability", # TODO: reliability is required if vulnerable = true + # "description", + ] }, - "data": { - "type": "object" - } - } - } + }, + }, }, "required": [ "document_version", "ip", "port", + "whois_description", + "asn", + "asn_country_code", + "ptr", + "abuse_mail", + "domain", "timestamp_in_utc", - "user_presentation" - ] + "system_name", + # "description", + # "custom_data", + "result", + ], } +# fmt:on def validate_collector_data(json_blob): try: jsonschema.validate(json_blob, schema) except jsonschema.exceptions.ValidationError as e: - print(f'Validation failed with error: {e}') + print(f"Validation failed with error: {e}") return False return True -if __name__ == '__main__': - with open('example_data.json') as fd: +if __name__ == "__main__": + with open(sys.argv[1]) as fd: json_data = json.loads(fd.read()) validate_collector_data(json_data) -- cgit v1.1 From e28a618db0505d8ba6fdd64a1bb5cddb170090f8 Mon Sep 17 00:00:00 2001 From: Ernst Widerberg Date: Mon, 11 Apr 2022 16:27:22 +0200 Subject: Rename top-level property "system_name" -> "display_name" --- src/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/schema.py b/src/schema.py index 7e106d1..29df4f3 100644 --- a/src/schema.py +++ b/src/schema.py @@ -20,7 +20,7 @@ schema = { "abuse_mail": {"type": "string"}, "domain": {"type": "string"}, "timestamp_in_utc": {"type": "string"}, - "system_name": {"type": "string"}, + "display_name": {"type": "string"}, "description": {"type": "string"}, "custom_data": { "type": "object", @@ -72,7 +72,7 @@ schema = { "abuse_mail", "domain", "timestamp_in_utc", - "system_name", + "display_name", # "description", # "custom_data", "result", -- cgit v1.1 From 25ce80ec7dc0d9155962aa96164f01420f2a4cfc Mon Sep 17 00:00:00 2001 From: John Van de Meulebrouck Brendgard Date: Mon, 11 Apr 2022 21:11:29 +0200 Subject: Added investigation_needed and made it required to specify either vulnerable or investigation_needed. --- src/schema.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/schema.py b/src/schema.py index 29df4f3..f92a2ea 100644 --- a/src/schema.py +++ b/src/schema.py @@ -46,16 +46,29 @@ schema = { ".*": { "type": "object", "properties": { - "display_name": {"type": "string"}, - "vulnerable": {"type": "boolean"}, - "reliability": {"type": "integer"}, - "description": {"type": "string"}, + "display_name": {"type": "string"}, + "vulnerable": {"type": "boolean"}, + "investigation_needed": {"type": "boolean"}, + "reliability": {"type": "integer"}, + "description": {"type": "string"}, }, - "required": [ - "display_name", - "vulnerable", - # "reliability", # TODO: reliability is required if vulnerable = true - # "description", + "oneOf": [ + { + "required": [ + "display_name", + "vulnerable", + # "reliability", # TODO: reliability is required if vulnerable = true + # "description", + ] + }, + { + "required": [ + "display_name", + "investigation_needed", + # "reliability", # TODO: reliability is required if investigation_needed = true + # "description", + ] + }, ] }, }, -- cgit v1.1 From 2aebcdeca17f9b46d90f5255dd4d03caa358701e Mon Sep 17 00:00:00 2001 From: Kristofer Hallin Date: Tue, 12 Apr 2022 11:03:53 +0200 Subject: Use the schema when creating indexes, also validate data before writing to CouchDB. --- src/db.py | 10 ++++++++-- src/index.py | 61 ----------------------------------------------------------- src/main.py | 37 ++++++++++++++++++++++++++---------- src/schema.py | 34 ++++++++++++++++++++++++++++----- 4 files changed, 64 insertions(+), 78 deletions(-) delete mode 100644 src/index.py (limited to 'src') diff --git a/src/db.py b/src/db.py index d186616..6f25ec3 100755 --- a/src/db.py +++ b/src/db.py @@ -12,7 +12,7 @@ import sys import time import couch -import index +from schema import as_index_list, validate_collector_data class DictDB(): @@ -47,7 +47,7 @@ class DictDB(): print("Creating database and indexes.") self.couchdb = self.server.create(self.database) - for i in index.as_list(): + for i in as_index_list(): self.couchdb.index(i) self._ts = time.time() @@ -72,9 +72,15 @@ class DictDB(): if type(data) is list: for item in data: + error = validate_collector_data(item) + if error != "": + return error item['_id'] = str(self.unique_key()) ret = self.couchdb.save_bulk(data) else: + error = validate_collector_data(data) + if error != "": + return error data['_id'] = str(self.unique_key()) ret = self.couchdb.save(data) diff --git a/src/index.py b/src/index.py deleted file mode 100644 index 688ceeb..0000000 --- a/src/index.py +++ /dev/null @@ -1,61 +0,0 @@ -from pydantic import BaseSettings - - -class CouchIindex(BaseSettings): - domain: dict = { - "index": { - "fields": [ - "domain", - ] - }, - "name": "domain-json-index", - "type": "json" - } - ip: dict = { - "index": { - "fields": [ - "domain", - "ip" - ] - }, - "name": "ip-json-index", - "type": "json" - } - port: dict = { - "index": { - "fields": [ - "domain", - "port" - ] - }, - "name": "port-json-index", - "type": "json" - } - asn: dict = { - "index": { - "fields": [ - "domain", - "asn" - ] - }, - "name": "asn-json-index", - "type": "json" - } - asn_country_code: dict = { - "index": { - "fields": [ - "domain", - "asn_country_code" - ] - }, - "name": "asn-country-code-json-index", - "type": "json" - } - - -def as_list(): - index_list = list() - for item in CouchIindex().dict(): - index_list.append(CouchIindex().dict()[item]) - - return index_list diff --git a/src/main.py b/src/main.py index fb359df..9de8eb8 100755 --- a/src/main.py +++ b/src/main.py @@ -1,16 +1,18 @@ +import json import os import sys -import uvicorn +import time -from fastapi import FastAPI, Depends, Request +import uvicorn +from fastapi import Depends, FastAPI, Request +from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from fastapi_jwt_auth import AuthJWT from fastapi_jwt_auth.exceptions import AuthJWTException -from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel -from index import CouchIindex -import time + from db import DictDB +from schema import get_index_keys, validate_collector_data app = FastAPI() @@ -68,7 +70,7 @@ def get_data(key=None, limit=25, skip=0, ip=None, return db.get(key) selectors = dict() - indexes = CouchIindex().dict() + indexes = get_index_keys() selectors['domain'] = domain if ip and 'ip' in indexes: @@ -166,15 +168,30 @@ async def get_key(key=None, Authorize: AuthJWT = Depends()): @app.post('/sc/v0/add') async def add(data: Request, Authorize: AuthJWT = Depends()): - - # Maybe we should protect this enpoint too and let the scanner use - # a JWT token as well. # Authorize.jwt_required() - json_data = await data.json() + try: + json_data = await data.json() + except json.decoder.JSONDecodeError: + return JSONResponse( + content={ + "status": "error", + "message": "Invalid JSON.", + }, + status_code=400, + ) key = db.add(json_data) + if isinstance(key, str): + return JSONResponse( + content={ + "status": "error", + "message": key, + }, + status_code=400, + ) + return JSONResponse(content={"status": "success", "docs": key}) diff --git a/src/schema.py b/src/schema.py index f92a2ea..9bdf130 100644 --- a/src/schema.py +++ b/src/schema.py @@ -1,5 +1,6 @@ import json import sys +import traceback import jsonschema @@ -94,18 +95,41 @@ schema = { # fmt:on +def get_index_keys(): + keys = list() + for key in schema["properties"]: + keys.append(key) + return keys + + +def as_index_list(): + index_list = list() + for key in schema["properties"]: + name = f"{key}-json-index" + index = { + "index": { + "fields": [ + key, + ] + }, + "name": name, + "type": "json" + } + index_list.append(index) + + return index_list + + def validate_collector_data(json_blob): try: jsonschema.validate(json_blob, schema) except jsonschema.exceptions.ValidationError as e: - print(f"Validation failed with error: {e}") - return False - - return True + return f"Validation failed with error: {e.message}" + return "" if __name__ == "__main__": with open(sys.argv[1]) as fd: json_data = json.loads(fd.read()) - validate_collector_data(json_data) + print(validate_collector_data(json_data)) -- cgit v1.1