diff options
author | Kristofer Hallin <kristofer@sunet.se> | 2022-04-12 13:49:05 +0200 |
---|---|---|
committer | Kristofer Hallin <kristofer@sunet.se> | 2022-04-12 13:49:05 +0200 |
commit | 509bf7fe6a4589d525b21f179ce8cb730c0d4e59 (patch) | |
tree | 975714991716efe60ce67e82e143c825a8eb27c0 /src | |
parent | 16f5009ac0d630c5f25c9d6cb4e8fb026ae628f9 (diff) | |
parent | 2aebcdeca17f9b46d90f5255dd4d03caa358701e (diff) |
Merged main and updated stuff.
Diffstat (limited to 'src')
-rwxr-xr-x | src/db/dictionary.py | 11 | ||||
-rw-r--r-- | src/db/schema.py | 154 | ||||
-rw-r--r-- | src/db/sql.py | 6 | ||||
-rwxr-xr-x | src/main.py | 41 |
4 files changed, 145 insertions, 67 deletions
diff --git a/src/db/dictionary.py b/src/db/dictionary.py index 15f8f88..f0f5fe9 100755 --- a/src/db/dictionary.py +++ b/src/db/dictionary.py @@ -11,7 +11,8 @@ import os import sys import time -from db import couch, index +from db import couch +from db.schema import as_index_list, validate_collector_data class DictDB(): @@ -46,7 +47,7 @@ class DictDB(): print("Creating database and indexes.") self.couchdb = self.server.create(self.database) - for i in index.as_list(): + for i in as_index_list(): self.couchdb.index(i) self._ts = time.time() @@ -71,9 +72,15 @@ class DictDB(): if type(data) is list: for item in data: + error = validate_collector_data(item) + if error != "": + return error item['_id'] = str(self.unique_key()) ret = self.couchdb.save_bulk(data) else: + error = validate_collector_data(data) + if error != "": + return error data['_id'] = str(self.unique_key()) ret = self.couchdb.save(data) diff --git a/src/db/schema.py b/src/db/schema.py index 37da5aa..9bdf130 100644 --- a/src/db/schema.py +++ b/src/db/schema.py @@ -1,75 +1,135 @@ import json +import sys +import traceback import jsonschema +# fmt:off +# NOTE: Commented out properties are left intentionally, so it is easier to see +# what properties are optional. schema = { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": { - "document_version": { - "type": "integer" - }, - "ip": { - "type": "string" - }, - "port": { - "type": "integer" - }, - "whois_description": { - "type": "string" - }, - "asn": { - "type": "string" - }, - "asn_country_code": { - "type": "string" - }, - "ptr": { - "type": "string" - }, - "abuse_mail": { - "type": "string" - }, - "domain": { - "type": "string" - }, - "timestamp_in_utc": { - "type": "string" + "document_version": {"type": "integer"}, + "ip": {"type": "string"}, + "port": {"type": "integer"}, + "whois_description": {"type": "string"}, + "asn": {"type": "string"}, + "asn_country_code": {"type": "string"}, + "ptr": {"type": "string"}, + "abuse_mail": {"type": "string"}, + "domain": {"type": "string"}, + "timestamp_in_utc": {"type": "string"}, + "display_name": {"type": "string"}, + "description": {"type": "string"}, + "custom_data": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "display_name": {"type": "string"}, + "data": {"type": ["string", "boolean", "integer"]}, + "description": {"type": "string"}, + }, + "required": [ + "display_name", + "data", + # "description" + ] + }, + }, }, - "user_presentation": { + "result": { "type": "object", - "properties": { - "description": { - "type": "string" + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "display_name": {"type": "string"}, + "vulnerable": {"type": "boolean"}, + "investigation_needed": {"type": "boolean"}, + "reliability": {"type": "integer"}, + "description": {"type": "string"}, + }, + "oneOf": [ + { + "required": [ + "display_name", + "vulnerable", + # "reliability", # TODO: reliability is required if vulnerable = true + # "description", + ] + }, + { + "required": [ + "display_name", + "investigation_needed", + # "reliability", # TODO: reliability is required if investigation_needed = true + # "description", + ] + }, + ] }, - "data": { - "type": "object" - } - } - } + }, + }, }, "required": [ "document_version", "ip", "port", + "whois_description", + "asn", + "asn_country_code", + "ptr", + "abuse_mail", + "domain", "timestamp_in_utc", - "user_presentation" - ] + "display_name", + # "description", + # "custom_data", + "result", + ], } +# fmt:on + + +def get_index_keys(): + keys = list() + for key in schema["properties"]: + keys.append(key) + return keys + + +def as_index_list(): + index_list = list() + for key in schema["properties"]: + name = f"{key}-json-index" + index = { + "index": { + "fields": [ + key, + ] + }, + "name": name, + "type": "json" + } + index_list.append(index) + + return index_list def validate_collector_data(json_blob): try: jsonschema.validate(json_blob, schema) except jsonschema.exceptions.ValidationError as e: - print(f'Validation failed with error: {e}') - return False - - return True + return f"Validation failed with error: {e.message}" + return "" -if __name__ == '__main__': - with open('example_data.json') as fd: +if __name__ == "__main__": + with open(sys.argv[1]) as fd: json_data = json.loads(fd.read()) - validate_collector_data(json_data) + print(validate_collector_data(json_data)) diff --git a/src/db/sql.py b/src/db/sql.py index fc20e36..c47a69c 100644 --- a/src/db/sql.py +++ b/src/db/sql.py @@ -1,9 +1,9 @@ import datetime +import os import sys from contextlib import contextmanager -import os -from sqlalchemy import (Boolean, Column, Date, Integer, Serial, String, Text, +from sqlalchemy import (Boolean, Column, Date, Integer, String, Text, create_engine, text) from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker @@ -15,7 +15,7 @@ metadata = Base.metadata class Log(Base): __tablename__ = "log" - id = Column(Serial, primary_key=True) + id = Column(Integer, primary_key=True) timestamp = Column(Date, nullable=False, default=datetime.datetime.utcnow) username = Column(Text, nullable=False) diff --git a/src/main.py b/src/main.py index e6bb8e2..a62d77c 100755 --- a/src/main.py +++ b/src/main.py @@ -1,12 +1,10 @@ +import json import os import sys import time import uvicorn - -from fastapi import Depends -from fastapi import FastAPI -from fastapi import Request +from fastapi import Depends, FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from fastapi_jwt_auth import AuthJWT @@ -14,9 +12,7 @@ from fastapi_jwt_auth.exceptions import AuthJWTException from pydantic import BaseModel from db.dictionary import DictDB -from db.index import CouchIindex -from db.sql import Log -from db.sql import Scanner +from db.schema import get_index_keys app = FastAPI() @@ -41,8 +37,8 @@ async def mock_x_total_count_header(request: Request, call_next): for i in range(10): try: db = DictDB() - except Exception: - print(f"Database not responding, will try again soon. Attempt {i + 1} of 10.") + except Exception as e: + print(f"Database not responding, will try again soon: {e}") else: break time.sleep(1) @@ -73,7 +69,7 @@ def get_data(key=None, limit=25, skip=0, ip=None, return db.get(key) selectors = dict() - indexes = CouchIindex().dict() + indexes = get_index_keys() selectors['domain'] = domain if ip and 'ip' in indexes: @@ -169,17 +165,32 @@ async def get_key(key=None, Authorize: AuthJWT = Depends()): return JSONResponse(content={"status": "success", "docs": data}) -@ app.post('/sc/v0/add') +@app.post('/sc/v0/add') async def add(data: Request, Authorize: AuthJWT = Depends()): - - # Maybe we should protect this enpoint too and let the scanner use - # a JWT token as well. # Authorize.jwt_required() - json_data = await data.json() + try: + json_data = await data.json() + except json.decoder.JSONDecodeError: + return JSONResponse( + content={ + "status": "error", + "message": "Invalid JSON.", + }, + status_code=400, + ) key = db.add(json_data) + if isinstance(key, str): + return JSONResponse( + content={ + "status": "error", + "message": key, + }, + status_code=400, + ) + return JSONResponse(content={"status": "success", "docs": key}) |