diff options
-rw-r--r-- | env-vars.sh | 3 | ||||
-rw-r--r-- | example_data.json | 47 | ||||
-rw-r--r-- | example_data_1.json | 57 | ||||
-rw-r--r-- | example_data_2.json | 51 | ||||
-rw-r--r-- | requirements.txt | 6 | ||||
-rwxr-xr-x | src/db/dictionary.py | 11 | ||||
-rw-r--r-- | src/db/schema.py | 154 | ||||
-rw-r--r-- | src/db/sql.py | 6 | ||||
-rwxr-xr-x | src/main.py | 41 |
9 files changed, 261 insertions, 115 deletions
diff --git a/env-vars.sh b/env-vars.sh index 8361a2f..b5540f1 100644 --- a/env-vars.sh +++ b/env-vars.sh @@ -1,6 +1,7 @@ export COUCHDB_USER=test export COUCHDB_PASSWORD=test export COUCHDB_NAME=test -export COUCHDB_HOSTNAME=couchdb +export COUCHDB_HOSTNAME=localhost export DOCKER_JWT_PUBKEY_PATH=/tmp/soc_collector/ export DOCKER_JWT_HTPASSWD_PATH=/tmp/soc_collector_htpasswd/ +export JWT_PUBKEY_PATH=/tmp/public.pem diff --git a/example_data.json b/example_data.json deleted file mode 100644 index 3af9f35..0000000 --- a/example_data.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "document_version": 1, - "ip": "192.0.2.10", - "port": 443, - "whois_description": "SOMENET", - "asn": "AS65001", - "asn_country_code": "SE", - "ptr": "host10.test.soc.sunet.se", - "abuse_mail": "abuse@test.soc.sunet.se", - "domain": "sunet.se", - "timestamp_in_utc": "2021-06-21T14:06 UTC", - "user_presentation": { - "description": "A presentation of the observation as a whole (optional)", - "data": { - "subject_cn": { - "data": "unknown", - "display_name": "Subject Common Name", - "description": "A description of this key (optional)" - }, - "subject_o": { - "data": "unknown", - "display_name": "Subject O", - "description": "..." - }, - "full_name": { - "data": "VMware ESXi 6.7.0 build-17700523", - "display_name": "Full Name" - }, - "end_of_general_support": { - "data": false, - "display_name": "End of general support" - }, - "cve_2021_21972": { - "data": "CVE-2021-21972 not applicable", - "display_name": "CVE 2021 21972" - }, - "cve_2021_21974": { - "data": "CVE-2021-21974 patched", - "display_name": "CVE 2021 21974" - }, - "cve_2021_21985": { - "data": "CVE-2021-21985 not applicable", - "display_name": "CVE 2021 21985" - } - } - } -} diff --git a/example_data_1.json b/example_data_1.json new file mode 100644 index 0000000..5c9dcae --- /dev/null +++ b/example_data_1.json @@ -0,0 +1,57 @@ +{ + "document_version": 1, + "ip": "192.0.2.10", + "port": 443, + "whois_description": "SOMENET", + "asn": "AS65001", + "asn_country_code": "SE", + "ptr": "host10.test.soc.sunet.se", + "abuse_mail": "abuse@test.soc.sunet.se", + "domain": "sunet.se", + "timestamp_in_utc": "2021-06-21T14:06 UTC", + "display_name": "Apache 2.1.3", + "description": "The Apache HTTP Server is a free and open-source cross-platform web server software, released under the terms of Apache License 2.0.", + "custom_data": { + "subject_cn": { + "data": "Apache", + "display_name": "Subject Common Name" + }, + "end_of_general_support": { + "data": false, + "display_name": "End of general support", + "description": "Is the software currently supported?" + } + }, + "result": { + "cve_2015_0049": { + "display_name": "CVE-2015-0049", + "vulnerable": false, + "description": "Allows remote attackers to execute arbitrary code or cause a denial of service (memory corruption)." + }, + "cve_2015_0050": { + "display_name": "CVE-2015-0050", + "vulnerable": false + }, + "cve_2015_0060": { + "display_name": "CVE-2015-0060", + "vulnerable": true, + "reliability": 2 + }, + "cve_2015_0063": { + "display_name": "CVE-2015-0063", + "vulnerable": false + }, + "insecure_cryptography": { + "display_name": "Insecure cryptography", + "vulnerable": true, + "reliability": 5, + "description": "Uses RSA instead of elliptic curve." + }, + "possible_webshell": { + "display_name": "Webshells (PST)", + "investigation_needed": true, + "reliability": 1, + "description": "A webshell of type PST was confirmed at /test/webshell.php" + } + } +} diff --git a/example_data_2.json b/example_data_2.json new file mode 100644 index 0000000..90e3f0a --- /dev/null +++ b/example_data_2.json @@ -0,0 +1,51 @@ +{ + "document_version": 1, + "ip": "192.0.2.20", + "port": 80, + "whois_description": "SOMENET", + "asn": "AS65001", + "asn_country_code": "SE", + "ptr": "host11.test.soc.sunet.se", + "abuse_mail": "abuse@test.soc.sunet.se", + "domain": "sunet.se", + "timestamp_in_utc": "2021-06-30T10:00 UTC", + "display_name": "VMware ESXi 6.7.0 build-17700523", + "description": "VMware ESXi is an enterprise-class, type-1 hypervisor developed by VMware for deploying and serving virtual computers. As a type-1 hypervisor, ESXi is not a software application that is installed on an operating system; instead, it includes and integrates vital OS components, such as a kernel.", + "custom_data": { + "subject_cn": { + "data": "VMware ESXi", + "display_name": "Subject Common Name" + }, + "end_of_general_support": { + "data": true, + "display_name": "End of general support", + "description": "Is the software currently supported?" + } + }, + "result": { + "cve_2019_0001": { + "display_name": "CVE-2019-0001", + "vulnerable": false + }, + "cve_2015_0002": { + "display_name": "CVE-2015-0002", + "vulnerable": false, + "description": "There is a use of insufficiently random values vulnerability. An unauthenticated, remote attacker can guess information by a large number of attempts. Successful exploitation may cause information leak." + }, + "cve_2015_0003": { + "display_name": "CVE-2015-0003", + "vulnerable": true, + "reliability": 2, + "description": "A carefully crafted request body can cause a read to a random memory area which could cause the process to crash." + }, + "cve_2015_0004": { + "display_name": "CVE-2015-0004", + "vulnerable": false + }, + "cve_2015_0005": { + "display_name": "CVE-2015-0005", + "vulnerable": true, + "reliability": 4 + } + } +} diff --git a/requirements.txt b/requirements.txt index ce2f921..c50a7e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,18 +10,24 @@ fastapi==0.70.0 fastapi-jwt-auth==0.5.0 h11==0.12.0 idna==3.3 +inflect==5.5.2 iniconfig==1.1.1 +jsonschema==4.4.0 nose==1.3.7 packaging==21.3 pluggy==1.0.0 +psycopg2-binary==2.9.3 py==1.11.0 pycparser==2.20 pydantic==1.8.2 PyJWT==1.7.1 pyparsing==3.0.6 +pyrsistent==0.18.1 pytest==6.2.5 requests==2.26.0 sniffio==1.2.0 +sqlacodegen==2.3.0 +SQLAlchemy==1.4.35 starlette==0.16.0 toml==0.10.2 typing-extensions==3.10.0.2 diff --git a/src/db/dictionary.py b/src/db/dictionary.py index 15f8f88..f0f5fe9 100755 --- a/src/db/dictionary.py +++ b/src/db/dictionary.py @@ -11,7 +11,8 @@ import os import sys import time -from db import couch, index +from db import couch +from db.schema import as_index_list, validate_collector_data class DictDB(): @@ -46,7 +47,7 @@ class DictDB(): print("Creating database and indexes.") self.couchdb = self.server.create(self.database) - for i in index.as_list(): + for i in as_index_list(): self.couchdb.index(i) self._ts = time.time() @@ -71,9 +72,15 @@ class DictDB(): if type(data) is list: for item in data: + error = validate_collector_data(item) + if error != "": + return error item['_id'] = str(self.unique_key()) ret = self.couchdb.save_bulk(data) else: + error = validate_collector_data(data) + if error != "": + return error data['_id'] = str(self.unique_key()) ret = self.couchdb.save(data) diff --git a/src/db/schema.py b/src/db/schema.py index 37da5aa..9bdf130 100644 --- a/src/db/schema.py +++ b/src/db/schema.py @@ -1,75 +1,135 @@ import json +import sys +import traceback import jsonschema +# fmt:off +# NOTE: Commented out properties are left intentionally, so it is easier to see +# what properties are optional. schema = { "$schema": "http://json-schema.org/schema#", "type": "object", "properties": { - "document_version": { - "type": "integer" - }, - "ip": { - "type": "string" - }, - "port": { - "type": "integer" - }, - "whois_description": { - "type": "string" - }, - "asn": { - "type": "string" - }, - "asn_country_code": { - "type": "string" - }, - "ptr": { - "type": "string" - }, - "abuse_mail": { - "type": "string" - }, - "domain": { - "type": "string" - }, - "timestamp_in_utc": { - "type": "string" + "document_version": {"type": "integer"}, + "ip": {"type": "string"}, + "port": {"type": "integer"}, + "whois_description": {"type": "string"}, + "asn": {"type": "string"}, + "asn_country_code": {"type": "string"}, + "ptr": {"type": "string"}, + "abuse_mail": {"type": "string"}, + "domain": {"type": "string"}, + "timestamp_in_utc": {"type": "string"}, + "display_name": {"type": "string"}, + "description": {"type": "string"}, + "custom_data": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "display_name": {"type": "string"}, + "data": {"type": ["string", "boolean", "integer"]}, + "description": {"type": "string"}, + }, + "required": [ + "display_name", + "data", + # "description" + ] + }, + }, }, - "user_presentation": { + "result": { "type": "object", - "properties": { - "description": { - "type": "string" + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "display_name": {"type": "string"}, + "vulnerable": {"type": "boolean"}, + "investigation_needed": {"type": "boolean"}, + "reliability": {"type": "integer"}, + "description": {"type": "string"}, + }, + "oneOf": [ + { + "required": [ + "display_name", + "vulnerable", + # "reliability", # TODO: reliability is required if vulnerable = true + # "description", + ] + }, + { + "required": [ + "display_name", + "investigation_needed", + # "reliability", # TODO: reliability is required if investigation_needed = true + # "description", + ] + }, + ] }, - "data": { - "type": "object" - } - } - } + }, + }, }, "required": [ "document_version", "ip", "port", + "whois_description", + "asn", + "asn_country_code", + "ptr", + "abuse_mail", + "domain", "timestamp_in_utc", - "user_presentation" - ] + "display_name", + # "description", + # "custom_data", + "result", + ], } +# fmt:on + + +def get_index_keys(): + keys = list() + for key in schema["properties"]: + keys.append(key) + return keys + + +def as_index_list(): + index_list = list() + for key in schema["properties"]: + name = f"{key}-json-index" + index = { + "index": { + "fields": [ + key, + ] + }, + "name": name, + "type": "json" + } + index_list.append(index) + + return index_list def validate_collector_data(json_blob): try: jsonschema.validate(json_blob, schema) except jsonschema.exceptions.ValidationError as e: - print(f'Validation failed with error: {e}') - return False - - return True + return f"Validation failed with error: {e.message}" + return "" -if __name__ == '__main__': - with open('example_data.json') as fd: +if __name__ == "__main__": + with open(sys.argv[1]) as fd: json_data = json.loads(fd.read()) - validate_collector_data(json_data) + print(validate_collector_data(json_data)) diff --git a/src/db/sql.py b/src/db/sql.py index fc20e36..c47a69c 100644 --- a/src/db/sql.py +++ b/src/db/sql.py @@ -1,9 +1,9 @@ import datetime +import os import sys from contextlib import contextmanager -import os -from sqlalchemy import (Boolean, Column, Date, Integer, Serial, String, Text, +from sqlalchemy import (Boolean, Column, Date, Integer, String, Text, create_engine, text) from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker @@ -15,7 +15,7 @@ metadata = Base.metadata class Log(Base): __tablename__ = "log" - id = Column(Serial, primary_key=True) + id = Column(Integer, primary_key=True) timestamp = Column(Date, nullable=False, default=datetime.datetime.utcnow) username = Column(Text, nullable=False) diff --git a/src/main.py b/src/main.py index e6bb8e2..a62d77c 100755 --- a/src/main.py +++ b/src/main.py @@ -1,12 +1,10 @@ +import json import os import sys import time import uvicorn - -from fastapi import Depends -from fastapi import FastAPI -from fastapi import Request +from fastapi import Depends, FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from fastapi_jwt_auth import AuthJWT @@ -14,9 +12,7 @@ from fastapi_jwt_auth.exceptions import AuthJWTException from pydantic import BaseModel from db.dictionary import DictDB -from db.index import CouchIindex -from db.sql import Log -from db.sql import Scanner +from db.schema import get_index_keys app = FastAPI() @@ -41,8 +37,8 @@ async def mock_x_total_count_header(request: Request, call_next): for i in range(10): try: db = DictDB() - except Exception: - print(f"Database not responding, will try again soon. Attempt {i + 1} of 10.") + except Exception as e: + print(f"Database not responding, will try again soon: {e}") else: break time.sleep(1) @@ -73,7 +69,7 @@ def get_data(key=None, limit=25, skip=0, ip=None, return db.get(key) selectors = dict() - indexes = CouchIindex().dict() + indexes = get_index_keys() selectors['domain'] = domain if ip and 'ip' in indexes: @@ -169,17 +165,32 @@ async def get_key(key=None, Authorize: AuthJWT = Depends()): return JSONResponse(content={"status": "success", "docs": data}) -@ app.post('/sc/v0/add') +@app.post('/sc/v0/add') async def add(data: Request, Authorize: AuthJWT = Depends()): - - # Maybe we should protect this enpoint too and let the scanner use - # a JWT token as well. # Authorize.jwt_required() - json_data = await data.json() + try: + json_data = await data.json() + except json.decoder.JSONDecodeError: + return JSONResponse( + content={ + "status": "error", + "message": "Invalid JSON.", + }, + status_code=400, + ) key = db.add(json_data) + if isinstance(key, str): + return JSONResponse( + content={ + "status": "error", + "message": key, + }, + status_code=400, + ) + return JSONResponse(content={"status": "success", "docs": key}) |