From 2b4cdea913117854e164742ae65b802403bd374a Mon Sep 17 00:00:00 2001 From: Jan Mach <jan.mach@cesnet.cz> Date: Wed, 6 Feb 2019 17:02:40 +0100 Subject: [PATCH] This commit removes support for MongoDB migrations to PostgreSQL. After this all code related to MongoDB is gone and database migration from MongoDB to PostgreSQL is no longer possible. I have kept the documentation page about migration for now, so that the users can still at least find the information. (Redmine issue: #4225) --- bin/mentat-storage.py | 2 +- conf/core/database.json.conf | 313 --------------- conf/requirements-latest.pip | 1 - conf/requirements.pip | 1 - doc/sphinx/_doclib/migration.rst | 5 +- lib/mentat/datatype/mongodb.py | 206 ---------- lib/mentat/idea/internal.py | 4 +- lib/mentat/idea/mongodb.py | 570 --------------------------- lib/mentat/idea/sqldb.py | 97 ++--- lib/mentat/idea/test_sqldb.py | 2 +- lib/mentat/plugin/test/test.py | 6 - lib/mentat/stats/idea.py | 39 -- lib/mentat/storage.py | 433 --------------------- lib/mentat/test_storage.py | 136 ------- scripts/depcheck.py | 2 +- scripts/depmanage.py | 2 +- scripts/mentat-dbfix-alerts.py | 73 ---- scripts/mentat-dump-analyzer.py | 290 -------------- scripts/mentat-dump.py | 94 ----- scripts/sqldb-migrate-data.py | 638 ------------------------------- scripts/sqldb-migrate-events.py | 192 ---------- 21 files changed, 33 insertions(+), 3073 deletions(-) delete mode 100644 lib/mentat/datatype/mongodb.py delete mode 100644 lib/mentat/idea/mongodb.py delete mode 100644 lib/mentat/storage.py delete mode 100644 lib/mentat/test_storage.py delete mode 100755 scripts/mentat-dbfix-alerts.py delete mode 100755 scripts/mentat-dump-analyzer.py delete mode 100755 scripts/mentat-dump.py delete mode 100644 scripts/sqldb-migrate-data.py delete mode 100644 scripts/sqldb-migrate-events.py diff --git a/bin/mentat-storage.py b/bin/mentat-storage.py index 067f53534..61f1ca011 100755 --- a/bin/mentat-storage.py +++ b/bin/mentat-storage.py @@ -11,7 +11,7 @@ """ This Mentat module is a real-time message processing daemon capable of storing `IDEA <https://idea.cesnet.cz/en/index>`__ messages into persistent storage. -Currently only `MongoDB <https://www.mongodb.com/>`__ NoSQL database is supported. +Currently only `PostgreSQL <https://www.postgresql.org/>`__ SQL database is supported. To view built-in help please execute the application with ``--help`` command line option:: diff --git a/conf/core/database.json.conf b/conf/core/database.json.conf index 202434b25..17a739c01 100644 --- a/conf/core/database.json.conf +++ b/conf/core/database.json.conf @@ -21,319 +21,6 @@ "password": "mentat", "host": "localhost", "port": 5432 - }, - - # - # Database connection settings. - # - "connection": { - "host": "localhost", - "port": 27017, - "timeout": 10000, - "args": null - }, - - # - # Database content settings. - # - "config": { - # Name of the primary database. - "db": "mentat", - # Name of the statistical database. - "db_stats": "mentat_stats", - # Name of the test database. - "db_test": "mentat_test", - - # Name of the collection containing IDEA messages. - "col_alerts": "alerts", - # Name of the collection containing generated reports. - "col_reports": "reports_ng", - # Name of the collection containing user accounts. - "col_users": "users", - # Name of the collection containing abuse groups. - "col_groups": "groups", - # Name of the collection containing web interface accesslogs. - "col_accesslogs": "accesslogs", - # Name of the collection containing web interface changelogs. 
- "col_changelogs": "changelogs", - - # Name of the collection containing calculated IDEA message statistics. - "col_stats_alerts": "statistics" - }, - - # - # Database schema settings. - # - "schema": { - "mentat": { - "description": "primary database", - "collections": { - "alerts": { - "description": "collection containing IDEA messages", - "indexes": [ - { - "name": "_id_", - "index": ["_id","ascending"] - }, - { - "name": "ts_1", - "index": ["ts","ascending"], - "background": true, - "sparse": false - }, - { - "name": "DetectTime_-1_Node.Name_1", - "index": [["DetectTime","descending"],["Node.Name","ascending"]], - "background": true, - "sparse": false - }, - { - "name": "DetectTime_-1_Category_1", - "index": [["DetectTime","descending"],["Category","ascending"]], - "background": true, - "sparse": false - }, - { - "name": "Source.IP4.ip_1", - "index": ["Source.IP4.ip","ascending"], - "background": true, - "sparse": true - }, - { - "name": "Target.IP4.ip_1", - "index": ["Target.IP4.ip","ascending"], - "background": true, - "sparse": true - }, - { - "name": "Source.IP4.min_1_Source.IP4.max_1", - "index": [["Source.IP4.min","ascending"],["Source.IP4.max","ascending"]], - "background": true, - "sparse": true - }, - { - "name": "Target.IP4.min_1_Target.IP4.max_1", - "index": [["Target.IP4.min","ascending"],["Target.IP4.max","ascending"]], - "background": true, - "sparse": true - }, - { - "name": "DetectTime_-1_Target.IP4.min_1_Target.IP4.max_1", - "index": [["DetectTime","descending"],["Target.IP4.min","ascending"],["Target.IP4.max","ascending"]], - "background": true, - "sparse": false - }, - { - "name": "_CESNET.EventClass_1", - "index": ["_CESNET.EventClass","ascending"], - "background": true, - "sparse": false - }, - { - "name": "_CESNET.StorageTime_1", - "index": ["_CESNET.StorageTime","descending"], - "background": true, - "sparse": false - }, - { - "name": "ts_-1__CESNET.ResolvedAbuses_1", - "index": [["ts","descending"],["_CESNET.ResolvedAbuses","ascending"]], - "background": true, - "sparse": false, - "partialFilterExpression" : { - "_CESNET.ResolvedAbuses" : { - "$exists" : true - } - } - }, - { - "name": "_CESNET.StorageTime_-1", - "index": ["_CESNET.StorageTime","descending"], - "background": true, - "sparse": false - } - ] - }, - - "reports_ng": { - "description": "collection containing generated reports", - "indexes": [ - { - "name": "_id_", - "index": ["_id","ascending"] - }, - { - "name": "ts_1", - "index": ["ts","descending"], - "background": true, - "sparse": false - }, - { - "name": "ts_from_1", - "index": ["ts_from","descending"], - "background": true, - "sparse": false - }, - { - "name": "ts_to_1", - "index": ["ts_to","descending"], - "background": true, - "sparse": false - }, - { - "name": "abuse_1", - "index": ["abuse","ascending"], - "background": true, - "sparse": false - }, - { - "name": "list_ips_1", - "index": ["list_ips","ascending"], - "background": true, - "sparse": false - } - ] - }, - - "users": { - "description": "collection containing user accounts", - "indexes": [ - { - "name": "_id_", - "index": ["_id","ascending"] - }, - { - "name": "ts_1", - "index": ["ts","descending"], - "background": true, - "sparse": false - } - ] - }, - - "groups": { - "description": "collection containing abuse groups", - "indexes": [ - { - "name": "_id_", - "index": ["_id","ascending"] - }, - { - "name": "ts_1", - "index": ["ts","descending"], - "background": true, - "sparse": false - } - ] - }, - - "accesslogs": { - "description": "collection containing web interface 
accesslogs", - "indexes": [ - { - "name": "_id_", - "index": ["_id","ascending"] - }, - { - "name": "ts_1", - "index": ["ts","descending"], - "background": true, - "sparse": false - } - ] - }, - - "changelogs": { - "description": "collection containing web interface changelogs", - "indexes": [ - { - "name": "_id_", - "index": ["_id","ascending"] - }, - { - "name": "ts_1", - "index": ["ts","descending"], - "background": true, - "sparse": false - } - ] - } - } - }, - - "mentat_stats": { - "description": "statistical database", - "collections": { - "statistics": { - "description": "collection containing calculated IDEA message statistics", - "indexes": [ - { - "name": "_id_", - "index": ["_id","ascending"] - }, - { - "name": "ts", - "index": ["ts","descending"], - "background": true, - "sparse": false - }, - { - "name": "ts_from", - "index": ["ts_from","descending"], - "background": true, - "sparse": false - }, - { - "name": "ts_to", - "index": ["ts_to","descending"], - "background": true, - "sparse": false - } - ] - } - } - } } } } - -# -# Following indices were defined previously, but they are not used anymore. -# Perhaps they will be handy again. -# -#{ -# "name": "node_name", -# "index": ["Node.Name","ascending"], -# "background": true, -# "sparse": false -#} -#{ -# "name": "node_sw", -# "index": ["Node.SW","ascending"], -# "background": true, -# "sparse": false -#} -#{ -# "name": "category", -# "index": ["Category","ascending"], -# "background": true, -# "sparse": false -#} -#{ -# "name": "description", -# "index": ["Description","ascending"], -# "background": true, -# "sparse": false -#} -#{ -# "name": "detector", -# "index": [["Node.Name","ascending"], ["Node.SW","ascending"]], -# "background": true, -# "sparse": false -#} -#{ -# "name": "detecttime", -# "index": ["DetectTime","descending"], -# "background": true, -# "sparse": false -#} diff --git a/conf/requirements-latest.pip b/conf/requirements-latest.pip index fb6cc70da..3d7a5cdc3 100644 --- a/conf/requirements-latest.pip +++ b/conf/requirements-latest.pip @@ -1,6 +1,5 @@ pytz ply -pymongo psycopg2 babel wtforms diff --git a/conf/requirements.pip b/conf/requirements.pip index cb8ccf649..f5e350473 100644 --- a/conf/requirements.pip +++ b/conf/requirements.pip @@ -1,6 +1,5 @@ pytz==2018.5 ply==3.11 -pymongo==3.5.1 psycopg2>=2.7,<2.8 --no-binary psycopg2 babel==2.6.0 wtforms==2.2.1 diff --git a/doc/sphinx/_doclib/migration.rst b/doc/sphinx/_doclib/migration.rst index e519a0337..5ef8ecc4c 100644 --- a/doc/sphinx/_doclib/migration.rst +++ b/doc/sphinx/_doclib/migration.rst @@ -9,8 +9,9 @@ see the :ref:`section-upgrading` section. .. warning:: - Prerequisite for migration is a successfull :ref:`installation <section-installation>` - of all Mentat system packages. + This manual is valid only for Mentat version up to ``2.3.x``. In version ``2.4.x`` + support for migration was dropped. This page will remain here until next version + and then it too will be gone. Please perform migration as soon as possible. New version of Mentat system comes with quite a lot of new features and most importantly uses `PostgreSQL <https://www.postgresql.org>`__ as database backend. 
Consider diff --git a/lib/mentat/datatype/mongodb.py b/lib/mentat/datatype/mongodb.py deleted file mode 100644 index 607a566b8..000000000 --- a/lib/mentat/datatype/mongodb.py +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# This file is part of Mentat system (https://mentat.cesnet.cz/). -# -# Copyright (C) since 2011 CESNET, z.s.p.o (http://www.ces.net/) -# Use of this source is governed by the MIT license, see LICENSE file. -#------------------------------------------------------------------------------- - -""" -Conversion library between internal datatypes and their MongoDB representation. - - -.. warning:: - - Deprecated code. - -""" - - -__author__ = "Jan Mach <jan.mach@cesnet.cz>" -__credits__ = "Pavel Kácha <pavel.kacha@cesnet.cz>, Andrea Kropáčová <andrea.kropacova@cesnet.cz>" - - -import datetime -import bson - -import typedcols -import mentat.datatype.internal - - -MONGODB_BINARY_TYPE = bson.binary.BINARY_SUBTYPE -"""Type of binary data used to store IPs and timestamps.""" - - -#------------------------------------------------------------------------------- -# INTERNAL HELPERS -#------------------------------------------------------------------------------- - - -def _to_bin(t, size): - """ - Convert given argument to binary representation. - - Reference: `link <http://pclib.github.io/safari/program/python-cookbook/Text/ch03s05.html>`__ - """ - return bson.Binary(t.to_bytes(size, 'big'), - subtype = MONGODB_BINARY_TYPE) - -def _from_bin(t): - """ - Convert given argument from binary representation. - - Reference: `link <http://pclib.github.io/safari/program/python-cookbook/Text/ch03s05.html>`__ - """ - if isinstance(t, bson.Binary): - t = bytes(t) - return int.from_bytes(t, 'big') - - -#------------------------------------------------------------------------------- -# CONVERSION FUNCTIONS -#------------------------------------------------------------------------------- - - -def t_datetime(val): - """ - Convert/validate: Datetime. - - :param any val: Value to be converted/validated - :return: Datetime object - :rtype: datetime.datetime - :raises ValueError: if the value could not be converted to datetime.datetime object - """ - # Maybe there is nothing to do - if isinstance(val, datetime.datetime): - return val.timestamp() - if isinstance(val, float): - return val - raise ValueError("Invalid datetime '{}'".format(repr(val))) - - -def t_dbref_users(val): - """ - Convert/validate: Database reference to 'users' collection. - """ - return bson.dbref.DBRef('users', val) - -def t_dbref_groups(val): - """ - Convert/validate: Database reference to 'groups' collection. - """ - return bson.dbref.DBRef('groups', val) - -def t_ip4int2bin(ip): - """ - Forward conversion. - """ - return _to_bin(ip, 4) - -def t_ip4bin2int(ip): - """ - Inverse conversion. - """ - return _from_bin(ip) - - -def t_ip6int2bin(ip): - """ - Forward conversion. - """ - return _to_bin(ip, 16) - -def t_ip6bin2int(ip): - """ - Inverse conversion. 
- """ - return _from_bin(ip) - -def t_network_record(val, source = None): - """ - - """ - if isinstance(val, (NetworkRecordIP4, NetworkRecordIP6)): - return val - elif isinstance(val, mentat.datatype.internal.NetworkRecordIP4): - return NetworkRecordIP4(val) - elif isinstance(val, mentat.datatype.internal.NetworkRecordIP6): - return NetworkRecordIP6(val) - else: - raise TypeError("Invalid value '{}' for MongoDB NetworkRecord".format(repr(val))) - -# -# Simplification method from TypedList to list and from TypedDict to dict. -# BSON encoder for MongoDB is unable to encode TypedList objects. It is hardwired -# to treat any unknown object as dict and attempts the conversion, which is ok -# for TypedDict, but fails horribly with TypedList. So far we were not able to -# persuade the BSON library to treat TypeLists as lists, so we have to use -# different approach and provide it with simple datatypes dict and list instead. -# -# This simple wrapper just returns the internal ``.data`` attribute after the -# successfull conversion instead of the object itself. -# -def simplify(obj): - return lambda x: obj(x).data - -#------------------------------------------------------------------------------- -# DATATYPE DEFINITIONS -#------------------------------------------------------------------------------- - - -types_mongodb = { - 'Boolean': bool, - 'Integer': int, - 'String': str, - 'Binary': str, - 'Float': float, - 'Ip4Numeric': t_ip4int2bin, - 'Ip6Numeric': t_ip6int2bin, - 'IPRange': typedcols.Discard, - 'Datetime': t_datetime, - 'DBRefUsers': t_dbref_users, - 'DBRefGroups': t_dbref_groups, - 'NetRecTypeIP4': mentat.datatype.internal.t_network_record_type_ip4, - 'NetRecTypeIP6': mentat.datatype.internal.t_network_record_type_ip6, - 'NetworkRecordOld': mentat.datatype.internal.t_network_record_old, - 'ReportingMode': mentat.datatype.internal.t_reporting_mode, - 'ReportingAttach': mentat.datatype.internal.t_reporting_attach, - 'ReportingFilter': mentat.datatype.internal.t_reporting_filter, - 'ReportingTiming': mentat.datatype.internal.t_reporting_timing, - 'SavedQuery': mentat.datatype.internal.t_saved_query -} - -# -# Use the simplification wrapper for all list types. -# -types_mongodb_list = {} -for k, v in mentat.datatype.internal.list_types(types_mongodb).items(): - types_mongodb_list[k] = simplify(v) - - -class NetworkRecordIP4(mentat.datatype.internal.NetworkRecord): - """ - MongoDB representation of IPv4 network record structure. - """ - typedef = mentat.datatype.internal.typedef_network_record_ip4(types_mongodb, types_mongodb_list) - -class NetworkRecordIP6(mentat.datatype.internal.NetworkRecord): - """ - MongoDB representation of IPv6 network record structure. - """ - typedef = mentat.datatype.internal.typedef_network_record_ip6(types_mongodb, types_mongodb_list) - -class AbuseGroup(typedcols.TypedDict): - """ - MongoDB representation of abuse group record structure. - """ - allow_unknown = False - typedef = mentat.datatype.internal.typedef_abuse_group(types_mongodb, types_mongodb_list) - -class User(typedcols.TypedDict): - """ - Implementation of abuse group record structure. 
- """ - allow_unknown = False - typedef = mentat.datatype.internal.typedef_user(types_mongodb, types_mongodb_list) diff --git a/lib/mentat/idea/internal.py b/lib/mentat/idea/internal.py index df8723a63..6b8084e06 100644 --- a/lib/mentat/idea/internal.py +++ b/lib/mentat/idea/internal.py @@ -279,7 +279,7 @@ class Idea(idea.lite.Idea): # pylint: disable=locally-disabled,too-many-ancesto result.extend(list(src['IP4'])) if 'IP6' in src: result.extend(list(src['IP6'])) - return result + return sorted(result, key = str) def get_ports(self, node): """ @@ -312,7 +312,7 @@ class Idea(idea.lite.Idea): # pylint: disable=locally-disabled,too-many-ancesto for src in self[node]: if 'Proto' in src: for item in list(src['Proto']): - result.add(item) + result.add(str(item).lower()) return sorted(list(result)) def get_types(self, node): diff --git a/lib/mentat/idea/mongodb.py b/lib/mentat/idea/mongodb.py deleted file mode 100644 index e2970ed3c..000000000 --- a/lib/mentat/idea/mongodb.py +++ /dev/null @@ -1,570 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# This file is part of Mentat system (https://mentat.cesnet.cz/). -# -# Copyright (C) since 2011 CESNET, z.s.p.o (http://www.ces.net/) -# Use of this source is governed by the MIT license, see LICENSE file. -#------------------------------------------------------------------------------- - - -""" -This module provides classess for object representation and conversion -of `IDEA <https://idea.cesnet.cz/en/index>`__ messages in -`MongoDB <https://www.mongodb.com/>`__ NoSQL database including reverse -direction. - -This module is expected to work with ``idea.lite.Idea``, ``idea.valid.Idea`` -or :py:class:`mentat.idea.internal.Idea` messages, data conversions are -very narrow regarding required input data types and will complain. - -This module contains following message classess: - -* :py:class:`mentat.idea.mongodb.IdeaIn` - - Forward conversion into `MongoDB <https://www.mongodb.com/>`__ appropriate - data format. - -* :py:class:`mentat.idea.mongodb.IdeaOut` - - Inverse conversion from `MongoDB <https://www.mongodb.com/>`__ appropriate - data format. This class is equal to :py:class:`mentat.idea.internal.Idea` - in every aspect even though it **IS** a different class. - -Example usage: - -.. code-block:: python - - >>> import mentat.idea.internal - >>> import mentat.idea.mongodb - - # IDEA messages ussually come from regular dicts or JSON - >>> idea_raw = {...} - - # Just pass the dict as parameter to constructor - >>> idea_msg = mentat.idea.internal.Idea(idea_raw) - - # When you want to store IDEA message into MongoDB: - >>> idea_mongo_in = mentat.idea.mongodb.IdeaIn(idea_msg) - - # When you want to retrieve IDEA message from MongoDB: - >>> idea_mongo_out = mentat.idea.mongodb.IdeaOut(idea_mongo_in) - -.. warning:: - - Deprecated code. - -""" - - -__author__ = "Jan Mach <jan.mach@cesnet.cz>" -__credits__ = "Pavel Kácha <pavel.kacha@cesnet.cz>, Andrea Kropáčová <andrea.kropacova@cesnet.cz>" - - -import uuid -import struct -import datetime -import time -import bson - -# -# Custom libraries. 
-# -import typedcols -import ipranges -import idea.base -import idea.lite -from idea.base import unicode -import mentat.idea.internal - - -MONGODB_BINARY_TYPE = bson.binary.BINARY_SUBTYPE -"""Type of binary data used to store IPs and timestamps.""" - -_SYSTEM_EPOCH = datetime.date(*time.gmtime(0)[0:3]) -"""System epoch""" - -_NTP_EPOCH = datetime.date(1900, 1, 1) -"""NTP epoch""" - -NTP_TO_EPOCH_DELTA = (_SYSTEM_EPOCH - _NTP_EPOCH).days * 24 * 3600 -"""Difference between NTP and Unix epoch (2208988800 s)""" - -NTPSTAMP_FRACTION = (10 ^ 6) -"""Timestamp fraction precision (microseconds)""" - -NTPSTAMP_FPRECISION = (2 ^ 32) -"""NTP stamp fraction precision (32 bits)""" - -NTPSTAMP_FUSIZE = int(NTPSTAMP_FPRECISION / NTPSTAMP_FRACTION) -"""NTP stamp fraction unit size""" - - -#------------------------------------------------------------------------------- -# INTERNAL HELPERS -#------------------------------------------------------------------------------- - - -def _to_bin(val, size): - """ - Convert given argument to binary representation. - - Reference: `link <http://pclib.github.io/safari/program/python-cookbook/Text/ch03s05.html>`__ - """ - return bson.Binary(val.to_bytes(size, 'big'), - subtype = MONGODB_BINARY_TYPE) - -def _from_bin(val): - """ - Convert given argument from binary representation. - - Reference: `link <http://pclib.github.io/safari/program/python-cookbook/Text/ch03s05.html>`__ - """ - if isinstance(val, bson.Binary): - val = bytes(val) - return int.from_bytes(val, 'big') - - -#------------------------------------------------------------------------------- -# MONGODB CUSTOM DATATYPES (BOTH CONVERSION DIRECTIONS) -#------------------------------------------------------------------------------- - - -def Timestamp(val): - """ - Conversion of IDEA Timestamp datatype into appropriate MongoDB - representation. - - Forward conversion. - """ - if isinstance(val, datetime.datetime): - # Take datetime as unix timestamp, convert to NTP epoch and shift - # to upper 32bit part of 64bit integer. - res = (int(val.timestamp()) + NTP_TO_EPOCH_DELTA) << 32 - - # Add microsecond part of NTP stamp to lower 32bit part of 64bit - # integer - res = res + (val.microsecond * NTPSTAMP_FUSIZE) - - # Convert to binary - return _to_bin(res, 8) - raise TypeError("Wrong Timestamp, expecting only 'datetime' object") - - -def Timestamp_inv(val): - """ - Convert MongoDB Timestamp datatype into appropriate IDEA object - representation. - - Inverse conversion. - """ - try: - # Unpack binary data directly into upper and lower part. - high, low = struct.unpack(">II", val) - - # Convert NTP epoch to Unix epoch, deal with microseconds. - stamp = high - NTP_TO_EPOCH_DELTA + ((float)(low&0xffffffff))/NTPSTAMP_FPRECISION - - # Format and return appropriate timestamp object. This must always - # return the same result as idea.lite.Timestamp, so either copy that - # code here, or call it directly and shift the blame at the cost of - # performance penalty. - #return datetime.datetime.fromtimestamp(stamp) - return idea.lite.Timestamp(stamp) - except TypeError: - return idea.lite.Timestamp(val) - raise TypeError("Wrong Timestamp, expecting only 'integer' or 'binary' objects") - -def Duration(val): - """ - Conversion of IDEA Duration datatype into appropriate MongoDB - representation. - - Forward conversion. 
- """ - if isinstance(val, datetime.timedelta): - val = val.total_seconds() - return val - raise TypeError("Wrong Duration, expecting only 'timedelta' object") - - -def Duration_inv(val): - """ - Convert MongoDB Duration datatype into appropriate IDEA object - representation. - - Inverse conversion. - """ - try: - val = datetime.timedelta(seconds=float(val)) - return val - except: - pass - raise TypeError("Wrong Duration, expecting only number of seconds as 'float'") - - -def Net4(val): - """ - Convert IDEA Net4 datatypes into appropriate MongoDB - representation. - - Forward conversion. - """ - # Single IP4 address is the most common, so check for that first. - if isinstance(val, ipranges.IP4): - bval = _to_bin(val.ip, 4) - return { - 'min': bval, - 'max': bval, - 'ip': bval, - } - # There is no difference between storage of IP4Net and IP4Range. - elif isinstance(val, (ipranges.IP4Net, ipranges.IP4Range)): - return { - 'min': _to_bin(val.low(), 4), - 'max': _to_bin(val.high(), 4), - } - raise TypeError("Wrong Net4, expecting one of 'ipranges.IP4...' objects") - - -def Net4_inv(val): - """ - Convert MongoDB Net4 datatypes into appropriate IDEA object - representation. - - Inverse conversion. - """ - # Single IP4 address is the most common, so check for that first. - if 'ip' in val: - return ipranges.IP4(_from_bin(val['ip'])) - - # Otherwise it will always be a network range - low = _from_bin(val['min']) - high = _from_bin(val['max']) - rng = ipranges.IP4Range((low, high)) - - # Or we can do even better and it is CIDR network - try: - return ipranges.IP4Net(rng) - except: - return rng - - -def Net6(val): - """ - Convert IDEA Net6 datatypes into appropriate MongoDB - representation. - - Forward conversion. - """ - # Single IP6 address is the most common, so check for that first. - if isinstance(val, ipranges.IP6): - bval = _to_bin(val.ip, 16) - return { - 'min': bval, - 'max': bval, - 'ip': bval, - } - # There is no difference between storage of IP6Net and IP6Range. - elif isinstance(val, (ipranges.IP6Net, ipranges.IP6Range)): - return { - 'min': _to_bin(val.low(), 16), - 'max': _to_bin(val.high(), 16), - } - raise TypeError("Wrong Net6, expecting one of 'ipranges.IP6...' objects") - - -def Net6_inv(val): - """ - Convert MongoDB Net6 datatypes into appropriate IDEA object - representation. - - Inverse conversion. - """ - # Single IP6 address is the most common, so check for that first. - if 'ip' in val: - return ipranges.IP6(_from_bin(val['ip'])) - - # Otherwise it will always be a network range - low = _from_bin(val['min']) - high = _from_bin(val['max']) - rng = ipranges.IP6Range((low, high)) - - # Or we can do even better and it is CIDR network - try: - return ipranges.IP6Net(rng) - except: - return rng - - -#------------------------------------------------------------------------------- -# "INTO MONGO" RELATED STUFF -#------------------------------------------------------------------------------- - - -# -# Define type 'flavour' for IdeaIn class. 
-# -idea_types_in = { - "Boolean": bool, - "Integer": int, - "String": unicode, - "Binary": str, - "ConfidenceFloat": float, - "Version": idea.lite.Version, - "MediaType": idea.lite.MediaType, - "Charset": idea.lite.Charset, - "Encoding": idea.lite.Encoding, - "Handle": idea.lite.Handle, - "ID": idea.lite.ID, - "Timestamp": Timestamp, - "Duration": Duration, - "URI": idea.lite.URI, - "Net4": Net4, - "Net6": Net6, - "Port": idea.lite.Port, - "NSID": idea.lite.NSID, - "MAC": idea.lite.MAC, - "Netname": idea.lite.Netname, - "Hash": idea.lite.Hash, - "EventTag": idea.lite.EventTag, - "ProtocolName": idea.lite.ProtocolName, - "SourceTargetTag": idea.lite.SourceTargetTag, - "NodeTag": idea.lite.NodeTag, - "AttachmentTag": idea.lite.AttachmentTag -} - - -# -# Simplification method from TypedList to list and from TypedDict to dict. -# BSON encoder for MongoDB is unable to encode TypedList objects. It is hardwired -# to treat any unknown object as dict and attempts the conversion, which is ok -# for TypedDict, but fails horribly with TypedList. So far we were not able to -# persuade the BSON library to treat TypeLists as lists, so we have to use -# different approach and provide it with simple datatypes dict and list instead. -# -# This simple wrapper just returns the internal ``.data`` attribute after the -# successfull conversion instead of the object itself. -# -def simplify(obj): - return lambda x: obj(x).data - -# -# Use the simplification wrapper for all IDEA list types. -# -idea_lists_in = {} -for k, v in idea.base.list_types(idea_types_in).items(): - idea_lists_in[k] = simplify(v) - - -# -# Apply type 'flavour' to idea.base.idea_typedef sub-definitions. -# -class SourceTargetDictIn(typedcols.TypedDict): - allow_unknown = True - typedef = idea.base.source_target_dict_typedef(idea_types_in, idea_lists_in) - - -class AttachDictIn(typedcols.TypedDict): - allow_unknown = True - typedef = idea.base.attach_dict_typedef(idea_types_in, idea_lists_in) - - -class NodeDictIn(typedcols.TypedDict): - allow_unknown = True - typedef = idea.base.node_dict_typedef(idea_types_in, idea_lists_in) - - -# -# Apply type 'flavour' to mentat.idea.internal.cesnet_dict_typedef definitions. -# -class CESNETDictIn(typedcols.TypedDict): - allow_unknown = True - typedef = mentat.idea.internal.cesnet_dict_typedef( - idea_types_in, - idea_lists_in, - simplify(typedcols.typed_list("InspectionErrorsList", str)), - simplify(typedcols.typed_list("ResolvedAbusesList", str)) - ) - - -# -# Addon for patching IdeaIn. -# -# Inject MongoDB forward conversion specific data types into mentat.idea.internal -# addon typedef. -# -idea_in_addon = mentat.idea.internal.internal_base_addon_typedef( - idea_types_in, - idea_lists_in, - simplify(CESNETDictIn), -) - - -class IdeaIn(idea.base.IdeaBase): - """ - This class implements datatype conversions to format appropriate for - MongoDB database. - - Despite the fact it is based on ``idea.base.IdeaBase`` class, it is - designed to process IDEA messages based on :py:class:`mentat.idea.internal.Idea` - class. 
- """ - allow_unknown = True - typedef = idea.base.idea_typedef( - idea_types_in, - idea_lists_in, - idea.lite.idea_defaults, - simplify(typedcols.typed_list("SourceList", simplify(SourceTargetDictIn))), - simplify(typedcols.typed_list("TargetList", simplify(SourceTargetDictIn))), - simplify(typedcols.typed_list("AttachList", simplify(AttachDictIn))), - simplify(typedcols.typed_list("NodeList", simplify(NodeDictIn))), - idea_in_addon - ) - - @staticmethod - def json_default(o): - """ - Helper method for JSON serialization of :py:class:`mentat.idea.mongodb.IdeaIn` - messages. - - Example usage: - - .. code-block:: python - - >>>import json - >>>idea_mongo_in = ... - >>>json.dumps(idea_mongo_in, indent=4, sort_keys=True, default=idea_mongo_in.json_default) - """ - if isinstance(o, uuid.UUID): - return str(o) - if isinstance(o, bson.Binary): - return repr(o) - return idea.base.IdeaBase.json_default(o) - - -#------------------------------------------------------------------------------- -# "OUT OF MONGO" RELATED STUFF -#------------------------------------------------------------------------------- - - -# -# Define type 'flavour' for IdeaIn class. -# -idea_types_out = { - "Boolean": bool, - "Integer": int, - "String": unicode, - "Binary": str, - "ConfidenceFloat": float, - "Version": idea.lite.Version, - "MediaType": idea.lite.MediaType, - "Charset": idea.lite.Charset, - "Encoding": idea.lite.Encoding, - "Handle": idea.lite.Handle, - "ID": idea.lite.ID, - "Timestamp": Timestamp_inv, - "Duration": Duration_inv, - "URI": idea.lite.URI, - "Net4": Net4_inv, - "Net6": Net6_inv, - "Port": idea.lite.Port, - "NSID": idea.lite.NSID, - "MAC": idea.lite.MAC, - "Netname": idea.lite.Netname, - "Hash": idea.lite.Hash, - "EventTag": idea.lite.EventTag, - "ProtocolName": idea.lite.ProtocolName, - "SourceTargetTag": idea.lite.SourceTargetTag, - "NodeTag": idea.lite.NodeTag, - "AttachmentTag": idea.lite.AttachmentTag -} - - -idea_lists_out = idea.base.list_types(idea_types_out) - - -# -# Apply type 'flavour' to idea.base.idea_typedef sub-definitions. -# -class SourceTargetDictOut(typedcols.TypedDict): - allow_unknown = True - typedef = idea.base.source_target_dict_typedef(idea_types_out, idea_lists_out) - - -class AttachDictOut(typedcols.TypedDict): - allow_unknown = True - typedef = idea.base.attach_dict_typedef(idea_types_out, idea_lists_out) - - -class NodeDictOut(typedcols.TypedDict): - allow_unknown = True - typedef = idea.base.node_dict_typedef(idea_types_out, idea_lists_out) - - -# -# Apply type 'flavour' to mentat.idea.internal.cesnet_dict_typedef definitions. -# -class CESNETDictOut(typedcols.TypedDict): - allow_unknown = True - typedef = mentat.idea.internal.cesnet_dict_typedef( - idea_types_out, - idea_lists_out, - typedcols.typed_list("InspectionErrorsList", str), - typedcols.typed_list("ResolvedAbusesList", str) - ) - -# -# Addon for patching IdeaOut. -# -# Inject MongoDB inverse conversion specific data types into mentat.idea.internal -# addon typedef. Additionally, there are few extra database specific attributes -# in the message, that need to be stripped off before. This is a legacy feature -# and will be deprecated and removed in the future. 
-# -idea_out_addon = mentat.idea.internal.internal_base_addon_typedef( - idea_types_out, - idea_lists_out, - CESNETDictOut -) -idea_out_addon['_id'] = typedcols.Discard -idea_out_addon['class2'] = typedcols.Discard -idea_out_addon['msg_raw2'] = typedcols.Discard -idea_out_addon['ts'] = typedcols.Discard -idea_out_addon['ts_u'] = typedcols.Discard - - -class IdeaOut(idea.base.IdeaBase): - """ - This class implements datatype conversions to format appropriate for - MongoDB database. - - Despite the fact it is based on ``idea.base.IdeaBase`` class, it is - designed to process IDEA messages based on :py:mod:`mentat.idea.internal` - module. - """ - allow_unknown = True - typedef = idea.base.idea_typedef( - idea_types_out, - idea_lists_out, - idea.lite.idea_defaults, - typedcols.typed_list("SourceList", SourceTargetDictOut), - typedcols.typed_list("TargetList", SourceTargetDictOut), - typedcols.typed_list("AttachList", AttachDictOut), - typedcols.typed_list("NodeList", NodeDictOut), - idea_out_addon - ) - - json_default = staticmethod(mentat.idea.internal.Idea.json_default) - """ - Helper method for JSON serialization of :py:class:`mentat.idea.mongodb.IdeaOut` - messages. - - Example usage: - - .. code-block:: python - - >>>import json - >>>idea_mongo_out = ... - >>>json.dumps(idea_mongo_out, indent=4, sort_keys=True, default=idea_mongo_out.json_default) - """ diff --git a/lib/mentat/idea/sqldb.py b/lib/mentat/idea/sqldb.py index 49a52b1e1..15a348e83 100644 --- a/lib/mentat/idea/sqldb.py +++ b/lib/mentat/idea/sqldb.py @@ -125,96 +125,47 @@ class Idea: # pylint: disable=locally-disabled,too-many-instance-attributes,t # to bytes. The event will be stored as a BYTEA datatype within the # PostgreSQL database istead of JSONB, because PostgreSQL is unable to # store JSON objects that contain null characters anywhere in the content. - self.jsonb = psycopg2.Binary(idea_event.to_json().encode('utf-8')) - - self.ident = idea_event['ID'] - self.detecttime = idea_event['DetectTime'] - self.category = list(idea_event.get('Category', list())) - self.description = idea_event.get('Description', None) + self.jsonb = psycopg2.Binary(idea_event.to_json().encode('utf-8')) + self.ident = idea_event.get_id() + self.detecttime = idea_event.get_detect_time() + self.category = idea_event.get_categories() + self.description = idea_event.get_description() # Source IP (both v4 a v6 in single attribute). self.source_ip = IPList() - item_list = self._get_subitems(idea_event, 'Source', 'IP4') - self.source_ip.extend([ip for ip in item_list]) - item_list = self._get_subitems(idea_event, 'Source', 'IP6') - self.source_ip.extend([ip for ip in item_list]) + self.source_ip.extend([ip for ip in idea_event.get_addresses('Source')]) # Target IP (both v4 a v6 in single attribute). self.target_ip = IPList() - item_list = self._get_subitems(idea_event, 'Target', 'IP4') - self.target_ip.extend([ip for ip in item_list]) - item_list = self._get_subitems(idea_event, 'Target', 'IP6') - self.target_ip.extend([ip for ip in item_list]) + self.target_ip.extend([ip for ip in idea_event.get_addresses('Target')]) + + # Ports. + self.source_port = idea_event.get_ports('Source') + self.target_port = idea_event.get_ports('Target') # Types (tags). - self.source_type = sorted(list(set(self._get_subitems(idea_event, 'Source', 'Type')))) - self.target_type = sorted(list(set(self._get_subitems(idea_event, 'Target', 'Type')))) + self.source_type = idea_event.get_types('Source') + self.target_type = idea_event.get_types('Target') - # Ports. 
- self.source_port = sorted(list(set(self._get_subitems(idea_event, 'Source', 'Port')))) - self.target_port = sorted(list(set(self._get_subitems(idea_event, 'Target', 'Port')))) # Protocol (both source and target in single attribute). - self.protocol = set() - source_proto = self._get_subitems(idea_event, 'Source', 'Proto') - for item in source_proto: - self.protocol.add(item.lower()) - target_proto = self._get_subitems(idea_event, 'Target', 'Proto') - for item in target_proto: - self.protocol.add(item.lower()) - self.protocol = sorted(list(self.protocol)) - - self.node_name = [node['Name'].lower() for node in idea_event['Node']] - self.node_type = sorted(list(set(self._get_subitems(idea_event, 'Node', 'Type')))) - - self.cesnet_resolvedabuses = list(idea_event.get('_CESNET', {}).get('ResolvedAbuses', list())) - self.cesnet_storagetime = idea_event['_CESNET']['StorageTime'] - self.cesnet_eventclass = idea_event.get('_CESNET', {}).get('EventClass', None) - self.cesnet_eventseverity = idea_event.get('_CESNET', {}).get('EventSeverity', None) + source_proto = idea_event.get_protocols('Source') + target_proto = idea_event.get_protocols('Target') + self.protocol = sorted(list(set(source_proto + target_proto))) + + self.node_name = idea_event.get_detectors() + self.node_type = idea_event.get_types('Node') + + self.cesnet_resolvedabuses = idea_event.get_abuses() + self.cesnet_storagetime = idea_event.get_storage_time() + self.cesnet_eventclass = idea_event.get_class() + self.cesnet_eventseverity = idea_event.get_severity() self.cesnet_inspectionerrors = list(idea_event.get('_CESNET', {}).get('InspectionErrors', list())) if self.cesnet_eventclass: self.cesnet_eventclass = self.cesnet_eventclass.lower() if self.cesnet_eventseverity: self.cesnet_eventseverity = self.cesnet_eventseverity.lower() - ## - ## After migrating from MongoDB to PostgreSQL and deprecating MongoDB - ## following code may be uncommented and replace the lines above. - ## - #self.jsonb = psycopg2.Binary(idea_event.to_json().encode('utf-8')) - #self.ident = idea_event.get_id() - #self.detecttime = idea_event.get_detect_time() - #self.category = idea_event.get_categories() - #self.description = idea_event.get_description() - - ## Source IP (both v4 a v6 in single attribute). - #self.source_ip = IPList() - #self.source_ip.extend([ip for ip in idea_event.get_addresses('Source')]) - - ## Target IP (both v4 a v6 in single attribute). - #self.target_ip = IPList() - #self.target_ip.extend([ip for ip in idea_event.get_addresses('Target')]) - - ## Ports. - #self.source_port = idea_event.get_ports('Source') - #self.target_port = idea_event.get_ports('Target') - - ## Types (tags). - #self.source_type = idea_event.get_types('Source') - #self.target_type = idea_event.get_types('Target') - - ## Protocol (both source and target in single attribute). 
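The hunk above swaps the manual ``_get_subitems()`` extraction for the accessor methods of ``mentat.idea.internal.Idea``. A usage sketch of the resulting flattening, restricted to the accessors that appear in the added lines; the input message is a minimal, assumed example built from the keys the IDEA format requires::

    import mentat.idea.internal

    # Minimal IDEA message (assumed example; Format, ID, DetectTime and
    # Category are the mandatory keys).
    idea_raw = {
        'Format': 'IDEA0',
        'ID': '4390fc3f-c753-4a3e-bc83-1b44f24baf75',
        'DetectTime': '2012-11-03T10:00:07Z',
        'Category': ['Test'],
        'Source': [{'IP4': ['192.168.1.1'], 'Proto': ['TCP']}],
        'Target': [{'IP4': ['192.168.0.2-192.168.0.5'], 'Port': [22]}],
        'Node': [{'Name': 'org.example.detector', 'Type': ['Relay']}],
    }
    idea_event = mentat.idea.internal.Idea(idea_raw)

    flattened = {
        'source_ip':   list(idea_event.get_addresses('Source')),
        'target_ip':   list(idea_event.get_addresses('Target')),
        'source_port': idea_event.get_ports('Source'),
        'target_port': idea_event.get_ports('Target'),
        'source_type': idea_event.get_types('Source'),
        'target_type': idea_event.get_types('Target'),
        # Source and target protocols share a single column, deduplicated
        # and sorted, as in the hunk above.
        'protocol':    sorted(set(idea_event.get_protocols('Source')
                                  + idea_event.get_protocols('Target'))),
        'node_name':   idea_event.get_detectors(),
        'node_type':   idea_event.get_types('Node'),
    }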
- #source_proto = idea_event.get_protocols('Source') - #target_proto = idea_event.get_protocols('Target') - #self.protocol = sorted(list(set(source_proto + target_proto))) - - #self.node_name = idea_event.get_detectors() - #self.node_type = idea_event.get_types('Node') - - #self.cesnet_resolvedabuses = idea_event.get_abuses() - #self.cesnet_storagetime = idea_event.get_storage_time() - - @staticmethod def _get_subitems(obj, key, subkey): """ diff --git a/lib/mentat/idea/test_sqldb.py b/lib/mentat/idea/test_sqldb.py index 0672f3c7e..3a8ba4450 100644 --- a/lib/mentat/idea/test_sqldb.py +++ b/lib/mentat/idea/test_sqldb.py @@ -146,8 +146,8 @@ class TestMentatIdeaJSON(unittest.TestCase): self.assertEqual(idea_sqldb.ident, '4390fc3f-c753-4a3e-bc83-1b44f24baf75') self.assertEqual(idea_sqldb.detecttime.isoformat(), '2012-11-03T10:00:07') self.assertEqual(idea_sqldb.source_ip, [ - ipranges.IP4Range('192.168.0.2-192.168.0.5'), ipranges.IP4Net('192.168.0.10/25'), + ipranges.IP4Range('192.168.0.2-192.168.0.5'), ipranges.IP4('192.168.1.1'), ipranges.IP6Net('2001:db8::ff00:42:0/112'), ipranges.IP6('2001:db8::ff00:42:50') diff --git a/lib/mentat/plugin/test/test.py b/lib/mentat/plugin/test/test.py index ccc33de71..7550cf1f2 100644 --- a/lib/mentat/plugin/test/test.py +++ b/lib/mentat/plugin/test/test.py @@ -55,12 +55,6 @@ class WhoisEnricherPlugin: """ Preprocess configuration for whois module. """ - if name == 'MongodbWhoisModule': - if not 'collection' in conf: - db_settings = daemon.c(mentat.const.CKEY_CORE_DATABASE) - db_config = db_settings[mentat.const.CKEY_CORE_DATABASE_CONFIG] - conf['collection'] = db_config['col_groups'] - conf['collection'] = daemon.database.collection(conf['collection']) return conf def _bootstrap_whois_service(self, daemon, modules): diff --git a/lib/mentat/stats/idea.py b/lib/mentat/stats/idea.py index 5b9b08f6e..4302fa3cc 100644 --- a/lib/mentat/stats/idea.py +++ b/lib/mentat/stats/idea.py @@ -109,45 +109,6 @@ LIST_OPTIMAL_STEPS = ( ) """List of optimal timeline steps. This list is populated with values, that round nicelly in time calculations.""" -#------------------------------------------------------------------------------- - - -def unescape_stats(stats): - """ - Unescape ``(dot)``s in statistic keyword names with ``.`` (because ``.`` is reserved - in MongoDB). - - .. todo:: - - This feature is deprecated and will be removed with the removal of MongoDB - dependency from codebase. - - :param dict stats: Structure containing single statistic category. - :return: Updated structure containing statistics. - :rtype: dict - """ - for key in LIST_CALCSTAT_KEYS: - if key in stats: - stats[key] = unescape_dict(stats[key]) - return stats - -def unescape_stats_full(stats): - """ - Unescape all statistic categories with :py:func:`unescape_stats`. - - .. todo:: - - Perhaps move this feature into :py:mod:`mentat.datatype.mongodb` library. - - :param dict stats: Structure containing single statistic category. - :return: Updated structure containing statistics. 
- :rtype: dict - """ - for key in ('internal', 'external', 'overall'): - if key in stats: - stats[key] = unescape_stats(stats[key]) - return stats - #------------------------------------------------------------------------------- diff --git a/lib/mentat/storage.py b/lib/mentat/storage.py deleted file mode 100644 index 76d77b27d..000000000 --- a/lib/mentat/storage.py +++ /dev/null @@ -1,433 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# This file is part of Mentat system (https://mentat.cesnet.cz/). -# -# Copyright (C) since 2011 CESNET, z.s.p.o (http://www.ces.net/) -# Use of this source is governed by the MIT license, see LICENSE file. -#------------------------------------------------------------------------------- - -""" -Database storage abstraction layer. The current implementation is hardwired to -work only with `MongoDB <https://www.mongodb.com/>`__ database using official -`pymongo <https://api.mongodb.com/python/current/api/pymongo/index.html>`__ -driver. - -.. warning:: - - Deprecated code. - -""" - - -__author__ = "Jan Mach <jan.mach@cesnet.cz>" -__credits__ = "Pavel Kácha <pavel.kacha@cesnet.cz>, Andrea Kropáčová <andrea.kropacova@cesnet.cz>" - - -import copy -import pymongo - -from mentat.const import CKEY_CORE_DATABASE, CKEY_CORE_DATABASE_CONNECTION, \ - CKEY_CORE_DATABASE_CONFIG, CKEY_CORE_DATABASE_SCHEMA - - -IDX_TR_TBL = { - "descending": pymongo.DESCENDING, - "ascending": pymongo.ASCENDING, - "desc": pymongo.DESCENDING, - "asc": pymongo.ASCENDING, - "d": pymongo.DESCENDING, - "a": pymongo.ASCENDING, -} -"""Translation table of index directions from human readable form into *pymongo* defined constants.""" - - -_MANAGER = None - - -class Collection: - """ - Proxy object for working with database collections. - """ - - def __init__(self, name, collection): - """ - Create new collection proxy object. - - :param str name: Name of the collection. - :param collection: Collection handler. - """ - self.name = name - self._collection = collection - - def __getattr__(self, name): - """ - Proxy method for accessing additional attributes of underlying :py:class:`pymongo.collection.Collection`. - """ - return getattr(self._collection, name) - - def _format_index_spec(self, index): - """ - Convert custom collection index specification into appropriate format. - """ - if len(index) == 2 and isinstance(index[0], str) and isinstance(index[1], str): - return [(index[0], IDX_TR_TBL[index[1].lower()])] - - result = [] - for idx in index: - result = result + self._format_index_spec(idx) - return result - - def get_current_index_map(self): - """ - Get current index map for current collection. - """ - imap = {} - try: - ilist = self.list_indexes() - for idx in ilist: - imap[idx['name']] = 1 - except: - pass - return imap - - def index_create(self, index, **kwargs): - """ - Create index accoding to given specification. - """ - idx_spec = self._format_index_spec(index) - self._collection.create_index(idx_spec, **kwargs) - - -class Database: - """ - Proxy object for working with storage databases. - """ - - def __init__(self, name, database): - """ - Create new database proxy object. - - :param str name: Name of the database. - :param database: Database handler. - """ - self.name = name - self._database = database - self._collections = {} - - def collection(self, collection): - """ - Return reference to storage database collection with given name. 
This method - caches the returned references for later use. - - :param str collection: Name of requested collection. - :return: Reference to storage database collection proxy object. - :rtype: mentat.storage.Collection - """ - if not self._collections.get(collection, False): - self._collections[collection] = Collection(collection, self._database[collection]) - return self._collections[collection] - - def collection_names(self): - """ - Return list of all available collection names within current storage database. - - :return: List of all available collection names as list of strings. - :rtype: list - """ - return self._database.collection_names(include_system_collections = False) - - def command(self, command, **kwargs): - """ - Run given database command and pass down optional additional arguments. - """ - return self._database.command(command, **kwargs) - - def collection_info(self, collection): - """ - Detect and return information about current storage database collection. - - :return: Complex dictionary with various data. - :rtype: dict - """ - return self._database.command('collstats', collection) - - def database_info(self): - """ - Detect and return information about current storage database. - - :return: Complex dictionary with various data. - :rtype: dict - """ - return self._database.command('dbstats') - - -class Storage: - """ - Proxy object for working with persistent storages. - """ - - def __init__(self, host = 'localhost', port = 27017, timeout = None, **kwargs): - """ - Open and cache connection to storage. - - :param str host: Connection URI. - :param int port: Connection port. - :param int timeout: Connection timeout. - :param kwargs: Additional connection arguments. - """ - if timeout: - kwargs['socketTimeoutMS'] = timeout - self._storage = pymongo.MongoClient(host, port, **kwargs) - self._databases = {} - - def close(self): - """ - Close current database connection. - """ - self._storage.close() - - def database(self, database): - """ - Return reference to storage database with given name. This method caches - the returned references for later use. - - :param str database: Name of requested database. - :return: Reference to storage database proxy object. - :rtype: mentat.storage.Database - """ - if not self._databases.get(database, False): - self._databases[database] = Database(database, self._storage[database]) - return self._databases[database] - - def collection(self, database, collection): - """ - Return reference to storage database collection with given name. This method caches - the returned references for later use. - - :param str database: Name of requested database. - :param str collection: Name of requested collection. - :return: Reference to storage database collection proxy object. - :rtype: mentat.storage.Collection - """ - return self.database(database).collection(collection) - - def database_names(self): - """ - Return list of all available database names within current storage. - - :return: List of all available database names as list of strings. - :rtype: list - """ - return self._storage.database_names() - - def database_walk(self): - """ - Return list of all available database names within current storage and - for each of them list of all currently available collection names. - - :return: Dict with database names as keys and lists of collection names as values. 
- :rtype: dict - """ - result = {} - dbs = self.database_names() - for dbn in dbs: - result[dbn] = self.database(dbn).collection_names() - return result - - def collection_info(self, database, collection): - """ - Detect and return information about current storage database collection. - - :param str database: Name of requested database. - :param str collection: Name of requested collection. - :return: Complex dictionary with various data. - :rtype: dict - """ - return self.database(database).collection_info(collection) - - def database_info(self, database): - """ - Detect and return information about current storage database. - - :param str database: Name of requested database. - :return: Complex dictionary with various data. - :rtype: dict - """ - return self.database(database).database_info() - - def storage_info(self): - """ - Detect and return information about current storage. - - :return: Complex dictionary with various data. - :rtype: dict - """ - return self._storage.server_info() - - -class StorageManager: - """ - Class representing a custom StorageManager capable of understanding and parsing - Mentat system core configurations and enabling easy access to preconfigured - database collections via indirrect handles (identifiers). - """ - - def __init__(self, core_config, updates = None): - """ - Initialize StorageManager object with full core configuration tree structure. - - :param dict core_config: Mentat core configuration structure. - :param dict updates: Optional configuration updates (same structure as ``core_config``). - """ - self._dbconnection = {} - self._dbconfig = {} - self._dbschema = {} - - self._storage = None - - self._configure_connection(core_config, updates) - self._configure_dbconfig(core_config, updates) - self._configure_dbschema(core_config, updates) - - def _configure_connection(self, core_config, updates): - """ - Internal sub-initialization helper: Configure storage connection parameters - and optionally merge them with additional updates. - - :param dict core_config: Mentat core configuration structure. - :param dict updates: Optional configuration updates (same structure as ``core_config``). - """ - self._dbconnection = copy.deepcopy(core_config[CKEY_CORE_DATABASE][CKEY_CORE_DATABASE_CONNECTION]) - - # TODO: Remove 'args' from configuration. - if 'args' in self._dbconnection: - del self._dbconnection['args'] - - if updates and CKEY_CORE_DATABASE in updates and CKEY_CORE_DATABASE_CONNECTION in updates[CKEY_CORE_DATABASE]: - self._dbconnection.update( - updates[CKEY_CORE_DATABASE][CKEY_CORE_DATABASE_CONNECTION] - ) - - def _configure_dbconfig(self, core_config, updates): - """ - Internal sub-initialization helper: Configure database structure parameters - and optionally merge them with additional updates. - - :param dict core_config: Mentat core configuration structure. - :param dict updates: Optional configuration updates (same structure as ``core_config``). - """ - self._dbconfig = copy.deepcopy(core_config[CKEY_CORE_DATABASE][CKEY_CORE_DATABASE_CONFIG]) - - if updates and CKEY_CORE_DATABASE in updates and CKEY_CORE_DATABASE_CONFIG in updates[CKEY_CORE_DATABASE]: - self._dbconfig.update( - updates[CKEY_CORE_DATABASE][CKEY_CORE_DATABASE_CONFIG] - ) - - def _configure_dbschema(self, core_config, updates): - """ - Internal sub-initialization helper: Configure database schema parameters - and optionally merge them with additional updates. - - :param dict core_config: Mentat core configuration structure. 
- :param dict updates: Optional configuration updates (same structure as ``core_config``). - """ - self._dbschema = copy.deepcopy(core_config[CKEY_CORE_DATABASE][CKEY_CORE_DATABASE_SCHEMA]) - - if updates and CKEY_CORE_DATABASE in updates and CKEY_CORE_DATABASE_SCHEMA in updates[CKEY_CORE_DATABASE]: - self._dbschema.update( - updates[CKEY_CORE_DATABASE][CKEY_CORE_DATABASE_SCHEMA] - ) - - - #--------------------------------------------------------------------------- - - - def close(self): - """ - Close internal storage connection. - """ - if self._storage: - self._storage.close() - self._storage = None - - def service(self): - """ - Return handle to storage connection service according to internal configurations. - - :return: Reference to storage connection object. - :rtype: mentat.storage.Storage - """ - if not self._storage: - self._storage = Storage(**self._dbconnection) - return self._storage - - def database(self, db_id): - """ - Return reference to storage database given as indirrect handle. - - :param str db_id: Indirrect handle (name) of requested database. - :return: Reference to storage database proxy object. - :rtype: mentat.storage.Database - """ - db_name = self.lookup_name(db_id) - return self.service().database(db_name) - - def collection(self, db_id, col_id): - """ - Return reference to storage database collection given as indirrect handle. - - :param str db_id: Indirrect handle (name) of requested database. - :param str col_id: Indirrect handle (name) of requested collection. - :return: Reference to storage database collection proxy object. - :rtype: mentat.storage.Collection - """ - db_name = self.lookup_name(db_id) - col_name = self.lookup_name(col_id) - return self.service().collection(db_name, col_name) - - def lookup_name(self, ident): - """ - Lookup database or collection name for given indirrect handle. - - :param str ident: Indirrect handle. - :return: Name of the database or collection. - :rtype: str - """ - return self._dbconfig[ident] - - def storage_info(self): - """ - Detect and return information about current storage. - - :return: Complex dictionary with various data. - :rtype: dict - """ - return self.service().storage_info() - - -#------------------------------------------------------------------------------- - - -def init(core_config, updates = None): - """ - (Re-)Initialize :py:class:`GeoipServiceManager` instance at module level and - store the refence within module. - """ - global _MANAGER - _MANAGER = StorageManager(core_config, updates) - - -def manager(): - """ - Obtain reference to :py:class:`GeoipServiceManager` instance stored at module - level. - """ - return _MANAGER - - -def service(): - """ - Obtain reference to :py:class:`GeoipService` instance from module level manager. - """ - return manager().service() diff --git a/lib/mentat/test_storage.py b/lib/mentat/test_storage.py deleted file mode 100644 index 4cd212896..000000000 --- a/lib/mentat/test_storage.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# This file is part of Mentat system (https://mentat.cesnet.cz/). -# -# Copyright (C) since 2011 CESNET, z.s.p.o (http://www.ces.net/) -# Use of this source is governed by the MIT license, see LICENSE file. -#------------------------------------------------------------------------------- - -""" -Unit test module for testing the :py:mod:`mentat.storage` module. 
-""" - - -__author__ = "Jan Mach <jan.mach@cesnet.cz>" -__credits__ = "Pavel Kácha <pavel.kacha@cesnet.cz>, Andrea Kropáčová <andrea.kropacova@cesnet.cz>" - - -import unittest - -# -# Custom libraries -# -import mentat.storage - - -#------------------------------------------------------------------------------- -# NOTE: Sorry for the long lines in this file. They are deliberate, because the -# assertion permutations are (IMHO) more readable this way. -#------------------------------------------------------------------------------- - - -class TestMentatStorage(unittest.TestCase): - """ - Unit test class for testing the :py:mod:`mentat.storage` module. - """ - - def test_01_basic(self): - """ - Perform the basic storage connection and operativity tests. - """ - storage = mentat.storage.Storage() - database = storage.database('mentat_unittest') - collection = storage.collection('mentat_unittest', 'mentat_storage') - - self.assertEqual(database.name, 'mentat_unittest') - self.assertEqual(collection.name, 'mentat_storage') - - def test_02_service_manager(self): - """ - Perform the tests of storage service manager. - """ - manager = mentat.storage.StorageManager( - { - "__core__database": { - "connection": { - "host": "localhost", - "port": 27017, - "timeout": 20000 - }, - "config": { - "db": "mentat_unittest", - "col": "mentat_storage_base" - }, - "schema": {} - } - },{ - "__core__database": { - "connection": { - "timeout": 5000 - }, - "config": { - "col": "mentat_storage" - } - } - } - ) - self.assertEqual(manager._dbconnection, {'host': 'localhost', 'port': 27017, 'timeout': 5000}) - self.assertEqual(manager._dbconfig, {'col': 'mentat_storage', 'db': 'mentat_unittest'}) - self.assertEqual(manager._dbschema, {}) - - - database = manager.database('db') - collection = manager.collection('db', 'col') - - self.assertEqual(database.name, 'mentat_unittest') - self.assertEqual(collection.name, 'mentat_storage') - - def test_03_module_service(self): - """ - Perform the tests of module service. 
- """ - mentat.storage.init( - { - "__core__database": { - "connection": { - "host": "localhost", - "port": 27017, - "timeout": 20000 - }, - "config": { - "db": "mentat_unittest", - "col": "mentat_storage_base" - }, - "schema": {} - } - },{ - "__core__database": { - "connection": { - "timeout": 5000 - }, - "config": { - "col": "mentat_storage" - } - } - } - ) - - manager = mentat.storage.manager() - self.assertEqual(manager._dbconnection, {'host': 'localhost', 'port': 27017, 'timeout': 5000}) - self.assertEqual(manager._dbconfig, {'col': 'mentat_storage', 'db': 'mentat_unittest'}) - self.assertEqual(manager._dbschema, {}) - - - database = manager.database('db') - collection = manager.collection('db', 'col') - - self.assertEqual(database.name, 'mentat_unittest') - self.assertEqual(collection.name, 'mentat_storage') - - -#------------------------------------------------------------------------------- - - -if __name__ == "__main__": - unittest.main() diff --git a/scripts/depcheck.py b/scripts/depcheck.py index 08964ae35..a54ac732f 100755 --- a/scripts/depcheck.py +++ b/scripts/depcheck.py @@ -49,7 +49,7 @@ if __name__ == "__main__": VERBOSE = args.verbose required = ( - 'ply', 'pymongo', 'pydgets', 'pyzenkit', 'pynspect', 'ipranges', 'typedcols', + 'ply', 'pydgets', 'pyzenkit', 'pynspect', 'ipranges', 'typedcols', 'idea', 'geoip2', 'flask', 'flask_login', 'flask_mail', 'flask_babel', 'flask_principal', 'flask_wtf', 'flask_debugtoolbar' ) diff --git a/scripts/depmanage.py b/scripts/depmanage.py index 9c4dd2d9b..a051a4426 100755 --- a/scripts/depmanage.py +++ b/scripts/depmanage.py @@ -50,7 +50,7 @@ if __name__ == "__main__": VERBOSE = args.verbose - required = ('ply','pymongo','pynspect','pydgets','pyzenkit','ipranges','typedcols','idea-format') + required = ('ply','pynspect','pydgets','pyzenkit','ipranges','typedcols','idea-format') if args.action == 'install': for lib in required: diff --git a/scripts/mentat-dbfix-alerts.py b/scripts/mentat-dbfix-alerts.py deleted file mode 100755 index a109bb80b..000000000 --- a/scripts/mentat-dbfix-alerts.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# Copyright (C) since 2011 CESNET, z.s.p.o -# Use of this source is governed by the MIT license, see LICENSE file. 
-#------------------------------------------------------------------------------- - -import argparse -import time -import sys -from pymongo import MongoClient -from pprint import pprint - -def drawProgressBar(percent, done, barLen = 50): - sys.stdout.write("\r") - progress = "" - for i in range(barLen): - if i < int(barLen * percent): - progress += "=" - else: - progress += " " - sys.stdout.write(" [%s] %.2f%%" % (progress, percent * 100)) - sys.stdout.flush() - -mongoclient = MongoClient() - -db = mongoclient.mentat -ts_start = time.time() - -# Calculate the total number of all alerts in database -col = db.alerts -cnt_all = col.count() -print("Total number of items in 'alerts' collection: {}".format(cnt_all)) - -# Fetch items to be updated -to_update = col.find({"_CESNET.StorageTime": {"$exists": False}}) -cnt_update = to_update.count() -print("Number of items to be updated: {} [{:5.2f}%]".format(cnt_update, (cnt_update / cnt_all * 100))) - -processed = 0 -updated = 0 -percent = 0 - -print("Starting the processing\n") -for item in to_update: - #print("ID: {}".format(item['_id'])) - #print("Before update") - #pprint(item) - result = col.update_one( - {'_id': item['_id']}, - { - "$set": { - "_CESNET.StorageTime": item['ts_u'] - } - } - ) - #print("Update result") - #pprint(result.raw_result) - processed += 1 - if result.modified_count: - updated += result.modified_count - percent = (processed / cnt_update) - drawProgressBar(percent, processed) - #print("After update") - #pprint(col.find_one({"_id": item['_id']})) - -ts_end = time.time() -dur = ts_end - ts_start - -print("\n\n------------------------------------------------------") -print("Processing duration: {:.2f}s".format(dur)) -print("Processed total of items: {}".format(processed)) -print("Successfully modified items: {}".format(updated)) diff --git a/scripts/mentat-dump-analyzer.py b/scripts/mentat-dump-analyzer.py deleted file mode 100755 index 13831dd9f..000000000 --- a/scripts/mentat-dump-analyzer.py +++ /dev/null @@ -1,290 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# Copyright (C) since 2011 CESNET, z.s.p.o -# Use of this source is governed by the MIT license, see LICENSE file. -#------------------------------------------------------------------------------- - -import os -import time -import math -import json -import csv -import locale -import logging -import pprint - -import geoip2.database -import geoip2.errors - -# Custom libraries -import pyzenkit.zenscript - -# Global constants -DIR_DUMP = '/var/tmp' -DIR_RESULT = '/var/tmp' - -class MentatDumpAnalyzerScript(pyzenkit.zenscript.ZenScript): - """ - Script providing Mentat alert database dump functions and features - """ - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def get_default_operation(self): - """ - Return the name of a default script operation. 
- """ - return 'dump' - - def configure_defaults_sub(self, parser, config): - """ - Reimplementation of template stub for adding more command line options - """ - parser.add_argument('--dump-file', help='set the name of a dump file') - self.config['dump_file'] = os.path.join(DIR_DUMP, 'mentat-dump.json') - - parser.add_argument('--result-file', help='set the name of a result file') - self.config['result_file'] = os.path.join(DIR_RESULT, 'mentat-dump-analysis.json') - - parser.add_argument('--dict-country', help='path to the country dictionary file') - self.config['dict_country'] = os.path.join(DIR_RESULT, 'country-names.csv') - - parser.add_argument('--limit', help='limit the number of objects to process, default 0 for no limit', type=int) - self.config['limit'] = 0 - - parser.add_argument('--emphasize', help='what country to emphasize') - self.config['emphasize'] = None - - return (parser, config) - - def cbk_operation_analyze(self): - """ - - """ - reader = geoip2.database.Reader("/home/mek/GeoLite2-Country.mmdb") - - dumpfile = self.config.get('dump_file') - mf = open(dumpfile, 'r') - stats = {'cnt_all': 0, 'cnt_ips': 0, 'cnt_ctg': 0, 'cnt_abs': 0, 'by_abuse': {}, 'by_country': {}, 'by_category': {}, 'by_country_category': {}, 'by_country_abuse': {}} - for line in mf: - if self.config.get('limit') and self.config.get('limit') < (stats['cnt_all'] + 1): - break - - stats['cnt_all'] += 1 - dataobj = json.loads(line) - self.logger.debug("Loaded data object:\n{}".format(pprint.pformat(dataobj,indent=4))) - - abuses = dataobj.get('_CESNET', {}).get('ResolvedAbuses', ['??']) - categories = dataobj.get('Category') - sources = dataobj.get('Source', dataobj.get('Target')) - for abuse in abuses: - stats['cnt_abs'] += 1 - stats['by_abuse'][abuse] = stats['by_abuse'].get(abuse, 0) + 1 - for ctg in categories: - stats['cnt_ctg'] += 1 - stats['by_category'][ctg] = stats['by_category'].get(ctg, 0) + 1 - - for s in sources: - if not 'IP4' in s: - continue - for ip in s['IP4']: - try: - ctry = reader.country(ip).country.iso_code - except geoip2.errors.AddressNotFoundError: - ctry = None - - if ctry is None: - ctry = "??" 
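                    # At this point ctry holds an ISO country code such as 'CZ'
                    # returned by reader.country(), or '??' when geoip2 raised
                    # AddressNotFoundError for an address missing from the
                    # GeoLite2 database.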
- stats['cnt_ips'] += 1 - stats['by_country'][ctry] = stats['by_country'].get(ctry, 0) + 1 - self.logger.debug("Geolocation for IP '{}': '{}'".format(ip, ctry)) - - for ctg in categories: - stats['by_country_category'][ctry] = stats['by_country_category'].get(ctry, {}) - stats['by_country_category'][ctry][ctg] = stats['by_country_category'][ctry].get(ctg, 0) + 1 - - for abuse in abuses: - stats['by_country_abuse'][ctry] = stats['by_country_abuse'].get(ctry, {}) - stats['by_country_abuse'][ctry][abuse] = stats['by_country_abuse'][ctry].get(abuse, 0) + 1 - - if (stats['cnt_all'] % 100000) == 0: - self.logger.info("Intermediate results after processing '{}' objects:\n{}".format(stats['cnt_all'],pprint.pformat(stats,indent=4))) - - mf.close() - self.logger.info("Final results after processing '{}' objects:\n{}".format(stats['cnt_all'],pprint.pformat(stats,indent=4))) - resultfile = self.config.get('result_file') - self.json_save(resultfile, stats) - self.logger.info("Alerts processed from file '{}'".format(dumpfile)) - self.logger.info("Result stored to file '{}'".format(resultfile)) - - def cbk_operation_display(self): - """ - - """ - trdict = {} - resultfile = self.config.get('result_file') - data = self.json_load(resultfile) - - csvfile = self.config.get('dict_country') - with open(csvfile, newline='') as f: - reader = csv.reader(f) - for row in reader: - trdict[row[2].strip()] = row[0].strip() - - locale.setlocale(locale.LC_ALL, 'en_US') - - detail = [] - - print("Statistics per country:\n") - genexp = ((k, data['by_country'][k]) for k in sorted(data['by_country'], key=data['by_country'].get, reverse=True)) - cnt = 0 - rest = 0 - hidden = 0 - sum_v = 0 - for k, v in genexp: - cnt += 1 - if len(detail) < 3 or self.config.get('emphasize') == k: - detail.append(k) - if self.config.get('limit') and self.config.get('limit') < cnt: - if not self.config.get('emphasize') == k: - rest += v - hidden += 1 - continue - - percent = (v / data['cnt_ips']) * 100 - sum_v += v - if not self.config.get('emphasize') == k: - print("{:3d}. {:2s} {:50s} {:>12s} {:6.2f}%".format(cnt, ' ' + k + ' ', trdict.get(k, '??'), locale.format("%d", v, grouping=True), percent)) - else: - print("{:3d}. {:2s} {:50s} {:>12s} {:6.2f}%".format(cnt, '*' + k + '*', trdict.get(k, '??'), locale.format("%d", v, grouping=True), percent)) - - if rest > 0: - percent = (rest / data['cnt_ips']) * 100 - print("{:3s} {:2s} {:50s} {:>12s} {:6.2f}%".format(' **', ' ** ', 'REST ({})'.format(hidden), locale.format("%d", rest, grouping=True), percent)) - - percent = ((rest+sum_v) / data['cnt_ips']) * 100 - print("{:3s} {:2s} {:50s} {:>12s} {:6.2f}%".format(' **', ' ** ', 'TOTAL', locale.format("%d", rest+sum_v, grouping=True), percent)) - - print("") - - print("Statistics per category:\n") - genexp = ((k, data['by_category'][k]) for k in sorted(data['by_category'], key=data['by_category'].get, reverse=True)) - cnt = 0 - rest = 0 - hidden = 0 - sum_v = 0 - for k, v in genexp: - cnt += 1 - if self.config.get('limit') and self.config.get('limit') < cnt: - rest += v - hidden += 1 - continue - - percent = (v / data['cnt_ctg']) * 100 - sum_v += v - print("{:3d}. 
{:50s} {:>12s} {:6.2f}%".format(cnt, k, locale.format("%d", v, grouping=True), percent)) - - if rest > 0: - percent = (rest / data['cnt_ctg']) * 100 - print("{:3s} {:50s} {:>12s} {:6.2f}%".format(' **', 'REST ({})'.format(hidden), locale.format("%d", rest, grouping=True), percent)) - - percent = ((rest+sum_v) / data['cnt_ctg']) * 100 - print("{:3s} {:50s} {:>12s} {:6.2f}%".format(' **', 'TOTAL', locale.format("%d", rest+sum_v, grouping=True), percent)) - - print("") - - print("Statistics per abuse:\n") - genexp = ((k, data['by_abuse'][k]) for k in sorted(data['by_abuse'], key=data['by_abuse'].get, reverse=True)) - cnt = 0 - rest = 0 - hidden = 0 - sum_v = 0 - for k, v in genexp: - cnt += 1 - if self.config.get('limit') and self.config.get('limit') < cnt: - rest += v - hidden += 1 - continue - - percent = (v / data['cnt_abs']) * 100 - sum_v += v - print("{:3d}. {:50s} {:>12s} {:6.2f}%".format(cnt, k, locale.format("%d", v, grouping=True), percent)) - - if rest > 0: - percent = (rest / data['cnt_abs']) * 100 - print("{:3s} {:50s} {:>12s} {:6.2f}%".format(' **', 'REST ({})'.format(hidden), locale.format("%d", rest, grouping=True), percent)) - - percent = ((rest+sum_v) / data['cnt_abs']) * 100 - print("{:3s} {:50s} {:>12s} {:6.2f}%".format(' **', 'TOTAL', locale.format("%d", rest+sum_v, grouping=True), percent)) - - print("") - - if data['by_country_abuse'].get(self.config.get('emphasize'), None): - country = self.config.get('emphasize') - print("Detailed statistics for country and abuse '{}' ({}):\n".format(trdict.get(country, '??'), country)) - genexp = ((k, data['by_country_abuse'][country][k]) for k in sorted(data['by_country_abuse'][country], key=data['by_country_abuse'][country].get, reverse=True)) - cnt = 0 - rest = 0 - hidden = 0 - sum_v = 0 - for k, v in genexp: - cnt += 1 - if self.config.get('limit') and self.config.get('limit') < cnt: - rest += v - hidden += 1 - continue - - percent = (v / data['by_country'][country]) * 100 - percentt = (v / data['cnt_ctg']) * 100 - sum_v += v - print("{:3d}. {:50s} {:>12s} {:6.2f}% {:6.2f}%".format(cnt, k, locale.format("%d", v, grouping=True), percent, percentt)) - - if rest > 0: - percent = (rest / data['by_country'][country]) * 100 - percentt = (rest / data['cnt_ctg']) * 100 - print("{:3s} {:50s} {:>12s} {:6.2f}% {:6.2f}%".format(' **', 'REST ({})'.format(hidden), locale.format("%d", rest, grouping=True), percent, percentt)) - - percent = ((rest+sum_v) / data['by_country'][country]) * 100 - percentt = ((rest+sum_v) / data['cnt_ctg']) * 100 - print("{:3s} {:50s} {:>12s} {:6.2f}% {:6.2f}%".format(' **', 'TOTAL', locale.format("%d", rest+sum_v, grouping=True), percent, percentt)) - - for country in detail: - print("") - print("Detailed statistics for country '{}' ({}):\n".format(trdict.get(country, '??'), country)) - genexp = ((k, data['by_country_category'][country][k]) for k in sorted(data['by_country_category'][country], key=data['by_country_category'][country].get, reverse=True)) - cnt = 0 - rest = 0 - hidden = 0 - sum_v = 0 - for k, v in genexp: - cnt += 1 - if self.config.get('limit') and self.config.get('limit') < cnt: - rest += v - hidden += 1 - continue - - percent = (v / data['by_country'][country]) * 100 - percentt = (v / data['cnt_ctg']) * 100 - sum_v += v - print("{:3d}. 
{:50s} {:>12s} {:6.2f}% {:6.2f}%".format(cnt, k, locale.format("%d", v, grouping=True), percent, percentt)) - - if rest > 0: - percent = (rest / data['by_country'][country]) * 100 - percentt = (rest / data['cnt_ctg']) * 100 - print("{:3s} {:50s} {:>12s} {:6.2f}% {:6.2f}%".format(' **', 'REST ({})'.format(hidden), locale.format("%d", rest, grouping=True), percent, percentt)) - - percent = ((rest+sum_v) / data['by_country'][country]) * 100 - percentt = ((rest+sum_v) / data['cnt_ctg']) * 100 - print("{:3s} {:50s} {:>12s} {:6.2f}% {:6.2f}%".format(' **', 'TOTAL', locale.format("%d", rest+sum_v, grouping=True), percent, percentt)) - - #--------------------------------------------------------------------------- - -if __name__ == "__main__": - script = MentatDumpAnalyzerScript( - path_cfg = '/etc/mentat', - path_log = '/var/mentat/log', - path_run = '/var/mentat/run' - ) - script.run() diff --git a/scripts/mentat-dump.py b/scripts/mentat-dump.py deleted file mode 100755 index a86c60b41..000000000 --- a/scripts/mentat-dump.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# Copyright (C) since 2011 CESNET, z.s.p.o -# Use of this source is governed by the MIT license, see LICENSE file. -#------------------------------------------------------------------------------- - -import os -import time -import math -import logging -from pymongo import MongoClient - -# Custom libraries -import pyzenkit.zenscript - -# -# Global variables. -# -VERSION = "0.1" # Version information -DIR_DUMP = '/var/tmp' # Default dump directory - -class MentatDumpScript(pyzenkit.zenscript.ZenScript): - """ - Script providing Mentat alert database dump functions and features - """ - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def _init_custom(self, config, argparser, **kwargs): - """ - Perform subinitializations on default configurations and argument parser. - """ - pass - - def get_default_command(self): - """ - Return the name of a default script operation. - """ - return 'dump' - - #--------------------------------------------------------------------------- - # OPERATION CALLBACK IMPLEMENTATIONS - #--------------------------------------------------------------------------- - - def cbk_command_dump(self): - """ - Dump Mentat alert database. - """ - mongoclient = MongoClient() - db = mongoclient.mentat - col = db.alerts - - cnt_all = col.count() - self.logger.info("Total number of items in 'alerts' collection: {:,d}".format(cnt_all)) - - # Get current timestamp - time_l = math.floor(time.time()); - time_l = time_l - (14 * 86400) - self.logger.info("Calculated lower dump period threshold: {} ({})".format(time.strftime('%Y-%m-%d %H:%M', time.localtime(time_l)), time_l)) - - # Fetch items to be updated - to_dump = col.find({"_CESNET.StorageTime": {"$gt": time_l}}) - cnt_dump = to_dump.count() - self.logger.info("Number of items to be dumped: {:,d} [{:5.2f}%]".format(cnt_dump, (cnt_dump / cnt_all * 100))) - - dumpfile = os.path.join(DIR_DUMP, 'mongodump.json') - mf = open(dumpfile, 'w') - cnt = 0 - for item in to_dump: - data = item.get('msg_raw', item.get('msg_raw2')) - mf.write(data + '\n') - cnt += 1 - if not self.config['cron']: - self.draw_progress_bar((cnt/cnt_dump), cnt) - if not self.config['cron']: - print("\n") - mf.close() - self.logger.info("Alerts dumped to file '{}'".format(dumpfile)) - -if __name__ == "__main__": - """ - Execute the MentatDumpScript script. 
-    """
-    script = MentatDumpScript(
-        path_cfg = '/etc/mentat',
-        path_log = '/var/mentat/log',
-        path_run = '/var/mentat/run',
-        path_tmp = '/tmp',
-
-        default_config_dir = '/etc/mentat/core',
-    )
-    script.run()
diff --git a/scripts/sqldb-migrate-data.py b/scripts/sqldb-migrate-data.py
deleted file mode 100644
index adff7098e..000000000
--- a/scripts/sqldb-migrate-data.py
+++ /dev/null
@@ -1,638 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-#-------------------------------------------------------------------------------
-# This file is part of Mentat system (https://mentat.cesnet.cz/).
-#
-# Copyright (C) since 2011 CESNET, z.s.p.o (http://www.ces.net/)
-# Use of this source is governed by the MIT license, see LICENSE file.
-#-------------------------------------------------------------------------------
-
-
-"""
-Utility script for MongoDB to PostgreSQL migration - system and auxiliary data.
-
-
-Usage examples
---------------
-
-.. code-block:: shell
-
-    # Perform full migration from MongoDB to PostgreSQL:
-    /etc/mentat/scripts/sqldb-migrate-data.py
-
-    # Drop SQL schema before migration:
-    /etc/mentat/scripts/sqldb-migrate-data.py --drop
-
-    # Do not refresh SQL schema, but clear data first:
-    /etc/mentat/scripts/sqldb-migrate-data.py --clear
-
-    # Skip conversion of the event statistics database:
-    /etc/mentat/scripts/sqldb-migrate-data.py --skip-statistics
-
-    # Skip conversion of the event reports database:
-    /etc/mentat/scripts/sqldb-migrate-data.py --skip-reports
-
-
-Command line options:
-^^^^^^^^^^^^^^^^^^^^^
-
-``-h``,``--help``
-    Print help message and exit.
-
-``-v``,``--verbose``
-    Run in verbose mode: print objects being written into the database, tap into
-    the ``SQLAlchemy`` logger and print SQL statements being executed.
-
-``-d``,``--drop``
-    Drop existing SQL schema and data before migration.
-
-``-c``,``--clear``
-    Clear existing table data before migration. This switch currently works only
-    for the _users_ and _groups_ tables. *Note that, due to internal referential
-    integrity, clearing the groups table will also clear the event reports.*
-
-``-i``,``--ignore-duplicates``
-    Ignore duplicate records.
-
-``--skip-statistics``
-    Skip migration of event statistics.
-
-``--skip-reports``
-    Skip migration of event reports.
-
-``-f utc-unixtimestamp``,``--from-timestamp utc-unixtimestamp``
-    Perform migration starting with the given unix UTC timestamp (applied to
-    reports and statistics). See the example below.
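
A suitable UTC unix timestamp for ``--from-timestamp`` may be computed for
example as follows (a minimal sketch, not part of this script):

.. code-block:: python

    import calendar
    import time

    # 2018-01-01T12:00:00 UTC as a unix timestamp; calendar.timegm() treats
    # the parsed struct_time as UTC, avoiding local-time skew.
    ts = calendar.timegm(time.strptime('2018-01-01T12:00:00', '%Y-%m-%dT%H:%M:%S'))
    print(ts)  # 1514808000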
-
-
-Migration capabilities:
-^^^^^^^^^^^^^^^^^^^^^^^
-
-The script is capable of migrating the following objects:
-
-* user accounts from MongoDB collection ``mentat.users``
-* group accounts from MongoDB collection ``mentat.groups``
-* network records from MongoDB collection ``mentat.groups``
-* reporting filters from MongoDB collection ``mentat.groups``
-* reporting settings from MongoDB collection ``mentat.groups``
-* event reports from MongoDB collection ``mentat.reports_ng``
-* event statistics from MongoDB collection ``mentat_stats.statistics``
-"""
-
-
-__author__ = "Jan Mach <jan.mach@cesnet.cz>"
-__credits__ = "Pavel Kácha <pavel.kacha@cesnet.cz>, Andrea Kropáčová <andrea.kropacova@cesnet.cz>"
-
-
-import logging
-import datetime
-import argparse
-import sqlalchemy.exc
-
-import pyzenkit.jsonconf
-import mentat.storage
-import mentat.services.sqlstorage
-import mentat.stats.idea
-from mentat.datatype.internal import User, AbuseGroup, EventStat, Report
-from mentat.datatype.sqldb import UserModel, GroupModel, NetworkModel,\
-    FilterModel, SettingsReportingModel, EventStatisticsModel, EventReportModel, \
-    usermodel_from_typeddict, groupmodel_from_typeddict, setrepmodel_from_typeddict, \
-    filtermodel_from_typeddict, networkmodel_from_typeddict, eventstatsmodel_from_typeddict
-
-
-#-------------------------------------------------------------------------------
-
-
-def setup_logging():
-    """
-    Crude setup of both file and console logging.
-    """
-    logger = logging.getLogger('sqldb-migrate-data.py')
-    logger.setLevel(logging.DEBUG)
-    fhnd = logging.FileHandler('/var/log/sqldb-migrate-data.py.log')
-    fhnd.setLevel(logging.DEBUG)
-    chnd = logging.StreamHandler()
-    chnd.setLevel(logging.INFO)
-    formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
-    fhnd.setFormatter(formatter)
-    chnd.setFormatter(formatter)
-    logger.addHandler(fhnd)
-    logger.addHandler(chnd)
-    return logger
-
-
-#-------------------------------------------------------------------------------
-
-
-def encconv(val):
-    """
-    Fix invalid encoding of Czech characters between legacy MongoDB records and
-    new UTF-8 based storages.
-    """
-    # Each pair maps one garbled byte sequence, as found in the legacy records,
-    # back to the intended character.
-    val = val.replace('Ă¡','á')\
-             .replace('Ă\x81','Á')\
-             .replace('Ă©','é')\
-             .replace('Ă\x89','É')\
-             .replace('Ă„\x9b','ě')\
-             .replace('Ă„\x9A','Ě')\
-             .replace('Ă\xad','í')\
-             .replace('Ă\x8D','Í')\
-             .replace('Ă½','ý')\
-             .replace('Ă\x9d','Ý')\
-             .replace('ĂÂł','ó')\
-             .replace('Ă\x93','Ó')\
-             .replace('ö','ö')\
-             .replace('Ă…ÂŻ','ů')\
-             .replace('Å®','Ů')\
-             .replace('ĂÂş','ú')\
-             .replace('Ă\x9a','Ú')\
-             .replace('Ž','Ž')\
-             .replace('Ă…Âľ','ž')\
-             .replace('Å¡','š')\
-             .replace('Ă…\xa0','Š')\
-             .replace('Ă„\x8d','č')\
-             .replace('Ă„\x8c','Č')\
-             .replace('Ă…\x99','ř')\
-             .replace('Ă…\x98','Ř')\
-             .replace('Ă„\x8f','ď')\
-             .replace('Ă„\x8e','Ď')\
-             .replace('Ă…ÂĄ','ť')\
-             .replace('Ť','Ť')\
-             .replace('Ă…\x88','ň')\
-             .replace('Ă…\x87','Ň')\
-             .replace('Ă…\x84','ń')
-
-    return val
-
-#
-# Initialize and execute simple command line argument parser.
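# For illustration of the problem the encconv() function above corrects: UTF-8
# text that has been mis-decoded through a single-byte codec produces exactly
# this kind of garbage (the codec chosen below is an assumption, purely for
# demonstration):
#
#     >>> 'příliš žluťoučký kůň'.encode('utf-8').decode('latin-1')
#     'pÅ\x99Ã\xadliÅ¡ Å¾luÅ¥ouÄ\x8dkÃ½ kÅ¯Å\x88'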
-# -PARSER = argparse.ArgumentParser(description = 'Utility script for MongoDB to PostgreSQL migration.') -PARSER.add_argument("-v", "--verbose", action = "store_true", help = "increase output verbosity") -PARSER.add_argument("-d", "--drop", action = "store_true", help = "drop existing tables first") -PARSER.add_argument("-c", "--clear", action = "store_true", help = "remove existing data first") -PARSER.add_argument("-f", "--from-timestamp", type = int, default = None, help = "perform migration starting with given unix UTC timestamp") -PARSER.add_argument("-i", "--ignore-duplicates", action = "store_true", help = "ignore duplicate records") -PARSER.add_argument("--skip-statistics", action = "store_true", help = "skip migration of event statistics") -PARSER.add_argument("--skip-reports", action = "store_true", help = "skip migration of event reports") -ARGS = PARSER.parse_args() - -# -# Setup logging mechanism. -# -LOGGER = setup_logging() - -DT_START = datetime.datetime.now() -LOGGER.info("Data migration started") - - -# -# Load Mentat core configurations. -# -CORECFG = pyzenkit.jsonconf.config_load_dir('/etc/mentat/core') - -# -# Initialize database service proxy objects. -# -SQLMANAGER = mentat.services.sqlstorage.StorageServiceManager( - CORECFG, - { - "__core__database": { - "SQLSTORAGE": { - "echo": ARGS.verbose - } - } - } -) -SQLSTORAGE = SQLMANAGER.service() -MONGOMANAGER = mentat.storage.StorageManager(CORECFG) - -# -# Initialize SQL database schema. -# -if ARGS.drop: - LOGGER.info("[ BEGIN ] Drop Mentat main metadata database") - SQLSTORAGE.database_drop() - LOGGER.info("[ DONE ] Drop Mentat main metadata database") - -LOGGER.info("[ BEGIN ] Create Mentat main metadata database") -SQLSTORAGE.database_create() -LOGGER.info("[ DONE ] Create Mentat main metadata database") - - -LOGGER.info("Database status before migration:") -LOGGER.info("--------------------------------------------------") -LOGGER.info("User count: {:16,d}".format(SQLSTORAGE.session.query(UserModel).count())) -LOGGER.info("Group count: {:16,d}".format(SQLSTORAGE.session.query(GroupModel).count())) -LOGGER.info("Network count: {:16,d}".format(SQLSTORAGE.session.query(NetworkModel).count())) -LOGGER.info("Filter count: {:16,d}".format(SQLSTORAGE.session.query(FilterModel).count())) -LOGGER.info("Setting count: {:16,d}".format(SQLSTORAGE.session.query(SettingsReportingModel).count())) -LOGGER.info("Event reports count: {:16,d}".format(SQLSTORAGE.session.query(EventReportModel).count())) -LOGGER.info("Event stats count: {:16,d}".format(SQLSTORAGE.session.query(EventStatisticsModel).count())) -LOGGER.info("--------------------------------------------------") - - -#------------------------------------------------------------------------------- - - -# -# Lookup object dictionaries to enable object relationship migrations -# -SQLUSERS = {} -SQLGROUPS = {} - -def lookup_user(userid): - """ - Lookup user in SQL database. - """ - if not userid in SQLUSERS: - SQLUSERS[userid] = SQLSTORAGE.session.query(UserModel).filter(UserModel.login == userid).first() - SQLSTORAGE.session.commit() - return SQLUSERS[userid] - -def lookup_group(groupid): - """ - Lookup group in SQL database. - """ - if not groupid in SQLGROUPS: - SQLGROUPS[groupid] = SQLSTORAGE.session.query(GroupModel).filter(GroupModel.name == groupid).first() - SQLSTORAGE.session.commit() - return SQLGROUPS[groupid] - - -#------------------------------------------------------------------------------- - - -# -# Convert user account objects. 
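# Note on the pattern used by the conversions below: each record is inserted
# and committed individually, so that a duplicate, which surfaces as
# sqlalchemy.exc.IntegrityError, can be rolled back without aborting the rest
# of the batch.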
-#
-if ARGS.clear:
-    LOGGER.info("[ BEGIN ] Clearing 'users'")
-    COUNT = SQLSTORAGE.session.query(UserModel).delete(synchronize_session = False)
-    SQLSTORAGE.session.commit()
-    LOGGER.info("Cleared total of {:,d} 'user' objects".format(COUNT))
-    LOGGER.info("[ DONE ] Clearing 'users'")
-
-LOGGER.info("[ BEGIN ] Conversion: 'users'")
-MONGO_ITEMS = MONGOMANAGER.collection('db', 'col_users')
-TOTAL_COUNT = MONGO_ITEMS.count()
-LOGGER.info("Found total of {:,d} 'user' objects for conversion in MongoDB".format(TOTAL_COUNT))
-
-DUPL_COUNT = 0
-ITEMCOUNTER = 0
-for rawitem in MONGO_ITEMS.find().sort('_id', 1):
-    try:
-        if 'orggroups' not in rawitem or not rawitem['orggroups']:
-            rawitem['orggroups'] = []
-        if 'affiliations' not in rawitem or not rawitem['affiliations']:
-            rawitem['affiliations'] = []
-        mongoitem = User(rawitem)
-        if ARGS.verbose:
-            LOGGER.info(mongoitem)
-
-        sqlitem = usermodel_from_typeddict(mongoitem)
-        sqlitem.fullname = encconv(sqlitem.fullname)
-        sqlitem.organization = encconv(sqlitem.organization)
-        if ARGS.verbose:
-            LOGGER.debug("Inserting user object: %s", sqlitem)
-
-        SQLSTORAGE.session.add(sqlitem)
-        SQLSTORAGE.session.commit()
-        SQLUSERS[sqlitem.login] = sqlitem
-
-        ITEMCOUNTER += 1
-
-    except sqlalchemy.exc.IntegrityError as err:
-        DUPL_COUNT += 1
-        if ARGS.ignore_duplicates:
-            LOGGER.debug("Duplicate user record '%s'", rawitem['_id'])
-        else:
-            LOGGER.critical("Duplicate user record '%s'", rawitem['_id'])
-        SQLSTORAGE.session.rollback()
-
-    except Exception as err:
-        LOGGER.critical("Unable to convert user record '%s': %s", rawitem['_id'], str(err))
-
-LOGGER.info("Converted total of {:,d} 'user' objects, {:,d} duplicates, {:,d} failure(s)".format(ITEMCOUNTER, DUPL_COUNT, TOTAL_COUNT - DUPL_COUNT - ITEMCOUNTER))
-LOGGER.info("[ DONE ] Conversion: 'users'")
-
-#
-# Convert group objects.
-#
-if ARGS.clear:
-    LOGGER.info("[ BEGIN ] Clearing 'groups'")
-    COUNT = SQLSTORAGE.session.query(EventReportModel).delete(synchronize_session = False)
-    SQLSTORAGE.session.commit()
-    LOGGER.info("Cleared total of {:,d} 'report' objects".format(COUNT))
-    COUNT = SQLSTORAGE.session.query(GroupModel).delete(synchronize_session = False)
-    SQLSTORAGE.session.commit()
-    LOGGER.info("Cleared total of {:,d} 'group' objects".format(COUNT))
-    LOGGER.info("[ DONE ] Clearing 'groups'")
-
-LOGGER.info("[ BEGIN ] Conversion: 'groups'")
-MONGO_ITEMS = MONGOMANAGER.collection('db', 'col_groups')
-TOTAL_COUNT = MONGO_ITEMS.count()
-LOGGER.info("Found total of {:,d} 'group' objects for conversion in MongoDB".format(TOTAL_COUNT))
-
-DUPL_COUNT = 0
-ITEMCOUNTER = 0
-for rawitem in MONGO_ITEMS.find().sort('_id', 1):
-    try:
-        mongoitem = AbuseGroup(rawitem)
-        if ARGS.verbose:
-            LOGGER.info(mongoitem)
-
-        sqlitem = groupmodel_from_typeddict(mongoitem)
-        if ARGS.verbose:
-            LOGGER.debug("Inserting group object: %s", sqlitem)
-
-        SQLSTORAGE.session.add(sqlitem)
-        SQLSTORAGE.session.commit()
-        SQLGROUPS[sqlitem.name] = sqlitem
-
-        ITEMCOUNTER += 1
-
-        #
-        # Convert reporting settings for each group.
-        #
-        sqlstgs = setrepmodel_from_typeddict(mongoitem)
-        sqlitem.settings_rep = sqlstgs
-
-        #
-        # Convert all reporting filters for each group.
-        #
-        if 'rep_filters' in mongoitem and mongoitem['rep_filters']:
-            for mongoflt in mongoitem['rep_filters']:
-                sqlfltr = filtermodel_from_typeddict(mongoflt)
-                if ARGS.verbose:
-                    LOGGER.debug("Inserting filter object: %s", sqlfltr)
-                sqlitem.filters.append(sqlfltr)
-
-        #
-        # Convert all network records for each group.
-        #
-        if 'networks' in mongoitem and mongoitem['networks']:
-            for mongonet in mongoitem['networks']:
-                sqlnet = networkmodel_from_typeddict(mongonet)
-                if ARGS.verbose:
-                    LOGGER.debug("Inserting network object: %s", sqlnet)
-                sqlitem.networks.append(sqlnet)
-
-    except sqlalchemy.exc.IntegrityError as err:
-        DUPL_COUNT += 1
-        if ARGS.ignore_duplicates:
-            LOGGER.debug("Duplicate group record '%s'", rawitem['_id'])
-        else:
-            LOGGER.critical("Duplicate group record '%s'", rawitem['_id'])
-        SQLSTORAGE.session.rollback()
-
-    except Exception as err:
-        LOGGER.critical("Unable to convert group record '%s': %s", rawitem['_id'], str(err))
-
-LOGGER.info("Converted total of {:,d} 'group' objects, {:,d} duplicates, {:,d} failure(s)".format(ITEMCOUNTER, DUPL_COUNT, TOTAL_COUNT - DUPL_COUNT - ITEMCOUNTER))
-LOGGER.info("[ DONE ] Conversion: 'groups'")
-
-#
-# Setup group membership relationships.
-#
-LOGGER.info("[ BEGIN ] Setup: 'memberships'")
-MONGO_ITEMS = MONGOMANAGER.collection('db', 'col_users')
-for usr in MONGO_ITEMS.find().sort('_id', 1):
-    for grpname in usr['groups']:
-        userobj = lookup_user(usr['_id'])
-        groupobj = lookup_group(grpname.id)
-        if userobj and groupobj:
-            userobj.memberships.append(groupobj)
-            LOGGER.debug("Added group membership %s => %s", usr['_id'], grpname.id)
-        else:
-            LOGGER.critical("Unable to set up group membership %s => %s", usr['_id'], grpname.id)
-
-LOGGER.info("Committing changes to PostgreSQL database")
-SQLSTORAGE.session.commit()
-LOGGER.info("[ DONE ] Setup: 'memberships'")
-
-#
-# Setup group management relationships.
-#
-LOGGER.info("[ BEGIN ] Setup: 'managements'")
-MONGO_ITEMS = MONGOMANAGER.collection('db', 'col_groups')
-for grp in MONGO_ITEMS.find().sort('_id', 1):
-    for usrname in grp['managers']:
-        userobj = lookup_user(usrname.id)
-        groupobj = lookup_group(grp['_id'])
-        if userobj and groupobj:
-            groupobj.managers.append(userobj)
-            LOGGER.debug("Added group management %s => %s", usrname.id, grp['_id'])
-        else:
-            LOGGER.critical("Unable to set up group management %s => %s", usrname.id, grp['_id'])
-
-LOGGER.info("Committing changes to PostgreSQL database")
-SQLSTORAGE.session.commit()
-LOGGER.info("[ DONE ] Setup: 'managements'")
-
-
-#-------------------------------------------------------------------------------
-
-
-#
-# Convert event reports database.
-# -if not ARGS.skip_reports: - LOGGER.info("[ BEGIN ] Conversion: 'reports'") - - query_filter = {} - if ARGS.from_timestamp: - query_filter['ts'] = {'$gt': ARGS.from_timestamp} - LOGGER.info("Converting only records with creation time greater than {:d}".format(ARGS.from_timestamp)) - - MONGO_ITEMS = MONGOMANAGER.collection('db', 'col_reports') - TOTAL_COUNT = MONGO_ITEMS.count(query_filter) - LOGGER.info("Found total of {:,d} 'event reports' objects for conversion in MongoDB".format(TOTAL_COUNT)) - - SUCCESS_COUNT = 0 - DUPL_COUNT = 0 - ITEMCOUNTER = 0 - for rawitem in MONGO_ITEMS.find(query_filter).sort('ts', 1): - rawitem = mentat.stats.idea.unescape_stats(rawitem) - mongorep = Report(rawitem) - if ARGS.verbose: - LOGGER.info(mongorep) - delta = mongorep['ts_to'] - mongorep['ts_from'] - - sqlrep = EventReportModel() - - if not lookup_group(mongorep['abuse']): - LOGGER.critical("Unable to convert report %s for group %s: Group does not exist.", mongorep['_id'], mongorep['abuse']) - continue - - sqlrep.group = lookup_group(mongorep['abuse']) - sqlrep.label = mongorep['_id'] - sqlrep.handle = mongorep['ua_hash'] - sqlrep.severity = mongorep['severity'] - sqlrep.type = mongorep['type'] - sqlrep.message = encconv(mongorep['message']) - - sqlrep.createtime = mongorep['ts'] - sqlrep.dt_from = mongorep['ts_from'] - sqlrep.dt_to = mongorep['ts_to'] - sqlrep.delta = delta.total_seconds() - - sqlrep.evcount_rep = mongorep.get('cnt_alerts', mongorep.get('cnt_all', None)) - sqlrep.evcount_all = mongorep.get('cnt_all', None) - sqlrep.evcount_new = mongorep.get('cnt_all', None) - sqlrep.evcount_flt = mongorep.get('cnt_flt', 0) - sqlrep.evcount_flt_blk = mongorep.get('cnt_flt_blk', 0) - sqlrep.evcount_thr = mongorep.get('cnt_thr', 0) - sqlrep.evcount_thr_blk = mongorep.get('cnt_thr_blk', 0) - sqlrep.evcount_rlp = mongorep.get('cnt_rlp', 0) - - sqlrep.mail_to = mongorep.get('mail_to', None) - if sqlrep.mail_to: - sqlrep.mail_to = list(sqlrep.mail_to.replace(' ','').split(',')) - sqlrep.mail_dt = mongorep.get('mail_ts', None) - sqlrep.mail_res = mongorep.get('mail_res', None) - - sqlrep.flag_testdata = mongorep.get('test_data', False) - sqlrep.flag_mailed = mongorep.get('flag_mail_sent', False) - - tmpsts = {} - tmpsts['cnt_alerts'] = mongorep['cnt_alerts'] - if 'cnt_analyzers' in mongorep: - tmpsts['cnt_analyzers'] = mongorep['cnt_analyzers'] - tmpsts['list_analyzers'] = list(mongorep['list_analyzers']) - tmpsts['analyzers'] = mongorep['analyzers'] - if 'cnt_detectors' in mongorep: - tmpsts['cnt_detectors'] = mongorep['cnt_detectors'] - tmpsts['list_detectors'] = list(mongorep['list_detectors']) - tmpsts['detectors'] = mongorep['detectors'] - if 'cnt_detectorsws' in mongorep: - tmpsts['cnt_detectorsws'] = mongorep['cnt_detectorsws'] - tmpsts['list_detectorsws'] = list(mongorep['list_detectorsws']) - tmpsts['detectorsws'] = mongorep['detectorsws'] - if 'cnt_categories' in mongorep: - tmpsts['cnt_categories'] = mongorep['cnt_categories'] - tmpsts['list_categories'] = list(mongorep['list_categories']) - tmpsts['categories'] = mongorep['categories'] - if 'cnt_category_sets' in mongorep: - tmpsts['cnt_category_sets'] = mongorep['cnt_category_sets'] - tmpsts['list_category_sets'] = list(mongorep['list_category_sets']) - tmpsts['category_sets'] = mongorep['category_sets'] - if 'cnt_ips' in mongorep: - tmpsts['cnt_ips'] = mongorep['cnt_ips'] - tmpsts['list_ips'] = list(mongorep['list_ips']) - tmpsts['ips'] = mongorep['ips'] - tmpsts['list_ids'] = list(mongorep['list_ids']) - sqlrep.statistics = tmpsts - 
sqlrep.filtering = mongorep.get('frv', list()) - - if ARGS.verbose: - LOGGER.info(sqlrep) - - try: - SQLSTORAGE.session.add(sqlrep) - SQLSTORAGE.session.commit() - SUCCESS_COUNT += 1 - - except sqlalchemy.exc.IntegrityError as err: - DUPL_COUNT += 1 - if ARGS.ignore_duplicates: - LOGGER.debug("Duplicate event report %s", sqlrep) - else: - LOGGER.critical("Duplicate event report %s", sqlrep) - SQLSTORAGE.session.rollback() - - ITEMCOUNTER = ITEMCOUNTER + 1 - if (ITEMCOUNTER % 1000) == 0: - LOGGER.info("Progress: {:>12,d} ({:>7.3f}%)".format( - ITEMCOUNTER, - (ITEMCOUNTER/TOTAL_COUNT)*100, - )) - - LOGGER.info("Processed total of {:,d} 'report' objects, {:,d} converted, {:,d} duplicates, {:,d} failure(s)".format(ITEMCOUNTER, SUCCESS_COUNT, DUPL_COUNT, TOTAL_COUNT - SUCCESS_COUNT - DUPL_COUNT)) - LOGGER.info("[ DONE ] Conversion: 'reports'") - -else: - LOGGER.info("[ SKIP ] Conversion: 'reports'") - - -#------------------------------------------------------------------------------- - - -# -# Convert event statistics database. -# -if not ARGS.skip_statistics: - LOGGER.info("[ BEGIN ] Conversion: 'statistics'") - - query_filter = {} - if ARGS.from_timestamp: - query_filter['ts'] = {'$gt': ARGS.from_timestamp} - LOGGER.info("Converting only records with creation time greater than {:d}".format(ARGS.from_timestamp)) - - MONGO_ITEMS = MONGOMANAGER.collection('db_stats', 'col_stats_alerts') - TOTAL_COUNT = MONGO_ITEMS.count(query_filter) - LOGGER.info("Found total of {:,d} 'event statistics' objects for conversion in MongoDB".format(TOTAL_COUNT)) - - SUCCESS_COUNT = 0 - DUPL_COUNT = 0 - ITEMCOUNTER = 0 - for rawitem in MONGO_ITEMS.find(query_filter).sort('_id', 1): - rawitem = mentat.stats.idea.unescape_stats_full(rawitem) - mongoitem = EventStat(rawitem) - if ARGS.verbose: - LOGGER.info(mongoitem) - - sqlitem = eventstatsmodel_from_typeddict(mongoitem) - if ARGS.verbose: - LOGGER.info(sqlitem) - - try: - SQLSTORAGE.session.add(sqlitem) - SQLSTORAGE.session.commit() - SUCCESS_COUNT += 1 - - except sqlalchemy.exc.IntegrityError as err: - DUPL_COUNT += 1 - if ARGS.ignore_duplicates: - LOGGER.debug("Duplicate event statistics record for interval %s", sqlitem.interval) - else: - LOGGER.critical("Duplicate event statistics record for interval %s", sqlitem.interval) - SQLSTORAGE.session.rollback() - - ITEMCOUNTER = ITEMCOUNTER + 1 - if (ITEMCOUNTER % 1000) == 0: - LOGGER.info("Progress: {:>12,d} ({:>7.3f}%)".format( - ITEMCOUNTER, - (ITEMCOUNTER/TOTAL_COUNT)*100, - )) - - LOGGER.info("Processed total of {:,d} 'statistics' objects, {:,d} converted, {:,d} duplicates, {:,d} failure(s)".format(ITEMCOUNTER, SUCCESS_COUNT, DUPL_COUNT, TOTAL_COUNT - SUCCESS_COUNT - DUPL_COUNT)) - LOGGER.info("[ DONE ] Conversion: 'statistics'") - -else: - LOGGER.info("[ SKIP ] Conversion: 'statistics'") - - -#------------------------------------------------------------------------------- - - -DT_STOP = datetime.datetime.now() - -LOGGER.info("Data migration results:") -LOGGER.info("--------------------------------------------------") -LOGGER.info("User count: {:16,d}".format(SQLSTORAGE.session.query(UserModel).count())) -LOGGER.info("Group count: {:16,d}".format(SQLSTORAGE.session.query(GroupModel).count())) -LOGGER.info("Network count: {:16,d}".format(SQLSTORAGE.session.query(NetworkModel).count())) -LOGGER.info("Filter count: {:16,d}".format(SQLSTORAGE.session.query(FilterModel).count())) -LOGGER.info("Setting count: {:16,d}".format(SQLSTORAGE.session.query(SettingsReportingModel).count())) -LOGGER.info("Event reports count: 
{:16,d}".format(SQLSTORAGE.session.query(EventReportModel).count())) -LOGGER.info("Event stats count: {:16,d}".format(SQLSTORAGE.session.query(EventStatisticsModel).count())) -LOGGER.info("--------------------------------------------------") -LOGGER.info("Migration started at: %s", str(DT_START)) -LOGGER.info("Migration finished at: %s", str(DT_STOP)) -LOGGER.info("Migration duration: %s", str(DT_STOP - DT_START)) - -#------------------------------------------------------------------------------- - - -SQLMANAGER.close() diff --git a/scripts/sqldb-migrate-events.py b/scripts/sqldb-migrate-events.py deleted file mode 100644 index 5255f34a0..000000000 --- a/scripts/sqldb-migrate-events.py +++ /dev/null @@ -1,192 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# This file is part of Mentat system (https://mentat.cesnet.cz/). -# -# Copyright (C) since 2011 CESNET, z.s.p.o (http://www.ces.net/) -# Use of this source is governed by the MIT license, see LICENSE file. -#------------------------------------------------------------------------------- - - -""" -Utility script for MongoDB to PostgreSQL migration - IDEA events. - - -Usage examples --------------- - -.. code-block:: shell - - # Perform full migration from MongoDB to PostgreSQL: - /etc/mentat/scripts/sqldb-migrate-events.py - - # Skip events already existing in PostgreSQL database: - /etc/mentat/scripts/sqldb-migrate-events.py --skip - - # Drop SQL schema for event table before migration: - /etc/mentat/scripts/sqldb-migrate-events.py --drop - - # Start with events stored into MongoDB after 2018-01-01T12:00:00: - /etc/mentat/scripts/sqldb-migrate-events.py --st-from 2018-01-01T12:00:00 - - -Command line options: ---------------------- - -``-h``,``--help`` - Print help message and exit. - -``-v``,``--verbose`` - Run in verbose mode, print objects being written into database and tap into - ``SQLAlchemy`` logger and print SQL statements being executed. - -``-d``,``--drop`` - Drop existing SQL schema and data before migration. - -``-s``,``--skip`` - Skip existing objects. - -``-f '%Y-%m-%dT%H:%M:%S'``,``--st-from '%Y-%m-%dT%H:%M:%S'`` - Lower MongoDB storage timestamp boundary where to start conversions. Only - events with ``_CESNET.StorageTime`` greater than or equal to given value - will be converted. - - -Migration capabilities: ------------------------ - -The script is capable of migrating following objects from MongoDB to PostgreSQL: - -* IDEA events from collection ``mentat.alerts`` -""" - - -__author__ = "Jan Mach <jan.mach@cesnet.cz>" -__credits__ = "Radko Krkoš <radko.krkos@cesnet.cz>, Pavel Kácha <pavel.kacha@cesnet.cz>, Andrea Kropáčová <andrea.kropacova@cesnet.cz>" - - -import datetime -import argparse -import psycopg2 - -import pyzenkit.jsonconf -import mentat.idea.mongodb -import mentat.idea.sqldb -import mentat.storage -import mentat.services.eventstorage - - -#------------------------------------------------------------------------------- - - -# -# Initialize and execute simple command line argument parser. 
-# -PARSER = argparse.ArgumentParser(description = 'Utility script for MongoDB to PostgreSQL migration.') -PARSER.add_argument("-v", "--verbose", action = "store_true", help = "increase output verbosity") -PARSER.add_argument("-d", "--drop", action = "store_true", help = "drop existing tables first") -PARSER.add_argument("-s", "--skip", action = "store_true", help = "skip existing objects") -PARSER.add_argument("-f", "--st-from", type = str, help = "lower timestamp boundary where to start conversions") -ARGS = PARSER.parse_args() - -DT_START = datetime.datetime.now() -print("Event migration started at: {}".format(str(DT_START))) - - -# -# Load Mentat core configurations. -# -CORECFG = pyzenkit.jsonconf.config_load_dir('/etc/mentat/core') - -# -# Initialize database service proxy objects. -# -EVENTMANAGER = mentat.services.eventstorage.EventStorageServiceManager(CORECFG) -EVENTSTORAGE = EVENTMANAGER.service() -MONGOMANAGER = mentat.storage.StorageManager(CORECFG) -EVENTFILTER = {} - -# -# Process lower timestamp boundary for conversion. -# -if ARGS.st_from: - ARGS.st_from = datetime.datetime.strptime(ARGS.st_from, '%Y-%m-%dT%H:%M:%S') - EVENTFILTER['_CESNET.StorageTime'] = {'$gt': ARGS.st_from.timestamp() } - print("Fetching only events stored after: {}".format(str(ARGS.st_from))) - -# -# Initialize SQL database schema. -# -if ARGS.drop: - print("\n[ BEGIN ] Drop Mentat event database") - EVENTSTORAGE.database_drop() - print("[ DONE ] Drop Mentat event database") - -print("\n[ BEGIN ] Create Mentat event database") -EVENTSTORAGE.database_create() -print("[ DONE ] Create Mentat event database") - - -#------------------------------------------------------------------------------- - - -print("\n[ BEGIN ] Convert IDEA events") - -MONGO_ITEMS = MONGOMANAGER.collection('db', 'col_alerts') -TOTAL_COUNT = MONGO_ITEMS.count() -print("* found total of {:,d} 'event' objects in MongoDB".format(TOTAL_COUNT)) - -CNT_PROCESSED = 0 -CNT_MIGRATED = 0 -CNT_SKIPPED = 0 -DT_PREVIOUS = datetime.datetime.now() -for alr in MONGO_ITEMS.find(EVENTFILTER).sort('_CESNET.StorageTime', 1): - mongoevent = mentat.idea.mongodb.IdeaOut(alr) - if ARGS.verbose: - print(mongoevent) - CNT_PROCESSED = CNT_PROCESSED + 1 - - try: - EVENTSTORAGE.insert_event(mongoevent) - except psycopg2.IntegrityError as err: - EVENTSTORAGE.rollback() - if ARGS.skip: - CNT_SKIPPED = CNT_SKIPPED + 1 - continue - else: - print("\nERROR: Duplicate event, {}".format(err)) - break - - CNT_MIGRATED = CNT_MIGRATED + 1 - if (CNT_MIGRATED % 10000) == 0: - DT_CURRENT = datetime.datetime.now() - print("* progress at {}: proc = {:>12,d} ({:>7.3f}%), skip = {:>12,d} ({:>7.3f}%), {}, {}".format( - str(DT_CURRENT), - CNT_PROCESSED, - (CNT_PROCESSED/TOTAL_COUNT)*100, - CNT_SKIPPED, - (CNT_SKIPPED/TOTAL_COUNT)*100, - str(DT_CURRENT - DT_START), - str(DT_CURRENT - DT_PREVIOUS) - )) - DT_PREVIOUS = DT_CURRENT - -print("[ DONE ] Convert IDEA events") - - -#------------------------------------------------------------------------------- - -DT_STOP = datetime.datetime.now() - -print("") -print("Event migration results:") -print("------------------------") -print("") -print("Processed events: {:,d} ({:>7.3f}%)".format(CNT_PROCESSED, (CNT_PROCESSED/TOTAL_COUNT)*100)) -print("Migrated events: {:,d} ({:>7.3f}%)".format(CNT_MIGRATED, (CNT_MIGRATED/TOTAL_COUNT)*100)) -print("Skipped events: {:,d} ({:>7.3f}%)".format(CNT_SKIPPED, (CNT_SKIPPED/TOTAL_COUNT)*100)) -print("Migration started at: {}".format(str(DT_START))) -print("Migration finished at: {}".format(str(DT_STOP))) 
-print("Migration duration: {}".format(str(DT_STOP - DT_START))) -print("") - -- GitLab