Skip to content
Snippets Groups Projects
Commit 1ce22719 authored by Jan Mach's avatar Jan Mach
Browse files

Improvements in IDEA statistics library.

Added library constants instead of literals within module code. Added more statistical calculations (min, max, sum, avg). (Redmine issue: #3379)
parent 6f1c2627
No related branches found
No related tags found
No related merge requests found
...@@ -9,12 +9,11 @@ ...@@ -9,12 +9,11 @@
""" """
... Library for calculating various statistics from given list of IDEA messages.
""" """
__version__ = "0.1" __author__ = "Jan Mach <jan.mach@cesnet.cz>"
__author__ = "Jan Mach <jan.mach@cesnet.cz>"
__credits__ = "Pavel Kácha <pavel.kacha@cesnet.cz>, Andrea Kropáčová <andrea.kropacova@cesnet.cz>" __credits__ = "Pavel Kácha <pavel.kacha@cesnet.cz>, Andrea Kropáčová <andrea.kropacova@cesnet.cz>"
...@@ -26,17 +25,53 @@ from pynspect.jpath import jpath_values, jpath_set ...@@ -26,17 +25,53 @@ from pynspect.jpath import jpath_values, jpath_set
# Fallback keyword for values that could not be determined.
KEY_UNKNOWN = '__unknown__'

#
# Keywords of statistical categories.
#
ST_INTERNAL = 'internal'
ST_EXTERNAL = 'external'
ST_OVERALL = 'overall'

#
# Keywords of calculated statistics (value -> count dictionaries).
#
ST_SKEY_IPS = 'ips'
ST_SKEY_IP4S = 'ip4s'
ST_SKEY_IP6S = 'ip6s'
ST_SKEY_ANALYZERS = 'analyzers'
ST_SKEY_CATEGORIES = 'categories'
ST_SKEY_CATEGSETS = 'category_sets'
ST_SKEY_DETECTORS = 'detectors'
ST_SKEY_DETECTORSWS = 'detectorsws'
ST_SKEY_ABUSES = 'abuses'

#
# Keywords of plain lists stored within a statistical bundle.
#
ST_LIST_IDS = 'list_ids'
ST_LIST_IPS = 'list_ips'
ST_LIST_IP4S = 'list_ip4s'
ST_LIST_IP6S = 'list_ip6s'

# Calculated statistics that are post-processed as a group
# (escaping, unescaping and aggregate calculations iterate over this tuple).
LIST_CALCSTAT_KEYS = (
    ST_SKEY_IPS,
    ST_SKEY_ANALYZERS,
    ST_SKEY_CATEGORIES,
    ST_SKEY_CATEGSETS,
    ST_SKEY_DETECTORS,
    ST_SKEY_DETECTORSWS,
    ST_SKEY_ABUSES,
)

# All statistical categories, in processing order.
LIST_STAT_CATEGS = (
    ST_INTERNAL,
    ST_EXTERNAL,
    ST_OVERALL,
)
def _counter_inc(stats, stat, key, increment = 1): def _counter_inc(stats, stat, key, increment = 1):
""" """
Helper for incrementing given statistical parameter within given statistical
bundle.
""" """
if not stat in stats: if not stat in stats:
stats[stat] = {} stats[stat] = {}
...@@ -46,9 +81,12 @@ def _counter_inc(stats, stat, key, increment = 1): ...@@ -46,9 +81,12 @@ def _counter_inc(stats, stat, key, increment = 1):
def escape_stats(stats):
    """
    Escape dots in statistic keyword names with '(dot)' (because '.' is reserved
    in MongoDB).

    Every calculated statistic listed in ``LIST_CALCSTAT_KEYS`` that is present
    in ``stats`` is replaced with the result of ``escape_dict``. The bundle is
    modified in place and also returned.

    TODO: Move this feature into mentat.datatype.mongodb library.
    """
    # Lazy filter: only statistics actually present in the bundle are touched.
    for stat_name in (name for name in LIST_CALCSTAT_KEYS if name in stats):
        stats[stat_name] = escape_dict(stats[stat_name])
    return stats
...@@ -56,9 +94,12 @@ def escape_stats(stats): ...@@ -56,9 +94,12 @@ def escape_stats(stats):
def unescape_stats(stats):
    """
    Unescape '(dot)'s in statistic keyword names with '.' (because '.' is reserved
    in MongoDB).

    Every calculated statistic listed in ``LIST_CALCSTAT_KEYS`` that is present
    in ``stats`` is replaced with the result of ``unescape_dict``. The bundle is
    modified in place and also returned.

    TODO: Move this feature into mentat.datatype.mongodb library.
    """
    # Lazy filter: only statistics actually present in the bundle are touched.
    for stat_name in (name for name in LIST_CALCSTAT_KEYS if name in stats):
        stats[stat_name] = unescape_dict(stats[stat_name])
    return stats
...@@ -66,9 +107,9 @@ def unescape_stats(stats): ...@@ -66,9 +107,9 @@ def unescape_stats(stats):
def escape_stats_full(stats):
    """
    Escape all statistic categories with :py:func:`escape_stats`.

    Categories from ``LIST_STAT_CATEGS`` that are missing in ``stats`` are
    skipped. The structure is modified in place and also returned.
    """
    for category in (name for name in LIST_STAT_CATEGS if name in stats):
        stats[category] = escape_stats(stats[category])
    return stats
...@@ -76,9 +117,9 @@ def escape_stats_full(stats): ...@@ -76,9 +117,9 @@ def escape_stats_full(stats):
def unescape_stats_full(stats):
    """
    Unescape all statistic categories with :py:func:`unescape_stats`.

    Categories from ``LIST_STAT_CATEGS`` that are missing in ``stats`` are
    skipped. The structure is modified in place and also returned.
    """
    for category in (name for name in LIST_STAT_CATEGS if name in stats):
        stats[category] = unescape_stats(stats[category])
    return stats
...@@ -86,35 +127,35 @@ def unescape_stats_full(stats): ...@@ -86,35 +127,35 @@ def unescape_stats_full(stats):
def brief_stats(stats, top_threshold = 20):
    """
    Make statistics more brief.

    When the bundle reports at least one alert, the list of message
    identifiers is dropped and the IP statistics are reduced to the
    ``top_threshold`` most frequent addresses (``ST_LIST_IPS`` is rewritten
    to match, ordered by descending count).

    :param dict stats: Statistical bundle; must contain ``'cnt_alerts'``.
    :param int top_threshold: Maximal number of IP addresses to keep.
    :return: The same ``stats`` object, modified in place.
    :rtype: dict
    """
    if stats['cnt_alerts'] > 0:
        # pop() instead of del: the list may already be gone, e.g. when the
        # bundle has been briefed before.
        stats.pop(ST_LIST_IDS, None)

        # The IP statistic exists only when at least one message contributed
        # an address, so it must not be indexed unconditionally.
        ip_counts = stats.get(ST_SKEY_IPS)
        if ip_counts is not None:
            top_ips = sorted(ip_counts, key=ip_counts.get, reverse=True)[:top_threshold]
            stats[ST_SKEY_IPS] = {ip: ip_counts[ip] for ip in top_ips}
            stats[ST_LIST_IPS] = top_ips
    return stats
def brief_stats_full(stats, top_threshold = 20):
    """
    Make all statistic categories more brief with :py:func:`brief_stats`.

    Categories from ``LIST_STAT_CATEGS`` that are missing in ``stats`` are
    skipped, consistently with :py:func:`escape_stats_full` and
    :py:func:`unescape_stats_full`.

    :param dict stats: Structure keyed by statistical category.
    :param int top_threshold: Passed through to :py:func:`brief_stats`.
    :return: The same ``stats`` object, modified in place.
    :rtype: dict
    """
    for key in LIST_STAT_CATEGS:
        # Guard against partially populated structures instead of raising KeyError.
        if key in stats:
            stats[key] = brief_stats(stats[key], top_threshold)
    return stats
def group_messages(messages): def group_messages(messages):
""" """
Group messages according to the presence of '_CESNET.ResolvedAbuses' key.
""" """
result = collections.defaultdict(list) result = collections.defaultdict(list)
for msg in messages: for msg in messages:
...@@ -129,7 +170,7 @@ def group_messages(messages): ...@@ -129,7 +170,7 @@ def group_messages(messages):
def evaluate_messages(messages, stats = None, brief = False): def evaluate_messages(messages, stats = None, brief = False):
""" """
Evaluate given list of IDEA messages.
""" """
if stats is None: if stats is None:
stats = dict() stats = dict()
...@@ -137,21 +178,21 @@ def evaluate_messages(messages, stats = None, brief = False): ...@@ -137,21 +178,21 @@ def evaluate_messages(messages, stats = None, brief = False):
stats['cnt_alerts'] = len(messages) stats['cnt_alerts'] = len(messages)
if stats['cnt_alerts']: if stats['cnt_alerts']:
if not 'list_ids' in stats: if not ST_LIST_IDS in stats:
stats['list_ids'] = [] stats[ST_LIST_IDS] = []
for msg in messages: for msg in messages:
stats['list_ids'].append(msg['ID']) stats[ST_LIST_IDS].append(msg['ID'])
reg = {} reg = {}
for rule in [['ips', 'Source.IP4'], for rule in [[ST_SKEY_IPS, 'Source.IP4'],
#['ip4s', 'Source.IP4'], #[ST_LIST_IP4S, 'Source.IP4'],
#['ip6s', 'Source.IP6'], #[ST_LIST_IP6S, 'Source.IP6'],
['analyzers', 'Node[#].SW'], [ST_SKEY_ANALYZERS, 'Node[#].SW'],
['categories', 'Category'], [ST_SKEY_CATEGORIES, 'Category'],
['detectors', 'Node[#].Name'], [ST_SKEY_DETECTORS, 'Node[#].Name'],
['abuses', '_CESNET.ResolvedAbuses']]: [ST_SKEY_ABUSES, '_CESNET.ResolvedAbuses']]:
values = jpath_values(msg, rule[1]) values = jpath_values(msg, rule[1])
reg[rule[0]] = values reg[rule[0]] = values
...@@ -162,23 +203,27 @@ def evaluate_messages(messages, stats = None, brief = False): ...@@ -162,23 +203,27 @@ def evaluate_messages(messages, stats = None, brief = False):
for val in values: for val in values:
stats = _counter_inc(stats, rule[0], val) stats = _counter_inc(stats, rule[0], val)
if reg['categories']: if reg[ST_SKEY_CATEGORIES]:
key = '/'.join(reg['categories']) key = '/'.join(reg[ST_SKEY_CATEGORIES])
stats = _counter_inc(stats, 'category_sets', key) stats = _counter_inc(stats, ST_SKEY_CATEGSETS, key)
if reg['detectors'] and reg['analyzers']: if reg[ST_SKEY_DETECTORS] and reg[ST_SKEY_ANALYZERS]:
for det in reg['detectors']: for det in reg[ST_SKEY_DETECTORS]:
for anl in reg['analyzers']: for anl in reg[ST_SKEY_ANALYZERS]:
key = '/'.join((det, anl)) key = '/'.join((det, anl))
stats = _counter_inc(stats, 'detectorsws', key) stats = _counter_inc(stats, ST_SKEY_DETECTORSWS, key)
elif reg['detectors']: elif reg[ST_SKEY_DETECTORS]:
for det in reg['detectors']: for det in reg[ST_SKEY_DETECTORS]:
key = det key = det
stats = _counter_inc(stats, 'detectorsws', key) stats = _counter_inc(stats, ST_SKEY_DETECTORSWS, key)
for key in ('ips', 'analyzers', 'categories', 'category_sets', 'detectors', 'detectorsws', 'abuses'): for key in LIST_CALCSTAT_KEYS:
if key in stats: if key in stats:
stats['cnt_{}'.format(key)] = len(stats[key]) stats['cnt_{}'.format(key)] = len(stats[key])
stats['sum_{}'.format(key)] = sum(stats[key].values())
stats['min_{}'.format(key)] = min(stats[key].values())
stats['max_{}'.format(key)] = max(stats[key].values())
stats['avg_{}'.format(key)] = stats['sum_{}'.format(key)]/stats['cnt_{}'.format(key)]
stats['list_{}'.format(key)] = list(stats[key].keys()) stats['list_{}'.format(key)] = list(stats[key].keys())
return stats return stats
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment