From fd49db5c0fcbc14c4705f58a0a9d8d585d916790 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rajmund=20Hru=C5=A1ka?= <rajmund.hruska@cesnet.cz>
Date: Tue, 3 Jan 2023 11:28:47 +0100
Subject: [PATCH] Feature: Filter out events with low credibility. (Redmine
 issue: #7577)

---
 lib/mentat/module/reporter.py |  1 +
 lib/mentat/reports/event.py   | 58 ++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/lib/mentat/module/reporter.py b/lib/mentat/module/reporter.py
index 25669f149..d49d1a3f0 100644
--- a/lib/mentat/module/reporter.py
+++ b/lib/mentat/module/reporter.py
@@ -67,6 +67,7 @@ Reporting algorithm follows these steps:
     #. Fetch events with given severity, that appeared in database in given
        time window and belonging to that particular group.
     #. Filter events with configured reporting filters.
+    #. Remove events from detectors with low credibility.
     #. Threshold already reported events.
     #. Fetch relapsed events.
     #. Generate *summary* and/or *extra* reports and store them to database.
diff --git a/lib/mentat/reports/event.py b/lib/mentat/reports/event.py
index 3e2f02bb1..d30d258ed 100644
--- a/lib/mentat/reports/event.py
+++ b/lib/mentat/reports/event.py
@@ -41,7 +41,7 @@ import mentat.stats.idea
 import mentat.services.whois
 from mentat.const import tr_
 from mentat.reports.utils import StorageThresholdingCache, NoThresholdingCache
-from mentat.datatype.sqldb import EventReportModel
+from mentat.datatype.sqldb import EventReportModel, DetectorModel
 from mentat.emails.event import ReportEmail
 from mentat.reports.base import BaseReporter
 from mentat.services.eventstorage import record_to_idea
@@ -94,6 +94,7 @@ class EventReporter(BaseReporter):
 
         self.groups_dict = groups_dict
         self.settings_dict = settings_dict
+        self.detectors_dict = {det.name: det for det in self.sqlservice.session.query(DetectorModel).all()}
 
         self.filter_parser.build()
 
@@ -210,17 +211,30 @@ class EventReporter(BaseReporter):
                     )
                     break
 
+            # Create new dictionary to store events coming from credible detectors.
+            aggregated_credible_events = {}
             for groups, events_aggr in aggregated_events.items():
                 group_chain = groups[0]
-                # C: Perform event thresholding.
+                # C: Discard events from detectors with low credibility.
+                _events_aggr, passed_cnt, blocked_cnt = self.filter_events_by_credibility(events_aggr)
+                # If all events were discarded, _events_aggr is None.
+                if _events_aggr:
+                    aggregated_credible_events[groups] = _events_aggr
+                # Save information about how many events passed and how many were discarded.
+                result[str(group_chain)]['evcount_det'] = passed_cnt
+                result[str(group_chain)]['evcount_det_blk'] = blocked_cnt
+
+            for groups, events_aggr in aggregated_credible_events.items():
+                group_chain = groups[0]
+                # D: Perform event thresholding.
                 events_thr, events_aggr = self.threshold_events(events_aggr, abuse_group, group_chain, severity, time_h)
 
                 result[str(group_chain)]['evcount_thr'] = len(events_thr)
-                result[str(group_chain)]['evcount_thr_blk'] = len(events_passed_filters[groups]) - len(events_thr)
+                result[str(group_chain)]['evcount_thr_blk'] = result[str(group_chain)]['evcount_det'] - len(events_thr)
                 if not events_thr:
                     continue
 
-                # D: Save aggregated events for further processing.
+                # E: Save aggregated events for further processing.
                 events[groups] = {}
                 events[groups]['regular'] = events_thr
                 events[groups]['regular_aggr'] = events_aggr
@@ -537,6 +551,42 @@ class EventReporter(BaseReporter):
                 fallback_groups, fltlog = self._filter_groups(fallback_groups, event, fltlog)
         return filtered_groups, fallback_groups, fltlog
 
+    def filter_events_by_credibility(self, events_aggr):
+        """
+        Filter given dictionary of IDEA events aggregated by the source IP address by detector credibility.
+        If the combined credibility of all detectors of an event is less than 0.5, the event is discarded from the report.
+
+        :param dict events_aggr: Dictionary of IDEA events as :py:class:`mentat.idea.internal.Idea` objects.
+        :return: Tuple with filtered dictionary, number of events passed, number of events discarded.
+        :rtype: tuple
+        """
+        passed_cnt = 0
+        blocked_cnt = 0
+        _events_aggr = {}
+        for ip in events_aggr:
+            for event in events_aggr[ip]:
+                _pass = 1.0
+                for detector in event.get_detectors():
+                    if detector not in self.detectors_dict:
+                        self.logger.warning("Event with ID '%s' contains unknown detector '%s'. Assuming full credibility.", event['ID'], detector)
+                        continue
+                    _pass *= self.detectors_dict[detector].credibility
+                if _pass < 0.5:
+                    # TODO: blocked_cnt and passed_cnt are counting duplicate events.
+                    blocked_cnt += 1
+                    # Increase the number of hits for the last detector of the discarded event.
+                    sql_detector = self.detectors_dict[event.get_detectors()[-1]]
+                    sql_detector.hits += 1
+                    # Inefficient, but rare enough to be acceptable.
+                    self.sqlservice.session.add(sql_detector)
+                    self.sqlservice.session.commit()
+                else:
+                    passed_cnt += 1
+                    if ip not in _events_aggr:
+                        _events_aggr[ip] = []
+                    _events_aggr[ip].append(event)
+        return _events_aggr if passed_cnt != 0 else None, passed_cnt, blocked_cnt
+
     def filter_events(self, main_group, events):
         """
         Filter given list of IDEA events according to given abuse group settings.
-- 
GitLab
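
For illustration, the standalone sketch below restates the rule that the new filter_events_by_credibility() method applies: the credibilities of all detectors listed in an event are multiplied together, and the event is dropped from the report when the product falls below 0.5, with the last detector's hit counter incremented. The Detector dataclass, the plain-dict event format and the sample detector names and credibility values are hypothetical stand-ins for mentat.datatype.sqldb.DetectorModel and real IDEA events; this is not code from the patch, only a sketch of the filtering rule it implements.

    from dataclasses import dataclass

    # Threshold used by the patch: events whose combined credibility falls below it are dropped.
    CREDIBILITY_THRESHOLD = 0.5


    @dataclass
    class Detector:
        """Hypothetical stand-in for mentat.datatype.sqldb.DetectorModel."""
        name: str
        credibility: float  # 0.0 = never trusted .. 1.0 = fully trusted
        hits: int = 0       # counts events discarded with this detector listed last


    def combined_credibility(detector_names, detectors):
        """Multiply the credibilities of all detectors listed in an event.

        Unknown detectors contribute a factor of 1.0 (full credibility),
        mirroring the warning-and-continue branch in the patch.
        """
        score = 1.0
        for name in detector_names:
            detector = detectors.get(name)
            if detector is not None:
                score *= detector.credibility
        return score


    def split_by_credibility(events_aggr, detectors):
        """Return (credible events keyed by IP or None, passed count, blocked count)."""
        passed_cnt = blocked_cnt = 0
        credible = {}
        for ip, events in events_aggr.items():
            for event in events:
                names = event["detectors"]
                if combined_credibility(names, detectors) < CREDIBILITY_THRESHOLD:
                    blocked_cnt += 1
                    if names[-1] in detectors:
                        detectors[names[-1]].hits += 1  # blame the last detector, as the patch does
                else:
                    passed_cnt += 1
                    credible.setdefault(ip, []).append(event)
        return (credible if passed_cnt else None), passed_cnt, blocked_cnt


    if __name__ == "__main__":
        detectors = {
            "cz.example.honeypot": Detector("cz.example.honeypot", 0.9),
            "org.example.noisy": Detector("org.example.noisy", 0.4),
        }
        events_aggr = {
            "192.0.2.1": [
                {"ID": "e1", "detectors": ["cz.example.honeypot"]},                       # 0.9  -> kept
                {"ID": "e2", "detectors": ["org.example.noisy"]},                         # 0.4  -> dropped
                {"ID": "e3", "detectors": ["cz.example.honeypot", "org.example.noisy"]},  # 0.36 -> dropped
            ],
        }
        kept, passed, blocked = split_by_credibility(events_aggr, detectors)
        print(passed, blocked)                              # 1 2
        print([e["ID"] for e in kept["192.0.2.1"]])         # ['e1']
        print(detectors["org.example.noisy"].hits)          # 2

Running the sketch keeps e1 and drops e2 and e3, whose products fall below 0.5; that split corresponds to the counts the patch records in evcount_det and evcount_det_blk.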