From fd49db5c0fcbc14c4705f58a0a9d8d585d916790 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rajmund=20Hru=C5=A1ka?= <rajmund.hruska@cesnet.cz>
Date: Tue, 3 Jan 2023 11:28:47 +0100
Subject: [PATCH] Feature: Filter out events with low credibility. (Redmine
 issue: #7577)

---
 lib/mentat/module/reporter.py |  1 +
 lib/mentat/reports/event.py   | 58 ++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/lib/mentat/module/reporter.py b/lib/mentat/module/reporter.py
index 25669f149..d49d1a3f0 100644
--- a/lib/mentat/module/reporter.py
+++ b/lib/mentat/module/reporter.py
@@ -67,6 +67,7 @@ Reporting algorithm follows these steps:
         #. Fetch events with given severity, that appeared in database in given
            time window and belonging to that particular group.
         #. Filter events with configured reporting filters.
+        #. Remove events from detectors with low credibility.
         #. Threshold already reported events.
         #. Fetch relapsed events.
         #. Generate *summary* and/or *extra* reports and store them to database.
diff --git a/lib/mentat/reports/event.py b/lib/mentat/reports/event.py
index 3e2f02bb1..d30d258ed 100644
--- a/lib/mentat/reports/event.py
+++ b/lib/mentat/reports/event.py
@@ -41,7 +41,7 @@ import mentat.stats.idea
 import mentat.services.whois
 from mentat.const import tr_
 from mentat.reports.utils import StorageThresholdingCache, NoThresholdingCache
-from mentat.datatype.sqldb import EventReportModel
+from mentat.datatype.sqldb import EventReportModel, DetectorModel
 from mentat.emails.event import ReportEmail
 from mentat.reports.base import BaseReporter
 from mentat.services.eventstorage import record_to_idea
@@ -94,6 +94,7 @@ class EventReporter(BaseReporter):
 
         self.groups_dict = groups_dict
         self.settings_dict = settings_dict
+        self.detectors_dict = {det.name : det for det in self.sqlservice.session.query(DetectorModel).all()}
 
         self.filter_parser.build()
 
@@ -210,17 +211,30 @@ class EventReporter(BaseReporter):
                 )
                 break
 
+            # Create new dictionary to store events coming from credible detectors.
+            aggregated_credible_events = {}
             for groups, events_aggr in aggregated_events.items():
                 group_chain = groups[0]
-                # C: Perform event thresholding.
+                # C: Discard events from detectors with low credibility.
+                _events_aggr, passed_cnt, blocked_cnt = self.filter_events_by_credibility(events_aggr)
+                # If all events were discarded, _events_aggr is None.
+                if _events_aggr:
+                    aggregated_credible_events[groups] = _events_aggr
+                # Save information about how many events passed and how many were discarded.
+                result[str(group_chain)]['evcount_det'] = passed_cnt
+                result[str(group_chain)]['evcount_det_blk'] = blocked_cnt
+
+            for groups, events_aggr in aggregated_credible_events.items():
+                group_chain = groups[0]
+                # D: Perform event thresholding.
                 events_thr, events_aggr = self.threshold_events(events_aggr, abuse_group, group_chain, severity, time_h)
 
                 result[str(group_chain)]['evcount_thr'] = len(events_thr)
-                result[str(group_chain)]['evcount_thr_blk'] = len(events_passed_filters[groups]) - len(events_thr)
+                result[str(group_chain)]['evcount_thr_blk'] = result[str(group_chain)]['evcount_det'] - len(events_thr)
                 if not events_thr:
                     continue
 
-                # D: Save aggregated events for further processing.
+                # E: Save aggregated events for further processing.
                 events[groups] = {}
                 events[groups]['regular'] = events_thr
                 events[groups]['regular_aggr'] = events_aggr
@@ -537,6 +551,42 @@ class EventReporter(BaseReporter):
         fallback_groups, fltlog = self._filter_groups(fallback_groups, event, fltlog)
         return filtered_groups, fallback_groups, fltlog
 
+    def filter_events_by_credibility(self, events_aggr):
+        """
+        Filter given dictionary of IDEA events aggregated by the source IP address by detector credibility.
+        Credibilities of all known detectors of an event are multiplied; events with the result below 0.5 are discarded.
+
+        :param dict events_aggr: Dictionary of IDEA events as :py:class:`mentat.idea.internal.Idea` objects.
+        :return: Tuple with filtered dictionary (``None`` if no event passed), number of events passed, number of events discarded.
+        :rtype: tuple
+        """
+        passed_cnt = 0
+        blocked_cnt = 0
+        _events_aggr = {}
+        for ip in events_aggr:
+            for event in events_aggr[ip]:
+                _pass = 1.0
+                sql_detector = None
+                for detector in event.get_detectors():
+                    if detector not in self.detectors_dict:
+                        self.logger.warning("Event with ID '%s' contains unknown detector '%s'. Assuming full credibility.'", event['ID'], detector)
+                        continue
+                    # Remember the last known detector, so an unknown one is never looked up when counting hits.
+                    sql_detector = self.detectors_dict[detector]
+                    _pass *= sql_detector.credibility
+                if _pass < 0.5:
+                    # TODO: blocked_cnt and passed_cnt are counting duplicate events.
+                    blocked_cnt += 1
+                    # Increase number of hits; only known detectors change _pass, so sql_detector is set here.
+                    sql_detector.hits += 1
+                    # Inefficient but rare so should be alright.
+                    self.sqlservice.session.add(sql_detector)
+                    self.sqlservice.session.commit()
+                else:
+                    passed_cnt += 1
+                    _events_aggr.setdefault(ip, []).append(event)
+        return _events_aggr if passed_cnt != 0 else None, passed_cnt, blocked_cnt
+
     def filter_events(self, main_group, events):
         """
         Filter given list of IDEA events according to given abuse group settings.
-- 
GitLab