Skip to content
Snippets Groups Projects
Commit fd49db5c authored by Rajmund Hruška's avatar Rajmund Hruška
Browse files

Feature: Filter out events with low credibility. (Redmine issue: #7577)

parent d167f92d
No related branches found
No related tags found
No related merge requests found
......@@ -67,6 +67,7 @@ Reporting algorithm follows these steps:
#. Fetch events with given severity, that appeared in database in given
time window and belonging to that particular group.
#. Filter events with configured reporting filters.
#. Remove events from detectors with low credibility.
#. Threshold already reported events.
#. Fetch relapsed events.
#. Generate *summary* and/or *extra* reports and store them to database.
......
......@@ -41,7 +41,7 @@ import mentat.stats.idea
import mentat.services.whois
from mentat.const import tr_
from mentat.reports.utils import StorageThresholdingCache, NoThresholdingCache
from mentat.datatype.sqldb import EventReportModel
from mentat.datatype.sqldb import EventReportModel, DetectorModel
from mentat.emails.event import ReportEmail
from mentat.reports.base import BaseReporter
from mentat.services.eventstorage import record_to_idea
......@@ -94,6 +94,7 @@ class EventReporter(BaseReporter):
self.groups_dict = groups_dict
self.settings_dict = settings_dict
self.detectors_dict = {det.name : det for det in self.sqlservice.session.query(DetectorModel).all()}
self.filter_parser.build()
......@@ -210,17 +211,30 @@ class EventReporter(BaseReporter):
)
break
# Create new dictionary to store events coming from credible detectors.
aggregated_credible_events = {}
for groups, events_aggr in aggregated_events.items():
group_chain = groups[0]
# C: Perform event thresholding.
# C: Discard events from detectors with low credibility.
_events_aggr, passed_cnt, blocked_cnt = self.filter_events_by_credibility(events_aggr)
# If all events were discarded, _events_aggr is None.
if _events_aggr:
aggregated_credible_events[groups] = _events_aggr
# Save information about how many events passed and how many were discarded.
result[str(group_chain)]['evcount_det'] = passed_cnt
result[str(group_chain)]['evcount_det_blk'] = blocked_cnt
for groups, events_aggr in aggregated_credible_events.items():
group_chain = groups[0]
# D: Perform event thresholding.
events_thr, events_aggr = self.threshold_events(events_aggr, abuse_group, group_chain, severity, time_h)
result[str(group_chain)]['evcount_thr'] = len(events_thr)
result[str(group_chain)]['evcount_thr_blk'] = len(events_passed_filters[groups]) - len(events_thr)
result[str(group_chain)]['evcount_thr_blk'] = result[str(group_chain)]['evcount_det'] - len(events_thr)
if not events_thr:
continue
# D: Save aggregated events for further processing.
# E: Save aggregated events for further processing.
events[groups] = {}
events[groups]['regular'] = events_thr
events[groups]['regular_aggr'] = events_aggr
......@@ -537,6 +551,42 @@ class EventReporter(BaseReporter):
fallback_groups, fltlog = self._filter_groups(fallback_groups, event, fltlog)
return filtered_groups, fallback_groups, fltlog
def filter_events_by_credibility(self, events_aggr):
    """
    Filter given dictionary of IDEA events aggregated by the source IP address by detector credibility.

    The credibility of an event is the product of the credibilities of all
    its detectors that are present in ``self.detectors_dict``. Detectors
    that are not present there are logged and treated as fully credible.
    If the resulting credibility is less than 0.5, the event is discarded
    from the report and the hit counter of its last detector is increased
    (provided that detector is known).

    :param dict events_aggr: Dictionary of IDEA events as :py:class:`mentat.idea.internal.Idea` objects, keyed by source IP address.
    :return: Tuple with filtered dictionary (``None`` if no event passed), number of events passed, number of events discarded.
    :rtype: tuple
    """
    passed_cnt = 0
    blocked_cnt = 0
    credible_events = {}
    for ip, ip_events in events_aggr.items():
        for event in ip_events:
            detectors = event.get_detectors()
            credibility = 1.0
            for detector in detectors:
                sql_detector = self.detectors_dict.get(detector)
                if sql_detector is None:
                    self.logger.warning(
                        "Event with ID '%s' contains unknown detector '%s'. Assuming full credibility.",
                        event['ID'],
                        detector
                    )
                    continue
                credibility *= sql_detector.credibility

            if credibility < 0.5:
                # TODO: blocked_cnt and passed_cnt are counting duplicate events.
                blocked_cnt += 1
                # Increase number of hits of the last detector. That detector
                # may be unknown (only some other detector in the chain
                # lowered the credibility); there is no record to update
                # then, so the hit is not counted instead of raising KeyError.
                last_detector = self.detectors_dict.get(detectors[-1])
                if last_detector is not None:
                    last_detector.hits += 1
                    # Inefficient but rare so should be alright.
                    self.sqlservice.session.add(last_detector)
                    self.sqlservice.session.commit()
            else:
                passed_cnt += 1
                credible_events.setdefault(ip, []).append(event)

    # Callers rely on None (not an empty dict) when every event was discarded.
    return credible_events if passed_cnt != 0 else None, passed_cnt, blocked_cnt
def filter_events(self, main_group, events):
"""
Filter given list of IDEA events according to given abuse group settings.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment