diff --git a/flowmon-ads/warden3_flowmon_ads_filer.py b/flowmon-ads/warden3_flowmon_ads_filer.py index 4d8779464f87b909151f418117badc417b935ab3..5267fb92fc3923ebacabc3bab13ed6ad2575fdee 100755 --- a/flowmon-ads/warden3_flowmon_ads_filer.py +++ b/flowmon-ads/warden3_flowmon_ads_filer.py @@ -4,20 +4,19 @@ # Copyright (C) 2011-2015 Cesnet z.s.p.o # Use of this source is governed by a 3-clause BSD-style license, see LICENSE file. -import os import sys import getopt -sys.path.append('/data/warden/libs') - -from warden_client import read_cfg, format_time -from warden_filer import SafeDir +import socket import json import csv -from time import strptime, mktime import time import re from uuid import uuid4 +sys.path.append('/data/warden/libs') +from warden_client import format_time +from warden_filer import SafeDir + # Command line options handling # Had to use getopt for 2.6 compatibility. Meh. :( @@ -55,16 +54,27 @@ def get_opts(): # Conversion routines def iso_time(t): - return strptime(t, "%Y-%m-%d %H:%M:%S") + return time.strptime(t, "%Y-%m-%d %H:%M:%S") def int_list(il): if not il: return [] return [int(i.strip()) for i in il.split(",")] -def str_list(sl): +def is_ip(s): + try: + socket.inet_pton(socket.AF_INET, s) + except Exception: + try: + socket.inet_pton(socket.AF_INET6, s) + except Exception: + return False + return True + +def ip_list(sl): if sl: - return [s.strip() for s in sl.split(",")] + l = [s.strip().split(None, 1)[0] for s in sl.split(",")] + return [ip for ip in l if is_ip(ip)] else: return [] @@ -88,7 +98,7 @@ def get_proto(s): return proto def one_proto_list(s): - if s: + if s and s != "not": return [s] return None @@ -97,21 +107,21 @@ def proto_list(pl): ads_fields = ( - ('ID', int), # Unique id within ADS db - ('Timestamp', iso_time), # Timestamp of event generation - ('FirstFlow', iso_time), # Timestamp of the first Flow on which was based the event detection - ('Type', str), # Type of event, in fact a reference to the detection method, which recognized the event - ('TypeDesc', str), # Event type description - ('Perspective', str), # Perspective name - ('Severity', str), # Priority based on perspective - ('Detail', str), # Detailed information on the event - ('Ports', int_list), # List of ports (if identified) - ('Protocol', proto_list), # IP protocol (if identified) - ('Source', str_list), # Event originator (IP address) - ('CapturedSource', str), # DNS name assigned to the IP address at the time of event detection - ('Targets', str_list), # Event targets (a list of IP addresses) - ('NetFlowSource', str), # Flow data source on which the event has been generated - ('UserIdentity', str) # User ID from domain controller + ('ID', str), # Unique id within ADS db + ('Detection time', iso_time), # Timestamp of event generation + ('FirstFlow', iso_time), # Timestamp of the first Flow on which was based the event detection + ('Type', str), # Type of event, in fact a reference to the detection method, which recognized the event + ('TypeDesc', str), # Event type description + ('Perspective', str), # Perspective name + ('Severity', str), # Priority based on perspective + ('Detail', str), # Detailed information on the event + ('Ports', int_list), # List of ports (if identified) + ('Protocol', proto_list), # IP protocol (if identified) + ('Source', ip_list), # Event originator (IP address) + ('CapturedSource', str), # DNS name assigned to the IP address at the time of event detection + ('Targets', ip_list), # Event targets (a list of IP addresses) + ('NetFlowSource', str), # Flow data source on which the event has been generated + ('UserIdentity', str) # User ID from domain controller ) @@ -163,7 +173,7 @@ detail_regexps = { (INTEGER + _SPCM + r"times", int), (r"connections:" + _SPC + INTEGER, int), (r"Mail count:" + _SPC + INTEGER, int), - (r"with response:" + _SPC + INTEGER, r"without response" + _SPC + INTEGER, sum_int) + (r"with response:" + _SPC + INTEGER, r"without response:" + _SPC + INTEGER, sum_int) ), "byte_count": ( (r"uploaded:" + _SPC + FLOAT + _SPC + UNITS, r"downloaded:" + _SPC + FLOAT + _SPC + UNITS, sum_int), @@ -207,7 +217,8 @@ unit_translate = { 'gib': 1024**3 } -def parse_detail(e): + +def parse_detail(e, log=sys.stderr.write): detail = e["Detail"] for key, matchers in detail_regexps.items(): results = [] @@ -224,16 +235,16 @@ def parse_detail(e): elif len(groups) == 1: reg_res.append(groups[0]) else: - val, units = groups # two valued regexps are numbers with units, like 1.2 KiB + val, units = groups # two valued regexps are numbers with units, like 1.2 KiB unit_val = unit_translate.get(units.lower(), 1) reg_res.append(float(val) * unit_val) if reg_res: - if len(reg_res) == 1 and len(matcher)==2: # Singlevalued definition + if len(reg_res) == 1 and len(matcher) == 2: # Singlevalued definition reg_res = reg_res[0] try: res = shaper(reg_res) except Exception as e: - sys.stderr.write('Error parsing "%s" on detail "%s": %s\n' % (reg_res, detail, e)) + log('Error parsing "%s" on detail "%s": %s\n' % (reg_res, detail, e)) else: results.append(res) uniq_results = [] # We cannot use sets for uniq, as result may be unhashable @@ -241,24 +252,24 @@ def parse_detail(e): if val and val not in uniq_results: uniq_results.append(val) if len(uniq_results) > 1: - sys.stderr.write('Warning: multiple regexp rules matched differently for "%s" on detail "%s"\n' % (key, detail)) + log('Warning: multiple regexp rules matched differently for "%s" on detail "%s"\n' % (key, detail)) if uniq_results: e[key] = uniq_results[0] def idea_ip_key(ip): - if not ':' in ip: + if ':' not in ip: return "IP4" else: return "IP6" -def gen_idea_from_ads(ads, orig_data, anonymised_target, add_test): +def gen_idea_from_ads(new_id, ads, orig_data, anonymised_target, add_test): - lts = time.localtime() - ts = ads.get("Timestamp") or lts - ets = ads.get("FirstFlow", 0) - if ets > ts: # ADS sometimes reports FirstFlow greater than DetectTime + lts = time.gmtime() + ts = ads.get("Detection time") or lts + ets = ads.get("FirstFlow") + if ets and ets > ts: # ADS sometimes reports FirstFlow greater than DetectTime ts = ets atype = ads.get("Type") cat = ads_types.get(atype, ("Other",))[:] @@ -276,7 +287,7 @@ def gen_idea_from_ads(ads, orig_data, anonymised_target, add_test): # Also add some protocols guessed from port and how ADS works according to docs if atype in ("DOS", "SIPFLOOD", "HTTPDICT"): ads["Source"], ads["Targets"] = ads["Targets"], ads["Source"] - ads["CapturedSource"], ads["target_hostname"] = None, ads["CapturedSource"] + ads["CapturedSource"], ads["target_hostname"] = None, ads.get("CapturedSource") if atype == "HTTPDICT": # A guess, sure if 80 in ads["Ports"]: @@ -304,7 +315,7 @@ def gen_idea_from_ads(ads, orig_data, anonymised_target, add_test): for p in ads["Protocol"]: if p not in proto: proto.append(p) - ads["Protocol"] = proto + ads["Protocol"] = proto or ["tcp"] # Oh well. # More specific category for BLACKLIST if "botnet" in ads: @@ -315,14 +326,14 @@ def gen_idea_from_ads(ads, orig_data, anonymised_target, add_test): event = { "Format": "IDEA0", - "ID": str(uuid4()), + "ID": new_id, "Category": cat, "DetectTime": format_time(*ts[0:6]), "CreateTime": format_time(*lts[0:6]) } if ads.get("ID"): - event["AltNames"] = ["ADS-%i" % ads["ID"]] + event["AltNames"] = ["ads:%s" % ads["ID"]] if ets: event["EventTime"] = format_time(*ets[0:6]) if ads.get("TypeDesc"): @@ -339,7 +350,7 @@ def gen_idea_from_ads(ads, orig_data, anonymised_target, add_test): source = {} for srcip in ads["Source"]: source.setdefault(idea_ip_key(srcip), []).append(srcip) - if ads["CapturedSource"]: + if ads.get("CapturedSource"): source["Hostname"] = [ads["CapturedSource"]] if source: source["Proto"] = ads["Protocol"] @@ -353,7 +364,7 @@ def gen_idea_from_ads(ads, orig_data, anonymised_target, add_test): if ads["Ports"]: target["Port"] = ads["Ports"] - if anonymised_target != "NONE": + if anonymised_target and anonymised_target != "NONE": tgtips = [anonymised_target] target["Type"] = ["Anonymised"] else: @@ -414,7 +425,7 @@ def main(): # Ignore "End of attack" events as they summarise previous ones # and we would get duplicate counts. continue - event = gen_idea_from_ads(tr_row, row, opts["target"], opts["test"]) + event = gen_idea_from_ads(str(uuid4()), tr_row, row, opts["target"], opts["test"]) nf = out.newfile() try: data = json.dumps(event)