-
Pavel Kácha authoredPavel Kácha authored
labrea-idea.py 22.40 KiB
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import sys
import re
import time
import optparse
import signal
import uuid
import codecs
import json
import socket
import resource
import atexit
import logging
import logging.handlers
import os.path as pth
from collections import namedtuple
from ipaddress import IPv4Address
from pwd import getpwnam
from grp import getgrnam
try:
from itertools import izip
except ImportError:
izip = zip
try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict
try:
    from contextlib import nested
except ImportError:
    # contextlib.nested is not available here; provide a replacement
    # built on ExitStack.
    from contextlib import ExitStack, contextmanager

    @contextmanager
    def nested(*contexts):
        """Enter all *contexts* in order and exit them in reverse order.

        Yields a list with the value returned by each context manager's
        ``__enter__`` (one item per argument, in argument order), so that
        ``with nested(a, b) as (x, y):`` binds the entered values rather
        than the managers themselves.
        """
        with ExitStack() as stack:
            yield [stack.enter_context(ctx) for ctx in contexts]
class WindowContextMgr(object):
    """Aggregate events into per-key contexts bounded by time windows.

    A context is closed (and its aggregated result returned) when it is
    older than ``window`` seconds since creation, when it has not been
    updated for ``timeout`` seconds, or when ``ctx_append`` reports it is
    full.

    Subclasses must provide:
      * ``match(event)`` - context key for the event, or None to ignore it
      * ``ctx_create(event)`` - new context object for the first event
      * ``ctx_append(ctx, event)`` - add event; return False to close now
      * ``ctx_close(ctx)`` - aggregated result, or None to emit nothing
    """

    def __init__(self, window=60*10, timeout=60*5, ideagen=None, anonymise=None):
        self.contexts = {}
        # Highest timestamp seen so far; it never moves backwards.
        self.update_timestamp = 0
        self.window = window
        self.timeout = timeout
        self.ideagen = ideagen
        # Netmask keeping the top `anonymise` bits (all 32 when unset).
        self.tgt_mask = 0xFFFFFFFF << (32 - (anonymise or 32)) & 0xFFFFFFFF
        # Both queues map context key -> timestamp and are kept in
        # ascending timestamp order (creation time / last update time).
        self.first_update_queue = OrderedDict()
        self.last_update_queue = OrderedDict()
        # Hammer to mitigate too big events
        self.max_count = 200

    def _advance_clock(self, timestamp):
        """Move the internal clock forward; None or older stamps are ignored."""
        if timestamp is not None and timestamp > self.update_timestamp:
            self.update_timestamp = timestamp

    def _forget(self, ctx):
        """Drop a context key from the context table and both queues."""
        del self.contexts[ctx]
        del self.first_update_queue[ctx]
        del self.last_update_queue[ctx]

    def expire_queue(self, queue, window):
        """Close every context in *queue* older than *window* seconds.

        Returns the list of non-None results of ``ctx_close``.
        """
        aggr_events = []
        ctx_to_del = []
        limit = self.update_timestamp - window
        for ctx, timestamp in queue.items():
            if timestamp >= limit:
                # Queue is time ordered, everything further is newer.
                break
            closed = self.ctx_close(self.contexts[ctx])
            if closed is not None:
                aggr_events.append(closed)
            ctx_to_del.append(ctx)
        # Deleting is deferred, the queue must not change during iteration.
        for ctx in ctx_to_del:
            self._forget(ctx)
        return aggr_events

    def process(self, event=None, timestamp=None):
        """Advance the clock, expire old contexts and account *event*.

        Both arguments are optional: with ``event=None`` only expiration
        happens, with ``timestamp=None`` the clock is left untouched.
        Returns the list of aggregated results closed by this call.
        """
        self._advance_clock(timestamp)

        aggr_events = []
        aggr_events.extend(self.expire_queue(self.first_update_queue, self.window))
        aggr_events.extend(self.expire_queue(self.last_update_queue, self.timeout))

        if event is None:
            return aggr_events
        ctx = self.match(event)
        if ctx is None:
            return aggr_events

        if ctx not in self.contexts:
            self.contexts[ctx] = self.ctx_create(event)
            self.first_update_queue[ctx] = self.update_timestamp
            self.last_update_queue[ctx] = self.update_timestamp
        elif not self.ctx_append(self.contexts[ctx], event):
            # Context refused to grow any further - flush it right away.
            closed = self.ctx_close(self.contexts[ctx])
            if closed is not None:
                aggr_events.append(closed)
            self._forget(ctx)
        else:
            # Delete and reinsert to move the key to the end of the queue.
            del self.last_update_queue[ctx]
            self.last_update_queue[ctx] = self.update_timestamp

        return aggr_events

    def close(self, timestamp):
        """Close all open contexts and return their aggregated results."""
        self._advance_clock(timestamp)
        aggr_events = []
        for context in self.contexts.values():
            closed = self.ctx_close(context)
            if closed is not None:
                aggr_events.append(closed)
        self.contexts = {}
        self.first_update_queue = OrderedDict()
        self.last_update_queue = OrderedDict()
        return aggr_events
class PingContextMgr(WindowContextMgr):
    """Aggregates 'Ping' log events, one context per source address."""

    def _masked_target(self, event):
        """Target address of *event* as an integer with tgt_mask applied."""
        return int(IPv4Address(event.tgt_ip)) & self.tgt_mask

    def match(self, event):
        """Key ping events by source IP; other messages yield None."""
        if 'Ping' in event.message:
            return event.src_ip
        return None

    def ctx_create(self, event):
        """Start a context holding the first (masked) target address."""
        now = self.update_timestamp
        return {
            "src_ip": event.src_ip,
            "tgt_ips": {self._masked_target(event)},
            "count": 1,
            "first_update": now,
            "last_update": now,
        }

    def ctx_append(self, ctx, event):
        """Account another ping; False once count reaches max_count."""
        ctx["tgt_ips"].add(self._masked_target(event))
        ctx["count"] += 1
        ctx["last_update"] = self.update_timestamp
        return ctx["count"] < self.max_count

    def ctx_close(self, ctx):
        """Build the "ping" message from the context via ideagen."""
        # Pings carry no ports, so every target gets an empty port list.
        portless_targets = [(ip, []) for ip in ctx["tgt_ips"]]
        return self.ideagen.gen_idea(
            src=ctx["src_ip"],
            targets=portless_targets,
            detect_time=self.update_timestamp,
            event_time=ctx["first_update"],
            cease_time=ctx["last_update"],
            count=ctx["count"],
            template="ping"
        )
class ConnectContextMgr(WindowContextMgr):
    """Aggregates 'Connect' log events, one context per source address."""

    def _masked_target(self, event):
        """Target address of *event* as an integer with tgt_mask applied."""
        return int(IPv4Address(event.tgt_ip)) & self.tgt_mask

    def match(self, event):
        """Key connect events by source IP; other messages yield None."""
        if 'Connect' in event.message:
            return event.src_ip
        return None

    def ctx_create(self, event):
        """Start a context mapping the masked target to its first port."""
        now = self.update_timestamp
        return {
            "src_ip": event.src_ip,
            "tgt_ips_ports": {self._masked_target(event): {event.tgt_port}},
            "count": 1,
            "first_update": now,
            "last_update": now,
        }

    def ctx_append(self, ctx, event):
        """Record another target/port pair.

        Returns False once the number of distinct (masked) targets
        reaches max_count.
        """
        ports_by_ip = ctx["tgt_ips_ports"]
        target = self._masked_target(event)
        if target not in ports_by_ip:
            ports_by_ip[target] = set()
        ports_by_ip[target].add(event.tgt_port)
        ctx["count"] += 1
        ctx["last_update"] = self.update_timestamp
        return len(ports_by_ip) < self.max_count

    def ctx_close(self, ctx):
        """Build the "connect" message from the context via ideagen."""
        return self.ideagen.gen_idea(
            src=ctx["src_ip"],
            targets=ctx["tgt_ips_ports"].items(),
            detect_time=self.update_timestamp,
            event_time=ctx["first_update"],
            cease_time=ctx["last_update"],
            count=ctx["count"],
            template="connect"
        )
class InboundContextMgr(ConnectContextMgr):
    """Aggregates 'Inbound' log events; emitted with the "synack" template."""

    def match(self, event):
        """Key inbound events by source IP; other messages yield None."""
        if 'Inbound' in event.message:
            return event.src_ip
        return None

    def ctx_close(self, ctx):
        """Build the "synack" message from the context via ideagen."""
        idea_args = dict(
            src=ctx["src_ip"],
            targets=ctx["tgt_ips_ports"].items(),
            detect_time=self.update_timestamp,
            event_time=ctx["first_update"],
            cease_time=ctx["last_update"],
            count=ctx["count"],
            template="synack"
        )
        return self.ideagen.gen_idea(**idea_args)
class FileWatcher(object):
    """Line reader that follows a file across replacement and truncation.

    ``readline()`` never blocks: it returns one complete line (ending with
    a newline) or an empty string when no complete line is available yet.
    An unfinished trailing line is kept in ``line_buffer`` and prepended to
    its remainder once that arrives.

    The object is its own (endless) iterator and a context manager.
    """

    def __init__(self, filename, tail=True):
        """Open *filename*; with *tail* start reading at its current end."""
        self.filename = filename
        self.open()
        self.line_buffer = ""
        if tail and self.f:
            self.f.seek(0, os.SEEK_END)

    def open(self):
        """(Re)open the file and remember its inode and size.

        On failure ``self.f`` is None and inode/size are set to -1.
        """
        try:
            self.f = codecs.open(self.filename, "r", encoding="ISO-8859-1")
            st = os.fstat(self.f.fileno())
            self.inode, self.size = st.st_ino, st.st_size
        except IOError:
            self.f = None
            self.inode = -1
            self.size = -1

    def _check_reopen(self):
        """Reopen when the path points to another inode or the file shrank."""
        try:
            st = os.stat(self.filename)
            cur_inode, cur_size = st.st_ino, st.st_size
        except OSError:
            cur_inode = -1
            cur_size = -1
        if cur_inode != self.inode or cur_size < self.size:
            self.close()
            self.open()
        else:
            # Remember the last observed size, so that a later truncation
            # is recognised even after the file has grown past the size
            # it had when it was opened.
            self.size = cur_size

    def readline(self):
        """Return the next complete line, or "" when none is ready."""
        if not self.f:
            self.open()
            if not self.f:
                return ""
        res = self.f.readline()
        if not res:
            # End of file - see whether the file was replaced underneath.
            self._check_reopen()
            if not self.f:
                return ""
            res = self.f.readline()
        if not res.endswith("\n"):
            # Incomplete (or no) data: stash it and report "nothing yet",
            # so the caller never sees a line fragment.
            self.line_buffer += res
            return ""
        res = self.line_buffer + res
        self.line_buffer = ""
        return res

    def close(self):
        """Close the file and forget everything tied to it."""
        try:
            if self.f:
                self.f.close()
        except IOError:
            pass
        # Drop the handle so that a later readline() reopens the file
        # instead of reading from a closed object.
        self.f = None
        # A fragment buffered from the old file must not be glued to the
        # first line of whatever gets opened next.
        self.line_buffer = ""
        self.inode = -1
        self.size = -1

    def __repr__(self):
        return '%s("%s")' % (type(self).__name__, self.filename)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

    def __iter__(self):
        return self

    def next(self):
        """Iterator step (Python 2 name); never raises StopIteration."""
        return self.readline()

    # Python 3 iterator protocol.
    __next__ = next
class IdeaGen(object):
    """Builds IDEA message dictionaries from aggregated LaBrea contexts."""

    def __init__(self, name, test=False, anonymise=None):
        """Remember node name, test flag and anonymisation prefix length."""
        self.name = name
        self.test = test
        self.anonymise = anonymise
        # Per-template constants used by gen_idea().
        self.template = {
            "connect": {
                "category": ["Recon.Scanning"],
                "description": "TCP connections/scan",
                "note": "Connections from remote host to never assigned IP",
                "proto": ["tcp"]
            },
            "ping": {
                "category": ["Recon.Scanning"],
                "description": "Ping scan",
                "note": "Ping requests from remote host to never assigned IP",
                "proto": ["icmp"]
            },
            "synack": {
                "category": ["Availability.DoS"],
                "description": "Unsolicited TCP SYN/ACK connections/scan",
                "note": "Unsolicited SYN/ACK packet received from remote host to never assigned IP",
                "source_type": ["Backscatter"],
                "source_to_target": True,
                "proto": ["tcp"]
            }
        }

    def format_timestamp(self, epoch):
        """Render epoch seconds as a UTC ``YYYY-MM-DDTHH:MM:SSZ`` string."""
        date_and_time = time.gmtime(epoch)[:6]
        return "%04d-%02d-%02dT%02d:%02d:%02dZ" % date_and_time

    def gen_idea(self, src, targets, detect_time, event_time, cease_time, count, template):
        """Assemble one message dictionary.

        *targets* is an iterable of ``(ip_as_int, ports)`` pairs, *template*
        a key of ``self.template``. Times are epoch seconds.
        """
        tmpl = self.template[template]

        isource = {"IP4": [src], "Proto": tmpl["proto"]}
        if "source_type" in tmpl:
            isource["Type"] = tmpl["source_type"]

        # Fold multiple IPs with the same portset
        ips_by_portset = {}
        for address, portlist in targets:
            portset = frozenset(portlist)
            if portset not in ips_by_portset:
                ips_by_portset[portset] = []
            ips_by_portset[portset].append(address)

        itargets = []
        for portset, addresses in ips_by_portset.items():
            itarget = {"Proto": tmpl["proto"]}
            if self.anonymise:
                itarget["Tags"] = ["Anonymised"]
                itarget["Anonymised"] = True
                suffix = "/%d" % self.anonymise
                itarget["IP4"] = [str(IPv4Address(ip)) + suffix for ip in addresses]
            else:
                itarget["IP4"] = [str(IPv4Address(ip)) for ip in addresses]
            if portset:
                itarget["Port"] = sorted(int(port) for port in portset)
            itargets.append(itarget)

        categories = list(tmpl["category"])
        if self.test:
            categories.append("Test")

        idea = {
            "Format": "IDEA0",
            "ID": str(uuid.uuid4()),
            "Category": categories,
            "Description": tmpl["description"],
            "DetectTime": self.format_timestamp(detect_time),
            "EventTime": self.format_timestamp(event_time),
            "CeaseTime": self.format_timestamp(cease_time),
            "ConnCount": count,
            "Note": tmpl["note"],
            "Target": itargets,
            "Node": [{
                "SW": ["LaBrea"],
                "Type": ["Connection", "Tarpit"],
                "Name": self.name
            }]
        }
        # Templates flagged source_to_target list the remote host among the
        # targets and carry no "Source" key at all.
        if tmpl.get("source_to_target", False):
            idea["Target"].append(isource)
        else:
            idea["Source"] = [isource]
        return idea
class Filer(object):
    """Writes files into ``<directory>/tmp`` and publishes them by renaming
    into ``<directory>/incoming``.
    """

    def __init__(self, directory):
        """Make sure *directory* and its tmp/incoming subdirectories exist."""
        self.basedir = self._ensure_path(directory)
        self.tmp = self._ensure_path(pth.join(self.basedir, "tmp"))
        self.incoming = self._ensure_path(pth.join(self.basedir, "incoming"))
        self.hostname = socket.gethostname()
        self.pid = os.getpid()

    def _ensure_path(self, p):
        """Create directory *p* unless it already exists; return *p*.

        Raises OSError when *p* cannot be created and is not a directory.
        """
        try:
            os.mkdir(p)
        except OSError:
            if not pth.isdir(p):
                raise
        return p

    def _get_new_name(self, fd=None):
        """Compose a file name from hostname, pid, current time and, when
        *fd* is given, the device and inode numbers of that descriptor.
        """
        if fd is None:
            inode, device = 0, 0
        else:
            st = os.fstat(fd)
            inode, device = st.st_ino, st.st_dev
        return "%s.%d.%f.%d.%d" % (
            self.hostname, self.pid, time.time(), device, inode)

    def create_unique_file(self):
        """Create a new file in the tmp directory.

        Returns a ``(file object opened for writing, short name)`` pair;
        pass the short name to ``publish_file`` once writing is finished.
        """
        # Local import: the module-level imports do not provide errno.
        import errno

        # First find and open name unique within tmp
        tmpname = None
        while not tmpname:
            tmpname = self._get_new_name()
            try:
                fd = os.open(pth.join(self.tmp, tmpname), os.O_CREAT | os.O_RDWR | os.O_EXCL)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise   # other errors than duplicates should get noticed
                tmpname = None
        # Now we know the device/inode, rename to make unique within system
        try:
            newname = self._get_new_name(fd)
            os.rename(pth.join(self.tmp, tmpname), pth.join(self.tmp, newname))
        except Exception:
            # Do not leak the descriptor when the rename step fails.
            os.close(fd)
            raise
        nf = os.fdopen(fd, "w")
        return nf, newname

    def publish_file(self, short_name):
        """Move a finished file from the tmp to the incoming directory."""
        os.rename(pth.join(self.tmp, short_name), pth.join(self.incoming, short_name))
def get_uid_gid(str_id, get_nam_func):
    """Turn a numeric or symbolic id string into a number.

    An empty/None *str_id* gives None. A string that parses as an integer
    is returned as that integer; anything else is looked up by calling
    *get_nam_func* with it and taking element 2 of the result.
    """
    if not str_id:
        return None
    try:
        return int(str_id)
    except ValueError:
        # Not a number, so resolve it as a name.
        return get_nam_func(str_id)[2]
def daemonize(
        work_dir=None, chroot_dir=None,
        umask=None, uid=None, gid=None,
        pidfile=None, files_preserve=None, signals=None):
    """Detach the current process and turn it into a daemon.

    Steps, in order: optional chroot, umask and working directory change,
    switch of group and user, double fork with a new session in between,
    signal handler installation, closing of inherited descriptors, PID
    file creation and redirection of stdin/stdout/stderr to the null
    device.

    work_dir, chroot_dir -- directories to chdir/chroot into (or None)
    umask -- file mode creation mask to set (or None)
    uid, gid -- numeric id or name, as a string (or None to keep current)
    pidfile -- path of the PID file to create; it must not exist yet and
        is removed again at interpreter exit
    files_preserve -- iterable of objects with fileno() to keep open
    signals -- mapping of signal number to handler
    """
    # Dirs, limits, users
    if chroot_dir is not None:
        os.chdir(chroot_dir)
        os.chroot(chroot_dir)
    if umask is not None:
        os.umask(umask)
    if work_dir is not None:
        os.chdir(work_dir)
    # Group goes first: after dropping the user id the process may no
    # longer be allowed to change its group.
    gid = get_uid_gid(gid, getgrnam)
    if gid is not None:
        os.setgid(gid)
    uid = get_uid_gid(uid, getpwnam)
    if uid is not None:
        os.setuid(uid)
    # Doublefork, split session
    if os.fork() > 0:
        os._exit(0)
    try:
        os.setsid()
    except OSError:
        pass
    if os.fork() > 0:
        os._exit(0)
    # Setup signal handlers
    for (signum, handler) in (signals or {}).items():
        signal.signal(signum, handler)
    # Close descriptors
    descr_preserve = set(f.fileno() for f in (files_preserve or ()))
    maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
    if maxfd == resource.RLIM_INFINITY:
        maxfd = 65535
    # NOTE(review): the range stops before 3, so besides stdin, stdout and
    # stderr also descriptor 3 is left open. Kept exactly as it was, since
    # something opened before this call may depend on it - confirm.
    for fd in range(maxfd, 3, -1):
        if fd not in descr_preserve:
            try:
                os.close(fd)
            except Exception:
                # Most descriptors in the range are simply not open.
                pass
    # PID file
    if pidfile is not None:
        pidd = os.open(pidfile, os.O_RDWR | os.O_CREAT | os.O_EXCL | os.O_TRUNC)
        try:
            # os.write() takes bytes on Python 3.
            os.write(pidd, (str(os.getpid()) + "\n").encode("ascii"))
        finally:
            os.close(pidd)

        # Define and setup atexit closure
        @atexit.register
        def unlink_pid():
            try:
                os.unlink(pidfile)
            except Exception:
                pass
    # Redirect stdin, stdout, stderr to /dev/null
    devnull = os.open(os.devnull, os.O_RDWR)
    for fd in range(3):
        os.dup2(devnull, fd)
    if devnull > 2:
        # The duplicates stay open, the original is not needed anymore.
        os.close(devnull)
def save_events(aggr, filer):
    """Store every message from *aggr* as its own JSON file.

    Each message is written into a file obtained from
    ``filer.create_unique_file()``, published with ``filer.publish_file()``
    after the file has been closed, and reported to the log.
    """
    for idea in aggr:
        handle, short_name = filer.create_unique_file()
        with handle:
            handle.write(json.dumps(idea, ensure_ascii=True))
        filer.publish_file(short_name)
        summary = (
            idea["ID"],
            "+".join(idea["Category"]),
            idea["Description"],
            idea["ConnCount"],
            short_name,
        )
        logging.info("Saved %s %s \"%s\" (%d) as file %s" % summary)
# Table of (compiled regex, result type) pairs, tried in order against each
# log line. The regex groups are passed positionally to the result type;
# a result type of None marks lines that are recognised but carry no event.
# The comments above each entry show sample lines it is meant for.
RE_LIST = (
    # 1493035442 Initial Connect - tarpitting: 89.163.242.15 56736 -> 195.113.254.182 9898
    # 1493037991 Inbound SYN/ACK: 185.62.190.15 21001 -> 195.113.252.222 15584
    (
        re.compile(r'([0-9]+) (Initial Connect - tarpitting:|Inbound SYN/ACK:) ([^ ]+) ([0-9]+) -> ([^ ]+) ([0-9]+).*'),
        namedtuple("connect_tuple", ("timestamp", "message", "src_ip", "src_port", "tgt_ip", "tgt_port"))
    ),
    # 1493035442 Responded to a Ping: 88.86.96.25 -> 195.113.253.87 *
    (
        re.compile(r'([0-9]+) (Responded to a Ping:) ([^ ]+) -> ([^ ]+).*'),
        namedtuple("ping_tuple", ("timestamp", "message", "src_ip", "tgt_ip"))
    ),
    # Known lines without any event to report:
    # 1645797338 Capturing local IP 195.113.252.187
    # 1645797339 Persist Activity: 45.227.253.45 64088 -> 195.113.254.67 3136 *
    # 1645797340 Additional Activity 195.113.253.198
    # 1645857629 Linux Persist Activity: 61.177.172.89 12241 -> 195.113.253.252 22 *
    # 1645857629 Persist Trapping: 103.66.50.44 62229 -> 195.113.252.5 445 *
    # 1714147064 Current average bw: 96 (Kb/sec)
    (
        re.compile(r'([0-9]+) (?:Capturing local|Persist Activity:|Additional Activity|Linux Persist Activity:|Persist Trapping:|Current average bw:).*'),
        None
    )
)
def match_event(line):
    """Parse one log line using the RE_LIST table.

    Returns the named tuple built by the first matching entry, or None
    when that entry has no tuple type attached. A line matching no entry
    at all is logged and None is returned.
    """
    for pattern, make_event in RE_LIST:
        found = pattern.match(line)
        if not found:
            continue
        if make_event:
            return make_event(*found.groups())
        # Recognised line that carries no event.
        return None
    logging.info("Unmatched line: \"%s\"" % line.replace("\n", r"\n"))
    return None
def get_args():
    """Build the command line parser.

    Returns the configured ``optparse.OptionParser``; the caller is
    expected to run ``parse_args()`` on it.
    """
    optp = optparse.OptionParser(
        usage="usage: %prog [options] logfile ...",
        description="Watch LaBrea logfiles and generate Idea events into directory")
    optp.add_option(
        "-w", "--window",
        default=900,
        dest="window",
        type="int",
        action="store",
        help="max detection window (default: %default)")
    optp.add_option(
        "-t", "--timeout",
        default=300,
        dest="timeout",
        type="int",
        action="store",
        help="detection timeout (default: %default)")
    optp.add_option(
        "-a", "--anonymise",
        default=None,
        dest="anonymise",
        type="int",
        action="store",
        help="anonymisation mask length (as behind slash in CIDR) (default: no anonymisation)")
    optp.add_option(
        "-n", "--name",
        default=None,
        dest="name",
        type="string",
        action="store",
        help="Warden client name")
    optp.add_option(
        "--test",
        default=False,
        dest="test",
        action="store_true",
        help="Add Test category")
    optp.add_option(
        "-o", "--oneshot",
        default=False,
        dest="oneshot",
        action="store_true",
        help="process files and quit (do not daemonize)")
    optp.add_option(
        "--poll",
        default=1,
        dest="poll",
        type="int",
        action="store",
        help="log file polling interval")
    optp.add_option(
        "-d", "--dir",
        default=None,
        dest="dir",
        type="string",
        action="store",
        help="Target directory (mandatory)")
    optp.add_option(
        "-p", "--pid",
        # Default PID file is named after the script itself.
        default=pth.join("/var/run", pth.splitext(pth.basename(sys.argv[0]))[0] + ".pid"),
        dest="pid",
        type="string",
        action="store",
        help="create PID file with this name (default: %default)")
    optp.add_option(
        "-u", "--uid",
        default=None,
        dest="uid",
        type="string",
        action="store",
        help="user id to run under")
    optp.add_option(
        "-g", "--gid",
        default=None,
        dest="gid",
        type="string",
        action="store",
        help="group id to run under")
    optp.add_option(
        "-v", "--verbose",
        dest="verbose",
        action="store_true",
        help="turn on debug logging")
    optp.add_option(
        "--log",
        default="local7",
        dest="log",
        type="string",
        action="store",
        help="syslog facility or log file name (default: %default)")
    # --realtime/--norealtime share one destination; the default is set on
    # the first of them.
    optp.add_option(
        "--realtime",
        default=True,
        dest="realtime",
        action="store_true",
        help="use system time along with log timestamps (default)")
    optp.add_option(
        "--norealtime",
        dest="realtime",
        action="store_false",
        help="don't use system time, use solely log timestamps")
    return optp
# Flags shared between the signal handlers below and main(): the handlers
# only flip them, main() acts on them in its processing loop.
running_flag = True
reload_flag = False


def terminate_me(signum, frame):
    """Signal handler: clear ``running_flag``. Arguments are unused."""
    global running_flag
    running_flag = False


def reload_me(signum, frame):
    """Signal handler: set ``reload_flag``. Arguments are unused."""
    global reload_flag
    reload_flag = True
def main():
    """Parse the command line and convert log lines into saved events.

    With --oneshot the given files are read once and the process stays in
    foreground; otherwise the process daemonizes and keeps following the
    files until terminated. Prints help and exits when no log file, no
    name or no target directory is given. Any exception raised during
    processing is logged.
    """
    global reload_flag

    # Imported locally, the module-level imports provide only (i)zip.
    try:
        from itertools import izip_longest as zip_longest
    except ImportError:
        from itertools import zip_longest

    optp = get_args()
    opts, args = optp.parse_args()
    if not args or opts.name is None or opts.dir is None:
        optp.print_help()
        sys.exit()

    logformat = "%(filename)s[%(process)d] %(message)s"
    logger = logging.getLogger()
    if opts.oneshot:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s " + logformat))
    else:
        if "/" in opts.log:
            # Anything containing a slash is taken as a log file path.
            handler = logging.handlers.WatchedFileHandler(opts.log)
            handler.setFormatter(logging.Formatter("%(asctime)s " + logformat))
        else:
            handler = logging.handlers.SysLogHandler(address="/dev/log", facility=opts.log)
            handler.setFormatter(logging.Formatter(logformat))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG if opts.verbose else logging.INFO)

    try:
        if opts.oneshot:
            signal.signal(signal.SIGINT, terminate_me)
            signal.signal(signal.SIGTERM, terminate_me)
            files = [codecs.open(arg, "r", encoding="ISO-8859-1") for arg in args]
        else:
            daemonize(
                pidfile=opts.pid,
                uid=opts.uid,
                gid=opts.gid,
                signals={
                    signal.SIGINT: terminate_me,
                    signal.SIGTERM: terminate_me,
                    signal.SIGHUP: reload_me
                })
            files = [FileWatcher(arg) for arg in args]

        ideagen = IdeaGen(
            name=opts.name, test=opts.test, anonymise=opts.anonymise)
        filer = Filer(opts.dir)
        contexts = [
            context(window=opts.window, timeout=opts.timeout, ideagen=ideagen, anonymise=opts.anonymise)
            for context in [PingContextMgr, ConnectContextMgr, InboundContextMgr]
        ]

        with nested(*files):
            # One line from each file per round. zip_longest keeps going
            # until the longest plain file is exhausted (an exhausted file
            # contributes ""), so no input is cut short in oneshot mode.
            for line_set in zip_longest(*files, fillvalue=""):
                for line in line_set:
                    if not line:
                        # Nothing from this file in this round. Lines
                        # already taken from the other files must still be
                        # processed, so skip instead of leaving the loop.
                        continue

                    event = match_event(line)

                    if event or opts.realtime:
                        timestamp = int(event.timestamp) if event else int(time.time())
                        for context in contexts:
                            save_events(context.process(event, timestamp), filer)

                if not running_flag:
                    break

                if reload_flag:
                    for f in files:
                        f.close()
                        f.open()
                    reload_flag = False

                if not any(line_set):
                    # No file had anything new, wait before polling again.
                    time.sleep(opts.poll)

        # Flush whatever is still aggregated.
        for context in contexts:
            timestamp = int(time.time()) if opts.realtime else None
            save_events(context.close(timestamp), filer)

    except Exception:
        logging.exception("Exception")
# Run only when executed as a script, so that loading the file as a module
# (e.g. for testing) does not parse the command line or daemonize.
if __name__ == "__main__":
    main()