Skip to content

Commit

Permalink
Merge pull request #194 from ctxis/master
Browse files Browse the repository at this point in the history
Merge latest from ctxis
  • Loading branch information
kevoreilly authored Jan 18, 2020
2 parents 13ae53b + c787a3a commit 3fd93ab
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 2 deletions.
5 changes: 5 additions & 0 deletions conf/processing.conf
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ dnswhitelist = no
# additional entries
dnswhitelist_file = extra/whitelist_domains.txt

# Should the server use a compressed version of behavioural logs? This helps
# in saving space in Mongo, accelerates searches and reduces the size of the
# final JSON report.
[loop_detection]
enabled = no

[static]
enabled = yes
Expand Down
189 changes: 189 additions & 0 deletions lib/cuckoo/common/compressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import os
import struct
import binascii
import logging

log = logging.getLogger(__name__)

try:
import bson
HAVE_BSON = True
except ImportError:
HAVE_BSON = False
else:
# The BSON module provided by pymongo works through its "BSON" class.
if hasattr(bson, "BSON"):
bson_decode = lambda d: bson.BSON(d).decode()
# The BSON module provided by "pip install bson" works through the
# "loads" function (just like pickle etc.)
elif hasattr(bson, "loads"):
bson_decode = lambda d: bson.loads(d)
else:
HAVE_BSON = False

class NGram:
def __init__(self, order):
self.order = order
self.buffer = []

def add(self, element):
tmp = None
if not element:
return tmp

if len(self.buffer) == self.order * 2:
tmp = self.buffer.pop(0)

if type(element) == list:
self.buffer.append(element)
else:
self.buffer.append([element, 1])

self.analyse()
return tmp

def analyse(self):
tmp = [c[0][0] for c in self.buffer]
if tmp[0:self.order] == tmp[self.order:]:
for i in range(self.order):
self.buffer[i][1] += self.buffer[i+self.order][1]
self.buffer = self.buffer[0:self.order]

class Compressor:
def __init__(self, level):
self.level = level
self.ngrams = [ NGram(i) for i in range(1,level+1) ]
self.final = []

def add(self, element):
head, tail = (self.ngrams[0], self.ngrams[1:])
out = head.add(element)

for t in tail:
out = t.add(out)

if out:
self.final.append(out)

def flush(self):
for i in range(len(self.ngrams)):
current_buffer = self.ngrams[i].buffer
for out in current_buffer:
for u in range(i+1, len(self.ngrams)):
out = self.ngrams[u].add(out)
if out:
self.final.append(out)

class CuckooBsonCompressor:
def __init__(self):
self.threads = {}
self.callmap = {}
self.head = []
self.ccounter = 0

def __next_message(self):
data = self.fd_in.read(4)
if not data:
return (False, False)
_size = struct.unpack('I', data)[0]
data += self.fd_in.read(_size - 4)
self.raw_data = data
return (data, bson_decode(data))

def run(self, file_path):
if not os.path.isfile(file_path) and os.stat(file_path).st_size:
log.warning('File %s does not exists or it is invalid.', file_path)
return False

self.fd_in = open(file_path, 'rb')

msg = '---'
while msg:
data, msg = self.__next_message()

if msg:
mtype = msg.get('type') # message type [debug, new_process, info]
if mtype not in ['debug', 'new_process', 'info']:
_id = msg.get('I', -1)
if not self.category.startswith('__'):
tid = msg.get('T', -1)
time = msg.get('t', 0)

if tid not in self.threads:
self.threads[tid] = Compressor(100)

csum = self.checksum(msg)
self.ccounter += 1
v = (csum, self.ccounter, time)
self.threads[tid].add(v)

if csum not in self.callmap:
self.callmap[csum] = msg
else:
self.head.append(data)
else:
self.category = msg.get('category', 'None')
self.head.append(data)

self.fd_in.close()

return self.flush(file_path)

def flush(self, file_path):
# This function flushes ngram buffers within compressor and merges
# threads compressed call lists trying preserve original order

compressed_path = file_path + '.compressed'
if os.path.isfile(compressed_path):
os.remove(compressed_path)

fd = open(compressed_path, 'wb')

for d in self.head:
fd.write(d)

final = []
for tid, c in self.threads.items():
c.flush()
for element, repeated in c.final:
data = self.callmap.get(element[0]).copy()
data['r'] += repeated
data['t'] = element[2]
data['order'] = element[1]
final.append(data)

final.sort(key=lambda x: x['order'])

if final and os.path.isfile(compressed_path):
for d in final:
d.pop('order')
edata = bson.BSON.encode(d)
fd.write(edata)

os.rename(file_path, '{}.raw'.format(file_path))
os.symlink('{}.compressed'.format(file_path), file_path)
else:
return False

return True

def checksum(self, msg):
# This function calculates a 4 bytes checksum for each call
# this value is used for identifying a call setup.

index = msg.get('I', -1)
args = ''.join([ str(c) for c in msg['args'] ])
content = [
str(index), # api call
str(msg['T']), # thread id
str(msg['R']), # caller
str(args), # call args
str(self.category), # category
str(msg['P']) # parentcaller
]
content = ''.join(content)

# Python3 version
# return binascii.crc32(bytes(content, 'utf8'))
return binascii.crc32(content)

25 changes: 23 additions & 2 deletions modules/processing/behavior.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
from lib.cuckoo.common.netlog import BsonParser
from lib.cuckoo.common.utils import convert_to_printable, pretty_print_arg, pretty_print_retval, logtime, default_converter

from lib.cuckoo.common.compressor import CuckooBsonCompressor

log = logging.getLogger(__name__)
cfg = Config()
cfg_process = Config("processing")

def fix_key(key):
"""Fix a registry key to have it normalized.
Expand Down Expand Up @@ -342,9 +345,11 @@ def _parse(self, row):
class Processes:
"""Processes analyzer."""

def __init__(self, logs_path):
def __init__(self, logs_path, task):
"""@param logs_path: logs path."""
self._logs_path = logs_path
self.task = task
self.options = dict((value.strip() for value in option.split("=", 1)) for option in self.task["options"].split(",") if option and '=' in option)

def run(self):
"""Run analysis.
Expand All @@ -365,6 +370,10 @@ def run(self):
for file_name in os.listdir(self._logs_path):
file_path = os.path.join(self._logs_path, file_name)

# Check if Loop Detection is enabled globally or locally (as an option)
if cfg_process.loop_detection.enabled or self.options.get("loop_detection"):
self.compress_log_file(file_path)

if os.path.isdir(file_path):
continue

Expand Down Expand Up @@ -397,6 +406,18 @@ def run(self):

return results

def compress_log_file(self, file_path):
if file_path.endswith(".bson") and os.stat(file_path).st_size:
if not CuckooBsonCompressor().run(file_path):
log.warning("Could not execute loop detection analysis.")
else:
log.info("BSON was compressed successfully.")
return True
else:
log.warning("Nonexistent or empty BSON file \"%s\".", file_path)

return False

class Summary:
"""Generates summary information."""

Expand Down Expand Up @@ -1183,7 +1204,7 @@ def run(self):
@return: results dict.
"""
behavior = {}
behavior["processes"] = Processes(self.logs_path).run()
behavior["processes"] = Processes(self.logs_path, self.task).run()

instances = [
Anomaly(),
Expand Down
6 changes: 6 additions & 0 deletions web/templates/submission/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,15 @@
<td><code>referrer</code></td>
<td>Specify the referrer to be used for URL tasks, overriding the default Google referrer</td>
</tr>
<tr>
<td><code>loop_detection</code></td>
<td>Set this option to 1 to enable loop detection, which compresses repeated API-call loops in the behavior analysis logs</td>
</tr>
<tr>
<td><code>Dl&Exec add headers example</code></td>
<td>dnl_user_agent: "CAPE the best", dnl_referer: google</td>
</tr>
<tr>
<td><code>servicedesc - for service package</code></td>
<td>Service description</td>
</tr>
Expand Down

0 comments on commit 3fd93ab

Please sign in to comment.