Skip to content

Commit

Permalink
Merge pull request #26 from counterthreatunit/hex-bug1
Browse files Browse the repository at this point in the history
Payload Encoding issue (multi-byte UTF-8)
  • Loading branch information
whartond authored Aug 6, 2020
2 parents b47dad8 + 5e0decd commit 8879c69
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 33 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ The following event attributes are currently supported:
+ tcp.flags.rst

##### Content Attribute #####
The *content* attribute is used to specify the payload of a packet. Content attributes must be enclosed in double quotes. Special characters can be expressed in hex, like: *\x0d\x0a*. Anything prefaced with \x will be converted from hex to its ascii representation. These translation takes place during the render phase.
The *content* attribute is used to specify the payload of a packet. Content attributes must be enclosed in double quotes. UTF-8 is supported and arbitrary bytes can be expressed with the "\xHH" notation where "HH" is the hexadecimal representation of the byte. For example, a carriage return (ASCII 0x0D) followed by a line feed (ASCII 0x0A) can be defined like this: *\x0D\x0A*. This translation takes place during the render phase.

Example:

Expand Down
31 changes: 31 additions & 0 deletions examples/dns-request.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
flow dns_request udp 10.200.31.12:11234 > 8.8.8.8:53;

dns_request > (
# transaction ID (should be random two bytes)
content:"\xBA\xBE";

# flags; set as appropriate (see RFC)
content:"\x01\x00";

# Number of questions
content:"\x00\x01";

# answer resource records
content:"\x00\x00";

# authority resource records
content:"\x00\x00";

# additional resource records
content:"\x00\x00";

# queries
# name (len, value, len, value, ... null)
content:"\x05linux\x16georgepburdell-desktop\x04corp\x04acme\x03com\x00";

# type (0x0001 is A)
content:"\x00\x01";

# class (0x0001 is IN/Internet)
content:"\x00\x01";
);
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="flowsynth",
version="1.3.1",
version="1.4.0",
author="Will Urbanski",
maintainer="David Wharton",
maintainer_email="[email protected]",
Expand Down
101 changes: 70 additions & 31 deletions src/flowsynth.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,16 @@
import socket
import time
import json
from io import open

#include scapy; suppress all errors
logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
logging.getLogger("scapy.interactive").setLevel(logging.ERROR)
logging.getLogger("scapy.loading").setLevel(logging.ERROR)
from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap
from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap, Raw

#global variables
APP_VERSION_STRING = "1.3.1"
APP_VERSION_STRING = "1.4.0"
# Define the standard version indicator.
__version__ = APP_VERSION_STRING
LOGGING_LEVEL = logging.INFO
Expand Down Expand Up @@ -238,7 +239,6 @@ def lex_flow(self, tokens):

while tokens[0] != ";":
token = tokens[0]
#print "token is %s" % token
if (token == ")"):
#end of attribute spec. jump forward two (should always be ');')
tokens = tokens[1:]
Expand Down Expand Up @@ -311,7 +311,6 @@ def lex_event(self, tokens):

while tokens[0] != ";":
token = tokens[0]
#print "token is %s" % token
if (token == ")"):
#end of attribute spec. jump forward two (should always be ');')
tokens = tokens[1:]
Expand Down Expand Up @@ -346,7 +345,7 @@ def lex_event(self, tokens):

if (modifier_key.lower() == 'content'):
#content
eventdecl['contents'].append({'type': 'string', 'value': modifier_value})
eventdecl['contents'].append({'type': 'bytes', 'value': modifier_value})
elif (modifier_key.lower() == 'filecontent'):
#filecontent
if ARGS.no_filecontent:
Expand Down Expand Up @@ -467,25 +466,56 @@ def _valid_mac(self, mac):
#This function expects all inputs to be enclosed within double quotes
def parse_content(self, content):
    """Parse a double-quoted content value and render it to raw bytes.

    Text between the enclosing double quotes is encoded as UTF-8. Every
    "\\xHH" sequence (HH = two hexadecimal digits) is replaced by the
    single byte it names, so arbitrary non-UTF-8 bytes can be expressed.

    Args:
        content: the content attribute value, including its enclosing
            double quotes.

    Returns:
        A bytearray holding the rendered payload. It is empty when
        `content` is not a double-quoted string.
    """
    # this regex is somewhat gnarly but leaving for now ... (why not
    # just strip off double quotes on ends)?
    pcre_text = r'"([^\\"]*(?:\\.[^\\"]*)*)"'

    result = bytearray()

    mo_text = re.match(pcre_text, content)
    if mo_text is None:
        # not a quoted string; nothing to render
        return result

    content_text = mo_text.group(1)
    logging.debug("Content: %s", content_text)

    def _utf8(text):
        # The data representation handed over by shlex depends on the
        # Python version: a Python 2 byte string holding non-ASCII data
        # raises UnicodeDecodeError on encode() and is already bytes.
        try:
            return text.encode('utf-8')
        except UnicodeDecodeError:
            return text

    # Walk the "\xHH" escapes left to right: emit the literal text that
    # precedes each escape as UTF-8, then the escaped byte itself.
    # Building bytes directly (instead of substituting chr() into the
    # string) keeps values >= 0x80 as one byte rather than letting them
    # become a multi-byte UTF-8 sequence.
    previous_end = 0
    for hex_replacement in re.finditer(r"\\x[a-fA-F0-9]{2}", content_text):
        start = hex_replacement.start(0)
        result.extend(_utf8(content_text[previous_end:start]))
        result.extend(bytearray.fromhex(content_text[start + 2:start + 4]))
        previous_end = hex_replacement.end(0)

    # whatever follows the last escape (or the whole text if none found)
    result.extend(_utf8(content_text[previous_end:]))

    return result

def render_payload(self, event):
""" render all content matches into one payload value """
str_payload = ""
byte_payload = bytearray()
for modifier in event['attributes']:
#logging.debug("Found modifier: %s", modifier)
keyword = modifier
Expand All @@ -495,27 +525,26 @@ def render_payload(self, event):
for contentobj in event['contents']:
content_value = contentobj['value']
content_type = contentobj['type']
if (content_type == 'string'):
str_payload = "%s%s" % (str_payload, self.parse_content(content_value))
if (content_type == 'bytes'):
byte_payload.extend(self.parse_content(content_value))
elif (content_type == 'file'):
if ARGS.no_filecontent:
# '--no-filecontent' option was passed to flowsynth
# This is also checked previously in the code path but adding here too
compiler_bailout("The 'filecontent' attribute is not supported in this context.")
else:
str_payload = "%s%s" % (str_payload, self.get_file_content(content_value))

return str_payload
byte_payload.extend(self.get_file_content(content_value))
return byte_payload

def get_file_content(self, filepath):
    """Return the raw bytes of the file named by a quoted path.

    Args:
        filepath: the path enclosed in one pair of quote characters
            (surrounding whitespace is ignored), as written in the
            'filecontent' attribute.

    Returns:
        The file's contents as bytes, read in binary mode so the data
        reaches the payload unmodified.

    Raises:
        SynCompileError: if the file cannot be opened or read.
    """
    #we need to strip quotes from the filepath
    filepath = filepath.strip()[1:-1]

    try:
        # 'with' guarantees the handle is closed even if read() fails
        with open(filepath, 'rb') as fptr:
            return fptr.read()
    except IOError:
        raise SynCompileError("File not found -- %s" % filepath)
Expand All @@ -542,7 +571,7 @@ def render(self, eventid):

#get the payload
hasPayload = False
payload = ""
payload = bytearray()
total_payload = self.render_payload(event)
if len(total_payload) > 0:
hasPayload = True
Expand All @@ -560,7 +589,7 @@ def render(self, eventid):
total_payload = total_payload[self.tcp_mss:]
else:
payload = total_payload
total_payload = ""
total_payload = bytearray()

#figure out what the src/dst port and host are

Expand Down Expand Up @@ -604,8 +633,6 @@ def render(self, eventid):
if (len(payload) > 0):
tcp_ack = self.to_server_seq



pkt = None
logging.debug("SRC host: %s", src_host)
logging.debug("DST host: %s", dst_host)
Expand All @@ -616,9 +643,12 @@ def render(self, eventid):
lyr_eth = Ether(src = src_mac, dst = dst_mac)
if (self.l4_proto == Flow.PROTO_UDP):
#generate udp packet
lyr_udp = UDP(sport = src_port, dport = dst_port) / payload
# the 'payload' variable is a bytearray so make sure we pass it to scapy with Raw().
lyr_udp = UDP(sport = src_port, dport = dst_port) / Raw(payload)
pkt = lyr_eth / lyr_ip / lyr_udp
pkts.append(pkt)

logging.debug("Payload size is: %d" % len(payload))
else:
#generate tcp packet
logging.debug("TCP Packet")
Expand Down Expand Up @@ -651,11 +681,12 @@ def render(self, eventid):
flags = 'PA'

logging.debug('Data packet with inferred flags S:%s A:%s', tcp_seq, tcp_ack)
lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / payload
# the 'payload' variable is a bytearray so make sure we pass it to scapy with Raw().
lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / Raw(payload)
pkt = lyr_eth / lyr_ip / lyr_tcp
pkts.append(pkt)

logging.debug("Payload size is: %s" % len(payload))
logging.debug("Payload size is: %d" % len(payload))
logging.debug("tcp_seq is %s" % tcp_seq)
logging.debug("tcp_ack is %s" % tcp_ack)
payload_size = len(payload)
Expand Down Expand Up @@ -713,6 +744,8 @@ def render(self, eventid):
def parse_cmd_line():
""" use ArgumentParser to parse command line arguments """

global LOGGING_LEVEL

app_description = "FlowSynth v%s\nWill Urbanski <[email protected]>\n\na tool for rapidly modeling network traffic" % APP_VERSION_STRING

parser = argparse.ArgumentParser(description=app_description, formatter_class = argparse.RawTextHelpFormatter)
Expand Down Expand Up @@ -1017,15 +1050,21 @@ def add_event(flowname, eventdecl):

#has test case
def load_syn_file(filename):
    """ loads a flowsynth file from disk and returns as UTF-8

    The file is decoded as UTF-8 (ASCII being a subset), since non-ASCII
    text may appear in "content" data. The return value is always the
    native `str` type of the running interpreter.

    If the file cannot be opened, or is not valid UTF-8, the error is
    reported through compiler_bailout().
    """
    filedata = ""
    try:
        # 'with' closes the handle even when decoding fails part-way
        # through the read.
        with open(filename, 'r', encoding='utf-8') as fptr:
            filedata = fptr.read()
        # Python2 will store this as unicode type; Python3 as (UTF-8) str.
        # Encode here for Python2 so shlex doesn't barf on it downstream.
        if not isinstance(filedata, str):
            filedata = filedata.encode('utf-8')
    except IOError:
        compiler_bailout("Cannot open file ('%s')" % filename)
    except UnicodeDecodeError:
        compiler_bailout("Unable to decode file as UTF-8 ('%s')" % filename)
    return filedata

#helper function to report runtime errors
Expand Down

0 comments on commit 8879c69

Please sign in to comment.