From 560ea92776f1f6f6e536d30abc591d9f9d06e066 Mon Sep 17 00:00:00 2001 From: counterthreatunit Date: Thu, 30 Jul 2020 23:03:36 -0400 Subject: [PATCH 1/6] The payload is stored in "payload" variable which type str [sic]. Therefore, you can end up with situations where bytes (e.g. entered with hex encoding) get interpreted as multi-byte UTF8 (and expanded) when passed to scapy which is not the desired behavior. Fixed with bytearray and the scapy Raw() function. Special thanks to Tom B. for identifying the bug and solution. Also fixed variable scoping bug that prevented logging levels being set properly based on command line options. --- src/flowsynth.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/flowsynth.py b/src/flowsynth.py index 00c17a1..b299b9f 100755 --- a/src/flowsynth.py +++ b/src/flowsynth.py @@ -35,7 +35,7 @@ logging.getLogger("scapy.runtime").setLevel(logging.ERROR) logging.getLogger("scapy.interactive").setLevel(logging.ERROR) logging.getLogger("scapy.loading").setLevel(logging.ERROR) -from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap +from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap, Raw #global variables APP_VERSION_STRING = "1.3.1" @@ -613,11 +613,19 @@ def render(self, eventid): else: lyr_ip = IPv6(src = src_host, dst = dst_host) lyr_eth = Ether(src = src_mac, dst = dst_mac) + # the 'payload' variable is type str [sic] so here we put the payload into a bytearray + # and pass it to scapy using Raw(). If we don't do this, we can end up with situations + # where bytes (e.g. entered with hex encoding) can get interpreted as multi-byte UTF8 + # when passed to scapy which is not the desired behavior. 
+ payload_bytes = bytearray() + payload_bytes.extend(map(ord,payload)) if (self.l4_proto == Flow.PROTO_UDP): #generate udp packet - lyr_udp = UDP(sport = src_port, dport = dst_port) / payload + lyr_udp = UDP(sport = src_port, dport = dst_port) / Raw(payload_bytes) pkt = lyr_eth / lyr_ip / lyr_udp pkts.append(pkt) + + logging.debug("Payload size is: %d" % len(payload)) else: #generate tcp packet logging.debug("TCP Packet") @@ -650,11 +658,11 @@ def render(self, eventid): flags = 'PA' logging.debug('Data packet with inferred flags S:%s A:%s', tcp_seq, tcp_ack) - lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / payload + lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / Raw(payload_bytes) pkt = lyr_eth / lyr_ip / lyr_tcp pkts.append(pkt) - logging.debug("Payload size is: %s" % len(payload)) + logging.debug("Payload size is: %d" % len(payload)) logging.debug("tcp_seq is %s" % tcp_seq) logging.debug("tcp_ack is %s" % tcp_ack) payload_size = len(payload) @@ -712,6 +720,8 @@ def render(self, eventid): def parse_cmd_line(): """ use ArgumentParser to parse command line arguments """ + global LOGGING_LEVEL + app_description = "FlowSynth v%s\nWill Urbanski \n\na tool for rapidly modeling network traffic" % APP_VERSION_STRING parser = argparse.ArgumentParser(description=app_description, formatter_class = argparse.RawTextHelpFormatter) From 54044ffb67ec8a02fb8684e060d9e057923f9fa0 Mon Sep 17 00:00:00 2001 From: counterthreatunit Date: Thu, 30 Jul 2020 23:20:36 -0400 Subject: [PATCH 2/6] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 728a534..84ff308 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="flowsynth", - version="1.3.1", + version="1.3.2", author="Will Urbanski", maintainer="David Wharton", maintainer_email="counterthreatunit@users.noreply.github.com", From 21e374f7f064a798dacb30b981d3e086cc01b466 Mon 
Sep 17 00:00:00 2001 From: counterthreatunit Date: Sun, 2 Aug 2020 23:20:30 -0400 Subject: [PATCH 3/6] handle utf-8 encoding in input --- setup.py | 2 +- src/flowsynth.py | 96 ++++++++++++++++++++++++++++++------------------ 2 files changed, 62 insertions(+), 36 deletions(-) diff --git a/setup.py b/setup.py index 84ff308..7f0f4a2 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="flowsynth", - version="1.3.2", + version="1.4.0", author="Will Urbanski", maintainer="David Wharton", maintainer_email="counterthreatunit@users.noreply.github.com", diff --git a/src/flowsynth.py b/src/flowsynth.py index b299b9f..01b061b 100755 --- a/src/flowsynth.py +++ b/src/flowsynth.py @@ -30,6 +30,7 @@ import socket import time import json +from io import open #include scapy; suppress all errors logging.getLogger("scapy.runtime").setLevel(logging.ERROR) @@ -38,7 +39,7 @@ from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap, Raw #global variables -APP_VERSION_STRING = "1.3.1" +APP_VERSION_STRING = "1.4.0" LOGGING_LEVEL = logging.INFO ARGS = None @@ -237,7 +238,6 @@ def lex_flow(self, tokens): while tokens[0] != ";": token = tokens[0] - #print "token is %s" % token if (token == ")"): #end of attribute spec. jump forward two (should always be ');') tokens = tokens[1:] @@ -310,7 +310,6 @@ def lex_event(self, tokens): while tokens[0] != ";": token = tokens[0] - #print "token is %s" % token if (token == ")"): #end of attribute spec. 
jump forward two (should always be ');') tokens = tokens[1:] @@ -345,7 +344,7 @@ def lex_event(self, tokens): if (modifier_key.lower() == 'content'): #content - eventdecl['contents'].append({'type': 'string', 'value': modifier_value}) + eventdecl['contents'].append({'type': 'bytes', 'value': modifier_value}) elif (modifier_key.lower() == 'filecontent'): #filecontent if ARGS.no_filecontent: @@ -466,25 +465,53 @@ def _valid_mac(self, mac): #This function expects all inputs to be enclosed within double quotes def parse_content(self, content): """ parse and render a content keyword """ + + # this regex is somewhat gnarly but leaving for now ... (why not + # just strip off double quotes on ends)? pcre_text = r'"([^\\"]*(?:\\.[^\\"]*)*)"' + result = bytearray() #first, check for text mo_text = re.match(pcre_text, content) if (mo_text != None): - logging.debug("Content: %s", mo_text.group(1)) - content_text = mo_text.group(1) - replacements = re.findall(r"\\x[a-fA-F0-9]{2}", content_text) - for replacement in replacements: - content_text = content_text.replace(replacement, chr(int(replacement[2:], 16))) + logging.debug("Content: %s (length %d)" % (content_text, len(content_text))) + start = 0 + previous_end = 0 + for hex_replacement in re.finditer(r"\\x[a-fA-F0-9]{2}", content_text): + # try/catch blocks to deal with different data representation from shlex (depends on Python version) + start = hex_replacement.start(0) + end = hex_replacement.end(0) + ascii_hex = content_text[start+2:start+4] + previous_substring = content_text[previous_end:start] + if len(previous_substring) > 0: + # extend result with previous substring; encode as UTF-8 + try: + result.extend(previous_substring.encode('utf-8')) + except UnicodeDecodeError: + result.extend(previous_substring) + # append ASCII hex byte to result + result.extend(bytearray.fromhex(ascii_hex)) + previous_end = end + if previous_end == 0: + # no hex encoding found, just encode the whole thing + try: + 
result.extend(content_text.encode('utf-8')) + except UnicodeDecodeError: + result.extend(content_text) + elif previous_end < len(content_text): + # add the last substring + try: + result.extend(content_text[previous_end:len(content_text)].encode('utf-8')) + except UnicodeDecodeError: + result.extend(content_text[previous_end:len(content_text)]) - return content_text - return "" + return result def render_payload(self, event): """ render all content matches into one payload value """ - str_payload = "" + byte_payload = bytearray() for modifier in event['attributes']: #logging.debug("Found modifier: %s", modifier) keyword = modifier @@ -494,27 +521,26 @@ def render_payload(self, event): for contentobj in event['contents']: content_value = contentobj['value'] content_type = contentobj['type'] - if (content_type == 'string'): - str_payload = "%s%s" % (str_payload, self.parse_content(content_value)) + if (content_type == 'bytes'): + byte_payload.extend(self.parse_content(content_value)) elif (content_type == 'file'): if ARGS.no_filecontent: # '--no-filecontent' option was passed to flowsynth # This is also checked previously in the code path but adding here too compiler_bailout("The 'filecontent' attribute is not supported in this context.") else: - str_payload = "%s%s" % (str_payload, self.get_file_content(content_value)) - - return str_payload + byte_payload.extend(self.get_file_content(content_value)) + return byte_payload def get_file_content(self, filepath): #we need to strip quotes from the filepath filepath = filepath.strip()[1:-1] - try: - fptr = open(filepath,'r') + fdata = bytearray() + fptr = open(filepath,'rb') fdata = fptr.read() fptr.close() - return fdata.replace('"','\"') + return fdata except IOError: raise SynCompileError("File not found -- %s" % filepath) sys.exit(-1) @@ -541,7 +567,7 @@ def render(self, eventid): #get the payload hasPayload = False - payload = "" + payload = bytearray() total_payload = self.render_payload(event) if len(total_payload) 
> 0: hasPayload = True @@ -559,7 +585,7 @@ def render(self, eventid): total_payload = total_payload[self.tcp_mss:] else: payload = total_payload - total_payload = "" + total_payload = bytearray() #figure out what the src/dst port and host are @@ -603,8 +629,6 @@ def render(self, eventid): if (len(payload) > 0): tcp_ack = self.to_server_seq - - pkt = None logging.debug("SRC host: %s", src_host) logging.debug("DST host: %s", dst_host) @@ -613,15 +637,10 @@ def render(self, eventid): else: lyr_ip = IPv6(src = src_host, dst = dst_host) lyr_eth = Ether(src = src_mac, dst = dst_mac) - # the 'payload' variable is type str [sic] so here we put the payload into a bytearray - # and pass it to scapy using Raw(). If we don't do this, we can end up with situations - # where bytes (e.g. entered with hex encoding) can get interpreted as multi-byte UTF8 - # when passed to scapy which is not the desired behavior. - payload_bytes = bytearray() - payload_bytes.extend(map(ord,payload)) if (self.l4_proto == Flow.PROTO_UDP): #generate udp packet - lyr_udp = UDP(sport = src_port, dport = dst_port) / Raw(payload_bytes) + # the 'payload' variable is a bytearray so make sure we pass it to scapy with Raw(). + lyr_udp = UDP(sport = src_port, dport = dst_port) / Raw(payload) pkt = lyr_eth / lyr_ip / lyr_udp pkts.append(pkt) @@ -658,7 +677,8 @@ def render(self, eventid): flags = 'PA' logging.debug('Data packet with inferred flags S:%s A:%s', tcp_seq, tcp_ack) - lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / Raw(payload_bytes) + # the 'payload' variable is a bytearray so make sure we pass it to scapy with Raw(). 
+ lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / Raw(payload) pkt = lyr_eth / lyr_ip / lyr_tcp pkts.append(pkt) @@ -1026,15 +1046,21 @@ def add_event(flowname, eventdecl): #has test case def load_syn_file(filename): - """ loads a flowsynth file from disk and returns as a string""" + """ loads a flowsynth file from disk and returns as UTF-8 """ try: filedata = "" - fptr = open(filename,'r') + # support UTF-8 and ASCII of course -- could be seen in "content" data + fptr = open(filename, 'r', encoding='utf-8') filedata = fptr.read() fptr.close() + # Python2 will store this as unicode type; Python3 as (UTF-8) str. + # Encode here for Python2 so shlex doesn't barf on it downstream. + if not isinstance(filedata, str): + filedata = filedata.encode('utf-8') except IOError: compiler_bailout("Cannot open file ('%s')" % filename) - + except UnicodeDecodeError: + compiler_bailout("Unable to decode file as UTF-8 ('%s')" % filename) return filedata #helper function to report runtime errors From 12c2da93645178d758b136445dfa90dba03d2ef3 Mon Sep 17 00:00:00 2001 From: counterthreatunit Date: Sun, 2 Aug 2020 23:36:39 -0400 Subject: [PATCH 4/6] update docs/comments --- README.md | 2 +- src/flowsynth.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index abd25b6..adcd1cb 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ The following event attributes are currently supported: + tcp.flags.rst ##### Content Attribute ##### -The *content* attribute is used to specify the payload of a packet. Content attributes must be enclosed in double quotes. Special characters can be expressed in hex, like: *\x0d\x0a*. Anything prefaced with \x will be converted from hex to its ascii representation. These translation takes place during the render phase. +The *content* attribute is used to specify the payload of a packet. Content attributes must be enclosed in double quotes. 
UTF-8 is supported and arbitrary bytes can be expressed with the "\xHH" notation where "HH" is the hexadecimal representation of the byte. For example, a carriage return (ASCII 0x0D) followed by a line feed (ASCII 0x0A) can be defined like this: *\x0D\x0A*. This translation takes place during the render phase. Example: diff --git a/src/flowsynth.py b/src/flowsynth.py index 01b061b..fc18395 100755 --- a/src/flowsynth.py +++ b/src/flowsynth.py @@ -479,6 +479,9 @@ def parse_content(self, content): logging.debug("Content: %s (length %d)" % (content_text, len(content_text))) start = 0 previous_end = 0 + # Flowsynth supports encoding arbitrary bytes with the "\xHH" notation where "HH" is + # the hexadecimal representation of the byte. That is what is handled here, while + # maintaining the rest of the content data as UTF-8. for hex_replacement in re.finditer(r"\\x[a-fA-F0-9]{2}", content_text): # try/catch blocks to deal with different data representation from shlex (depends on Python version) start = hex_replacement.start(0) From 033296861d712fbf138227bdc249990c4a2b916a Mon Sep 17 00:00:00 2001 From: counterthreatunit Date: Tue, 4 Aug 2020 16:37:32 -0400 Subject: [PATCH 5/6] add dns request example --- examples/dns-request.fs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 examples/dns-request.fs diff --git a/examples/dns-request.fs b/examples/dns-request.fs new file mode 100644 index 0000000..4a05361 --- /dev/null +++ b/examples/dns-request.fs @@ -0,0 +1,31 @@ +flow dns_request udp 10.200.31.12:11234 > 8.8.8.8:53; + +dns_request > ( + # transaction ID (should be random two bytes) + content:"\xBA\xBE"; + + # flags; set as appropriate (see RFC) + content:"\x01\x00"; + + # Number of questions + content:"\x00\x01"; + + # answer resource records + content:"\x00\x00"; + + # authority resource records + content:"\x00\x00"; + + # additional resource records + content:"\x00\x00"; + + # queries + # name (len, value, len, value, ... 
null) + content:"\x05linux\x16georgepburdell-desktop\x04corp\x04acme\x03com\x00"; + + # type (0x0001 is A) + content:"\x00\x01"; + + # class (0x0001 is IN/Internet) + content:"\x00\x01"; +); From e5a2848097080160cd1c756b4ce5fc29020fb616 Mon Sep 17 00:00:00 2001 From: counterthreatunit Date: Tue, 4 Aug 2020 16:43:42 -0400 Subject: [PATCH 6/6] incorporating suggestions from PR 25 (or at least half of them) --- src/flowsynth.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/flowsynth.py b/src/flowsynth.py index fc18395..8f8a32b 100755 --- a/src/flowsynth.py +++ b/src/flowsynth.py @@ -19,7 +19,7 @@ author: Will Urbanski """ - +#from __future__ import print_function import argparse import logging @@ -40,6 +40,7 @@ #global variables APP_VERSION_STRING = "1.4.0" +__version__ = APP_VERSION_STRING LOGGING_LEVEL = logging.INFO ARGS = None