diff --git a/README.md b/README.md index abd25b6..adcd1cb 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ The following event attributes are currently supported: + tcp.flags.rst ##### Content Attribute ##### -The *content* attribute is used to specify the payload of a packet. Content attributes must be enclosed in double quotes. Special characters can be expressed in hex, like: *\x0d\x0a*. Anything prefaced with \x will be converted from hex to its ascii representation. These translation takes place during the render phase. +The *content* attribute is used to specify the payload of a packet. Content attributes must be enclosed in double quotes. UTF-8 is supported and arbitrary bytes can be expressed with the "\xHH" notation where "HH" is the hexadecimal representation of the byte. For example, a carriage return (ASCII 0x0D) followed by a line feed (ASCII 0x0A) can be defined like this: *\x0D\x0A*. This translation takes place during the render phase. Example: diff --git a/examples/dns-request.fs b/examples/dns-request.fs new file mode 100644 index 0000000..4a05361 --- /dev/null +++ b/examples/dns-request.fs @@ -0,0 +1,31 @@ +flow dns_request udp 10.200.31.12:11234 > 8.8.8.8:53; + +dns_request > ( + # transaction ID (should be random two bytes) + content:"\xBA\xBE"; + + # flags; set as appropriate (see RFC) + content:"\x01\x00"; + + # Number of questions + content:"\x00\x01"; + + # answer resource records + content:"\x00\x00"; + + # authority resource records + content:"\x00\x00"; + + # additional resource records + content:"\x00\x00"; + + # queries + # name (len, value, len, value, ... 
null) + content:"\x05linux\x16georgepburdell-desktop\x04corp\x04acme\x03com\x00"; + + # type (0x0001 is A) + content:"\x00\x01"; + + # class (0x0001 is IN/Internet) + content:"\x00\x01"; +); diff --git a/setup.py b/setup.py index 728a534..7f0f4a2 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="flowsynth", - version="1.3.1", + version="1.4.0", author="Will Urbanski", maintainer="David Wharton", maintainer_email="counterthreatunit@users.noreply.github.com", diff --git a/src/flowsynth.py b/src/flowsynth.py index 2679fbb..fa8c91b 100755 --- a/src/flowsynth.py +++ b/src/flowsynth.py @@ -29,15 +29,16 @@ import socket import time import json +from io import open #include scapy; suppress all errors logging.getLogger("scapy.runtime").setLevel(logging.ERROR) logging.getLogger("scapy.interactive").setLevel(logging.ERROR) logging.getLogger("scapy.loading").setLevel(logging.ERROR) -from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap +from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap, Raw #global variables -APP_VERSION_STRING = "1.3.1" +APP_VERSION_STRING = "1.4.0" # Define the standard version indicator. __version__ = APP_VERSION_STRING LOGGING_LEVEL = logging.INFO @@ -238,7 +239,6 @@ def lex_flow(self, tokens): while tokens[0] != ";": token = tokens[0] - #print "token is %s" % token if (token == ")"): #end of attribute spec. jump forward two (should always be ');') tokens = tokens[1:] @@ -311,7 +311,6 @@ def lex_event(self, tokens): while tokens[0] != ";": token = tokens[0] - #print "token is %s" % token if (token == ")"): #end of attribute spec. 
jump forward two (should always be ');') tokens = tokens[1:] @@ -346,7 +345,7 @@ def lex_event(self, tokens): if (modifier_key.lower() == 'content'): #content - eventdecl['contents'].append({'type': 'string', 'value': modifier_value}) + eventdecl['contents'].append({'type': 'bytes', 'value': modifier_value}) elif (modifier_key.lower() == 'filecontent'): #filecontent if ARGS.no_filecontent: @@ -467,25 +466,56 @@ def _valid_mac(self, mac): #This function expects all inputs to be enclosed within double quotes def parse_content(self, content): """ parse and render a content keyword """ + + # this regex is somewhat gnarly but leaving for now ... (why not + # just strip off double quotes on ends)? pcre_text = r'"([^\\"]*(?:\\.[^\\"]*)*)"' + result = bytearray() #first, check for text mo_text = re.match(pcre_text, content) if (mo_text != None): - logging.debug("Content: %s", mo_text.group(1)) - content_text = mo_text.group(1) - replacements = re.findall(r"\\x[a-fA-F0-9]{2}", content_text) - for replacement in replacements: - content_text = content_text.replace(replacement, chr(int(replacement[2:], 16))) + logging.debug("Content: %s (length %d)" % (content_text, len(content_text))) + start = 0 + previous_end = 0 + # Flowsynth supports encoding arbitrary bytes with the "\xHH" notation where "HH" is + # the hexadecimal representation of the byte. That is what is handled here, while + # maintaining the rest of the content data as UTF-8. 
+ for hex_replacement in re.finditer(r"\\x[a-fA-F0-9]{2}", content_text): + # try/catch blocks to deal with different data representation from shlex (depends on Python version) + start = hex_replacement.start(0) + end = hex_replacement.end(0) + ascii_hex = content_text[start+2:start+4] + previous_substring = content_text[previous_end:start] + if len(previous_substring) > 0: + # extend result with previous substring; encode as UTF-8 + try: + result.extend(previous_substring.encode('utf-8')) + except UnicodeDecodeError: + result.extend(previous_substring) + # append ASCII hex byte to result + result.extend(bytearray.fromhex(ascii_hex)) + previous_end = end + if previous_end == 0: + # no hex encoding found, just encode the whole thing + try: + result.extend(content_text.encode('utf-8')) + except UnicodeDecodeError: + result.extend(content_text) + elif previous_end < len(content_text): + # add the last substring + try: + result.extend(content_text[previous_end:len(content_text)].encode('utf-8')) + except UnicodeDecodeError: + result.extend(content_text[previous_end:len(content_text)]) - return content_text - return "" + return result def render_payload(self, event): """ render all content matches into one payload value """ - str_payload = "" + byte_payload = bytearray() for modifier in event['attributes']: #logging.debug("Found modifier: %s", modifier) keyword = modifier @@ -495,27 +525,26 @@ def render_payload(self, event): for contentobj in event['contents']: content_value = contentobj['value'] content_type = contentobj['type'] - if (content_type == 'string'): - str_payload = "%s%s" % (str_payload, self.parse_content(content_value)) + if (content_type == 'bytes'): + byte_payload.extend(self.parse_content(content_value)) elif (content_type == 'file'): if ARGS.no_filecontent: # '--no-filecontent' option was passed to flowsynth # This is also checked previously in the code path but adding here too compiler_bailout("The 'filecontent' attribute is not supported in this 
context.") else: - str_payload = "%s%s" % (str_payload, self.get_file_content(content_value)) - - return str_payload + byte_payload.extend(self.get_file_content(content_value)) + return byte_payload def get_file_content(self, filepath): #we need to strip quotes from the filepath filepath = filepath.strip()[1:-1] - try: - fptr = open(filepath,'r') + fdata = bytearray() + fptr = open(filepath,'rb') fdata = fptr.read() fptr.close() - return fdata.replace('"','\"') + return fdata except IOError: raise SynCompileError("File not found -- %s" % filepath) sys.exit(-1) @@ -542,7 +571,7 @@ def render(self, eventid): #get the payload hasPayload = False - payload = "" + payload = bytearray() total_payload = self.render_payload(event) if len(total_payload) > 0: hasPayload = True @@ -560,7 +589,7 @@ def render(self, eventid): total_payload = total_payload[self.tcp_mss:] else: payload = total_payload - total_payload = "" + total_payload = bytearray() #figure out what the src/dst port and host are @@ -604,8 +633,6 @@ def render(self, eventid): if (len(payload) > 0): tcp_ack = self.to_server_seq - - pkt = None logging.debug("SRC host: %s", src_host) logging.debug("DST host: %s", dst_host) @@ -616,9 +643,12 @@ def render(self, eventid): lyr_eth = Ether(src = src_mac, dst = dst_mac) if (self.l4_proto == Flow.PROTO_UDP): #generate udp packet - lyr_udp = UDP(sport = src_port, dport = dst_port) / payload + # the 'payload' variable is a bytearray so make sure we pass it to scapy with Raw(). 
+ lyr_udp = UDP(sport = src_port, dport = dst_port) / Raw(payload) pkt = lyr_eth / lyr_ip / lyr_udp pkts.append(pkt) + + logging.debug("Payload size is: %d" % len(payload)) else: #generate tcp packet logging.debug("TCP Packet") @@ -651,11 +681,12 @@ def render(self, eventid): flags = 'PA' logging.debug('Data packet with inferred flags S:%s A:%s', tcp_seq, tcp_ack) - lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / payload + # the 'payload' variable is a bytearray so make sure we pass it to scapy with Raw(). + lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / Raw(payload) pkt = lyr_eth / lyr_ip / lyr_tcp pkts.append(pkt) - logging.debug("Payload size is: %s" % len(payload)) + logging.debug("Payload size is: %d" % len(payload)) logging.debug("tcp_seq is %s" % tcp_seq) logging.debug("tcp_ack is %s" % tcp_ack) payload_size = len(payload) @@ -713,6 +744,8 @@ def render(self, eventid): def parse_cmd_line(): """ use ArgumentParser to parse command line arguments """ + global LOGGING_LEVEL + app_description = "FlowSynth v%s\nWill Urbanski \n\na tool for rapidly modeling network traffic" % APP_VERSION_STRING parser = argparse.ArgumentParser(description=app_description, formatter_class = argparse.RawTextHelpFormatter) @@ -1017,15 +1050,21 @@ def add_event(flowname, eventdecl): #has test case def load_syn_file(filename): - """ loads a flowsynth file from disk and returns as a string""" + """ loads a flowsynth file from disk and returns as UTF-8 """ try: filedata = "" - fptr = open(filename,'r') + # support UTF-8 and ASCII of course -- could be seen in "content" data + fptr = open(filename, 'r', encoding='utf-8') filedata = fptr.read() fptr.close() + # Python2 will store this as unicode type; Python3 as (UTF-8) str. + # Encode here for Python2 so shlex doesn't barf on it downstream. 
+ if not isinstance(filedata, str): + filedata = filedata.encode('utf-8') except IOError: compiler_bailout("Cannot open file ('%s')" % filename) - + except UnicodeDecodeError: + compiler_bailout("Unable to decode file as UTF-8 ('%s')" % filename) return filedata #helper function to report runtime errors