Merge pull request #26 from counterthreatunit/hex-bug1

Payload Encoding issue (multi-byte UTF8)
secureworks · Aug 6, 2020 · 8879c69 · 8879c69
2 parents b47dad8 + 5e0decd
commit 8879c69
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 33 deletions.
diff --git a/README.md b/README.md
@@ -179,7 +179,7 @@ The following event attributes are currently supported:
 +   tcp.flags.rst
 
 ##### Content Attribute #####
-The *content* attribute is used to specify the payload of a packet. Content attributes must be enclosed in double quotes. Special characters can be expressed in hex, like: *\x0d\x0a*. Anything prefaced with \x will be converted from hex to its ascii representation. These translation takes place during the render phase.
+The *content* attribute is used to specify the payload of a packet. Content attributes must be enclosed in double quotes. UTF-8 is supported and arbitrary bytes can be expressed with the "\xHH" notation where "HH" is the hexadecimal representation of the byte. For example, a carriage return (ASCII 0x0D) followed by a line feed (ASCII 0x0A) can be defined like this: *\x0D\x0A*. This translation takes place during the render phase.
 
 Example:
 

diff --git a/examples/dns-request.fs b/examples/dns-request.fs
@@ -0,0 +1,31 @@
+flow dns_request udp 10.200.31.12:11234 > 8.8.8.8:53;
+
+dns_request > (
+    # transaction ID (should be random two bytes)
+    content:"\xBA\xBE";
+
+    # flags; set as appropriate (see RFC)
+    content:"\x01\x00";
+
+    # Number of questions
+    content:"\x00\x01";
+
+    # answer resource records
+    content:"\x00\x00";
+
+    # authority resource records
+    content:"\x00\x00";
+
+    # additional resource records
+    content:"\x00\x00";
+
+    # queries
+    # name (len, value, len, value, ... null)
+    content:"\x05linux\x16georgepburdell-desktop\x04corp\x04acme\x03com\x00";
+
+    # type (0x0001 is A)
+    content:"\x00\x01";
+
+    # class (0x0001 is IN/Internet)
+    content:"\x00\x01";
+);
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="flowsynth",
-    version="1.3.1",
+    version="1.4.0",
     author="Will Urbanski",
     maintainer="David Wharton",
     maintainer_email="[email protected]",

diff --git a/src/flowsynth.py b/src/flowsynth.py
@@ -29,15 +29,16 @@
 import socket
 import time
 import json
+from io import open
 
 #include scapy; suppress all errors
 logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
 logging.getLogger("scapy.interactive").setLevel(logging.ERROR)
 logging.getLogger("scapy.loading").setLevel(logging.ERROR)
-from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap
+from scapy.all import Ether, IP, IPv6, TCP, UDP, RandMAC, hexdump, wrpcap, Raw
 
 #global variables
-APP_VERSION_STRING = "1.3.1"
+APP_VERSION_STRING = "1.4.0"
 # Define the standard version indicator.
 __version__ = APP_VERSION_STRING
 LOGGING_LEVEL = logging.INFO
@@ -238,7 +239,6 @@ def lex_flow(self, tokens):
 
             while tokens[0] != ";":
                 token = tokens[0]
-                #print "token is %s" % token
                 if (token == ")"):
                     #end of attribute spec. jump forward two (should always be ');')
                     tokens = tokens[1:]
@@ -311,7 +311,6 @@ def lex_event(self, tokens):
 
             while tokens[0] != ";":
                 token = tokens[0]
-                #print "token is %s" % token
                 if (token == ")"):
                     #end of attribute spec. jump forward two (should always be ');')
                     tokens = tokens[1:]
@@ -346,7 +345,7 @@ def lex_event(self, tokens):
 
                 if (modifier_key.lower() == 'content'):
                     #content
-                    eventdecl['contents'].append({'type': 'string', 'value': modifier_value})
+                    eventdecl['contents'].append({'type': 'bytes', 'value': modifier_value})
                 elif (modifier_key.lower() == 'filecontent'):
                     #filecontent
                     if ARGS.no_filecontent:
@@ -467,25 +466,56 @@ def _valid_mac(self, mac):
     #This function expects all inputs to be enclosed within double quotes
     def parse_content(self, content):
         """ parse and render a content keyword """
+
+        # this regex is somewhat gnarly but leaving for now ... (why not
+        # just strip off double quotes on ends)?
         pcre_text = r'"([^\\"]*(?:\\.[^\\"]*)*)"'
 
+        result = bytearray()
 
         #first, check for text
         mo_text = re.match(pcre_text, content)
         if (mo_text != None):
-            logging.debug("Content: %s", mo_text.group(1))
-
             content_text = mo_text.group(1)
-            replacements = re.findall(r"\\x[a-fA-F0-9]{2}", content_text)
-            for replacement in replacements:
-                content_text = content_text.replace(replacement, chr(int(replacement[2:], 16)))
+            logging.debug("Content: %s (length %d)" % (content_text, len(content_text)))
+            start = 0
+            previous_end = 0
+            # Flowsynth supports encoding arbitrary bytes with the "\xHH" notation where "HH" is
+            # the hexadecimal representation of the byte. That is what is handled here, while
+            # maintaining the rest of the content data as UTF-8.
+            for hex_replacement in re.finditer(r"\\x[a-fA-F0-9]{2}", content_text):
+                # try/catch blocks to deal with different data representation from shlex (depends on Python version)
+                start = hex_replacement.start(0)
+                end = hex_replacement.end(0)
+                ascii_hex = content_text[start+2:start+4]
+                previous_substring = content_text[previous_end:start]
+                if len(previous_substring) > 0:
+                    # extend result with previous substring; encode as UTF-8
+                    try:
+                        result.extend(previous_substring.encode('utf-8'))
+                    except UnicodeDecodeError:
+                        result.extend(previous_substring)
+                # append ASCII hex byte to result
+                result.extend(bytearray.fromhex(ascii_hex))
+                previous_end = end
+            if previous_end == 0:
+                # no hex encoding found, just encode the whole thing
+                try:
+                    result.extend(content_text.encode('utf-8'))
+                except UnicodeDecodeError:
+                    result.extend(content_text)
+            elif previous_end < len(content_text):
+                # add the last substring
+                try:
+                    result.extend(content_text[previous_end:len(content_text)].encode('utf-8'))
+                except UnicodeDecodeError:
+                    result.extend(content_text[previous_end:len(content_text)])
 
-            return content_text
-        return ""
+        return result
 
     def render_payload(self, event):
         """ render all content matches into one payload value """
-        str_payload = ""
+        byte_payload = bytearray()
         for modifier in event['attributes']:
             #logging.debug("Found modifier: %s", modifier)
             keyword = modifier
@@ -495,27 +525,26 @@ def render_payload(self, event):
             for contentobj in event['contents']:
                 content_value = contentobj['value']
                 content_type = contentobj['type']
-                if (content_type == 'string'):
-                    str_payload = "%s%s" % (str_payload, self.parse_content(content_value))
+                if (content_type == 'bytes'):
+                    byte_payload.extend(self.parse_content(content_value))
                 elif (content_type == 'file'):
                     if ARGS.no_filecontent:
                         # '--no-filecontent' option was passed to flowsynth
                         # This is also checked previously in the code path but adding here too
                         compiler_bailout("The 'filecontent' attribute is not supported in this context.")
                     else:
-                        str_payload = "%s%s" % (str_payload, self.get_file_content(content_value))
-
-        return str_payload
+                        byte_payload.extend(self.get_file_content(content_value))
+        return byte_payload
 
     def get_file_content(self, filepath):
         #we need to strip quotes from the filepath
         filepath = filepath.strip()[1:-1]
-
         try:
-            fptr = open(filepath,'r')
+            fdata = bytearray()
+            fptr = open(filepath,'rb')
             fdata = fptr.read()
             fptr.close()
-            return fdata.replace('"','\"')
+            return fdata
         except IOError:
             raise SynCompileError("File not found -- %s" % filepath)
             sys.exit(-1)
@@ -542,7 +571,7 @@ def render(self, eventid):
 
         #get the payload
         hasPayload = False
-        payload = ""
+        payload = bytearray()
         total_payload = self.render_payload(event)
         if len(total_payload) > 0:
             hasPayload = True
@@ -560,7 +589,7 @@ def render(self, eventid):
                     total_payload = total_payload[self.tcp_mss:]
                 else:
                     payload = total_payload
-                    total_payload = ""
+                    total_payload = bytearray()
 
             #figure out what the src/dst port and host are
 
@@ -604,8 +633,6 @@ def render(self, eventid):
                 if (len(payload) > 0):
                     tcp_ack = self.to_server_seq
 
-
-
             pkt = None
             logging.debug("SRC host: %s", src_host)
             logging.debug("DST host: %s", dst_host)
@@ -616,9 +643,12 @@ def render(self, eventid):
             lyr_eth = Ether(src = src_mac, dst = dst_mac)
             if (self.l4_proto == Flow.PROTO_UDP):
                 #generate udp packet
-                lyr_udp = UDP(sport = src_port, dport = dst_port) / payload
+                # the 'payload' variable is a bytearray so make sure we pass it to scapy with Raw().
+                lyr_udp = UDP(sport = src_port, dport = dst_port) / Raw(payload)
                 pkt = lyr_eth / lyr_ip / lyr_udp
                 pkts.append(pkt)
+
+                logging.debug("Payload size is: %d" % len(payload))
             else:
                 #generate tcp packet
                 logging.debug("TCP Packet")
@@ -651,11 +681,12 @@ def render(self, eventid):
                     flags = 'PA'
 
                 logging.debug('Data packet with inferred flags S:%s A:%s', tcp_seq, tcp_ack)
-                lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / payload
+                # the 'payload' variable is a bytearray so make sure we pass it to scapy with Raw().
+                lyr_tcp = TCP(flags=flags, seq=tcp_seq, ack=tcp_ack, sport = src_port, dport = dst_port) / Raw(payload)
                 pkt = lyr_eth / lyr_ip / lyr_tcp
                 pkts.append(pkt)
 
-                logging.debug("Payload size is: %s" % len(payload))
+                logging.debug("Payload size is: %d" % len(payload))
                 logging.debug("tcp_seq is %s" % tcp_seq)
                 logging.debug("tcp_ack is %s" % tcp_ack)
                 payload_size = len(payload)
@@ -713,6 +744,8 @@ def render(self, eventid):
 def parse_cmd_line():
     """ use ArgumentParser to parse command line arguments """
 
+    global LOGGING_LEVEL
+
     app_description = "FlowSynth v%s\nWill Urbanski <[email protected]>\n\na tool for rapidly modeling network traffic" % APP_VERSION_STRING
 
     parser = argparse.ArgumentParser(description=app_description, formatter_class = argparse.RawTextHelpFormatter)
@@ -1017,15 +1050,21 @@ def add_event(flowname, eventdecl):
 
 #has test case
 def load_syn_file(filename):
-    """ loads a flowsynth file from disk and returns as a string"""
+    """ loads a flowsynth file from disk and returns as UTF-8 """
     try:
         filedata = ""
-        fptr = open(filename,'r')
+        # support UTF-8 and ASCII of course -- could be seen in "content" data
+        fptr = open(filename, 'r', encoding='utf-8')
         filedata = fptr.read()
         fptr.close()
+        # Python2 will store this as unicode type; Python3 as (UTF-8) str.
+        # Encode here for Python2 so shlex doesn't barf on it downstream.
+        if not isinstance(filedata, str):
+            filedata = filedata.encode('utf-8')
     except IOError:
         compiler_bailout("Cannot open file ('%s')" % filename)
-
+    except UnicodeDecodeError:
+        compiler_bailout("Unable to decode file as UTF-8 ('%s')" % filename)
     return filedata
 
 #helper function to report runtime errors