Skip to content

Commit

Permalink
Support unpacking 128/256/etc-bit words. (#249)
Browse files Browse the repository at this point in the history
  • Loading branch information
ltfish authored Apr 28, 2020
1 parent e75c03b commit 4cd8f42
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
22 changes: 22 additions & 0 deletions cle/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
import struct
from typing import Tuple, Union, List

import archinfo

__all__ = ('ClemoryBase', 'Clemory', 'ClemoryView')


class ClemoryBase:
__slots__ = ('_arch', '_pointer')
def __init__(self, arch):
Expand Down Expand Up @@ -62,6 +65,25 @@ def unpack_word(self, addr, size=None, signed=False, endness=None):
:param bool signed: Whether the data should be extracted signed/unsigned. Default unsigned
:param archinfo.Endness endness: The endian to use in packing/unpacking. Defaults to memory endness
"""
if size is not None and size > 8:
# support larger wordsizes via recursive algorithm
subsize = size >> 1
if size != subsize << 1:
raise ValueError("Cannot unpack non-power-of-two sizes")

if endness is None:
endness = self._arch.memory_endness
if endness == archinfo.Endness.BE:
lo_off, hi_off = subsize, 0
elif endness == archinfo.Endness.LE:
lo_off, hi_off = 0, subsize
else:
raise ValueError("Unsupported endness value %s." % endness)

lo = self.unpack_word(addr + lo_off, size=subsize, signed=False, endness=endness)
hi = self.unpack_word(addr + hi_off, size=subsize, signed=signed, endness=endness)
return (hi << (subsize << 3)) | lo

return self.unpack(addr, self._arch.struct_fmt(size=size, signed=signed, endness=endness))[0]

def pack(self, addr, fmt, *data):
Expand Down
67 changes: 67 additions & 0 deletions tests/test_unpackword.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@

import nose
from io import BytesIO

import archinfo
import cle


def test_unpackword():
    """Exercise ``unpack_word`` on 1- to 32-byte words in both byte orders.

    A 32-byte blob is mapped at a fixed base address. Every read is compared
    against the integer obtained by interpreting the same bytes by hand,
    covering the sizes 1, 2, 4, 8, 16 and 32 as well as signed reads of the
    16- and 32-byte words.
    """
    base = 0x8000000
    entry = 0x8000000

    low_half = b"\x37\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
    high_half = b"\xfd\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe"
    loader = cle.Loader(BytesIO(low_half + high_half), main_opts={
        'backend': 'blob',
        'base_addr': base,
        'entry_point': entry,
        'arch': "x86",
        'offset': 0,
    })

    big = archinfo.Endness.BE
    little = archinfo.Endness.LE

    # Each entry: (address, size in bytes, extra keyword arguments, expected value).
    cases = [
        # little endian (no explicit endness, so the memory's default is used)
        (base, 1, {}, 0x37),
        (base, 2, {}, 0x0137),
        (base, 4, {}, 0x03020137),
        (base, 8, {}, 0x0706050403020137),
        (base, 16, {}, 0x0f0e0d0c0b0a09080706050403020137),

        # big endian
        (base, 1, {'endness': big}, 0x37),
        (base, 2, {'endness': big}, 0x3701),
        (base, 4, {'endness': big}, 0x37010203),
        (base, 8, {'endness': big}, 0x3701020304050607),
        (base, 16, {'endness': big}, 0x370102030405060708090a0b0c0d0e0f),

        # signed xmmword: the top bit is set in either byte order, so the
        # expected value is the unsigned reading minus 2**128
        (base + 16, 16, {'endness': big, 'signed': True},
         0xfdfffffffffffffffffffffffffffffe - 2**128),
        (base + 16, 16, {'endness': little, 'signed': True},
         0xfefffffffffffffffffffffffffffffd - 2**128),

        # ymmword: only the little-endian signed read has its top bit set
        (base, 32, {'endness': big, 'signed': False},
         0x370102030405060708090a0b0c0d0e0ffdfffffffffffffffffffffffffffffe),
        (base, 32, {'endness': big, 'signed': True},
         0x370102030405060708090a0b0c0d0e0ffdfffffffffffffffffffffffffffffe),
        (base, 32, {'endness': little, 'signed': False},
         0xfefffffffffffffffffffffffffffffd0f0e0d0c0b0a09080706050403020137),
        (base, 32, {'endness': little, 'signed': True},
         0xfefffffffffffffffffffffffffffffd0f0e0d0c0b0a09080706050403020137 - 2**256),
    ]

    for address, width, options, expected in cases:
        value = loader.memory.unpack_word(address, width, **options)
        nose.tools.assert_equal(value, expected)


# Allow running this test module directly as a script, without a test runner.
if __name__ == "__main__":
test_unpackword()

0 comments on commit 4cd8f42

Please sign in to comment.