Skip to content

Commit

Permalink
Cache last item/result for CasaArrayWrapper.__getitem__
Browse files Browse the repository at this point in the history
  • Loading branch information
astrofrog committed Nov 25, 2021
1 parent 100ee40 commit c0ceb26
Showing 1 changed file with 24 additions and 7 deletions.
31 changes: 24 additions & 7 deletions casa_formats_io/casa_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,20 @@ def __init__(self, filename, totalshape, chunkshape, chunkoversample=None,
self._array = np.unpackbits(np.fromfile(filename, dtype='uint8'), bitorder='little')
else:
self._array = np.fromfile(filename, dtype=np.uint8)
self._last_item = None
self._last_result = None

def __getitem__(self, item):

# dask does not cache calls to __getitem__, so in some cases it may call it twice
# with the same item. Rather than trying to cache multiple possible items, we
# implement a simple strategy: remember only the most recent item and its result,
# so that successive calls with the same input are served from the cache
# (https://github.com/dask/dask/issues/8420).
# This is important, for example, for Table.__repr__, which accesses the cells
# one by one.
if item == self._last_item:
return self._last_result

# TODO: potentially normalize item, for now assume it is a list of slice objects

indices = []
Expand Down Expand Up @@ -99,7 +110,7 @@ def __getitem__(self, item):
shape=self._chunkshape,
oversample=self._chunkoversample)[:self._chunksize]

return chunk.reshape(self._chunkshape[::-1], order='F').T[item_in_chunk].astype(np.bool_)
result = chunk.reshape(self._chunkshape[::-1], order='F').T[item_in_chunk].astype(np.bool_)

else:

Expand All @@ -111,12 +122,18 @@ def __getitem__(self, item):
data_bytes = self._array[chunk_number*self._chunksize * self._itemsize:
(chunk_number + 1)*self._chunksize * self._itemsize]

return (combine_chunks(data_bytes,
self._itemsize,
shape=self._chunkshape,
oversample=self._chunkoversample)
.view(self.dtype)
.reshape(self._chunkshape[::-1], order='F').T[item_in_chunk])
result = (combine_chunks(data_bytes,
self._itemsize,
shape=self._chunkshape,
oversample=self._chunkoversample)
.view(self.dtype)
.reshape(self._chunkshape[::-1], order='F').T[item_in_chunk])

self._last_item = item
self._last_result = result

return result



def image_to_dask(imagename, memmap=True, mask=False, target_chunksize=None):
Expand Down

0 comments on commit c0ceb26

Please sign in to comment.