-
Notifications
You must be signed in to change notification settings - Fork 267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore(Python): Improve slicing performance #6042
base: master
Are you sure you want to change the base?
Changes from 9 commits
479be8b
848f6c7
2a01d07
bf615d5
6d2ed2d
263be55
9852126
b019fca
58fa9f7
a7c5a28
2167e55
b9b8b97
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -116,11 +116,71 @@ def flatten(self): | |||||
e = q.pop() | ||||||
if isinstance(e, list): | ||||||
l += e | ||||||
elif isinstance(e, _SeqSlice): | ||||||
l += e | ||||||
elif isinstance(e, Concat): | ||||||
q.append(e.r) | ||||||
q.append(e.l) | ||||||
return l | ||||||
|
||||||
class _SeqSlice: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a little strange, that we have There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think ideally, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm fine either way |
||||||
""" | ||||||
Internal class enabling constant time slices of Seqs. | ||||||
This should only be used internally from the Seq class when a Seq is sliced. | ||||||
This class assumes the source data is immutable, which is true for Seqs. | ||||||
""" | ||||||
def __init__(self, source, start=0, stop=None, step=1): | ||||||
if isinstance(source, _SeqSlice): | ||||||
# A SeqSlice constructed from a SeqSlice shares the same underlying source list, | ||||||
self._source = source._source | ||||||
# but updates its indices based on the original SeqSlice's indices: | ||||||
self._start = source._start + start * source._step | ||||||
self._step = source._step * step | ||||||
self._stop = ( | ||||||
source._start + (stop * source._step if stop is not None else len(source) * source._step) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Right? |
||||||
if stop is not None | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Flip? |
||||||
else source._stop | ||||||
) | ||||||
else: | ||||||
# source will not change if it is constructed from a Seq because Dafny Seqs are immutable. | ||||||
self._source = source | ||||||
self._start = start | ||||||
self._stop = len(source) if stop is None else stop | ||||||
self._step = step | ||||||
|
||||||
def __getitem__(self, index): | ||||||
if isinstance(index, slice): | ||||||
# Slice in constant time by returning a reference to the source list with updated indices | ||||||
start, stop, step = index.indices(len(self)) | ||||||
return _SeqSlice( | ||||||
self._source, | ||||||
self._start + start * self._step, | ||||||
self._start + stop * self._step, | ||||||
self._step * step, | ||||||
) | ||||||
# Access the corresponding element in the source list | ||||||
return self._source[self._start + index * self._step] | ||||||
|
||||||
def __len__(self): | ||||||
# Constant-time len | ||||||
return max(0, (self._stop - self._start + (self._step - 1)) // self._step) | ||||||
|
||||||
def __iter__(self): | ||||||
for i in range(self._start, self._stop, self._step): | ||||||
yield self._source[i] | ||||||
|
||||||
def __repr__(self): | ||||||
return f"_SeqSlice({list(self)})" | ||||||
|
||||||
def __contains__(self, item): | ||||||
return any(x == item for x in self) | ||||||
|
||||||
def index(self, value): | ||||||
for i, x in enumerate(self): | ||||||
if x == value: | ||||||
return i | ||||||
raise ValueError(f"{value} is not in list") | ||||||
Comment on lines
+172
to
+182
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are those necessary? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might be leftover from debugging. I probably don't need these anymore. Will clean up. |
||||||
|
||||||
class Seq: | ||||||
def __init__(self, iterable = None, isStr = False): | ||||||
''' | ||||||
|
@@ -135,8 +195,23 @@ def __init__(self, iterable = None, isStr = False): | |||||
See docs/Compilation/StringsAndChars.md. | ||||||
''' | ||||||
|
||||||
self.elems = iterable if isinstance(iterable, Concat) else (list(iterable) if iterable is not None else []) | ||||||
self.len = len(self.elems) | ||||||
if isinstance(iterable, Seq): | ||||||
# Seqs' elements are immutable. | ||||||
# The new Seq can reference the original Seq's properties. | ||||||
self.elems = iterable.elems | ||||||
self.len = iterable.len | ||||||
self.isStr = iterable.isStr | ||||||
lucasmcdonald3 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
elif isinstance(iterable, _SeqSlice): | ||||||
# SeqSlices are lazy slices. | ||||||
# Accessing self.elems returns the underlying SeqSlice in constant time. | ||||||
# Turning this into a list, or accessing self.Elements, returns a list of the SeqSlice's elements in linear time. | ||||||
self.elems = iterable | ||||||
self.len = len(iterable) | ||||||
self.isStr = isStr | ||||||
lucasmcdonald3 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
else: | ||||||
self.elems = iterable if isinstance(iterable, Concat) else (list(iterable) if iterable is not None else []) | ||||||
self.len = len(self.elems) | ||||||
|
||||||
if isStr is None: | ||||||
self.isStr = None | ||||||
else: | ||||||
|
@@ -151,6 +226,8 @@ def __init__(self, iterable = None, isStr = False): | |||||
def Elements(self): | ||||||
if isinstance(self.elems, Concat): | ||||||
self.elems = self.elems.flatten() | ||||||
if isinstance(self.elems, _SeqSlice): | ||||||
self.elems = list(self.elems) | ||||||
return self.elems | ||||||
|
||||||
@property | ||||||
|
@@ -177,8 +254,11 @@ def __add__(self, other): | |||||
|
||||||
def __getitem__(self, key): | ||||||
if isinstance(key, slice): | ||||||
indices = range(*key.indices(len(self))) | ||||||
return Seq((self.Elements[i] for i in indices), isStr=self.isStr) | ||||||
start, stop, step = key.indices(len(self)) | ||||||
if isinstance(self.elems, _SeqSlice): | ||||||
# Avoiding .Elements call on SeqSlice avoids creating a list, which is expensive | ||||||
return Seq(_SeqSlice(self.elems, start=start, stop=stop, step=step), isStr=self.isStr) | ||||||
return Seq(_SeqSlice(self.Elements, start=start, stop=stop, step=step), isStr=self.isStr) | ||||||
lucasmcdonald3 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
return self.Elements.__getitem__(key) | ||||||
|
||||||
def set(self, key, value): | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How do you feel about merging that with the previous case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm fine either way -- I'll leave it up to you
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I prefer merging