Skip to content

Commit

Permalink
Add unique functions to algorithms.nim
Browse files Browse the repository at this point in the history
These functions are similar to but not as fully featured as `numpy.unique`.
They are missing a way to count the occurrences of the unique elements or to return their indexes.
However, they make it possible to (optionally) sort the output, or to use a more efficient algorithm if the input is already sorted.
  • Loading branch information
AngelEzquerra committed Apr 16, 2024
1 parent d21362a commit af64112
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 0 deletions.
77 changes: 77 additions & 0 deletions src/arraymancer/tensor/algorithms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,80 @@ proc argsort*[T](t: Tensor[T], order = SortOrder.Ascending, toCopy = false): Ten
result = newTensorUninit[int](t.shape)
for i in 0 ..< t.size:
result[i] = tups[i][1]

proc unique*[T](t: Tensor[T], isSorted=false): Tensor[T] =
  ## Return a new Tensor with the unique elements of the input Tensor in the order they first appear
  ##
  ## Note that this is the *"unsorted"* version of this procedure which returns
  ## the unique values in the order in which they first appear on the input.
  ## Do not get confused by the `isSorted` argument which is not used to sort
  ## the output, but to make the algorithm more efficient when the input tensor
  ## is already sorted.
  ##
  ## There is another version of this procedure which gets an `order` argument
  ## that lets you sort the output (in ascending or descending order).
  ##
  ## Inputs:
  ##   - t: The input Tensor
  ##   - isSorted: Set this to `true` if the input tensor is already sorted,
  ##               in order to use a more efficient algorithm for finding the
  ##               unique elements of the input Tensor. Be careful however when
  ##               using this option, since if the input tensor is not really
  ##               sorted, the output will be wrong.
  ##
  ## Result:
  ##   - A new Tensor with the unique elements of the input Tensor in the order
  ##     in which they first appear on the input Tensor.
  ##
  ## Examples:
  ## ```nim
  ## let
  ##   dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
  ## assert dup.unique == [1, 3, 2, 4, 8].toTensor
  ##
  ## # Use `isSorted = true` only if the input tensor is already sorted.
  ## # The input here is sorted, so the unique values come out sorted as well.
  ## assert dup.sorted.unique(isSorted = true) == [1, 2, 3, 4, 8].toTensor
  ## ```

  if t.is_C_contiguous:
    # Note that since deduplicate returns a new sequence, it is safe to apply it
    # to a view of the raw data of the input tensor
    toOpenArray(t.toUnsafeView, 0, t.size - 1).deduplicate(isSorted = isSorted).toTensor
  else:
    # Clone the tensor in order to make it C contiguous and then make it unique
    unique(t.clone(), isSorted = isSorted)

proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] =
  ## Return a new sorted Tensor with the unique elements of the input Tensor
  ##
  ## Note that this is the "sorted" version of this procedure. There is
  ## another version which doesn't get an `order` argument that returns the
  ## unique elements in the order in which they first appear on the input.
  ##
  ## Inputs:
  ##   - t: The input Tensor
  ##   - order: The order in which elements are sorted (`SortOrder.Ascending` or `SortOrder.Descending`)
  ##
  ## Result:
  ##   - A new Tensor with the unique elements of the input Tensor sorted in the specified order.
  ##
  ## Examples:
  ## ```nim
  ## let
  ##   dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
  ##   unique_ascending_sort = dup.unique(order = SortOrder.Ascending)
  ##   unique_descending_sort = dup.unique(order = SortOrder.Descending)
  ## assert unique_ascending_sort == [1, 2, 3, 4, 8].toTensor
  ## assert unique_descending_sort == [8, 4, 3, 2, 1].toTensor
  ## ```

  if t.is_C_contiguous:
    # Note that since sorted returns a new sequence, it is safe to apply it
    # to a view of the raw data of the input tensor
    sorted(toOpenArray(t.toUnsafeView, 0, t.size - 1),
           order = order)
      .deduplicate(isSorted = true).toTensor
  else:
    # Sort the tensor first and then delegate to the `isSorted` overload of
    # `unique`, which deals with the C contiguity requirement on its own.
    # The sorted input lets that overload use its faster deduplication path.
    sorted(t, order = order).unique(isSorted = true)
21 changes: 21 additions & 0 deletions tests/tensor/test_algorithms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,24 @@ suite "[Core] Testing algorithm functions":
let idxSorted = t.argsort(order = SortOrder.Descending)
check idxSorted == exp
check t[idxSorted] == @[7, 4, 3, 2, 1].toTensor()

test "Unique":
  block:
    let
      dup = [1, 3, 2, 4, 1, 8, 2, 1, 4].toTensor
      unique_unsorted = dup.unique
      # Sort the input while it still contains duplicates, so that the
      # `isSorted = true` path actually has something to deduplicate
      unique_presorted_ascending = sorted(dup).unique(isSorted = true)
      unique_presorted_descending = sorted(dup, order = SortOrder.Descending).unique(isSorted = true)
      unique_sorted_ascending = dup.unique(order = SortOrder.Ascending)
      unique_sorted_descending = dup.unique(order = SortOrder.Descending)
      # Strided slice, used to exercise the non C contiguous code paths
      dup_not_c_contiguous = dup[_ | 2]
      unique_not_c_contiguous = dup_not_c_contiguous.unique
      unique_sorted_not_c_contiguous = dup_not_c_contiguous.unique(order = SortOrder.Descending)

    check unique_unsorted == [1, 3, 2, 4, 8].toTensor
    check unique_presorted_ascending == [1, 2, 3, 4, 8].toTensor
    check unique_presorted_descending == [8, 4, 3, 2, 1].toTensor
    check unique_sorted_ascending == [1, 2, 3, 4, 8].toTensor
    check unique_sorted_descending == [8, 4, 3, 2, 1].toTensor
    check unique_not_c_contiguous == [1, 2, 4].toTensor
    check unique_sorted_not_c_contiguous == [4, 2, 1].toTensor

0 comments on commit af64112

Please sign in to comment.