Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More missing numpy and matlab #649

Merged
merged 7 commits into from
May 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 33 additions & 4 deletions src/arraymancer/laser/tensor/initialization.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ import
../private/nested_containers,
./datatypes
# Standard library
import std / [typetraits, sequtils]
import std / [typetraits, sequtils, sets]

# The following export is needed to avoid a compilation error in
# algorithms.nim/intersection() when running the test_algorithms test:
# `Error: type mismatch - Expression: items(s1)`
# (Alternative: could use `bind sets.items` in `intersection` and `setDiff`)
export sets

# Third-party
import nimblas

Expand Down Expand Up @@ -210,11 +217,11 @@ proc newTensor*[T](shape: Metadata): Tensor[T] =

proc toTensor[T](a: openArray[T], shape: Metadata): Tensor[T] =
## Convert an openArray to a Tensor
##
## Input:
## - An array or a seq, must be flattened. Called by `toTensor` below.
## Result:
## - A Tensor of the same shape
##
var data = @a
if unlikely(shape.product != data.len):
raise newException(
Expand All @@ -235,13 +242,14 @@ proc toTensor[T](a: openArray[T], shape: Metadata): Tensor[T] =
shallowCopy(result.storage.raw_buffer, data)

proc toTensor*[T](a: openArray[T]): auto =
## Convert an openArray to a Tensor
## Convert an openArray into a Tensor
##
## Input:
## - An array or a seq (can be nested)
## Result:
## - A Tensor of the same shape
##
# Note: we removed the dummy static bugfixe related to Nim issue
# Note: we removed the dummy static bugfix related to Nim issue
# https://github.com/nim-lang/Nim/issues/6343
# motivated by
# https://github.com/nim-lang/Nim/issues/20993
Expand All @@ -250,6 +258,18 @@ proc toTensor*[T](a: openArray[T]): auto =
let data = toSeq(flatIter(a))
result = toTensor(data, shape)

proc toTensor*[T](a: SomeSet[T]): auto =
  ## Convert a HashSet or an OrderedSet into a Tensor
  ##
  ## Input:
  ## - A HashSet or an OrderedSet
  ## Result:
  ## - A rank-1 Tensor of length `a.len` containing the set's elements.
  ##   For a HashSet the element order is undefined; for an OrderedSet the
  ##   insertion order is preserved.
  # Note: this copies the set into a seq first (set -> seq -> tensor).
  # A direct element-by-element fill was measured to be slower on Windows,
  # so the double copy is kept deliberately (see PR discussion).
  var shape = Metadata()
  shape.add(a.len)
  let data = toSeq(a)
  result = toTensor(data, shape)
Comment on lines +270 to +271
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose we could also do:

result = newTensorUninit[T](a.card)
for i, x in a:
  result[i] = x

to avoid the double copy (set -> seq -> tensor)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I'll make that change

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interestingly a performance measurement using timeit showed that the original version was faster on Windows. We will leave this as is for now while we investigate what is going on (and if we find out why we'll update this in a separate PR).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, let's investigate it further after this PR.


proc fromBuffer*[T](rawBuffer: ptr UncheckedArray[T], shape: varargs[int], layout: static OrderType): Tensor[T] =
## Creates a `Tensor[T]` from a raw buffer, cast as `ptr UncheckedArray[T]`. The
## size derived from the given shape must match the size of the buffer!
Expand Down Expand Up @@ -288,6 +308,15 @@ func toUnsafeView*[T: KnownSupportsCopyMem](t: Tensor[T], aligned: static bool =
## Unsafe: the pointer can outlive the input tensor.
unsafe_raw_offset(t, aligned).distinctBase()

proc toHashSet*[T](t: Tensor[T]): HashSet[T] =
  ## Build a `HashSet` holding the unique elements of a Tensor
  ##
  ## Note that this is a lossy conversion: a HashSet keeps only a single
  ## copy of each distinct value and does not preserve element order.
  result = initHashSet[T](t.size)
  for value in t:
    result.incl(value)

func item*[T_IN, T_OUT](t: Tensor[T_IN], _: typedesc[T_OUT]): T_OUT =
## Returns the value of the input Tensor as a scalar of the selected type.
## This only works for Tensors (of any rank) that contain one single element.
Expand Down
9 changes: 0 additions & 9 deletions src/arraymancer/spatial/distances.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,6 @@ type

AnyMetric* = Euclidean | Manhattan | Minkowski | Jaccard | CustomMetric

when (NimMajor, NimMinor, NimPatch) < (1, 4, 0):
# have to export sets for 1.0, because `bind` didn't exist apparently
export sets

proc toHashSet[T](t: Tensor[T]): HashSet[T] =
result = initHashSet[T](t.size)
for x in t:
result.incl x

proc distance*(metric: typedesc[Manhattan], v, w: Tensor[float]): float =
## Computes the Manhattan distance between points `v` and `w`. Both need to
## be rank 1 tensors with `k` elements, where `k` is the dimensionality
Expand Down
130 changes: 127 additions & 3 deletions src/arraymancer/tensor/algorithms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import ./data_structure,
./init_cpu,
./init_copy_cpu

import std / [algorithm, sequtils]
import std / [algorithm, sequtils, sets]
export SortOrder

proc sort*[T](t: var Tensor[T], order = SortOrder.Ascending) =
Expand Down Expand Up @@ -109,10 +109,12 @@ proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] =
##
## Inputs:
## - t: The input Tensor
## - order: The order in which elements are sorted (`SortOrder.Ascending` or `SortOrder.Descending`)
## - order: The order in which elements are sorted (`SortOrder.Ascending`
## or `SortOrder.Descending`)
##
## Result:
## - A new Tensor with the unique elements of the input Tensor sorted in the specified order.
## - A new Tensor with the unique elements of the input Tensor sorted in
## the specified order.
##
## Examples:
## ```nim
Expand All @@ -134,3 +136,125 @@ proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] =
# We need to clone the tensor in order to make it C continuous
# and then we can make it unique assuming that it is already sorted
sorted(t, order = order).unique(isSorted = true)

proc union*[T](t1, t2: Tensor[T]): Tensor[T] =
  ## Return the unsorted "union" of two Tensors as a rank-1 Tensor
  ##
  ## The result contains every value that appears in at least one of the two
  ## input Tensors, with duplicates removed. No particular element order is
  ## guaranteed.
  ##
  ## Inputs:
  ## - t1, t2: Input Tensors.
  ##
  ## Result:
  ## - A rank-1 Tensor containing the (unsorted) union of the two inputs.
  ##
  ## Notes:
  ## - `numpy`'s `union1d` and `Matlab`'s `union` are the equivalent
  ##   functions, except that both of them sort their output. Apply `sort`
  ##   to this function's output to replicate that behavior.
  ##
  ## Example:
  ## ```nim
  ## let t1 = [3, 1, 3, 2, 1, 0].toTensor
  ## let t2 = [4, 2, 2, 3].toTensor
  ## echo union(t1, t2)
  ## # Tensor[system.int] of shape "[5]" on backend "Cpu"
  ## # 3 1 2 0 4
  ## ```
  # Concatenate both inputs, then drop the duplicates.
  result = unique(concat([t1, t2], axis = 0))

proc intersection*[T](t1, t2: Tensor[T]): Tensor[T] =
  ## Return the "intersection" of 2 Tensors as an unsorted rank-1 Tensor
  ##
  ## Inputs:
  ## - t1, t2: Input Tensors.
  ##
  ## Result:
  ## - An unsorted rank-1 Tensor containing the intersection of
  ##   the input Tensors.
  ##
  ## Note:
  ## - The equivalent `numpy` function is called `intersect1d`, while the
  ##   equivalent `Matlab` function is called `intersect`. However, both of
  ##   those functions always sort the output. To replicate the same
  ##   behavior, simply apply `sort` to the output of this function.
  ##
  ## Example:
  ## ```nim
  ## let t1 = arange(0, 5)
  ## let t2 = arange(3, 8)
  ##
  ## echo intersection(t1, t2)
  ## # Tensor[system.int] of shape "[2]" on backend "Cpu"
  ## # 4 3
  ## # (element order may vary, since the result is unsorted)
  ## ```
  # Convert both tensors to HashSets, intersect them and convert back.
  intersection(toHashSet(t1), toHashSet(t2)).toTensor

proc setDiff*[T](t1, t2: Tensor[T], symmetric = false): Tensor[T] =
  ## Return the (symmetric or non symmetric) "difference" between 2 Tensors as an unsorted rank-1 Tensor
  ##
  ## With the default `symmetric = false`, the result holds the elements of
  ## `t1` that are ``not`` found in `t2`.
  ##
  ## With `symmetric = true`, the "symmetric" difference is returned instead,
  ## i.e. the elements which are either not in `t1` ``or`` not in `t2`.
  ##
  ## Inputs:
  ## - t1, t2: Input Tensors.
  ## - symmetric: Whether to return a symmetric or non symmetric difference.
  ##   Defaults to `false`.
  ##
  ## Result:
  ## - An unsorted rank-1 Tensor containing the selected "difference" between
  ##   the input Tensors.
  ##
  ## Note:
  ## - The equivalent `numpy` function is called `setdiff1d`, while the
  ##   equivalent `Matlab` function is called `setdiff`. However, both of
  ##   those functions always sort the output. To replicate the same
  ##   behavior, simply apply `sort` to the output of this function.
  ##
  ## Examples:
  ## ```nim
  ## let t1 = arange(0, 5)
  ## let t2 = arange(3, 8)
  ##
  ## echo setDiff(t1, t2)
  ## # Tensor[system.int] of shape "[3]" on backend "Cpu"
  ## # 2 1 0
  ##
  ## echo setDiff(t1, t2, symmetric = true)
  ## # Tensor[system.int] of shape "[6]" on backend "Cpu"
  ## # 5 2 6 1 7 0
  ## ```
  let s1 = toHashSet(t1)
  let s2 = toHashSet(t2)
  # `-+-` is std/sets' symmetric-difference operator; `-` is set difference.
  result = (if symmetric: s1 -+- s2 else: s1 - s2).toTensor

proc contains*[T](t: Tensor[T], item: T): bool {.inline.}=
  ## Returns true if `item` is in the input Tensor `t` or false if not found.
  ## This is a shortcut for `find(t, item) >= 0`.
  ##
  ## This allows the `in` and `notin` operators, i.e.:
  ## `t.contains(item)` is the same as `item in t`.
  ##
  ## Examples:
  ## ```nim
  ## var t = [1, 3, 5].toTensor
  ## assert t.contains(5)
  ## assert 3 in t
  ## assert 99 notin t
  ## ```
  return find(t, item) >= 0

proc ismember*[T](t1, t2: Tensor[T]): Tensor[bool] {.noinit.} =
  ## Element-wise membership test of the elements of `t1` in `t2`
  ##
  ## Returns a rank-1 boolean Tensor of length `t1.len` in which the n-th
  ## element is true if the n-th element of `t1` is found anywhere in `t2`.
  ##
  ## The name mirrors `Matlab`'s `ismember` function.
  ##
  ## Note: each `in` lookup scans `t2` linearly (via `find`), so the total
  ## cost is O(t1.len * t2.len). For large inputs a HashSet-based lookup
  ## would be faster, but would require `T` to be hashable.
  result = newTensor[bool](t1.len)
  for n, it in t1.enumerate():
    result[n] = it in t2
34 changes: 34 additions & 0 deletions src/arraymancer/tensor/math_functions.nim
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,40 @@ proc classify*[T: SomeFloat](t: Tensor[T]): Tensor[FloatClass] {.noinit.} =
## - fcNegInf: value is negative infinity
t.map_inline(classify(x))

proc almostEqual*[T: SomeFloat | Complex32 | Complex64](t1, t2: Tensor[T],
    unitsInLastPlace: Natural = 4): Tensor[bool] {.noinit.} =
  ## Element-wise almostEqual function
  ##
  ## Compares pairs of elements from two tensors for approximate equality,
  ## based on the [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon).
  ##
  ## See the documentation of the `almostEqual` procedure in nim's standard
  ## library for further details.
  ##
  ## Inputs:
  ## - t1, t2: Input (floating point or complex) tensors of the same shape.
  ## - unitsInLastPlace: The max number of
  ##   [units in the last place](https://en.wikipedia.org/wiki/Unit_in_the_last_place)
  ##   difference tolerated when comparing two numbers. The
  ##   larger the value, the more error is allowed. A `0`
  ##   value means that two numbers must be exactly the
  ##   same to be considered equal.
  ##
  ## Result:
  ## - A new boolean tensor of the same shape as the inputs, in which elements
  ##   are true if the two values in the same position on the two input tensors
  ##   are almost equal (and false if they are not).
  ##
  ## Note:
  ## - Combine this function with `all` to check whether two real tensors
  ##   are almost equal as a whole.
  map2_inline(t1, t2):
    when T is SomeFloat:
      almostEqual(x, y, unitsInLastPlace = unitsInLastPlace)
    else:
      # Complex values are almost equal when both the real and the
      # imaginary parts are almost equal.
      almostEqual(x.re, y.re, unitsInLastPlace = unitsInLastPlace) and
        almostEqual(x.im, y.im, unitsInLastPlace = unitsInLastPlace)

type ConvolveMode* = enum full, same, valid

proc convolveImpl[T: SomeNumber | Complex32 | Complex64](
Expand Down
33 changes: 33 additions & 0 deletions tests/tensor/test_algorithms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,36 @@ suite "[Core] Testing algorithm functions":
check unique_sorted_descending == [8, 4, 3, 2, 1].toTensor
check unique_not_c_continuous == [1, 2, 4].toTensor
check unique_sorted_not_c_continuous == [4, 2, 1].toTensor

test "Union":
  block:
    let a = [3, 1, 3, 2, 1, 0].toTensor
    let b = [4, 2, 2, 3].toTensor
    # The result is unsorted, so sort it before comparing
    check: union(a, b).sorted == [0, 1, 2, 3, 4].toTensor

test "Intersection":
  block:
    let a = [3, 1, 3, 2, 1, 0].toTensor
    let b = [4, 2, 2, 3].toTensor
    # The result is unsorted, so sort it before comparing
    check: intersection(a, b).sorted == [2, 3].toTensor

test "setDiff":
  block:
    let a = arange(0, 5)
    let b = arange(3, 8)

    # Both the plain and the symmetric difference are unsorted,
    # so sort them before comparing
    check: setDiff(a, b).sorted == [0, 1, 2].toTensor
    check: setDiff(a, b, symmetric = true).sorted == [0, 1, 2, 5, 6, 7].toTensor

test "Find and Contains":
  let t = arange(-2, 5)

  block: # find returns the index of the match, or -1 when absent
    check: t.find(3) == 5
    check: t.find(-6) == -1

  block: # `in` / `notin` operators (explicit `not (...)` avoids relying on
         # the chaining of `notin`/`==` at equal precedence)
    check: 3 in t
    check: not (3 notin t)
    check: not (-6 in t)
    check: -6 notin t
18 changes: 18 additions & 0 deletions tests/tensor/test_math_functions.nim
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,24 @@ proc main() =
check: expected_isNaN == a.isNaN
check: expected_classification == a.classify

test "almostEqual":
  block: # Real-valued tensors
    let a = arange(1.0, 5.0)
    check: all(almostEqual(a, a.clone())) == true
    # A tiny perturbation of the first element breaks the default tolerance
    # but passes with a larger unitsInLastPlace
    var b = a.clone()
    b[0] += 2e-15
    check: almostEqual(a, b) == [false, true, true, true].toTensor()
    check: all(almostEqual(a, b, unitsInLastPlace = 5)) == true
  block: # Complex-valued tensors
    let a = complex(arange(1.0, 5.0), arange(1.0, 5.0))
    check: all(almostEqual(a, a.clone())) == true
    var b = a.clone()
    b[0] += complex(2e-15)
    check: almostEqual(a, b) == [false, true, true, true].toTensor()
    check: all(almostEqual(a, b, unitsInLastPlace = 5)) == true

test "1-D convolution":
block:
let a = arange(4)
Expand Down
Loading