From 0398d2452b52319b1e5cc3a1874e411bcd25da6b Mon Sep 17 00:00:00 2001 From: Angel Ezquerra Date: Thu, 25 Apr 2024 23:53:22 +0200 Subject: [PATCH 1/7] Add a `union` procedure `union` returns the unique, unsorted Tensor of values that are found in either of the two input Tensors. Note that an equivalent function exists both in `numpy` (where it is called `union1d`) and in `Matlab` (where it is called `union`). However, those functions always sort the output, while Arraymancer's version does not. To replicate the same behavior, simply apply `sort` to the output of this function. --- src/arraymancer/tensor/algorithms.nim | 34 +++++++++++++++++++++++++-- tests/tensor/test_algorithms.nim | 6 +++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/arraymancer/tensor/algorithms.nim b/src/arraymancer/tensor/algorithms.nim index 579ae017..10565e76 100644 --- a/src/arraymancer/tensor/algorithms.nim +++ b/src/arraymancer/tensor/algorithms.nim @@ -109,10 +109,12 @@ proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] = ## ## Inputs: ## - t: The input Tensor - ## - order: The order in which elements are sorted (`SortOrder.Ascending` or `SortOrder.Descending`) + ## - order: The order in which elements are sorted (`SortOrder.Ascending` + ## or `SortOrder.Descending`) ## ## Result: - ## - A new Tensor with the unique elements of the input Tensor sorted in the specified order. + ## - A new Tensor with the unique elements of the input Tensor sorted in + ## the specified order. 
## ## Examples: ## ```nim @@ -134,3 +136,31 @@ proc unique*[T](t: Tensor[T], order: SortOrder): Tensor[T] = # We need to clone the tensor in order to make it C continuous # and then we can make it unique assuming that it is already sorted sorted(t, order = order).unique(isSorted = true) + +proc union*[T](t1, t2: Tensor[T]): Tensor[T] = + ## Return the unsorted "union" of two Tensors as a rank-1 Tensor + ## + ## Returns the unique, unsorted Tensor of values that are found in either of + ## the two input Tensors. + ## + ## Inputs: + ## - t1, t2: Input Tensors. + ## + ## Result: + ## - A rank-1 Tensor containing the (unsorted) union of the two input Tensors. + ## + ## Notes: + ## - The equivalent `numpy` function is called `union1d`, while the + ## equivalent `Matlab` function is called `union`. However, both of + ## those functions always sort the output. To replicate the same + ## behavior, simply apply `sort` to the output of this function. + ## + ## Example: + ## ```nim + ## let t1 = [3, 1, 3, 2, 1, 0].toTensor + ## let t2 = [4, 2, 2, 3].toTensor + ## echo union(t1, t2) + ## # Tensor[system.int] of shape "[5]" on backend "Cpu" + ## # 3 1 2 0 4 + ## ``` + concat([t1, t2], axis = 0).unique() diff --git a/tests/tensor/test_algorithms.nim b/tests/tensor/test_algorithms.nim index 5ecc16b4..7cd68960 100644 --- a/tests/tensor/test_algorithms.nim +++ b/tests/tensor/test_algorithms.nim @@ -71,3 +71,9 @@ suite "[Core] Testing algorithm functions": check unique_sorted_descending == [8, 4, 3, 2, 1].toTensor check unique_not_c_continuous == [1, 2, 4].toTensor check unique_sorted_not_c_continuous == [4, 2, 1].toTensor + + test "Union": + block: + let t1 = [3, 1, 3, 2, 1, 0].toTensor + let t2 = [4, 2, 2, 3].toTensor + check: sorted(union(t1, t2)) == [0, 1, 2, 3, 4].toTensor From cdbac42e2f0d0c24f2beb6deb4657c034ec028f3 Mon Sep 17 00:00:00 2001 From: Angel Ezquerra Date: Thu, 25 Apr 2024 23:59:20 +0200 Subject: [PATCH 2/7] Add a version of `toTensor` that takes SomeSet as its 
input This will let us avoid having to convert HashSets into seqs before converting them into tensors. Note that this also slightly improves the docstrings of a couple of the existing `toTensor` procedures. --- .../laser/tensor/initialization.nim | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/arraymancer/laser/tensor/initialization.nim b/src/arraymancer/laser/tensor/initialization.nim index 4bb9aa96..d7d84f47 100644 --- a/src/arraymancer/laser/tensor/initialization.nim +++ b/src/arraymancer/laser/tensor/initialization.nim @@ -13,7 +13,7 @@ import ../private/nested_containers, ./datatypes # Standard library -import std / [typetraits, sequtils] +import std / [typetraits, sequtils, sets] # Third-party import nimblas @@ -210,11 +210,11 @@ proc newTensor*[T](shape: Metadata): Tensor[T] = proc toTensor[T](a: openArray[T], shape: Metadata): Tensor[T] = ## Convert an openArray to a Tensor + ## ## Input: ## - An array or a seq, must be flattened. Called by `toTensor` below. 
## Result: ## - A Tensor of the same shape - ## var data = @a if unlikely(shape.product != data.len): raise newException( @@ -235,13 +235,14 @@ proc toTensor[T](a: openArray[T], shape: Metadata): Tensor[T] = shallowCopy(result.storage.raw_buffer, data) proc toTensor*[T](a: openArray[T]): auto = - ## Convert an openArray to a Tensor + ## Convert an openArray into a Tensor + ## ## Input: ## - An array or a seq (can be nested) ## Result: ## - A Tensor of the same shape ## - # Note: we removed the dummy static bugfixe related to Nim issue + # Note: we removed the dummy static bugfix related to Nim issue # https://github.com/nim-lang/Nim/issues/6343 # motivated by # https://github.com/nim-lang/Nim/issues/20993 @@ -250,6 +251,18 @@ proc toTensor*[T](a: openArray[T]): auto = let data = toSeq(flatIter(a)) result = toTensor(data, shape) +proc toTensor*[T](a: SomeSet[T]): auto = + ## Convert a HashSet or an OrderedSet into a Tensor + ## + ## Input: + ## - A HashSet or an OrderedSet + ## Result: + ## - A rank-1 Tensor holding the elements of the set + var shape = MetaData() + shape.add(a.len) + let data = toSeq(a) + result = toTensor(data, shape) + proc fromBuffer*[T](rawBuffer: ptr UncheckedArray[T], shape: varargs[int], layout: static OrderType): Tensor[T] = ## Creates a `Tensor[T]` from a raw buffer, cast as `ptr UncheckedArray[T]`. The ## size derived from the given shape must match the size of the buffer! From b6914c1f7246e92bfc3a1e6e617eb2fd90886507 Mon Sep 17 00:00:00 2001 From: Angel Ezquerra Date: Fri, 26 Apr 2024 00:00:52 +0200 Subject: [PATCH 3/7] Add an `intersection` procedure `intersection` returns the "intersection" of 2 Tensors as an unsorted rank-1 Tensor. Note that an equivalent function exists both in `numpy` (where it is called `intersect1d`) and in `Matlab` (where it is called `intersect`). However, those functions always sort the output, while Arraymancer's version does not. To replicate the same behavior, simply apply `sort` to the output of this function. 
Also note that to implement this feature we moved (and made public) the existing, private toHashSet procedure from spatial/distances.nim into tensor/initialization.nim. --- .../laser/tensor/initialization.nim | 15 ++++++++++ src/arraymancer/spatial/distances.nim | 9 ------ src/arraymancer/tensor/algorithms.nim | 30 ++++++++++++++++++- tests/tensor/test_algorithms.nim | 7 +++++ 4 files changed, 51 insertions(+), 10 deletions(-) diff --git a/src/arraymancer/laser/tensor/initialization.nim b/src/arraymancer/laser/tensor/initialization.nim index d7d84f47..6bbf0ee7 100644 --- a/src/arraymancer/laser/tensor/initialization.nim +++ b/src/arraymancer/laser/tensor/initialization.nim @@ -14,6 +14,12 @@ import ./datatypes # Standard library import std / [typetraits, sequtils, sets] + +# The folling export is needed to avoid an compilation error in +# algorithms.nim/intersection() when running the test_algorithms test: +# `Error: type mismatch - Expression: items(s1)` +export sets + # Third-party import nimblas @@ -301,6 +307,15 @@ func toUnsafeView*[T: KnownSupportsCopyMem](t: Tensor[T], aligned: static bool = ## Unsafe: the pointer can outlive the input tensor. unsafe_raw_offset(t, aligned).distinctBase() +proc toHashSet*[T](t: Tensor[T]): HashSet[T] = + ## Convert a Tensor into a `HashSet` + ## + ## Note that this is a lossy operation, since a HashSet only stores an + ## unsorted set of unique elements. + result = initHashSet[T](t.size) + for x in t: + result.incl x + func item*[T_IN, T_OUT](t: Tensor[T_IN], _: typedesc[T_OUT]): T_OUT = ## Returns the value of the input Tensor as a scalar of the selected type. ## This only works for Tensors (of any rank) that contain one single element. 
diff --git a/src/arraymancer/spatial/distances.nim b/src/arraymancer/spatial/distances.nim index e3bc03b3..9e4c031d 100644 --- a/src/arraymancer/spatial/distances.nim +++ b/src/arraymancer/spatial/distances.nim @@ -11,15 +11,6 @@ type AnyMetric* = Euclidean | Manhattan | Minkowski | Jaccard | CustomMetric -when (NimMajor, NimMinor, NimPatch) < (1, 4, 0): - # have to export sets for 1.0, because `bind` didn't exist apparently - export sets - -proc toHashSet[T](t: Tensor[T]): HashSet[T] = - result = initHashSet[T](t.size) - for x in t: - result.incl x - proc distance*(metric: typedesc[Manhattan], v, w: Tensor[float]): float = ## Computes the Manhattan distance between points `v` and `w`. Both need to ## be rank 1 tensors with `k` elements, where `k` is the dimensionality diff --git a/src/arraymancer/tensor/algorithms.nim b/src/arraymancer/tensor/algorithms.nim index 10565e76..eb94126f 100644 --- a/src/arraymancer/tensor/algorithms.nim +++ b/src/arraymancer/tensor/algorithms.nim @@ -16,7 +16,7 @@ import ./data_structure, ./init_cpu, ./init_copy_cpu -import std / [algorithm, sequtils] +import std / [algorithm, sequtils, sets] export SortOrder proc sort*[T](t: var Tensor[T], order = SortOrder.Ascending) = @@ -164,3 +164,31 @@ proc union*[T](t1, t2: Tensor[T]): Tensor[T] = ## # 3 1 2 0 4 ## ``` concat([t1, t2], axis = 0).unique() + +proc intersection*[T](t1, t2: Tensor[T]): Tensor[T] = + ## Return the "intersection" of 2 Tensors as an unsorted rank-1 Tensor + ## + ## Inputs: + ## - t1, t2: Input Tensors. + ## + ## Result: + ## - An unsorted rank-1 Tensor containing the intersection of + ## the input Tensors. + ## + ## Note: + ## - The equivalent `numpy` function is called `intersect1d`, while the + ## equivalent `Matlab` function is called `intersect`. However, both of + ## those functions always sort the output. To replicate the same + ## behavior, simply apply `sort` to the output of this function. 
+ ## + ## Example: + ## ```nim + ## let t1 = arange(0, 5) + ## let t2 = arange(3, 8) + ## + ## echo intersection(t1, t2) + ## # Tensor[system.int] of shape "[2]" on backend "Cpu" + ## # 4 3 + ## ``` + intersection(toHashSet(t1), toHashSet(t2)).toTensor + diff --git a/tests/tensor/test_algorithms.nim b/tests/tensor/test_algorithms.nim index 7cd68960..da3fe469 100644 --- a/tests/tensor/test_algorithms.nim +++ b/tests/tensor/test_algorithms.nim @@ -77,3 +77,10 @@ suite "[Core] Testing algorithm functions": let t1 = [3, 1, 3, 2, 1, 0].toTensor let t2 = [4, 2, 2, 3].toTensor check: sorted(union(t1, t2)) == [0, 1, 2, 3, 4].toTensor + + test "Intersection": + block: + let t1 = [3, 1, 3, 2, 1, 0].toTensor + let t2 = [4, 2, 2, 3].toTensor + check: sorted(intersection(t1, t2)) == [2, 3].toTensor + From 9d4cec37c4b63f41cb76fec32152365ce0022cf0 Mon Sep 17 00:00:00 2001 From: Angel Ezquerra Date: Fri, 26 Apr 2024 00:01:10 +0200 Subject: [PATCH 4/7] Add a `setDiff` procedure `setDiff` returns the (symmetric or non symmetric) "difference" between 2 Tensors as an unsorted rank-1 Tensor. Note that an equivalent function exists both in `numpy` (where it is called `setdiff1d`) and in `Matlab` (where it is called `setdiff`). However, those functions always sort the output, while Arraymancer's version does not. To replicate the same behavior, simply apply `sort` to the output of this function. 
--- src/arraymancer/tensor/algorithms.nim | 46 +++++++++++++++++++++++++++ tests/tensor/test_algorithms.nim | 7 ++++ 2 files changed, 53 insertions(+) diff --git a/src/arraymancer/tensor/algorithms.nim b/src/arraymancer/tensor/algorithms.nim index eb94126f..5b54a558 100644 --- a/src/arraymancer/tensor/algorithms.nim +++ b/src/arraymancer/tensor/algorithms.nim @@ -192,3 +192,49 @@ proc intersection*[T](t1, t2: Tensor[T]): Tensor[T] = ## ``` intersection(toHashSet(t1), toHashSet(t2)).toTensor +proc setDiff*[T](t1, t2: Tensor[T], symmetric = false): Tensor[T] = + ## Return the (symmetric or non symmetric) "difference" between 2 Tensors as an unsorted rank-1 Tensor + ## + ## By default (i.e. when `symmetric` is `false`) return all the elements in + ## `t1` that are ``not`` found in `t2`. + ## + ## If `symmetric` is true, the "symmetric" difference of the Tensors is + ## returned instead, i.e. the elements which are either not in `t1` ``or`` + ## not in `t2`. + ## + ## Inputs: + ## - t1, t2: Input Tensors. + ## - symmetric: Whether to return a symmetric or non symmetric difference. + ## Defaults to `false`. + ## + ## Result: + ## - An unsorted rank-1 Tensor containing the selected "difference" between + ## the input Tensors. + ## + ## Note: + ## - The equivalent `numpy` function is called `setdiff1d`, while the + ## equivalent `Matlab` function is called `setdiff`. However, both of + ## those functions always sort the output. To replicate the same + ## behavior, simply apply `sort` to the output of this function. 
+ ## + ## Examples: + ## ```nim + ## let t1 = arange(0, 5) + ## let t2 = arange(3, 8) + ## + ## echo setDiff(t1, t2) + ## # Tensor[system.int] of shape "[3]" on backend "Cpu" + ## # 2 1 0 + ## + ## echo setDiff(t1, t2, symmetric = true) + ## # Tensor[system.int] of shape "[6]" on backend "Cpu" + ## # 5 2 6 1 7 0 + ## ``` + let h1 = toHashSet(t1) + let h2 = toHashSet(t2) + let diff = if symmetric: + symmetricDifference(h1, h2) + else: + h1 - h2 + result = diff.toTensor + diff --git a/tests/tensor/test_algorithms.nim b/tests/tensor/test_algorithms.nim index da3fe469..96d34876 100644 --- a/tests/tensor/test_algorithms.nim +++ b/tests/tensor/test_algorithms.nim @@ -84,3 +84,10 @@ suite "[Core] Testing algorithm functions": let t2 = [4, 2, 2, 3].toTensor check: sorted(intersection(t1, t2)) == [2, 3].toTensor + test "setDiff": + block: + let t1 = arange(0, 5) + let t2 = arange(3, 8) + + check: sorted(setDiff(t1, t2)) == [0, 1, 2].toTensor + check: sorted(setDiff(t1, t2, symmetric = true)) == [0, 1, 2, 5, 6, 7].toTensor \ No newline at end of file From 6abfa3e6201c1e1955cd34f4b4b197d52d85c313 Mon Sep 17 00:00:00 2001 From: Angel Ezquerra Date: Fri, 26 Apr 2024 00:06:25 +0200 Subject: [PATCH 5/7] Add a `contains` function (and thus add support for `in` and `notin`) `find` (which is used to implement `contains`) was already supported (since `system.find` is generic and works with Tensors) but was untested, so this also adds a test for it. 
--- src/arraymancer/tensor/algorithms.nim | 20 ++++++++++++++++++++ tests/tensor/test_algorithms.nim | 15 ++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/arraymancer/tensor/algorithms.nim b/src/arraymancer/tensor/algorithms.nim index 5b54a558..bf087027 100644 --- a/src/arraymancer/tensor/algorithms.nim +++ b/src/arraymancer/tensor/algorithms.nim @@ -238,3 +238,23 @@ proc setDiff*[T](t1, t2: Tensor[T], symmetric = false): Tensor[T] = h1 - h2 result = diff.toTensor +proc contains*[T](t: Tensor[T], item: T): bool {.inline.}= + ## Returns true if `item` is in the input Tensor `t` or false if not found. + ## This is a shortcut for `find(t, item) >= 0`. + ## + ## This allows the `in` and `notin` operators, i.e.: + ## `t.contains(item)` is the same as `item in t`. + ## + ## Examples: + ## ```nim + ## var t = [1, 3, 5].toTensor + ## assert t.contains(5) + ## assert 3 in t + ## assert 99 notin t + ## ``` + return find(t, item) >= 0 + +proc ismember*[T](t1, t2: Tensor[T]): Tensor[bool] {.noinit.} = + result = newTensor[bool](t1.len) + for n, it in t1.enumerate(): + result[n] = it in t2 diff --git a/tests/tensor/test_algorithms.nim b/tests/tensor/test_algorithms.nim index 96d34876..1fd702bd 100644 --- a/tests/tensor/test_algorithms.nim +++ b/tests/tensor/test_algorithms.nim @@ -90,4 +90,17 @@ suite "[Core] Testing algorithm functions": let t2 = arange(3, 8) check: sorted(setDiff(t1, t2)) == [0, 1, 2].toTensor - check: sorted(setDiff(t1, t2, symmetric = true)) == [0, 1, 2, 5, 6, 7].toTensor \ No newline at end of file + check: sorted(setDiff(t1, t2, symmetric = true)) == [0, 1, 2, 5, 6, 7].toTensor + + test "Find and Contains": + let t = arange(-2, 5) + + block: + check: t.find(3) == 5 + check: t.find(-6) == -1 + + block: + check: 3 in t + check: 3 notin t == false + check: -6 in t == false + check: -6 notin t From 2c7752bebfff02f5a9f451bc7daa99bf21b48063 Mon Sep 17 00:00:00 2001 From: Angel Ezquerra Date: Sun, 28 Apr 2024 22:40:36 +0200 
Subject: [PATCH 6/7] Add support for `almostEqual` This was a useful std/math function that we did not support yet. --- src/arraymancer/tensor/math_functions.nim | 34 +++++++++++++++++++++++ tests/tensor/test_math_functions.nim | 18 ++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/arraymancer/tensor/math_functions.nim b/src/arraymancer/tensor/math_functions.nim index 626c4bf3..839c4a1c 100644 --- a/src/arraymancer/tensor/math_functions.nim +++ b/src/arraymancer/tensor/math_functions.nim @@ -274,6 +274,40 @@ proc classify*[T: SomeFloat](t: Tensor[T]): Tensor[FloatClass] {.noinit.} = ## - fcNegInf: value is negative infinity t.map_inline(classify(x)) +proc almostEqual*[T: SomeFloat | Complex32 | Complex64](t1, t2: Tensor[T], + unitsInLastPlace: Natural = 4): Tensor[bool] {.noinit.} = + ## Element-wise almostEqual function + ## + ## Checks whether pairs of elements of two tensors are almost equal, using + ## the [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon). + ## + ## For more details check the section covering the `almostEqual` procedure in + ## nim's standard library documentation. + ## + ## Inputs: + ## - t1, t2: Input (floating point or complex) tensors of the same shape. + ## - unitsInLastPlace: The max number of + ## [units in the last place](https://en.wikipedia.org/wiki/Unit_in_the_last_place) + ## difference tolerated when comparing two numbers. The + ## larger the value, the more error is allowed. A `0` + ## value means that two numbers must be exactly the + ## same to be considered equal. + ## + ## Result: + ## - A new boolean tensor of the same shape as the inputs, in which elements + ## are true if the two values in the same position on the two input tensors + ## are almost equal (and false if they are not). + ## + ## Note: + ## - You can combine this function with `all` to check if two real tensors + ## are almost equal. 
+ map2_inline(t1, t2): + when T is Complex: + almostEqual(x.re, y.re, unitsInLastPlace=unitsInLastPlace) and + almostEqual(x.im, y.im, unitsInLastPlace=unitsInLastPlace) + else: + almostEqual(x, y, unitsInLastPlace=unitsInLastPlace) + type ConvolveMode* = enum full, same, valid proc convolveImpl[T: SomeNumber | Complex32 | Complex64]( diff --git a/tests/tensor/test_math_functions.nim b/tests/tensor/test_math_functions.nim index bafa5ca3..122c6b5e 100644 --- a/tests/tensor/test_math_functions.nim +++ b/tests/tensor/test_math_functions.nim @@ -163,6 +163,24 @@ proc main() = check: expected_isNaN == a.isNaN check: expected_classification == a.classify + test "almostEqual": + block: # Real + let t1 = arange(1.0, 5.0) + let t2 = t1.clone() + check: all(almostEqual(t1, t2)) == true + var t3 = t1.clone() + t3[0] += 2e-15 + check: almostEqual(t1, t3) == [false, true, true, true].toTensor() + check: all(almostEqual(t1, t3, unitsInLastPlace = 5)) == true + block: # Complex + let t1 = complex(arange(1.0, 5.0), arange(1.0, 5.0)) + let t2 = t1.clone() + check: all(almostEqual(t1, t2)) == true + var t3 = t1.clone() + t3[0] += complex(2e-15) + check: almostEqual(t1, t3) == [false, true, true, true].toTensor() + check: all(almostEqual(t1, t3, unitsInLastPlace = 5)) == true + test "1-D convolution": block: let a = arange(4) From 7a478b60a6fa802adaeab9996dbfe8cce8f5b951 Mon Sep 17 00:00:00 2001 From: Vindaar Date: Sun, 12 May 2024 15:29:21 +0200 Subject: [PATCH 7/7] Update src/arraymancer/laser/tensor/initialization.nim Fix typo in export comment & add alternative for reader --- src/arraymancer/laser/tensor/initialization.nim | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/arraymancer/laser/tensor/initialization.nim b/src/arraymancer/laser/tensor/initialization.nim index 6bbf0ee7..d68ff99b 100644 --- a/src/arraymancer/laser/tensor/initialization.nim +++ b/src/arraymancer/laser/tensor/initialization.nim @@ -15,9 +15,10 @@ import # Standard library import std / 
[typetraits, sequtils, sets] -# The folling export is needed to avoid an compilation error in +# The following export is needed to avoid a compilation error in # algorithms.nim/intersection() when running the test_algorithms test: # `Error: type mismatch - Expression: items(s1)` +# (Alternative: could use `bind sets.items` in `intersection` and `setDiff`) export sets # Third-party