-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prime numbers are an essential building block of many algorithms in diverse areas such as cryptography, digital communications and many others. This module adds the following functions: - primes: Procedure to generate lists of primes upto a certain value. - factor: Procedure which returns a Tensor containing the prime factors of the input. - isprime: Single element and element-wise procedures that return true when their inputs are prime numbers.
- Loading branch information
1 parent
972d84a
commit 469c878
Showing
2 changed files
with
334 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
## Module that implements several procedures related to prime numbers | ||
## | ||
## Prime numbers are an essential building block of many algorithms in diverse | ||
## areas such as cryptography, digital communications and many others. | ||
## This module adds a function to generate rank-1 tensors of primes upto a | ||
## certain value; as well as a function to calculate the prime factors of a | ||
## number. | ||
|
||
import arraymancer | ||
|
||
proc primes*[T: SomeInteger | SomeFloat](upto: T): Tensor[T] = | ||
## Generate a Tensor of prime numbers up to a certain value | ||
## | ||
## Return a Tensor of the prime numbers less than or equal to `upto`. | ||
## A prime number is one that has no factors other than 1 and itself. | ||
## | ||
## Input: | ||
## - upto: Integer up to which primes will be generated | ||
## | ||
## Result: | ||
## - Integer Tensor of prime values less than or equal to `upto` | ||
## | ||
## Note: | ||
## - This function implements a "half" Sieve of Erathostenes algorithm | ||
## which is a classical Sieve of Erathostenes in which only odd numbers | ||
## are checked. Many examples of this algorithm can be found online. | ||
## It also stops checking after sqrt(upto) | ||
## - The memory required by this procedure is proportional to the input | ||
## number. | ||
when T is SomeFloat: | ||
if upto != round(upto): | ||
raise newException(ValueError, | ||
"`upto` value (" & $upto & ") must be a whole number") | ||
|
||
if upto < 11: | ||
# Handle the primes below 11 to simplify the general code below | ||
# (by removing the need to handle the few cases in which the index to | ||
# `isprime`, calculated based on `factor` is negative) | ||
# This is the minimum set of primes that we must handle, but we could | ||
# extend this list to make the calculation faster for more of the | ||
# smallest primes | ||
let prime_candidates = [2.T, 3, 5, 7].toTensor() | ||
return prime_candidates[prime_candidates <=. upto] | ||
|
||
# General algorithm (valid for numbers higher than 10) | ||
let prime_candidates = arange(3.T, T(upto + 1), 2.T) | ||
var isprime = ones[bool]((upto.int - 1) div 2) | ||
let max_possible_factor_idx = int(sqrt(upto.float)) div 2 | ||
for factor in prime_candidates[_ ..< max_possible_factor_idx]: | ||
if isprime[(factor.int - 2) div 2]: | ||
isprime[(factor.int * 3 - 2) div 2 .. _ | factor.int] = false | ||
|
||
# Note that 2 is missing from the result, so it must be manually added to | ||
# the front of the result tensor | ||
return [2.T].toTensor().append(prime_candidates[isprime]) | ||
|
||
# The maximum float64 that can be represented as an integer that is followed by a | ||
# another integer that is representable as a float64 as well | ||
const maximumConsecutiveFloat64Int = pow(2.0, 53) - 1.0 | ||
|
||
proc factor*[T: SomeInteger | SomeFloat](n: T): Tensor[T] = | ||
## Return a Tensor containing the prime factors of the input | ||
## | ||
## Input: | ||
## - n: A value that will be factorized. | ||
## If its type is floating-point it must be a whole number. Otherwise | ||
## a ValueError will be raised. | ||
## Result: | ||
## - A sorted Tensor containing the prime factors of the input. | ||
## | ||
## Example: | ||
## ```nim | ||
## echo factor(60) | ||
## # Tensor[system.int] of shape "[4]" on backend "Cpu" | ||
## # 2 2 3 5 | ||
## ``` | ||
if n < 0: | ||
raise newException(ValueError, | ||
"Negative values (" & $n & ") cannot be factorized") | ||
when T is int64: | ||
if n > T(maximumConsecutiveFloat64Int): | ||
raise newException(ValueError, | ||
"Value (" & $n & ") is too large to be factorized") | ||
elif T is SomeFloat: | ||
if floor(n) != n: | ||
raise newException(ValueError, | ||
"Non whole numbers (" & $n & ") cannot be factorized") | ||
|
||
if n < 4: | ||
return [n].toTensor | ||
|
||
# The algorithm works by keeping track of the list of unique potential, | ||
# candidate prime factors of the input, and iteratively adding those | ||
# that are confirmed to be factors into a list of confirmed factors | ||
# (which is stored in the `result` tensor variable). | ||
|
||
# First we must initialize the `candidate_factor` Tensor | ||
# The factors of the input can be found among the list of primes | ||
# that are smaller than or equal to input. However we can significantly | ||
# reduce the candidate list by taking into account the fact that only a | ||
# single factor can be greater than the square root of the input. | ||
# The algorithm is such that if that is the case we will add the input number | ||
# at the very end of the loop below | ||
var candidate_factors = primes(T(ceil(sqrt(float(n))))) | ||
|
||
# This list of prime candidate_factors is refined by finding those of them | ||
# that divide the input value (i.e. those whose `input mod prime` == 0). | ||
# Those candiates that don't divide the input are known to not be valid | ||
# factors and can be removed from the candidate_factors list. Those that do | ||
# divide the input are confirmed as valid factors and as such are added to | ||
# the result list. Then the input is divided by all of the remaining | ||
# candidates (by dividing the input by the product of all the remaining | ||
# candidates). The result is a number that is the product of all the factors | ||
# that are still unknown (which must be among the remaining candidates!) and | ||
# which we can call `unknown_factor_product`. | ||
# Then we can simply repeat the same process over and over, replacing the | ||
# original input with the remaining `unknown_factor_product` after each | ||
# iteration, until the `unknown_factors_product` (which is reduced by each | ||
# division at the end of each iteration) reaches 1. Alternatively, we might | ||
# run out of candidates, which will only happen when there is only one factor | ||
# left (which must be greater than the square root of the input) and is stored | ||
# in the `unknown_factors_product`. In that case we add it to the confirmed | ||
# factors (result) list and the process can stop. | ||
var unknown_factor_product = n | ||
while unknown_factor_product > 1: | ||
# Find the primes that are divisors of the remaining unknown_factors_product | ||
# Note that this tells us which of the remaining candidate_factors are | ||
# factors of the input _at least once_ (but they could divide it more | ||
# than once) | ||
let is_factor = (unknown_factor_product mod candidate_factors) ==. 0 | ||
# Keep only the factors that divide the remainder and remove the rest | ||
# from the list of candidates | ||
candidate_factors = candidate_factors[is_factor] | ||
# after this step, all the items incandidate_factors are _known_ to be | ||
# factors of `unknown_factor_product` _at least once_! | ||
if candidate_factors.len == 0: | ||
# If there are no more prime candidates left, it means that the remainder | ||
# is a prime (and that it must be greater than the sqrt of the input), | ||
# and that we are done (after adding it to the result list) | ||
result = result.append([unknown_factor_product].toTensor) | ||
break | ||
# If we didn't stop it means that there are still candidates which we | ||
# _know_ are factors of the remainder, so we must add them to the result | ||
result = result.append(candidate_factors) | ||
# Now we can prepare for the next iteration by dividing the remainder, | ||
# by the factors we just added to the result. This reminder is the product | ||
# of the factors we don't know yet | ||
unknown_factor_product = T(unknown_factor_product / product(candidate_factors)) | ||
# After this division the items in `candidate_factors` become candidates again | ||
# and we can start a new iteration | ||
result = sorted(result) | ||
|
||
proc isprimeImpl[T: SomeInteger | SomeFloat](n: T, candidate_factors: Tensor[int]): bool {.inline.} = | ||
## Actual implementation of the isprime check | ||
# This function is optimized for speed in 2 ways: | ||
# 1. By first rejecting all non-whole float numbers and then performing the | ||
# actual isprime check using integers (which is faster than using floats) | ||
# 2. By receving a pre-calculated tensor of candidate_factors. This does not | ||
# speed up the check of a single value, but it does speed up the check of | ||
# a tensor of values. Note that because of #1 the candidate_factors must | ||
# be a tensor of ints (even if the number being checked is a float) | ||
when T is SomeFloat: | ||
if floor(n) != n: | ||
return false | ||
let n = int(n) | ||
result = (n > 1) and all(n mod candidate_factors[candidate_factors <. n]) | ||
|
||
proc isprime*[T: SomeInteger | SomeFloat](n: T): bool = | ||
## Check whether the input is a prime number | ||
## | ||
## Only positive values higher than 1 can be primes (i.e. we exclude 1 and -1 | ||
## which are sometimes considered primes). | ||
## | ||
## Note that this function also works with floats, which are considered | ||
## non-prime when they are not whole numbers. | ||
# Note that we do here some checks that are repeated later inside of | ||
# `isprimeImpl`. This is done to avoid the unnecessary calculation of | ||
# the `candidate_factors` tensor in those cases | ||
if n <= 1: | ||
return false | ||
when T is int64: | ||
if n > T(maximumConsecutiveFloat64Int): | ||
raise newException(ValueError, | ||
"Value (" & $n & ") is too large to be factorized") | ||
elif T is SomeFloat: | ||
if floor(n) != n: | ||
return false | ||
var candidate_factors = primes(int(ceil(sqrt(float(n))))) | ||
isprimeImpl(n, candidate_factors) | ||
|
||
proc isprime*[T: SomeInteger | SomeFloat](t: Tensor[T]): Tensor[bool] = | ||
## Element-wise check if the input values are prime numbers | ||
result = zeros[bool](t.len) | ||
# Pre-calculate the list of primes that will be used for the element-wise | ||
# isprime check and then call isprimeImpl on each element | ||
# Note that the candidate_factors must be a tensor of ints (for performance | ||
# reasons) | ||
var candidate_factors = primes(int(ceil(sqrt(float(max(t.flatten())))))) | ||
for idx, val in t.enumerate(): | ||
result[idx] = isprimeImpl(val, candidate_factors) | ||
return result.reshape(t.shape) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
import ../scinim/primes | ||
import arraymancer | ||
import std / [unittest, random] | ||
|
||
proc test_primes() = | ||
## Test the `primes` function | ||
test "Prime number generation (integer values)": | ||
check: primes(0).len == 0 | ||
check: primes(1).len == 0 | ||
check: primes(2) == [2].toTensor | ||
check: primes(3) == [2, 3].toTensor | ||
check: primes(4) == [2, 3].toTensor | ||
check: primes(11) == [2, 3, 5, 7, 11].toTensor | ||
check: primes(12) == [2, 3, 5, 7, 11].toTensor | ||
check: primes(19) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor | ||
check: primes(20) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor | ||
check: primes(22) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor | ||
check: primes(100000).len == 9592 | ||
check: primes(100003).len == 9593 | ||
check: primes(100000)[^1].item == 99991 | ||
|
||
test "Prime number generation (floating-point values)": | ||
check: primes(100000.0).len == 9592 | ||
check: primes(100000.0)[^1].item == 99991.0 | ||
|
||
# An exception must be raised if the `upto` value is not a whole number | ||
try: | ||
discard primes(100.5) | ||
check: false | ||
except ValueError: | ||
# This is what should happen! | ||
discard | ||
|
||
proc generate_random_factor_tensor[T]( | ||
max_value: T, max_factors: int, prime_list: Tensor[T]): Tensor[T] = | ||
## Generate a tensor of random prime factors taken from a tensor of primes | ||
## The tensor length will not exceed the `max_factors` and the product of | ||
## the factors will not exceed `max_value` either. | ||
## This is not just a random list of values taken from the `prime_list` | ||
## Instead we artificially introduce a random level of repetition of the | ||
## chosen factors to emulate the fact that many numbers have repeated | ||
## prime factors | ||
let max_value = rand(4 .. 2 ^ 53) | ||
let max_factors = rand(1 .. 20) | ||
result = newTensor[int](max_factors) | ||
var value = 1 | ||
var factor = prime_list[rand(prime_list.len - 1)] | ||
for idx in 0 ..< max_factors: | ||
# Randomly repeat the previous factor | ||
# Higher number of repetitions are less likely | ||
let repeat_factor = rand(5) < 1 | ||
if not repeat_factor: | ||
factor = prime_list[rand(prime_list.len - 1)] | ||
let new_value = factor * value | ||
if new_value >= max_value: | ||
break | ||
result[idx] = factor | ||
value = new_value | ||
result = sorted(result) | ||
result = result[result >. 0] | ||
|
||
proc test_factor() = | ||
test "Prime factorization of integer values (factor)": | ||
check: factor(60) == [2, 2, 3, 5].toTensor | ||
check: factor(100001) == [11, 9091].toTensor | ||
|
||
# Check that the product of the factorization of a few random values | ||
# equals the original numbers | ||
for n in 0 ..< 10: | ||
let value = rand(10000) | ||
check: value == product(factor(value)) | ||
|
||
# Repeat the previous test in a more sophisticated manner | ||
# Instead of generating random values and checking that the | ||
# product of their factorization is the same as the original values | ||
# (which could work for many incorrect implementations of factor), | ||
# generate a few random factor tensors, multiply them to get | ||
# the number that has them as prime factors, factorize those numbers | ||
# and check that their factorizations matches the original tensors | ||
let prime_list = primes(100) | ||
for n in 0 ..< 10: | ||
let max_value = rand(4 .. 2 ^ 53) | ||
let max_factors = rand(1 .. 20) | ||
var factors = generate_random_factor_tensor( | ||
max_value, max_factors, prime_list) | ||
let value = product(factors) | ||
check: factor(value) == factors | ||
|
||
test "Prime factorization of floating-point values (factor)": | ||
# Floating-point | ||
check: factor(60.0) == [2.0, 2, 3, 5].toTensor | ||
check: factor(100001.0) == [11.0, 9091].toTensor | ||
|
||
# Check that the product of the factorization of a few random values | ||
# equals the original numbers | ||
# Note that here we do not also do the reverse check (as we do for ints) | ||
# in order to make the test faster | ||
for n in 0 ..< 10: | ||
let value = floor(rand(10000.0)) | ||
check: value == product(factor(value)) | ||
|
||
# An exception must be raised if we try to factorize a non-whole number | ||
try: | ||
discard factor(100.5) | ||
check: false | ||
except ValueError: | ||
# This is what should happen! | ||
discard | ||
|
||
proc test_isprime() = | ||
test "isprime": | ||
check: isprime(7) == true | ||
check: isprime(7.0) == true | ||
check: isprime(7.5) == false | ||
check: isprime(1) == false | ||
check: isprime(0) == false | ||
check: isprime(-1) == false | ||
let t = [ | ||
[-1, 0, 2, 4], | ||
[ 5, 6, 7, 11] | ||
].toTensor | ||
let expected = [ | ||
[false, false, true, false], | ||
[ true, false, true, true] | ||
].toTensor | ||
check: isprime(t) == expected | ||
check: isprime(t.asType(float)) == expected | ||
|
||
# Run the tests | ||
suite "Primes": | ||
test_primes() | ||
test_factor() | ||
test_isprime() |