Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport release/1.5.x] Refactor test_random to minimize collective calls #1683

Merged
merged 1 commit into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion heat/core/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,11 +982,18 @@ def reduce_means_elementwise(output_shape_i: torch.Tensor) -> DNDarray:
return mu_tot[0][0] if mu_tot[0].size == 1 else mu_tot[0]

# ----------------------------------------------------------------------------------------------
# sanitize dtype
if types.heat_type_is_exact(x.dtype):
if x.dtype is types.int64:
x = x.astype(types.float64)
else:
x = x.astype(types.float32)

if axis is None:
# full matrix calculation
if not x.is_distributed():
# if x is not distributed do a torch.mean on x
ret = torch.mean(x.larray.float())
ret = torch.mean(x.larray)
return DNDarray(
ret,
gshape=tuple(ret.shape),
Expand Down Expand Up @@ -1791,6 +1798,13 @@ def std(
>>> ht.std(a, 1)
DNDarray([1.2961, 0.3362, 1.0739, 0.9820], dtype=ht.float32, device=cpu:0, split=None)
"""
# sanitize dtype
if types.heat_type_is_exact(x.dtype):
if x.dtype is types.int64:
x = x.astype(types.float64)
else:
x = x.astype(types.float32)

if not isinstance(ddof, int):
raise TypeError(f"ddof must be integer, is {type(ddof)}")
# elif ddof > 1:
Expand Down
179 changes: 77 additions & 102 deletions heat/core/tests/test_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,28 +153,26 @@ def test_rand(self):

a = ht.random.rand(21, 16, 17, 21, dtype=ht.float32, split=2)
b = ht.random.rand(15, 11, 19, 31, dtype=ht.float32, split=0)
a = a.numpy().flatten()
b = b.numpy().flatten()
c = np.concatenate((a, b))
a = a.flatten()
b = b.flatten()
c = ht.concatenate((a, b))

# Values should be spread evenly across the range [0, 1)
mean = np.mean(c)
median = np.median(c)
std = np.std(c)
mean = ht.mean(c)
# median = np.median(c)
std = ht.std(c)
self.assertTrue(0.49 < mean < 0.51)
self.assertTrue(0.49 < median < 0.51)
# self.assertTrue(0.49 < median < 0.51)
self.assertTrue(std < 0.3)
self.assertTrue(((0 <= c) & (c < 1)).all())

def test_randint(self):
# Checked that the random values are in the correct range
a = ht.random.randint(low=0, high=10, size=(10, 10), dtype=ht.int64)
self.assertEqual(a.dtype, ht.int64)
a = a.numpy()
self.assertTrue(((0 <= a) & (a < 10)).all())

a = ht.random.randint(low=100000, high=150000, size=(31, 25, 11), dtype=ht.int64, split=2)
a = a.numpy()
self.assertTrue(((100000 <= a) & (a < 150000)).all())

# For the range [0, 1) only the value 0 is allowed
Expand All @@ -194,20 +192,18 @@ def test_randint(self):
shape = (15, 13, 9, 21, 65)
ht.random.seed(13579)
a = ht.random.randint(10000, size=shape, split=2, dtype=ht.int64)
a = a.numpy()

ht.random.seed(13579)
b = ht.random.randint(low=0, high=10000, size=shape, split=2, dtype=ht.int64)
b = b.numpy()

self.assertTrue(np.array_equal(a, b))
mean = np.mean(a)
median = np.median(a)
std = np.std(a)
self.assertTrue(ht.equal(a, b))
mean = ht.mean(a)
# median = ht.median(a)
std = ht.std(a)

# Mean and median should be in the center while the std is very high due to an even distribution
self.assertTrue(4900 < mean < 5100)
self.assertTrue(4900 < median < 5100)
# self.assertTrue(4900 < median < 5100)
self.assertTrue(std < 2900)

with self.assertRaises(ValueError):
Expand All @@ -226,31 +222,26 @@ def test_randint(self):
self.assertEqual(a.dtype, ht.int32)
self.assertEqual(a.larray.dtype, torch.int32)
self.assertEqual(b.dtype, ht.int32)
a = a.numpy()
b = b.numpy()
self.assertEqual(a.dtype, np.int32)
self.assertTrue(np.array_equal(a, b))
self.assertTrue(ht.equal(a, b))
self.assertTrue(((50 <= a) & (a < 1000)).all())
self.assertTrue(((50 <= b) & (b < 1000)).all())

c = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0)
c = c.numpy()
self.assertFalse(np.array_equal(a, c))
self.assertFalse(np.array_equal(b, c))
self.assertFalse(ht.equal(a, c))
self.assertFalse(ht.equal(b, c))
self.assertTrue(((50 <= c) & (c < 1000)).all())

ht.random.seed(0xFFFFFFF)
a = ht.random.randint(
10000, size=(123, 42, 13, 21), split=3, dtype=ht.int32, comm=ht.MPI_WORLD
)
a = a.numpy()
mean = np.mean(a)
median = np.median(a)
std = np.std(a)
mean = ht.mean(a)
# median = np.median(a)
std = ht.std(a)

# Mean and median should be in the center while the std is very high due to an even distribution
self.assertTrue(4900 < mean < 5100)
self.assertTrue(4900 < median < 5100)
# self.assertTrue(4900 < median < 5100)
self.assertTrue(std < 2900)

# test aliases
Expand Down Expand Up @@ -297,23 +288,21 @@ def test_randn(self):
a = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2)
self.assertEqual(a.dtype, ht.float32)
self.assertEqual(a.larray[0, 0, 0].dtype, torch.float32)
a = a.numpy()
self.assertEqual(a.dtype, np.float32)
mean = np.mean(a)
median = np.median(a)
std = np.std(a)
mean = ht.mean(a)
# median = np.median(a)
std = ht.std(a)
self.assertTrue(-0.02 < mean < 0.02)
self.assertTrue(-0.02 < median < 0.02)
# self.assertTrue(-0.02 < median < 0.02)
self.assertTrue(0.99 < std < 1.01)

ls = 272 + ht.MPI_WORLD.rank
ht.random.set_state(("Batchparallel", None, ls))
b = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2).numpy()
self.assertTrue(np.allclose(a, b))
b = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2)
self.assertTrue(ht.allclose(a, b))

c = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2).numpy()
self.assertFalse(np.allclose(a, c))
self.assertFalse(np.allclose(b, c))
c = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2)
self.assertFalse(ht.allclose(a, c))
self.assertFalse(ht.allclose(b, c))

# check wrong shapes
with self.assertRaises(ValueError):
Expand Down Expand Up @@ -539,10 +528,9 @@ def test_rand(self):
a = ht.random.rand(2, 3, 4, 5, split=0)
ht.random.set_state(("Threefry", seed, 0x10000000000000000))
b = ht.random.rand(2, 44, split=0)
a = a.numpy().flatten()
b = b.numpy().flatten()
self.assertEqual(a.dtype, np.float32)
self.assertTrue(np.array_equal(a[32:], b))
a = a.flatten()
b = b.flatten()
self.assertTrue(ht.equal(a[32:], b))

# Check that random numbers don't repeat after first overflow
seed = 12345
Expand All @@ -557,9 +545,9 @@ def test_rand(self):
a = ht.random.rand(2, 34, split=0)
ht.random.set_state(("Threefry", seed, 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0))
b = ht.random.rand(2, 50, split=0)
a = a.numpy().flatten()
b = b.numpy().flatten()
self.assertTrue(np.array_equal(a, b[32:]))
a = a.flatten()
b = b.flatten()
self.assertTrue(ht.equal(a, b[32:]))

# different split axis with resetting seed
ht.random.seed(seed)
Expand All @@ -573,9 +561,9 @@ def test_rand(self):
a = ht.random.rand(2, 50, split=0)
ht.random.seed(seed)
b = ht.random.rand(100, split=None)
a = a.numpy().flatten()
b = b.larray.cpu().numpy()
self.assertTrue(np.array_equal(a, b))
a = a.flatten()
b = ht.resplit(b, 0)
self.assertTrue(ht.equal(a, b))

# On different shape and split the same random values are used
ht.random.seed(seed)
Expand Down Expand Up @@ -632,37 +620,36 @@ def test_rand(self):

ht.random.seed(9876)
b = ht.random.rand(np.prod(shape), dtype=ht.float32)
a = a.numpy().flatten()
b = b.larray.cpu().numpy()
self.assertTrue(np.array_equal(a, b))
self.assertEqual(a.dtype, np.float32)
a = a.flatten()
b = ht.resplit(b, 0)
self.assertTrue(ht.equal(a, b))

a = ht.random.rand(21, 16, 17, 21, dtype=ht.float32, split=2)
b = ht.random.rand(15, 11, 19, 31, dtype=ht.float32, split=0)
a = a.numpy().flatten()
b = b.numpy().flatten()
c = np.concatenate((a, b))
a = a.flatten()
b = b.flatten()
c = ht.concatenate((a, b))

# Values should be spread evenly across the range [0, 1)
mean = np.mean(c)
median = np.median(c)
std = np.std(c)
mean = ht.mean(c)
# median = np.median(c)
std = ht.std(c)
self.assertTrue(0.49 < mean < 0.51)
self.assertTrue(0.49 < median < 0.51)
# self.assertTrue(0.49 < median < 0.51)
self.assertTrue(std < 0.3)
self.assertTrue(((0 <= c) & (c < 1)).all())

ht.random.seed(11111)
a = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32).numpy()
a = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32)
# Overflow reached
ht.random.set_state(("Threefry", 11111, 0x10000000000000000))
b = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32).numpy()
self.assertTrue(np.array_equal(a, b))
b = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32)
self.assertTrue(ht.equal(a, b))

ht.random.set_state(("Threefry", 11111, 0x100000000))
c = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32).numpy()
self.assertFalse(np.array_equal(a, c))
self.assertFalse(np.array_equal(b, c))
c = ht.random.rand(12, 32, 44, split=1, dtype=ht.float32)
self.assertFalse(ht.equal(a, c))
self.assertFalse(ht.equal(b, c))

# To check working with large number of elements
ht.random.randn(6667, 3523, dtype=ht.float64, split=None)
Expand All @@ -675,11 +662,9 @@ def test_randint(self):
# Checked that the random values are in the correct range
a = ht.random.randint(low=0, high=10, size=(10, 10), dtype=ht.int64)
self.assertEqual(a.dtype, ht.int64)
a = a.numpy()
self.assertTrue(((0 <= a) & (a < 10)).all())

a = ht.random.randint(low=100000, high=150000, size=(31, 25, 11), dtype=ht.int64, split=2)
a = a.numpy()
self.assertTrue(((100000 <= a) & (a < 150000)).all())

# For the range [0, 1) only the value 0 is allowed
Expand All @@ -699,35 +684,32 @@ def test_randint(self):
ht.random.seed(13579)
shape = (15, 13, 9, 21, 65)
a = ht.random.randint(15, 100, size=shape, split=0, dtype=ht.int64)
a = a.numpy().flatten()
a = a.flatten()

ht.random.seed(13579)
elements = np.prod(shape)
b = ht.random.randint(low=15, high=100, size=(elements,), dtype=ht.int64)
b = b.numpy()
self.assertTrue(np.array_equal(a, b))
self.assertTrue(ht.equal(a, b))

# Two arrays with the same seed and shape have identical values
ht.random.seed(13579)
a = ht.random.randint(10000, size=shape, split=2, dtype=ht.int64)
a = a.numpy()

ht.random.seed(13579)
b = ht.random.randint(low=0, high=10000, size=shape, split=2, dtype=ht.int64)
b = b.numpy()

ht.random.seed(13579)
c = ht.random.randint(low=0, high=10000, dtype=ht.int64)
self.assertTrue(np.equal(b[0, 0, 0, 0, 0], c))
self.assertTrue(ht.equal(b[0, 0, 0, 0, 0], c))

self.assertTrue(np.array_equal(a, b))
mean = np.mean(a)
median = np.median(a)
std = np.std(a)
self.assertTrue(ht.equal(a, b))
mean = ht.mean(a)
# median = np.median(a)
std = ht.std(a)

# Mean and median should be in the center while the std is very high due to an even distribution
self.assertTrue(4900 < mean < 5100)
self.assertTrue(4900 < median < 5100)
# self.assertTrue(4900 < median < 5100)
self.assertTrue(std < 2900)

with self.assertRaises(ValueError):
Expand All @@ -746,31 +728,26 @@ def test_randint(self):
self.assertEqual(a.dtype, ht.int32)
self.assertEqual(a.larray.dtype, torch.int32)
self.assertEqual(b.dtype, ht.int32)
a = a.numpy()
b = b.numpy()
self.assertEqual(a.dtype, np.int32)
self.assertTrue(np.array_equal(a, b))
self.assertTrue(ht.equal(a, b))
self.assertTrue(((50 <= a) & (a < 1000)).all())
self.assertTrue(((50 <= b) & (b < 1000)).all())

c = ht.random.randint(50, 1000, size=(13, 45), dtype=ht.int32, split=0)
c = c.numpy()
self.assertFalse(np.array_equal(a, c))
self.assertFalse(np.array_equal(b, c))
self.assertFalse(ht.equal(a, c))
self.assertFalse(ht.equal(b, c))
self.assertTrue(((50 <= c) & (c < 1000)).all())

ht.random.seed(0xFFFFFFF)
a = ht.random.randint(
10000, size=(123, 42, 13, 21), split=3, dtype=ht.int32, comm=ht.MPI_WORLD
)
a = a.numpy()
mean = np.mean(a)
median = np.median(a)
std = np.std(a)
mean = ht.mean(a)
# median = np.median(a)
std = ht.std(a)

# Mean and median should be in the center while the std is very high due to an even distribution
self.assertTrue(4900 < mean < 5100)
self.assertTrue(4900 < median < 5100)
# self.assertTrue(4900 < median < 5100)
self.assertTrue(std < 2900)

# test aliases
Expand Down Expand Up @@ -826,22 +803,20 @@ def test_randn(self):
a = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2)
self.assertEqual(a.dtype, ht.float32)
self.assertEqual(a.larray[0, 0, 0].dtype, torch.float32)
a = a.numpy()
self.assertEqual(a.dtype, np.float32)
mean = np.mean(a)
median = np.median(a)
std = np.std(a)
mean = ht.mean(a)
# median = np.median(a)
std = ht.std(a)
self.assertTrue(-0.01 < mean < 0.01)
self.assertTrue(-0.01 < median < 0.01)
# self.assertTrue(-0.01 < median < 0.01)
self.assertTrue(0.99 < std < 1.01)

ht.random.set_state(("Threefry", 54321, 0x10000000000000000))
b = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2).numpy()
self.assertTrue(np.allclose(a, b))
b = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2)
self.assertTrue(ht.allclose(a, b))

c = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2).numpy()
self.assertFalse(np.allclose(a, c))
self.assertFalse(np.allclose(b, c))
c = ht.random.randn(30, 30, 30, dtype=ht.float32, split=2)
self.assertFalse(ht.allclose(a, c))
self.assertFalse(ht.allclose(b, c))

def test_randperm(self):
ht.random.set_state(("Threefry", 0, 0))
Expand Down
2 changes: 1 addition & 1 deletion heat/utils/data/matrixgallery.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def hermitian(
matrix = randn(n, n, dtype=real_dtype, split=split, device=device, comm=comm) + 1j * randn(
n, n, dtype=real_dtype, split=split, device=device, comm=comm
)
elif not heat_type_is_exact(dtype):
elif dtype in [core.float32, core.float64]:
matrix = randn(n, n, dtype=dtype, split=split, device=device, comm=comm)
else:
raise ValueError("dtype must be floating-point data-type but is ", dtype, ".")
Expand Down
Loading