forked from davidhallac/TICC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUnitTest.py
96 lines (76 loc) · 4.67 KB
/
UnitTest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import unittest
from TICC_solver import TICC
import numpy as np
import sys
class TestStringMethods(unittest.TestCase):
    """Regression tests that compare TICC results with stored reference files.

    Each test fits a model on ``example_data.txt`` and checks the outcome
    against fixtures loaded from ``UnitTest_Data/``. The fixture paths are
    relative, so ``np.loadtxt`` resolves them against the current working
    directory.
    """

    DATA_FILE = "example_data.txt"
    FIXTURE_DIR = "UnitTest_Data/"

    @staticmethod
    def _make_ticc(window_size, number_of_clusters, **extra):
        """Build a TICC solver with the hyper-parameters shared by every test.

        Args:
            window_size: forwarded to ``TICC(window_size=...)``.
            number_of_clusters: forwarded to ``TICC(number_of_clusters=...)``.
            **extra: additional keyword arguments for ``TICC`` (for example
                ``biased=True``).

        Returns:
            The constructed ``TICC`` instance.
        """
        return TICC(window_size=window_size, number_of_clusters=number_of_clusters,
                    lambda_parameter=11e-2, beta=600, maxIters=100, threshold=2e-5,
                    write_out_file=False, prefix_string="output_folder/",
                    num_proc=1, **extra)

    def _assert_mrfs_match(self, file_prefix, cluster_MRFs, number_of_clusters):
        """Compare every fitted MRF with its comma-separated reference file.

        Args:
            file_prefix: fixture file name prefix, e.g. ``"cluster_"``; the
                cluster index and ``".txt"`` are appended.
            cluster_MRFs: second element returned by ``TICC.fit``, indexed by
                cluster number.
            number_of_clusters: how many clusters (and fixture files) to check.

        Raises:
            AssertionError: if any MRF differs from its fixture at 3 decimals.
        """
        for i in range(number_of_clusters):
            path = self.FIXTURE_DIR + file_prefix + str(i) + ".txt"
            mrf = np.loadtxt(path, delimiter=',')
            # Let numpy's AssertionError propagate: unittest reports it as a
            # failure and the message shows which entries differ.
            np.testing.assert_array_almost_equal(
                mrf, cluster_MRFs[i], decimal=3,
                err_msg="MRF of cluster {} differs from {}".format(i, path))

    def _assert_streaming_accuracy(self, ticc, cluster_assignment, block_size, n_points=1000):
        """Check predictions made on sliding blocks against the fitted labels.

        The model is trained on the whole data set and then reused while
        "streaming", which leaks data; that is acceptable here because the
        goal is only to exercise the prediction code path.

        Args:
            ticc: a fitted ``TICC`` instance.
            cluster_assignment: labels returned by ``ticc.fit``.
            block_size: number of rows per block (current row plus the
                ``block_size - 1`` preceding rows).
            n_points: number of leading rows to stream over.

        TODO: figure out why larger blocks don't improve predictions more.
        Reference:
        https://github.com/davidhallac/TICC/issues/18#issuecomment-384514116
        """
        # presumably the training matrix stored by fit() - verify in TICC_solver
        data = ticc.trained_model['complete_D_train']
        streamed = np.zeros(n_points)
        # The first block_size labels are seeded from the fitted assignment
        # rather than streamed.
        streamed[0:block_size] = cluster_assignment[0:block_size]
        for i in range(block_size, n_points):
            # The block must END at row i (inclusive) so that its last
            # prediction is the label of row i, which is where it is stored.
            block = data[i - block_size + 1:i + 1, ]
            streamed[i] = ticc.predict_clusters(block)[block_size - 1]
        matches = np.asarray(cluster_assignment[0:n_points]) == streamed
        percent_correct_streaming = 100 * np.sum(matches) / float(n_points)
        # NOTE(review): the value is on a 0-100 scale, so this bound only
        # demands more than 0.9% agreement; 90 was probably intended. Confirm
        # the accuracy the model actually reaches before tightening it.
        self.assertGreater(percent_correct_streaming, 0.9)

    def test_example(self):
        """Fit 8 clusters with window size 1 and compare with stored results."""
        ticc = self._make_ticc(window_size=1, number_of_clusters=8)
        (cluster_assignment, cluster_MRFs) = ticc.fit(input_file=self.DATA_FILE)
        assign = np.loadtxt(self.FIXTURE_DIR + "Results.txt")
        np.testing.assert_array_equal(
            cluster_assignment, assign,
            "Cluster assignment differs from UnitTest_Data/Results.txt")

        # Test prediction works with batch of data outside of `fit` method.
        # Perhaps there is a better way to test this in parallel so these are
        # more like unit tests rather than integration tests?
        test_batch = ticc.predict_clusters(ticc.trained_model['complete_D_train'][0:1000, ])
        np.testing.assert_array_equal(
            test_batch, cluster_assignment[0:1000],
            "Batch prediction differs from the fitted assignment")

        # Test streaming by passing in 5 row blocks at a time (current
        # timestamp and previous 4).
        self._assert_streaming_accuracy(ticc, cluster_assignment, block_size=5)

        self._assert_mrfs_match("cluster_", cluster_MRFs, 8)

    def test_multiExample(self):
        """Fit 5 clusters with window size 5 and compare with stored results."""
        ticc = self._make_ticc(window_size=5, number_of_clusters=5)
        (cluster_assignment, cluster_MRFs) = ticc.fit(input_file=self.DATA_FILE)
        assign = np.loadtxt(self.FIXTURE_DIR + "multiResults.txt")
        np.testing.assert_array_equal(
            cluster_assignment, assign,
            "Cluster assignment differs from UnitTest_Data/multiResults.txt")
        self._assert_mrfs_match("multiCluster_", cluster_MRFs, 5)

    def test_biased_vs_unbiased(self):
        """The ``biased=True`` solver must yield the same assignment as the default."""
        unbiased_ticc = self._make_ticc(window_size=1, number_of_clusters=8)
        (unbiased_cluster_assignment, unbiased_cluster_MRFs) = unbiased_ticc.fit(
            input_file=self.DATA_FILE)
        biased_ticc = self._make_ticc(window_size=1, number_of_clusters=8, biased=True)
        (biased_cluster_assignment, biased_cluster_MRFs) = biased_ticc.fit(
            input_file=self.DATA_FILE)
        np.testing.assert_array_equal(
            np.array(biased_cluster_assignment),
            np.array(unbiased_cluster_assignment),
            "Biased assignment is not equal to unbiased assignment!")

    def test_failed_unbiased(self):
        """Fitting 50 clusters with the default (unbiased) solver must raise."""
        ticc = self._make_ticc(window_size=1, number_of_clusters=50)
        # Only fit() sits inside assertRaises so that an unexpected failure
        # while constructing the solver cannot satisfy the test.
        with self.assertRaises(Exception):
            # TICC will fail in Iteration 2, because cluster 9 has only one observation.
            ticc.fit(input_file=self.DATA_FILE)
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()