freezethaw.py
from __future__ import division, print_function
__author__ = 'James Robert Lloyd'
__description__ = 'Freeze thaw related code, including samplers - will probably be expanded into several modules eventually'
import os
import sys
root_dir = os.path.dirname(__file__)
sys.path.append(root_dir)
import copy
import random
import numpy as np
import scipy.stats
class WarmLearner(object):
"""Wrapper around things like random forest that don't have a warm start method"""
def __init__(self, base_model):
self.base_model = base_model
self.model = copy.deepcopy(self.base_model)
self.n_estimators = self.model.n_estimators
self.first_fit = True
def predict_proba(self, X):
return self.model.predict_proba(X)
def fit(self, X, y):
if self.first_fit:
self.model.fit(X, y)
self.first_fit = False
# Keep training and appending base estimators to main model
while self.model.n_estimators < self.n_estimators:
self.base_model.fit(X, y)
self.model.estimators_ += self.base_model.estimators_
self.model.n_estimators = len(self.model.estimators_)
# Clip any extra models produced
self.model.estimators_ = self.model.estimators_[:self.n_estimators]
self.model.n_estimators = self.n_estimators
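
# A plausible usage pattern for WarmLearner (sketch; assumes scikit-learn is
# available - X_train / y_train / X_test are placeholder names):
#     from sklearn.ensemble import RandomForestClassifier
#     warm = WarmLearner(RandomForestClassifier(n_estimators=50))
#     warm.fit(X_train, y_train)         # first call fits 50 trees
#     warm.n_estimators = 150            # ask the wrapper for more capacity
#     warm.fit(X_train, y_train)         # appends batches of 50 trees until 150 are held
#     probs = warm.predict_proba(X_test)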
def trunc_norm_mean_upper_tail(a, mean, std):
    """Mean of a normal distribution truncated from below at a, i.e. E[X | X > a]"""
    alpha = (a - mean) / std
    num = scipy.stats.norm.pdf(alpha)
    den = 1 - scipy.stats.norm.cdf(alpha)
    if num == 0 or den == 0:
        # Numerical nasties - deep in the tail the answer is essentially max(a, mean)
        if a < mean:
            return mean
        else:
            return a
    else:
        # Standard inverse Mills ratio formula
        lambd = num / den
        return mean + std * lambd
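
# Quick sanity check (sketch): for a standard normal truncated at its mean,
# E[X | X > 0] = sqrt(2 / pi) ~= 0.798, so
#     trunc_norm_mean_upper_tail(a=0.0, mean=0.0, std=1.0)   # -> approx 0.7979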
def ft_K_t_t(t, t_star, scale, alpha, beta):
"""Exponential decay mixture kernel"""
# Check 1d
# TODO - Abstract this checking behaviour - check pybo and gpy for inspiration
t = np.array(t)
t_star = np.array(t_star)
assert t.ndim == 1 or (t.ndim == 2 and t.shape[1] == 1)
assert t_star.ndim == 1 or (t_star.ndim == 2 and t_star.shape[1] == 1)
# Create kernel
K_t_t = np.zeros((len(t), len(t_star)))
for i in range(len(t)):
for j in range(len(t_star)):
K_t_t[i, j] = scale * (beta ** alpha) / ((t[i] + t_star[j] + beta) ** alpha)
return K_t_t
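
# Example (sketch, with illustrative kernel parameters): covariance between
# observed epochs t = [1, 2, 3] and prediction epochs t_star = [4, 5]:
#     K = ft_K_t_t([1, 2, 3], [4, 5], scale=1.0, alpha=3.0, beta=1.0)
#     K.shape   # -> (3, 2); entries decay as t[i] + t_star[j] grows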
def ft_K_t_t_plus_noise(t, t_star, scale, alpha, beta, log_noise):
"""Ronseal - clearly this behaviour should be abstracted"""
# TODO - abstract kernel addition etc
noise = np.exp(log_noise)
K_t_t = ft_K_t_t(t, t_star, scale=scale, alpha=alpha, beta=beta)
K_noise = cov_iid(t, t_star, scale=noise)
return K_t_t + K_noise
def cov_iid(x, z=None, scale=1):
"""Identity kernel, scaled"""
if z is None:
z = x
# Check 1d
x = np.array(x)
z = np.array(z)
assert x.ndim == 1 or (x.ndim == 2 and x.shape[1] == 1)
assert z.ndim == 1 or (z.ndim == 2 and z.shape[1] == 1)
# Create kernel
    K = np.zeros((len(x), len(z)))
    if len(x) != len(z) or not np.all(x == z):
        # Cross covariance between different input sets - iid noise contributes nothing
        # FIXME - Is this the correct behaviour?
        return K
    for i in range(len(x)):
        K[i, i] = scale
    return K
def cov_matern_5_2(x, z=None, scale=1, ell=1):
"""Identity kernel, scaled"""
if z is None:
z = x
# Check 1d
x = np.array(x, ndmin=2)
z = np.array(z, ndmin=2)
if x.shape[1] > 1:
x = x.T
if z.shape[1] > 1:
z = z.T
assert (x.ndim == 2 and x.shape[1] == 1)
assert (z.ndim == 2 and z.shape[1] == 1)
# Create kernel
    x = x * np.sqrt(5) / ell
    z = z * np.sqrt(5) / ell
    sqdist = np.sum(x ** 2, 1).reshape(-1, 1) + np.sum(z ** 2, 1) - 2 * np.dot(x, z.T)
    # Matern 5/2 is a function of the scaled Euclidean distance, not the squared
    # distance; clip tiny negative values from floating point error before the sqrt
    r = np.sqrt(np.maximum(sqdist, 0))
    K = (1 + r * (1 + r / 3)) * np.exp(-r)
    K *= scale
    return K
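
# Example (sketch, with illustrative hyperparameters): covariance between three
# scalar hyperparameter settings:
#     K_x = cov_matern_5_2([0.0, 0.5, 1.0], scale=1.0, ell=0.5)
#     K_x.shape   # -> (3, 3), with scale on the diagonal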
def slice_sample_bounded_max(N, burn, logdist, xx, widths, step_out, max_attempts, bounds):
"""
Slice sampling with bounds and max iterations
Iain Murray May 2004, tweaks June 2009, a diagnostic added Feb 2010
See Pseudo-code in David MacKay's text book p375
Modified by James Lloyd, May 2012 - max attempts
Modified by James Lloyd, Jan 2015 - bounds
Ported to python by James Lloyd, Feb 2015
"""
xx = copy.deepcopy(xx)
D = len(xx)
samples = []
if (not isinstance(widths, list)) or len(widths) == 1:
widths = np.ones(D) * widths
log_Px = logdist(xx)
for ii in range(N + burn):
log_uprime = np.log(random.random()) + log_Px
for dd in random.sample(range(D), D):
            x_l = copy.deepcopy(xx)
            x_r = copy.deepcopy(xx)
            xprime = copy.deepcopy(xx)
# Create a horizontal interval (x_l, x_r) enclosing xx
rr = random.random()
            x_l[dd] = max(xx[dd] - rr * widths[dd], bounds[dd][0])
            x_r[dd] = min(xx[dd] + (1 - rr) * widths[dd], bounds[dd][1])
if step_out:
while logdist(x_l) > log_uprime and x_l[dd] > bounds[dd][0]:
                    x_l[dd] = max(x_l[dd] - widths[dd], bounds[dd][0])
                while logdist(x_r) > log_uprime and x_r[dd] < bounds[dd][1]:
                    x_r[dd] = min(x_r[dd] + widths[dd], bounds[dd][1])
# Propose xprimes and shrink interval until good one found
zz = 0
num_attempts = 0
while True:
zz += 1
                xprime[dd] = random.random() * (x_r[dd] - x_l[dd]) + x_l[dd]
                # Evaluate the proposal, not the current point, against the slice level
                log_Px = logdist(xprime)
                if log_Px > log_uprime:
                    xx[dd] = xprime[dd]
                    break
else:
# Shrink in
num_attempts += 1
                    if num_attempts >= max_attempts:
                        # Give up on this dimension and leave xx[dd] unchanged,
                        # keeping log_Px consistent with the current xx
                        log_Px = logdist(xx)
                        break
elif xprime[dd] > xx[dd]:
x_r[dd] = xprime[dd]
elif xprime[dd] < xx[dd]:
x_l[dd] = xprime[dd]
else:
raise Exception('Slice sampling failed to find an acceptable point')
# Record samples
if ii >= burn:
samples.append(copy.deepcopy(xx))
return samples
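
# Example usage (sketch): draw 100 samples from a standard normal restricted to
# [-2, 2]; the log density only needs to be correct up to an additive constant:
#     log_gauss = lambda x: -0.5 * x[0] ** 2
#     samples = slice_sample_bounded_max(N=100, burn=10, logdist=log_gauss,
#                                        xx=[0.0], widths=[1.0], step_out=True,
#                                        max_attempts=20, bounds=[(-2.0, 2.0)])
#     len(samples)   # -> 100, each sample a length-1 list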
# noinspection PyTypeChecker
def ft_ll(m, t, y, x, x_kernel, x_kernel_params, t_kernel, t_kernel_params):
"""Freeze thaw log likelihood"""
    # Take copies of everything so the caller's data is not mutated
m = copy.deepcopy(m)
t = copy.deepcopy(t)
y = copy.deepcopy(y)
x = copy.deepcopy(x)
K_x = x_kernel(x, x, **x_kernel_params)
N = len(y)
lambd = np.zeros((N, 1))
gamma = np.zeros((N, 1))
K_t = [None] * N
for n in range(N):
K_t[n] = t_kernel(t[n], t[n], **t_kernel_params)
lambd[n] = np.dot(np.ones((1, len(t[n]))), np.linalg.solve(K_t[n], np.ones((len(t[n]), 1))))
# Making sure y[n] is a column vector
y[n] = np.array(y[n], ndmin=2)
if y[n].shape[0] == 1:
y[n] = y[n].T
gamma[n] = np.dot(np.ones((1, len(t[n]))), np.linalg.solve(K_t[n], y[n] - m[n] * np.ones(y[n].shape)))
Lambd = np.diag(lambd.ravel())
ll = 0
# Terms relating to individual curves
for n in range(N):
ll += - 0.5 * np.dot((y[n] - m[n] * np.ones(y[n].shape)).T,
np.linalg.solve(K_t[n], y[n] - m[n] * np.ones(y[n].shape)))
ll += - 0.5 * np.log(np.linalg.det(K_t[n]))
# Terms relating to K_x
ll += + 0.5 * np.dot(gamma.T, np.linalg.solve(np.linalg.inv(K_x) + Lambd, gamma))
ll += - 0.5 * np.log(np.linalg.det(np.linalg.inv(K_x) + Lambd))
ll += - 0.5 * np.log(np.linalg.det(K_x))
# Prior on kernel params
# TODO - abstract me
# ll += scipy.stats.norm.logpdf(np.log(t_kernel_params['a']))
# ll += scipy.stats.norm.logpdf(np.log(t_kernel_params['b']))
# ll += np.log(1 / t_kernel_params['scale'])
return ll
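
# Sketch of how ft_ll can drive hyperparameter inference: wrap it in a closure over
# the observed curves and pass the closure to slice_sample_bounded_max as logdist, e.g.
#     log_post = lambda params: ft_ll(m, t, y, x,
#                                     cov_matern_5_2, dict(scale=params[0], ell=params[1]),
#                                     ft_K_t_t_plus_noise, dict(scale=params[2], alpha=params[3],
#                                                               beta=params[4], log_noise=params[5]))
# (the parameter ordering above is illustrative, not a convention used elsewhere in this module)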
# noinspection PyTypeChecker
def ft_posterior(m, t, y, t_star, x, x_kernel, x_kernel_params, t_kernel, t_kernel_params):
"""Freeze thaw posterior (predictive)"""
    # Take copies of everything so the caller's data is not mutated
m = copy.deepcopy(m)
t = copy.deepcopy(t)
y = copy.deepcopy(y)
t_star = copy.deepcopy(t_star)
x = copy.deepcopy(x)
K_x = x_kernel(x, x, **x_kernel_params)
N = len(y)
lambd = np.zeros((N, 1))
gamma = np.zeros((N, 1))
Omega = [None] * N
K_t = [None] * N
K_t_t_star = [None] * N
y_mean = [None] * N
for n in range(N):
K_t[n] = t_kernel(t[n], t[n], **t_kernel_params)
# TODO - Distinguish between the curve we are interested in and 'noise' with multiple kernels
K_t_t_star[n] = t_kernel(t[n], t_star[n], **t_kernel_params)
lambd[n] = np.dot(np.ones((1, len(t[n]))), np.linalg.solve(K_t[n], np.ones((len(t[n]), 1))))
# Making sure y[n] is a column vector
y[n] = np.array(y[n], ndmin=2)
if y[n].shape[0] == 1:
y[n] = y[n].T
gamma[n] = np.dot(np.ones((1, len(t[n]))), np.linalg.solve(K_t[n], y[n] - m[n] * np.ones(y[n].shape)))
Omega[n] = np.ones((len(t_star[n]), 1)) - np.dot(K_t_t_star[n].T,
np.linalg.solve(K_t[n], np.ones(y[n].shape)))
Lambda_inv = np.diag(1 / lambd.ravel())
C = K_x - np.dot(K_x, np.linalg.solve(K_x + Lambda_inv, K_x))
mu = m + np.dot(C, gamma)
# f_mean = mu
# f_var = C
for n in range(N):
y_mean[n] = np.dot(K_t_t_star[n].T, np.linalg.solve(K_t[n], y[n])) + Omega[n] * mu[n]
K_t_star_t_star = [None] * N
y_var = [None] * N
for n in range(N):
K_t_star_t_star[n] = t_kernel(t_star[n], t_star[n], **t_kernel_params)
y_var[n] = K_t_star_t_star[n] - \
np.dot(K_t_t_star[n].T,
np.linalg.solve(K_t[n], K_t_t_star[n])) + \
C[n, n] * np.dot(Omega[n], Omega[n].T)
return y_mean, y_var
if __name__ == "__main__":
pass
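    # Minimal smoke test (sketch): two toy training curves observed for a few epochs.
    # The hyperparameter locations x and the kernel parameters below are illustrative
    # values chosen for this example, not settings taken from the original experiments.
    x = [0.0, 1.0]                        # hyperparameter locations of the two curves
    t = [[1, 2, 3], [1, 2]]               # epochs observed so far for each curve
    y = [[0.9, 0.7, 0.6], [0.8, 0.75]]    # e.g. validation losses at those epochs
    m = np.zeros((2, 1))                  # prior asymptotic mean of each curve
    t_star = [[4, 5], [3, 4]]             # epochs at which to predict
    x_params = dict(scale=1.0, ell=1.0)
    t_params = dict(scale=1.0, alpha=3.0, beta=1.0, log_noise=np.log(0.01))
    ll = ft_ll(m, t, y, x, cov_matern_5_2, x_params, ft_K_t_t_plus_noise, t_params)
    y_mean, y_var = ft_posterior(m, t, y, t_star, x, cov_matern_5_2, x_params,
                                 ft_K_t_t_plus_noise, t_params)
    print('freeze-thaw log likelihood = %s' % np.ravel(ll)[0])
    for n in range(len(y_mean)):
        print('curve %d predicted means = %s' % (n, np.ravel(y_mean[n])))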