MeshGrid.py
# August George - 2021
# Collected scripts to help create a mesh grid in parameter space
import numpy as np
import pandas as pd
def calc_logl(y_obs, theta, func):
"""
:param y_obs: array of observed data
:param theta: array of parameters - a single parameter set - SIGMA at the end
:param func: function which inputs a list of parameters and outputs the predicted value
    :return: the log-likelihood of the observed data given theta
"""
    # the full theta (including the trailing sigma) is passed to the model function,
    # which is expected to ignore the extra entry
    p = theta
sigma = theta[-1]
y_pred = func(p)
    # Note: Normal log likelihood = -(n/2)ln(2*pi) - (n/2)ln(sigma^2) - (1/(2*sigma^2)) * SUM(y_i - y_pred_i)^2
logp = -len(y_obs) * np.log(np.sqrt(2.0 * np.pi) * sigma)
logp += -np.sum((y_obs - y_pred) ** 2.0) / (2.0 * sigma ** 2.0)
return logp
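# A minimal usage sketch of calc_logl, assuming a hypothetical straight-line model.
# The data, parameter values, and the helper name _example_calc_logl are illustrative only.
def _example_calc_logl():
    x = np.linspace(0.0, 1.0, 5)          # hypothetical independent variable
    line = lambda p: p[0] * x + p[1]      # slope p[0], intercept p[1]; ignores the trailing sigma
    theta = np.array([2.0, 1.0, 0.5])     # [slope, intercept, sigma] - sigma last
    y_obs = line(theta) + 0.1             # pretend observations near the model curve
    print(calc_logl(y_obs, theta, line))  # less negative = better fit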
def create_grid_coord(p_input, verbose=True):
"""
Creates a mesh grid in parameter space from parameter vectors
:param p_input: a list of tuples to generate a parameter vector
[parameter name, lower value, upper value, number of divisions]
:param verbose: a boolean which turns on/off text output
:return: a dataframe where row_i is parameter set_i and col_i is parameter_i.
Note: includes a label row and an index column
"""
p = []
p_names = []
if verbose:
print('creating parameter vectors...')
    for p_i in p_input:
p_i_coord = np.linspace(p_i[1], p_i[2], int(p_i[3]))
if verbose:
print(f'{p_i[0]}: {p_i_coord}')
p.append(p_i_coord)
p_names.append(p_i[0])
if verbose:
print('creating parameter space grid coordinates...')
g = np.meshgrid(*p)
coord_df = pd.DataFrame(np.vstack(list(map(np.ravel, g))).T, columns=p_names)
if verbose:
print(f'{coord_df}')
return coord_df
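# A minimal usage sketch of create_grid_coord over two hypothetical model parameters plus
# sigma. The parameter names, ranges, and divisions are illustrative only.
def _example_create_grid_coord():
    p_input = [('slope', 0.0, 4.0, 5),        # [name, lower, upper, number of divisions]
               ('intercept', -1.0, 1.0, 3),
               ('sigma', 0.1, 1.0, 4)]
    coord_df = create_grid_coord(p_input, verbose=False)
    print(coord_df.shape)                     # (5 * 3 * 4, 3) = (60, 3): 60 parameter sets, 3 parameters
    return coord_df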
def score_grid(grid, verbose=True):
"""
    Calculates the relative log-likelihood, relative likelihood, and normalized resampling weight for each parameter set
:param grid: a dataframe where row_i is parameter set_i and col_i is parameter_i. must also contain 'logl' col
:param verbose: a boolean which turns on/off text output
:return: an updated 'grid' dataframe containing additional columns for rel log-likelihood, likelihood, and weights
"""
if verbose:
print('scoring grid...')
    grid['rel logl'] = grid['logl'] - np.max(grid['logl'])  # ln(p/p_max) = ln(p) - max(ln(p))
    grid['rel like'] = np.exp(grid['rel logl'])  # p/p_max = exp(ln(p/p_max))
    grid['weight'] = grid['rel like'] / np.sum(grid['rel like'])  # normalize so the weights sum to 1 for resampling
if verbose:
print(f'{grid}')
return grid
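# A minimal usage sketch of score_grid: evaluate calc_logl for every parameter set in a
# grid, attach the results as a 'logl' column, then score. The straight-line model and
# synthetic data are the same illustrative assumptions used in _example_calc_logl.
def _example_score_grid():
    x = np.linspace(0.0, 1.0, 5)
    line = lambda p: p[0] * x + p[1]
    y_obs = line([2.0, 1.0]) + 0.1  # pretend observations
    grid = _example_create_grid_coord()
    grid['logl'] = [calc_logl(y_obs, theta, line) for theta in grid.to_numpy()]
    return score_grid(grid, verbose=False)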
def resample_grid(scored_grid, M, verbose=True):
"""
Resample from the dataframe of parameter sets based on likelihood probabilities
    :param scored_grid: a dataframe containing a 'weight' column of normalized probabilities (sum = 1), e.g. from score_grid
:param M: integer number of resamples to take
:param verbose: a boolean which turns on/off text output
:return: a filtered version of the scored grid containing only the resampled values, effective sample size
"""
if verbose:
print('resampling grid...')
start_idx = np.random.choice(np.arange(len(scored_grid.index)), size=M, replace=True, p=scored_grid['weight'])
start_p_sets = scored_grid.iloc[start_idx]
    ess = np.sum(scored_grid['rel like'])  # ESS = sum(like)/max(like) = sum(rel like)
if verbose:
print(start_p_sets)
print(f'ESS estimate: {ess}')
    return start_p_sets, ess
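# A minimal usage sketch of resample_grid, reusing the hypothetical scored grid from
# _example_score_grid. M = 10 resamples is an arbitrary illustrative choice.
def _example_resample_grid():
    scored = _example_score_grid()
    start_p_sets, ess = resample_grid(scored, M=10, verbose=False)
    print(start_p_sets)
    print(f'ESS estimate: {ess}')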
def score_grid_batch(grid, verbose=True):
"""
    Calculates the relative log-likelihood, relative likelihood, normalized weights, effective sample size, and batch weight
    :param grid: a dataframe where row_i is parameter set_i and col_i is parameter_i. must also contain a 'logl' col
    :param verbose: a boolean which turns on/off text output
    :return: the updated 'grid' dataframe (with rel log-likelihood, rel likelihood, and normalized weight columns),
             the effective sample size, log10 of the batch weight, and the maximum log-likelihood
"""
if verbose:
print('scoring grid...')
log_p_max = np.max(grid['logl'])
    grid['rel logl'] = grid['logl'] - log_p_max  # ln(p/p_max) = ln(p) - max(ln(p))
    grid['rel like'] = np.exp(grid['rel logl'])  # p/p_max = exp(ln(p/p_max))
    grid['rel like norm'] = grid['rel like'] / np.sum(grid['rel like'])  # normalize so the weights sum to 1 for resampling
    ess = np.sum(grid['rel like'])  # ESS = sum(like)/max(like) = sum(rel like)
    b_w_log10 = np.log10(ess) + log_p_max / np.log(10)  # log10(batch weight) = log10(ess) + log10(p_max); log_p_max is a natural log
if verbose:
print(f'{grid}')
print(f'ess: {ess}')
print(f'log10(batch weight): {b_w_log10}')
        print(f'ln(p_max): {log_p_max}')
return grid, ess, b_w_log10, log_p_max
def resample_grid_batch(scored_grid, M, verbose=True):
"""
Resample from the dataframe of parameter sets based on likelihood probabilities
    :param scored_grid: a dataframe containing a 'rel like norm' column of normalized probabilities (sum = 1), e.g. from score_grid_batch
:param M: integer number of resamples to take
:param verbose: a boolean which turns on/off text output
    :return: a filtered version of the scored grid containing only the resampled parameter sets
"""
if verbose:
print('resampling grid batch...')
start_idx = np.random.choice(np.arange(len(scored_grid.index)), size=M, replace=True, p=scored_grid['rel like norm'])
start_p_sets = scored_grid.iloc[start_idx]
if verbose:
print(start_p_sets)
    return start_p_sets
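# A minimal usage sketch of the batch workflow: score one batch of the grid with
# score_grid_batch, keep the bookkeeping values (ESS, log10 batch weight, max log-likelihood)
# that would be needed to combine several batches, then resample from that batch.
# The model, data, and M are the same illustrative assumptions as in the examples above.
def _example_batch_workflow():
    x = np.linspace(0.0, 1.0, 5)
    line = lambda p: p[0] * x + p[1]
    y_obs = line([2.0, 1.0]) + 0.1
    grid = _example_create_grid_coord()
    grid['logl'] = [calc_logl(y_obs, theta, line) for theta in grid.to_numpy()]
    scored, ess, b_w_log10, log_p_max = score_grid_batch(grid, verbose=False)
    start_p_sets = resample_grid_batch(scored, M=10, verbose=False)
    print(start_p_sets)
    print(f'ess: {ess}, log10(batch weight): {b_w_log10}, ln(p_max): {log_p_max}')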
if __name__ == '__main__':
pass