-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfitting_lib.py
82 lines (62 loc) · 2.12 KB
/
fitting_lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import numpy as np
import pandas as pd
from itertools import combinations
from scipy.optimize import curve_fit
from sklearn.linear_model import LinearRegression
def read_matrix(file_path):
    """Load a CSV file into a DataFrame, using its first column as the index.

    `file_path` is passed unchanged to `pandas.read_csv`.
    """
    frame = pd.read_csv(file_path, index_col=0)
    return frame
def calculate_distance(matrix, language_groups):
    """Return the mean distance between languages of different groups.

    Every unordered pair of groups from `language_groups` is visited; within
    a pair, each language of the first group is combined with each language
    of the second and `matrix[language_1][language_2]` is collected. The
    result is the mean of all collected values.
    """
    collected = []
    for left, right in combinations(language_groups, 2):
        # Same visiting order as a nested loop: outer over `left`, inner over `right`.
        collected.extend(matrix[a][b] for a in left for b in right)
    return np.mean(collected)
def calculate_distances(matrix, points):
    """Compute the mean inter-group distance for every entry of `points`.

    `points` maps a key to a collection of language groups; the returned
    dict maps the same key to `calculate_distance(matrix, groups)`.
    """
    result = {}
    for key, groups in points.items():
        result[key] = calculate_distance(matrix, groups)
    return result
def func_log(d, D, b, T):
    """Evaluate the logarithmic model t = -T * (log(1 - d / D) + b).

    `d` may be a scalar or a sequence; it is converted to a NumPy array, so
    the result is computed element-wise.
    """
    distances = np.array(d)
    shifted_log = np.log(1 - distances / D) + b
    return shifted_log * -T
def func_exp(d, a, b):
    """Evaluate the exponential model t = b * exp(a * d).

    `d` may be a scalar or a sequence; it is converted to a NumPy array, so
    the result is computed element-wise.
    """
    distances = np.array(d)
    growth = np.exp(a * distances)
    return b * growth
def get_formula_log(params, get_original=False):
    """Render the logarithmic model with the given parameters as text.

    `params` unpacks to (D, b, T). With `get_original` truthy the formula is
    written in the fitted form `-t / T = log(1 - d / D) + b`. Otherwise it is
    rewritten in terms of d0 = D * (1 - exp(-b)) and r = exp(-1000 / T / 2).
    """
    asymptote, shift, scale = params
    if not get_original:
        # d0 is the distance at which the fitted model yields t = 0.
        zero_distance = asymptote * (1 - np.exp(-shift))
        retention = np.exp(-1000 / scale / 2)
        return (
            f't = 1000 log(({asymptote} - d) / ({asymptote} - {zero_distance}))'
            f' / 2 log({retention})'
        )
    return f'-t / {scale} = log(1 - d / {asymptote}) + {shift}'
def get_formula_exp(params):
    """Render the exponential model t = b * exp(a * d) as text.

    `params` unpacks to (a, b).
    """
    rate, factor = params
    return f't = {factor} * exp({rate} * d)'
def fit_log(ds, ts):
    """Fit the parameters (D, b, T) of `func_log` to the given data.

    Returns the fitted parameters as a plain list `[D, b, T]`.
    """
    distances = np.array(ds)
    times = np.array(ts)
    # D must exceed the largest observed distance by at least 0.01, which keeps
    # d / D below 1 for every data point when D is positive; b and T are
    # unbounded below, and no parameter has an upper bound.
    lower_bounds = (distances.max() + 0.01, -np.inf, -np.inf)
    fitted, _ = curve_fit(
        func_log, distances, times,
        bounds=(lower_bounds, np.inf),
    )
    return fitted.tolist()
def fit_exp(ds, ts):
    """Fit t = b * exp(a * d) by linear regression of log(t) on d.

    Returns the pair `(a, b)`, where `a` is the regression slope and `b` is
    the exponential of the regression intercept.
    """
    # LinearRegression expects a 2-D feature matrix: one column, one row per sample.
    features = np.array(ds).reshape((-1, 1))
    log_times = np.log(np.array(ts))
    regression = LinearRegression().fit(features, log_times)
    slope = regression.coef_[0]
    factor = np.exp(regression.intercept_)
    return slope, factor
def calculate_errors(ts, ts_predicted):
    """Compute error metrics between observed and predicted values.

    Returns the tuple `(rmse, mae, nrmse, mape)`:
    root mean square error, mean absolute error, RMSE normalized by the
    range of `ts`, and mean absolute percentage error (as a fraction).
    """
    observed = np.array(ts)
    predicted = np.array(ts_predicted)
    count = len(observed)
    residual = predicted - observed

    # Sum of squared residuals (not rooted).
    squared_sum = np.sum(np.square(residual))
    # Root mean square error.
    rmse = np.sqrt(squared_sum / count)
    # Mean absolute error.
    mae = np.sum(np.abs(residual)) / count
    # RMSE normalized by the spread (max - min) of the observed values.
    spread = observed.max() - observed.min()
    nrmse = rmse / spread
    # Mean absolute percentage error, relative to the observed values.
    mape = np.sum(np.abs(predicted / observed - 1)) / count
    return rmse, mae, nrmse, mape