forked from YannDubs/disentangling-vae
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhyperparam.ini
165 lines (142 loc) · 3.22 KB
/
hyperparam.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
[Custom]
# Lists every key that appears anywhere in this file; the later sections each
# set only a subset of them.
# NOTE(review): looks like the full parameter set for the 'custom' experiment -
# confirm whether the loader also uses this section as the defaults.
# Values are written as Python-style literals (False, 'mnist', 5e-4).
# NOTE(review): presumably read with Python's configparser, whose defaults do
# not strip inline comments - keep comments on their own line; confirm against
# the loader.
# General options
log_level = "info"
no_progress_bar = False
no_cuda = False
seed = 1234
# Training options
epochs = 100
batch_size = 64
lr = 5e-4
# presumably counted in epochs - TODO confirm
checkpoint_every = 30
dataset = 'mnist'
experiment = 'custom'
# Model options
model = 'Burgess'
loss = "betaB"
latent_dim = 10
rec_dist = "bernoulli"
# reg_anneal does not seem to make much difference, but some people say it can help
reg_anneal = 10000
# betaH options
betaH_B = 4
# betaB options
# presumably the initial and final capacity - TODO confirm
betaB_initC = 0
betaB_finC = 25
# use 100, which is what most implementations online use
betaB_G = 100
# factor options
factor_G = 6
# presumably the learning rate of the discriminator - TODO confirm
lr_disc = 5e-5
# btcvae options
btcvae_A = 1
btcvae_G = 1
btcvae_B = 6
# Evaluation options
is_metrics = False
no_test = False
is_eval_only = False
eval_batchsize = 1000
# ### DATASET COMMON ###
# use the same number of epochs per dataset, for fair comparisons
[Common_dsprites]
# One Common_<dataset> section per dataset; each sets the same three keys:
# the dataset name, the checkpoint interval and the number of epochs.
# NOTE(review): checkpoint_every is presumably counted in epochs - confirm.
dataset = 'dsprites'
checkpoint_every = 10
epochs = 30
[Common_chairs]
dataset = 'chairs'
checkpoint_every = 100
epochs = 300
[Common_celeba]
dataset = 'celeba'
checkpoint_every = 100
epochs = 200
[Common_mnist]
dataset = 'mnist'
checkpoint_every = 100
epochs = 400
[Common_fashion]
dataset = 'fashion'
checkpoint_every = 100
epochs = 400
# ### LOSS COMMON ###
[Common_VAE]
# One Common_<loss> section per loss; each selects the loss and its learning rate.
loss = "VAE"
lr = 5e-4
[Common_betaH]
loss = "betaH"
lr = 5e-4
[Common_betaB]
loss = "betaB"
lr = 1e-3
# Only loss section that also sets reg_anneal; 100000 is 10x the value in [Custom].
# NOTE(review): confirm the larger value is intentional.
reg_anneal = 100000
[Common_factor]
loss = "factor"
lr = 1e-4
[Common_btcvae]
loss = "btcvae"
lr = 5e-4
# ### EXPERIMENT SPECIFIC ###
# additional hyperparameter changes besides the common ones
# BETA H
[betaH_dsprites]
# Per-dataset value of betaH_B for the betaH loss.
# beta as in the paper
betaH_B = 4
[betaH_celeba]
# beta value as in https://github.com/1Konny/Beta-VAE
betaH_B = 10
[betaH_chairs]
# beta value as in https://github.com/1Konny/Beta-VAE
betaH_B = 4
# BETA B
[betaB_dsprites]
# Per-dataset value of betaB_finC for the betaB loss.
# capacity as in the paper
betaB_finC = 25
[betaB_celeba]
# capacity as in the paper
betaB_finC = 50
[betaB_chairs]
# no source is given for this value; it matches the dsprites capacity
betaB_finC = 25
# FACTOR
[factor_chairs]
# Per-dataset values of factor_G and lr_disc for the factor loss.
factor_G = 3.2
lr_disc = 1e-5
# beta value as in https://github.com/1Konny/FactorVAE/blob/master/utils.py
# NOTE(review): this comment sits between two sections and says "beta" although
# the keys set here are factor_G and lr_disc - confirm which section and which
# hyperparameter the reference applies to.
[factor_dsprites]
factor_G = 6.4
lr_disc = 1e-4
[factor_celeba]
factor_G = 6.4
lr_disc = 1e-5
# BTCVAE
# reuse the factor_G value of the matching factor section as btcvae_B
[btcvae_chairs]
# Each btcvae_<dataset> section sets only btcvae_B, as a section:key reference
# to factor_G in the factor_<dataset> section of the same dataset, so a change
# to a factor_G value also changes the matching btcvae_B.
btcvae_B = ${factor_chairs:factor_G}
[btcvae_dsprites]
btcvae_B = ${factor_dsprites:factor_G}
[btcvae_celeba]
btcvae_B = ${factor_celeba:factor_G}
# Other
# these sections do not use the common sections by default (a section must be
# named <loss>_<data> for that), so the shared values are referenced explicitly
[best_celeba]
# btcvae settings for celeba. The dataset and loss are set here directly, and
# epochs, checkpoint_every and lr are section:key references to the
# Common_celeba and Common_btcvae sections.
# btcvae_A is negative here, whereas [Custom] sets it to 1.
btcvae_A = -10
btcvae_B = 20
dataset = 'celeba'
loss = "btcvae"
epochs = ${Common_celeba:epochs}
checkpoint_every = ${Common_celeba:checkpoint_every}
lr = ${Common_btcvae:lr}
# the only section in this file that changes rec_dist from "bernoulli"
rec_dist = "laplace"
[best_dsprites]
# btcvae settings for dsprites, built the same way from Common_dsprites and
# Common_btcvae; rec_dist is not set in this section.
btcvae_A = -5
btcvae_B = 10
dataset = 'dsprites'
loss = "btcvae"
epochs = ${Common_dsprites:epochs}
checkpoint_every = ${Common_dsprites:checkpoint_every}
lr = ${Common_btcvae:lr}
[debug]
# Short run for debugging: a single epoch with debug-level logging.
epochs = 1
log_level = "debug"
# presumably skips the evaluation on the test set - TODO confirm
no_test = True
# presumably disables the annealing of the regularisation term - TODO confirm
reg_anneal = 0