-
Notifications
You must be signed in to change notification settings - Fork 150
/
Copy pathMakefile
189 lines (166 loc) · 4.82 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#
# Makefile for diet and weight-loss monitoring
# vim: ts=8 sw=8 noexpandtab nosmarttab
#
# Goal:
# - Find out which lifestyle factors affect your weight the most
# - Find out which foods make you gain or lose weight
# - Find confidence-level (ranges) for each food/lifestyle item
#
# Requires you to:
# - Weigh yourself once a day
# - Record what you do/eat daily
#
# How to run this code:
# - Install vowpal-wabbit (vw)
# - Clone this repo: https://github.com/arielf/weight-loss
# - Place your data in <username>.csv
# - Type 'make'
#
# Additional 'make' targets (make <target>):
#
# c/charts
# Creates optional charts
#
# sc
# Creates the per-item scores chart only
#
# m/model
# Creates a model file from the daily train-file
#
# t/train
# Creates the daily-delta (weight change target) train file
#
# i/items
# Creates 'by single-item' train file. This is a "pretend"
# data-file, as if we only had one item per day, to see what
# its "pretend-isolated" effect is, assuming everything else is equal.
#
# conf/confidence/r/range
# Generates a sorted *.range file, in which each item appears
# together with its 'confidence range' [min max]. This can
# help you figure out how certain we are for each variable.
# e.g. a line like this:
# -0.024568 carrot -0.071207 0.026108
# means based on the given data, the machine-learning process
# estimates carrot makes you lose a bit of weight
# (average is a negative: -0.024568) but the confidence
# daily range is from -0.071207 (loss) to 0.026108 (gain)
# so there's a low confidence in this result.
#
# conv
# Generates a convergence chart of the learning process
#
# clean
# Cleans-up generated files
#
# Append the current directory to the search path so recipes can run
# scripts kept there (e.g. $(TOVW)) by bare name.
PATH := $(PATH):.
# Dataset name, as printed by the ./username helper.
# Simple (:=) assignment runs the helper once, at parse time, instead
# of on every expansion of $(NAME). 'make NAME=<value>' still overrides.
NAME := $(shell ./username)
# -- external programs and helper scripts
# vowpal-wabbit executable
VW := vw
# converts the master CSV data into the vw train-set
TOVW := lifestyle-csv2vw
# runs a vw training pass and prints the per-item scores
VARINFO := vw-varinfo2
# sort filter for train-set examples
SORTABS := sort-by-abs
# Adjustable parameters; to change one, call 'make' with NAME=Value
# (for example: make P=8 NDAYS=5).
# --bootstrap rounds:
BS := 7
# --passes:
P := 4
# -l (learning rate):
L := 0.05
# --l2 (L2 regularization):
L2 := 1.85201e-08
# Aggregate consecutive daily-data up to this number of days
# (passed as the first argument to $(TOVW)):
NDAYS := 3
#
# vowpal-wabbit training args, accumulated one option per line.
# The tunables (BS, L, L2, P) are defined above.
#
VW_ARGS  = -k
VW_ARGS += --loss_function quantile
VW_ARGS += --progress 1
VW_ARGS += --bootstrap $(BS)
VW_ARGS += -l $(L)
VW_ARGS += --l2 $(L2)
VW_ARGS += -c --passes $(P)
# -- Commented out random shuffling methods
# now sorting examples by abs(delta).
# Overfitting is countered (though not completely avoided) by:
# * Aggregating on multiple partly overlapping N-day periods
# * Bootstrapping each example (multiple times) via --bootstrap
#
# Multiple orders via shuffling and averaging results should be
# considered as a future option.
#
# SHUFFLE := shuf
# SHUFFLE := unsort --seed $(SEED)
#
# -- data files
# Per-user files, all named after $(NAME):
MASTERDATA = $(NAME).csv
TRAINFILE  = $(NAME).train
ITEMFILE   = $(NAME).items
MODELFILE  = $(NAME).model
RANGEFILE  = $(NAME).range

# Chart input (date/weight CSV) and chart outputs:
DWCSV := weight.2015.csv
DWPNG := $(NAME).weight.png
SCPNG := $(NAME).scores.png

# Never let make delete the Makefile, the user's data or the converter
# script if a run gets interrupted.
.PRECIOUS: Makefile $(MASTERDATA) $(TOVW)
#
# -- rules
#
# Default goal: compute and display the per-item scores.
# 'all', 's' and 'score' are command names, not files. Declaring them
# phony keeps a stray file with one of those names from masking them.
.PHONY: all s score
all:: score

# Run $(VARINFO) over the daily train-set. The output is shown on the
# terminal and also saved in scores.txt, which the score chart reads.
s score scores.txt: $(TRAINFILE)
	$(VARINFO) $(VW_ARGS) -d $< | tee scores.txt
# -- all optional charts (weight by date + per-item scores)
# 'c' and 'charts' are command names, not files, hence phony.
.PHONY: c charts
c charts: weight-chart score-chart
# -- Weight by date chart
# 'wc' and 'weight-chart' are phony aliases for building $(DWPNG);
# without the declaration a file named 'wc' would mask them.
.PHONY: wc weight-chart
wc weight-chart $(DWPNG): date-weight.r $(DWCSV)
	Rscript --vanilla date-weight.r $(DWCSV) $(DWPNG)
	@echo "=== done: date-weight chart saved in: '$(DWPNG)'"
# -- Feature importance score chart
# 'sc' and 'score-chart' are phony aliases for building $(SCPNG).
.PHONY: sc score-chart
sc score-chart $(SCPNG): scores.txt score-chart.r
	@# Keep fields 0 and 5 of each scores.txt line (dropping '%' from
	@# field 5) as a two-column CSV for the R script.
	@perl -ane '$$F[5] =~ tr/%//d ;print "$$F[0],$$F[5]\n"' scores.txt > scores.csv
	@Rscript --vanilla score-chart.r scores.csv $(SCPNG)
	@echo "=== done: weight-loss factors chart saved in: '$(SCPNG)'"
# -- model
# Trains vw on the daily train-set and stores the model in $(MODELFILE).
# 'm' and 'model' are phony aliases; only $(MODELFILE) is a real file.
.PHONY: m model
m model $(MODELFILE): Makefile $(TRAINFILE)
	$(VW) $(VW_ARGS) -f $(MODELFILE) -d $(TRAINFILE)
# -- train-set generation
# Converts the master CSV (aggregating up to $(NDAYS) days) into vw
# examples and sorts them through $(SORTABS).
# The sort filter is called via its variable, like the other helpers,
# so overriding SORTABS actually takes effect.
# 't' and 'train' are phony aliases; only $(TRAINFILE) is a real file.
.PHONY: t train
t train $(TRAINFILE): Makefile $(MASTERDATA) $(TOVW)
	$(TOVW) $(NDAYS) $(MASTERDATA) | $(SORTABS) > $(TRAINFILE)
# -- generate 'by single-item' train file
# 'i' and 'items' are phony aliases; only $(ITEMFILE) is a real file.
.PHONY: i items
i items $(ITEMFILE): $(TRAINFILE)
	train-to-items $< > $(ITEMFILE)
# -- Find daily 'range' for 'per-item'
# This finds a ~90% confidence interval (leverages vw --bootstrap)
# Runs the saved model in test-only mode (-t) over the per-item file
# and stores the numerically sorted predictions in $(RANGEFILE).
# The short names are phony aliases; only $(RANGEFILE) is a real file.
.PHONY: conf confidence r range
conf confidence r range $(RANGEFILE): $(MODELFILE) $(ITEMFILE)
	$(VW) --quiet -t -i $(MODELFILE) \
		-d $(ITEMFILE) -p /dev/stdout | sort -g > $(RANGEFILE)
# -- convergence chart
# Pipes the training progress output (stderr merged into stdout) to
# vw-convergence. 'conv' is a command name, not a file, hence phony.
.PHONY: conv
conv: $(TRAINFILE)
	$(VW) $(VW_ARGS) -d $< 2>&1 | vw-convergence
# -- clean-up
# Removes the model, items and range files plus cache/tmp files.
# The train file, scores and charts are left in place.
# Declared phony so a file named 'clean' cannot disable the target.
.PHONY: clean
clean:
	/bin/rm -f $(MODELFILE) $(ITEMFILE) $(RANGEFILE) *.cache* *.tmp*
# -- friendlier failure when the user's data file is missing:
# with no prerequisites, this recipe only runs when '$(MASTERDATA)'
# does not exist; it prints a hint and then fails the build.
$(MASTERDATA):
	@echo "=== Sorry: you must provide your data in '$(MASTERDATA)'" && exit 1
# commit and push
# 'cp' is a command name, not a file, hence phony.
.PHONY: cp
cp:
	git commit . && git push
# sync gh-pages with master & push
# The steps are chained with &&, so a failing step stops the sequence
# (leaving whichever branch was checked out at that point).
# 'gh' is a command name, not a file, hence phony.
.PHONY: gh
gh:
	git checkout gh-pages && \
		git merge master && \
		git push && \
		git checkout master
#
# Trick for introspection of this Makefile's variables from the outside
# (needs VARNAME=<some_makefile_varname>):
#
# Examples:
#	$ make VARNAME=MASTERDATA echovar
#
#	$ make VARNAME=TRAINFILE ev
#
# Both names are commands rather than files, hence phony.
#
.PHONY: ev echovar
ev echovar:
	@echo $($(VARNAME))