-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdvc.yaml
99 lines (98 loc) · 4.49 KB
/
dvc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Extra files whose values are available for ${...} templating in this
# pipeline. The stages interpolate ${endpoint}; presumably that key is
# defined in the file listed here -- confirm against its contents.
vars:
  - ./config/endpoints.yaml
# Pipeline stages. Each stage lists the command to run (cmd), the files whose
# changes should trigger a rerun (deps), and the files it produces (outs).
# Stages that use `foreach` or `matrix` are expanded into one concrete stage
# per item / per combination of values.
stages:
  # Runs the preprocessing script against the imported toxcast_data.csv.gz.
  # Declares the ML-ready table and the 01 log file as outputs.
  preprocess_data:
    cmd: python scripts/01_preprocess_data.py
    deps:
      - scripts/01_preprocess_data.py
      - data/imported_data/toxcast_data.csv.gz
    outs:
      - data/intermediate_data/ml_ready_data.tsv
      - logs/01_preprocess_data.log

  # One stage per endpoint: runs the group-assignment script with a fixed
  # --n_groups 5 and writes one presplit table per endpoint.
  assign_groups:
    foreach: ${endpoint}  # endpoint list; see vars (./config/endpoints.yaml)
    do:
      cmd: python scripts/02_assign_groups.py --endpoint ${item} --n_groups 5
      deps:
        - scripts/02_assign_groups.py
        - data/intermediate_data/ml_ready_data.tsv
      outs:
        - data/intermediate_data/presplit_data/presplit_data_${item}.tsv

  # One stage per (endpoint, counted_fp) combination. Each reads that
  # endpoint's presplit table and writes one Morgan-fingerprint prediction
  # file named after both matrix values.
  ml_experiments:
    matrix:
      endpoint: ${endpoint}
      counted_fp: [not_counted, counted]
    cmd: python ./scripts/03_ml_experiments.py --endpoint ${item.endpoint} --counted_fp ${item.counted_fp}
    deps:
      - data/intermediate_data/presplit_data/presplit_data_${item.endpoint}.tsv
      - ./scripts/03_ml_experiments.py
    outs:
      - ./data/intermediate_data/model_predictions/morgan_fingerprint_predictions_${item.endpoint}_${item.counted_fp}.tsv.gz

  # One stage per (endpoint, nn_calibration) combination. Note the output
  # file name puts the calibration method BEFORE the endpoint, unlike the
  # Morgan prediction files above.
  neural_fingerprint_predictions:
    matrix:
      endpoint: ${endpoint}
      nn_calibration: ["isotonic", "sigmoid"]
    cmd: python ./scripts/04_neural_fingerprint_predictions.py --endpoint ${item.endpoint} --nn_calibration ${item.nn_calibration}
    deps:
      - data/intermediate_data/presplit_data/presplit_data_${item.endpoint}.tsv
      - ./scripts/04_neural_fingerprint_predictions.py
    outs:
      - ./data/intermediate_data/model_predictions/neural_fingerprint_predictions_${item.nn_calibration}_${item.endpoint}.tsv.gz

  # One stage per (endpoint, comparison) combination. Every combination
  # depends on the same three prediction files for the endpoint (both Morgan
  # variants and the isotonic neural predictions), whichever comparison is
  # selected.
  create_comparison_plots:
    matrix:
      endpoint: ${endpoint}
      comparison: [morgan_vs_neural, morgan_vs_counted, counted_vs_neural]
    cmd: python ./scripts/05_create_plots.py --endpoint ${item.endpoint} --comparison ${item.comparison}
    deps:
      - data/intermediate_data/model_predictions/morgan_fingerprint_predictions_${item.endpoint}_not_counted.tsv.gz
      - data/intermediate_data/model_predictions/morgan_fingerprint_predictions_${item.endpoint}_counted.tsv.gz
      - data/intermediate_data/model_predictions/neural_fingerprint_predictions_isotonic_${item.endpoint}.tsv.gz
      - ./scripts/05_create_plots.py
      - ./scripts/plot_utils.py
    outs:
      - ./data/figures/${item.endpoint}/calibration_curves_${item.comparison}.pdf
      - ./data/figures/${item.endpoint}/performance_metrics_${item.comparison}.pdf

  # One stage per endpoint: runs the plotting script with --comparison other
  # and declares the per-endpoint probability-distribution figures and the
  # data report as outputs.
  create_report_plots:
    foreach: ${endpoint}  # endpoint list; see vars (./config/endpoints.yaml)
    do:
      cmd: python ./scripts/05_create_plots.py --endpoint ${item} --comparison other
      deps:
        - data/intermediate_data/model_predictions/morgan_fingerprint_predictions_${item}_not_counted.tsv.gz
        - data/intermediate_data/model_predictions/neural_fingerprint_predictions_isotonic_${item}.tsv.gz
        # Added: this stage declares proba_distribution_chemprop_sigmoid.pdf
        # as an output, so the sigmoid-calibrated predictions are tracked
        # too. NOTE(review): inferred from the output name -- confirm that
        # 05_create_plots.py reads this file.
        - data/intermediate_data/model_predictions/neural_fingerprint_predictions_sigmoid_${item}.tsv.gz
        - ./scripts/05_create_plots.py
        - ./scripts/plot_utils.py
      outs:
        - ./data/figures/${item}/proba_distribution_rf.pdf
        - ./data/figures/${item}/proba_distribution_chemprop_isotonic.pdf
        - ./data/figures/${item}/proba_distribution_chemprop_sigmoid.pdf
        - ./data/figures/${item}/data_report.pdf

  # One stage per comparison. Depends on the whole model_predictions
  # directory rather than on individual files, so it is tied to every
  # prediction file written there by the stages above.
  create_final_plots:
    matrix:
      comparison: [morgan_vs_neural, morgan_vs_counted, counted_vs_neural]
    cmd: python scripts/06_create_final_figures.py --comparison ${item.comparison}
    deps:
      - ./data/intermediate_data/model_predictions
      - ./scripts/06_create_final_figures.py
      - ./scripts/plot_utils.py
    outs:
      - ./data/figures/final_figures/performance_metrics_all_${item.comparison}.pdf
      - ./data/figures/final_figures/precision_recall_${item.comparison}.pdf
      - ./data/figures/final_figures/significance_plot_${item.comparison}.pdf
      - ./data/figures/final_figures/scatter_metrics_${item.comparison}.pdf

  # Single run of the final-figures script with --comparison other; same
  # directory-level dependency as create_final_plots.
  create_final_plots_other:
    cmd: python scripts/06_create_final_figures.py --comparison other
    deps:
      - ./data/intermediate_data/model_predictions
      - ./scripts/06_create_final_figures.py
      - ./scripts/plot_utils.py
    outs:
      - ./data/figures/final_figures/balanced_accuracy_scatter_counted_binary_fp.pdf
      - ./data/figures/final_figures/brier_score_scatter_counted_binary_fp.pdf
      - ./data/figures/final_figures/chemprop_calibration.pdf

  # Runs the final-tables script over the model_predictions directory. The
  # only declared output is the log file; any tables the script writes are
  # not tracked by this stage.
  create_final_tables:
    cmd: python scripts/07_create_final_tables.py
    deps:
      - ./data/intermediate_data/model_predictions
      - ./scripts/07_create_final_tables.py
    outs:
      - ./logs/07_create_final_tables.log