-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpipeline_pt2.wdl
160 lines (138 loc) · 4.76 KB
/
pipeline_pt2.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
version 1.0
# import other WDLs
import "tasks/pseudobulk.wdl" as pseudobulk
import "tasks/qc_plots.wdl" as qc
import "tasks/filter_cells_donors.wdl" as filter
import "tasks/normalize_counts.wdl" as normalize
import "tasks/run_peer.wdl" as run_peer
import "tasks/run_tensorqtl_cis_permutations.wdl" as run_tensorqtl_cis_permutations
import "tasks/peer_selection.wdl" as peer_selection
import "tasks/run_tensorqtl_cis_nominal.wdl" as run_tensorqtl_cis_nominal
import "tasks/run_tensorqtl_susie.wdl" as run_tensorqtl_susie
# This workflow takes pseudobulked data and maps eQTLs
workflow village_qtls {
input {
String group_name # ex. ips_D0
Array[String] sample_ids # ex. ips_D0
Array[File] cell_donor_map # ex. '${sample_id}_cell_to_donor.txt'
Array[File] cell_group_map # ex. ${sample_id}_cell_to_group.txt'
Array[File] h5ad # ex. '${sample_id}_singlets_cbc_suffix.h5ad'
File gene_gtf
Array[Int] peer_range
Int n_all_peers # TODO make n_all_peers max(peer_range)
Int n_chosen_peers=5
File plink_bed
File plink_bim
File plink_fam
}
# Pseudobulk the group
call pseudobulk.pseudobulk as run_pseudobulk {
input:
group_name=group_name,
sample_ids=sample_ids,
cell_donor_map=cell_donor_map,
cell_group_map=cell_group_map,
h5ad=h5ad,
}
# Make QC plots for UMIs/cell, genes/cell, cells/donor
call qc.qc_plots as qc_plots {
input:
counts=run_pseudobulk.counts,
cell_donor_map=run_pseudobulk.cell_donor_map_group,
prefix=group_name,
}
# Filter donors, genes, cells (and downscale large cells)
call filter.filter as filter_cells_donors {
input:
counts=run_pseudobulk.counts,
cell_donor_map=run_pseudobulk.cell_donor_map_group,
gene_gtf=gene_gtf,
prefix=group_name,
}
# Normalize (TPM and Inverse Normal Transform)
# TODO combine the indexing step
call normalize.normalize as normalize_counts {
input:
counts_filtered=filter_cells_donors.counts_filtered,
prefix=group_name,
}
call normalize.index_bed as index_bed_tpm {
input:
bed=normalize_counts.bed_tpm,
}
call normalize.index_bed as index_bed_int {
input:
bed=normalize_counts.bed_int,
}
# Run PEER with the max number of factors in the range
call run_peer.all_peer_factors as all_peer_factors {
input:
expression_file=index_bed_int.bed_gz,
n_all_peers=n_all_peers,
prefix=group_name,
}
# Subset for the PEERs to test
scatter (n_peer in peer_range) {
call run_peer.subset_peers_and_combine as subset_peers_and_combine {
input:
n_peer=n_peer,
peer_covariates=all_peer_factors.peer_covariates,
prefix=group_name,
}
}
# Run tensorQTL cis permutations for each number of PEER correction
scatter(file in subset_peers_and_combine.combined_covariates) {
call run_tensorqtl_cis_permutations.tensorqtl_cis_permutations as cis_permutations {
input:
covariates=file,
plink_bed=plink_bed,
plink_bim=plink_bim,
plink_fam=plink_fam,
phenotype_bed=index_bed_int.bed_gz,
}
}
# Plot eQTL discovery curve for the PEER range (option to manually choose # PEERs to correct with)
call peer_selection.peer_selection as run_peer_selection {
input:
cis_eqtl_results=cis_permutations.cis_qtl,
covariates=subset_peers_and_combine.combined_covariates,
n_chosen_peers=n_chosen_peers,
prefix=group_name,
}
# Run tensorQTL cis nominal scan for significant cis-eQTLs
call run_tensorqtl_cis_nominal.tensorqtl_cis_nominal as cis_nominal {
input:
plink_bed=plink_bed,
plink_bim=plink_bim,
plink_fam=plink_fam,
phenotype_bed=index_bed_int.bed_gz,
covariates=run_peer_selection.chosen_peer_covariates,
prefix=group_name,
}
# Run tensorQTL SuSiE fine-mapping scan for significant cis-eQTLs
call run_tensorqtl_susie.tensorqtl_cis_susie as cis_susie {
input:
plink_bed=plink_bed,
plink_bim=plink_bim,
plink_fam=plink_fam,
phenotype_bed=index_bed_int.bed_gz,
covariates=run_peer_selection.chosen_peer_covariates,
prefix=group_name,
cis_output=run_peer_selection.chosen_peer_qtls,
}
output {
# plots
File umi_cell_png=qc_plots.umi_cell_png
File gene_cell_png=qc_plots.gene_cell_png
File cell_donor_png=qc_plots.cell_donor_png
File peer_png=run_peer_selection.peer_png
# count matrices and covariates
File counts_tpm=normalize_counts.parquet_tpm
File counts_int=normalize_counts.parquet_int
File final_covariates=run_peer_selection.chosen_peer_covariates
# qtl results
File qtl_perm=run_peer_selection.chosen_peer_qtls
Array[File] qtl_nominal=cis_nominal.chr_parquet
File qtl_finemap=cis_susie.parquet
}
}