-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathkfdrc_mutect2_sub_wf.cwl
209 lines (197 loc) · 10.3 KB
/
kfdrc_mutect2_sub_wf.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# Mutect2 somatic calling sub-workflow: scattered Mutect2 calling, merging of
# per-interval VCFs/stats, FilterMutectCalls, optional sample renaming,
# PASS selection and annotation (see `steps`).
cwlVersion: v1.2
class: Workflow
id: kfdrc_mutect2_sub_wf
requirements:
  # The mutect2 step scatters over interval files.
  - class: ScatterFeatureRequirement
  # Several step inputs list more than one `source` (used with pickValue).
  - class: MultipleInputFeatureRequirement
  # Some steps run other workflows rather than single tools.
  - class: SubworkflowFeatureRequirement
  # rename_vcf_samples sets step inputs with `valueFrom`; CWL requires this
  # to be declared by the workflow (or inherited from a parent workflow).
  - class: StepInputExpressionRequirement
  # The `when` condition and `$(1 == 1)` are JavaScript expressions rather
  # than plain parameter references.
  - class: InlineJavascriptRequirement
# Workflow inputs. Optional inputs carry a trailing `?` (or a 'null' member)
# in their type; defaults are given inline where present.
inputs:
  # Reference and resource files
  indexed_reference_fasta: {type: 'File', secondaryFiles: ['.fai', '^.dict']}
  reference_dict: File
  bed_invtl_split: {type: 'File[]', doc: "Bed file intervals passed on from an outside pre-processing step"}
  af_only_gnomad_vcf: {type: 'File', secondaryFiles: ['.tbi']}
  exac_common_vcf: {type: 'File', secondaryFiles: ['.tbi']}

  # Tumor / normal alignments and sample names.
  # All index patterns are optional so either BAM or CRAM indexes are accepted.
  input_tumor_aligned:
    type: File
    secondaryFiles:
      - {pattern: ".bai", required: false}
      - {pattern: "^.bai", required: false}
      - {pattern: ".crai", required: false}
      - {pattern: "^.crai", required: false}
    doc: "tumor BAM or CRAM"
  input_tumor_name: string
  old_tumor_name: {type: 'string?', doc: "If `SM:` sample name in the align file is different than `input_tumor_name`, you **must** provide it here"}
  input_normal_aligned:
    type: File
    secondaryFiles:
      - {pattern: ".bai", required: false}
      - {pattern: "^.bai", required: false}
      - {pattern: ".crai", required: false}
      - {pattern: "^.crai", required: false}
    doc: "normal BAM or CRAM"
  input_normal_name: string
  old_normal_name: {type: 'string?', doc: "If `SM:` sample name in the align file is different than `input_normal_name`, you **must** provide it here"}

  # Calling / filtering options
  exome_flag: {type: ['null', string], doc: "set to 'Y' for exome mode"}
  select_vars_mode: {type: ['null', {type: enum, name: select_vars_mode, symbols: ["gatk", "grep"]}], doc: "Choose 'gatk' for SelectVariants tool, or 'grep' for grep expression", default: "gatk"}
  tool_name: {type: 'string?', doc: "String to describe what tool was run as part of file name", default: "mutect2_somatic"}
  output_basename: string
  getpileup_memory: {type: 'int?'}
  learnorientation_memory: {type: 'int?'}
  filtermutectcalls_memory: {type: 'int?'}

  # VEP options
  vep_cache: {type: 'File', doc: "tar gzipped cache from ensembl/local converted cache"}
  vep_ram: {type: 'int?', doc: "In GB, may need to increase this value depending on the size/complexity of input"}
  vep_cores: {type: 'int?', doc: "Number of cores to use. May need to increase for really large inputs"}
  vep_buffer_size: {type: 'int?', doc: "Increase or decrease to balance speed and memory usage"}
  dbnsfp: {type: 'File?', secondaryFiles: ['.tbi', '^.readme.txt'], doc: "VEP-formatted plugin file, index, and readme file containing dbNSFP annotations"}
  dbnsfp_fields: {type: 'string?', doc: "csv string with desired fields to annotate. Use ALL to grab all"}
  merged: {type: 'boolean?', doc: "Set to true if merged cache used", default: true}
  cadd_indels: {type: 'File?', secondaryFiles: ['.tbi'], doc: "VEP-formatted plugin file and index containing CADD indel annotations"}
  cadd_snvs: {type: 'File?', secondaryFiles: ['.tbi'], doc: "VEP-formatted plugin file and index containing CADD SNV annotations"}
  run_cache_existing: {type: boolean, doc: "Run the check_existing flag for cache"}
  run_cache_af: {type: boolean, doc: "Run the allele frequency flags for cache"}

  # annotation vars
  genomic_hotspots: {type: 'File[]?', doc: "Tab-delimited BED formatted file(s) containing hg38 genomic positions corresponding to hotspots"}
  protein_snv_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited file(s) containing protein names and amino acid positions corresponding to hotspots"}
  protein_indel_hotspots: {type: 'File[]?', doc: "Column-name-containing, tab-delimited file(s) containing protein names and amino acid position ranges corresponding to hotspots"}
  retain_info: {type: 'string?', doc: "csv string with INFO fields that you want to keep", default: "MBQ,TLOD,HotSpotAllele"}
  retain_fmt: {type: 'string?', doc: "csv string with FORMAT fields that you want to keep"}
  retain_ann: {type: 'string?', doc: "csv string of annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF"}
  add_common_fields: {type: 'boolean?', doc: "Set to true if input is a strelka2 vcf that hasn't had common fields added", default: false}
  echtvar_anno_zips: {type: 'File[]?', doc: "Annotation ZIP files for echtvar anno"}
  bcftools_strip_columns: {type: 'string?', doc: "csv string of columns to strip if needed to avoid conflict, i.e INFO/AF"}
  # Quoted so the embedded `"` and `|` can never be re-interpreted by a YAML parser.
  bcftools_public_filter: {type: 'string?', doc: "Will hard filter final result to create a public version", default: 'FILTER="PASS"|INFO/HotSpotAllele=1'}
  # gatk_filter_name and gatk_filter_expression are parallel arrays.
  gatk_filter_name: {type: 'string[]', doc: "Array of names for each filter tag to add, recommend: [\"NORM_DP_LOW\", \"GNOMAD_AF_HIGH\"]"}
  gatk_filter_expression: {type: 'string[]', doc: "Array of filter expressions to establish criteria to tag variants with. See https://gatk.broadinstitute.org/hc/en-us/articles/360036730071-VariantFiltration, recommend: [\"vc.getGenotype('\" + inputs.input_normal_name + \"').getDP() <= 7\", \"AF > 0.001\"]"}
  disable_hotspot_annotation: {type: 'boolean?', doc: "Disable Hotspot Annotation and skip this task.", default: false}
  maf_center: {type: 'string?', doc: "Sequencing center of variant called", default: "."}
  custom_enst: {type: 'File?', doc: "Use a file with ens tx IDs for each gene to override VEP PICK"}
  disable_vep_annotation: {type: 'boolean?', doc: "Disable VEP Annotation and skip this task.", default: false}
# Workflow outputs, each wired to the step output that produces it.
outputs:
  # Stats table from the filter_mutect2_vcf step.
  mutect2_filtered_stats:
    type: File
    outputSource: filter_mutect2_vcf/stats_table
  # Filtered VCF as selected by pickvalue_workaround (reheadered VCF if one
  # was produced, otherwise the filter_mutect2_vcf VCF).
  mutect2_filtered_vcf:
    type: File
    outputSource: pickvalue_workaround/output
  # Files produced by the annotate sub-workflow.
  mutect2_protected_outputs:
    type: 'File[]'
    outputSource: annotate/annotated_protected
  mutect2_public_outputs:
    type: 'File[]'
    outputSource: annotate/annotated_public
# Workflow steps. Data flow:
#   mutect2 (scattered) -> merge_mutect2_vcf / merge_mutect2_stats
#   mutect2_filter_support -> filter_mutect2_vcf -> [rename_vcf_samples]
#   -> pickvalue_workaround -> gatk_selectvariants_mutect2 -> annotate
steps:
  # Run Mutect2 once per interval file in bed_invtl_split.
  mutect2:
    run: ../tools/gatk_Mutect2.cwl
    hints:
      - class: 'sbg:AWSInstanceType'
        value: c5.9xlarge
    scatter:
      - interval_list
    in:
      input_tumor_aligned: input_tumor_aligned
      # Prefer the old sample name when given, else the requested name.
      input_tumor_name:
        source:
          - old_tumor_name
          - input_tumor_name
        pickValue: first_non_null
      input_normal_aligned: input_normal_aligned
      input_normal_name:
        source:
          - old_normal_name
          - input_normal_name
        pickValue: first_non_null
      reference: indexed_reference_fasta
      interval_list: bed_invtl_split
      af_only_gnomad_vcf: af_only_gnomad_vcf
      exome_flag: exome_flag
    out:
      - mutect2_vcf
      - f1r2_counts
      - mutect_stats

  # Sub-workflow producing the contamination, segmentation and f1r2 bias
  # outputs consumed by filter_mutect2_vcf.
  mutect2_filter_support:
    run: ../sub_workflows/kfdrc_mutect2_filter_support_subwf.cwl
    in:
      indexed_reference_fasta: indexed_reference_fasta
      reference_dict: reference_dict
      wgs_calling_interval_list: bed_invtl_split
      input_tumor_aligned: input_tumor_aligned
      input_normal_aligned: input_normal_aligned
      exac_common_vcf: exac_common_vcf
      output_basename: output_basename
      f1r2_counts: mutect2/f1r2_counts
      getpileup_memory: getpileup_memory
      learnorientation_memory: learnorientation_memory
    out:
      - contamination_table
      - segmentation_table
      - f1r2_bias

  # Combine the per-interval VCFs from the scattered mutect2 step.
  merge_mutect2_vcf:
    label: Merge mutect2 vcf
    run: ../tools/gatk_mergevcfs.cwl
    in:
      input_vcfs: mutect2/mutect2_vcf
      output_basename: output_basename
      reference_dict: reference_dict
      tool_name: tool_name
    out:
      - merged_vcf

  # Combine the per-interval stats from the scattered mutect2 step.
  merge_mutect2_stats:
    label: Merge mutect2 stats
    run: ../tools/gatk_mergemutectstats.cwl
    in:
      input_stats: mutect2/mutect_stats
      output_basename: output_basename
    out:
      - merged_stats

  # Filter the merged calls using the merged stats and the support outputs.
  filter_mutect2_vcf:
    run: ../tools/gatk_filtermutectcalls.cwl
    in:
      mutect_vcf: merge_mutect2_vcf/merged_vcf
      mutect_stats: merge_mutect2_stats/merged_stats
      reference: indexed_reference_fasta
      output_basename: output_basename
      contamination_table: mutect2_filter_support/contamination_table
      segmentation_table: mutect2_filter_support/segmentation_table
      ob_priors: mutect2_filter_support/f1r2_bias
      max_memory: filtermutectcalls_memory
    out:
      - stats_table
      - filtered_vcf

  # Conditional step: runs only when BOTH old sample names are supplied.
  # NOTE(review): mutect2 above picks each old name independently, so giving
  # only one old_* name uses it for calling but skips this step - confirm
  # that case is intended / handled by callers.
  rename_vcf_samples:
    run: ../tools/bcftools_reheader_samples_index.cwl
    when: $(inputs.old_tumor_name != null && inputs.old_normal_name != null)
    in:
      input_vcf: filter_mutect2_vcf/filtered_vcf
      # Output file name is taken from the input VCF's basename.
      output_filename:
        valueFrom: |
          $(inputs.input_vcf.basename)
      new_normal_name: input_normal_name
      new_tumor_name: input_tumor_name
      old_normal_name: old_normal_name
      old_tumor_name: old_tumor_name
      # Expression that always evaluates to true.
      tbi:
        valueFrom: |
          $(1 == 1)
    out:
      - reheadered_vcf

  # Take the reheadered VCF when present, otherwise the filtered VCF.
  pickvalue_workaround:
    run: ../tools/expression_pickvalue_workaround.cwl
    in:
      input_file:
        source:
          - rename_vcf_samples/reheadered_vcf
          - filter_mutect2_vcf/filtered_vcf
        pickValue: first_non_null
    out:
      - output

  gatk_selectvariants_mutect2:
    label: GATK Select PASS
    run: ../tools/gatk_selectvariants.cwl
    in:
      input_vcf: pickvalue_workaround/output
      output_basename: output_basename
      tool_name: tool_name
      mode: select_vars_mode
    out:
      - pass_vcf

  # Annotation sub-workflow; apart from input_vcf, every input is passed
  # straight through from the workflow input of the same name.
  annotate:
    run: ../kf-annotation-tools/workflows/kfdrc-somatic-snv-annot-workflow.cwl
    in:
      indexed_reference_fasta: indexed_reference_fasta
      input_vcf: gatk_selectvariants_mutect2/pass_vcf
      input_tumor_name: input_tumor_name
      input_normal_name: input_normal_name
      add_common_fields: add_common_fields
      retain_info: retain_info
      retain_fmt: retain_fmt
      retain_ann: retain_ann
      echtvar_anno_zips: echtvar_anno_zips
      bcftools_strip_columns: bcftools_strip_columns
      bcftools_public_filter: bcftools_public_filter
      dbnsfp: dbnsfp
      dbnsfp_fields: dbnsfp_fields
      merged: merged
      cadd_indels: cadd_indels
      cadd_snvs: cadd_snvs
      run_cache_af: run_cache_af
      run_cache_existing: run_cache_existing
      gatk_filter_name: gatk_filter_name
      gatk_filter_expression: gatk_filter_expression
      vep_cache: vep_cache
      vep_ram: vep_ram
      vep_cores: vep_cores
      vep_buffer_size: vep_buffer_size
      disable_hotspot_annotation: disable_hotspot_annotation
      disable_vep_annotation: disable_vep_annotation
      genomic_hotspots: genomic_hotspots
      protein_snv_hotspots: protein_snv_hotspots
      protein_indel_hotspots: protein_indel_hotspots
      maf_center: maf_center
      custom_enst: custom_enst
      output_basename: output_basename
      tool_name: tool_name
    out:
      - annotated_protected
      - annotated_public
# Namespace prefix for the `sbg:`-prefixed hint class used in this document.
$namespaces:
  sbg: 'https://sevenbridges.com'