Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deeparg changes #1

Open
wants to merge 1 commit into
base: deeparg
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 24 additions & 24 deletions tools/deeparg/deeparg_predict.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,39 +30,39 @@
##
]]></command>
<inputs>
<param name="hide_db_build" type="hidden" value="" />
<param name="hide_db_build" type="hidden" value=""/>
<param name="input" type="data" format="fasta" label="Input file"/>
<param name="deeparg_db" type="select" label="DeepARG database">
<options from_data_table="deeparg_database_versioned">
<validator message="No deeparg database is available" type="no_options"/>
</options>
</param>
<param argument="--model" type="select" label="Select model to use" >
<param argument="--model" type="select" label="Select model to use">
<option value="SS" selected="true">SS (short sequences for reads)</option>
<option value="LS">LS (long sequences for genes)</option>
</param>
<param argument="--type" type="select" label="Molecular data type" >
<param argument="--type" type="select" label="Molecular data type">
<option value="nucl" selected="true">Nucleotide (default)</option>
<option value="prot">Protein</option>
</param>
<param argument="--min-prob" type="float" min="0" max="1" value="0.8" label="Minimum probability cutoff [Default: 0.8]" />
<param argument="--arg-alignment-identity" type="integer" min="0" value="50" label="Identity cutoff for sequence alignment [Default: 50]" />
<param argument="--arg-alignment-evalue" type="float" min="0" value="1e-10" label="Evalue cutoff [Default: 1e-10]" />
<param argument="--arg-alignment-overlap" type="float" min="0" max="1" value="0.8" label="Alignment read overlap [Default: 0.8]" />
<param argument="--arg-num-alignments-per-entry" type="integer" min="0" value="1000" label="Diamond, minimum number of alignments per entry [Default: 1000]" />
<param argument="--min-prob" type="float" min="0" max="1" value="0.8" label="Minimum probability cutoff [Default: 0.8]"/>
<param argument="--arg-alignment-identity" type="integer" min="0" value="50" label="Identity cutoff for sequence alignment [Default: 50]"/>
<param argument="--arg-alignment-evalue" type="float" min="0" value="1e-10" label="Evalue cutoff [Default: 1e-10]"/>
<param argument="--arg-alignment-overlap" type="float" min="0" max="1" value="0.8" label="Alignment read overlap [Default: 0.8]"/>
<param argument="--arg-num-alignments-per-entry" type="integer" min="0" value="1000" label="Diamond, minimum number of alignments per entry [Default: 1000]"/>
<section name="output_files" title="Selection of the output files">
<param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection">
<option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
<option value="file_potential_ARG_tsv" selected="true">ARG detected with prob below --prob in TSV</option>
<option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
</param>
</section>
<param name="output_selection" type="select" label="Output files selection" display="checkboxes" multiple="true">
<option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
<option value="file_potential_ARG_tsv" selected="true">ARG detected with prob below --prob in TSV</option>
<option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
</param>
</section>
</inputs>
<outputs>
<data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)" >
<data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)">
<filter>output_files['output_selection'] and "file_ARG_tsv" in output_files['output_selection']</filter>
</data>
<data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)" >
<data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)">
<filter>output_files['output_selection'] and "file_potential_ARG_tsv" in output_files['output_selection']</filter>
</data>
<data name="output_all_hits" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.align.daa.tsv" label="${tool.name} on ${on_string} : all hits detected">
Expand All @@ -72,30 +72,30 @@
<tests>
<!-- Test 1 -->
<test expect_num_outputs="3">
<param name="hide_db_build" value="true" />
<param name="hide_db_build" value="true"/>
<param name="input" value="ORFs.fa" ftype="fasta"/>
<param name="deeparg_db" value="deeparg_1.0.4-19122024" />
<param name="deeparg_db" value="deeparg_1.0.4-19122024"/>
<param name="model" value="SS"/>
<param name="type" value="nucl"/>
<section name="output_files">
<param name="output_selection" value="file_ARG_tsv,file_potential_ARG_tsv,file_all_hits_tsv"/>
</section>
<output name="output_mapping_ARG" ftype="tabular">
<assert_contents>
<has_text text="YP_003283625.1|FEATURES|tet(K)|tetracycline|tet(K)" />
<has_text text="RPOB2" />
<has_text text="YP_003283625.1|FEATURES|tet(K)|tetracycline|tet(K)"/>
<has_text text="RPOB2"/>
</assert_contents>
</output>
<output name="output_mapping_potential_ARG" ftype="tabular">
<assert_contents>
<has_text text="gi:545254650:ref:WP_021551023.1:|FEATURES|mdtB|multidrug|mdtB" />
<has_text text="MUXB" />
<has_text text="gi:545254650:ref:WP_021551023.1:|FEATURES|mdtB|multidrug|mdtB"/>
<has_text text="MUXB"/>
</assert_contents>
</output>
<output name="output_all_hits" ftype="tabular">
<assert_contents>
<has_size value="226000" delta="10000" />
<has_text text="ADV91011.1|FEATURES|RbpA|rifamycin|RbpA" />
<has_size value="226000" delta="10000"/>
<has_text text="ADV91011.1|FEATURES|RbpA|rifamycin|RbpA"/>
</assert_contents>
</output>
</test>
Expand Down
80 changes: 43 additions & 37 deletions tools/deeparg/deeparg_short_reads.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
<requirement type="package" version="2.27.1">vsearch</requirement>
<requirement type="package" version="2.3.5.1">bowtie2</requirement>
<requirement type="package" version="2.29.2">bedtools</requirement>
<requirement type="package">samtools</requirement>
<requirement type="package" version="1.21">samtools</requirement>
</expand>
<command detect_errors="exit_code"><![CDATA[
##Used only for test
## Used only for test
#if str($hide_db_build) == 'true':
deeparg download_data -o deeparg_1.0.4 &&
#end if
Expand All @@ -32,51 +32,57 @@
--deeparg_evalue $deeparg_evalue
--gene_coverage $gene_coverage
--bowtie_16s_identity $bowtie_16s_identity
##Used only for test
## Used only for test
#if str($hide_db_build) == 'true':
&& rm -r deeparg_1.0.4
#end if
##
]]></command>
<inputs>
<param name="hide_db_build" type="hidden" value="" />
<param name="hide_db_build" type="hidden" value=""/>
<param argument="--forward_pe_file" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Forward mate from paired end library"/>
<param argument="--reverse_pe_file" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Reverse mate from paired end library"/>
<param name="deeparg_db" type="select" label="DeepARG database">
<options from_data_table="deeparg_database_versioned">
<validator message="No deeparg database is available" type="no_options"/>
</options>
</param>
<param argument="--deeparg_identity" type="integer" min="0" value="80" label="Minimum identity for ARG alignments [default 80]" />
<param argument="--deeparg_probability" type="float" min="0" max="1" value="0.8" label="Minimum probability for considering a reads as ARG-like [default 0.8]" />
<param argument="--deeparg_evalue" type="float" min="0" value="1e-10" label="Minimum e-value for ARG alignments [default 1e-10]" />
<param argument="--gene_coverage" type="integer" min="0" max="100" value="1" label="Minimum coverage required for considering a full gene in percentage. This parameter looks at the full gene and all hits that align to the gene. If the overlap of all hits is below the threshold the gene is discarded. Use with caution [default 1]" />
<param argument="--bowtie_16s_identity" type="float" min="0" max="1" value="0.8" label="minimum identity a read as a 16s rRNA gene [default 0.8]" />
<param argument="--deeparg_identity" type="integer" min="0" value="80" label="Minimum identity for ARG alignments" help="default value is 80"/>
<param argument="--deeparg_probability" type="float" min="0" max="1" value="0.8"
label="Minimum probability for considering a read as ARG-like"
help="default value is 0.8"/>
<param argument="--deeparg_evalue" type="float" min="0" value="1e-10"
label="Minimum e-value for ARG alignments"
help="default value is 1e-10"/>
<param argument="--gene_coverage" type="integer" min="0" max="100" value="1" label="Minimum coverage required for considering a full gene in percentage"
help="This parameter looks at the full gene and all hits that align to the gene. If the overlap of all hits is below the threshold the gene is discarded. Use with caution. Default value is 1"/>
<param argument="--bowtie_16s_identity" type="float" min="0" max="1" value="0.8" label="Minimum identity for considering a read as a 16S rRNA gene"
help="default value is 0.8"/>
<section name="output_files" title="Selection of the output files">
<param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection">
<option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
<option value="file_merged_ARG_tsv" selected="false">ARG sorted and merged in TSV file</option>
<option value="file_ARG_subtype_tsv" selected="true">ARGs merged by subtype in TSV file</option>
<option value="file_ARG_type_tsv" selected="true">ARGs merged by type in TSV file</option>
<option value="file_potential_ARG_tsv" selected="false">ARG detected with prob below --prob in TSV</option>
<option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
</param>
</section>
<param name="output_selection" type="select" label="Output files selection" display="checkboxes" multiple="true">
<option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
<option value="file_merged_ARG_tsv" selected="false">ARG sorted and merged in TSV file</option>
<option value="file_ARG_subtype_tsv" selected="true">ARGs merged by subtype in TSV file</option>
<option value="file_ARG_type_tsv" selected="true">ARGs merged by type in TSV file</option>
<option value="file_potential_ARG_tsv" selected="false">ARG detected with prob below --prob in TSV</option>
<option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
</param>
</section>
</inputs>
<outputs>
<data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)" >
<data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)">
<filter>output_files['output_selection'] and "file_ARG_tsv" in output_files['output_selection']</filter>
</data>
<data name="output_merged_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged" label="${tool.name} on ${on_string} : ARG merged and sorted" >
<data name="output_merged_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged" label="${tool.name} on ${on_string} : ARG merged and sorted">
<filter>output_files['output_selection'] and "file_merged_ARG_tsv" in output_files['output_selection']</filter>
</data>
<data name="output_subtype_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged.quant.subtype" label="${tool.name} on ${on_string} : ARG merged by subtype" >
<data name="output_subtype_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged.quant.subtype" label="${tool.name} on ${on_string} : ARG merged by subtype">
<filter>output_files['output_selection'] and "file_ARG_subtype_tsv" in output_files['output_selection']</filter>
</data>
<data name="output_type_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged.quant.type" label="${tool.name} on ${on_string} : ARG merged by type" >
<data name="output_type_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged.quant.type" label="${tool.name} on ${on_string} : ARG merged by type">
<filter>output_files['output_selection'] and "file_ARG_type_tsv" in output_files['output_selection']</filter>
</data>
<data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)" >
<data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)">
<filter>output_files['output_selection'] and "file_potential_ARG_tsv" in output_files['output_selection']</filter>
</data>
<data name="output_all_hits" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.align.daa.tsv" label="${tool.name} on ${on_string} : all hits detected">
Expand All @@ -86,47 +92,47 @@
<tests>
<!-- Test 1 -->
<test expect_num_outputs="6">
<param name="hide_db_build" value="true" />
<param name="hide_db_build" value="true"/>
<param name="forward_pe_file" value="light_F.fq.gz" ftype="fastqsanger.gz"/>
<param name="reverse_pe_file" value="light_R.fq.gz" ftype="fastqsanger.gz"/>
<param name="deeparg_db" value="deeparg_1.0.4-19122024" />
<param name="deeparg_db" value="deeparg_1.0.4-19122024"/>
<section name="output_files">
<param name="output_selection" value="file_ARG_tsv,file_merged_ARG_tsv,file_ARG_subtype_tsv,file_ARG_type_tsv,file_potential_ARG_tsv,file_all_hits_tsv"/>
</section>
<output name="output_mapping_ARG" ftype="tabular">
<assert_contents>
<has_text text="BAE06008.1|FEATURES|mexQ|multidrug|mexQ" />
<has_text text="CATA" />
<has_text text="BAE06008.1|FEATURES|mexQ|multidrug|mexQ"/>
<has_text text="CATA"/>
</assert_contents>
</output>
<output name="output_merged_ARG" ftype="tabular">
<assert_contents>
<has_line line="ACRB&#009;568&#009;600&#009;1&#009;multidrug" />
<has_text text="VANR&#009;137&#009;169&#009;1&#009;glycopeptide" />
<has_line line="ACRB&#9;568&#9;600&#9;1&#9;multidrug"/>
<has_text text="VANR&#9;137&#9;169&#9;1&#9;glycopeptide"/>
</assert_contents>
</output>
<output name="output_subtype_ARG" ftype="tabular">
<assert_contents>
<has_text text="MDTP&#009;1&#009;0.146721311475" />
<has_text text="ACRD&#009;1&#009;0.0690453230473" />
<has_text text="MDTP&#9;1&#9;0.146721311475"/>
<has_text text="ACRD&#9;1&#9;0.0690453230473"/>
</assert_contents>
</output>
<output name="output_type_ARG" ftype="tabular">
<assert_contents>
<has_text text="aminoglycoside&#009;2&#009;0.550769230769" />
<has_text text="glycopeptide&#009;1&#009;0.312663755459" />
<has_text text="aminoglycoside&#9;2&#9;0.550769230769"/>
<has_text text="glycopeptide&#9;1&#9;0.312663755459"/>
</assert_contents>
</output>
<output name="output_mapping_potential_ARG" ftype="tabular">
<assert_contents>
<has_text text="gi:447120629:ref:WP_001197885.1:|FEATURES|mdtB|multidrug|mdtB" />
<has_text text="PORIN_OMPC" />
<has_text text="gi:447120629:ref:WP_001197885.1:|FEATURES|mdtB|multidrug|mdtB"/>
<has_text text="PORIN_OMPC"/>
</assert_contents>
</output>
<output name="output_all_hits" ftype="tabular">
<assert_contents>
<has_text text="YP_001966224|FEATURES|catA|phenicol|catA" />
<has_text text="CP001918.1.gene2931.p01|FEATURES|major_facilitator_superfamily_transporter|multidrug|major_facilitator_superfamily_transporter" />
<has_text text="YP_001966224|FEATURES|catA|phenicol|catA"/>
<has_text text="CP001918.1.gene2931.p01|FEATURES|major_facilitator_superfamily_transporter|multidrug|major_facilitator_superfamily_transporter"/>
</assert_contents>
</output>
</test>
Expand Down
2 changes: 1 addition & 1 deletion tools/deeparg/macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@
</citations>
</xml>
<token name="HELP"><![CDATA[
DeepARG is a tool to predict antibiotic resistance genes (ARGs) in metagenomic samples.
DeepARG is a tool to predict antibiotic resistance genes (ARGs) in metagenomic samples.
]]></token>
</xml>