hugolefeuvre · bgruening · Jan 9, 2025
diff --git a/tools/deeparg/deeparg_predict.xml b/tools/deeparg/deeparg_predict.xml
@@ -30,39 +30,39 @@
 ##
 ]]></command>
     <inputs>
-        <param name="hide_db_build" type="hidden" value="" />
+        <param name="hide_db_build" type="hidden" value=""/>
         <param name="input" type="data" format="fasta" label="Input file"/>
         <param name="deeparg_db" type="select" label="DeepARG database">
             <options from_data_table="deeparg_database_versioned">
                 <validator message="No deeparg database is available" type="no_options"/>
             </options>
         </param>
-        <param argument="--model" type="select" label="Select model to use" >
+        <param argument="--model" type="select" label="Select model to use">
             <option value="SS" selected="true">SS (short sequences for reads)</option>
             <option value="LS">LS (long sequences for genes)</option>
         </param>
-        <param argument="--type" type="select" label="Molecular data type" >
+        <param argument="--type" type="select" label="Molecular data type">
             <option value="nucl" selected="true">Nucleotid (default)</option>
             <option value="prot">Protein</option>
         </param>
-        <param argument="--min-prob" type="float" min="0" max="1" value="0.8" label="Minimum probability cutoff [Default: 0.8]" />
-        <param argument="--arg-alignment-identity" type="integer" min="0" value="50" label="Identity cutoff for sequence alignment [Default: 50]" />
-        <param argument="--arg-alignment-evalue" type="float" min="0" value="1e-10" label="Evalue cutoff [Default: 1e-10]" />
-        <param argument="--arg-alignment-overlap" type="float" min="0" max="1" value="0.8" label="Alignment read overlap [Default: 0.8]" />
-        <param argument="--arg-num-alignments-per-entry" type="integer" min="0" value="1000" label="Diamond, minimum number of alignments per entry [Default: 1000]" />
+        <param argument="--min-prob" type="float" min="0" max="1" value="0.8" label="Minimum probability cutoff [Default: 0.8]"/>
+        <param argument="--arg-alignment-identity" type="integer" min="0" value="50" label="Identity cutoff for sequence alignment [Default: 50]"/>
+        <param argument="--arg-alignment-evalue" type="float" min="0" value="1e-10" label="Evalue cutoff [Default: 1e-10]"/>
+        <param argument="--arg-alignment-overlap" type="float" min="0" max="1" value="0.8" label="Alignment read overlap [Default: 0.8]"/>
+        <param argument="--arg-num-alignments-per-entry" type="integer" min="0" value="1000" label="Diamond, minimum number of alignments per entry [Default: 1000]"/>
         <section name="output_files" title="Selection of the output files">
-          <param name="output_selection" type="select" display="checkboxes" multiple="true"  label="Output files selection">
-              <option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
-              <option value="file_potential_ARG_tsv" selected="true">ARG detected with prob below --prob in TSV</option>
-              <option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
-          </param>
-        </section> 
+            <param name="output_selection" type="select" label="Output files selection" display="checkboxes" multiple="true">
+                <option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
+                <option value="file_potential_ARG_tsv" selected="true">ARG detected with prob below --prob in TSV</option>
+                <option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
+            </param>
+        </section>
     </inputs>
     <outputs>
-        <data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)" >
+        <data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)">
             <filter>output_files['output_selection'] and "file_ARG_tsv" in output_files['output_selection']</filter>
         </data>
-        <data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)" >
+        <data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)">
             <filter>output_files['output_selection'] and "file_potential_ARG_tsv" in output_files['output_selection']</filter>
         </data>
         <data name="output_all_hits" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.align.daa.tsv" label="${tool.name} on ${on_string} : all hits detected">
@@ -72,30 +72,30 @@
     <tests>
         <!-- Test 1 -->
         <test expect_num_outputs="3">
-            <param name="hide_db_build" value="true" />
+            <param name="hide_db_build" value="true"/>
             <param name="input" value="ORFs.fa" ftype="fasta"/>
-            <param name="deeparg_db" value="deeparg_1.0.4-19122024" />
+            <param name="deeparg_db" value="deeparg_1.0.4-19122024"/>
             <param name="model" value="SS"/>
             <param name="type" value="nucl"/>
             <section name="output_files">
                 <param name="output_selection" value="file_ARG_tsv,file_potential_ARG_tsv,file_all_hits_tsv"/>
             </section>
             <output name="output_mapping_ARG" ftype="tabular">
                 <assert_contents>
-                    <has_text text="YP_003283625.1|FEATURES|tet(K)|tetracycline|tet(K)" />
-                    <has_text text="RPOB2" />
+                    <has_text text="YP_003283625.1|FEATURES|tet(K)|tetracycline|tet(K)"/>
+                    <has_text text="RPOB2"/>
                 </assert_contents>
             </output>
             <output name="output_mapping_potential_ARG" ftype="tabular">
                 <assert_contents>
-                    <has_text text="gi:545254650:ref:WP_021551023.1:|FEATURES|mdtB|multidrug|mdtB" />
-                    <has_text text="MUXB" />
+                    <has_text text="gi:545254650:ref:WP_021551023.1:|FEATURES|mdtB|multidrug|mdtB"/>
+                    <has_text text="MUXB"/>
                 </assert_contents>
             </output>
             <output name="output_all_hits" ftype="tabular">
                 <assert_contents>
-                    <has_size value="226000" delta="10000" />
-                    <has_text text="ADV91011.1|FEATURES|RbpA|rifamycin|RbpA" />
+                    <has_size value="226000" delta="10000"/>
+                    <has_text text="ADV91011.1|FEATURES|RbpA|rifamycin|RbpA"/>
                 </assert_contents>
             </output>
         </test>

diff --git a/tools/deeparg/deeparg_short_reads.xml b/tools/deeparg/deeparg_short_reads.xml
@@ -9,10 +9,10 @@
         <requirement type="package" version="2.27.1">vsearch</requirement>
         <requirement type="package" version="2.3.5.1">bowtie2</requirement>
         <requirement type="package" version="2.29.2">bedtools</requirement>
-        <requirement type="package">samtools</requirement>
+        <requirement type="package" version="1.21">samtools</requirement>
     </expand>
     <command detect_errors="exit_code"><![CDATA[
-##Used only for test 
+## Used only for test
 #if str($hide_db_build) == 'true':
     deeparg download_data -o deeparg_1.0.4 &&
 #end if
@@ -32,51 +32,57 @@
         --deeparg_evalue $deeparg_evalue
         --gene_coverage $gene_coverage
         --bowtie_16s_identity $bowtie_16s_identity 
-##Used only for test 
+## Used only for test
 #if str($hide_db_build) == 'true':
     && rm -r deeparg_1.0.4
 #end if
 ##
 ]]></command>
     <inputs>
-        <param name="hide_db_build" type="hidden" value="" />
+        <param name="hide_db_build" type="hidden" value=""/>
         <param argument="--forward_pe_file" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Forward mate from paired end library"/>
         <param argument="--reverse_pe_file" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Reverse mate from paired end library"/>
         <param name="deeparg_db" type="select" label="DeepARG database">
             <options from_data_table="deeparg_database_versioned">
                 <validator message="No deeparg database is available" type="no_options"/>
             </options>
         </param>
-        <param argument="--deeparg_identity" type="integer" min="0" value="80" label="Minimum identity for ARG alignments [default 80]" />
-        <param argument="--deeparg_probability" type="float" min="0" max="1" value="0.8" label="Minimum probability for considering a reads as ARG-like [default 0.8]" />
-        <param argument="--deeparg_evalue" type="float" min="0" value="1e-10" label="Minimum e-value for ARG alignments [default 1e-10]" />
-        <param argument="--gene_coverage" type="integer" min="0" max="100" value="1" label="Minimum coverage required for considering a full gene in percentage. This parameter looks at the full gene and all hits that align to the gene. If the overlap of all hits is below the threshold the gene is discarded. Use with caution [default 1]" />
-        <param argument="--bowtie_16s_identity" type="float" min="0" max="1" value="0.8" label="minimum identity a read as a 16s rRNA gene [default 0.8]" />
+        <param argument="--deeparg_identity" type="integer" min="0" value="80" label="Minimum identity for ARG alignments" help="default value is 80"/>
+        <param argument="--deeparg_probability" type="float" min="0" max="1" value="0.8"
+            label="Minimum probability for considering a read as ARG-like"
+            help="default value is 0.8"/>
+        <param argument="--deeparg_evalue" type="float" min="0" value="1e-10"
+            label="Minimum e-value for ARG alignments"
+            help="default value is 1e-10"/>
+        <param argument="--gene_coverage" type="integer" min="0" max="100" value="1" label="Minimum coverage required for considering a full gene in percentage"
+            help="This parameter looks at the full gene and all hits that align to the gene. If the overlap of all hits is below the threshold the gene is discarded. Use with caution. Default value is 1"/>
+        <param argument="--bowtie_16s_identity" type="float" min="0" max="1" value="0.8" label="Minimum identity for considering a read as a 16S rRNA gene"
+            help="default value is 0.8"/>
         <section name="output_files" title="Selection of the output files">
-          <param name="output_selection" type="select" display="checkboxes" multiple="true"  label="Output files selection">
-              <option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
-              <option value="file_merged_ARG_tsv" selected="false">ARG sorted and merged in TSV file</option>
-              <option value="file_ARG_subtype_tsv" selected="true">ARGs merged by subtype in TSV file</option>
-              <option value="file_ARG_type_tsv" selected="true">ARGs merged by type in TSV file</option>
-              <option value="file_potential_ARG_tsv" selected="false">ARG detected with prob below --prob in TSV</option>
-              <option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
-          </param>
-        </section> 
+            <param name="output_selection" type="select" label="Output files selection" display="checkboxes" multiple="true">
+                <option value="file_ARG_tsv" selected="true">ARG detected with prob higher or equal to --prob in TSV</option>
+                <option value="file_merged_ARG_tsv" selected="false">ARG sorted and merged in TSV file</option>
+                <option value="file_ARG_subtype_tsv" selected="true">ARGs merged by subtype in TSV file</option>
+                <option value="file_ARG_type_tsv" selected="true">ARGs merged by type in TSV file</option>
+                <option value="file_potential_ARG_tsv" selected="false">ARG detected with prob below --prob in TSV</option>
+                <option value="file_all_hits_tsv" selected="false">All hits detected in TSV</option>
+            </param>
+        </section>
     </inputs>
     <outputs>
-        <data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)" >
+        <data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --prob)">
             <filter>output_files['output_selection'] and "file_ARG_tsv" in output_files['output_selection']</filter>
         </data>
-        <data name="output_merged_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged" label="${tool.name} on ${on_string} : ARG merged and sorted" >
+        <data name="output_merged_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged" label="${tool.name} on ${on_string} : ARG merged and sorted">
             <filter>output_files['output_selection'] and "file_merged_ARG_tsv" in output_files['output_selection']</filter>
         </data>
-        <data name="output_subtype_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged.quant.subtype" label="${tool.name} on ${on_string} : ARG merged by subtype" >
+        <data name="output_subtype_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged.quant.subtype" label="${tool.name} on ${on_string} : ARG merged by subtype">
             <filter>output_files['output_selection'] and "file_ARG_subtype_tsv" in output_files['output_selection']</filter>
         </data>
-        <data name="output_type_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged.quant.type" label="${tool.name} on ${on_string} : ARG merged by type" >
+        <data name="output_type_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.ARG.merged.quant.type" label="${tool.name} on ${on_string} : ARG merged by type">
             <filter>output_files['output_selection'] and "file_ARG_type_tsv" in output_files['output_selection']</filter>
         </data>
-        <data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)" >
+        <data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --prob)">
             <filter>output_files['output_selection'] and "file_potential_ARG_tsv" in output_files['output_selection']</filter>
         </data>
         <data name="output_all_hits" format="tabular" from_work_dir="deeparg_short_reads_output/SR.clean.deeparg.align.daa.tsv" label="${tool.name} on ${on_string} : all hits detected">
@@ -86,47 +92,47 @@
     <tests>
         <!-- Test 1 -->
         <test expect_num_outputs="6">
-            <param name="hide_db_build" value="true" />
+            <param name="hide_db_build" value="true"/>
             <param name="forward_pe_file" value="light_F.fq.gz" ftype="fastqsanger.gz"/>
             <param name="reverse_pe_file" value="light_R.fq.gz" ftype="fastqsanger.gz"/>
-            <param name="deeparg_db" value="deeparg_1.0.4-19122024" />
+            <param name="deeparg_db" value="deeparg_1.0.4-19122024"/>
             <section name="output_files">
                 <param name="output_selection" value="file_ARG_tsv,file_merged_ARG_tsv,file_ARG_subtype_tsv,file_ARG_type_tsv,file_potential_ARG_tsv,file_all_hits_tsv"/>
             </section>
             <output name="output_mapping_ARG" ftype="tabular">
                 <assert_contents>
-                    <has_text text="BAE06008.1|FEATURES|mexQ|multidrug|mexQ" />
-                    <has_text text="CATA" />
+                    <has_text text="BAE06008.1|FEATURES|mexQ|multidrug|mexQ"/>
+                    <has_text text="CATA"/>
                 </assert_contents>
             </output>
             <output name="output_merged_ARG" ftype="tabular">
                 <assert_contents>
-                    <has_line line="ACRB&#009;568&#009;600&#009;1&#009;multidrug" />
-                    <has_text text="VANR&#009;137&#009;169&#009;1&#009;glycopeptide" />
+                    <has_line line="ACRB&#9;568&#9;600&#9;1&#9;multidrug"/>
+                    <has_text text="VANR&#9;137&#9;169&#9;1&#9;glycopeptide"/>
                 </assert_contents>
             </output>
             <output name="output_subtype_ARG" ftype="tabular">
                 <assert_contents>
-                    <has_text text="MDTP&#009;1&#009;0.146721311475" />
-                    <has_text text="ACRD&#009;1&#009;0.0690453230473" />
+                    <has_text text="MDTP&#9;1&#9;0.146721311475"/>
+                    <has_text text="ACRD&#9;1&#9;0.0690453230473"/>
                 </assert_contents>
             </output>
             <output name="output_type_ARG" ftype="tabular">
                 <assert_contents>
-                    <has_text text="aminoglycoside&#009;2&#009;0.550769230769" />
-                    <has_text text="glycopeptide&#009;1&#009;0.312663755459" />
+                    <has_text text="aminoglycoside&#9;2&#9;0.550769230769"/>
+                    <has_text text="glycopeptide&#9;1&#9;0.312663755459"/>
                 </assert_contents>
             </output>
             <output name="output_mapping_potential_ARG" ftype="tabular">
                 <assert_contents>
-                    <has_text text="gi:447120629:ref:WP_001197885.1:|FEATURES|mdtB|multidrug|mdtB" />
-                    <has_text text="PORIN_OMPC" />
+                    <has_text text="gi:447120629:ref:WP_001197885.1:|FEATURES|mdtB|multidrug|mdtB"/>
+                    <has_text text="PORIN_OMPC"/>
                 </assert_contents>
             </output>
             <output name="output_all_hits" ftype="tabular">
                 <assert_contents>
-                    <has_text text="YP_001966224|FEATURES|catA|phenicol|catA" />
-                    <has_text text="CP001918.1.gene2931.p01|FEATURES|major_facilitator_superfamily_transporter|multidrug|major_facilitator_superfamily_transporter" />
+                    <has_text text="YP_001966224|FEATURES|catA|phenicol|catA"/>
+                    <has_text text="CP001918.1.gene2931.p01|FEATURES|major_facilitator_superfamily_transporter|multidrug|major_facilitator_superfamily_transporter"/>
                 </assert_contents>
             </output>
         </test>

diff --git a/tools/deeparg/macros.xml b/tools/deeparg/macros.xml
@@ -20,6 +20,6 @@
         </citations>
     </xml>
     <token name="HELP"><![CDATA[
-DeepARG is a tool to predict antibiotic resistance genes (ARGs) in metagenomic samples. 
+DeepARG is a tool to predict antibiotic resistance genes (ARGs) in metagenomic samples.
     ]]></token>
 </xml>