From 146af3f012b607280b301592452d50e9fecd995e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Cavalcante?= Date: Sat, 8 Jun 2024 08:20:33 -0300 Subject: [PATCH] refactor: Change how quast and recombination inputs work (#197) * refactor: Change way of handling IDs in organize_recomb_data * fix: Remove relative paths from quast input --- bin/organize_recomb_data.py | 6 +++--- modules/nf-core/quast/main.nf | 3 ++- modules/nf-core/quast/quast.diff | 16 +++++----------- subworkflows/local/assemblyqc.nf | 7 +------ 4 files changed, 11 insertions(+), 21 deletions(-) diff --git a/bin/organize_recomb_data.py b/bin/organize_recomb_data.py index aeb5846d..1841afe5 100755 --- a/bin/organize_recomb_data.py +++ b/bin/organize_recomb_data.py @@ -35,7 +35,7 @@ def remove_after_second_underscore(val): parts = val.split('_') if len(parts) > 2: return '_'.join(parts[:2]) - return val + return re.sub("\.(.*)|_T1|$", "", val) def create_recomb_input(quast_report, poppunk_clusters, assembly_samplesheet, file_out): # Parsing datasets @@ -46,13 +46,13 @@ def create_recomb_input(quast_report, poppunk_clusters, assembly_samplesheet, fi ) poppunk = read_csv(poppunk_clusters) - poppunk["Taxon"] = poppunk["Taxon"].str.replace("\.(.*)|_T1|$", "", regex=True) + poppunk["Taxon"] = poppunk["Taxon"].apply(remove_after_second_underscore) assemblies = read_csv(assembly_samplesheet, names=["id", "assembly_path"]) assemblies["assembly_path"] = [ Path(path).name for path in assemblies["assembly_path"].to_list() ] - assemblies["id"] = assemblies["id"].str.replace("\.(.*)|_T1|$", "", regex=True) + assemblies["id"] = assemblies["id"].apply(remove_after_second_underscore) # Merging datasets merged = poppunk.merge(quast, right_on="Assembly", left_on="Taxon").loc[ diff --git a/modules/nf-core/quast/main.nf b/modules/nf-core/quast/main.nf index fd5958fe..4591a638 100644 --- a/modules/nf-core/quast/main.nf +++ b/modules/nf-core/quast/main.nf @@ -13,6 +13,7 @@ process QUAST { val use_fasta val use_gff + output: path "${prefix}" , emit: results path "*.tsv" , emit: tsv @@ -34,7 +35,7 @@ process QUAST { $features \\ --threads $task.cpus \\ $args \\ - \$(cat ${consensus.join(' ')}) + ${consensus.join(' ')} ln -s ${prefix}/report.tsv diff --git a/modules/nf-core/quast/quast.diff b/modules/nf-core/quast/quast.diff index 7818dea1..b26c9950 100644 --- a/modules/nf-core/quast/quast.diff +++ b/modules/nf-core/quast/quast.diff @@ -10,8 +10,11 @@ Changes in module 'nf-core/quast' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/quast:5.2.0--py39pl5321h2add14b_1' : 'quay.io/biocontainers/quast:5.2.0--py39pl5321h2add14b_1' }" -@@ -15,7 +15,8 @@ +@@ -13,9 +13,11 @@ + val use_fasta + val use_gff ++ output: path "${prefix}" , emit: results - path '*.tsv' , emit: tsv @@ -20,16 +23,7 @@ Changes in module 'nf-core/quast' path "versions.yml" , emit: versions when: -@@ -33,7 +34,7 @@ - $features \\ - --threads $task.cpus \\ - $args \\ -- ${consensus.join(' ')} -+ \$(cat ${consensus.join(' ')}) - - ln -s ${prefix}/report.tsv - -@@ -42,4 +43,16 @@ +@@ -42,4 +44,16 @@ quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//') END_VERSIONS """ diff --git a/subworkflows/local/assemblyqc.nf b/subworkflows/local/assemblyqc.nf index 65c6e8eb..6d48d02e 100644 --- a/subworkflows/local/assemblyqc.nf +++ b/subworkflows/local/assemblyqc.nf @@ -27,13 +27,8 @@ workflow CHECK_ASSEMBLIES { /* * Module: QUAST quality check */ - assemblies - .map { meta, fasta -> fasta.toString() } - .collectFile(name:'assemblies.txt', newLine: true) - .set { quast_input } - QUAST ( - quast_input, + assemblies.collect { meta, fasta -> fasta }, reference_genome, [], use_reference_genome,