From b2cb1ad3625cba6a40ff25c065c7bd92c86b9518 Mon Sep 17 00:00:00 2001 From: Shaun Jackman Date: Mon, 22 Oct 2018 16:34:44 -0700 Subject: [PATCH 1/2] unicycler: Add parameter --long_reads --- changelog.md | 1 + flowcraft/generator/components/assembly.py | 9 +++++++++ flowcraft/generator/templates/unicycler.nf | 11 ++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 9926f6a3..41ef21ae 100644 --- a/changelog.md +++ b/changelog.md @@ -16,6 +16,7 @@ resolution - The `abyss` and `spades` components emit GFA in a secondary channel. - The new `bandage` component can accept either FASTA from a primary channel or GFA from a secondary channel. +- The new `unicycler` component can accept long reads from a secondary channel. ### New components diff --git a/flowcraft/generator/components/assembly.py b/flowcraft/generator/components/assembly.py index 9f1d9fa7..abe683e1 100644 --- a/flowcraft/generator/components/assembly.py +++ b/flowcraft/generator/components/assembly.py @@ -236,8 +236,17 @@ def __init__(self, **kwargs): self.input_type = "fastq" self.output_type = "fasta" + + self.link_end.append({"link": "long_reads", "alias": "long_reads"}) self.link_start.append("gfa1") + self.params = { + "long_reads": { + "default": "null", + "description": "FASTQ or FASTA file of long reads" + }, + } + self.directives = {"unicycler": { "cpus": 4, "container": "quay.io/biocontainers/unicycler", diff --git a/flowcraft/generator/templates/unicycler.nf b/flowcraft/generator/templates/unicycler.nf index 89b31ef1..dd472101 100644 --- a/flowcraft/generator/templates/unicycler.nf +++ b/flowcraft/generator/templates/unicycler.nf @@ -1,3 +1,6 @@ +// True when a long_reads secondary channel is connected to this component. 
+has_long_reads_{{pid}} = binding.hasVariable('long_reads_{{pid}}') + process unicycler_{{pid}} { {% include "post.txt" ignore missing %} @@ -7,6 +10,9 @@ process unicycler_{{pid}} { input: set sample_id, file(fastq_pair) from {{input_channel}} + file long_reads from has_long_reads_{{pid}} ? long_reads_{{pid}} : + params.long_reads{{param_id}} ? Channel.fromPath(params.long_reads{{param_id}}) : + Channel.value("NA") output: set sample_id, file('assembly.fasta') into {{output_channel}} @@ -16,7 +22,10 @@ process unicycler_{{pid}} { {% endwith %} script: - "unicycler -t $task.cpus -o . --no_correct --no_pilon -1 ${fastq_pair[0]} -2 ${fastq_pair[1]}" + command = "unicycler -t $task.cpus -o . --no_correct --no_pilon -1 ${fastq_pair[0]} -2 ${fastq_pair[1]}" + if (has_long_reads_{{pid}} || params.long_reads{{param_id}}) /* long reads may arrive via the linked channel or via the parameter; only the "NA" placeholder case skips -l */ + command += " -l ${long_reads}" + command } {{forks}} From f41791f2631af49cf464f764b1adfefe9b788924 Mon Sep 17 00:00:00 2001 From: Shaun Jackman Date: Mon, 22 Oct 2018 16:36:22 -0700 Subject: [PATCH 2/2] Add porechop --- changelog.md | 1 + .../components/reads_quality_control.py | 34 +++++++++++++++++++ flowcraft/generator/engine.py | 1 + flowcraft/generator/templates/porechop.nf | 28 +++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 flowcraft/generator/templates/porechop.nf diff --git a/changelog.md b/changelog.md index 41ef21ae..00979308 100644 --- a/changelog.md +++ b/changelog.md @@ -22,6 +22,7 @@ resolution - Added component `abyss`. - Added component `bandage`. +- Added component `porechop`. - Added component `unicycler`. 
### Minor/Other changes diff --git a/flowcraft/generator/components/reads_quality_control.py b/flowcraft/generator/components/reads_quality_control.py index de0426c4..04b1aecd 100644 --- a/flowcraft/generator/components/reads_quality_control.py +++ b/flowcraft/generator/components/reads_quality_control.py @@ -433,3 +433,37 @@ def __init__(self, **kwargs): self.status_channels = [ "downsample_fastq" ] + +class Porechop(Process): + """Porechop trims adapters from Oxford Nanopore reads. + + This process is set with: + + - ``input_type``: fastq + - ``output_type``: fastq + - ``ptype``: pre_assembly + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.input_type = "fastq" + self.output_type = "fastq" + + self.link_end.append({"link": "raw_long_reads", "alias": "raw_long_reads"}) + self.link_start.append("long_reads") + + self.params = { + "long_reads": { + "default": "null", + "description": "FASTQ or FASTA file of long reads" + }, + } + + self.directives = { + "porechop": { + "cpus": 4, + "container": "quay.io/biocontainers/porechop", + "version": "0.2.3_seqan2.1.1--py36h2d50403_3" + } + } diff --git a/flowcraft/generator/engine.py b/flowcraft/generator/engine.py index 4f509d38..4edc3ede 100644 --- a/flowcraft/generator/engine.py +++ b/flowcraft/generator/engine.py @@ -88,6 +88,7 @@ "momps": typing.Momps, "patho_typing": typing.PathoTyping, "pilon": ap.Pilon, + "porechop": readsqc.Porechop, "process_skesa": ap.ProcessSkesa, "process_spades": ap.ProcessSpades, "progressive_mauve":alignment.ProgressiveMauve, diff --git a/flowcraft/generator/templates/porechop.nf b/flowcraft/generator/templates/porechop.nf new file mode 100644 index 00000000..ecb40108 --- /dev/null +++ b/flowcraft/generator/templates/porechop.nf @@ -0,0 +1,28 @@ +// True when a raw_long_reads secondary channel is connected to this component. 
+has_raw_long_reads_{{pid}} = binding.hasVariable('raw_long_reads_{{pid}}') + +process porechop_{{pid}} { + {% include "post.txt" ignore missing %} + + publishDir "results/porechop_{{pid}}", pattern: "*.fastq.gz" + publishDir "reports/porechop_{{pid}}", pattern: "*.log" + + tag { sample_id } + + input: + set sample_id, file(fastq_pair) from {{input_channel}} + file raw_long_reads from has_raw_long_reads_{{pid}} ? raw_long_reads_{{pid}} : + Channel.fromPath(params.long_reads{{param_id}}) + + output: + set sample_id, file(fastq_pair) into {{output_channel}} + file "${sample_id}.porechop.fastq.gz" into long_reads_{{pid}} + {% with task_name="porechop" %} + {%- include "compiler_channels.txt" ignore missing -%} + {% endwith %} + + script: + "time porechop -t $task.cpus --format fastq.gz -i ${raw_long_reads} -o ${sample_id}.porechop.fastq.gz >${sample_id}.log" +} + +{{ forks }}