From bb76db77da3c7c676741e28c7e9a30ee64461144 Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Mon, 8 Jul 2024 14:11:12 +0200 Subject: [PATCH] create subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer similar to nf-core/atacseq --- conf/modules.config | 10 +- ...antify_qc_bedtools_featurecounts_deseq2.nf | 156 ++++++++++++++++++ workflows/chipseq.nf | 127 +++----------- 3 files changed, 187 insertions(+), 106 deletions(-) create mode 100644 subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf diff --git a/conf/modules.config b/conf/modules.config index 1fc095e3..a18fa85c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -615,7 +615,7 @@ if (!params.skip_peak_annotation) { if (!params.skip_consensus_peaks) { process { - withName: 'MACS2_CONSENSUS' { + withName: '.*:BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2:MACS2_CONSENSUS' { ext.when = { meta.multiple_groups || meta.replicates_exist } ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ @@ -625,7 +625,7 @@ if (!params.skip_consensus_peaks) { ] } - withName: 'SUBREAD_FEATURECOUNTS' { + withName: '.*:BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2:SUBREAD_FEATURECOUNTS' { ext.args = '-F SAF -O --fracOverlap 0.2' ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ @@ -638,7 +638,7 @@ if (!params.skip_consensus_peaks) { if (!params.skip_peak_annotation) { process { - withName: 'HOMER_ANNOTATEPEAKS_CONSENSUS' { + withName: '.*:BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2:HOMER_ANNOTATEPEAKS' { ext.args = '-gid' ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ @@ -648,7 +648,7 @@ if (!params.skip_consensus_peaks) { ] } - withName: 'ANNOTATE_BOOLEAN_PEAKS' { + withName: '.*:BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2:ANNOTATE_BOOLEAN_PEAKS' { ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/consensus/${meta.id}" }, @@ -661,7 +661,7 @@ if (!params.skip_consensus_peaks) { if (!params.skip_deseq2_qc) { process { - withName: DESEQ2_QC { + withName: '.*:BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2:DESEQ2_QC' { ext.when = { meta.multiple_groups && meta.replicates_exist } ext.args = [ '--id_col 1', diff --git a/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf b/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf new file mode 100644 index 00000000..98e4bc80 --- /dev/null +++ b/subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf @@ -0,0 +1,156 @@ +// +// Call consensus peaks with BEDTools and custom scripts, annotate with HOMER, quantify with featureCounts and QC with DESeq2 +// + +include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' +include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts/main' + +include { MACS2_CONSENSUS } from '../../modules/local/macs2_consensus' +include { DESEQ2_QC } from '../../modules/local/deseq2_qc' + +workflow BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 { + take: + ch_peaks // channel: [ val(meta), [ peaks ] ] + ch_bams // channel: [ val(meta), [ ip_bams ] ] + ch_fasta // channel: [ fasta ] + ch_gtf // channel: [ gtf ] + ch_deseq2_pca_header_multiqc // channel: [ header_file ] + ch_deseq2_clustering_header_multiqc // channel: [ header_file ] + is_narrow_peak // boolean: true/false + skip_peak_annotation // boolean: true/false + skip_deseq2_qc // boolean: true/false + + main: + + ch_versions = Channel.empty() + + // Create channels: [ meta , [ peaks ] ] + // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ] + ch_peaks + .map { + meta, peak -> + [ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ] + } + .groupTuple() + .map { + antibody, groups, peaks -> + [ + antibody, + groups.groupBy().collectEntries { [(it.key) : it.value.size()] }, + peaks + ] + } + .map { + antibody, groups, peaks -> + def meta_new = [:] + meta_new.id = antibody + meta_new.multiple_groups = groups.size() > 1 + meta_new.replicates_exist = groups.max { groups.value }.value > 1 + [ meta_new, peaks ] + } + .set { ch_antibody_peaks } + + // + // Generate consensus peaks across samples + // + MACS2_CONSENSUS ( + ch_antibody_peaks, + is_narrow_peak + ) + ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions) + + // + // Annotate consensus peaks + // + if (!skip_peak_annotation) { + HOMER_ANNOTATEPEAKS ( + MACS2_CONSENSUS.out.bed, + ch_fasta, + ch_gtf + ) + + // + // MODULE: Add boolean fields to annotated consensus peaks to aid filtering + // + ANNOTATE_BOOLEAN_PEAKS ( + MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS.out.txt, by: [0]), + ) + ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions) + } + + // Create channels: [ meta, [ ip_bams ], saf ] + MACS2_CONSENSUS + .out + .saf + .map { + meta, saf -> + [ meta.id, meta, saf ] + } + .join(ch_bams) + .map { + antibody, meta, saf, bams -> + [ meta, bams.flatten().sort(), saf ] + } + .set { ch_bam_saf } + + // + // Quantify peaks across samples with featureCounts + // + SUBREAD_FEATURECOUNTS ( + ch_bam_saf + ) + ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions) + + // + // Generate QC plots with DESeq2 + // + ch_deseq2_qc_pdf = Channel.empty() + ch_deseq2_qc_rdata = Channel.empty() + ch_deseq2_qc_rds = Channel.empty() + ch_deseq2_qc_pca_txt = Channel.empty() + ch_deseq2_qc_pca_multiqc = Channel.empty() + ch_deseq2_qc_dists_txt = Channel.empty() + ch_deseq2_qc_dists_multiqc = Channel.empty() + ch_deseq2_qc_log = Channel.empty() + ch_deseq2_qc_size_factors = Channel.empty() + if (!skip_deseq2_qc) { + DESEQ2_QC ( + SUBREAD_FEATURECOUNTS.out.counts, + ch_deseq2_pca_header_multiqc, + ch_deseq2_clustering_header_multiqc + ) + ch_deseq2_qc_pdf = DESEQ2_QC.out.pdf + ch_deseq2_qc_rdata = DESEQ2_QC.out.rdata + ch_deseq2_qc_rds = DESEQ2_QC.out.rds + ch_deseq2_qc_pca_txt = DESEQ2_QC.out.pca_txt + ch_deseq2_qc_pca_multiqc = DESEQ2_QC.out.pca_multiqc + ch_deseq2_qc_dists_txt = DESEQ2_QC.out.dists_txt + ch_deseq2_qc_dists_multiqc = DESEQ2_QC.out.dists_multiqc + ch_deseq2_qc_log = DESEQ2_QC.out.log + ch_deseq2_qc_size_factors = DESEQ2_QC.out.size_factors + ch_versions = ch_versions.mix(DESEQ2_QC.out.versions) + } + + emit: + consensus_bed = MACS2_CONSENSUS.out.bed // channel: [ bed ] + consensus_saf = MACS2_CONSENSUS.out.saf // channel: [ saf ] + consensus_pdf = MACS2_CONSENSUS.out.pdf // channel: [ pdf ] + consensus_txt = MACS2_CONSENSUS.out.txt // channel: [ pdf ] + consensus_boolean_txt = MACS2_CONSENSUS.out.boolean_txt // channel: [ txt ] + consensus_intersect_txt = MACS2_CONSENSUS.out.intersect_txt // channel: [ txt ] + + featurecounts_txt = SUBREAD_FEATURECOUNTS.out.counts // channel: [ txt ] + featurecounts_summary = SUBREAD_FEATURECOUNTS.out.summary // channel: [ txt ] + + deseq2_qc_pdf = ch_deseq2_qc_pdf // channel: [ pdf ] + deseq2_qc_rdata = ch_deseq2_qc_rdata // channel: [ rdata ] + deseq2_qc_rds = ch_deseq2_qc_rds // channel: [ rds ] + deseq2_qc_pca_txt = ch_deseq2_qc_pca_txt // channel: [ txt ] + deseq2_qc_pca_multiqc = ch_deseq2_qc_pca_multiqc // channel: [ txt ] + deseq2_qc_dists_txt = ch_deseq2_qc_dists_txt // channel: [ txt ] + deseq2_qc_dists_multiqc = ch_deseq2_qc_dists_multiqc // channel: [ txt ] + deseq2_qc_log = ch_deseq2_qc_log // channel: [ txt ] + deseq2_qc_size_factors = ch_deseq2_qc_size_factors // channel: [ txt ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index c7ba425d..3b8ab88a 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -17,15 +17,16 @@ include { MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS } from '../modules/local/multiqc_c // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_chipseq_pipeline' -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { ALIGN_STAR } from '../subworkflows/local/align_star' -include { BAM_FILTER_BAMTOOLS } from '../subworkflows/local/bam_filter_bamtools' -include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' -include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_chipseq_pipeline' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { ALIGN_STAR } from '../subworkflows/local/align_star' +include { BAM_FILTER_BAMTOOLS } from '../subworkflows/local/bam_filter_bamtools' +include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' +include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' +include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -476,63 +477,6 @@ workflow CHIPSEQ { ch_deseq2_pca_multiqc = Channel.empty() ch_deseq2_clustering_multiqc = Channel.empty() if (!params.skip_consensus_peaks) { - // Create channels: [ meta , [ peaks ] ] - // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ] - BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.peaks - .map { - meta, peak -> - [ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ] - } - .groupTuple() - .map { - antibody, groups, peaks -> - [ - antibody, - groups.groupBy().collectEntries { [(it.key) : it.value.size()] }, - peaks - ] - } - .map { - antibody, groups, peaks -> - def meta_new = [:] - meta_new.id = antibody - meta_new.multiple_groups = groups.size() > 1 - meta_new.replicates_exist = groups.max { groups.value }.value > 1 - [ meta_new, peaks ] - } - .set { ch_antibody_peaks } - - // - // MODULE: Generate consensus peaks across samples - // - MACS2_CONSENSUS ( - ch_antibody_peaks, - params.narrow_peak - ) - ch_macs2_consensus_bed_lib = MACS2_CONSENSUS.out.bed - ch_macs2_consensus_txt_lib = MACS2_CONSENSUS.out.txt - ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions) - - if (!params.skip_peak_annotation) { - // - // MODULE: Annotate consensus peaks - // - HOMER_ANNOTATEPEAKS_CONSENSUS ( - MACS2_CONSENSUS.out.bed, - ch_fasta, - ch_gtf - ) - ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_CONSENSUS.out.versions) - - // - // MODULE: Add boolean fields to annotated consensus peaks to aid filtering - // - ANNOTATE_BOOLEAN_PEAKS ( - MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS_CONSENSUS.out.txt, by: [0]), - ) - ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions) - } - // Create channels: [ antibody, [ ip_bams ] ] ch_ip_control_bam .map { @@ -542,42 +486,23 @@ workflow CHIPSEQ { .groupTuple() .set { ch_antibody_bams } - // Create channels: [ meta, [ ip_bams ], saf ] - MACS2_CONSENSUS - .out - .saf - .map { - meta, saf -> - [ meta.id, meta, saf ] - } - .join(ch_antibody_bams) - .map { - antibody, meta, saf, bams -> - [ meta, bams.flatten().sort(), saf ] - } - .set { ch_saf_bams } - - // - // MODULE: Quantify peaks across samples with featureCounts - // - SUBREAD_FEATURECOUNTS ( - ch_saf_bams + BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 ( + BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.peaks, + ch_antibody_bams, + ch_fasta, + ch_gtf, + ch_deseq2_pca_header, + ch_deseq2_clustering_header, + params.narrow_peak, + params.skip_peak_annotation, + params.skip_deseq2_qc ) - ch_subreadfeaturecounts_multiqc = SUBREAD_FEATURECOUNTS.out.summary - ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first()) - - if (!params.skip_deseq2_qc) { - // - // MODULE: Generate QC plots with DESeq2 - // - DESEQ2_QC ( - SUBREAD_FEATURECOUNTS.out.counts, - ch_deseq2_pca_header, - ch_deseq2_clustering_header - ) - ch_deseq2_pca_multiqc = DESEQ2_QC.out.pca_multiqc - ch_deseq2_clustering_multiqc = DESEQ2_QC.out.dists_multiqc - } + ch_macs2_consensus_bed_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_bed + ch_macs2_consensus_txt_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_txt + ch_subreadfeaturecounts_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.featurecounts_summary + ch_deseq2_pca_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.deseq2_qc_pca_multiqc + ch_deseq2_clustering_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.deseq2_qc_dists_multiqc + ch_versions = ch_versions.mix(BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.versions) } //