From 3044d8b6026e1ecf4e20d5eb523bf0e52f5bc5d2 Mon Sep 17 00:00:00 2001 From: pbelmann Date: Wed, 18 Nov 2015 10:48:47 +0100 Subject: [PATCH 1/4] close #148 --- main.nf | 54 ++++++++++++++++++++++++++++++------------------------ usage.txt | 6 +++--- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/main.nf b/main.nf index cdac309..636e8f8 100644 --- a/main.nf +++ b/main.nf @@ -5,12 +5,20 @@ params.search = "" params.keywords = "" params.help = "" + if( params.help ) { usage = file("$baseDir/usage.txt") print usage.text return } +hmmDir = file(params.input) +searchFile = file(params.search) +outputDir = file(params.output) +ncbiDB = file(params.ncbi) +genomeFaa = file(params.genome) +keywordsFile = file(params.keywords) + process bootstrap { executor 'local' @@ -22,9 +30,8 @@ process bootstrap { file allHmm shell: - outputDir = file(params.output) if(outputDir.exists()) - exit(0, "Directory ${params.output} already exists. Please remove it or assign another output directory.") + exit(0, "Directory ${outputDir} already exists. Please remove it or assign another output directory.") else outputDir.mkdir() """ @@ -33,13 +40,13 @@ process bootstrap { then make -C !{baseDir} install fi - cat !{params.input}/*.hmm > allHmm + cat !{hmmDir}/*.hmm > allHmm ${params.hmm_press} allHmm """ } fastaChunk = Channel.create() -list = Channel.fromPath(params.genome).splitFasta(by:6000,file:true).collectFile(); +list = Channel.fromPath(genomeFaa).splitFasta(by:6000,file:true).collectFile(); list.spread(allHmm).into(fastaChunk) process hmmFolderScan { @@ -107,11 +114,11 @@ process getFasta { ''' #!/bin/sh contig=`echo "!{contigLine} " | cut -d ' ' -f 4` - grep "$contig " !{params.genome} > uniq_header + grep "$contig " !{genomeFaa} > uniq_header buffer=`cat uniq_header | cut -c 2-` contig=`echo $buffer | cut -d" " -f1` - awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{params.genome} > !{baseDir}/$contig.faa - awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{params.genome} > uniq_out + awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{genomeFaa} > !{baseDir}/$contig.faa + awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{genomeFaa} > uniq_out ''' } @@ -140,7 +147,7 @@ process blastSeqTxt { ''' #!/bin/sh contig=`grep ">" !{uniq_seq} | cut -d" " -f1 | cut -c 2-` - !{params.blastp} -db !{params.ncbi} -outfmt '!{order}' -query "!{uniq_seq}" -out "!{baseDir}/$contig.txt" -num_threads !{params.blast_cpu} + !{params.blastp} -db !{ncbiDB} -outfmt '!{order}' -query "!{uniq_seq}" -out "!{baseDir}/$contig.txt" -num_threads !{params.blast_cpu} echo "$contig" > blast_out ''' } @@ -165,7 +172,7 @@ process blastSeqHtml { ''' #!/bin/sh contig=`grep ">" !{uniq_seqHtml} | cut -d" " -f1 | cut -c 2-` - !{params.blastp} -db !{params.ncbi} -query "!{uniq_seqHtml}" -html -out "!{params.output}/$contig.html" -num_threads !{params.blast_cpu} + !{params.blastp} -db !{ncbiDB} -query "!{uniq_seqHtml}" -html -out "!{outputDir}/$contig.html" -num_threads !{params.blast_cpu} ''' } @@ -173,7 +180,7 @@ process blastSeqHtml { PYTHON="$baseDir/vendor/python/bin/python" coverages = Channel.create() -coverages.bind(params.cov.replaceAll(',',' ')) +coverages.bind(params.cov.split(',').collect{file(it)}.join(' ')) bam = Channel.from(params.bam) sortedIndexedBam = bam.flatMap{ files -> files.split(',')} @@ -216,7 +223,7 @@ process createOverview { val coverageFiles output: - val params.output + '/overview.txt' into over + val outputDir + '/overview.txt' into over shell: ''' @@ -226,7 +233,7 @@ process createOverview { then searchParam="--search=!{params.search}" fi - !{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview} -faa !{baseDir} -o !{params.output} ${searchParam} -c !{coverageFiles.join(' ')} + !{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview} -faa !{baseDir} -o !{outputDir} ${searchParam} -c !{coverageFiles.join(' ')} ''' } @@ -238,14 +245,14 @@ process linkSearch { input: val x from over - params.output + outputDir output: - val params.output into inputF + val outputDir into inputF """ #!/bin/sh - $PYTHON $baseDir/scripts/link_search.py -o ${x} -out ${params.output} + $PYTHON $baseDir/scripts/link_search.py -o ${x} -out ${outputDir} """ } @@ -260,26 +267,26 @@ process folderToPubmed { input: val inp from inputF - params.output + outputDir output: - val params.output + '/all.pubHits' into pub - val params.output + '/overview.txt' into over2 + val outputDir + '/all.pubHits' into pub + val outputDir + '/overview.txt' into over2 shell: ''' #!/bin/sh keywords="" - if [ -f !{params.keywords} ] + if [ -f !{keywordsFile} ] then - keywords=!{params.keywords} + keywords=!{keywordsFile} else emptyKeywords="keywords.txt" touch $emptyKeywords keywords=$emptyKeywords fi echo $keywords - sh !{baseDir}/scripts/FolderToPubmed.sh !{inp} !{params.output} !{baseDir}/scripts/UrltoPubmedID.sh ${keywords} + sh !{baseDir}/scripts/FolderToPubmed.sh !{inp} !{outputDir} !{baseDir}/scripts/UrltoPubmedID.sh ${keywords} ''' } @@ -295,7 +302,7 @@ process linkAssignment { val p from pub output: - val params.output + '/overview_new.txt' into overNew + val outputDir + '/overview_new.txt' into overNew """ #!/bin/sh @@ -314,8 +321,7 @@ process buildHtml { """ #!/bin/sh - $PYTHON $baseDir/scripts/web/controller.py -o ${overview} -out ${params.output} -conf $baseDir/scripts/web/config.yaml -templates $baseDir/scripts/web/app/templates + $PYTHON $baseDir/scripts/web/controller.py -o ${overview} -out ${outputDir} -conf $baseDir/scripts/web/config.yaml -templates $baseDir/scripts/web/app/templates """ } - diff --git a/usage.txt b/usage.txt index 324598e..ad602b7 100644 --- a/usage.txt +++ b/usage.txt @@ -11,7 +11,7 @@ nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input DESCRIPTION MeRaGENE 0.1.0 ---genome="/vol/genomeDat/test.db" Your genome-database to search in. +--genome="/vol/genomeDat/test.faa" Your faa to search in. --ncbi="/vol/blastDat/blast.db" Your blast-database to search in. @@ -19,7 +19,7 @@ MeRaGENE 0.1.0 --output="/vol/project/output" A folder path that the pipeline should produce. ---cov="/vol/project/coverage1.txt,/vol/project/coverage2.txt" List your coverage files, link them here. +--cov="/vol/cov1.txt,/vol/cov2.txt" List your coverage files, link them here. OPTIONAL ARGUMENTS @@ -29,7 +29,7 @@ OPTIONAL ARGUMENTS --blastp="blastp" --hmm_search="hmmsearch" --hmm_scan="hmmscan" ---hmm_press="hmmpress" If you want to use a special version, change the name with its path. +--hmm_press="hmmpress" If you want to use a special version, change the name with its absolute path. E.g. blastp="blastp" -> blastp="/vol/tools/blast/blastp" --hmm_cpu=16 Numbers of cores to be used executing hmmsearch. From fcdee43c511354eb72f42f09267b9a07c73c1ba9 Mon Sep 17 00:00:00 2001 From: pbelmann Date: Wed, 18 Nov 2015 11:23:01 +0100 Subject: [PATCH 2/4] fix search.yaml path --- features/nextflow.feature | 20 ++++++++++++++++++-- main.nf | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/features/nextflow.feature b/features/nextflow.feature index 6fbd1e4..f4a9548 100644 --- a/features/nextflow.feature +++ b/features/nextflow.feature @@ -12,7 +12,7 @@ Feature: Verification steps for bioPipeline nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --cov ) """ - Scenario: Run the pipeline with valid parameters + Scenario Outline: Run the pipeline with valid parameters Given I copy the example data files: | source | dest | | db.faa | db.faa | @@ -28,7 +28,19 @@ Feature: Verification steps for bioPipeline | hmm | hmm | When I run the command: """ - ${NEXTFLOW}/nextflow run ${PWD}/main.nf -profile 'local' --genome="${PWD}/tmp/db.faa" --ncbi="${PWD}/tmp/blast.db" --blast_cpu=1 --blastp="blastp" --hmm_search="hmmsearch" --hmm_scan="hmmscan" --hmm_press="hmmpress" --input="${PWD}/tmp/hmm" --output="${PWD}/tmp/output" --cov="${PWD}/tmp/test.bam.coverage.txt" --search="${PWD}/tmp/search.yaml" --keywords="${PWD}/tmp/keywords.txt" + ${NEXTFLOW}/nextflow run ${PWD}/main.nf -profile 'local' \ + \ + \ + --blast_cpu=1 \ + --blastp="blastp" \ + --hmm_search="hmmsearch" \ + --hmm_scan="hmmscan" \ + --hmm_press="hmmpress" \ + \ + \ + \ + \ + \ """ Then the exit code should be 0 And the following files should exist and not be empty: @@ -37,3 +49,7 @@ Feature: Verification steps for bioPipeline | output/overview_new.txt | And the file "output/overview_new.txt" should contain 6 lines + Examples: + | faa | blast | input | output | cov | search | keywords | + | --genome="${PWD}/tmp/db.faa" | --ncbi="${PWD}/tmp/blast.db" | --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --cov="${PWD}/tmp/test.bam.coverage.txt" | --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" | + | --genome="tmp/db.faa" | --ncbi="tmp/blast.db" | --input="tmp/hmm" | --output="tmp/output" | --cov="tmp/test.bam.coverage.txt" | --search="tmp/search.yaml" | --keywords="tmp/keywords.txt" | \ No newline at end of file diff --git a/main.nf b/main.nf index 636e8f8..216ce0b 100644 --- a/main.nf +++ b/main.nf @@ -231,7 +231,7 @@ process createOverview { searchParam="" if [ -n !{params.search} ] then - searchParam="--search=!{params.search}" + searchParam="--search=!{searchFile}" fi !{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview} -faa !{baseDir} -o !{outputDir} ${searchParam} -c !{coverageFiles.join(' ')} ''' From f5a1636a74fa8514a1b43c22f8f223e2ad307804 Mon Sep 17 00:00:00 2001 From: pbelmann Date: Wed, 18 Nov 2015 12:46:34 +0100 Subject: [PATCH 3/4] stdout empty test added --- features/steps/cli.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/features/steps/cli.py b/features/steps/cli.py index 5488f4f..2fd8d7d 100644 --- a/features/steps/cli.py +++ b/features/steps/cli.py @@ -74,4 +74,10 @@ def step_impl(context, file_, lines_): @then(u'the {stream} should contain') def step_impl(context, stream): output = get_stream(context, stream) - nt.assert_in(context.text, output) \ No newline at end of file + nt.assert_in(context.text, output) + +@then(u'the {stream} should be empty') +def step_impl(context, stream): + output = get_stream(context, stream) + nt.assert_equal(output, "", + "The {} should be empty but contains:\n\n{}".format(stream, output)) From 647d6dbd1a341e3ae22169d03d36b553078c36b3 Mon Sep 17 00:00:00 2001 From: pbelmann Date: Wed, 18 Nov 2015 12:47:59 +0100 Subject: [PATCH 4/4] add feature test for relative path --- features/nextflow.feature | 6 +++--- main.nf | 12 ++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/features/nextflow.feature b/features/nextflow.feature index f4a9548..4901504 100644 --- a/features/nextflow.feature +++ b/features/nextflow.feature @@ -42,14 +42,14 @@ Feature: Verification steps for bioPipeline \ \ """ - Then the exit code should be 0 + Then the stderr should be empty + And the exit code should be 0 And the following files should exist and not be empty: | file | | output/overview.html | | output/overview_new.txt | And the file "output/overview_new.txt" should contain 6 lines - Examples: | faa | blast | input | output | cov | search | keywords | | --genome="${PWD}/tmp/db.faa" | --ncbi="${PWD}/tmp/blast.db" | --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --cov="${PWD}/tmp/test.bam.coverage.txt" | --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" | - | --genome="tmp/db.faa" | --ncbi="tmp/blast.db" | --input="tmp/hmm" | --output="tmp/output" | --cov="tmp/test.bam.coverage.txt" | --search="tmp/search.yaml" | --keywords="tmp/keywords.txt" | \ No newline at end of file + | --genome="db.faa" | --ncbi="blast.db" | --input="hmm" | --output="output" | --cov="test.bam.coverage.txt" | --search="search.yaml" | --keywords="keywords.txt" | \ No newline at end of file diff --git a/main.nf b/main.nf index 216ce0b..a49167b 100644 --- a/main.nf +++ b/main.nf @@ -13,11 +13,19 @@ if( params.help ) { } hmmDir = file(params.input) -searchFile = file(params.search) outputDir = file(params.output) ncbiDB = file(params.ncbi) genomeFaa = file(params.genome) -keywordsFile = file(params.keywords) + +keywordsFile = "" +if(params.keywords){ + keywordsFile = file(params.keywords) +} + +searchFile = "" +if(params.search){ + searchFile = file(params.search) +} process bootstrap {