From 3044d8b6026e1ecf4e20d5eb523bf0e52f5bc5d2 Mon Sep 17 00:00:00 2001
From: pbelmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Wed, 18 Nov 2015 10:48:47 +0100
Subject: [PATCH 1/4] close #148

---
 main.nf   | 54 ++++++++++++++++++++++++++++++------------------------
 usage.txt |  6 +++---
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/main.nf b/main.nf
index cdac309..636e8f8 100644
--- a/main.nf
+++ b/main.nf
@@ -5,12 +5,20 @@ params.search = ""
 params.keywords = ""
 params.help = ""
 
+
 if( params.help ) { 
     usage = file("$baseDir/usage.txt")   
     print usage.text
     return 
 }
 
+hmmDir = file(params.input)
+searchFile = file(params.search)
+outputDir = file(params.output)
+ncbiDB = file(params.ncbi)
+genomeFaa = file(params.genome)
+keywordsFile = file(params.keywords)
+
 process bootstrap {
 
    executor 'local'
@@ -22,9 +30,8 @@ process bootstrap {
    file allHmm
 
    shell:
-   outputDir = file(params.output)
    if(outputDir.exists()) 
-      exit(0, "Directory ${params.output} already exists. Please remove it or assign another output directory.")
+      exit(0, "Directory ${outputDir} already exists. Please remove it or assign another output directory.")
    else
       outputDir.mkdir()
       """
@@ -33,13 +40,13 @@ process bootstrap {
       then
           make -C !{baseDir} install 
       fi
-      cat !{params.input}/*.hmm > allHmm
+      cat !{hmmDir}/*.hmm > allHmm
       ${params.hmm_press} allHmm
       """
 }
 
 fastaChunk = Channel.create()
-list = Channel.fromPath(params.genome).splitFasta(by:6000,file:true).collectFile();
+list = Channel.fromPath(genomeFaa).splitFasta(by:6000,file:true).collectFile();
 list.spread(allHmm).into(fastaChunk)
 
 process hmmFolderScan {
@@ -107,11 +114,11 @@ process getFasta {
     '''
     #!/bin/sh
     contig=`echo "!{contigLine} " | cut -d ' ' -f 4`
-    grep  "$contig " !{params.genome} > uniq_header
+    grep  "$contig " !{genomeFaa} > uniq_header
     buffer=`cat uniq_header | cut -c 2-`
     contig=`echo $buffer | cut -d" " -f1`
-    awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{params.genome}  > !{baseDir}/$contig.faa
-    awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{params.genome}  > uniq_out
+    awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{genomeFaa}  > !{baseDir}/$contig.faa
+    awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{genomeFaa}  > uniq_out
     '''  
 
 }
@@ -140,7 +147,7 @@ process blastSeqTxt {
     '''
     #!/bin/sh
     contig=`grep ">" !{uniq_seq} | cut -d" " -f1 | cut -c 2-`
-    !{params.blastp} -db !{params.ncbi} -outfmt '!{order}' -query "!{uniq_seq}" -out "!{baseDir}/$contig.txt" -num_threads !{params.blast_cpu}
+    !{params.blastp} -db !{ncbiDB} -outfmt '!{order}' -query "!{uniq_seq}" -out "!{baseDir}/$contig.txt" -num_threads !{params.blast_cpu}
     echo "$contig" > blast_out
     '''
 }
@@ -165,7 +172,7 @@ process blastSeqHtml {
     '''
     #!/bin/sh
     contig=`grep ">" !{uniq_seqHtml} | cut -d" " -f1 | cut -c 2-`
-    !{params.blastp} -db !{params.ncbi} -query "!{uniq_seqHtml}" -html -out "!{params.output}/$contig.html" -num_threads !{params.blast_cpu} 
+    !{params.blastp} -db !{ncbiDB} -query "!{uniq_seqHtml}" -html -out "!{outputDir}/$contig.html" -num_threads !{params.blast_cpu} 
     '''
 
 }
@@ -173,7 +180,7 @@ process blastSeqHtml {
 PYTHON="$baseDir/vendor/python/bin/python"
 
 coverages = Channel.create()
-coverages.bind(params.cov.replaceAll(',',' '))
+coverages.bind(params.cov.split(',').collect{file(it)}.join(' '))
 
 bam = Channel.from(params.bam)
 sortedIndexedBam = bam.flatMap{ files  -> files.split(',')} 
@@ -216,7 +223,7 @@ process createOverview {
    val coverageFiles
 
    output:
-   val params.output + '/overview.txt' into over
+   val outputDir + '/overview.txt' into over
 
    shell:
    '''
@@ -226,7 +233,7 @@ process createOverview {
    then
        searchParam="--search=!{params.search}"
    fi
-   !{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview}  -faa !{baseDir} -o !{params.output}  ${searchParam}  -c !{coverageFiles.join(' ')} 
+   !{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview}  -faa !{baseDir} -o !{outputDir}  ${searchParam}  -c !{coverageFiles.join(' ')} 
    '''
 }
 
@@ -238,14 +245,14 @@ process linkSearch {
 
    input: 
    val x from over
-   params.output
+   outputDir
 
    output:
-   val params.output into inputF 
+   val outputDir into inputF 
 
    """
    #!/bin/sh
-   $PYTHON $baseDir/scripts/link_search.py -o ${x} -out ${params.output} 
+   $PYTHON $baseDir/scripts/link_search.py -o ${x} -out ${outputDir} 
    """
 }
 
@@ -260,26 +267,26 @@ process folderToPubmed {
 
    input:
    val inp from inputF
-   params.output
+   outputDir
 
    output:
-   val params.output + '/all.pubHits'  into pub
-   val params.output + '/overview.txt' into over2
+   val outputDir + '/all.pubHits'  into pub
+   val outputDir + '/overview.txt' into over2
 
    shell:
    '''
    #!/bin/sh
    keywords=""
-   if [ -f !{params.keywords} ]
+   if [ -f !{keywordsFile} ]
    then
-         keywords=!{params.keywords}
+         keywords=!{keywordsFile}
    else
          emptyKeywords="keywords.txt"
          touch $emptyKeywords 
          keywords=$emptyKeywords
    fi
    echo $keywords
-   sh !{baseDir}/scripts/FolderToPubmed.sh !{inp} !{params.output}  !{baseDir}/scripts/UrltoPubmedID.sh  ${keywords} 
+   sh !{baseDir}/scripts/FolderToPubmed.sh !{inp} !{outputDir}  !{baseDir}/scripts/UrltoPubmedID.sh  ${keywords} 
    '''
 }
 
@@ -295,7 +302,7 @@ process linkAssignment {
    val p from pub
 
    output:
-   val params.output + '/overview_new.txt' into overNew
+   val outputDir + '/overview_new.txt' into overNew
 
    """
    #!/bin/sh
@@ -314,8 +321,7 @@ process buildHtml {
 
     """
     #!/bin/sh
-    $PYTHON $baseDir/scripts/web/controller.py -o ${overview} -out ${params.output} -conf $baseDir/scripts/web/config.yaml -templates $baseDir/scripts/web/app/templates
+    $PYTHON $baseDir/scripts/web/controller.py -o ${overview} -out ${outputDir} -conf $baseDir/scripts/web/config.yaml -templates $baseDir/scripts/web/app/templates
     """
 
 }
-
diff --git a/usage.txt b/usage.txt
index 324598e..ad602b7 100644
--- a/usage.txt
+++ b/usage.txt
@@ -11,7 +11,7 @@ nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input
 DESCRIPTION
 MeRaGENE 0.1.0
 
---genome="/vol/genomeDat/test.db"      Your genome-database to search in.
+--genome="/vol/genomeDat/test.faa"     Your FASTA file (.faa) to search in.
 
 --ncbi="/vol/blastDat/blast.db"        Your blast-database to search in.
 
@@ -19,7 +19,7 @@ MeRaGENE 0.1.0
 
 --output="/vol/project/output"         A folder path that the pipeline should produce.
 
---cov="/vol/project/coverage1.txt,/vol/project/coverage2.txt"  List your coverage files, link them here.
+--cov="/vol/cov1.txt,/vol/cov2.txt"    Comma-separated list of your coverage files.
 
 
 OPTIONAL ARGUMENTS
@@ -29,7 +29,7 @@ OPTIONAL ARGUMENTS
 --blastp="blastp"
 --hmm_search="hmmsearch"
 --hmm_scan="hmmscan"
---hmm_press="hmmpress"                 If you want to use a special version, change the name with its path.
+--hmm_press="hmmpress"                 If you want to use a special version, replace the name with its absolute path.
                                        E.g. blastp="blastp" -> blastp="/vol/tools/blast/blastp"
     
 --hmm_cpu=16                           Numbers of cores to be used executing hmmsearch.

From fcdee43c511354eb72f42f09267b9a07c73c1ba9 Mon Sep 17 00:00:00 2001
From: pbelmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Wed, 18 Nov 2015 11:23:01 +0100
Subject: [PATCH 2/4] fix search.yaml path

---
 features/nextflow.feature | 20 ++++++++++++++++++--
 main.nf                   |  2 +-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/features/nextflow.feature b/features/nextflow.feature
index 6fbd1e4..f4a9548 100644
--- a/features/nextflow.feature
+++ b/features/nextflow.feature
@@ -12,7 +12,7 @@ Feature: Verification steps for bioPipeline
       nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --cov )
       """
 
-  Scenario: Run the pipeline with valid parameters
+  Scenario Outline: Run the pipeline with valid parameters
     Given I copy the example data files:
       | source           | dest        |
       | db.faa           | db.faa      |  
@@ -28,7 +28,19 @@ Feature: Verification steps for bioPipeline
       | hmm              | hmm         |  
     When I run the command:
       """
-      ${NEXTFLOW}/nextflow run ${PWD}/main.nf -profile 'local' --genome="${PWD}/tmp/db.faa" --ncbi="${PWD}/tmp/blast.db" --blast_cpu=1 --blastp="blastp" --hmm_search="hmmsearch" --hmm_scan="hmmscan" --hmm_press="hmmpress" --input="${PWD}/tmp/hmm" --output="${PWD}/tmp/output"  --cov="${PWD}/tmp/test.bam.coverage.txt"  --search="${PWD}/tmp/search.yaml"  --keywords="${PWD}/tmp/keywords.txt"
+        ${NEXTFLOW}/nextflow run ${PWD}/main.nf -profile 'local' \
+         <faa> \
+         <blast> \
+         --blast_cpu=1 \
+         --blastp="blastp" \
+         --hmm_search="hmmsearch" \
+         --hmm_scan="hmmscan" \
+         --hmm_press="hmmpress" \
+         <input> \
+         <output> \
+         <cov> \
+         <search> \
+         <keywords> \
       """
     Then the exit code should be 0
     And the following files should exist and not be empty:
@@ -37,3 +49,7 @@ Feature: Verification steps for bioPipeline
       | output/overview_new.txt  |
     And the file "output/overview_new.txt" should contain 6 lines
 
+    Examples:
+      | faa                              | blast                        | input                     | output                       | cov                                      | search                             | keywords                             |
+      | --genome="${PWD}/tmp/db.faa"     | --ncbi="${PWD}/tmp/blast.db" |  --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --cov="${PWD}/tmp/test.bam.coverage.txt" |  --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" |
+      | --genome="tmp/db.faa"            | --ncbi="tmp/blast.db"        |  --input="tmp/hmm"        | --output="tmp/output"        |  --cov="tmp/test.bam.coverage.txt"       |  --search="tmp/search.yaml"        | --keywords="tmp/keywords.txt"  |
\ No newline at end of file
diff --git a/main.nf b/main.nf
index 636e8f8..216ce0b 100644
--- a/main.nf
+++ b/main.nf
@@ -231,7 +231,7 @@ process createOverview {
    searchParam=""
    if [ -n !{params.search} ]
    then
-       searchParam="--search=!{params.search}"
+       searchParam="--search=!{searchFile}"
    fi
    !{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview}  -faa !{baseDir} -o !{outputDir}  ${searchParam}  -c !{coverageFiles.join(' ')} 
    '''

From f5a1636a74fa8514a1b43c22f8f223e2ad307804 Mon Sep 17 00:00:00 2001
From: pbelmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Wed, 18 Nov 2015 12:46:34 +0100
Subject: [PATCH 3/4] add step asserting that an output stream is empty

---
 features/steps/cli.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/features/steps/cli.py b/features/steps/cli.py
index 5488f4f..2fd8d7d 100644
--- a/features/steps/cli.py
+++ b/features/steps/cli.py
@@ -74,4 +74,10 @@ def step_impl(context, file_, lines_):
 @then(u'the {stream} should contain')
 def step_impl(context, stream):
     output = get_stream(context, stream)
-    nt.assert_in(context.text, output)
\ No newline at end of file
+    nt.assert_in(context.text, output)
+
+@then(u'the {stream} should be empty')
+def step_impl(context, stream):
+    """Fail unless get_stream(context, stream) returns the empty string; the failure message shows the content."""
+    captured = get_stream(context, stream)
+    nt.assert_equal(captured, "", "The {} should be empty but contains:\n\n{}".format(stream, captured))

From 647d6dbd1a341e3ae22169d03d36b553078c36b3 Mon Sep 17 00:00:00 2001
From: pbelmann <pbelmann@cebitec.uni-bielefeld.de>
Date: Wed, 18 Nov 2015 12:47:59 +0100
Subject: [PATCH 4/4] add feature test for relative path

---
 features/nextflow.feature |  6 +++---
 main.nf                   | 12 ++++++++++--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/features/nextflow.feature b/features/nextflow.feature
index f4a9548..4901504 100644
--- a/features/nextflow.feature
+++ b/features/nextflow.feature
@@ -42,14 +42,14 @@ Feature: Verification steps for bioPipeline
          <search> \
          <keywords> \
       """
-    Then the exit code should be 0
+    Then the stderr should be empty
+    And the exit code should be 0
     And the following files should exist and not be empty:
       | file                     |
       | output/overview.html     |
       | output/overview_new.txt  |
     And the file "output/overview_new.txt" should contain 6 lines
-
     Examples:
       | faa                              | blast                        | input                     | output                       | cov                                      | search                             | keywords                             |
       | --genome="${PWD}/tmp/db.faa"     | --ncbi="${PWD}/tmp/blast.db" |  --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --cov="${PWD}/tmp/test.bam.coverage.txt" |  --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" |
-      | --genome="tmp/db.faa"            | --ncbi="tmp/blast.db"        |  --input="tmp/hmm"        | --output="tmp/output"        |  --cov="tmp/test.bam.coverage.txt"       |  --search="tmp/search.yaml"        | --keywords="tmp/keywords.txt"  |
\ No newline at end of file
+      | --genome="db.faa"                | --ncbi="blast.db"            |  --input="hmm"            | --output="output"            |  --cov="test.bam.coverage.txt"           |  --search="search.yaml"            | --keywords="keywords.txt"  |
\ No newline at end of file
diff --git a/main.nf b/main.nf
index 216ce0b..a49167b 100644
--- a/main.nf
+++ b/main.nf
@@ -13,11 +13,19 @@ if( params.help ) {
 }
 
 hmmDir = file(params.input)
-searchFile = file(params.search)
 outputDir = file(params.output)
 ncbiDB = file(params.ncbi)
 genomeFaa = file(params.genome)
-keywordsFile = file(params.keywords)
+
+keywordsFile = ""
+if(params.keywords){
+	keywordsFile = file(params.keywords)
+}
+
+searchFile = ""
+if(params.search){
+	searchFile = file(params.search)
+}
 
 process bootstrap {