Merge pull request #152 from metagenomics/feature/relative-path
relative path
pbelmann committed Nov 18, 2015
2 parents 073c971 + 647d6db commit 45c1efe
Showing 4 changed files with 69 additions and 33 deletions.
24 changes: 20 additions & 4 deletions features/nextflow.feature
@@ -12,7 +12,7 @@ Feature: Verification steps for bioPipeline
nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --cov )
"""

Scenario: Run the pipeline with valid parameters
Scenario Outline: Run the pipeline with valid parameters
Given I copy the example data files:
| source | dest |
| db.faa | db.faa |
@@ -28,12 +28,28 @@ Feature: Verification steps for bioPipeline
| hmm | hmm |
When I run the command:
"""
${NEXTFLOW}/nextflow run ${PWD}/main.nf -profile 'local' --genome="${PWD}/tmp/db.faa" --ncbi="${PWD}/tmp/blast.db" --blast_cpu=1 --blastp="blastp" --hmm_search="hmmsearch" --hmm_scan="hmmscan" --hmm_press="hmmpress" --input="${PWD}/tmp/hmm" --output="${PWD}/tmp/output" --cov="${PWD}/tmp/test.bam.coverage.txt" --search="${PWD}/tmp/search.yaml" --keywords="${PWD}/tmp/keywords.txt"
${NEXTFLOW}/nextflow run ${PWD}/main.nf -profile 'local' \
<faa> \
<blast> \
--blast_cpu=1 \
--blastp="blastp" \
--hmm_search="hmmsearch" \
--hmm_scan="hmmscan" \
--hmm_press="hmmpress" \
<input> \
<output> \
<cov> \
<search> \
<keywords> \
"""
Then the exit code should be 0
Then the stderr should be empty
And the exit code should be 0
And the following files should exist and not be empty:
| file |
| output/overview.html |
| output/overview_new.txt |
And the file "output/overview_new.txt" should contain 6 lines

Examples:
| faa | blast | input | output | cov | search | keywords |
| --genome="${PWD}/tmp/db.faa" | --ncbi="${PWD}/tmp/blast.db" | --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --cov="${PWD}/tmp/test.bam.coverage.txt" | --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" |
| --genome="db.faa" | --ncbi="blast.db" | --input="hmm" | --output="output" | --cov="test.bam.coverage.txt" | --search="search.yaml" | --keywords="keywords.txt" |
8 changes: 7 additions & 1 deletion features/steps/cli.py
@@ -74,4 +74,10 @@ def step_impl(context, file_, lines_):
@then(u'the {stream} should contain')
def step_impl(context, stream):
output = get_stream(context, stream)
nt.assert_in(context.text, output)
nt.assert_in(context.text, output)

@then(u'the {stream} should be empty')
def step_impl(context, stream):
output = get_stream(context, stream)
nt.assert_equal(output, "",
"The {} should be empty but contains:\n\n{}".format(stream, output))
64 changes: 39 additions & 25 deletions main.nf
@@ -5,12 +5,28 @@ params.search = ""
params.keywords = ""
params.help = ""


if( params.help ) {
usage = file("$baseDir/usage.txt")
print usage.text
return
}

hmmDir = file(params.input)
outputDir = file(params.output)
ncbiDB = file(params.ncbi)
genomeFaa = file(params.genome)

keywordsFile = ""
if(params.keywords){
keywordsFile = file(params.keywords)
}

searchFile = ""
if(params.search){
searchFile = file(params.search)
}

process bootstrap {

executor 'local'
@@ -22,9 +38,8 @@ process bootstrap {
file allHmm

shell:
outputDir = file(params.output)
if(outputDir.exists())
exit(0, "Directory ${params.output} already exists. Please remove it or assign another output directory.")
exit(0, "Directory ${outputDir} already exists. Please remove it or assign another output directory.")
else
outputDir.mkdir()
"""
@@ -33,13 +48,13 @@ process bootstrap {
then
make -C !{baseDir} install
fi
cat !{params.input}/*.hmm > allHmm
cat !{hmmDir}/*.hmm > allHmm
${params.hmm_press} allHmm
"""
}

fastaChunk = Channel.create()
list = Channel.fromPath(params.genome).splitFasta(by:6000,file:true).collectFile();
list = Channel.fromPath(genomeFaa).splitFasta(by:6000,file:true).collectFile();
list.spread(allHmm).into(fastaChunk)

process hmmFolderScan {
@@ -107,11 +122,11 @@ process getFasta {
'''
#!/bin/sh
contig=`echo "!{contigLine} " | cut -d ' ' -f 4`
grep "$contig " !{params.genome} > uniq_header
grep "$contig " !{genomeFaa} > uniq_header
buffer=`cat uniq_header | cut -c 2-`
contig=`echo $buffer | cut -d" " -f1`
awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{params.genome} > !{baseDir}/$contig.faa
awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{params.genome} > uniq_out
awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{genomeFaa} > !{baseDir}/$contig.faa
awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{genomeFaa} > uniq_out
'''

}
@@ -140,7 +155,7 @@ process blastSeqTxt {
'''
#!/bin/sh
contig=`grep ">" !{uniq_seq} | cut -d" " -f1 | cut -c 2-`
!{params.blastp} -db !{params.ncbi} -outfmt '!{order}' -query "!{uniq_seq}" -out "!{baseDir}/$contig.txt" -num_threads !{params.blast_cpu}
!{params.blastp} -db !{ncbiDB} -outfmt '!{order}' -query "!{uniq_seq}" -out "!{baseDir}/$contig.txt" -num_threads !{params.blast_cpu}
echo "$contig" > blast_out
'''
}
@@ -165,15 +180,15 @@ process blastSeqHtml {
'''
#!/bin/sh
contig=`grep ">" !{uniq_seqHtml} | cut -d" " -f1 | cut -c 2-`
!{params.blastp} -db !{params.ncbi} -query "!{uniq_seqHtml}" -html -out "!{params.output}/$contig.html" -num_threads !{params.blast_cpu}
!{params.blastp} -db !{ncbiDB} -query "!{uniq_seqHtml}" -html -out "!{outputDir}/$contig.html" -num_threads !{params.blast_cpu}
'''

}

PYTHON="$baseDir/vendor/python/bin/python"

coverages = Channel.create()
coverages.bind(params.cov.replaceAll(',',' '))
coverages.bind(params.cov.split(',').collect{file(it)}.join(' '))

bam = Channel.from(params.bam)
sortedIndexedBam = bam.flatMap{ files -> files.split(',')}
@@ -216,17 +231,17 @@ process createOverview {
val coverageFiles

output:
val params.output + '/overview.txt' into over
val outputDir + '/overview.txt' into over

shell:
'''
#!/bin/sh
searchParam=""
if [ -n !{params.search} ]
then
searchParam="--search=!{params.search}"
searchParam="--search=!{searchFile}"
fi
!{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview} -faa !{baseDir} -o !{params.output} ${searchParam} -c !{coverageFiles.join(' ')}
!{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview} -faa !{baseDir} -o !{outputDir} ${searchParam} -c !{coverageFiles.join(' ')}
'''
}

@@ -238,14 +253,14 @@ process linkSearch {

input:
val x from over
params.output
outputDir

output:
val params.output into inputF
val outputDir into inputF

"""
#!/bin/sh
$PYTHON $baseDir/scripts/link_search.py -o ${x} -out ${params.output}
$PYTHON $baseDir/scripts/link_search.py -o ${x} -out ${outputDir}
"""
}

@@ -260,26 +275,26 @@ process folderToPubmed {

input:
val inp from inputF
params.output
outputDir

output:
val params.output + '/all.pubHits' into pub
val params.output + '/overview.txt' into over2
val outputDir + '/all.pubHits' into pub
val outputDir + '/overview.txt' into over2

shell:
'''
#!/bin/sh
keywords=""
if [ -f !{params.keywords} ]
if [ -f !{keywordsFile} ]
then
keywords=!{params.keywords}
keywords=!{keywordsFile}
else
emptyKeywords="keywords.txt"
touch $emptyKeywords
keywords=$emptyKeywords
fi
echo $keywords
sh !{baseDir}/scripts/FolderToPubmed.sh !{inp} !{params.output} !{baseDir}/scripts/UrltoPubmedID.sh ${keywords}
sh !{baseDir}/scripts/FolderToPubmed.sh !{inp} !{outputDir} !{baseDir}/scripts/UrltoPubmedID.sh ${keywords}
'''
}

@@ -295,7 +310,7 @@ process linkAssignment {
val p from pub

output:
val params.output + '/overview_new.txt' into overNew
val outputDir + '/overview_new.txt' into overNew

"""
#!/bin/sh
@@ -314,8 +329,7 @@ process buildHtml {

"""
#!/bin/sh
$PYTHON $baseDir/scripts/web/controller.py -o ${overview} -out ${params.output} -conf $baseDir/scripts/web/config.yaml -templates $baseDir/scripts/web/app/templates
$PYTHON $baseDir/scripts/web/controller.py -o ${overview} -out ${outputDir} -conf $baseDir/scripts/web/config.yaml -templates $baseDir/scripts/web/app/templates
"""

}

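
The recurring change in main.nf above is that raw parameter strings (params.genome, params.output, params.cov, ...) are wrapped once in Nextflow's file() helper and the resulting objects are reused everywhere, so relative values such as "db.faa" now work alongside absolute paths. The sketch below only illustrates the resolution idea in Python for readers unfamiliar with Nextflow; it is not part of the pipeline, and it assumes Nextflow's documented behaviour of resolving relative paths against the launch directory.

# Illustrative only: mimic resolving a path parameter the way file() does,
# so "db.faa" and "/vol/project/tmp/db.faa" point at the same file when the
# pipeline is launched from /vol/project/tmp.
from pathlib import Path

def resolve_param(value, launch_dir):
    path = Path(value)
    return path if path.is_absolute() else Path(launch_dir) / path

print(resolve_param("db.faa", "/vol/project/tmp"))               # /vol/project/tmp/db.faa
print(resolve_param("/vol/project/tmp/db.faa", "/vol/project/tmp"))  # unchanged
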
6 changes: 3 additions & 3 deletions usage.txt
@@ -11,15 +11,15 @@ nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input
DESCRIPTION
MeRaGENE 0.1.0

--genome="/vol/genomeDat/test.db" Your genome-database to search in.
--genome="/vol/genomeDat/test.faa" Your faa to search in.

--ncbi="/vol/blastDat/blast.db" Your blast-database to search in.

--input="/vol/project/hmmModels" A folder containing hmm models. All hmm models in this folder are used for searching.

--output="/vol/project/output" A folder path that the pipeline should produce.

--cov="/vol/project/coverage1.txt,/vol/project/coverage2.txt" List your coverage files, link them here.
--cov="/vol/cov1.txt,/vol/cov2.txt" List your coverage files, link them here.


OPTIONAL ARGUMENTS
@@ -29,7 +29,7 @@ OPTIONAL ARGUMENTS
--blastp="blastp"
--hmm_search="hmmsearch"
--hmm_scan="hmmscan"
--hmm_press="hmmpress" If you want to use a special version, change the name with its path.
--hmm_press="hmmpress" If you want to use a special version, change the name with its absolute path.
E.g. blastp="blastp" -> blastp="/vol/tools/blast/blastp"

--hmm_cpu=16 Numbers of cores to be used executing hmmsearch.