
Commit

close #151
pbelmann committed Nov 22, 2015
1 parent ac79c64 commit e305773
Showing 7 changed files with 20 additions and 23 deletions.
5 changes: 2 additions & 3 deletions README.md
@@ -21,7 +21,7 @@ http://metagenomics.github.io/MeRaGENE/
6. type in `"curl -fsSL get.nextflow.io | bash"` to download nextflow into this folder
7. make sure that the binaries stated in the **Requirements** section below are installed on your machine
8. after nextflow is downloaded, replace all the `"YOUR_***"` parts in the following command with your own paths
- `"./nextflow run main.nf --genome="YOUR_FAA_FILE_OF_A_METAGENOME" --ncbi="YOUR_BLAST-DB" --input="YOUR_INPUT-HMM_FOLDER" --output="YOUR_OUTPUT-FOLDER" --cov="COVERAGE_FILES" --keywords="YOUR_KEYWORD-FILE"`
- `"./nextflow run main.nf --genome="YOUR_FAA_FILE_OF_A_METAGENOME" --ncbi="YOUR_BLAST-DB" --input="YOUR_INPUT-HMM_FOLDER" --output="YOUR_OUTPUT-FOLDER" --bam="READ_ASSEMBLY_ALIGNMENT" --keywords="YOUR_KEYWORD-FILE"`
9. after replacing everything, run your command
10. that's it! The pipeline is now running and crunching your data. Look for overview.txt or overview_new.txt in your output folder after the pipeline has finished
- if you have further questions:
@@ -43,7 +43,6 @@ http://metagenomics.github.io/MeRaGENE/

If you want or need to make further changes to your pipeline, all available options are listed here: [usage](usage.txt)


### Update MeRaGENE

If you already have MeRaGENE installed, just run
@@ -69,7 +68,7 @@ Where `type` can be `feature` or `fix` and `name` is a short description of the

Example: `feature/development-guide`

Merge this branch by providing a pull request. Please update the [change log](CHANGELOG.md) before merging.
Merge a branch by providing a pull request to a release/x.x.x branch. Please update the [change log](CHANGELOG.md) before merging.

### Development Scripts

1 change: 0 additions & 1 deletion features/data/test.bam.coverage.txt

This file was deleted.

15 changes: 8 additions & 7 deletions features/nextflow.feature
@@ -9,18 +9,19 @@ Feature: Verification steps for bioPipeline
And the stdout should contain:
"""
USAGE
nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --cov )
nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --bam )
"""

Scenario Outline: Run the pipeline with valid parameters
Given I copy the example data files:
| source | dest |
| db.faa | db.faa |
| db.faa | db.faa |
| blast.db | blast.db |
| blast.db.phr | blast.db.phr|
| blast.db.psq | blast.db.psq|
| blast.db.pin | blast.db.pin|
| test.bam.coverage.txt |test.bam.coverage.txt |
| test.bam | test.bam |
| test.bam.bai | test.bam.bai|
| search.yaml | search.yaml |
| keywords.txt | keywords.txt|
And I copy the example data directories:
@@ -38,7 +39,7 @@ Feature: Verification steps for bioPipeline
--hmm_press="hmmpress" \
<input> \
<output> \
<cov> \
<bam> \
<search> \
<keywords> \
"""
@@ -50,6 +51,6 @@ Feature: Verification steps for bioPipeline
| output/overview_new.txt |
And the file "output/overview_new.txt" should contain 6 lines
Examples:
| faa | blast | input | output | cov | search | keywords |
| --genome="${PWD}/tmp/db.faa" | --ncbi="${PWD}/tmp/blast.db" | --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --cov="${PWD}/tmp/test.bam.coverage.txt" | --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" |
| --genome="db.faa" | --ncbi="blast.db" | --input="hmm" | --output="output" | --cov="test.bam.coverage.txt" | --search="search.yaml" | --keywords="keywords.txt" |
| faa | blast | input | output | bam | search | keywords |
| --genome="${PWD}/tmp/db.faa" | --ncbi="${PWD}/tmp/blast.db" | --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --bam="${PWD}/tmp/test.bam" | --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" |
| --genome="db.faa" | --ncbi="blast.db" | --input="hmm" | --output="output" | --bam="test.bam" | --search="search.yaml" | --keywords="keywords.txt" |
12 changes: 5 additions & 7 deletions main.nf
@@ -4,6 +4,7 @@ params.vendor = "$baseDir/vendor"
params.search = ""
params.keywords = ""
params.help = ""
params.cov = ""


if( params.help ) {
@@ -188,10 +189,7 @@ process blastSeqHtml {
PYTHON="$baseDir/vendor/python/bin/python"

coverages = Channel.create()
coverages.bind(params.cov.split(',').collect{file(it)}.join(' '))

bam = Channel.from(params.bam)
sortedIndexedBam = bam.flatMap{ files -> files.split(',')}
sortedIndexedBam = Channel.from(params.bam.split(',').collect{file(it)})

process bamToCoverage {

@@ -203,20 +201,20 @@ process bamToCoverage {
val bam from sortedIndexedBam

output:
file coverage into coverages
file "${bam.baseName}" into coverages

when:
bam != ''

script:
"""
#!/bin/sh
$PYTHON scripts/bam_to_coverage.py ${params.sortedIndexedBam} > coverage
$PYTHON ${baseDir}/scripts/bam_to_coverage.py ${bam} > ${bam.baseName}
"""
}

coverageFiles = Channel.create()
coverages.toList().into(coverageFiles)
coverages.collectFile().toList().into(coverageFiles)

uniq_overview = uniq_overview.collectFile()
process createOverview {
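The reworked main.nf builds the `sortedIndexedBam` channel directly from the comma-separated `--bam` argument and lets `bamToCoverage` run `scripts/bam_to_coverage.py` once per BAM file, writing each result to a file named after the BAM's base name before `collectFile()` merges them. As a rough, hypothetical sketch of what such a coverage script could do (assuming pysam and a sorted, indexed BAM; the repository's actual `scripts/bam_to_coverage.py` may differ), consider:

```python
#!/usr/bin/env python
# Hypothetical sketch only -- NOT the repository's scripts/bam_to_coverage.py.
# Computes the mean per-base coverage for every contig in a sorted, indexed BAM
# and prints one tab-separated line per contig: <contig> <mean_coverage>.
import sys

import pysam


def mean_coverage_per_contig(bam_path):
    with pysam.AlignmentFile(bam_path, "rb") as bam:
        for contig, length in zip(bam.references, bam.lengths):
            # count_coverage returns four arrays (A, C, G, T counts per position)
            counts = bam.count_coverage(contig)
            total = sum(sum(base_counts) for base_counts in counts)
            yield contig, total / float(length)


if __name__ == "__main__":
    for contig, cov in mean_coverage_per_contig(sys.argv[1]):
        sys.stdout.write("%s\t%.2f\n" % (contig, cov))
```

Writing one coverage file per BAM, named by base name, fits the overview step below, which labels coverage columns by file base name.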
2 changes: 1 addition & 1 deletion scripts/create_overview.py
@@ -26,7 +26,7 @@ def writeHeader(coverages, file, insertGroup):
util.LINKS, util.GENE_SEQUENCE]
if insertGroup:
header.insert(2, util.GROUP)
file.write(("\t".join(coverages + header)) + '\n')
file.write(("\t".join(map(lambda cov:os.path.basename(cov),coverages) + header)) + '\n')


def move_txt_faa_files(output, file_txt, file_faa):
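The change to `writeHeader()` strips directory prefixes from the coverage paths, so the overview header now shows only the file names. A minimal, self-contained illustration of the before/after behaviour, using made-up paths and column names:

```python
import os

# Made-up inputs for illustration only
coverages = ["/vol/project/sample1.bam", "/vol/project/sample2.bam"]
header = ["gene", "hmm", "best_hit"]

# Old behaviour: full paths end up in the header line
print("\t".join(coverages + header))
# /vol/project/sample1.bam  /vol/project/sample2.bam  gene  hmm  best_hit

# New behaviour: only base names are written, as in the patched writeHeader()
print("\t".join([os.path.basename(c) for c in coverages] + header))
# sample1.bam  sample2.bam  gene  hmm  best_hit
```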
2 changes: 1 addition & 1 deletion test/test_create_overview.py
@@ -30,7 +30,7 @@ def test_determine_config_values():


def test_write_header():
coverages = ["cov1", "cov2"]
coverages = ["/test/path/cov1", "/test/path/cov2"]
_, path = tempfile.mkstemp()
with open(path, "r+") as temp_file:
create_overview.writeHeader(coverages, temp_file, True)
6 changes: 3 additions & 3 deletions usage.txt
@@ -6,10 +6,10 @@
|_| |_|\\___|_| \\_\\__,_|\\_____|______|_| \\_|______|

USAGE
nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --cov )
nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --bam )

DESCRIPTION
MeRaGENE 0.1.0
MeRaGENE 0.2.0

--genome="/vol/genomeDat/test.faa" The protein FASTA (.faa) file to search in.

@@ -19,7 +19,7 @@ MeRaGENE 0.1.0

--output="/vol/project/output" The folder the pipeline should write its output to.

--cov="/vol/cov1.txt,/vol/cov2.txt" List your coverage files, link them here.
--bam="/vol/alignment1.bam,/vol/alignment2.bam" BAM (read-to-assembly) alignment file(s); separate multiple files with commas.


OPTIONAL ARGUMENTS
