diff --git a/README.md b/README.md index 6b35cab..6b1eb0d 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ http://metagenomics.github.io/MeRaGENE/ 6. type in `"curl -fsSL get.nextflow.io | bash"` to download nextflow into this folder 7. make sure that the binaries stated in the **Requirements** section below are installed on your machine 8. after nextflow is downloaded, replace all the `"YOUR_***"` parts in the following command with your own paths - - `"./nextflow run main.nf --genome="YOUR_FAA_FILE_OF_A_METAGENOME" --ncbi="YOUR_BLAST-DB" --input="YOUR_INPUT-HMM_FOLDER" --output="YOUR_OUTPUT-FOLDER" --cov="COVERAGE_FILES" --keywords="YOUR_KEYWORD-FILE"` + - `"./nextflow run main.nf --genome="YOUR_FAA_FILE_OF_A_METAGENOME" --ncbi="YOUR_BLAST-DB" --input="YOUR_INPUT-HMM_FOLDER" --output="YOUR_OUTPUT-FOLDER" --bam="READ_ASSEMBLY_ALIGNMENT" --keywords="YOUR_KEYWORD-FILE"` 9. after replacing everything, run your command 10. that's it ! The pipeline is running and crunching your data. Look for the overview.txt or. overview_new.txt in your output folder after the pipeline is finished - if you have further questions: @@ -43,7 +43,6 @@ http://metagenomics.github.io/MeRaGENE/ If you want/have to make further changes to your pipeline, here are all possibilities: [usage](usage.txt) - ### Update MeRaGENE If you have already MeRaGENE installed, just run @@ -69,7 +68,7 @@ Where `type` can be `feature` or `fix` and `name` is a short description of the Example: `feature/development-guide` -Merge this branch by providing a pull request. Please update the [change log](CHANGELOG.md) before merging. +Merge a branch by providing a pull request to a release/x.x.x branch. Please update the [change log](CHANGELOG.md) before merging. 
### Development Scripts diff --git a/features/data/test.bam.coverage.txt b/features/data/test.bam.coverage.txt deleted file mode 100644 index 48f3382..0000000 --- a/features/data/test.bam.coverage.txt +++ /dev/null @@ -1 +0,0 @@ -250278:contig-5631000033 5587 749 20.0995167352783 diff --git a/features/nextflow.feature b/features/nextflow.feature index 4901504..bca4b6a 100644 --- a/features/nextflow.feature +++ b/features/nextflow.feature @@ -9,18 +9,19 @@ Feature: Verification steps for bioPipeline And the stdout should contain: """ USAGE - nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --cov ) + nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --bam ) """ Scenario Outline: Run the pipeline with valid parameters Given I copy the example data files: | source | dest | - | db.faa | db.faa | + | db.faa | db.faa | | blast.db | blast.db | | blast.db.phr | blast.db.phr| | blast.db.psq | blast.db.psq| | blast.db.pin | blast.db.pin| - | test.bam.coverage.txt |test.bam.coverage.txt | + | test.bam | test.bam | + | test.bam.bai | test.bam.bai| | search.yaml | search.yaml | | keywords.txt | keywords.txt| And I copy the example data directories: @@ -38,7 +39,7 @@ Feature: Verification steps for bioPipeline --hmm_press="hmmpress" \ \ \ - \ + \ \ \ """ @@ -50,6 +51,6 @@ Feature: Verification steps for bioPipeline | output/overview_new.txt | And the file "output/overview_new.txt" should contain 6 lines Examples: - | faa | blast | input | output | cov | search | keywords | - | --genome="${PWD}/tmp/db.faa" | --ncbi="${PWD}/tmp/blast.db" | --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --cov="${PWD}/tmp/test.bam.coverage.txt" | --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" | - | --genome="db.faa" | --ncbi="blast.db" | --input="hmm" | --output="output" | --cov="test.bam.coverage.txt" | --search="search.yaml" | --keywords="keywords.txt" | \ No newline at end of file 
+ | faa | blast | input | output | bam | search | keywords | + | --genome="${PWD}/tmp/db.faa" | --ncbi="${PWD}/tmp/blast.db" | --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --bam="${PWD}/tmp/test.bam" | --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" | + | --genome="db.faa" | --ncbi="blast.db" | --input="hmm" | --output="output" | --bam="test.bam" | --search="search.yaml" | --keywords="keywords.txt" | \ No newline at end of file diff --git a/main.nf b/main.nf index a49167b..0ca9b1d 100644 --- a/main.nf +++ b/main.nf @@ -4,6 +4,7 @@ params.vendor = "$baseDir/vendor" params.search = "" params.keywords = "" params.help = "" +params.cov = "" if( params.help ) { @@ -188,10 +189,7 @@ process blastSeqHtml { PYTHON="$baseDir/vendor/python/bin/python" coverages = Channel.create() -coverages.bind(params.cov.split(',').collect{file(it)}.join(' ')) - -bam = Channel.from(params.bam) -sortedIndexedBam = bam.flatMap{ files -> files.split(',')} +sortedIndexedBam = Channel.from(params.bam.split(',').collect{file(it)}) process bamToCoverage { @@ -203,7 +201,7 @@ process bamToCoverage { val bam from sortedIndexedBam output: - file coverage into coverages + file "${bam.baseName}" into coverages when: bam != '' @@ -211,12 +209,12 @@ process bamToCoverage { script: """ #!/bin/sh - $PYTHON scripts/bam_to_coverage.py ${params.sortedIndexedBam} > coverage + $PYTHON ${baseDir}/scripts/bam_to_coverage.py ${bam} > ${bam.baseName} """ } coverageFiles = Channel.create() -coverages.toList().into(coverageFiles) +coverages.collectFile().toList().into(coverageFiles) uniq_overview = uniq_overview.collectFile() process createOverview { diff --git a/scripts/create_overview.py b/scripts/create_overview.py index 13c4ba7..97fab2b 100755 --- a/scripts/create_overview.py +++ b/scripts/create_overview.py @@ -26,7 +26,7 @@ def writeHeader(coverages, file, insertGroup): util.LINKS, util.GENE_SEQUENCE] if insertGroup: header.insert(2, util.GROUP) - 
file.write(("\t".join(coverages + header)) + '\n') + file.write(("\t".join(map(lambda cov:os.path.basename(cov),coverages) + header)) + '\n') def move_txt_faa_files(output, file_txt, file_faa): diff --git a/test/test_create_overview.py b/test/test_create_overview.py index 39eed68..31be702 100644 --- a/test/test_create_overview.py +++ b/test/test_create_overview.py @@ -30,7 +30,7 @@ def test_determine_config_values(): def test_write_header(): - coverages = ["cov1", "cov2"] + coverages = ["/test/path/cov1", "/test/path/cov2"] _, path = tempfile.mkstemp() with open(path, "r+") as temp_file: create_overview.writeHeader(coverages, temp_file, True) diff --git a/usage.txt b/usage.txt index ad602b7..16de8a8 100644 --- a/usage.txt +++ b/usage.txt @@ -6,10 +6,10 @@ |_| |_|\\___|_| \\_\\__,_|\\_____|______|_| \\_|______| USAGE -nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --cov ) +nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --bam ) DESCRIPTION -MeRaGENE 0.1.0 +MeRaGENE 0.2.0 --genome="/vol/genomeDat/test.faa" Your faa to search in. @@ -19,7 +19,7 @@ MeRaGENE 0.1.0 --output="/vol/project/output" A folder path that the pipeline should produce. ---cov="/vol/cov1.txt,/vol/cov2.txt" List your coverage files, link them here. +--bam="/vol/sample1.bam,/vol/sample2.bam" Comma-separated list of sorted, indexed BAM (read/assembly alignment) files. OPTIONAL ARGUMENTS