Merge pull request #152 from metagenomics/feature/relative-path
relative path
pbelmann committed Nov 18, 2015
2 parents 073c971 + 647d6db commit 45c1efe
Showing 4 changed files with 69 additions and 33 deletions.
24 changes: 20 additions & 4 deletions features/nextflow.feature
@@ -12,7 +12,7 @@ Feature: Verification steps for bioPipeline
nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input --output --cov )
"""

Scenario: Run the pipeline with valid parameters
Scenario Outline: Run the pipeline with valid parameters
Given I copy the example data files:
| source | dest |
| db.faa | db.faa |
@@ -28,12 +28,28 @@ Feature: Verification steps for bioPipeline
| hmm | hmm |
When I run the command:
"""
${NEXTFLOW}/nextflow run ${PWD}/main.nf -profile 'local' --genome="${PWD}/tmp/db.faa" --ncbi="${PWD}/tmp/blast.db" --blast_cpu=1 --blastp="blastp" --hmm_search="hmmsearch" --hmm_scan="hmmscan" --hmm_press="hmmpress" --input="${PWD}/tmp/hmm" --output="${PWD}/tmp/output" --cov="${PWD}/tmp/test.bam.coverage.txt" --search="${PWD}/tmp/search.yaml" --keywords="${PWD}/tmp/keywords.txt"
${NEXTFLOW}/nextflow run ${PWD}/main.nf -profile 'local' \
<faa> \
<blast> \
--blast_cpu=1 \
--blastp="blastp" \
--hmm_search="hmmsearch" \
--hmm_scan="hmmscan" \
--hmm_press="hmmpress" \
<input> \
<output> \
<cov> \
<search> \
<keywords> \
"""
Then the exit code should be 0
Then the stderr should be empty
And the exit code should be 0
And the following files should exist and not be empty:
| file |
| output/overview.html |
| output/overview_new.txt |
And the file "output/overview_new.txt" should contain 6 lines

Examples:
| faa | blast | input | output | cov | search | keywords |
| --genome="${PWD}/tmp/db.faa" | --ncbi="${PWD}/tmp/blast.db" | --input="${PWD}/tmp/hmm" | --output="${PWD}/tmp/output" | --cov="${PWD}/tmp/test.bam.coverage.txt" | --search="${PWD}/tmp/search.yaml" | --keywords="${PWD}/tmp/keywords.txt" |
| --genome="db.faa" | --ncbi="blast.db" | --input="hmm" | --output="output" | --cov="test.bam.coverage.txt" | --search="search.yaml" | --keywords="keywords.txt" |
8 changes: 7 additions & 1 deletion features/steps/cli.py
@@ -74,4 +74,10 @@ def step_impl(context, file_, lines_):
@then(u'the {stream} should contain')
def step_impl(context, stream):
output = get_stream(context, stream)
nt.assert_in(context.text, output)
nt.assert_in(context.text, output)

@then(u'the {stream} should be empty')
def step_impl(context, stream):
output = get_stream(context, stream)
nt.assert_equal(output, "",
"The {} should be empty but contains:\n\n{}".format(stream, output))
64 changes: 39 additions & 25 deletions main.nf
@@ -5,12 +5,28 @@ params.search = ""
params.keywords = ""
params.help = ""


if( params.help ) {
usage = file("$baseDir/usage.txt")
print usage.text
return
}

hmmDir = file(params.input)
outputDir = file(params.output)
ncbiDB = file(params.ncbi)
genomeFaa = file(params.genome)

keywordsFile = ""
if(params.keywords){
keywordsFile = file(params.keywords)
}

searchFile = ""
if(params.search){
searchFile = file(params.search)
}

process bootstrap {

executor 'local'
@@ -22,9 +38,8 @@ process bootstrap {
file allHmm

shell:
outputDir = file(params.output)
if(outputDir.exists())
exit(0, "Directory ${params.output} already exists. Please remove it or assign another output directory.")
exit(0, "Directory ${outputDir} already exists. Please remove it or assign another output directory.")
else
outputDir.mkdir()
"""
@@ -33,13 +48,13 @@ process bootstrap {
then
make -C !{baseDir} install
fi
cat !{params.input}/*.hmm > allHmm
cat !{hmmDir}/*.hmm > allHmm
${params.hmm_press} allHmm
"""
}

fastaChunk = Channel.create()
list = Channel.fromPath(params.genome).splitFasta(by:6000,file:true).collectFile();
list = Channel.fromPath(genomeFaa).splitFasta(by:6000,file:true).collectFile();
list.spread(allHmm).into(fastaChunk)

process hmmFolderScan {
@@ -107,11 +122,11 @@ process getFasta {
'''
#!/bin/sh
contig=`echo "!{contigLine} " | cut -d ' ' -f 4`
grep "$contig " !{params.genome} > uniq_header
grep "$contig " !{genomeFaa} > uniq_header
buffer=`cat uniq_header | cut -c 2-`
contig=`echo $buffer | cut -d" " -f1`
awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{params.genome} > !{baseDir}/$contig.faa
awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{params.genome} > uniq_out
awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{genomeFaa} > !{baseDir}/$contig.faa
awk -v p="$buffer" 'BEGIN{ ORS=""; RS=">"; FS="\\n" } $1 == p { print ">" $0 }' !{genomeFaa} > uniq_out
'''

}
@@ -140,7 +155,7 @@ process blastSeqTxt {
'''
#!/bin/sh
contig=`grep ">" !{uniq_seq} | cut -d" " -f1 | cut -c 2-`
!{params.blastp} -db !{params.ncbi} -outfmt '!{order}' -query "!{uniq_seq}" -out "!{baseDir}/$contig.txt" -num_threads !{params.blast_cpu}
!{params.blastp} -db !{ncbiDB} -outfmt '!{order}' -query "!{uniq_seq}" -out "!{baseDir}/$contig.txt" -num_threads !{params.blast_cpu}
echo "$contig" > blast_out
'''
}
@@ -165,15 +180,15 @@ process blastSeqHtml {
'''
#!/bin/sh
contig=`grep ">" !{uniq_seqHtml} | cut -d" " -f1 | cut -c 2-`
!{params.blastp} -db !{params.ncbi} -query "!{uniq_seqHtml}" -html -out "!{params.output}/$contig.html" -num_threads !{params.blast_cpu}
!{params.blastp} -db !{ncbiDB} -query "!{uniq_seqHtml}" -html -out "!{outputDir}/$contig.html" -num_threads !{params.blast_cpu}
'''

}

PYTHON="$baseDir/vendor/python/bin/python"

coverages = Channel.create()
coverages.bind(params.cov.replaceAll(',',' '))
coverages.bind(params.cov.split(',').collect{file(it)}.join(' '))

bam = Channel.from(params.bam)
sortedIndexedBam = bam.flatMap{ files -> files.split(',')}
@@ -216,17 +231,17 @@ process createOverview {
val coverageFiles

output:
val params.output + '/overview.txt' into over
val outputDir + '/overview.txt' into over

shell:
'''
#!/bin/sh
searchParam=""
if [ -n !{params.search} ]
then
searchParam="--search=!{params.search}"
searchParam="--search=!{searchFile}"
fi
!{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview} -faa !{baseDir} -o !{params.output} ${searchParam} -c !{coverageFiles.join(' ')}
!{PYTHON} !{baseDir}/scripts/create_overview.py -u !{uniq_overview} -faa !{baseDir} -o !{outputDir} ${searchParam} -c !{coverageFiles.join(' ')}
'''
}

@@ -238,14 +253,14 @@ process linkSearch {

input:
val x from over
params.output
outputDir

output:
val params.output into inputF
val outputDir into inputF

"""
#!/bin/sh
$PYTHON $baseDir/scripts/link_search.py -o ${x} -out ${params.output}
$PYTHON $baseDir/scripts/link_search.py -o ${x} -out ${outputDir}
"""
}

@@ -260,26 +275,26 @@ process folderToPubmed {

input:
val inp from inputF
params.output
outputDir

output:
val params.output + '/all.pubHits' into pub
val params.output + '/overview.txt' into over2
val outputDir + '/all.pubHits' into pub
val outputDir + '/overview.txt' into over2

shell:
'''
#!/bin/sh
keywords=""
if [ -f !{params.keywords} ]
if [ -f !{keywordsFile} ]
then
keywords=!{params.keywords}
keywords=!{keywordsFile}
else
emptyKeywords="keywords.txt"
touch $emptyKeywords
keywords=$emptyKeywords
fi
echo $keywords
sh !{baseDir}/scripts/FolderToPubmed.sh !{inp} !{params.output} !{baseDir}/scripts/UrltoPubmedID.sh ${keywords}
sh !{baseDir}/scripts/FolderToPubmed.sh !{inp} !{outputDir} !{baseDir}/scripts/UrltoPubmedID.sh ${keywords}
'''
}

@@ -295,7 +310,7 @@ process linkAssignment {
val p from pub

output:
val params.output + '/overview_new.txt' into overNew
val outputDir + '/overview_new.txt' into overNew

"""
#!/bin/sh
@@ -314,8 +329,7 @@ process buildHtml {

"""
#!/bin/sh
$PYTHON $baseDir/scripts/web/controller.py -o ${overview} -out ${params.output} -conf $baseDir/scripts/web/config.yaml -templates $baseDir/scripts/web/app/templates
$PYTHON $baseDir/scripts/web/controller.py -o ${overview} -out ${outputDir} -conf $baseDir/scripts/web/config.yaml -templates $baseDir/scripts/web/app/templates
"""

}

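
The recurring change in main.nf above is that raw parameter strings (params.genome, params.output, params.cov, ...) are wrapped once in Nextflow's file() helper and the resulting objects are reused everywhere, so relative values such as "db.faa" now work alongside absolute paths. The sketch below only illustrates the resolution idea in Python for readers unfamiliar with Nextflow; it is not part of the pipeline, and it assumes Nextflow's documented behaviour of resolving relative paths against the launch directory.

# Illustrative only: mimic resolving a path parameter the way file() does,
# so "db.faa" and "/vol/project/tmp/db.faa" point at the same file when the
# pipeline is launched from /vol/project/tmp.
from pathlib import Path

def resolve_param(value, launch_dir):
    path = Path(value)
    return path if path.is_absolute() else Path(launch_dir) / path

print(resolve_param("db.faa", "/vol/project/tmp"))               # /vol/project/tmp/db.faa
print(resolve_param("/vol/project/tmp/db.faa", "/vol/project/tmp"))  # unchanged
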
6 changes: 3 additions & 3 deletions usage.txt
@@ -11,15 +11,15 @@ nextflow run metagenomics/MeRaGENE [OPTIONAL_ARGUMENTS] (--genome --ncbi --input
DESCRIPTION
MeRaGENE 0.1.0

--genome="/vol/genomeDat/test.db" Your genome-database to search in.
--genome="/vol/genomeDat/test.faa" Your faa to search in.

--ncbi="/vol/blastDat/blast.db" Your blast-database to search in.

--input="/vol/project/hmmModels" A folder containing hmm models. All hmm models in this folder are used for searching.

--output="/vol/project/output" A folder path that the pipeline should produce.

--cov="/vol/project/coverage1.txt,/vol/project/coverage2.txt" List your coverage files, link them here.
--cov="/vol/cov1.txt,/vol/cov2.txt" List your coverage files, link them here.


OPTIONAL ARGUMENTS
@@ -29,7 +29,7 @@ OPTIONAL ARGUMENTS
--blastp="blastp"
--hmm_search="hmmsearch"
--hmm_scan="hmmscan"
--hmm_press="hmmpress" If you want to use a special version, change the name with its path.
--hmm_press="hmmpress" If you want to use a special version, change the name with its absolute path.
E.g. blastp="blastp" -> blastp="/vol/tools/blast/blastp"

--hmm_cpu=16 Numbers of cores to be used executing hmmsearch.