Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Racon, Miniasm and Minimap added #138

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .idea/circlator.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

113 changes: 112 additions & 1 deletion circlator/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self,
spades_use_first_success=False,
assembler='spades',
genomeSize=100000, # only matters for Canu if correcting reads (which we're not)
data_type='pacbio-corrected',
data_type='pacbio-raw',
):
self.outdir = os.path.abspath(outdir)
self.reads = os.path.abspath(reads)
Expand All @@ -41,6 +41,9 @@ def __init__(self,
self.canu = external_progs.make_and_check_prog('canu', verbose=self.verbose, required=True)
self.genomeSize=genomeSize
self.data_type = data_type
elif self.assembler == 'racon':
self.racon = external_progs.make_and_check_prog('racon', verbose=self.verbose, required=True)
self.data_type = data_type
else:
raise Error('Unknown assembler: "' + self.assembler + '". cannot continue')

Expand Down Expand Up @@ -170,11 +173,119 @@ def run_canu(self):
renamed_gfa = os.path.join(self.outdir, 'contigs.gfa')
os.rename(original_gfa, renamed_gfa)

def run_racon(self):
    '''Assembles the reads with minimap2 + miniasm, then polishes with racon.

    Steps, all written into self.outdir:
      1. minimap2 all-vs-all read overlaps       -> output.paf
      2. miniasm layout                          -> output.gfa
      3. awk/fold conversion of GFA "S" lines    -> output.gfa.fasta
      4. minimap2 + racon, polishing round 1     -> output.racon1.fasta
      5. minimap2 + racon, polishing round 2     -> output.racon2.fasta
    Finally output.gfa is renamed to contigs.gfa and output.racon2.fasta
    to contigs.fasta.

    The minimap2 presets are chosen from self.data_type: anything starting
    with "pacbio" uses ava-pb/map-pb, everything else uses ava-ont/map-ont.

    Raises Error if any of the external commands reports failure.
    '''
    # Local import: the file's top-level import block is not visible here.
    import shlex

    def exe(name):
        # The visible part of __init__ only sets self.racon for this
        # assembler, so fall back to the bare program name (resolved via
        # PATH by the shell) when the attribute has not been set up.
        prog = getattr(self, name, None)
        return name if prog is None else prog.exe()

    def out(filename):
        return os.path.join(self.outdir, filename)

    def quoted(args):
        # str() because self.threads may be an int; quoting protects paths
        # containing spaces or shell metacharacters.
        return ' '.join(shlex.quote(str(arg)) for arg in args)

    def run_step(cmd, what):
        ok, _ = common.syscall(cmd, verbose=self.verbose, allow_fail=False)
        if not ok:
            raise Error('Error running ' + what + '.')

    def redirect(args, stdout_file):
        return quoted(args) + ' > ' + shlex.quote(stdout_file)

    platform = 'pb' if self.data_type.split('-')[0] == 'pacbio' else 'ont'
    overlap_preset = 'ava-' + platform
    map_preset = 'map-' + platform

    paf = out('output.paf')
    gfa = out('output.gfa')
    draft_fasta = out('output.gfa.fasta')

    # All-vs-all overlaps of the reads against themselves.
    run_step(
        redirect([exe('minimap2'), '-t', self.threads, '-x', overlap_preset, self.reads, self.reads], paf),
        'minimap2',
    )

    # Layout the overlaps into an assembly graph.
    run_step(
        redirect([exe('miniasm'), '-Rc2', '-f', self.reads, paf], gfa),
        'miniasm',
    )

    # gfa2fasta: the awk program must reach awk as one single-quoted shell
    # word, and "\n" must be an awk escape, not a literal newline.
    awk_program = '/^S/{print ">"$2"\\n"$3}'
    run_step(
        quoted([exe('awk'), awk_program, gfa]) + ' | fold > ' + shlex.quote(draft_fasta),
        'awk',
    )

    # Two rounds of map-reads-to-draft followed by racon consensus. Each
    # round polishes the output of the previous one.
    current_fasta = draft_fasta
    for round_number in (1, 2):
        sam = out('output.gfa' + str(round_number) + '.sam')
        polished_fasta = out('output.racon' + str(round_number) + '.fasta')
        step_name = 'correction step #' + str(round_number)

        run_step(
            redirect([exe('minimap2'), '-t', self.threads, '-ax', map_preset, current_fasta, self.reads], sam),
            'minimap2 ' + step_name,
        )
        run_step(
            redirect([exe('racon'), '-t', self.threads, self.reads, sam, current_fasta], polished_fasta),
            'racon ' + step_name,
        )
        current_fasta = polished_fasta

    # Expose the results under the names the rest of the pipeline looks for.
    os.rename(gfa, out('contigs.gfa'))
    os.rename(current_fasta, out('contigs.fasta'))

def run(self):
    '''Runs the assembler selected by self.assembler.

    Dispatches to run_spades (passing self.spades_use_first_success as
    stop_at_first_success), run_canu or run_racon. Raises Error for any
    other assembler name.
    '''
    chosen = self.assembler

    if chosen == 'spades':
        self.run_spades(stop_at_first_success=self.spades_use_first_success)
        return

    if chosen == 'canu':
        self.run_canu()
        return

    if chosen == 'racon':
        self.run_racon()
        return

    raise Error('Unknown assembler: "' + self.assembler + '". cannot continue')
5 changes: 5 additions & 0 deletions circlator/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def _set_filenames(self):
elif self.assembler == 'canu':
if self.contigs_fasta is None or self.contigs_gfa is None:
raise Error('Error finding canu contigs fasta and/or gfa file')
elif self.assembler == 'racon':
if self.contigs_fasta is None or self.contigs_gfa is None:
raise Error('Error finding canu contigs fasta and/or gfa file')
else:
raise Error('Assembler "' + self.assembler + '" not recognised. Cannot continue')

Expand Down Expand Up @@ -185,5 +188,7 @@ def circular_contigs(self):
return set()
elif self.assembler == 'canu':
return self._circular_contigs_from_canu_gfa(self.contigs_gfa)
elif self.assembler == 'racon':
return self._circular_contigs_from_canu_gfa(self.contigs_gfa)
else:
return set()
2 changes: 1 addition & 1 deletion circlator/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class Error (Exception): pass

allowed_assemblers = ['canu', 'spades']
allowed_assemblers = ['canu', 'spades', 'racon']
allowed_data_types = ['pacbio-raw', 'pacbio-corrected', 'nanopore-raw', 'nanopore-corrected']

def syscall(cmd, allow_fail=False, verbose=False):
Expand Down
14 changes: 12 additions & 2 deletions circlator/external_progs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class Error (Exception): pass
'samtools': 'CIRCLATOR_SAMTOOLS',
'spades': 'CIRCLATOR_SPADES',
'canu': 'CIRCLATOR_CANU',
'racon': 'CIRCLATOR_RACON',
}

prog_to_version_cmd = {
Expand All @@ -20,15 +21,21 @@ class Error (Exception): pass
'samtools': ('', re.compile(r'Version: (\d+\.\d+[\.\d]*)')),
'spades': ('-v', re.compile(r'v.?([0-9][0-9\.]+)')),
'canu': ('-version', re.compile(r'^Canu \D*([\d][\d\.]+)')),
'minimap2': ('-V', re.compile(r'([0-9\.]+)')),
'miniasm': ('-V', re.compile(r'([0-9\.]+)')),
'racon': ('--version', re.compile(r'v.?([0-9][0-9\.]+)')),
}

min_versions = {
'bwa': '0.7.12',
'nucmer': '3.1',
'prodigal': '2.6',
'samtools': '0.1.19',
'spades': '3.6.2', # this is the first version to support python3
'spades': '3.11.1', # this is the first version to support python3
'canu': '0.0',
'minimap2': '2.10',
'miniasm': '0.2',
'racon': '1.2.1',
}


Expand All @@ -44,9 +51,12 @@ class Error (Exception): pass
'spades': 'spades.py',
'samtools': 'samtools',
'canu': 'canu',
'minimap2':'minimap2',
'miniasm': 'miniasm',
'racon': 'racon'
}

not_required = {'spades', 'canu'}
not_required = {'spades', 'canu','racon'}

def handle_error(message, raise_error=True):
if raise_error:
Expand Down
1 change: 1 addition & 0 deletions circlator/merge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import os
import sys
import copy
import shutil
Expand Down
4 changes: 2 additions & 2 deletions circlator/tasks/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

def run():
parser = argparse.ArgumentParser(
description = 'Assemble reads using SPAdes/Canu',
description = 'Assemble reads using SPAdes/Canu/Racon',
usage = 'circlator assemble [options] <in.reads.fasta> <out_dir>')
parser.add_argument('--not_careful', action='store_true', help='Do not use the --careful option with SPAdes (used by default)')
parser.add_argument('--not_only_assembler', action='store_true', help='Do not use the --assemble-only option with SPAdes (used by default)')
Expand All @@ -14,7 +14,7 @@ def run():
parser.add_argument('--spades_k', help='Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [%(default)s]', default='127,117,107,97,87,77', metavar='k1,k2,k3,...')
parser.add_argument('--spades_use_first', action='store_true', help='Use the first successful SPAdes assembly. Default is to try all kmers and use the assembly with the largest N50')
parser.add_argument('--assembler', choices=circlator.common.allowed_assemblers, help='Assembler to use for reassemblies [%(default)s]', default='spades')
parser.add_argument('--data_type', choices=circlator.common.allowed_data_types, help='String representing one of the 4 type of data analysed (only used for Canu) [%(default)s]', default='pacbio-corrected')
parser.add_argument('--data_type', choices=circlator.common.allowed_data_types, help='String representing one of the 4 type of data analysed (only used for Canu and Racon) [%(default)s]', default='pacbio-raw')
parser.add_argument('reads', help='Name of input reads FASTA file', metavar='in.reads.fasta')
parser.add_argument('out_dir', help='Output directory (must not already exist)')
options = parser.parse_args()
Expand Down
2 changes: 1 addition & 1 deletion circlator/tasks/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def run():
parser.add_argument('--spades_k', help='Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [%(default)s]', default='127,117,107,97,87,77', metavar='k1,k2,k3,...')
parser.add_argument('--spades_use_first', action='store_true', help='Use the first successful SPAdes assembly. Default is to try all kmers and use the assembly with the largest N50')
parser.add_argument('--assembler', choices=circlator.common.allowed_assemblers, help='Assembler to use for reassemblies [%(default)s]', default='spades')
parser.add_argument('--data_type', choices=circlator.common.allowed_data_types, help='String representing one of the 4 type of data analysed (only used for Canu) [%(default)s]', default='pacbio-corrected')
parser.add_argument('--data_type', choices=circlator.common.allowed_data_types, help='String representing one of the 4 type of data analysed (only used for Canu and Racon) [%(default)s]', default='pacbio-raw')
parser.add_argument('--b2r_length_cutoff', type=int, help='All reads mapped to contigs shorter than this will be kept [%(default)s]', default=100000, metavar='INT')
parser.add_argument('--b2r_split_all_reads', action='store_true', help='By default, reads mapped to shorter contigs are left unchanged. This option splits them into two, broken at the middle of the contig to try to force circularization. May help if the assembler does not detect circular contigs (eg canu)')
parser.add_argument('--ref_end', type=int, help='max distance allowed between nucmer hit and end of input assembly contig [%(default)s]', metavar='INT', default=15000)
Expand Down
44 changes: 42 additions & 2 deletions install_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@ CANU_VERSION=1.4
PRODIGAL_VERSION=2.6.2
SAMTOOLS_VERSION=1.3
MUMMER_VERSION=3.23
SPADES_VERSION=3.7.1
SPADES_VERSION=3.11.1

MINIMAP2_VERSION=2.10
MINIASM_VERSION=55cf0189e2f7d5bda5868396cebe066eec0a9547
RACON_VERSION=1.3.0

BWA_DOWNLOAD_URL="http://downloads.sourceforge.net/project/bio-bwa/bwa-${BWA_VERSION}.tar.bz2"
CANU_DOWNLOAD_URL="https://github.com/marbl/canu/releases/download/v${CANU_VERSION}/canu-${CANU_VERSION}.Linux-amd64.tar.xz"
PRODIGAL_DOWNLOAD_URL="https://github.com/hyattpd/Prodigal/releases/download/v${PRODIGAL_VERSION}/prodigal.linux"
SAMTOOLS_DOWNLOAD_URL="https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2"
MUMMER_DOWNLOAD_URL="http://downloads.sourceforge.net/project/mummer/mummer/${MUMMER_VERSION}/MUMmer${MUMMER_VERSION}.tar.gz"
SPADES_DOWNLOAD_URL="http://spades.bioinf.spbau.ru/release${SPADES_VERSION}/SPAdes-${SPADES_VERSION}-Linux.tar.gz"

MINIMAP2_DOWNLOAD_URL="https://github.com/lh3/minimap2/archive/v${MINIMAP2_VERSION}.tar.gz"
MINIASM_DOWNLOAD_URL="https://github.com/lh3/miniasm/archive/${MINIASM_VERSION}.tar.gz"
RACON_DOWNLOAD_URL="https://github.com/isovic/racon/releases/download/${RACON_VERSION}/racon-v${RACON_VERSION}.tar.gz"

# Make an install location
if [ ! -d 'build' ]; then
Expand Down Expand Up @@ -91,6 +97,36 @@ spades_dir="$build_dir/SPAdes-${SPADES_VERSION}-Linux/bin"
tar -zxf SPAdes-${SPADES_VERSION}-Linux.tar.gz


# --------------- minimap2 -----------------
cd $build_dir
download $MINIMAP2_DOWNLOAD_URL "${MINIMAP2_VERSION}.tar.gz"
minimap2_dir="$build_dir/minimap2-${MINIMAP2_VERSION}/"
tar -zxf ${MINIMAP2_VERSION}.tar.gz
cd $minimap2_dir
make


# --------------- miniasm -----------------
cd $build_dir
download $MINIASM_DOWNLOAD_URL "${MINIASM_VERSION}.tar.gz"
miniasm_dir="$build_dir/miniasm-${MINIASM_VERSION}"
tar -zxf ${MINIASM_VERSION}.tar.gz
cd $miniasm_dir
make

# --------------- racon -----------------
cd $build_dir
download $RACON_DOWNLOAD_URL "racon-v${RACON_VERSION}.tar.gz"
racon_dir="$build_dir/racon-v${RACON_VERSION}"
tar -zxf racon-v${RACON_VERSION}.tar.gz
cd $racon_dir
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make
racon_dir="$build_dir/racon-v${RACON_VERSION}/build/bin"


cd $start_dir

update_path () {
Expand All @@ -106,4 +142,8 @@ update_path ${prodigal_dir}
update_path ${mummer_dir}
update_path ${samtools_dir}
update_path ${spades_dir}
update_path ${minimap2_dir}
update_path ${miniasm_dir}
update_path ${racon_dir}


2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='circlator',
version='1.5.5',
version='1.5.6',
description='circlator: a tool to circularise genome assemblies',
packages = find_packages(),
package_data={'circlator': ['data/*']},
Expand Down