def test_remove_chimeras_denovo_from_seqs_lower(self):
    """ Test the case of sequences from remove_chimeras_denovo_from_seqs().
    The input FASTA is run through de novo chimera removal (UCHIME
    algorithm, implemented in VSEARCH) and both the labels and the
    sequences of the records that come back are compared with the
    expected non-chimeric records, whose sequences are all uppercase.
    """
    # (label, sequence) records written to the input FASTA file
    seqs = [
        ("s1_104;size=2;",
         "GTGCCAGCCGCCGCGGTAATACCCGCAGCTCAAGTGGTG"
         "GTCGCTATTATTGAGCCTAAAACGTCCGTAGTCGGCTTT"
         "GTAAATCCCTGGGTAAATCGGGAAGCTTAACTTTCCGAC"
         "TTCCGAGGAGACTGTCAAACTTGGGACCGGGAG"),
        ("s1_106;size=2;",
         "GTGTCAGCCGCCGCGGTAATACCAGCTCTCCGAGTGGTG"
         "TGGATGTTTATTGGGCCTAAAGCGTCCGTAGCCGGCTGC"
         "GCAAGTCTGTCGGGAAATCCGCACGCCTAACGTGCGGGC"
         "GTCCGGCGGAAACTGCGTGGCTTGGGACCGGAA"),
        ("s1_1;size=9;",
         "TACCCGCAGCTCAAGTGGTGGTCGCTATTATTGAGCCTAAA"
         "ACGTCCGTAGTCGGCTTTGTAAATCCCTGGGTAAATCGGGT"
         "CGCTTAACGATCCGATTCTGGGGAGACTGCAAAGCTTGGGA"
         "CCGGGCGAGGTTAGAGGTACTCTCGGG"),
        ("s1_20;size=9;",
         "TACCTGCAGCCCAAGTGGTGGTCGATTTTATTGAGTCTAA"
         "AACGTTCGTAGCCGGTTTGATAAATCCTTGGGTAAATCGG"
         "GAAGCTTAACTTTCCGATTCCGAGGAGACTGTCAAACTTG"
         "GGACCGGGAGAGGCTAGAGGTACTTCTGGG"),
        ("s1_40;size=8;",
         "TACCAGCTCTCCGAGTGGTGTGGATGTTTATTGGGCCTAA"
         "AGCATCCGTAGCTGGCTAGGTTAGTCCCCTGTTAAATCCA"
         "CCGAATTAATCGTTGGATGCGGGGGATACTGCTTGGCTAG"
         "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
        ("s1_60;size=8;",
         "TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAA"
         "AGCGTCCGTAGCCGGCTGCGCAAGTCTGTCGGGAAATCCG"
         "CACGCCTAACGTGCGGGTCCGGCGGAAACTGCGTGGCTTG"
         "GGACCGGAAGACTCGAGGGGTACGTCAGGG"),
    ]

    # labels and sequences expected in the output, in output order
    names_non_chimera = [
        "s1_1;size=9;",
        "s1_20;size=9;",
        "s1_40;size=8;",
        "s1_60;size=8;",
    ]
    seqs_non_chimera = [
        ("TACCCGCAGCTCAAGTGGTGGTCGCTATTATTGAGCCTAAA"
         "ACGTCCGTAGTCGGCTTTGTAAATCCCTGGGTAAATCGGGT"
         "CGCTTAACGATCCGATTCTGGGGAGACTGCAAAGCTTGGGA"
         "CCGGGCGAGGTTAGAGGTACTCTCGGG"),
        ("TACCTGCAGCCCAAGTGGTGGTCGATTTTATTGAGTCTAA"
         "AACGTTCGTAGCCGGTTTGATAAATCCTTGGGTAAATCGG"
         "GAAGCTTAACTTTCCGATTCCGAGGAGACTGTCAAACTTG"
         "GGACCGGGAGAGGCTAGAGGTACTTCTGGG"),
        ("TACCAGCTCTCCGAGTGGTGTGGATGTTTATTGGGCCTAA"
         "AGCATCCGTAGCTGGCTAGGTTAGTCCCCTGTTAAATCCA"
         "CCGAATTAATCGTTGGATGCGGGGGATACTGCTTGGCTAG"
         "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"),
        ("TACCGGCAGCTCAAGTGATGACCGCTATTATTGGGCCTAA"
         "AGCGTCCGTAGCCGGCTGCGCAAGTCTGTCGGGAAATCCG"
         "CACGCCTAACGTGCGGGTCCGGCGGAAACTGCGTGGCTTG"
         "GGACCGGAAGACTCGAGGGGTACGTCAGGG"),
    ]

    # write the input records as a two-line-per-record FASTA file
    seqs_fp = join(self.working_dir, "seqs.fasta")
    with open(seqs_fp, 'w') as fasta_out:
        fasta_out.writelines(">%s\n%s\n" % record for record in seqs)

    output_fp = remove_chimeras_denovo_from_seqs(
        seqs_fp=seqs_fp,
        working_dir=self.working_dir)

    # only the first whitespace-delimited token of each label is compared
    records_obs = [(label.split()[0], sequence)
                   for label, sequence in sequence_generator(output_fp)]
    names_obs = [name for name, _ in records_obs]
    seqs_obs = [sequence for _, sequence in records_obs]

    self.assertEqual(names_non_chimera, names_obs)
    self.assertEqual(seqs_non_chimera, seqs_obs)
""" From 973b56df4e47e77a171d2328fe79b2f276de2e52 Mon Sep 17 00:00:00 2001 From: Jon Sanders Date: Wed, 6 Dec 2017 14:56:32 -0800 Subject: [PATCH 2/3] Adding method and tests to convert chimera checked seqs to upper --- deblur/test/test_workflow.py | 27 +++++++++++++++++++++++++-- deblur/workflow.py | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/deblur/test/test_workflow.py b/deblur/test/test_workflow.py index 7699272..f87f21e 100644 --- a/deblur/test/test_workflow.py +++ b/deblur/test/test_workflow.py @@ -8,7 +8,7 @@ from unittest import TestCase, main from shutil import rmtree -from tempfile import mkdtemp +from tempfile import mkdtemp, NamedTemporaryFile from os import listdir, remove from types import GeneratorType from os.path import join, isfile, abspath, dirname, splitext @@ -34,7 +34,8 @@ remove_artifacts_from_biom_table, _get_fastq_variant, filter_minreads_samples_from_table, - fasta_from_biom) + fasta_from_biom, + upper_fasta) from deblur.deblurring import get_default_error_profile @@ -213,6 +214,28 @@ def test_dereplicate_seqs_remove_singletons(self): self.assertEqual(act, exp) + def test_upper_fasta(self): + fasta = (">Seq1\nAAGtttcA\n" + ">Seq2\nAAAAGCcA\n" + ">Seq3\nAAGTGCAA\n") + fasta_exp = (">Seq1\nAAGTTTCA\n" + ">Seq2\nAAAAGCCA\n" + ">Seq3\nAAGTGCAA\n") + + in_fa = join(self.working_dir, "seqs_lower.fasta") + + with open(in_fa, 'w') as f: + f.write(fasta) + + upper_fasta(in_fa) + + with open(in_fa) as f: + fasta_out = f.read() + + remove(in_fa) + + self.assertEqual(fasta_exp, fasta_out) + def test_dereplicate_seqs(self): """ Test dereplicate_seqs() method functionality, keep singletons diff --git a/deblur/workflow.py b/deblur/workflow.py index 0f7d55a..0f2a943 100644 --- a/deblur/workflow.py +++ b/deblur/workflow.py @@ -19,6 +19,8 @@ import warnings import io import os +import tempfile +import shutil import skbio from biom.table import Table @@ -567,9 +569,25 @@ def remove_chimeras_denovo_from_seqs(seqs_fp, 
def upper_fasta(fp):
    """Convert the sequences of a FASTA file to uppercase, in place.

    Lines starting with '>' (record headers) are copied unchanged; every
    other line is uppercased. The result is written to a temporary file
    first and then copied over ``fp``.

    Parameters
    ----------
    fp : str
        Path of the FASTA file to rewrite.

    Returns
    -------
    None
        The file at ``fp`` is modified in place.

    Raises
    ------
    IOError / OSError
        If ``fp`` cannot be read or written.
    """
    tmp_fp = None
    try:
        # A single, context-managed temporary file: the handle is closed
        # (and therefore flushed) before its contents are copied back.
        with open(fp) as in_f, \
                tempfile.NamedTemporaryFile(mode='w',
                                            delete=False) as out_f:
            tmp_fp = out_f.name
            for line in in_f:
                if line.startswith('>'):
                    out_f.write(line)
                else:
                    out_f.write(line.upper())
        # copyfile() transfers the contents only, so the permissions of
        # the file at fp are kept instead of being replaced by the
        # restrictive mode of the temporary file.
        shutil.copyfile(tmp_fp, fp)
    finally:
        # never leave the temporary file behind, even on failure
        if tmp_fp is not None and os.path.exists(tmp_fp):
            os.remove(tmp_fp)