From 54993d8ebb75087094ff9ae994833e7a3951d4d0 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 12 Aug 2024 22:27:24 -0700 Subject: [PATCH 001/103] updated fevicon --- mkdocs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 4393839..a8f3f20 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -36,7 +36,7 @@ theme: icon: material/brightness-4 name: Switch to light mode - # favicon: images/icon.svg + favicon: images/icon.svg logo: images/icon.svg icon: @@ -80,4 +80,4 @@ nav: extra_javascript: - javascripts/mathjax.js - https://polyfill.io/v3/polyfill.min.js?features=es6 - - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js \ No newline at end of file + - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js From 18e1fe00ea07f77620e9187dab0d91f253118d4b Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Tue, 27 Aug 2024 10:05:13 -0700 Subject: [PATCH 002/103] moving to capnp --- CMakeLists.txt | 56 +++-- src/chaining.cpp | 22 +- src/panman.cpp | 512 ++++++++++++++++++++++++++------------------ src/panman.hpp | 226 +++++++++++-------- src/panmanUtils.cpp | 18 +- src/panmanUtils.hpp | 2 +- src/subnet.cpp | 5 +- 7 files changed, 505 insertions(+), 336 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ffd014..524f3e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,17 +5,30 @@ project(panmanUtils) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options") set(CMAKE_INCLUDE_CURRENT_DIR ON) -if(DEFINED Protobuf_PATH) - find_package(Protobuf REQUIRED HINTS ${Protobuf_PATH}) +# if(DEFINED Protobuf_PATH) +# find_package(Protobuf REQUIRED HINTS ${Protobuf_PATH}) +# else() +# find_package(Protobuf REQUIRED) +# endif() + +# Print version messages +# if(Protobuf_FOUND) +# message(STATUS "Using Protocol Buffers ${Protobuf_VERSION}") +# endif() +# 
include_directories(${Protobuf_INCLUDE_DIRS}) + +# Adding capn proto +if(DEFINED CapnProto_PATH) + find_package(CapnProto REQUIRED HINTS ${CapnProto_PATH}) else() - find_package(Protobuf REQUIRED) + find_package(CapnProto REQUIRED) endif() -# Print version messages -if(Protobuf_FOUND) - message(STATUS "Using Protocol Buffers ${Protobuf_VERSION}") +if(CapnProto_FOUND) + message(STATUS "Using Capn Proto ${CapnProto_VERSION}") endif() -include_directories(${Protobuf_INCLUDE_DIRS}) +include_directories(${CAPNP_INCLUDE_DIRS}) +add_definitions(${CAPNP_DEFINITIONS}) # Include TBB include(${TBB_DIR}/cmake/TBBBuild.cmake) @@ -33,31 +46,40 @@ find_package(jsoncpp CONFIG REQUIRED) file(GLOB PANMAT_SRCS "src/panmanUtils.cpp" "src/panman.cpp" "src/panmanUtils.hpp" "src/panman.hpp") -if(DEFINED Protobuf_PATH) +if(DEFINED CapnProto_PATH) add_executable(panmanUtils ${PANMAT_SRCS} ) - protobuf_generate( + # protobuf_generate( + # LANGUAGE cpp + # TARGET panmanUtils + # PROTOS panman.proto) + + capnp_generate( LANGUAGE cpp - TARGET panmanUtils - PROTOS panman.proto) + TARGET panmanUtils + PROTOS panman.capnp) else() - protobuf_generate_cpp( - PROTO_SRCS PROTO_HDRS - panman.proto) + # protobuf_generate_cpp( + # PROTO_SRCS PROTO_HDRS + # panman.proto) + + capnp_generate_cpp( + CAPNP_SRCS CAPNP_HDRS + panman.capnp) add_executable(panmanUtils ${PANMAT_SRCS} - ${PROTO_SRCS} - ${PROTO_HDRS} + ${CAPNP_SRCS} + ${CAPNP_HDRS} ) endif() TARGET_COMPILE_OPTIONS(panmanUtils PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) -TARGET_LINK_LIBRARIES(panmanUtils PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${Protobuf_LIBRARIES}) #${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) +TARGET_LINK_LIBRARIES(panmanUtils PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) target_include_directories(panmanUtils PUBLIC "${PROJECT_BINARY_DIR}") diff --git a/src/chaining.cpp 
b/src/chaining.cpp index 6f5bb9d..5e32a04 100644 --- a/src/chaining.cpp +++ b/src/chaining.cpp @@ -35,16 +35,16 @@ struct hashPair { } }; // Structure to represent a node in the range tree -struct Node { +struct NodeRangeTree { std::pair point; - Node* left; - Node* right; - Node* parent; + NodeRangeTree* left; + NodeRangeTree* right; + NodeRangeTree* parent; int score; }; -Node* createNode(std::pair point) { - Node* newNode = new Node; +NodeRangeTree* createNode(std::pair point) { + NodeRangeTree* newNode = new NodeRangeTree; newNode->point = point; newNode->left = newNode->right = nullptr; return newNode; @@ -67,14 +67,14 @@ bool compareY(const std::pair& a, const std::pair& b) { } -Node* constructRangeTree(tbb::concurrent_vector>& points, int start, int end) { +NodeRangeTree* constructRangeTree(tbb::concurrent_vector>& points, int start, int end) { if (start > end) return nullptr; sort(points.begin() + start, points.begin() + end + 1, compareX); int mid = (start + end) / 2; - Node* root = createNode(points[mid]); + NodeRangeTree* root = createNode(points[mid]); root->left = constructRangeTree(points, start, mid - 1); root->right = constructRangeTree(points, mid + 1, end); @@ -85,7 +85,7 @@ Node* constructRangeTree(tbb::concurrent_vector>& points, int // Function to perform range query on the 2D range tree -void queryRange(Node* root, std::pair rangeStart, std::pair rangeEnd, vector>& result) { +void queryRange(NodeRangeTree* root, std::pair rangeStart, std::pair rangeEnd, vector>& result) { if (root == nullptr) return; @@ -101,7 +101,7 @@ void queryRange(Node* root, std::pair rangeStart, std::pair ra queryRange(root->right, rangeStart, rangeEnd, result); } -void find_chain(Node* root, std::pair point, std::unordered_map, std::pair>,hashPair>&map, int K, pair &curr_base,pair &max_score_point) { +void find_chain(NodeRangeTree* root, std::pair point, std::unordered_map, std::pair>,hashPair>&map, int K, pair &curr_base,pair &max_score_point) { std::vector> result; 
std::pair new_base ((point.first - K > 0 ? point.first - K: 0), (point.second - K > 0 ? point.second - K: 0)); @@ -186,7 +186,7 @@ std::vector> chaining (std::vector &consensus, s // Constructing the 2D range tree // std::cout << "Range Tree Construction Function "; start = std::chrono::high_resolution_clock::now(); - Node* root = constructRangeTree(points_conc, 0, points_conc.size() - 1); + NodeRangeTree* root = constructRangeTree(points_conc, 0, points_conc.size() - 1); end = std::chrono::high_resolution_clock::now(); time_ = (end-start); // cout << time_.count() << "\n"; diff --git a/src/panman.cpp b/src/panman.cpp index 4e7db65..f10d337 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -17,6 +17,8 @@ #include #include #include +#include + #include "chaining.cpp" #include "rotation.cpp" #include "fitchSankoff.cpp" @@ -32,9 +34,6 @@ #include "panmanUtils.hpp" - - - char panmanUtils::getNucleotideFromCode(int code) { switch(code) { case 1: @@ -380,29 +379,29 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new } void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, - std::vector< panman::node >& nodes) { + std::vector< panman::Node::Reader >& nodes) { std::vector< panmanUtils::NucMut > storedNucMutation; - for(int i = 0; i < nodes[currentIndex].mutations_size(); i++) { - for(auto nucMut: nodes[currentIndex].mutations(i).nucmutation()) { + for (auto nodeMutations: nodes[currentIndex].getMutations()){ + for (auto nucMut: nodeMutations.getNucMutation()){ storedNucMutation.push_back( panmanUtils::NucMut(nucMut, - nodes[currentIndex].mutations(i).blockid(), - nodes[currentIndex].mutations(i).blockgapexist())); + nodeMutations.getBlockId(), + nodeMutations.getBlockGapExist())); } } std::vector< panmanUtils::BlockMut > storedBlockMutation; - for(int i = 0; i < nodes[currentIndex].mutations_size(); i++) { + for (auto nodeMutations: nodes[currentIndex].getMutations()){ panmanUtils::BlockMut tempBlockMut; - 
if(nodes[currentIndex].mutations(i).blockmutexist()) { - tempBlockMut.loadFromProtobuf(nodes[currentIndex].mutations(i)); + if (nodeMutations.getBlockMutExist()){ + tempBlockMut.loadFromProtobuf(nodeMutations); storedBlockMutation.push_back(tempBlockMut); } } - for(int i = 0; i < nodes[currentIndex].annotations_size(); i++) { - root->annotations.push_back(nodes[currentIndex].annotations(i)); - annotationsToNodes[nodes[currentIndex].annotations(i)].push_back(root->identifier); + for (auto nodeAnnotations: nodes[currentIndex].getAnnotations()){ + root->annotations.push_back(nodeAnnotations); + annotationsToNodes[nodeAnnotations].push_back(root->identifier); } root->nucMutation = storedNucMutation; @@ -412,12 +411,9 @@ void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, currentIndex++; assignMutationsToNodes(child, currentIndex, nodes); } - } - - bool panmanUtils::Tree::hasPolytomy(Node* node) { if(node->children.size() > 2) { return true; @@ -1031,14 +1027,20 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } blocks.emplace_back(0, consensusSeq); root->blockMutation.emplace_back(0, std::make_pair(BlockMutationType::BI, false)); - - tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int,int,int,int,int > > > nonGapMutations; + // pos, start, end + tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int8_t,int8_t > > > nonGapMutationsMSA; std::unordered_map< std::string, std::mutex > nodeMutexes; + std::unordered_map< size_t, std::mutex > posMutexes; for(auto u: allNodes) { nodeMutexes[u.first]; } + for (auto i=0; i states; std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; @@ -1054,18 +1056,26 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE nucFitchAssignMutations(root, states, mutations, (1 << getCodeFromNucleotide(consensusSeq[i]))); for(auto mutation: mutations) { 
nodeMutexes[mutation.first].lock(); - nonGapMutations[mutation.first].push_back(std::make_tuple(0, -1, i, -1, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); + nonGapMutationsMSA[mutation.first].push_back(std::make_tuple(i, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); nodeMutexes[mutation.first].unlock(); } + // posMutexes[i].lock(); + // std::cout << positionCount++ << std::endl; + // posMutexes[i].unlock(); }); - tbb::parallel_for_each(nonGapMutations, [&](auto& u) { + std::cout << "Mutations computed" << std::endl; + sequenceIdsToSequences.clear(); // saving memory + std::cout << "MSA deleted for saving memory" << std::endl; + + std::cout << "Building PanMAN..."; + tbb::parallel_for_each(nonGapMutationsMSA, [&](auto& u) { nodeMutexes[u.first].lock(); std::sort(u.second.begin(), u.second.end()); nodeMutexes[u.first].unlock(); size_t currentStart = 0; for(size_t i = 1; i < u.second.size(); i++) { - if(i - currentStart == 6 || std::get<0>(u.second[i]) != std::get<0>(u.second[i-1]) || std::get<2>(u.second[i]) != std::get<2>(u.second[i-1])+1 || std::get<4>(u.second[i]) != std::get<4>(u.second[i-1])) { + if(i - currentStart == 6 || std::get<0>(u.second[i]) != std::get<0>(u.second[i-1])+1 || std::get<1>(u.second[i]) != std::get<1>(u.second[i-1])) { nodeMutexes[u.first].lock(); allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, i); nodeMutexes[u.first].unlock(); @@ -1077,37 +1087,41 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, u.second.size()); nodeMutexes[u.first].unlock(); }); + std::cout << "Finished" << std::endl; } } -void panmanUtils::Tree::protoMATToTree(const panman::tree& mainTree) { +void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { // Create tree - root = createTreeFromNewickString(mainTree.newick()); - + root = 
createTreeFromNewickString(mainTree.getNewick().cStr()); + std::cout << root << std::endl; std::map< std::pair, std::vector< uint32_t > > blockIdToConsensusSeq; - for(int i = 0; i < mainTree.consensusseqmap_size(); i++) { + for (auto consensusMapElement: mainTree.getConsensusSeqMap()){ std::vector< uint32_t > seq; - for(int j = 0; j < mainTree.consensusseqmap(i).consensusseq_size(); j++) { - seq.push_back(mainTree.consensusseqmap(i).consensusseq(j)); - } - for(int j = 0; j < mainTree.consensusseqmap(i).blockid_size(); j++) { + for (auto consensusSequenceToBlockIds: consensusMapElement.getConsensusSeq()){ + seq.push_back(consensusSequenceToBlockIds); + } + + auto blockIdList = consensusMapElement.getBlockId(); + auto blockGapExistList = consensusMapElement.getBlockGapExist(); + for (auto j=0;j blockId; - blockId.first = (mainTree.consensusseqmap(i).blockid(j) >> 32); - if(mainTree.consensusseqmap(i).blockgapexist(j)) { - blockId.second = (mainTree.consensusseqmap(i).blockid(j) & 0xFFFFFFFF); + blockId.first = (blockIdList[j] >> 32); + if(blockGapExistList[j]) { + blockId.second = (blockIdList[j] & 0xFFFFFFFF); } else { blockId.second = -1; } blockIdToConsensusSeq[blockId] = seq; - } + } } - std::vector< panman::node > storedNodes; - for(int i = 0; i < mainTree.nodes_size(); i++) { - storedNodes.push_back(mainTree.nodes(i)); + std::vector< panman::Node::Reader> storedNodes; + for (auto nodesFromTree: mainTree.getNodes()){ + storedNodes.push_back(nodesFromTree); } size_t initialIndex = 0; @@ -1120,54 +1134,56 @@ void panmanUtils::Tree::protoMATToTree(const panman::tree& mainTree) { } // Gap List - for(int i = 0; i < mainTree.gaps_size(); i++) { + for (auto gapsFromTree: mainTree.getGaps()){ panmanUtils::GapList tempGaps; - tempGaps.primaryBlockId = (mainTree.gaps(i).blockid() >> 32); - tempGaps.secondaryBlockId = (mainTree.gaps(i).blockgapexist() ? 
(mainTree.gaps(i).blockid() & 0xFFFF): -1); - for(int j = 0; j < mainTree.gaps(i).nucposition_size(); j++) { - tempGaps.nucPosition.push_back(mainTree.gaps(i).nucposition(j)); - tempGaps.nucGapLength.push_back(mainTree.gaps(i).nucgaplength(j)); + tempGaps.primaryBlockId = (gapsFromTree.getBlockId() >> 32); + tempGaps.secondaryBlockId = (gapsFromTree.getBlockGapExist() ? (gapsFromTree.getBlockId() & 0xFFFF): -1); + for (auto j=0; gapsFromTree.getNucPosition().size(); j++){ + tempGaps.nucPosition.push_back(gapsFromTree.getNucPosition()[j]); + tempGaps.nucGapLength.push_back(gapsFromTree.getNucGapLength()[j]); } gaps.push_back(tempGaps); } + // Circular offsets - for(int i = 0; i < mainTree.circularsequences_size(); i++) { - circularSequences[mainTree.circularsequences(i).sequenceid()] = mainTree.circularsequences(i).offset(); + for(auto circularSeqFromTree: mainTree.getCircularSequences()) { + circularSequences[circularSeqFromTree.getSequenceId()] = circularSeqFromTree.getOffset(); } // Rotation Indexes - for(int i = 0; i < mainTree.rotationindexes_size(); i++) { - rotationIndexes[mainTree.rotationindexes(i).sequenceid()] = mainTree - .rotationindexes(i).blockoffset(); + for (auto rotationIndexFromTree: mainTree.getRotationIndexes()){ + rotationIndexes[rotationIndexFromTree.getSequenceId()] = rotationIndexFromTree.getBlockOffset(); } // Sequence inverted - for(int i = 0; i < mainTree.sequencesinverted_size(); i++) { - sequenceInverted[mainTree.sequencesinverted(i).sequenceid()] = mainTree - .sequencesinverted(i).inverted(); + for(auto seqInvertedFromTree: mainTree.getSequencesInverted()){ + sequenceInverted[seqInvertedFromTree.getSequenceId()] = seqInvertedFromTree.getInverted(); } // Block gap list - for(int i = 0; i < mainTree.blockgaps().blockposition_size(); i++) { - blockGaps.blockPosition.push_back(mainTree.blockgaps().blockposition(i)); - blockGaps.blockGapLength.push_back(mainTree.blockgaps().blockgaplength(i)); + for(int i = 0; i < 
mainTree.getBlockGaps().getBlockPosition().size(); i++) { + blockGaps.blockPosition.push_back(mainTree.getBlockGaps().getBlockPosition()[i]); + blockGaps.blockGapLength.push_back(mainTree.getBlockGaps().getBlockGapLength()[i]); } } -panmanUtils::Tree::Tree(const panman::tree& mainTree) { +panmanUtils::Tree::Tree(const panman::Tree::Reader& mainTree) { protoMATToTree(mainTree); } panmanUtils::Tree::Tree(std::istream& fin, FILE_TYPE ftype) { if(ftype == panmanUtils::FILE_TYPE::PANMAT) { - panman::tree mainTree; - if(!mainTree.ParseFromIstream(&fin)) { - throw std::invalid_argument("Could not read tree from input file."); - } + kj::std::StdInputStream kjInputStream(fin); + capnp::InputStreamMessageReader messageReader(kjInputStream); + panman::Tree::Reader mainTree = messageReader.getRoot(); + // Todo: Check if above statment returns true? + // if(!mainTree.ParseFromIstream(&fin)) { + // throw std::invalid_argument("Could not read tree from input file."); + // } protoMATToTree(mainTree); } } @@ -1891,7 +1907,7 @@ panmanUtils::Node* panmanUtils::Tree::extractPanMATSegmentHelper(panmanUtils::No } -void panmanUtils::Tree::extractPanMATSegment(std::ostream& fout, int64_t start, int64_t end) { +void panmanUtils::Tree::extractPanMATSegment(kj::std::StdOutputStream& fout, int64_t start, int64_t end) { sequence_t rootSequence; blockExists_t rootBlockExists; blockStrand_t rootBlockStrand; @@ -2038,13 +2054,18 @@ void panmanUtils::Tree::extractPanMATSegment(std::ostream& fout, int64_t start, } } - panman::tree treeToWrite; - getNodesPreorder(newRoot, treeToWrite); + capnp::MallocMessageBuilder message; + panman::Tree::Builder treeToWrite = message.initRoot(); + + capnp::List::Builder nodesBuilder = treeToWrite.initNodes(allNodes.size()); + size_t nodeIndex=0; + getNodesPreorder(newRoot, nodesBuilder, nodeIndex); + assert(nodeIndex==allNodes.size()); std::string newick = getNewickString(newRoot); std::string newick2 = getNewickString(root); - treeToWrite.set_newick(newick); + 
treeToWrite.setNewick(newick); std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t, bool > > > consensusSeqToBlockIds; @@ -2062,59 +2083,72 @@ void panmanUtils::Tree::extractPanMATSegment(std::ostream& fout, int64_t start, std::make_pair(blockId, blockGapExists)); } + ::capnp::List::Builder consensusSeqMapBuilder = treeToWrite.initConsensusSeqMap(consensusSeqToBlockIds.size()); + int consensusSeqMapBuilderCount = 0; for(auto u: consensusSeqToBlockIds) { - panman::consensusSeqToBlockIds c; - for(auto v: u.first) { - c.add_consensusseq(v); + panman::ConsensusSeqToBlockIds::Builder c = consensusSeqMapBuilder[consensusSeqMapBuilderCount]; + + ::capnp::List::Builder blockIdBuilder = c.initBlockId(u.first.size()); + ::capnp::List::Builder conSeqBuilder = c.initConsensusSeq(u.first.size()); + ::capnp::List::Builder blockGapExistBuilder = c.initBlockGapExist(u.first.size()); + + for(auto v=0; v::Builder gapsBuilder = treeToWrite.initGaps(newGaps.size()); for(size_t i = 0; i < newGaps.size(); i++) { - panman::gapList gl; + panman::GapList::Builder gl = gapsBuilder[i]; + + ::capnp::List::Builder nucGapLengthBuilder = gl.initNucGapLength(newGaps[i].nucPosition.size()); + ::capnp::List::Builder nucPositionBuilder = gl.initNucPosition(newGaps[i].nucPosition.size()); + for(size_t j = 0; j < newGaps[i].nucPosition.size(); j++) { - gl.add_nucposition(newGaps[i].nucPosition[j]); - gl.add_nucgaplength(newGaps[i].nucGapLength[j]); + nucPositionBuilder.set(j, newGaps[i].nucPosition[j]); + nucGapLengthBuilder.set(j,newGaps[i].nucGapLength[j]); } - gl.set_blockid(((int64_t)newGaps[i].primaryBlockId << 32)); - gl.set_blockgapexist(false); - treeToWrite.add_gaps(); - *treeToWrite.mutable_gaps( treeToWrite.gaps_size() - 1 ) = gl; - } - - if (!treeToWrite.SerializeToOstream(&fout)) { - std::cerr << "Failed to write to output file." 
<< std::endl; + gl.setBlockId(((int64_t)newGaps[i].primaryBlockId << 32)); + gl.setBlockGapExist(false); } - + // if (!treeToWrite.SerializeToOstream(&fout)) { + // std::cerr << "Failed to write to output file." << std::endl; + // } + ::capnp::writeMessage(fout, message); } -void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, panman::tree& treeToWrite) { - - panman::node n; - std::map< std::pair< int32_t, int32_t >, std::pair< std::vector< panman::nucMut >, int > > blockToMutations; +void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder& nodesBuilder, size_t& nodeIndex) { + panman::Node::Builder n = nodesBuilder[nodeIndex++]; + std::map< std::pair< int32_t, int32_t >, std::pair< std::vector< panman::NucMut::Builder >, int > > blockToMutations; std::map< std::pair< int32_t, int32_t >, bool > blockToInversion; + capnp::MallocMessageBuilder message; + for(size_t i = 0; i < root->nucMutation.size(); i++) { const panmanUtils::NucMut& mutation = root->nucMutation[i]; - panman::nucMut nm; - nm.set_nucposition(mutation.nucPosition); + panman::NucMut::Builder nm = message.initRoot(); + nm.setNucPosition(mutation.nucPosition); if(mutation.nucGapPosition != -1) { - nm.set_nucgapposition(mutation.nucGapPosition); - nm.set_nucgapexist(true); + nm.setNucGapPosition(mutation.nucGapPosition); + nm.setNucGapExist(true); } else { - nm.set_nucgapexist(false); + nm.setNucGapExist(false); } - nm.set_mutinfo((((mutation.nucs) >> (24 - (mutation.mutInfo >> 4)*4)) << 8) + mutation.mutInfo); + + nm.setMutInfo((((mutation.nucs) >> (24 - (mutation.mutInfo >> 4)*4)) << 8) + mutation.mutInfo); blockToMutations[std::make_pair(mutation.primaryBlockId, mutation.secondaryBlockId)].first.push_back(nm); blockToMutations[std::make_pair(mutation.primaryBlockId, mutation.secondaryBlockId)].second = 2; + } for(size_t i = 0; i < root->blockMutation.size(); i++) { @@ -2123,47 +2157,47 @@ void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, 
panman::tree& blockToInversion[std::make_pair(mutation.primaryBlockId, mutation.secondaryBlockId)] = mutation.inversion; } + ::capnp::List::Builder mutationsBuilder = n.initMutations(blockToMutations.size()); + size_t blockToMutationsCount=0; for(auto u: blockToMutations) { - panman::mutation mutation; - mutation.set_blockmutexist((u.second.second != 2)); - mutation.set_blockmutinfo(u.second.second); + panman::Mutation::Builder mutation = mutationsBuilder[blockToMutationsCount++]; + mutation.setBlockMutExist((u.second.second != 2)); + mutation.setBlockMutInfo(u.second.second); if(u.second.second != 2) { // block mutation exists - mutation.set_blockinversion(blockToInversion[u.first]); + mutation.setBlockInversion(blockToInversion[u.first]); } else { - mutation.set_blockinversion(true); + mutation.setBlockInversion(true); } int32_t primaryBlockId = u.first.first; int32_t secondaryBlockId = u.first.second; if(secondaryBlockId != -1) { - mutation.set_blockid(((int64_t)primaryBlockId << 32) + secondaryBlockId); - mutation.set_blockgapexist(true); + mutation.setBlockId(((int64_t)primaryBlockId << 32) + secondaryBlockId); + mutation.setBlockGapExist(true); } else { - mutation.set_blockid(((int64_t)primaryBlockId << 32)); - mutation.set_blockgapexist(false); + mutation.setBlockId(((int64_t)primaryBlockId << 32)); + mutation.setBlockGapExist(false); } - for(auto v: u.second.first) { - mutation.add_nucmutation(); - *mutation.mutable_nucmutation(mutation.nucmutation_size() - 1) = v; + + ::capnp::List::Builder nucMutationBuilder = mutation.initNucMutation(u.second.first.size()); + for(auto i=0; i::Builder annotationsBuilder = n.initAnnotations(root->annotations.size()); for(size_t i = 0; i < root->annotations.size(); i++) { - n.add_annotations(root->annotations[i]); + annotationsBuilder.set(i,root->annotations[i]); } - treeToWrite.add_nodes(); - *treeToWrite.mutable_nodes( treeToWrite.nodes_size() - 1 ) = n; - for(auto child: root->children) { - getNodesPreorder(child, 
treeToWrite); + getNodesPreorder(child, nodesBuilder, nodeIndex); } } -void getNodesRootedAt(std::set< std::string >& nodeIds, panmanUtils::Node* node) { +void getNodesRootedAt(std::set& nodeIds, panmanUtils::Node* node) { if(node == nullptr) { return; } @@ -2175,7 +2209,7 @@ void getNodesRootedAt(std::set< std::string >& nodeIds, panmanUtils::Node* node) } // Write PanMAT to file -void panmanUtils::Tree::writeToFile(std::ostream& fout, panmanUtils::Node* node) { +void panmanUtils::Tree::writeToFile(kj::std::StdOutputStream& fout, panmanUtils::Node* node) { if(node == nullptr) { node = root; } @@ -2184,15 +2218,19 @@ void panmanUtils::Tree::writeToFile(std::ostream& fout, panmanUtils::Node* node) std::set< std::string > nodeIds; getNodesRootedAt(nodeIds, node); - panman::tree treeToWrite; - getNodesPreorder(node, treeToWrite); + capnp::MallocMessageBuilder message; + panman::Tree::Builder treeToWrite = message.initRoot(); + + capnp::List::Builder nodesBuilder = treeToWrite.initNodes(allNodes.size()); + size_t nodeIndex=0; + getNodesPreorder(node, nodesBuilder, nodeIndex); + assert(nodeIndex==allNodes.size()); std::string newick = getNewickString(node); - treeToWrite.set_newick(newick); + treeToWrite.setNewick(newick); - std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t, bool > > > - consensusSeqToBlockIds; + std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t, bool > > > consensusSeqToBlockIds; for(auto block: blocks) { int64_t blockId; @@ -2207,78 +2245,93 @@ void panmanUtils::Tree::writeToFile(std::ostream& fout, panmanUtils::Node* node) std::make_pair(blockId, blockGapExists)); } + ::capnp::List::Builder consensusSeqMapBuilder = treeToWrite.initConsensusSeqMap(consensusSeqToBlockIds.size()); + int consensusSeqMapBuilderCount = 0; for(auto u: consensusSeqToBlockIds) { - panman::consensusSeqToBlockIds c; - for(auto v: u.first) { - c.add_consensusseq(v); + panman::ConsensusSeqToBlockIds::Builder c = 
consensusSeqMapBuilder[consensusSeqMapBuilderCount]; + + ::capnp::List::Builder blockIdBuilder = c.initBlockId(u.first.size()); + ::capnp::List::Builder conSeqBuilder = c.initConsensusSeq(u.first.size()); + ::capnp::List::Builder blockGapExistBuilder = c.initBlockGapExist(u.first.size()); + + for(auto v=0; v::Builder gapsBuilder = treeToWrite.initGaps(gaps.size()); for(size_t i = 0; i < gaps.size(); i++) { - panman::gapList gl; + panman::GapList::Builder gl = gapsBuilder[i]; + + ::capnp::List::Builder nucGapLengthBuilder = gl.initNucGapLength(gaps[i].nucPosition.size()); + ::capnp::List::Builder nucPositionBuilder = gl.initNucPosition(gaps[i].nucPosition.size()); + for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { - gl.add_nucposition(gaps[i].nucPosition[j]); - gl.add_nucgaplength(gaps[i].nucGapLength[j]); + nucPositionBuilder.set(j, gaps[i].nucPosition[j]); + nucGapLengthBuilder.set(j,gaps[i].nucGapLength[j]); } - if(gaps[i].secondaryBlockId != -1) { - gl.set_blockid(((int64_t)gaps[i].primaryBlockId << 32) + gaps[i].secondaryBlockId); - gl.set_blockgapexist(true); + if (gaps[i].secondaryBlockId != -1) { + gl.setBlockId(((int64_t)gaps[i].primaryBlockId << 32) + gaps[i].secondaryBlockId); + gl.setBlockGapExist(true); } else { - gl.set_blockid(((int64_t)gaps[i].primaryBlockId << 32)); - gl.set_blockgapexist(false); + gl.setBlockId(((int64_t)gaps[i].primaryBlockId << 32)); + gl.setBlockGapExist(false); } - treeToWrite.add_gaps(); - *treeToWrite.mutable_gaps( treeToWrite.gaps_size() - 1 ) = gl; + } + ::capnp::List::Builder circularSeqBuilder = treeToWrite.initCircularSequences(circularSequences.size()); + size_t circularSequencesCount = 0; for(auto u: circularSequences) { // Check if sequence is a part of the subtree being written if(nodeIds.find(u.first) == nodeIds.end()) { continue; } - - panman::circularOffset co; - co.set_sequenceid(u.first); - co.set_offset(u.second); - treeToWrite.add_circularsequences(); - 
*treeToWrite.mutable_circularsequences(treeToWrite.circularsequences_size()-1) = co; + panman::CircularOffset::Builder co = circularSeqBuilder[circularSequencesCount++]; + co.setSequenceId(u.first); + co.setOffset(u.second); } + assert(circularSequencesCount==circularSequences.size()); + ::capnp::List::Builder rotationIndexesBuilder = treeToWrite.initRotationIndexes(rotationIndexes.size()); + size_t rotationIndexesCount = 0; for(auto u: rotationIndexes) { // Check if sequence is a part of the subtree being written if(nodeIds.find(u.first) == nodeIds.end()) { continue; } - panman::rotationIndex ri; - ri.set_sequenceid(u.first); - ri.set_blockoffset(u.second); - treeToWrite.add_rotationindexes(); - *treeToWrite.mutable_rotationindexes(treeToWrite.rotationindexes_size()-1) = ri; + panman::RotationIndex::Builder ri = rotationIndexesBuilder[rotationIndexesCount++]; + ri.setSequenceId(u.first); + ri.setBlockOffset(u.second); } + assert(rotationIndexesCount==rotationIndexes.size()); + ::capnp::List::Builder sequenceInvertedBuilder = treeToWrite.initSequencesInverted(sequenceInverted.size()); + size_t sequenceInvertedCount = 0; for(auto u: sequenceInverted) { // Check if sequence is a part of the subtree being written if(nodeIds.find(u.first) == nodeIds.end()) { continue; } - panman::sequenceInverted si; - si.set_sequenceid(u.first); - si.set_inverted(u.second); - treeToWrite.add_sequencesinverted(); - *treeToWrite.mutable_sequencesinverted(treeToWrite.sequencesinverted_size()-1) = si; + panman::SequenceInverted::Builder si = sequenceInvertedBuilder[sequenceInvertedCount++]; + si.setSequenceId(u.first); + si.setInverted(u.second); } + assert(sequenceInvertedCount == sequenceInverted.size()); - if (!treeToWrite.SerializeToOstream(&fout)) { - std::cerr << "Failed to write to output file." 
<< std::endl; - } + // Todo:: check if write was successful + ::capnp::writeMessage(fout, message); + // if (!treeToWrite.SerializeToOstream(&fout)) { + // std::cerr << "Failed to write to output file." << std::endl; + // } } void panmanUtils::Tree::getBlockSequenceFromReference(block_t& sequence, bool& blockExists, bool& blockStrand, std::string reference, int64_t primaryBlockId, int64_t secondaryBlockId) { @@ -5229,17 +5282,21 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< std::ifstream >& treeFiles, std:: } panmanUtils::TreeGroup::TreeGroup(std::istream& fin) { - panman::treeGroup TG; + kj::std::StdInputStream kjInputStream(fin); + capnp::InputStreamMessageReader messageReader(kjInputStream); - if(!TG.ParseFromIstream(&fin)) { - throw std::invalid_argument("Could not read tree group from input file."); - } + std::cout << "About to start reading root.." << std::endl; - for(int i = 0; i < TG.trees_size(); i++) { - trees.emplace_back(TG.trees(i)); + panman::TreeGroup::Reader TG = messageReader.getRoot(); + + std::cout << "Ending reading root.." << std::endl; + + for (auto treeFromTG: TG.getTrees()){ + trees.emplace_back(treeFromTG); } - for(int i = 0; i < TG.complexmutations_size(); i++) { - complexMutations.emplace_back(TG.complexmutations(i)); + + for (auto compMutFromTG: TG.getComplexMutations()){ + complexMutations.emplace_back(compMutFromTG); } } @@ -5249,17 +5306,30 @@ void panmanUtils::TreeGroup::printFASTA(std::ofstream& fout) { } } -void panmanUtils::TreeGroup::writeToFile(std::ostream& fout) { - panman::treeGroup treeGroupToWrite; +void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { + capnp::MallocMessageBuilder message; + panman::TreeGroup::Builder treeGroupToWrite = message.initRoot(); + + capnp::List::Builder treestoWriteBuilder = treeGroupToWrite.initTrees(trees.size()); + size_t treesCount = 0; + std::cout << "Writing Trees..." 
<< std::endl; for(auto& tree: trees) { - panman::tree treeToWrite; + std::cout << "Tree Count:" << treesCount << "..." << std::endl; + panman::Tree::Builder treeToWrite = treestoWriteBuilder[treesCount++]; Node* node = tree.root; - tree.getNodesPreorder(node, treeToWrite); + capnp::List::Builder nodesBuilder = treeToWrite.initNodes(tree.allNodes.size()); + size_t nodeIndex=0; + + std::cout << "Writing Nodes..." << std::endl; + tree.getNodesPreorder(node, nodesBuilder, nodeIndex); + assert(nodeIndex == tree.allNodes.size()); + std::string newick = tree.getNewickString(node); - treeToWrite.set_newick(newick); + std::cout << newick << std::endl; + treeToWrite.setNewick(newick); std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t, bool > > > consensusSeqToBlockIds; @@ -5276,72 +5346,90 @@ void panmanUtils::TreeGroup::writeToFile(std::ostream& fout) { std::make_pair(blockId, blockGapExists)); } + + ::capnp::List::Builder consensusSeqMapBuilder = treeToWrite.initConsensusSeqMap(consensusSeqToBlockIds.size()); + int consensusSeqMapBuilderCount = 0; for(auto u: consensusSeqToBlockIds) { - panman::consensusSeqToBlockIds c; - for(auto v: u.first) { - c.add_consensusseq(v); + panman::ConsensusSeqToBlockIds::Builder c = consensusSeqMapBuilder[consensusSeqMapBuilderCount]; + + ::capnp::List::Builder blockIdBuilder = c.initBlockId(u.first.size()); + ::capnp::List::Builder conSeqBuilder = c.initConsensusSeq(u.first.size()); + ::capnp::List::Builder blockGapExistBuilder = c.initBlockGapExist(u.first.size()); + + for(auto v=0; v::Builder gapsBuilder = treeToWrite.initGaps(tree.gaps.size()); for(size_t i = 0; i < tree.gaps.size(); i++) { - panman::gapList gl; + panman::GapList::Builder gl = gapsBuilder[i]; + + ::capnp::List::Builder nucGapLengthBuilder = gl.initNucGapLength(tree.gaps[i].nucPosition.size()); + ::capnp::List::Builder nucPositionBuilder = gl.initNucPosition(tree.gaps[i].nucPosition.size()); + for(size_t j = 0; j < tree.gaps[i].nucPosition.size(); j++) { 
- gl.add_nucposition(tree.gaps[i].nucPosition[j]); - gl.add_nucgaplength(tree.gaps[i].nucGapLength[j]); + nucPositionBuilder.set(j, tree.gaps[i].nucPosition[j]); + nucGapLengthBuilder.set(j,tree.gaps[i].nucGapLength[j]); } - if(tree.gaps[i].secondaryBlockId != -1) { - gl.set_blockid(((int64_t)tree.gaps[i].primaryBlockId << 32) + tree.gaps[i] - .secondaryBlockId); - gl.set_blockgapexist(true); + if (tree.gaps[i].secondaryBlockId != -1) { + gl.setBlockId(((int64_t)tree.gaps[i].primaryBlockId << 32) + tree.gaps[i].secondaryBlockId); + gl.setBlockGapExist(true); } else { - gl.set_blockid(((int64_t)tree.gaps[i].primaryBlockId << 32)); - gl.set_blockgapexist(false); + gl.setBlockId(((int64_t)tree.gaps[i].primaryBlockId << 32)); + gl.setBlockGapExist(false); } - treeToWrite.add_gaps(); - *treeToWrite.mutable_gaps( treeToWrite.gaps_size() - 1 ) = gl; + } + + ::capnp::List::Builder circularSeqBuilder = treeToWrite.initCircularSequences(tree.circularSequences.size()); + size_t circularSequencesCount = 0; for(auto u: tree.circularSequences) { - panman::circularOffset co; - co.set_sequenceid(u.first); - co.set_offset(u.second); - treeToWrite.add_circularsequences(); - *treeToWrite.mutable_circularsequences(treeToWrite.circularsequences_size()-1) = co; + panman::CircularOffset::Builder co = circularSeqBuilder[circularSequencesCount++]; + co.setSequenceId(u.first); + co.setOffset(u.second); } + assert(circularSequencesCount==tree.circularSequences.size()); + ::capnp::List::Builder rotationIndexesBuilder = treeToWrite.initRotationIndexes(tree.rotationIndexes.size()); + size_t rotationIndexesCount = 0; for(auto u: tree.rotationIndexes) { - panman::rotationIndex ri; - ri.set_sequenceid(u.first); - ri.set_blockoffset(u.second); - treeToWrite.add_rotationindexes(); - *treeToWrite.mutable_rotationindexes(treeToWrite.rotationindexes_size()-1) = ri; + panman::RotationIndex::Builder ri = rotationIndexesBuilder[rotationIndexesCount++]; + ri.setSequenceId(u.first); + 
ri.setBlockOffset(u.second); } + assert(rotationIndexesCount==tree.rotationIndexes.size()); + ::capnp::List::Builder sequenceInvertedBuilder = treeToWrite.initSequencesInverted(tree.sequenceInverted.size()); + size_t sequenceInvertedCount = 0; for(auto u: tree.sequenceInverted) { - panman::sequenceInverted si; - si.set_sequenceid(u.first); - si.set_inverted(u.second); - treeToWrite.add_sequencesinverted(); - *treeToWrite.mutable_sequencesinverted(treeToWrite.sequencesinverted_size()-1) = si; + panman::SequenceInverted::Builder si = sequenceInvertedBuilder[sequenceInvertedCount++]; + si.setSequenceId(u.first); + si.setInverted(u.second); } - - treeGroupToWrite.add_trees(); - *treeGroupToWrite.mutable_trees( treeGroupToWrite.trees_size() - 1 ) = treeToWrite; + assert(sequenceInvertedCount == tree.sequenceInverted.size()); } + + capnp::List::Builder complexMutBuilder = treeGroupToWrite.initComplexMutations(complexMutations.size()); + size_t cmplxMutCount=0; + std::cout << "Writing Complex Mutations..." << std::endl; for(auto cm: complexMutations) { - treeGroupToWrite.add_complexmutations(); - *treeGroupToWrite.mutable_complexmutations(treeGroupToWrite - .complexmutations_size()-1) = cm.toProtobuf(); + std::cout << "Cmplx mutation Count:" << cmplxMutCount << "..." << std::endl; + complexMutBuilder[cmplxMutCount++] = cm.toCapnProto(); } - if(!treeGroupToWrite.SerializeToOstream(&fout)) { - std::cerr << "Failed to write to output file." << std::endl; - } + // ToDo check if the write was successful + ::capnp::writeMessage(fout, message); + // if(!treeGroupToWrite.SerializeToOstream(&fout)) { + // std::cerr << "Failed to write to output file." 
<< std::endl; + // } } void panmanUtils::TreeGroup::printComplexMutations(std::ostream& fout) { diff --git a/src/panman.hpp b/src/panman.hpp index b9f016c..2829838 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -10,9 +10,12 @@ #include #include -#include "panman.pb.h" +#include "panman.capnp.h" #include "common.hpp" +#include +#include +#include namespace panmanUtils { @@ -54,6 +57,17 @@ struct NucMut { uint8_t mutInfo; uint32_t nucs; + // Create SNP mutation for MSA (optimized for memory) + NucMut( const std::tuple< int, int8_t, int8_t>& mutationInfo ) { + // primaryBlockId, secondaryBlockId, pos, gapPos, type, char + primaryBlockId = 0; + secondaryBlockId = -1; + nucPosition = std::get<0>(mutationInfo); + nucGapPosition = -1; + mutInfo = (int)std::get<1>(mutationInfo) + (1 << 4); + nucs = ((int)std::get<2>(mutationInfo) << 20); + } + // Create SNP mutation NucMut( const std::tuple< int, int, int, int, int, int >& mutationInfo ) { // primaryBlockId, secondaryBlockId, pos, gapPos, type, char @@ -65,6 +79,44 @@ struct NucMut { nucs = (std::get<5>(mutationInfo) << 20); } + // Create non-SNP mutations from SNP mutations at consecutive positions for MSA + NucMut(const std::vector< std::tuple< int, int8_t, int8_t > >& mutationArray, + int start, int end) { + primaryBlockId = 0; + secondaryBlockId = -1; + + mutInfo = ((end - start) << 4); + // type + switch(std::get<1>(mutationArray[start])) { + case panmanUtils::NucMutationType::NSNPS: + mutInfo += panmanUtils::NucMutationType::NS; + break; + case panmanUtils::NucMutationType::NSNPI: + mutInfo += panmanUtils::NucMutationType::NI; + break; + case panmanUtils::NucMutationType::NSNPD: + mutInfo += panmanUtils::NucMutationType::ND; + break; + case panmanUtils::NucMutationType::NS: + mutInfo += panmanUtils::NucMutationType::NS; + break; + case panmanUtils::NucMutationType::NI: + mutInfo += panmanUtils::NucMutationType::NI; + break; + case panmanUtils::NucMutationType::ND: + mutInfo += panmanUtils::NucMutationType::ND; + 
break; + } + + nucPosition = (int)std::get<0>(mutationArray[start]); + nucGapPosition = -1; + + nucs = 0; + for(int i = start; i < end; i++) { + nucs += (std::get<2>(mutationArray[i]) << (4*(5-(i - start)))); + } + } + // Create non-SNP mutations from SNP mutations at consecutive positions NucMut(const std::vector< std::tuple< int, int, int, int, int, int > >& mutationArray, int start, int end) { @@ -104,11 +156,11 @@ struct NucMut { } // Extract mutation from protobuf nucMut object - NucMut(panman::nucMut mutation, int64_t blockId, bool blockGapExist) { - nucPosition = mutation.nucposition(); + NucMut(panman::NucMut::Reader mutation, int64_t blockId, bool blockGapExist) { + nucPosition = mutation.getNucPosition(); primaryBlockId = (blockId >> 32); - mutInfo = (mutation.mutinfo() & 0xFF); - nucs = (mutation.mutinfo() >> 8); + mutInfo = (mutation.getMutInfo() & 0xFF); + nucs = (mutation.getMutInfo() >> 8); nucs = ((nucs) << (24 - (mutInfo >> 4)*4)); if(blockGapExist) { @@ -117,8 +169,8 @@ struct NucMut { secondaryBlockId = -1; } - if(mutation.nucgapexist()) { - nucGapPosition = mutation.nucgapposition(); + if(mutation.getNucGapExist()) { + nucGapPosition = mutation.getNucGapPosition(); } else { nucGapPosition = -1; } @@ -140,17 +192,17 @@ struct BlockMut { // block is inverted or not bool inversion; - void loadFromProtobuf(panman::mutation mutation) { - primaryBlockId = (mutation.blockid() >> 32); - if(mutation.blockgapexist()) { - secondaryBlockId = (mutation.blockid() & 0xFFFFFFFF); + void loadFromProtobuf(panman::Mutation::Reader mutation) { + primaryBlockId = (mutation.getBlockId() >> 32); + if(mutation.getBlockGapExist()) { + secondaryBlockId = (mutation.getBlockId() & 0xFFFFFFFF); } else { secondaryBlockId = -1; } - blockMutInfo = mutation.blockmutinfo(); + blockMutInfo = mutation.getBlockMutInfo(); // Whether the mutation is a block inversion or not. 
Inversion is marked by // `blockMutInfo = deletion` and `inversion = true` - inversion = mutation.blockinversion(); + inversion = mutation.getBlockInversion(); } BlockMut(size_t blockId, std::pair< BlockMutationType, bool > type, int secondaryBId = -1) { @@ -234,7 +286,7 @@ class Tree { // memory, assign mutations from the proto file to the tree nodes using preorder // traversal void assignMutationsToNodes(Node* root, size_t& currentIndex, - std::vector< panman::node >& nodes); + std::vector< panman::Node::Reader >& nodes); // Get the total number of mutations of given type int getTotalParsimonyParallel(NucMutationType nucMutType, @@ -321,7 +373,7 @@ class Tree { std::unordered_map< std::string, Node* > allNodes; - Tree(const panman::tree& mainTree); + Tree(const panman::Tree::Reader& mainTree); Tree(std::istream& fin, FILE_TYPE ftype = FILE_TYPE::PANMAT); Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE ftype = FILE_TYPE::GFA, std::string reference = ""); @@ -334,7 +386,7 @@ class Tree { std::unordered_map< std::string, bool >& si, const BlockGapList& bgl); - void protoMATToTree(const panman::tree& mainTree); + void protoMATToTree(const panman::Tree::Reader& mainTree); // Fitch Algorithm on Nucleotide mutations int nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states); @@ -410,11 +462,11 @@ class Tree { // are with respect to the root sequence. 
The strands of the terminal blocks in all // sequences are assumed to be the same as their strands in the root sequence for the // purpose of splitting the terminal blocks during extraction - void extractPanMATSegment(std::ostream& fout, int64_t start, int64_t end); + void extractPanMATSegment(kj::std::StdOutputStream& fout, int64_t start, int64_t end); Node* subtreeExtractParallel(std::vector< std::string > nodeIds, const std::set< std::string >& nodeIdsToDefinitelyInclude = {}); // Node* subtreeExtractParallel(std::vector< std::string > nodeIds); - void writeToFile(std::ostream& fout, Node* node = nullptr); + void writeToFile(kj::std::StdOutputStream& fout, Node* node = nullptr); std::string getNewickString(Node* node); std::string getStringFromReference(std::string reference, bool aligned = true, bool incorporateInversions=true); @@ -453,7 +505,7 @@ class Tree { std::vector< std::string > searchByAnnotation(std::string annotation); void convertToGFA(std::ostream& fout); void printFASTAFromGFA(std::ifstream& fin, std::ofstream& fout); - void getNodesPreorder(panmanUtils::Node* root, panman::tree& treeToWrite); + void getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder& nodesBuilder, size_t& nodeIndex); size_t getGlobalCoordinate(int primaryBlockId, int secondaryBlockId, int nucPosition, int nucGapPosition); @@ -529,102 +581,102 @@ struct ComplexMutation { nucGapPositionEnd2 = std::get<3>(t4); } - ComplexMutation(panman::complexMutation cm) { - mutationType = (cm.mutationtype()? 'H': 'R'); - treeIndex1 = cm.treeindex1(); - treeIndex2 = cm.treeindex2(); - treeIndex3 = cm.treeindex3(); - sequenceId1 = cm.sequenceid1(); - sequenceId2 = cm.sequenceid2(); - sequenceId3 = cm.sequenceid3(); - - primaryBlockIdStart1 = (cm.blockidstart1() >> 32); - secondaryBlockIdStart1 = (cm.blockgapexiststart1()? - (cm.blockidstart1()&(0xFFFFFFFF)): -1); - nucPositionStart1 = cm.nucpositionstart1(); - nucGapPositionStart1 = (cm.nucgapexiststart1()? 
(cm.nucgappositionstart1()) : -1); - - primaryBlockIdStart2 = (cm.blockidstart2() >> 32); - secondaryBlockIdStart2 = (cm.blockgapexiststart2()? - (cm.blockidstart2()&(0xFFFFFFFF)): -1); - nucPositionStart2 = cm.nucpositionstart2(); - nucGapPositionStart2 = (cm.nucgapexiststart2()? (cm.nucgappositionstart2()) : -1); - - primaryBlockIdEnd1 = (cm.blockidend1() >> 32); - secondaryBlockIdEnd1 = (cm.blockgapexistend1()? (cm.blockidend1()&(0xFFFFFFFF)): -1); - nucPositionEnd1 = cm.nucpositionend1(); - nucGapPositionEnd1 = (cm.nucgapexistend1()? (cm.nucgappositionend1()) : -1); - - primaryBlockIdEnd2 = (cm.blockidend2() >> 32); - secondaryBlockIdEnd2 = (cm.blockgapexistend2()? (cm.blockidend2()&(0xFFFFFFFF)): -1); - nucPositionEnd2 = cm.nucpositionend2(); - nucGapPositionEnd2 = (cm.nucgapexistend2()? (cm.nucgappositionend2()) : -1); + ComplexMutation(panman::ComplexMutation::Reader cm) { + mutationType = (cm.getMutationType()? 'H': 'R'); + treeIndex1 = cm.getTreeIndex1(); + treeIndex2 = cm.getTreeIndex2(); + treeIndex3 = cm.getTreeIndex3(); + sequenceId1 = cm.getSequenceId1(); + sequenceId2 = cm.getSequenceId2(); + sequenceId3 = cm.getSequenceId3(); + + primaryBlockIdStart1 = (cm.getBlockIdStart1() >> 32); + secondaryBlockIdStart1 = (cm.getBlockGapExistEnd1()? + (cm.getBlockIdStart1()&(0xFFFFFFFF)): -1); + nucPositionStart1 = cm.getNucPositionStart1(); + nucGapPositionStart1 = (cm.getNucGapExistStart1()? (cm.getNucGapPositionStart1()) : -1); + + primaryBlockIdStart2 = (cm.getBlockIdStart2() >> 32); + secondaryBlockIdStart2 = (cm.getNucGapExistStart2()? + (cm.getBlockIdStart2()&(0xFFFFFFFF)): -1); + nucPositionStart2 = cm.getNucPositionStart2(); + nucGapPositionStart2 = (cm.getNucGapExistStart2()? (cm.getNucGapPositionStart2()) : -1); + + primaryBlockIdEnd1 = (cm.getBlockIdEnd1() >> 32); + secondaryBlockIdEnd1 = (cm.getBlockGapExistEnd1()? (cm.getBlockIdEnd1()&(0xFFFFFFFF)): -1); + nucPositionEnd1 = cm.getNucPositionEnd1(); + nucGapPositionEnd1 = (cm.getNucGapExistEnd1()? 
(cm.getNucGapPositionEnd1()) : -1); + + primaryBlockIdEnd2 = (cm.getBlockIdEnd2() >> 32); + secondaryBlockIdEnd2 = (cm.getBlockGapExistEnd2()? (cm.getBlockIdEnd2()&(0xFFFFFFFF)): -1); + nucPositionEnd2 = cm.getNucPositionEnd2(); + nucGapPositionEnd2 = (cm.getNucGapExistEnd2()? (cm.getNucGapPositionEnd2()) : -1); } - panman::complexMutation toProtobuf() { - panman::complexMutation cm; - cm.set_mutationtype(mutationType == 'H'); - cm.set_treeindex1(treeIndex1); - cm.set_treeindex2(treeIndex2); - cm.set_treeindex3(treeIndex3); - cm.set_sequenceid1(sequenceId1); - cm.set_sequenceid2(sequenceId2); - cm.set_sequenceid3(sequenceId3); + panman::ComplexMutation::Builder toCapnProto() { + panman::ComplexMutation::Builder cm(nullptr); + cm.setMutationType(mutationType == 'H'); + cm.setTreeIndex1(treeIndex1); + cm.setTreeIndex2(treeIndex2); + cm.setTreeIndex3(treeIndex3); + cm.setSequenceId1(sequenceId1); + cm.setSequenceId2(sequenceId2); + cm.setSequenceId3(sequenceId3); if(secondaryBlockIdStart1 != -1) { - cm.set_blockgapexiststart1(true); - cm.set_blockidstart1(((int64_t)primaryBlockIdStart1 << 32)+secondaryBlockIdStart1); + cm.setBlockGapExistStart1(true); + cm.setBlockIdStart1(((int64_t)primaryBlockIdStart1 << 32)+secondaryBlockIdStart1); } else { - cm.set_blockgapexiststart1(false); - cm.set_blockidstart1(((int64_t)primaryBlockIdStart1 << 32)); + cm.setBlockGapExistStart1(false); + cm.setBlockIdStart1(((int64_t)primaryBlockIdStart1 << 32)); } - cm.set_nucpositionstart1(nucPositionStart1); + cm.setNucPositionStart1(nucPositionStart1); if(nucGapPositionStart1 != -1) { - cm.set_nucgapexiststart1(true); - cm.set_nucgappositionstart1(nucGapPositionStart1); + cm.setNucGapExistStart1(true); + cm.setNucGapPositionStart1(nucGapPositionStart1); } if(secondaryBlockIdStart2 != -1) { - cm.set_blockgapexiststart2(true); - cm.set_blockidstart2(((int64_t)primaryBlockIdStart2 << 32)+secondaryBlockIdStart2); + cm.setBlockGapExistStart2(true); + 
cm.setBlockIdStart2(((int64_t)primaryBlockIdStart2 << 32)+secondaryBlockIdStart2); } else { - cm.set_blockgapexiststart2(false); - cm.set_blockidstart2(((int64_t)primaryBlockIdStart2 << 32)); + cm.setBlockGapExistStart2(false); + cm.setBlockIdStart2(((int64_t)primaryBlockIdStart2 << 32)); } - cm.set_nucpositionstart2(nucPositionStart2); + cm.setNucPositionStart2(nucPositionStart2); if(nucGapPositionStart2 != -1) { - cm.set_nucgapexiststart2(true); - cm.set_nucgappositionstart2(nucGapPositionStart2); + cm.setNucGapExistStart2(true); + cm.setNucGapPositionStart2(nucGapPositionStart2); } if(secondaryBlockIdEnd1 != -1) { - cm.set_blockgapexistend1(true); - cm.set_blockidend1(((int64_t)primaryBlockIdEnd1 << 32)+secondaryBlockIdEnd1); + cm.setBlockGapExistEnd1(true); + cm.setBlockIdEnd1(((int64_t)primaryBlockIdEnd1 << 32)+secondaryBlockIdEnd1); } else { - cm.set_blockgapexistend1(false); - cm.set_blockidend1(((int64_t)primaryBlockIdEnd1 << 32)); + cm.setBlockGapExistEnd1(false); + cm.setBlockIdEnd1(((int64_t)primaryBlockIdEnd1 << 32)); } - cm.set_nucpositionend1(nucPositionEnd1); + cm.setNucPositionEnd1(nucPositionEnd1); if(nucGapPositionEnd1 != -1) { - cm.set_nucgapexistend1(true); - cm.set_nucgappositionend1(nucGapPositionEnd1); + cm.setNucGapExistEnd1(true); + cm.setNucGapPositionEnd1(nucGapPositionEnd1); } if(secondaryBlockIdEnd2 != -1) { - cm.set_blockgapexistend2(true); - cm.set_blockidend2(((int64_t)primaryBlockIdEnd2 << 32)+secondaryBlockIdEnd2); + cm.setBlockGapExistEnd2(true); + cm.setBlockIdEnd2(((int64_t)primaryBlockIdEnd2 << 32)+secondaryBlockIdEnd2); } else { - cm.set_blockgapexistend2(false); - cm.set_blockidend2(((int64_t)primaryBlockIdEnd2 << 32)); + cm.setBlockGapExistEnd2(false); + cm.setBlockIdEnd2(((int64_t)primaryBlockIdEnd2 << 32)); } - cm.set_nucpositionend2(nucPositionEnd2); + cm.setNucPositionEnd2(nucPositionEnd2); if(nucGapPositionEnd2 != -1) { - cm.set_nucgapexistend2(true); - cm.set_nucgappositionend2(nucGapPositionEnd2); + 
cm.setNucGapExistEnd2(true); + cm.setNucGapPositionEnd2(nucGapPositionEnd2); } return cm; @@ -649,7 +701,7 @@ class TreeGroup { TreeGroup* subnetworkExtract(std::unordered_map< int, std::vector< std::string > >& nodeIds); void printFASTA(std::ofstream& fout); - void writeToFile(std::ostream& fout); + void writeToFile(kj::std::StdOutputStream& fout); void printComplexMutations(std::ostream& fout); }; diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 47039ee..eca52d5 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -275,6 +275,7 @@ search for") } void writePanMAN(po::variables_map &globalVm, panmanUtils::TreeGroup *TG) { + std::cout << "Writing PanMAN" << std::endl; std::string fileName = globalVm["output-file"].as< std::string >(); std::filesystem::create_directory("./panman"); @@ -289,7 +290,10 @@ void writePanMAN(po::variables_map &globalVm, panmanUtils::TreeGroup *TG) { outPMATBuffer.push(boost::iostreams::lzma_compressor(params)); outPMATBuffer.push(outputFile); std::ostream outstream(&outPMATBuffer); - TG->writeToFile(outstream); + + kj::std::StdOutputStream outputStream(outstream); + + TG->writeToFile(outputStream); boost::iostreams::close(outPMATBuffer); outputFile.close(); @@ -315,7 +319,8 @@ void writePanMAN(po::variables_map &globalVm, panmanUtils::Tree *T) { outPMATBuffer.push(boost::iostreams::lzma_compressor(params)); outPMATBuffer.push(outputFile); std::ostream outstream(&outPMATBuffer); - T->writeToFile(outstream); + kj::std::StdOutputStream outputStream(outstream); + T->writeToFile(outputStream); boost::iostreams::close(outPMATBuffer); outputFile.close(); @@ -350,7 +355,7 @@ void parseAndExecute(int argc, char* argv[]) { // Load PanMAT file directly into memory std::string fileName = globalVm["input-panmat"].as< std::string >(); - std::ifstream inputFile(fileName); + std::ifstream inputFile(fileName, std::ios_base::in | std::ios_base::binary); boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; auto 
treeBuiltStart = std::chrono::high_resolution_clock::now(); @@ -690,7 +695,8 @@ void parseAndExecute(int argc, char* argv[]) { outPMATBuffer.push(boost::iostreams::lzma_compressor(params)); outPMATBuffer.push(outputFile); std::ostream outstream(&outPMATBuffer); - T->writeToFile(outstream, T->subtreeExtractParallel(nodeIds)); + kj::std::StdOutputStream outputStream(outstream); + T->writeToFile(outputStream, T->subtreeExtractParallel(nodeIds)); boost::iostreams::close(outPMATBuffer); outputFile.close(); @@ -755,9 +761,9 @@ void parseAndExecute(int argc, char* argv[]) { outPMATBuffer.push(boost::iostreams::lzma_compressor(params)); outPMATBuffer.push(outputFile); std::ostream outstream(&outPMATBuffer); - + kj::std::StdOutputStream outputStream(outstream); panmanUtils::TreeGroup* subnetwork = TG->subnetworkExtract(nodeIds); - subnetwork->writeToFile(outstream); + subnetwork->writeToFile(outputStream); boost::iostreams::close(outPMATBuffer); outputFile.close(); diff --git a/src/panmanUtils.hpp b/src/panmanUtils.hpp index 52431d9..901bc7a 100644 --- a/src/panmanUtils.hpp +++ b/src/panmanUtils.hpp @@ -12,7 +12,7 @@ #include #include -#include "panman.pb.h" +#include "panman.capnp.h" #include "panman.hpp" diff --git a/src/subnet.cpp b/src/subnet.cpp index fccf3d0..bed5f02 100644 --- a/src/subnet.cpp +++ b/src/subnet.cpp @@ -169,8 +169,9 @@ panmanUtils::TreeGroup* panmanUtils::TreeGroup::subnetworkExtract(std::unordered outPMATBuffer.push(boost::iostreams::gzip_compressor()); outPMATBuffer.push(outputFile); std::ostream outstream(&outPMATBuffer); - - trees[i].writeToFile(outstream, trees[i].subtreeExtractParallel(subtreeNodeIds, cplxMutationNodeIds)); + kj::std::StdOutputStream outputStream(outstream); + + trees[i].writeToFile(outputStream, trees[i].subtreeExtractParallel(subtreeNodeIds, cplxMutationNodeIds)); boost::iostreams::close(outPMATBuffer); outputFile.close(); From 7b4314b1f861f5e56749613846a3da364720b4f1 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Tue, 27 Aug 
2024 12:12:06 -0700 Subject: [PATCH 003/103] adding capn proto file --- panman.capnp | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 panman.capnp diff --git a/panman.capnp b/panman.capnp new file mode 100644 index 0000000..6a02858 --- /dev/null +++ b/panman.capnp @@ -0,0 +1,121 @@ +@0xcce15f4779921ec4; #file id + +using Cxx = import "/capnp/c++.capnp"; +$Cxx.namespace("panman"); + +struct NucMut +{ + nucPosition @0: Int32; + nucGapPosition @1: Int32; + nucGapExist @2: Bool; + mutInfo @3: UInt32; +} + +struct Mutation +{ + blockId @0: Int64; + blockGapExist @1: Bool; + blockMutExist @2: Bool; + blockMutInfo @3: Bool; + blockInversion @4: Bool; + nucMutation @5: List(NucMut); +} + +struct Node +{ + mutations @0: List(Mutation); + annotations @1: List(Text); +} + +struct ConsensusSeqToBlockIds +{ + blockId @0: List(Int64); + consensusSeq @1: List(UInt32); + blockGapExist @2: List(Bool); + chromosomeName @3: List(Text); +} + +struct GapList +{ + blockId @0: Int64; + blockGapExist @1: Bool; + nucGapLength @2: List(Int32); + nucPosition @3: List(Int32); +} + +struct BlockGapList +{ + blockPosition @0: List(Int32); + blockGapLength @1: List(Int32); +} + +struct CircularOffset +{ + sequenceId @0: Text; + offset @1: Int32; +} + +struct RotationIndex +{ + sequenceId @0: Text; + blockOffset @1: Int32; +} + +struct SequenceInverted +{ + sequenceId @0: Text; + inverted @1: Bool; +} + +struct Tree +{ + newick @0: Text; + nodes @1: List(Node); + consensusSeqMap @2: List(ConsensusSeqToBlockIds); + gaps @3: List(GapList); + blockGaps @4: BlockGapList; + circularSequences @5: List(CircularOffset); + rotationIndexes @6: List(RotationIndex); + sequencesInverted @7: List(SequenceInverted); +} + +struct ComplexMutation { + mutationType @0: Bool; + treeIndex1 @1: Int32; + treeIndex2 @2: Int32; + treeIndex3 @3: Int32; + sequenceId1 @4: Text; + sequenceId2 @5: Text; + + blockIdStart1 @6: Int64; + blockGapExistStart1 @7: Bool; + 
nucPositionStart1 @8: Int32; + nucGapPositionStart1 @9: Int32; + nucGapExistStart1 @10: Bool; + + blockIdEnd1 @11: Int64; + blockGapExistEnd1 @12: Bool; + nucPositionEnd1 @13: Int32; + nucGapPositionEnd1 @14: Int32; + nucGapExistEnd1 @15: Bool; + + blockIdStart2 @16: Int64; + blockGapExistStart2 @17: Bool; + nucPositionStart2 @18: Int32; + nucGapPositionStart2 @19: Int32; + nucGapExistStart2 @20: Bool; + + blockIdEnd2 @21: Int64; + blockGapExistEnd2 @22: Bool; + nucPositionEnd2 @23: Int32; + nucGapPositionEnd2 @24: Int32; + nucGapExistEnd2 @25: Bool; + + sequenceId3 @26: Text; +} + +struct TreeGroup +{ + trees @0: List(Tree); + complexMutations @1: List(ComplexMutation); +} From 25de4ef8ac3f72f9bb10086719566954ee846ca7 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 26 Sep 2024 15:57:04 -0700 Subject: [PATCH 004/103] capnp overloading issue resolved --- CMakeLists.txt | 19 +++++++ src/fitchSankoff.cpp | 1 + src/panman.cpp | 128 ++++++++++++++++++++++++++++--------------- src/panman.hpp | 9 ++- src/panmanUtils.cpp | 55 +++++++++++++++++++ 5 files changed, 167 insertions(+), 45 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 524f3e8..22ab5fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ find_package(jsoncpp CONFIG REQUIRED) file(GLOB PANMAT_SRCS "src/panmanUtils.cpp" "src/panman.cpp" "src/panmanUtils.hpp" "src/panman.hpp") +file(GLOB CAP_SRCS "src/capnprototest.cpp") if(DEFINED CapnProto_PATH) add_executable(panmanUtils @@ -61,6 +62,13 @@ if(DEFINED CapnProto_PATH) TARGET panmanUtils PROTOS panman.capnp) + add_executable(capnp ${CAP_SRCS}) + capnp_generate( + LANGUAGE cpp + TARGET capnp + PROTOS panman.capnp + ) + else() # protobuf_generate_cpp( @@ -76,6 +84,12 @@ else() ${CAPNP_SRCS} ${CAPNP_HDRS} ) + + add_executable(capnp + ${CAP_SRCS} + ${CAPNP_SRCS} + ${CAPNP_HDRS} + ) endif() @@ -83,3 +97,8 @@ TARGET_COMPILE_OPTIONS(panmanUtils PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) TARGET_LINK_LIBRARIES(panmanUtils PRIVATE stdc++ 
JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) target_include_directories(panmanUtils PUBLIC "${PROJECT_BINARY_DIR}") + +TARGET_COMPILE_OPTIONS(capnp PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) + +TARGET_LINK_LIBRARIES(capnp PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) +target_include_directories(capnp PUBLIC "${PROJECT_BINARY_DIR}") diff --git a/src/fitchSankoff.cpp b/src/fitchSankoff.cpp index cd27710..041ee78 100644 --- a/src/fitchSankoff.cpp +++ b/src/fitchSankoff.cpp @@ -30,6 +30,7 @@ int panmanUtils::Tree::nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states) { if(node->children.size() == 0) { if(states.find(node->identifier) == states.end()) { + std::cerr << "Node ID not found" << std::endl; return states[node->identifier] = 0; } return states[node->identifier]; diff --git a/src/panman.cpp b/src/panman.cpp index f10d337..6cf57ee 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -64,8 +64,10 @@ char panmanUtils::getNucleotideFromCode(int code) { return 'H'; case 7: return 'V'; - default: + case 15: return 'N'; + default: + return '-'; } } @@ -99,10 +101,8 @@ char panmanUtils::getCodeFromNucleotide(char nuc) { return 11; case 'V': return 7; - case 'N': - return 15; default: - return 0; + return 15; } } @@ -379,19 +379,28 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new } void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, - std::vector< panman::Node::Reader >& nodes) { + std::vector &storedNode) { std::vector< panmanUtils::NucMut > storedNucMutation; - - for (auto nodeMutations: nodes[currentIndex].getMutations()){ + // std::cout << root->identifier << std::endl; + // std::cout << "\tMutation: " << currentIndex << std::endl; + for (auto nodeMutations: 
storedNode[currentIndex].getMutations()){ + auto countt = 0; for (auto nucMut: nodeMutations.getNucMutation()){ + // if (nucMut.getNucPosition()==0){ + // std::cout << "\t Reading " << countt << " "<< nucMut.getNucPosition() << " " << + // nucMut.getMutInfo() << " " << + // nucMut.getNucGapPosition() << " " << + // nucMut.getNucGapExist() << std::endl; + // } storedNucMutation.push_back( panmanUtils::NucMut(nucMut, nodeMutations.getBlockId(), nodeMutations.getBlockGapExist())); + countt++; } } std::vector< panmanUtils::BlockMut > storedBlockMutation; - for (auto nodeMutations: nodes[currentIndex].getMutations()){ + for (auto nodeMutations: storedNode[currentIndex].getMutations()){ panmanUtils::BlockMut tempBlockMut; if (nodeMutations.getBlockMutExist()){ tempBlockMut.loadFromProtobuf(nodeMutations); @@ -399,7 +408,7 @@ void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, } } - for (auto nodeAnnotations: nodes[currentIndex].getAnnotations()){ + for (auto nodeAnnotations: storedNode[currentIndex].getAnnotations()){ root->annotations.push_back(nodeAnnotations); annotationsToNodes[nodeAnnotations].push_back(root->identifier); } @@ -409,7 +418,7 @@ void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, for(auto child: root->children) { currentIndex++; - assignMutationsToNodes(child, currentIndex, nodes); + assignMutationsToNodes(child, currentIndex, storedNode); } } @@ -963,6 +972,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } else if(ftype == panmanUtils::FILE_TYPE::MSA) { std::string newickString; secondFin >> newickString; + root = createTreeFromNewickString(newickString); std::map< std::string, std::string > sequenceIdsToSequences; @@ -998,6 +1008,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE exit(-1); } else { lineLength = currentSequence.length(); + std::cout << lineLength << std::endl; } sequenceIdsToSequences[currentSequenceId] = 
currentSequence; } @@ -1014,6 +1025,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } if(!nonGapFound) { emptyPositions.insert(i); + std::cout << "OOps" << i << std::endl; } } for(auto& u: sequenceIdsToSequences) { @@ -1036,6 +1048,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE nodeMutexes[u.first]; } + for (auto i=0; i(u.second[i]) != std::get<0>(u.second[i-1])+1 || std::get<1>(u.second[i]) != std::get<1>(u.second[i-1])) { nodeMutexes[u.first].lock(); + if (std::get<0>(u.second[currentStart]) == 0) + std::cout << u.first << std::endl; allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, i); nodeMutexes[u.first].unlock(); currentStart = i; @@ -1086,8 +1101,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE nodeMutexes[u.first].lock(); allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, u.second.size()); nodeMutexes[u.first].unlock(); + // } }); - std::cout << "Finished" << std::endl; } @@ -1096,14 +1111,18 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { // Create tree root = createTreeFromNewickString(mainTree.getNewick().cStr()); - std::cout << root << std::endl; + // std::cout << root->identifier << std::endl; std::map< std::pair, std::vector< uint32_t > > blockIdToConsensusSeq; + int countt = 0; + // std::cout << "consensusmap\n"; for (auto consensusMapElement: mainTree.getConsensusSeqMap()){ std::vector< uint32_t > seq; for (auto consensusSequenceToBlockIds: consensusMapElement.getConsensusSeq()){ seq.push_back(consensusSequenceToBlockIds); - } + } + + // std::cout << "\tSeq size: " << seq.size() << std::endl; auto blockIdList = consensusMapElement.getBlockId(); auto blockGapExistList = consensusMapElement.getBlockGapExist(); @@ -1116,10 +1135,13 @@ void panmanUtils::Tree::protoMATToTree(const 
panman::Tree::Reader& mainTree) { blockId.second = -1; } blockIdToConsensusSeq[blockId] = seq; + // std::cout << "\tIDs: " << blockIdList.size() << " " << blockId.first << " " << blockId.second << std::endl; } + countt++; } - std::vector< panman::Node::Reader> storedNodes; + std::vector storedNodes; + for (auto nodesFromTree: mainTree.getNodes()){ storedNodes.push_back(nodesFromTree); } @@ -1134,14 +1156,16 @@ void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { } // Gap List - for (auto gapsFromTree: mainTree.getGaps()){ + for (auto i=0; i< mainTree.getGaps().size(); i++){ panmanUtils::GapList tempGaps; - tempGaps.primaryBlockId = (gapsFromTree.getBlockId() >> 32); - tempGaps.secondaryBlockId = (gapsFromTree.getBlockGapExist() ? (gapsFromTree.getBlockId() & 0xFFFF): -1); - for (auto j=0; gapsFromTree.getNucPosition().size(); j++){ - tempGaps.nucPosition.push_back(gapsFromTree.getNucPosition()[j]); - tempGaps.nucGapLength.push_back(gapsFromTree.getNucGapLength()[j]); + for (auto j=0; mainTree.getGaps()[i].getNucPosition().size(); j++){ + tempGaps.nucPosition.push_back(mainTree.getGaps()[i].getNucPosition()[j]); + tempGaps.nucGapLength.push_back(mainTree.getGaps()[i].getNucGapLength()[j]); + std::cout << "\t " << j << mainTree.getGaps()[i].getNucPosition()[j] << " " << mainTree.getGaps()[i].getNucGapLength()[j] << std::endl; + } + tempGaps.primaryBlockId = (mainTree.getGaps()[i].getBlockId() >> 32); + tempGaps.secondaryBlockId = (mainTree.getGaps()[i].getBlockGapExist() ? 
(mainTree.getGaps()[i].getBlockId() & 0xFFFF): -1); gaps.push_back(tempGaps); } @@ -2126,29 +2150,30 @@ void panmanUtils::Tree::extractPanMATSegment(kj::std::StdOutputStream& fout, int } void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder& nodesBuilder, size_t& nodeIndex) { + // std::cout << nodeIndex << std::endl; panman::Node::Builder n = nodesBuilder[nodeIndex++]; std::map< std::pair< int32_t, int32_t >, std::pair< std::vector< panman::NucMut::Builder >, int > > blockToMutations; std::map< std::pair< int32_t, int32_t >, bool > blockToInversion; + capnp::MallocMessageBuilder message; + panman::Mutation::Builder mut_ = message.initRoot(); + capnp::List::Builder nm = mut_.initNucMutation(root->nucMutation.size()); for(size_t i = 0; i < root->nucMutation.size(); i++) { const panmanUtils::NucMut& mutation = root->nucMutation[i]; - panman::NucMut::Builder nm = message.initRoot(); - nm.setNucPosition(mutation.nucPosition); + nm[i].setNucPosition(mutation.nucPosition); if(mutation.nucGapPosition != -1) { - nm.setNucGapPosition(mutation.nucGapPosition); - nm.setNucGapExist(true); + nm[i].setNucGapPosition(mutation.nucGapPosition); + nm[i].setNucGapExist(true); } else { - nm.setNucGapExist(false); + nm[i].setNucGapExist(false); } - - nm.setMutInfo((((mutation.nucs) >> (24 - (mutation.mutInfo >> 4)*4)) << 8) + mutation.mutInfo); - blockToMutations[std::make_pair(mutation.primaryBlockId, mutation.secondaryBlockId)].first.push_back(nm); + nm[i].setMutInfo((((mutation.nucs) >> (24 - (mutation.mutInfo >> 4)*4)) << 8) + mutation.mutInfo); + blockToMutations[std::make_pair(mutation.primaryBlockId, mutation.secondaryBlockId)].first.push_back(nm[i]); blockToMutations[std::make_pair(mutation.primaryBlockId, mutation.secondaryBlockId)].second = 2; - } for(size_t i = 0; i < root->blockMutation.size(); i++) { @@ -2158,13 +2183,13 @@ void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder mutationsBuilder = 
n.initMutations(blockToMutations.size()); + // std::cout << "Mutations\n"; size_t blockToMutationsCount=0; - for(auto u: blockToMutations) { + for(auto &u: blockToMutations) { panman::Mutation::Builder mutation = mutationsBuilder[blockToMutationsCount++]; mutation.setBlockMutExist((u.second.second != 2)); mutation.setBlockMutInfo(u.second.second); if(u.second.second != 2) { - // block mutation exists mutation.setBlockInversion(blockToInversion[u.first]); } else { mutation.setBlockInversion(true); @@ -2179,10 +2204,16 @@ void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder nucMutationBuilder = mutation.initNucMutation(u.second.first.size()); for(auto i=0; i::Builder nucPositionBuilder = gl.initNucPosition(gaps[i].nucPosition.size()); for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + std::cout << "\t " << j << gaps[i].nucPosition[j] << " " << gaps[i].nucGapLength[j] << std::endl; nucPositionBuilder.set(j, gaps[i].nucPosition[j]); nucGapLengthBuilder.set(j,gaps[i].nucGapLength[j]); } @@ -5313,22 +5345,21 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { capnp::List::Builder treestoWriteBuilder = treeGroupToWrite.initTrees(trees.size()); size_t treesCount = 0; - std::cout << "Writing Trees..." << std::endl; + // std::cout << "Writing Trees..." << std::endl; for(auto& tree: trees) { - std::cout << "Tree Count:" << treesCount << "..." << std::endl; + // std::cout << "Tree Count:" << treesCount << "..." << std::endl; panman::Tree::Builder treeToWrite = treestoWriteBuilder[treesCount++]; Node* node = tree.root; capnp::List::Builder nodesBuilder = treeToWrite.initNodes(tree.allNodes.size()); size_t nodeIndex=0; - std::cout << "Writing Nodes..." 
<< std::endl; + // std::cout << "Printting Nodes\n"; tree.getNodesPreorder(node, nodesBuilder, nodeIndex); assert(nodeIndex == tree.allNodes.size()); std::string newick = tree.getNewickString(node); - - std::cout << newick << std::endl; + // std::cout << newick << std::endl; treeToWrite.setNewick(newick); std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t, bool > > > consensusSeqToBlockIds; @@ -5351,33 +5382,42 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { int consensusSeqMapBuilderCount = 0; for(auto u: consensusSeqToBlockIds) { panman::ConsensusSeqToBlockIds::Builder c = consensusSeqMapBuilder[consensusSeqMapBuilderCount]; + // std::cout << "Printing consensusblockIds " << consensusSeqMapBuilderCount << std::endl; - ::capnp::List::Builder blockIdBuilder = c.initBlockId(u.first.size()); ::capnp::List::Builder conSeqBuilder = c.initConsensusSeq(u.first.size()); - ::capnp::List::Builder blockGapExistBuilder = c.initBlockGapExist(u.first.size()); + ::capnp::List::Builder blockIdBuilder = c.initBlockId(u.second.size()); + ::capnp::List::Builder blockGapExistBuilder = c.initBlockGapExist(u.second.size()); for(auto v=0; v::Builder gapsBuilder = treeToWrite.initGaps(tree.gaps.size()); for(size_t i = 0; i < tree.gaps.size(); i++) { panman::GapList::Builder gl = gapsBuilder[i]; + // std::cout << "Printing gap list " << i << std::endl; + + std::cout << "Gaps " << i << std::endl; ::capnp::List::Builder nucGapLengthBuilder = gl.initNucGapLength(tree.gaps[i].nucPosition.size()); ::capnp::List::Builder nucPositionBuilder = gl.initNucPosition(tree.gaps[i].nucPosition.size()); for(size_t j = 0; j < tree.gaps[i].nucPosition.size(); j++) { + // std::cout << "\t Nuc Position and gap length " << j << tree.gaps[i].nucPosition[j] << " " << tree.gaps[i].nucGapLength[j] << std::endl; nucPositionBuilder.set(j, tree.gaps[i].nucPosition[j]); nucGapLengthBuilder.set(j,tree.gaps[i].nucGapLength[j]); } + // std::cout << "\t Block ID" << i << 
tree.gaps[i].secondaryBlockId << " " << ((int64_t)tree.gaps[i].primaryBlockId << 32) + tree.gaps[i].secondaryBlockId << " " << ((int64_t)tree.gaps[i].primaryBlockId << 32) << std::endl; if (tree.gaps[i].secondaryBlockId != -1) { gl.setBlockId(((int64_t)tree.gaps[i].primaryBlockId << 32) + tree.gaps[i].secondaryBlockId); gl.setBlockGapExist(true); diff --git a/src/panman.hpp b/src/panman.hpp index 2829838..d949bc4 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -115,6 +115,13 @@ struct NucMut { for(int i = start; i < end; i++) { nucs += (std::get<2>(mutationArray[i]) << (4*(5-(i - start)))); } + + // if (nucPosition == 0){ + // std::cout << "\t Writing " << nucPosition << " " << + // (int)mutInfo << " " << + // nucs << " " << + // std::endl; + // } } // Create non-SNP mutations from SNP mutations at consecutive positions @@ -286,7 +293,7 @@ class Tree { // memory, assign mutations from the proto file to the tree nodes using preorder // traversal void assignMutationsToNodes(Node* root, size_t& currentIndex, - std::vector< panman::Node::Reader >& nodes); + std::vector& storedNode); // Get the total number of mutations of given type int getTotalParsimonyParallel(NucMutationType nucMutType, diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index eca52d5..821b838 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -28,6 +28,59 @@ void stripStringInPlace(std::string& s) { } } +std::string printNucMut(int32_t mutInfo){ + std::string s = "Type "; + s += std::to_string(mutInfo&0xf); + s += " Length "; + s += std::to_string(mutInfo>>4); + return s; +} + +std::string printNucs(int32_t mutInfo, int32_t nucs){ + std::string s = " Chars: "; + int len = mutInfo >> 4; + for (int i=0; i> 4; + } + return s; +} + +void checkFunction(panmanUtils::Tree *T) { + std::cout << T->root->identifier << std::endl; + std::ofstream o("new.fa"); + T->printFASTA(o); + + // get node id + panmanUtils::Node * node = T->allNodes["England/MILK-338D3D9/2022|OV784995.1|2022-01-21"]; + 
std::cout << "Found Node: " << node->identifier << std::endl; + + while (node != nullptr) { + std::cout << "node: " << node->identifier << std::endl; + // Pring block mutations + std::cout << "Block mutations" << std::endl; + for(auto &u: node->blockMutation) { + std::cout << "\t" << u.blockMutInfo << " " << + u.inversion << " " << + u.primaryBlockId << " " << + u.secondaryBlockId << " " << std::endl; + } + + // Pring Nuc mutations + std::cout << "Nuc mutations" << std::endl; + for(auto &u: node->nucMutation) { + std::cout << "\t Position " << u.nucPosition << " Gap-position " << + u.nucGapPosition << " " << + printNucMut(u.mutInfo) << " " << + printNucs(u.mutInfo, u.nucs) << " " << std::endl; + } + node = node->parent; + } + + return; +} + // program option description for building/loading a PanMAT into memory po::options_description globalDesc("panmanUtils Command Line Arguments"); po::positional_options_description globalPositionArgumentDesc; @@ -527,6 +580,8 @@ void parseAndExecute(int argc, char* argv[]) { // panmanUtils::FILE_TYPE::MSA_OPTIMIZE); // } + // checkFunction(T); + std::vector tg; tg.push_back(T); From da065adbbcbfa2a5115d9b1914c58374a3fdad26 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Fri, 4 Oct 2024 12:55:55 -0700 Subject: [PATCH 005/103] updated scripts to build panman --- CMakeLists.txt | 29 ----------------------------- scripts/build_panman.sh | 22 ++++++++++++++++++++++ src/fitchSankoff.cpp | 2 +- src/panman.cpp | 17 ++++++----------- 4 files changed, 29 insertions(+), 41 deletions(-) create mode 100644 scripts/build_panman.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 22ab5fa..f6a26ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,36 +45,18 @@ find_package(jsoncpp CONFIG REQUIRED) file(GLOB PANMAT_SRCS "src/panmanUtils.cpp" "src/panman.cpp" "src/panmanUtils.hpp" "src/panman.hpp") -file(GLOB CAP_SRCS "src/capnprototest.cpp") if(DEFINED CapnProto_PATH) add_executable(panmanUtils ${PANMAT_SRCS} ) - # protobuf_generate( - # 
LANGUAGE cpp - # TARGET panmanUtils - # PROTOS panman.proto) - capnp_generate( LANGUAGE cpp TARGET panmanUtils PROTOS panman.capnp) - add_executable(capnp ${CAP_SRCS}) - capnp_generate( - LANGUAGE cpp - TARGET capnp - PROTOS panman.capnp - ) - - else() - # protobuf_generate_cpp( - # PROTO_SRCS PROTO_HDRS - # panman.proto) - capnp_generate_cpp( CAPNP_SRCS CAPNP_HDRS panman.capnp) @@ -85,20 +67,9 @@ else() ${CAPNP_HDRS} ) - add_executable(capnp - ${CAP_SRCS} - ${CAPNP_SRCS} - ${CAPNP_HDRS} - ) - endif() TARGET_COMPILE_OPTIONS(panmanUtils PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) TARGET_LINK_LIBRARIES(panmanUtils PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) target_include_directories(panmanUtils PUBLIC "${PROJECT_BINARY_DIR}") - -TARGET_COMPILE_OPTIONS(capnp PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) - -TARGET_LINK_LIBRARIES(capnp PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) -target_include_directories(capnp PUBLIC "${PROJECT_BINARY_DIR}") diff --git a/scripts/build_panman.sh b/scripts/build_panman.sh new file mode 100644 index 0000000..83c695b --- /dev/null +++ b/scripts/build_panman.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +## Defines +PANMAN_HOME=/home/panman +PANMAN_BUILD=/home/panman/build +DATASET_PATH=/home/dataset +DATASET=sars_20 +PANGRAPH_HOME=/home/pangraph/pangraph.sh +PANGRAPH_OUTPUT=$PANMAN_HOME/build/pangraph +panmanUtils=$PANMAN_BUILD/panmanUtils + +cd $PANMAN_BUILD + +##### Commands generate PanGraph (JSON) and Tree Topology (Newick) from raw sequences in FASTA format #### +mkdir -p pangraph +echo "Building PanGraph..." 
+$PANGRAPH_HOME "$DATASET_PATH/$DATASET.fa" "$PANGRAPH_OUTPUT/$DATASET.json" 2> "$PANGRAPH_OUTPUT/$DATASET.nwk" +echo $(cat "$PANGRAPH_OUTPUT/$DATASET.nwk" | grep "tree" | awk '{split($0,a,"tree: "); print a[2]}') > $PANGRAPH_OUTPUT/$DATASET.nwk + +#### Run panmanUtils to construct PanMAN using PanGraph #### +echo "Building PanMAN..." +$panmanUtils -P $PANGRAPH_OUTPUT/$DATASET.json -N $PANGRAPH_OUTPUT/$DATASET.nwk -o $DATASET \ No newline at end of file diff --git a/src/fitchSankoff.cpp b/src/fitchSankoff.cpp index 041ee78..41b7c05 100644 --- a/src/fitchSankoff.cpp +++ b/src/fitchSankoff.cpp @@ -30,7 +30,7 @@ int panmanUtils::Tree::nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states) { if(node->children.size() == 0) { if(states.find(node->identifier) == states.end()) { - std::cerr << "Node ID not found" << std::endl; + //std::cerr << "Node ID not found" << std::endl; return states[node->identifier] = 0; } return states[node->identifier]; diff --git a/src/panman.cpp b/src/panman.cpp index 6cf57ee..7852476 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -381,8 +381,8 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, std::vector &storedNode) { std::vector< panmanUtils::NucMut > storedNucMutation; - // std::cout << root->identifier << std::endl; - // std::cout << "\tMutation: " << currentIndex << std::endl; + std::cout << root->identifier << "\tMutation: " << currentIndex << std::endl; + for (auto nodeMutations: storedNode[currentIndex].getMutations()){ auto countt = 0; for (auto nucMut: nodeMutations.getNucMutation()){ @@ -1115,14 +1115,12 @@ void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { std::map< std::pair, std::vector< uint32_t > > blockIdToConsensusSeq; int countt = 0; - // std::cout << "consensusmap\n"; for (auto consensusMapElement: mainTree.getConsensusSeqMap()){ std::vector< uint32_t > 
seq; for (auto consensusSequenceToBlockIds: consensusMapElement.getConsensusSeq()){ seq.push_back(consensusSequenceToBlockIds); } - // std::cout << "\tSeq size: " << seq.size() << std::endl; auto blockIdList = consensusMapElement.getBlockId(); auto blockGapExistList = consensusMapElement.getBlockGapExist(); @@ -1141,7 +1139,6 @@ void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { } std::vector storedNodes; - for (auto nodesFromTree: mainTree.getNodes()){ storedNodes.push_back(nodesFromTree); } @@ -1158,10 +1155,10 @@ void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { // Gap List for (auto i=0; i< mainTree.getGaps().size(); i++){ panmanUtils::GapList tempGaps; - for (auto j=0; mainTree.getGaps()[i].getNucPosition().size(); j++){ + for (auto j=0; j> 32); @@ -2297,14 +2294,15 @@ void panmanUtils::Tree::writeToFile(kj::std::StdOutputStream& fout, panmanUtils: } ::capnp::List::Builder gapsBuilder = treeToWrite.initGaps(gaps.size()); + std::cout << "Writing Gap List " << gaps.size() << "\n"; for(size_t i = 0; i < gaps.size(); i++) { + std::cout << "itr: " << i << " size: " << gaps[i].nucPosition.size() << "\n"; panman::GapList::Builder gl = gapsBuilder[i]; ::capnp::List::Builder nucGapLengthBuilder = gl.initNucGapLength(gaps[i].nucPosition.size()); ::capnp::List::Builder nucPositionBuilder = gl.initNucPosition(gaps[i].nucPosition.size()); for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { - std::cout << "\t " << j << gaps[i].nucPosition[j] << " " << gaps[i].nucGapLength[j] << std::endl; nucPositionBuilder.set(j, gaps[i].nucPosition[j]); nucGapLengthBuilder.set(j,gaps[i].nucGapLength[j]); } @@ -5405,9 +5403,6 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { ::capnp::List::Builder gapsBuilder = treeToWrite.initGaps(tree.gaps.size()); for(size_t i = 0; i < tree.gaps.size(); i++) { panman::GapList::Builder gl = gapsBuilder[i]; - // std::cout << "Printing gap list " << i << std::endl; - 
- std::cout << "Gaps " << i << std::endl; ::capnp::List::Builder nucGapLengthBuilder = gl.initNucGapLength(tree.gaps[i].nucPosition.size()); ::capnp::List::Builder nucPositionBuilder = gl.initNucPosition(tree.gaps[i].nucPosition.size()); From bdecf3f46858a91a96a71bb7e10d843da0c2cbe1 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Fri, 4 Oct 2024 12:57:45 -0700 Subject: [PATCH 006/103] added executable --- scripts/build_panman.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/build_panman.sh diff --git a/scripts/build_panman.sh b/scripts/build_panman.sh old mode 100644 new mode 100755 From edc6b51aaed7797f962c50b2c119f3609f47a66b Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 6 Oct 2024 21:24:53 -0700 Subject: [PATCH 007/103] Added installation steps in separate file --- docker/DockerFile | 2 +- docs/install.md | 73 +++++++++++++++++++++++++++++++++++ install/installationUbuntu.sh | 4 +- 3 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 docs/install.md diff --git a/docker/DockerFile b/docker/DockerFile index 4137b44..76f05bf 100644 --- a/docker/DockerFile +++ b/docker/DockerFile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 RUN apt update -RUN apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev pkg-config +RUN apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev pkg-config capnproto WORKDIR /HOME diff --git a/docs/install.md b/docs/install.md new file mode 100644 index 0000000..dc71cfc --- /dev/null +++ b/docs/install.md @@ -0,0 +1,73 @@ +# Installation Methods + +## Using installation script (requires sudo access) + +0. Dependencies +i. Git + +1. Clone the repository +```bash +git https://github.com/TurakhiaLab/panman.git +cd panman +``` +2. Run the installation script +```bash +chmod +x install/installationUbuntu.sh +./install/installationUbuntu.sh +``` +3. 
Run panmanUtils +```bash +cd build +./panmanUtils --help +``` +!!!Note + panmanUtils is built using CMake and depends upon libraries such as Boost, cap'n proto, etc, which are also installed in `installationUbuntu.sh`. If users face version issues, try using the docker methods detailed below. + +## Using Docker Image + +To use panmanUtils in a docker container, users can create a docker container from a docker image, by following these steps: + +0. Dependencies +i. Docker +1. Pull the PanMAN docker image from DockerHub +```bash +docker pull swalia14/panman:latest +``` +2. Build and run the docker container +```bash +docker run -it swalia14/panman:latest +``` +3. Run panmanUtils +```bash +# Insider docker container +cd /home/panman/build +./panmanUtils --help +``` +!!!Note + The docker image comes with preinstalled panmanUtils and other tools such as PanGraph, PGGB, and RIVET. + +## Using DockerFile +Docker container with preinstalled panmanUtils can also be built from DockerFile by following these steps: +0. Dependencies +i. Docker +ii. Git +1. Clone the repository +```bash +git https://github.com/TurakhiaLab/panman.git +cd panman +``` +2. Build a docker image +```bash +cd docker +docker build -t panman . +``` +3. Build and run docker container +```bash +docker run -it panman +``` +4. 
Run panmanUtils +```bash +# Insider docker container +cd /home/panman/build +./panmanUtils --help +``` \ No newline at end of file diff --git a/install/installationUbuntu.sh b/install/installationUbuntu.sh index 9b3fed7..54c734f 100755 --- a/install/installationUbuntu.sh +++ b/install/installationUbuntu.sh @@ -1,4 +1,5 @@ # Install dependencies +sudo apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev pkg-config capnproto # Build startDir=$pwd @@ -7,14 +8,13 @@ mkdir -p ../build cd ../build git clone https://github.com/microsoft/vcpkg.git -apt-get install pkg-config +sudo apt-get install pkg-config ./vcpkg/bootstrap-vcpkg.sh ./vcpkg/vcpkg install jsoncpp wget https://github.com/oneapi-src/oneTBB/archive/2019_U9.tar.gz tar -xvzf 2019_U9.tar.gz - cmake -DTBB_DIR=${PWD}/oneTBB-2019_U9 -DCMAKE_PREFIX_PATH=${PWD}/oneTBB-2019_U9/cmake -DProtobuf_PROTOC_EXECUTABLE=/usr/bin/protoc -DCMAKE_TOOLCHAIN_FILE=${PWD}/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
make -j From 68cee2fb8442dc42d97acf3460b94a3450ef7700 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 6 Oct 2024 21:27:38 -0700 Subject: [PATCH 008/103] added tabs in docs --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index a8f3f20..f6bf183 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -76,6 +76,7 @@ markdown_extensions: use_directory_urls: false nav: - Home: index.md + - Install: install.md extra_javascript: - javascripts/mathjax.js From 3c8ca45bae122c821580eeca3e8389314fd79324 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 6 Oct 2024 21:32:10 -0700 Subject: [PATCH 009/103] improving aesthetics --- mkdocs.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index f6bf183..fbceed5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,7 +13,7 @@ theme: - content.tooltips - navigation.footer - navigation.expand - - navigation.tabs.sticky + # - navigation.tabs.sticky - navigation.instant.prefetch - navigation.tracking - search.highlight @@ -21,6 +21,11 @@ theme: - search.suggest - toc.follow - toc.integrate + - navigation.tabs + - navigation.sections + - navigation.path + - navigation.top + - content.tabs.link language: en palette: - scheme: default From 3dd9cb55e06471fcbc6660b57876fa14c7626287 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 6 Oct 2024 21:48:20 -0700 Subject: [PATCH 010/103] minor change in install.md --- docs/install.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/install.md b/docs/install.md index dc71cfc..1e64a9a 100644 --- a/docs/install.md +++ b/docs/install.md @@ -3,7 +3,7 @@ ## Using installation script (requires sudo access) 0. Dependencies -i. Git + i. Git 1. 
Clone the repository ```bash @@ -25,10 +25,10 @@ cd build ## Using Docker Image -To use panmanUtils in a docker container, users can create a docker container from a docker image, by following these steps: +To use panmanUtils in a docker container, users can create a docker container from a docker image, by following these steps 0. Dependencies -i. Docker + i. Docker 1. Pull the PanMAN docker image from DockerHub ```bash docker pull swalia14/panman:latest @@ -47,10 +47,11 @@ cd /home/panman/build The docker image comes with preinstalled panmanUtils and other tools such as PanGraph, PGGB, and RIVET. ## Using DockerFile -Docker container with preinstalled panmanUtils can also be built from DockerFile by following these steps: +Docker container with preinstalled panmanUtils can also be built from DockerFile by following these steps + 0. Dependencies -i. Docker -ii. Git + i. Docker + ii. Git 1. Clone the repository ```bash git https://github.com/TurakhiaLab/panman.git From bc4a8ee2db4ba5e04f52c4f9bf3f9985cc36c34f Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 6 Oct 2024 23:56:24 -0700 Subject: [PATCH 011/103] added quickstart --- docs/quickstart.md | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 docs/quickstart.md diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 0000000..fbc2b26 --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,49 @@ +# Quick start + +Here, we will learn to build PanMAN from various input formats. + +**Step 0:** Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. 
To check if panmanUtils is properly installed or not, run the following command, and it should execute without error +```bash +# enter into panman directory (assuming $PANMAN directs to panman repository directory) +cd $PANMAN_HOME +``` +```bash +cd $PANMAN_HOME/build +./panmanUtils --help +``` +### Building PanMAN from PanGraph + +**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Otherwise follow the instructions to download the dataset. + +```bash +cd $PANMAN_HOME/dataset +TODO +``` + +**Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: + +```bash +cd $PANMAN_HOME/build +./panmanUtils -P $PANMAN_HOME/test/sars_20.json -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 +``` +The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. + +### Building PanMAN from raw genome sequences +We provide scripts to first construct PanGraph from raw sequences, followed by building a panman. +**Step 1:** Check if `sars_20.fa` file exist in `test` directory. Otherwise follow the instructions to download the dataset. + +```bash +cd $PANMAN_HOME/dataset +TODO +``` + +**Step 2:** Run the following command to construct a panman from raw sequences. 
+ +```bash +cd $PANMAN_HOME/scripts +chmod +x build_panman.sh +./build_panman.sh +``` +!!!Note + The above script is particuarly designed to be used in the docker container build either from provided docker image or the DockerFile (instructions provided [here](install.md)) + From cc5df269adb74b9cb61b793ed570ecccc8ba9228 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 6 Oct 2024 23:58:15 -0700 Subject: [PATCH 012/103] added quickstart --- docs/quickstart.md | 10 +++++----- mkdocs.yml | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index fbc2b26..f8eee07 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -2,9 +2,9 @@ Here, we will learn to build PanMAN from various input formats. -**Step 0:** Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error +**Step 0:** The Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error ```bash -# enter into panman directory (assuming $PANMAN directs to panman repository directory) +# enter into the panman directory (assuming $PANMAN directs to the panman repository directory) cd $PANMAN_HOME ``` ```bash @@ -13,7 +13,7 @@ cd $PANMAN_HOME/build ``` ### Building PanMAN from PanGraph -**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Otherwise follow the instructions to download the dataset. +**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. 
```bash cd $PANMAN_HOME/dataset @@ -30,7 +30,7 @@ The above command will run panmanUtils program and build `sars_20.panman` ### Building PanMAN from raw genome sequences We provide scripts to first construct PanGraph from raw sequences, followed by building a panman. -**Step 1:** Check if `sars_20.fa` file exist in `test` directory. Otherwise follow the instructions to download the dataset. +**Step 1:** Check if the `sars_20.fa` file exists in `test` directory. Otherwise, follow the instructions to download the dataset. ```bash cd $PANMAN_HOME/dataset @@ -45,5 +45,5 @@ chmod +x build_panman.sh ./build_panman.sh ``` !!!Note - The above script is particuarly designed to be used in the docker container build either from provided docker image or the DockerFile (instructions provided [here](install.md)) + The above script is particularly designed to be used in the docker container build either from the provided docker image or the DockerFile (instructions provided [here](install.md)) diff --git a/mkdocs.yml b/mkdocs.yml index fbceed5..8f85b11 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,6 +82,7 @@ use_directory_urls: false nav: - Home: index.md - Install: install.md + - Quick Start: quickstart.md extra_javascript: - javascripts/mathjax.js From a264847963f02769cf18bb5c39de7e485c41e540 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 00:04:32 -0700 Subject: [PATCH 013/103] added quickstart --- docs/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index f8eee07..9ee58a6 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -45,5 +45,5 @@ chmod +x build_panman.sh ./build_panman.sh ``` !!!Note - The above script is particularly designed to be used in the docker container build either from the provided docker image or the DockerFile (instructions provided [here](install.md)) + The above script is particularly designed to be used in the docker container build either from the 
provided docker image or the DockerFile (instructions provided [here](install.md)) From 1053f49c68c5668dc5226ffffeb9f4e5d413b9f0 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 09:58:51 -0700 Subject: [PATCH 014/103] added construction page --- docs/construction.md | 86 ++++++++++++++++++++++++++++++++++++++++++++ docs/quickstart.md | 49 ------------------------- mkdocs.yml | 3 +- 3 files changed, 88 insertions(+), 50 deletions(-) create mode 100644 docs/construction.md delete mode 100644 docs/quickstart.md diff --git a/docs/construction.md b/docs/construction.md new file mode 100644 index 0000000..5de7cbb --- /dev/null +++ b/docs/construction.md @@ -0,0 +1,86 @@ +# PanMAN Construction + +Here, we will learn to build PanMAN from various input formats. + +**Step 0:** The Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error +```bash +# enter into the panman directory (assuming $PANMAN directs to the panman repository directory) +cd $PANMAN_HOME +``` +```bash +cd $PANMAN_HOME/build +./panmanUtils --help +``` +### Building PanMAN from PanGraph + +**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. Alternatively, users can provide custom PanGraph (JSON) and tree topology (Newick format) files to build a panman. + +```bash +cd $PANMAN_HOME/dataset +TODO +``` + +**Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: + +```bash +cd $PANMAN_HOME/build +./panmanUtils -P $PANMAN_HOME/test/sars_20.json -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 +``` +The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. 
+ +### Building PanMAN from GFA + +**Step 1:** Check if `sars_20.gfa` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. Alternatively, users can provide custom GFA and tree topology (Newick format) files to build a panman. + +```bash +cd $PANMAN_HOME/dataset +TODO +``` + +**Step 2:** Run panmanUtils with the following command to build a panman from GFA: + +```bash +cd $PANMAN_HOME/build +./panmanUtils -G $PANMAN_HOME/test/sars_20.gfa -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 +``` +The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. + +### Building PanMAN from MSA (FASTA format) + +**Step 1:** Check if `sars_20.msa` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. Alternatively, users can provide custom MSA (FASTA format) and tree topology (Newick format) files to build a panman. + +```bash +cd $PANMAN_HOME/dataset +TODO +``` + +**Step 2:** Run panmanUtils with the following command to build a panman from GFA: + +```bash +cd $PANMAN_HOME/build +./panmanUtils -M $PANMAN_HOME/test/sars_20.msa -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 +``` +The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. + +### Building PanMAN from raw genome sequences +We provide scripts to construct panmanUtils inputs (PanGraph/GFA/MSA and Newick) from raw sequences (FASTA format), followed by building a panman. + +!!!Note + This script uses the PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively. MashTree is not equipped when using PanGraph as input since the PanGraph tool constructs the tree topology too. + +**Step 1:** Check if the `sars_20.fa` file exists in `test` directory. Otherwise, follow the instructions to download the dataset. 
Alternatively, users can provide custom raw sequences (FASTA format) to build a panman. + +```bash +cd $PANMAN_HOME/dataset +TODO +``` + +**Step 2:** Run the following command to construct a panman from raw sequences. + +```bash +cd $PANMAN_HOME/scripts +chmod +x build_panman.sh +./build_panman.sh +``` +!!!Note + The above script is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](install.md)) \ No newline at end of file diff --git a/docs/quickstart.md b/docs/quickstart.md deleted file mode 100644 index 9ee58a6..0000000 --- a/docs/quickstart.md +++ /dev/null @@ -1,49 +0,0 @@ -# Quick start - -Here, we will learn to build PanMAN from various input formats. - -**Step 0:** The Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error -```bash -# enter into the panman directory (assuming $PANMAN directs to the panman repository directory) -cd $PANMAN_HOME -``` -```bash -cd $PANMAN_HOME/build -./panmanUtils --help -``` -### Building PanMAN from PanGraph - -**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. - -```bash -cd $PANMAN_HOME/dataset -TODO -``` - -**Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: - -```bash -cd $PANMAN_HOME/build -./panmanUtils -P $PANMAN_HOME/test/sars_20.json -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 -``` -The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. - -### Building PanMAN from raw genome sequences -We provide scripts to first construct PanGraph from raw sequences, followed by building a panman. -**Step 1:** Check if the `sars_20.fa` file exists in `test` directory. 
Otherwise, follow the instructions to download the dataset. - -```bash -cd $PANMAN_HOME/dataset -TODO -``` - -**Step 2:** Run the following command to construct a panman from raw sequences. - -```bash -cd $PANMAN_HOME/scripts -chmod +x build_panman.sh -./build_panman.sh -``` -!!!Note - The above script is particularly designed to be used in the docker container build either from the provided docker image or the DockerFile (instructions provided [here](install.md)) - diff --git a/mkdocs.yml b/mkdocs.yml index 8f85b11..a28684a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,7 +82,8 @@ use_directory_urls: false nav: - Home: index.md - Install: install.md - - Quick Start: quickstart.md + - Construction: construction.md + - Utils: utils.md extra_javascript: - javascripts/mathjax.js From e4fb18b8886bf402ede39f034b1a3e4663a625bb Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 10:41:21 -0700 Subject: [PATCH 015/103] added utils page --- docs/utils.md | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 2 +- 2 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 docs/utils.md diff --git a/docs/utils.md b/docs/utils.md new file mode 100644 index 0000000..d83a0c6 --- /dev/null +++ b/docs/utils.md @@ -0,0 +1,195 @@ +# Exploring utilities in panmanUtils + +Here, we will learn to use exploit various functionalities provided in panmanUtils software for downstream applications in epidemiological, microbiological, metagenomic, ecological, and evolutionary studies. + +**Step 0:** The Steps below require panmanUtils and a PanMAN. If not done so far, refer to [installation guide](install.md) to install panmanUtils and [construction](construction.md) instructions to build a PanMAN. 
Alternatively, users can download pre-built PanMANs using the following command +```bash +# Assuming $PANMAN directs to the panman repository directory +cd $PANMAN_HOME +mkdir -p build/panman && cd build/panman +ToDO +``` + +### Functionalities in panmanUtils +All panmanUtils functionality commands manipulate the input PanMAN file. +```bash +cd $PANMAN_HOME/build +./panmanUtils -I {opt} +``` + + + +| **Option** | **Description** | +|----------------------------------|-------------------------------------------------------------------------------------------------------------------| +|`-I`, `--input-panman` | Input PanMAN file path | +| `-s`, `--summary` | Print PanMAN summary | +| `-t`, `--newick` | Print Newick string of all trees in a PanMAN | +| `-f`, `--fasta` | Print tip/internal sequences (FASTA format) | +| `-m`, `--fasta-aligned` | Print MSA of sequences for each PanMAT in a PanMAN (FASTA format) | +| `-b`, `--subnet` | Extract subnet of given PanMAN to a new PanMAN file based on the list of nodes provided in the input file | +| `-v`, `--vcf` | Print variations of all sequences from any PanMAT in a PanMAN (VCF format) | +| `-g`, `--gfa` | Convert any PanMAT in a PanMAN to a GFA file | +| `-w`, `--maf` | Print m-WGA for each PanMAT in a PanMAN (MAF format) | +| `-a`, `--annotate` | Annotate nodes of the input PanMAN based on the list provided in the input file | +| `-r`, `--reroot` | Reroot a PanMAT in a PanMAN based on the input sequence id (`--reference`) | +| `-v`, `--aa-translation` | Extract amino acid translations in tsv file | +| `-e`, `--extended-newick` | Print PanMAN's network in extended-newick format | +| `-k`, `--create-network` | Create PanMAN with network of trees from single or multiple PanMAN files | +| `-p`, `--printMutations` | Create PanMAN with network of trees from single or multiple PanMAN files | +| `-q`, `--acr` | ACR method `[fitch(default), mppa]` | +| `-n`, `--reference` | Identifier of reference sequence for PanMAN construction 
(optional), VCF extract (required), or reroot (required) | +| `-s`, `--start` | Start coordinate of protein translation | +| `-e`, `--end` | End coordinate of protein translation | +| `-d`, `--treeID` | Tree ID, required for `--vcf` | +| `-i`, `--input-file` | Path to the input file, required for `--subnet`, `--annotate`, and `--create-network` | +| `-o`, `--output-file` | Prefix of the output file name | + + +> **NOTE:** When output-file argument is optional and is not provided to panmanUtils, the output will be printed in the terminal. + +!!!Note + For all the examples below, `sars_20.panman` will be used as input panman. Alternatively, users can provide a custom-built panman using the instructions provided [here](construction.md). + +#### Summary extract +The summary feature extracts node and tree level statistics of a PanMAN, that contains a summary of its geometric and parsimony information. + +* Usage Syntax +```bash +./panmanUtils -I --summary --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --summary --output-file=sars_20 +``` + +#### Newick extract +Extract Newick string of all trees in a PanMAN. + +* Usage syntax +```bash +./panmanUtils -I --newick --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --newick --output-file=sars_20 +``` + +#### Extended Newick extract +Extract network in Extended Newick format. + +* Usage syntax +```bash +./panmanUtils -I --extended-newick --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --extended-newick --output-file=sars_20 +``` + +#### Tip/internal node sequences extract +Extract tip and internal node sequences from a PanMAN in a FASTA format. 
+ +* Usage syntax +```bash +./panmanUtils -I --fasta --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --fasta --output-file=sars_20 +``` + +#### Multiple Sequence Alignment (MSA) extract +Extract MSA of sequences for each PanMAT (with pseudo-root coordinates) in a PanMAN in a FASTA format. + +* Usage syntax +```bash +./panmanUtils -I --fasta-aligned --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --fasta-aligned --output-file=sars_20 +``` + +#### Multiple Whole Genome Alignment (m-WGA) extract +Extract m-WGA for each PanMAT in a PanMAN in the form of a UCSC multiple alignment format (MAF). + +* Usage syntax +```bash +./panmanUtils -I --maf --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --maf --output-file=sars_20 +``` + +#### Variant Call Format (VCF) extract +Extract variations of all sequences from any PanMAT in a PanMAN in the form of a VCF file with respect to any reference sequence (ref) in the PanMAT. + +* Usage syntax +```bash +./panmanUtils -I --vcf --reference=ref --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --vcf --reference=NC_000913.3 --output-file=sars_20 +``` + +#### Graphical fragment assembly (GFA) extract +Convert any PanMAT in a PanMAN to a Graphical fragment assembly (GFA) file representing the pangenome. + +* Usage syntax +```bash +./panmanUtils -I --gfa --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --gfa --output-file=sars_20 +``` + +#### Subnetwork extract +Extract a subnetwork from a given PanMAN and write it to a new PanMAN file based on the list of nodes provided in the input-file. 
+ +* Usage syntax +```bash +./panmanUtils -I --subnet --input-file= --output-file= +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --subnet --input-file=nodes.txt --output-file=ecoli_10_subnet +``` + +#### Annotate +Annotate nodes in a PanMAN with a custom string, later searched by these annotations, using an input TSV file containing a list of nodes and their corresponding custom annotations. + +* Usage syntax +```bash +./panmanUtils -I --annotate --output-file=ecoli_10_annotate +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --annotate --input-file=annotations.tsv --output-file=ecoli_10_annotate +``` +> **NOTE:** If output-file is not provided to panmanUtils, the annotated PanMAN will be written to the same file. + +#### Amino Acid Translation +Extract amino acid translations from a PanMAN in a TSV file. + +* Usage syntax +```bash +./panmanUtils -I --aa-translation --output-file= (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --aa-translation --output-file=sars_20 +``` \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index a28684a..9683211 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -83,7 +83,7 @@ nav: - Home: index.md - Install: install.md - Construction: construction.md - - Utils: utils.md + - Utility: utils.md extra_javascript: - javascripts/mathjax.js From 00a15057f773e661eb266d4f163115ebe2f38ca8 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 10:49:36 -0700 Subject: [PATCH 016/103] updated index page --- docs/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 6b909fd..2ceb3bd 100644 --- a/docs/index.md +++ b/docs/index.md @@ -38,7 +38,7 @@ PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](ht TBA -### Installation + ## Contributions We welcome contributions from the community to enhance the capabilities of 
PanMAN and panmanUtils. If you encounter any issues or have suggestions for improvement, please open an issue on [PanMAN GitHub page](https://github.com/TurakhiaLab/panman). For general inquiries and support, reach out to our team. From 79ee35d7c262e95d69300ff87c71dca7aa6255e9 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:02:05 -0700 Subject: [PATCH 017/103] updated logo --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 9683211..663c485 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -42,7 +42,7 @@ theme: name: Switch to light mode favicon: images/icon.svg - logo: images/icon.svg + logo: images/logo.svg icon: admonition: From 5a01631d3ae75062ba2bbbd3e72e6ac91916f02c Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:05:46 -0700 Subject: [PATCH 018/103] updated fevicon --- docs/index.md | 2 +- mkdocs.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/index.md b/docs/index.md index 2ceb3bd..93ba1d3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -# Welcome to PanMAN Wiki +
diff --git a/mkdocs.yml b/mkdocs.yml index 663c485..99a1eaf 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,4 +1,4 @@ -site_name: PanMAN Wiki +site_name: PanMAN repo_name: TurakhiaLab/panman repo_url: https://github.com/TurakhiaLab/panman @@ -41,8 +41,8 @@ theme: icon: material/brightness-4 name: Switch to light mode - favicon: images/icon.svg - logo: images/logo.svg + favicon: images/icon.png + logo: images/icon.svg icon: admonition: From 5ebe2b903a44fa17e7558d20745a6ca3942a6af9 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:13:17 -0700 Subject: [PATCH 019/103] updated fevicon --- docs/index.md | 9 ++++----- mkdocs.yml | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/index.md b/docs/index.md index 93ba1d3..a5cbb6c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,9 +1,8 @@ -
-## What are PanMANs? +## What are PanMANs? PanMAN or Pangenome Mutation-Annotated Network is a novel data representation for pangenomes that provides massive leaps in both representative power and storage efficiency. Specifically, PanMANs are composed of mutation-annotated trees, called PanMATs, which, in addition to substitutions, also annotate inferred indels (Fig. 2b), and even structural mutations (Fig. 2a) on the different branches. Multiple PanMATs are connected in the form of a network using edges to generate a PanMAN (Fig. 2c). PanMAN's representative power is compared against existing pangenomic formats in Fig. 1. PanMANs are the most compressible pangenomic format for the different microbial datasets (SARS-CoV-2, RSV, HIV, Mycobacterium. Tuberculosis, E. Coli, and Klebsiella pneumoniae), providing 2.9 to 559-fold compression over standard pangenomic formats.
@@ -18,7 +17,7 @@ PanMAN or Pangenome Mutation-Annotated Network is a novel data representation fo
-### PanMAN's Protocol Buffer file format +## PanMAN's Protocol Buffer file format PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](https://protobuf.dev/)), a binary serialization file format, to compactly store PanMAN's data structure in a file. Fig. 3 provides the .proto file defining the PanMAN’s structure. At the top level, the file format of PanMANs encodes a list (declared as a repeated identifier in the .protof file) of PanMATs. Each PanMAT object stores the following data elements: (a) a unique identifier, (b) a phylogenetic tree stored as a string in Newick format, (c) a list of mutations on each branch ordered according to the pre-order traversal of the tree topology, (d) a block mapping object to record homologous segments identified as duplications and rearrangements, which are mapped against their common consensus sequence; the block-mapping object is also used to derive the pseudo-root, e) a gap list to store the position and length of gaps corresponding to each block's consensus sequence. Each mutation object encodes the node's block and nucleotide mutations that are inferred on the branches leading to that node. If a block mutation exists at a position described by the Block-ID field (int32), the block mutation field (bool) is set to 1, otherwise set to 0, and its type is stored as a substitution to and from a gap in Block mutation type field (bool), encoded as 0 or 1, respectively. In PanMAN, each nucleotide mutation within a block inferred on a branch has four pieces of information, i.e., position (middle coordinate), gap position (last coordinate), mutation type, and mutated characters. To reduce redundancy in the file, consecutive mutations of the same type are packed together and stored as a mutation info (int32) field, where mutation type, mutation length, and mutated characters use 3, 5, and 24 bits, respectively. 
PanMAN stores each character using one-hot encoding, hence, one "Nucleotide Mutations" object can store up to 6 consecutive mutations of the same type. PanMAN's file also stores the complex mutation object to encode the type of complex mutation and its metadata such as PanMATs' and nodes' identifiers, breakpoint coordinates, etc. The entire file is then compressed using XZ ([https://github.com/tukaani-project/xz](https://github.com/tukaani-project/xz)) to enhance storage efficiency.
@@ -26,7 +25,7 @@ PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](ht Figure 3: PanMAN's file format
-## panmanUtils +## panmanUtils panmanUtils includes multiple algorithms to construct PanMANs and to support various functionalities to modify and extract useful information from PanMANs (Fig. 4).
@@ -34,7 +33,7 @@ PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](ht Figure 4: Overview of panmanUtils' functionalities
-### panmanUtils Video Tutorial +## Video Tutorial TBA diff --git a/mkdocs.yml b/mkdocs.yml index 99a1eaf..f82599f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,11 +6,11 @@ theme: name: material features: - announce.dismiss - - content.action.edit - - content.action.view - - content.code.annotate + # - content.action.edit + # - content.action.view + - content.code.annotation - content.code.copy - - content.tooltips + # - content.tooltips - navigation.footer - navigation.expand # - navigation.tabs.sticky From bfe216edc0098c1a7e1e4c55bc8ee24a67ff7bb4 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:20:52 -0700 Subject: [PATCH 020/103] updated fevicon --- mkdocs.yml | 65 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index f82599f..2470e58 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,27 +6,29 @@ theme: name: material features: - announce.dismiss - # - content.action.edit - # - content.action.view - content.code.annotation - content.code.copy - # - content.tooltips - - navigation.footer - - navigation.expand - # - navigation.tabs.sticky - - navigation.instant.prefetch - - navigation.tracking - - search.highlight - - search.share - - search.suggest + - content.tabs.link - toc.follow - toc.integrate - - navigation.tabs - navigation.sections - navigation.path + - navigation.tabs - navigation.top - - content.tabs.link + - navigation.footer + - search.highlight + - search.suggest + # - content.action.edit + # - content.action.view + # - content.tooltips + # - navigation.tabs.sticky + # - navigation.expand + # - navigation.instant.prefetch + # - navigation.tracking + # - search.share + language: en + palette: - scheme: default primary: white @@ -42,19 +44,24 @@ theme: name: Switch to light mode favicon: images/icon.png + logo: images/icon.svg - icon: - admonition: - note: octicons/tag-16 - info: octicons/info-16 - tip: octicons/squirrel-16 - success: 
octicons/check-16 - question: octicons/question-16 - warning: octicons/alert-16 - bug: octicons/bug-16 - example: octicons/beaker-16 - quote: octicons/quote-16 +plugins: + - social + - search + + # icon: + # admonition: + # note: octicons/tag-16 + # info: octicons/info-16 + # tip: octicons/squirrel-16 + # success: octicons/check-16 + # question: octicons/question-16 + # warning: octicons/alert-16 + # bug: octicons/bug-16 + # example: octicons/beaker-16 + # quote: octicons/quote-16 extra: social: @@ -74,11 +81,14 @@ markdown_extensions: - pymdownx.superfences - pymdownx.mark - attr_list + - def_list + - pymdownx.tasklist: + custom_checkbox: true - pymdownx.emoji: - emoji_index: !!python/name:materialx.emoji.twemoji + emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:materialx.emoji.to_svg -use_directory_urls: false +# use_directory_urls: false nav: - Home: index.md - Install: install.md @@ -89,3 +99,6 @@ extra_javascript: - javascripts/mathjax.js - https://polyfill.io/v3/polyfill.min.js?features=es6 - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js + +copyright: | + © 2024 Turakhia Lab From d7a71aaf7ed33ad8448f15f913ce69e940716fd1 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:27:46 -0700 Subject: [PATCH 021/103] updated workflows --- .github/workflows/cmake.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 6ea29f2..4c1dba9 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -10,6 +10,7 @@ permissions: jobs: deploy: + name: Deploy Job runs-on: ubuntu-latest steps: - name: Checkout Code @@ -26,9 +27,10 @@ jobs: key: ${{ github.ref }} path: .cache - - name: build docs + - name: Install dependencies and build mkdocs run: | pip install mkdocs-material + pip install "mkdocs-material[imaging]" mkdocs gh-deploy --force - name: install pre-reqs and build From 
ed18df0e5f20b236fedebfa86f76eab6e972097f Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:31:01 -0700 Subject: [PATCH 022/103] updated workflows --- mkdocs.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 2470e58..dd8e5c9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -95,10 +95,10 @@ nav: - Construction: construction.md - Utility: utils.md -extra_javascript: - - javascripts/mathjax.js - - https://polyfill.io/v3/polyfill.min.js?features=es6 - - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js +# extra_javascript: +# - javascripts/mathjax.js +# - https://polyfill.io/v3/polyfill.min.js?features=es6 +# - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js copyright: | © 2024 Turakhia Lab From 7d44ab386b571e6592479121c5f35f5bee44597b Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:36:56 -0700 Subject: [PATCH 023/103] updated workflows --- docs/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/index.md b/docs/index.md index a5cbb6c..5704417 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,3 +1,6 @@ +--- +title: +---
From 05462fc71ce46e59ff1f6f035e49e205582e6a4e Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:42:42 -0700 Subject: [PATCH 024/103] updated workflows --- docs/index.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/index.md b/docs/index.md index 5704417..a5cbb6c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,3 @@ ---- -title: ----
From 2587028b32b04449e2fd58500d4196c33ba48302 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:44:11 -0700 Subject: [PATCH 025/103] updated workflows --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index a5cbb6c..1eb1ea6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -
+
From 47efcf738e903986363f62709415475029210278 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:44:36 -0700 Subject: [PATCH 026/103] updated workflows --- mkdocs.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index dd8e5c9..0d88ab1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -36,12 +36,12 @@ theme: toggle: icon: material/brightness-7 name: Switch to dark mode - - scheme: slate - primary: white - accent: white - toggle: - icon: material/brightness-4 - name: Switch to light mode + # - scheme: slate + # primary: white + # accent: white + # toggle: + # icon: material/brightness-4 + # name: Switch to light mode favicon: images/icon.png From d48b55ff310f9d98c64c516a4ad3adde8bc199c0 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:47:44 -0700 Subject: [PATCH 027/103] updated workflows --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 1eb1ea6..d54d984 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,5 +1,5 @@
- +
## What are PanMANs? From 2a5d8627a2b56ced676881c1ded7822ea97046eb Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:49:07 -0700 Subject: [PATCH 028/103] updated workflows --- docs/index.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index d54d984..1381a54 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,5 @@ -
+ +
From 4e9d021b5fdbad4cd1fd7b536a889b9a8d7d5f17 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:52:47 -0700 Subject: [PATCH 029/103] updated workflows --- docs/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 1381a54..579d415 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ - +#
- +
## What are PanMANs? From 1a7eaba830ab4b1d993d55158955764d9d2a3870 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:56:16 -0700 Subject: [PATCH 030/103] updated workflows --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 579d415..e7b04c4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -# +# .
From 50dc8bf69a274aa1eaab7608c486565de5ea8e87 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 11:59:16 -0700 Subject: [PATCH 031/103] updated workflows --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index e7b04c4..0438bf9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -# . +# Welcome to PanMAN Wiki
From 873a35759e20c2e9ddc98c15a4028e693ae9701a Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 14:05:46 -0700 Subject: [PATCH 032/103] updated README --- README.md | 171 +++++++++++++++++++++++++++++++++++++++++++++----- docs/utils.md | 2 +- 2 files changed, 155 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 4addb3e..ba982ab 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,182 @@ -[license-badge]: https://img.shields.io/badge/License-MIT-yellow.svg +[license-badge]: https://img.shields.io/badge/License-MIT-yellow.svg [license-link]: [https://github.com/TurakhiaLab/panman/LICENSE](https://github.com/TurakhiaLab/panman/blob/main/LICENSE) [![License][license-badge]][license-link] -[![DOI](https://img.shields.io/badge/DOI-10.1101/2024.07.02.601807-blue)](https://doi.org/10.1101/2024.07.02.601807) +[![DOI](https://img.shields.io/badge/DOI-https://zenodo.org/records/12630607-blue)](https://zenodo.org/records/12630607) +[](https://hub.docker.com/r/swalia14/panman) +[](https://doi.org/10.1101/2024.07.02.601807) +[](https://cmake.org) + # Pangenome Mutation Annotated Network (PanMAN)
## Table of Contents -- [Overview of PanMANs and panmanUtils](#overview) -- [Installation and Usage](#install) ([Documentation](https://turakhia.ucsd.edu/panman/)) - +- [Introduction](#intro) ([Wiki](https://turakhia.ucsd.edu/panman/)) + - [PanMANs](#panman) + - [panmanUtils](#panmanUtils) +- [Installation](#install) + - [Using Installation Script](#script) + - [Using Docker Image](#image) + - [Using DockerFile](#file) +- [PanMAN Construction](#construct) + - [Using provided dataset](#pangraph) + - [Using custom dataset](#custom) +- [panmanUtils functionalities](#function) - [Contribute](#contributions) - [Citing PanMAN](#cite_panman) -## Overview of PanMAN and panmanUtils
-### What is a PanMAN? +## Introduction +Here we provide an overview of PanMAN, panmanUtils, and its installation methods and usage. For more information please see our [Wiki](https://turakhia.ucsd.edu/panman/). +### What is a PanMAN? PanMAN or Pangenome Mutation-Annotated Network is a novel data representation for pangenomes that provides massive leaps in both representative power and storage efficiency. Specifically, PanMANs are composed of mutation-annotated trees, called PanMATs, which, in addition to substitutions, also annotate inferred indels (Fig. 1b), and even structural mutations (Fig. 1a) on the different branches. Multiple PanMATs are connected in the form of a network using edges to generate a PanMAN (Fig. 1c). PanMAN's representative power is compared against existing pangenomic formats in Fig. 1d. PanMANs are the most compressible pangenomic format for the different microbial datasets (SARS-CoV-2, RSV, HIV, Mycobacterium. Tuberculosis, E. Coli, and Klebsiella pneumoniae), providing 2.9 to 559-fold compression over standard pangenomic formats.
Figure 1: Overview of the PanMAN data structure
-### panmanUtils +### panmanUtils panmanUtils includes multiple algorithms to construct PanMANs and to support various functionalities to modify and extract useful information from PanMANs (Fig. 2). - -
Figure 2: Overview of panmanUtils' functionalities
-## Installation and Usage
-For information on pnamanUtils installation and usage, please see our documentation page available [here](https://turakhia.ucsd.edu/panman/) +## Installation +### Using installation script (requires sudo access) + +**Step 0:** Dependencies +```bash +Git +``` + +**Step 1:** Clone the repository +```bash +git clone https://github.com/TurakhiaLab/panman.git +cd panman +``` +**Step 2:** Run the installation script +```bash +chmod +x install/installationUbuntu.sh +./install/installationUbuntu.sh +``` +**Step 3:** Run panmanUtils +```bash +cd build +./panmanUtils --help +``` +### Using Docker Image + +To use panmanUtils in a docker container, users can create a docker container from a docker image, by following these steps + +**Step 0:** Dependencies +```bash +Docker +``` +**Step 1:** Pull the PanMAN docker image from DockerHub +```bash +docker pull swalia14/panman:latest +``` +**Step 2:** Build and run the docker container +```bash +docker run -it swalia14/panman:latest +``` +**Step 3:** Run panmanUtils +```bash +# Inside docker container +cd /home/panman/build +./panmanUtils --help +``` + +### Using DockerFile +Docker container with preinstalled panmanUtils can also be built from DockerFile by following these steps + +**Step 0:** Dependencies +```bash +Docker +Git +``` +**Step 1:** Clone the repository +```bash +git clone https://github.com/TurakhiaLab/panman.git +cd panman +``` +**Step 2:** Build a docker image +```bash +cd docker +docker build -t panman . +``` +**Step 3:** Build and run docker container +```bash +docker run -it panman +``` +**Step 4:** Run panmanUtils +```bash +# Inside docker container +cd /home/panman/build +./panmanUtils --help +``` + +## PanMAN Construction +Once the package is installed, PanMANs can be constructed from PanGraph [or GFA or MSA] and Tree topology (Newick format) using panmanUtils. 
Here we provide examples for constructing PanMANs from PanGraph (JSON); users can follow the instructions provided in [wiki](https://turakhia.ucsd.edu/panman/) for other methods. +### Building PanMAN from the provided dataset + +**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. + +```bash +cd $PANMAN_HOME/dataset +TODO +``` + +**Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: + +```bash +cd $PANMAN_HOME/build +./panmanUtils -P $PANMAN_HOME/test/sars_20.json -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 +``` +The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. + +### Building PanMAN from the custom dataset +Alternatively, users can provide custom PanGraph (JSON) and tree topology (Newick format) files to build a panman, using the following command + +```bash +cd $PANMAN_HOME/build +./panmanUtils -P $PANMAN_HOME/test/example.json -N $PANMAN_HOME/test/example.nwk -O example +``` +The above command will run panmanUtils program and build `example.panman` in `$PANMAN_HOME/build/panman` directory. + +## panmanUtils functionalities +panmanUtils provides various functionalities such as summary, [Raw sequence, MSA, VCF, GFA] extract, sub-network pruning, and many more. Please refer to [wiki](https://turakhia.ucsd.edu/panman/) for detailed information. Here we provide usage syntax and examples for summary and VCF extract. + +#### Summary extract +The summary feature extracts node and tree level statistics of a PanMAN, that contains a summary of its geometric and parsimony information. 
+ +* Usage Syntax +```bash +./panmanUtils -I <path to PanMAN file> --summary --output-file=<prefix> (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --summary --output-file=sars_20 +``` + +#### Variant Call Format (VCF) extract +Extract variations of all sequences from any PanMAT in a PanMAN in the form of a VCF file with respect to any reference sequence (ref) in the PanMAT. + +* Usage syntax +```bash +./panmanUtils -I <path to PanMAN file> --vcf --reference=ref --output-file=<prefix> (optional) +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --vcf --reference="Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12" --output-file=sars_20 +``` + ## Contribute
We welcome contributions from the community to enhance the capabilities of PanMAN and panmanUtils. If you encounter any issues or have suggestions for improvement, please open an issue on [PanMAN GitHub page](https://github.com/TurakhiaLab/panman). For general inquiries and support, reach out to our team. @@ -48,3 +184,4 @@ We welcome contributions from the community to enhance the capabilities of PanMA ## Citing PanMAN
If you use the PanMANs or panmanUtils in your research or publications, we kindly request that you cite the following paper: * Sumit Walia, Harsh Motwani, Kyle Smith, Russell Corbett-Detig, Yatish Turakhia, "Compressive Pangenomics Using Mutation-Annotated Networks", bioRxiv 2024.07.02.601807; doi: [10.1101/2024.07.02.601807](https://doi.org/10.1101/2024.07.02.601807) + diff --git a/docs/utils.md b/docs/utils.md index d83a0c6..adb85fc 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -138,7 +138,7 @@ Extract variations of all sequences from any PanMAT in a PanMAN in the form of a * Example ```bash cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --vcf -reference=NC_000913.3 --output-file=sars_20 +./panmanUtils -I panman/sars_20.panman --vcf -reference="Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12" --output-file=sars_20 ``` #### Graphical fragment assembly (GFA) extract From 81f171a82db8b96443a27034ecd83f041219902a Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 14:12:06 -0700 Subject: [PATCH 033/103] updated README --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 0d88ab1..d9b95db 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,7 +43,7 @@ theme: # icon: material/brightness-4 # name: Switch to light mode - favicon: images/icon.png + favicon: images/icon.svg logo: images/icon.svg From 8e75e183779fcac1b6318a008169929b53daa89a Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 14:15:27 -0700 Subject: [PATCH 034/103] capnp support --- .github/workflows/cmake.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 4c1dba9..89b1e8d 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -35,7 +35,7 @@ jobs: - name: install pre-reqs and build run: | - sudo apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev 
pkg-config + sudo apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev pkg-config capnproto chmod +x install/installationUbuntu.sh sudo ./install/installationUbuntu.sh - name: test From b55be26fbfdddd6814542d1dbf9d9432255b29b9 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 14:19:22 -0700 Subject: [PATCH 035/103] c++-10 --- .github/workflows/cmake.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 89b1e8d..5569f4e 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -32,7 +32,13 @@ jobs: pip install mkdocs-material pip install "mkdocs-material[imaging]" mkdocs gh-deploy --force - + - name: switch to gcc-10 on linux + if: matrix.configurations.os == "ubuntu-latest" + run: | + sudo apt install gcc-10 g++-10 + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 + sudo update-alternatives --set gcc /usr/bin/gcc-10 + - name: install pre-reqs and build run: | sudo apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev pkg-config capnproto From ef49d109dce672f932249402e9f96ecb40322c7c Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 14:19:58 -0700 Subject: [PATCH 036/103] c++-10 --- .github/workflows/cmake.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 5569f4e..b89c024 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -32,13 +32,13 @@ jobs: pip install mkdocs-material pip install "mkdocs-material[imaging]" mkdocs gh-deploy --force + - name: switch to gcc-10 on linux - if: matrix.configurations.os == "ubuntu-latest" run: | sudo apt install gcc-10 g++-10 sudo update-alternatives --install /usr/bin/gcc gcc 
/usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 sudo update-alternatives --set gcc /usr/bin/gcc-10 - + - name: install pre-reqs and build run: | sudo apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev pkg-config capnproto From c94902fd8c44920279fb56050ab6ae993bf0aee5 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 7 Oct 2024 14:27:05 -0700 Subject: [PATCH 037/103] fevicon --- docs/images/icon.png | Bin 0 -> 10662 bytes mkdocs.yml | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 docs/images/icon.png diff --git a/docs/images/icon.png b/docs/images/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..dc4aff737efc87ba9b2cfb56e62e51336e04ae7e GIT binary patch literal 10662 zcmbVS2{@E**H_s>vP7k-*mS`&}0^&vWkkKIeDN@0|NN_j64&($sJdhY$x76Vo0z zOy2_dP6htc*ja(k7i#o3z}F663qxI|^6taaOibL@{h+pfWU3p1gkzFY(c8L`0x7vu zeEppL16)G9yhu13oQpBRkD*0HT~S315K9}wPzVz# zkREVNB6#3{FGC#GgTh$i>VpYXBv63qAh4pkq6Q$+bN8W;fk_6+APq$&MKv{Ll`TDg zA~#S00VB%5WJ`=2#)t41YYe9Xa6Y~Sz{5>{f9quo_+l|6+?E(v%#gTHd|YupTOz<| zkdz8kN(llCF#c5ZTnQMw4~7ifcwzudn+lOY%oMhl6I{1sR_+KdM4+*|t}5D9RYO^w z8fb3(uiAd=BvTE2FkbEmitA=LU8&oElt4hR7?@mXTM~73;Eo}{`~0@E32{qcNeKGw z%5SMZ!4>Dbt!-^Yx?s z&Q%~BUW_pUd6NNMjIY-ge{KXS4&X?4dqCe6W2{Q$u zX}Vw+8>&AxNQr=fStw!kDFG%bt}3oHRTTtHH2_P-1|W!#K!l!VkSiHWfWy#UXj@l3 z7ZtoFoCv`q^dMkU0$3Gc34GJQ%BD0ppdV>yK|*^X7*{Y;C7cbFge0gEU0?`5gk_+= z9f%raf-+J^lS#fV`d7aC?kF(Z-!ar%CmCS+4zEKP}q`>hv6AtF$AU^7Eg z_rKUu!Prqpfv@}B4*^)?Dk_HV<(DkshwQx7FQFV8*vC@QlP~CAh)-EJYAO=%g z5rhKk%mMvM08?O)sa_!1)Do;>N()p%5WrwFy+A5(PcyX)qyqO;AoAPe@BjdZFUCfd z1UEGGwX-!10`PCKLxAIP`cQybXpjjJ@SUKmiU9DK>BId^Q2`{sBLLr#i4pn0jG>I{kO1H(3C0X4)2;o7rMY++uSpTL9LHTUqxX zvV~C{G&8;5UgH^FGpcmcYg0zn1p$==+wvNy7-b+rj0z!QY=NpUw88@!?T%GJXu`Ml z6h`I|arz8)5&l4gfXwy)Ji`N?K^bF==m8lD1#E123G8I#f77ysV7s;uC_DoJBe#{! 
z^uU3e9O-YyOWA}7XVi--gQMRO_-8r8@D$77cPn}*tUrMGzs{LWJby$FOEn=vlmMOE z(X#^b`!CU>0d+$Ahtbmq@Gxp)tKL))mYezZul4qySpfDQ1QB>LZRr64bsNNBn^Cs_ zc3}VeCv|HMFviH)O%8x-JR{~?KI)rqvW!D9_<#ex|HIr>VpQ87(cjuZ{?)txunRM) z#bb;8?aD$BJph!OmBrAn0`Lh`^CmKYZ8*U8KST#SFaAWwsM#&e3{C%d9|mFz{$n5h zQ^z0ZfD^(06CI6#jlVU;pd+_hzSTdzc8Na zo=76&Igj>4;u)$iu;959wwJ`kJ`BitUtAtCKN6E&=Dz7F1HkCtyQ1f7AX{HkU!tt=CV3?du z#hb^^@m=Z1MNb@R$ScNUsllP0Yfkwa@?yM@0%30RKKz}4Hn+pXcUW*aX&oZp*k`ktK33tVw6ZV=b83{Gsj;Q={9PT= za8s(#uCr@rTl44gihn6wGY1AoP6jKA2;W`FTDADJYVZzTl#p9WX@PacSl5Xcc~N^hx-b{6zL!Vn0QwFtW(i)Z z`K+$ovv^5-mvYb}9yd*6egG!MAbqWculmAAl^*}zr%Ve|L)YxO`$4?rw8m!_4a{Nl7(cf+kpYCcy&+4k00@K`x z_kU@bP8ffvm)tnPiZT+j5h)_w-#OXv`a{ZI?M9n?P;HG;bhZmq>QQT6)JT~`xDj*e zYi<;CD!VK%D%>83%j=^8C;zWIfrqlm03Xj$jTxl zd+OuZp)=b4y>=HHIAgMRW}{>){eFqs-V0%Yoxj=QO3B(Q>DWAB*1TwiOPiM{Vk&j0 z7TKZlkzFh7$FF(Jd=Iv-%-0aIm~jDgn{BZ2QnEwP%B$cdmaGb&Zd?No)`RnPH4#~m z-_$nS_m21MbE#$roJY?hi&<(nJ))Gak$_`q8E5$9W88z(d{d48p$;6~nk(>g$ znP^oAhs!|m8B0pA=U63YUdJIdJ8_$G2ito}2c9)IFMK=x3v@X?x;^#w3-6irUBjcJ zxj}>UBAYtym#;px_2m?eli70~NJ0d>L44wJFW#aC2$!)>DU<}rt`KGS2OA4jVH1g0 zhR@9=I%>H`z-3RsqhF+sH^DnfY7W&`860(27&u9-NRd!4r3srKD_3PuZ>2 zwa{DkNxfLvwDZw}t@7G+yQ-LL&-i{Y0I^FuH0qi+vs{l!kVzCh!#b_E{<5v7LL{2` z{+gC(8Jb#MBPCV`4RA%#q}IB3zx3`wovNxpHyg(bf(_p0jx#v0|p4PEmz zk(KOO4}bP;+5{AFEhYBGsrPs{%^YSE4%C^mQx~SLRzJlWduX!K-&$RpoHR_+&FC5z zv^;X~YHI3rv1e0wS0^WV z#S_Q-SN$L7UrMqwFFUwzvC-1!RyCYrsCRZX5b*MgMV*>%T`4||-{?WU&LHKr(%};! 
zDKt{CFkWX70Y9o~_td&eQN%f^@1otC{(~_YWmi!!QOC#=B*kPd%n!mVK4ZZuo1Lzn zu-Nl}|06T!oC9We7FHG?ri<5%NQ88j565?5fTS`+s=k+r?&MSXAnm8s= z(*{^o>{(>?>*Ua1bgQO~*&P85Uy<25XH%pdO=ep@=k-oP z3VL3<<31`l#6gYIo>y7U!6o@j4yYu>7T!_ggRqEcY99P9sQBg-Uqr6>t`+R+)EnJm z6}giVMeCX658Bi#!LF*V>D)UBS$;S5uQ#zM`RGtq2O5vH(31`ECqKwbhku$FkrPTe z%MFV+aD8E>GyhZTdy{OeO;lCxu>h0hD{^D*Ws>0>51NV$+2ccg9^93r-Xq?e&h&UK zHt|%i0^UUJ)hP{3s~h&DY4R)1H!6#a<%sXf!4#cLX+2LBHWh!5$LqJP`PN zc|d*W^Kc0yH7}j6w99UXt{lE>oL&NfzVl|U1&3WQ7(zFv4u;1Jjk)h)Eo)K@lDo~4 z%5yWyfURLq>uBx%Qa;pO$Ecab)VUa5a;&pQ6K|_5uo&}`396(2tyKKM+an&|t*zN) z(Z`{Uf_%+ZKMd)gbfi+!Ev{QESX*<;<}{~_-pH+^bIISHL)|@}YiEoLRfAH1CsC)r z>Q}VvZN5JHH9YU_Sya@5#oJN3(F+vVQGd*vYa#&BF+muuj0%;DT|#xtTIAMMRjCR* z6Ab5ZG+O1mo1~hm+?+C1R>}-4FiKZoZt+j6sVI{VKrBi%0rGwY=gRD53r8addpqeO z_vV!HLzy8Jr#%v!@4(C!3z}1e{0jT`W?a;PiNx+om7&TkGGDWBieTQ8r5L+Y;H#w2 z+gP*fIc0tJc2Y%L4QX@^MRfL|YO|1=1``tstf;%mc$4F}G7q`f9Rklh47%PONV_#M zs-~x~6IJ-7pwv6~p@a_fT|vtuj~;ZJ%Tp0l;kSa)xv7pX5FEE7eKg+lsSprZIx0!C zObXIk3c;B=(leS;M*u2}uZ0`+JFYu4 zAjdU>Gra^SIoge@Bb@;p4lk^Wi;JntUk*C9gs>D?M89p1&4Inq9Vt}jFNzmA9d>eM zuh{&;!4tthZeVL`o<#=vf6hkS7MvO{$V2{g9>NkF={K4NFYXn+qb`3K!?yQLSLLN1 zUqeq_^7AUq(OS>lJqCMDZ)ENgXg~)%TSnN{dCQ5M917VDZXlK#zA&8{Ydj{K;_Jp5 z*|HPtNFUh$F@xm%$wO{_XgTz~Q0dqmyU+VZ$HuS^HkO@{=bXCU&oAsN6SzI(vMhh^ z_NQRKuW{KAm_n6XB`L*@G41xD(QHu;sDnyaH8t`pQ&p#R{!W+k6j9jQw>`Hz)=wM+ zn&uonNfU_5uHHO*Jn?MkL4`r%;;*_Zy$4qOoS&zW56qa&=)~;zeYDz8EwR# zlA+W|PxLKv@4m~3u|UC7mci73_LmQe)2+y{F1B++XetPWBvZw)4ndUAB! 
zT~eAtm>0UaqsOM`c-g%?l`Bo5m3KzeqG{4VFBFk)^mV!9Ma4)j3~fXfWwOfYcgTyf z)$iX)=OL@uTZrw*P(D<}=hX2-igL5n?KAbm6-_NIo&LHF=fO95og)$C`?@7Pu0`|O zu1U}F;i4x(A{v*zz56=0FJGpgC*Y$(z)LGgY8{XB?KMRg>~Y5tuJ(?#g4+Gy8adk1 z-pSyd;&9W6iu?k%3H3K%_md^;??2K2sZ?L$Z=u!k)S5st>12%Q#6(mp3ua`Y#g znAXuX7vd?2oio~6MbY?`+-HI!Fc4ji7FbjrLj=;5`_*vR6c+j_jGDV40x}^L$mMX_N*#cLcY~ zYDmyZR-g;wW03pn%d-;0WgGd)TSH7)EYdF5OQFiAE|*ma|cD`LjSH&iwSo zuKDwvaJofJw%3aInny+$IFu`N^=GuNMnFJ=DzRS^mLTYmlp@(>S{}ae5Eqqmr?UH| zvcSp4KnyCo^$+Q6n$;YT?6qYm3KD0Pj|IynNAlQ~0`hQT8{dB7^CoG=tTsxJT<3F2?n< zK${i4$j_}ls`5J>TXeL1KX4xZVn;py{>Lxa9-f_O#B$c%_VCnK%RCfZ)A}S|GS9F1 zW@q4VWl;3PQIK>II&dn6bN`;D!Sq4dyq#_s*Pewv8os%EE@G9vot^UhuN{7W_@r~a z`XvZtFnexEtdFfD7u>UsO!Iv=wELUs4==fEX1B~-^p?`UT+M39iYp4H)SvO3&hto| z!D|n^xRu~@jjOIA^P-9Dpy6d}y9?p;wZ;$ifg3Flt#UX4&3{_@$xzRaWFgl{w#9?_ z88YjuEH`4}IwFnJ>qBarX!mumQH&izFSPhAK1q@ z*ystbWzXnb3PD$2L240BUF#{yI{*1)i$cAZ*CC5>BKO)XaF}Qb4bHK?{3)xwVxsRV(Sy3z|A2mnhE_BFr znU&`cnd!nl^z31d)*f5^$W3gxYtVwU&SvlV^^;qzu`n!RR_KCOt;dDP?;j47%3Nq! zS9G`+_v1`Z_~h$D8T$q^Am;bA&#Ak9OwT2Z@SZYG%#r<#MNWV_|?NaU5FioX0`!lw79S>z zw`I%!niw^_e(G^-nkUTB^9(AX4m-B5hj)?kNqv5;cZY8Y-OWQxdfxG6td64WJ@1#;>#5f}e5Fl^ zG|Qw{)SdraI$J{FX55L(%3++dKZmRyvO>AQE?qjq!^5+mP#3-|R(IM8YltMB1E-tx z7hd{e_jP=K$C#-B>!B42a^3Kp$@J@vHM_5=J{Cz{ZzMi2A7_F)@h8C|zVy5^@SS#u zI+_<2!}Z8}irCdXb78Xb+cmI$!exhejNYnJ$Q`FzFTBu&XIF*efOm-QwwfaIF?(*F z6n^f;8Wz&m-*46&>@qqTefLY&udY+WO_8zQ>#b65c8HymDdO`>GGgOH2VB9(;v@P} z@V#Qq0f(zT^PUF<&r45apE>g>+de&hYQS;n0`~2mofpnR;?kbx#Dz*Ho<7}?aPVQI zTrw%ISmopEcZTbJGQjJ;*(@bitJ|m#hmtxbu-!M?N>mT=2%fb)MypB8Em2RgYVOOX z*NH(1mmvngGv_V8q-K(&e&RV~F}2gEDkww!`&;@Bj=4wB&POf}-%j{AeB@2g@LXX3 zxwvvX+`0XezwNR;yW>%U)7@ljrgNlTW1efLe`1)p|3eFukb`W;l)#T!J8Ig z8}qB`w07In_{%#>ZoD0SeC+PRG#TbN9%kiqDFhRn;4Im>y(763h>iOrh%_fs2CkLeAmFirC1}{=7eRBo=1C zdn=TiQXvp1r?heDxCGeQeW7o3taqF)sUsC6Bh=M9*XvO2;!eRSA^T3L=BDTF;#7E% zE_=f*B~t4dzn+#vkPN!M_j}*)c;dJb(hO=k_m00C+y2n@k)s)NzE9DYSY4~T@+r_K zJF1>`$^h%xI5=;qQKXvB@k%aSlxrx0>9Os{hs>abFQ0sQ<@Clm0S>#-{zq~LieS4X zl5n@n>X~Iwg+{>lxGap0ZryOdzqJA#w 
zb^ahPs)dbbOKA5U=`eY;2>d)wBL(*fQ@`OC=aC|^A(`|>zBj=C#9KAR$xo5}iOH65 z`E@DwnwkNQf<6d!fpu`f4q{tL6eb5B&I-KmZL8U$o*dWp)HCcsBEt5RdLsPJeTt~b zO^r18QMqcz?B$P(9@LuIhtJQ=u;zc=9gRR&C7wjaTi@?W_uMhhDqbLBcj<;xQVP|l zJ)=B-e-SL8!tB_O(m3Fytv7YUw99o>)B7H`Ny;vC%}h)z*PIjadgRQW_f=*7Qq4Nh zB)KvF^{@uA4^z;oANv9i$O7nJk9Kh;h*U!Blg1;yj&}&he>MsV`}Cn~%zZTcq;0&u zg~4Hv*JxU~V5L}t_1Al^-@cc7dL-tQ_W`Mx{n#{1&MB6X{Cu0AeJ>JZ9;E@lXYlOd zntjpmJXb7}#qY6;#1f9;z@KZA$pV;s%Hw*65YPq<)BTTj$|A9b?+7I z?BXH%K-X1XJ>`%~qRA%KZp{#Ae;Vx743_s;v6gOsF$|pwi>hO4Cnf+zcW z_gEf{AJisVQIJ=o(E^m2#yI~U**l6D@re2(`|yuHWHXa_PiG+0Q!L!Wn8sl+=S z;EoV|T)QsCLT%2Vw%$S*4-{KjCdTh(mG!5 Date: Mon, 7 Oct 2024 14:30:27 -0700 Subject: [PATCH 038/103] fevicon --- mkdocs.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 159fab1..cb762c0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -44,7 +44,6 @@ theme: # name: Switch to light mode favicon: images/icon.png - logo: images/icon.png plugins: From dfe1c63b3c30d7989c01f05cbff7cfe62700a1b2 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 10 Oct 2024 11:48:19 -0700 Subject: [PATCH 039/103] low mem mode added --- src/panman.cpp | 175 ++++++++++++++++++++++++++++++++++++++++++++ src/panmanUtils.cpp | 15 ++-- 2 files changed, 183 insertions(+), 7 deletions(-) diff --git a/src/panman.cpp b/src/panman.cpp index 7852476..88efbe2 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -435,6 +435,57 @@ bool panmanUtils::Tree::hasPolytomy(Node* node) { return false; } +size_t readFastaInBatch(std::ifstream& fin, std::map< std::string, std::string >& sequenceIdsToSequences, size_t &startIndex, size_t batchSize) { + std::string line; + std::string currentSequence, currentSequenceId; + size_t lineLength = 0; + size_t nextStartIndex = startIndex; + + std::cout << "starting reading for " << nextStartIndex << std::endl; + while(getline(fin,line,'\n')) { + if(line.length() == 0) { + continue; + } + if(line[0] == '>') { + if(currentSequence.length()) { + if(lineLength == 0) { + lineLength 
= currentSequence.length(); + } else if(lineLength != currentSequence.length()) { + std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << std::endl; + exit(-1); + } + size_t lengthStr = startIndex+batchSize>currentSequence.size() ? currentSequence.size()-startIndex: batchSize; + sequenceIdsToSequences[currentSequenceId] = currentSequence.substr(startIndex, lengthStr); + } + std::vector< std::string > splitLine; + panmanUtils::stringSplit(line,' ',splitLine); + currentSequenceId = splitLine[0].substr(1); + currentSequence = ""; + } else { + currentSequence += line; + } + } + if(currentSequence.length()) { + if(lineLength != 0 && lineLength != currentSequence.length()) { + std::cerr << "Error: sequence lengths don't match!" << std::endl; + exit(-1); + } else { + lineLength = currentSequence.length(); + } + size_t lengthStr = startIndex+batchSize>currentSequence.size() ? currentSequence.size()-startIndex: batchSize; + sequenceIdsToSequences[currentSequenceId] = currentSequence.substr(startIndex, lengthStr); + nextStartIndex += lengthStr; + } + + std::cout << "Done reading till " << nextStartIndex - 1 << std::endl; + + // reset file reader (very important) + fin.clear(); + fin.seekg(0); + + return nextStartIndex; +} + panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE ftype, std::string reference) { if(ftype == panmanUtils::FILE_TYPE::GFA) { @@ -1105,6 +1156,130 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE }); + } else if(ftype == panmanUtils::FILE_TYPE::MSA_OPTIMIZE) { + std::string newickString; + secondFin >> newickString; + root = createTreeFromNewickString(newickString); + + std::string line; + size_t lineLength = 0; + + // Find length of MSA + std::string currentSequence; + while(getline(fin,line,'\n')) { + if(line.length() == 0) { + continue; + } + if(line[0] == '>') { + if(currentSequence.length()) { + if(lineLength == 0) { + lineLength = currentSequence.length(); + break; 
+ } + } + currentSequence = ""; + } else { + currentSequence += line; + } + } + std::cout << "line length: " << lineLength << std::endl; + + // reset file reader (very important) + fin.clear(); + fin.seekg(0); + + // set batch size + size_t memory = 128;//GB + size_t batchSize = 10000; + std::cout << "Batch size set to: " << batchSize << std::endl; + + std::string consensusSeq; + size_t startIndex = 0; + + tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int,int,int,int,int > > > nonGapMutations; + std::unordered_map< std::string, std::mutex > nodeMutexes; + + while (true) { + std::map< std::string, std::string > sequenceIdsToSequences; + size_t nextStartIndex = readFastaInBatch(fin, sequenceIdsToSequences, startIndex, batchSize); + std::set< size_t > emptyPositions; + + for(size_t i = 0; i < nextStartIndex-startIndex; i++) { + bool nonGapFound = false; + for(auto u: sequenceIdsToSequences) { + if(u.second[i] != '-') { + consensusSeq += u.second[i]; + nonGapFound = true; + break; + } + } + if(!nonGapFound) { + std::cout << "ideally should not happen\n" << std::endl; + emptyPositions.insert(i); + } + } + for(auto& u: sequenceIdsToSequences) { + std::string sequenceString; + for(size_t i = 0; i < u.second.length(); i++) { + if(emptyPositions.find(i) == emptyPositions.end()) { + sequenceString += u.second[i]; + } + } + u.second = sequenceString; + } + + for(auto u: allNodes) { + nodeMutexes[u.first]; + } + + tbb::parallel_for((size_t)0, nextStartIndex-startIndex, [&](size_t i) { + std::unordered_map< std::string, int > states; + std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; + for(const auto& u: sequenceIdsToSequences) { + if(u.second[i] != '-') { + states[u.first] = (1 << getCodeFromNucleotide(u.second[i])); + } else { + states[u.first] = 1; + } + } + nucFitchForwardPass(root, states); + nucFitchBackwardPass(root, states, (1 << getCodeFromNucleotide(consensusSeq[startIndex+i]))); + 
nucFitchAssignMutations(root, states, mutations, (1 << getCodeFromNucleotide(consensusSeq[startIndex+i]))); + for(auto mutation: mutations) { + nodeMutexes[mutation.first].lock(); + nonGapMutations[mutation.first].push_back(std::make_tuple(0, -1, startIndex+i, -1, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); + if (startIndex+i == 29919) std::cout << "OOps" << std::endl; + nodeMutexes[mutation.first].unlock(); + } + }); + std::cout << "Processed characters from " << startIndex << " to " << nextStartIndex - 1 << std::endl; + startIndex = nextStartIndex; + if (startIndex>=lineLength) break; + } + std::cout << "consensus seq len" << consensusSeq.size() << std::endl; + blocks.emplace_back(0, consensusSeq); + root->blockMutation.emplace_back(0, std::make_pair(BlockMutationType::BI, false)); + + tbb::parallel_for_each(nonGapMutations, [&](auto& u) { + nodeMutexes[u.first].lock(); + std::sort(u.second.begin(), u.second.end()); + nodeMutexes[u.first].unlock(); + size_t currentStart = 0; + for(size_t i = 1; i < u.second.size(); i++) { + if(i - currentStart == 6 || std::get<0>(u.second[i]) != std::get<0>(u.second[i-1]) || std::get<2>(u.second[i]) != std::get<2>(u.second[i-1])+1 || std::get<4>(u.second[i]) != std::get<4>(u.second[i-1])) { + nodeMutexes[u.first].lock(); + allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, i); + nodeMutexes[u.first].unlock(); + currentStart = i; + continue; + } + } + nodeMutexes[u.first].lock(); + allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, u.second.size()); + nodeMutexes[u.first].unlock(); + }); + + } } diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 821b838..22ccf00 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -151,6 +151,7 @@ void setupOptionDescriptions() { ("acr,q", "ACR method [fitch(default), mppa]") //("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") + ("low-mem-mode", "Perform Fitch Algrorithm in 
batch to save memory consumption") ("reference,n", po::value< std::string >(), "Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required)") ("start,s", po::value< std::string >(), "Start coordinate of protein translation") ("end,e", po::value< std::string >(), "End coordinate of protein translation") @@ -559,7 +560,7 @@ void parseAndExecute(int argc, char* argv[]) { } bool optimize = false; - if(globalVm.count("optimize")) { + if(globalVm.count("low-mem-mode")) { optimize = true; } @@ -572,13 +573,13 @@ void parseAndExecute(int argc, char* argv[]) { auto treeBuiltStart = std::chrono::high_resolution_clock::now(); - // if(!optimize) { - T = new panmanUtils::Tree(inputStream, newickInputStream, + if(!optimize) { + T = new panmanUtils::Tree(inputStream, newickInputStream, panmanUtils::FILE_TYPE::MSA); - // } else { - // T = new panmanUtils::Tree(inputStream, newickInputStream, - // panmanUtils::FILE_TYPE::MSA_OPTIMIZE); - // } + } else { + T = new panmanUtils::Tree(inputStream, newickInputStream, + panmanUtils::FILE_TYPE::MSA_OPTIMIZE); + } // checkFunction(T); From 107c98955493bf810362c2920a5ce4d90ab86025 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Tue, 15 Oct 2024 12:17:50 -0700 Subject: [PATCH 040/103] added indexing feature --- src/common.hpp | 10 +- src/fasta.cpp | 710 +++++++++++++++++++++++++++++++++++++++++--- src/panman.cpp | 220 ++++++++++++-- src/panman.hpp | 15 +- src/panmanUtils.cpp | 100 ++++++- src/panmanUtils.hpp | 4 + 6 files changed, 990 insertions(+), 69 deletions(-) diff --git a/src/common.hpp b/src/common.hpp index 165f0aa..022259d 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -15,8 +15,14 @@ static const int SANKOFF_INF = 100000001; -typedef std::vector< std::pair< std::vector< std::pair< char, std::vector< char > > >, - std::vector< std::vector< std::pair< char, std::vector< char > > > > > > sequence_t; +typedef std::vector< // block id + std::pair< + std::vector< std::pair< 
char, std::vector< char > > >, // vector - nuc id, char - char at nuc id, vector - nuc at gap id + std::vector< + std::vector< std::pair< char, std::vector< char > > > + > + > + > sequence_t; // Individual block typedef std::vector< std::pair< char, std::vector< char > > > block_t; diff --git a/src/fasta.cpp b/src/fasta.cpp index 9a1cbb0..a0f76bd 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -152,22 +152,545 @@ void panmanUtils::printSequenceLines(const sequence_t& sequence,\ } +void panmanUtils::printSubsequenceLines(const sequence_t& sequence,\ + const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, int start, int end, + bool aligned, std::ostream& fout, int offset, bool debug) { + + // String that stores the sequence to be printed + std::string line; + + for(size_t i = 0; i < blockExists.size(); i++) { + // Iterate through gap blocks - NOT BEING USED CURRENTLY + for(size_t j = 0; j < blockExists[i].second.size(); j++) { + // If block exists. Otherwise add gaps if MSA is to be printed + if(blockExists[i].second[j]) { + // If forward strand, iterare in forward direction + if(blockStrand[i].second[j]) { + // Main nucs + for(size_t k = 0; k < sequence[i].second[j].size(); k++) { + // Gap nucs + for(size_t w = 0; w < sequence[i].second[j][k].second.size(); w++) { + if(sequence[i].second[j][k].second[w] != '-') { + line += sequence[i].second[j][k].second[w]; + } else if(aligned) { + line += '-'; + } + } + // Main Nuc + if(sequence[i].second[j][k].first != '-' && sequence[i].second[j][k].first != 'x') { + line += sequence[i].second[j][k].first; + } else if(aligned) { + line += '-'; + } + } + } else { + // If reverse strand, iterate backwards + for(size_t k = sequence[i].second[j].size()-1; k+1 > 0; k--) { + // Main nuc + if(sequence[i].second[j][k].first != '-' && sequence[i].second[j][k].first != 'x') { + line += getComplementCharacter(sequence[i].second[j][k].first); + } else if(aligned) { + line += '-'; + } + // Gap nucs + for(size_t w = 
sequence[i].second[j][k].second.size()-1; w+1 > 0; w--) { + if(sequence[i].second[j][k].second[w] != '-') { + line += getComplementCharacter(sequence[i].second[j][k].second[w]); + } else if(aligned) { + line += '-'; + } + } + + } + } + } else if(aligned) { + for(size_t k = 0; k < sequence[i].second[j].size(); k++) { + for(size_t w = 0; w < sequence[i].second[j][k].second.size(); w++) { + line += '-'; + } + line += '-'; + } + } + } + + // Non-gap block - the only type being used currently + if(blockExists[i].first) { + // If forward strand + if(blockStrand[i].first) { + // Iterate through main nucs + for(size_t j = 0; j < sequence[i].first.size(); j++) { + // Gap nucs + for(size_t k = 0; k < sequence[i].first[j].second.size(); k++) { + if(sequence[i].first[j].second[k] != '-') { + line += sequence[i].first[j].second[k]; + } else if(aligned) { + line += '-'; + } + } + // Main nuc + if(sequence[i].first[j].first != '-' && sequence[i].first[j].first != 'x') { + line += sequence[i].first[j].first; + } else if(aligned) { + line += '-'; + } + } + } else { + // If reverse strand, iterate backwards + for(size_t j = sequence[i].first.size()-1; j+1 > 0; j--) { + // Main nuc first since we are iterating in reverse direction + if(sequence[i].first[j].first != '-' && sequence[i].first[j].first != 'x') { + line += getComplementCharacter(sequence[i].first[j].first); + } else if(aligned) { + line += '-'; + } + + // Gap nucs + for(size_t k = sequence[i].first[j].second.size()-1; k+1 > 0; k--) { + if(sequence[i].first[j].second[k] != '-') { + line += getComplementCharacter(sequence[i].first[j].second[k]); + } else if(aligned) { + line += '-'; + } + } + } + } + } else if(aligned) { + // If aligned sequence is required, print gaps instead if block does not exist + for(size_t j = 0; j < sequence[i].first.size(); j++) { + for(size_t k = 0; k < sequence[i].first[j].second.size(); k++) { + line+='-'; + } + line+='-'; + } + } + + } + + size_t ctr = 0; + + if(offset != 0) { + for(size_t i = 
0; i < line.length(); i++) { + if(line[i] != '-') { + if(ctr == (size_t)offset) { + // mark starting point + ctr = i; + break; + } + ctr++; + } + } + } + + // std::cout << line << std::endl; + std::string currentLine = ""; + bool reachedEnd = false; + int newStart = (line.size()-1-ctr >= start)? ctr+start: start-line.size()-1-ctr; + int newEnd = (line.size()-1-ctr >= end)? ctr+end: end-line.size()-1-ctr; + + // std::cout << newStart << " " << newEnd << " " << ctr << " " << start << " " << end << std::endl; + if (newStart > newEnd) { + currentLine += line.substr(newStart, line.size()-newStart); + currentLine += line.substr(0, newEnd+1); + } else { + currentLine += line.substr(newStart, newEnd-newStart+1); + } + fout << currentLine << std::endl; +} + // Depth first traversal FASTA writer void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& sequence, - blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned) { + blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, std::tuple< int, int, int, int > panMATStart, std::tuple< int, int, int, int > panMATEnd) { + + // Apply mutations + + // For reversing block mutations - primary block id, secondary block id, old mutation, old strand, new mutation, new strand + std::vector< std::tuple< int32_t, int32_t, bool, bool, bool, bool > > blockMutationInfo; + + // Block Mutations + for(auto mutation: root->blockMutation) { + int32_t primaryBlockId = mutation.primaryBlockId; + int32_t secondaryBlockId = mutation.secondaryBlockId; + bool type = mutation.blockMutInfo; + bool inversion = mutation.inversion; + + if (secondaryBlockId != -1) { + std::cout << "Error: Block Secondary ID is not -1" << std::endl; + exit(0); + } + + // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { + if(type 
== 1) { + // insertion + + bool oldStrand; + bool oldMut; + if(secondaryBlockId != -1) { + oldStrand = blockStrand[primaryBlockId].second[secondaryBlockId]; + oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; + blockExists[primaryBlockId].second[secondaryBlockId] = true; + + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId].second[secondaryBlockId] = !inversion; + } else { + oldStrand = blockStrand[primaryBlockId].first; + oldMut = blockExists[primaryBlockId].first; + blockExists[primaryBlockId].first = true; + + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId].first = !inversion; + } + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, true, !inversion) ); + + + } else { + bool oldMut; + bool oldStrand; + if(inversion) { + // This means that this is not a deletion, but instead an inversion + if(secondaryBlockId != -1) { + oldStrand = blockStrand[primaryBlockId].second[secondaryBlockId]; + oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; + blockStrand[primaryBlockId].second[secondaryBlockId] = !oldStrand; + } else { + oldStrand = blockStrand[primaryBlockId].first; + oldMut = blockExists[primaryBlockId].first; + blockStrand[primaryBlockId].first = !oldStrand; + } + if(oldMut != true) { + std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; + } + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); + } else { + // Actually a deletion + + if(secondaryBlockId != -1) { + oldStrand = blockStrand[primaryBlockId].second[secondaryBlockId]; + oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; + blockExists[primaryBlockId].second[secondaryBlockId] = false; + + // resetting strand to true during deletion + blockStrand[primaryBlockId].second[secondaryBlockId] = true; + } else { + oldStrand = blockStrand[primaryBlockId].first; + oldMut = blockExists[primaryBlockId].first; + blockExists[primaryBlockId].first = false; + + // resetting strand to true during deletion + blockStrand[primaryBlockId].first = true; + } + } + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, false, true) ); + + } + + // } + + + } + + // For backtracking. primaryBlockId, secondaryBlockId, pos, gapPos, (oldVal, newVal) in substitution, ('-', newVal) in insertion, (oldVal, '-') in deletion + std::vector< std::tuple< int32_t, int32_t, int, int, char, char > > mutationInfo; + + // Nuc mutations + for(size_t i = 0; i < root->nucMutation.size(); i++) { + int32_t primaryBlockId = root->nucMutation[i].primaryBlockId; + int32_t secondaryBlockId = root->nucMutation[i].secondaryBlockId; + + // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { + int32_t nucPosition = root->nucMutation[i].nucPosition; + int32_t nucGapPosition = root->nucMutation[i].nucGapPosition; + uint32_t type = (root->nucMutation[i].mutInfo & 0x7); + char newVal = '-'; + + if(type < 3) { + // Either S, I or D + int len = ((root->nucMutation[i].mutInfo) >> 4); + + if(primaryBlockId >= sequence.size()) { + std::cout << primaryBlockId << " " << sequence.size() << 
std::endl; + } + + if(type == panmanUtils::NucMutationType::NS) { + // Substitution + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); + } + + } + } else { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].first[nucPosition+j].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); + } + } + } + } else if(type == panmanUtils::NucMutationType::NI) { + // 
Insertion + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition + j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); + } + + } + } else { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].first[nucPosition+j].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); + } + } + } + } else if(type == panmanUtils::NucMutationType::ND) { + // Deletion + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + 
for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j]; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-')); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-')); + } + + } + } else { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-')); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; + sequence[primaryBlockId].first[nucPosition+j].first = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-')); + } + } + } + } + } else { + if(type == panmanUtils::NucMutationType::NSNPS) { + // SNP Substitution + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + } else { + char oldVal = 
sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + } + } else { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + } else { + char oldVal = sequence[primaryBlockId].first[nucPosition].first; + sequence[primaryBlockId].first[nucPosition].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + } + } + } else if(type == panmanUtils::NucMutationType::NSNPI) { + // SNP Insertion + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + } else { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + } + } else { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, 
secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + } else { + char oldVal = sequence[primaryBlockId].first[nucPosition].first; + sequence[primaryBlockId].first[nucPosition].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + } + } + } else if(type == panmanUtils::NucMutationType::NSNPD) { + // SNP Deletion + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); + } else { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); + } + } else { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); + } else { + char oldVal = sequence[primaryBlockId].first[nucPosition].first; + sequence[primaryBlockId].first[nucPosition].first = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); + } + } + } + } + } + // } - // Apply mutations + if(root->children.size() == 0 || rootSeq) { + // Print sequence + + fout << '>' << root->identifier << std::endl; + + int offset = 0; + if(!aligned && circularSequences.find(root->identifier) != circularSequences.end()) { + // If MSA is to be printed, offset doesn't matter + offset = 
circularSequences[root->identifier]; + } + sequence_t sequencePrint = sequence; + blockExists_t blockExistsPrint = blockExists; + blockStrand_t blockStrandPrint = blockStrand; + + if(rotationIndexes.find(root->identifier) != rotationIndexes.end() && rotationIndexes[root->identifier] != 0) { + int ctr = -1, rotInd = 0; + for(size_t i = 0; i < blockExistsPrint.size(); i++) { + if(blockExistsPrint[i].first) { + ctr++; + } + if(ctr == rotationIndexes[root->identifier]) { + rotInd = i; + break; + } + } + rotate(sequencePrint.begin(), sequencePrint.begin() + rotInd, sequencePrint.end()); + rotate(blockExistsPrint.begin(), blockExistsPrint.begin() + rotInd, blockExistsPrint.end()); + rotate(blockStrandPrint.begin(), blockStrandPrint.begin() + rotInd, blockStrandPrint.end()); + } + + if(sequenceInverted.find(root->identifier) != sequenceInverted.end() && sequenceInverted[root->identifier]) { + reverse(sequencePrint.begin(), sequencePrint.end()); + reverse(blockExistsPrint.begin(), blockExistsPrint.end()); + reverse(blockStrandPrint.begin(), blockStrandPrint.end()); + } + + panmanUtils::printSequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); + } else { + + // DFS on children + for(panmanUtils::Node* child: root->children) { + printFASTAHelper(child, sequence, blockExists, blockStrand, fout, aligned, rootSeq); + } + } + + + // Undo block mutations when current node and its subtree have been processed + for(auto it = blockMutationInfo.rbegin(); it != blockMutationInfo.rend(); it++) { + auto mutation = *it; + if(std::get<1>(mutation) != -1) { + blockExists[std::get<0>(mutation)].second[std::get<1>(mutation)] = std::get<2>(mutation); + blockStrand[std::get<0>(mutation)].second[std::get<1>(mutation)] = std::get<3>(mutation); + } else { + blockExists[std::get<0>(mutation)].first = std::get<2>(mutation); + blockStrand[std::get<0>(mutation)].first = std::get<3>(mutation); + } + } + + // Undo nuc mutations when current node and its subtree 
have been processed + for(auto it = mutationInfo.rbegin(); it != mutationInfo.rend(); it++) { + auto mutation = *it; + if(std::get<1>(mutation) != -1) { + if(std::get<3>(mutation) != -1) { + sequence[std::get<0>(mutation)].second[std::get<1>(mutation)][std::get<2>(mutation)].second[std::get<3>(mutation)] = std::get<4>(mutation); + } else { + sequence[std::get<0>(mutation)].second[std::get<1>(mutation)][std::get<2>(mutation)].first = std::get<4>(mutation); + } + } else { + if(std::get<3>(mutation) != -1) { + sequence[std::get<0>(mutation)].first[std::get<2>(mutation)].second[std::get<3>(mutation)] = std::get<4>(mutation); + } else { + sequence[std::get<0>(mutation)].first[std::get<2>(mutation)].first = std::get<4>(mutation); + } + } + } +} + +void panmanUtils::Tree::printSingleNodeHelper(std::vector &nodeList, int nodeListIndex, sequence_t& sequence, + blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, int panMATStart, int panMATEnd) { + + panmanUtils::Node* node = nodeList[nodeListIndex--]; + // Apply mutations // For reversing block mutations - primary block id, secondary block id, old mutation, old strand, new mutation, new strand std::vector< std::tuple< int32_t, int32_t, bool, bool, bool, bool > > blockMutationInfo; // Block Mutations - for(auto mutation: root->blockMutation) { + for(auto mutation: node->blockMutation) { int32_t primaryBlockId = mutation.primaryBlockId; int32_t secondaryBlockId = mutation.secondaryBlockId; bool type = mutation.blockMutInfo; bool inversion = mutation.inversion; + if (secondaryBlockId != -1) { + std::cout << "Error: Block Secondary ID is not -1" << std::endl; + exit(0); + } + + // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { if(type == 1) { // insertion @@ -232,24 +755,28 @@ void 
panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se } + // } + + } // For backtracking. primaryBlockId, secondaryBlockId, pos, gapPos, (oldVal, newVal) in substitution, ('-', newVal) in insertion, (oldVal, '-') in deletion std::vector< std::tuple< int32_t, int32_t, int, int, char, char > > mutationInfo; // Nuc mutations - for(size_t i = 0; i < root->nucMutation.size(); i++) { - int32_t primaryBlockId = root->nucMutation[i].primaryBlockId; - int32_t secondaryBlockId = root->nucMutation[i].secondaryBlockId; + for(size_t i = 0; i < node->nucMutation.size(); i++) { + int32_t primaryBlockId = node->nucMutation[i].primaryBlockId; + int32_t secondaryBlockId = node->nucMutation[i].secondaryBlockId; - int32_t nucPosition = root->nucMutation[i].nucPosition; - int32_t nucGapPosition = root->nucMutation[i].nucGapPosition; - uint32_t type = (root->nucMutation[i].mutInfo & 0x7); + // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { + int32_t nucPosition = node->nucMutation[i].nucPosition; + int32_t nucGapPosition = node->nucMutation[i].nucGapPosition; + uint32_t type = (node->nucMutation[i].mutInfo & 0x7); char newVal = '-'; if(type < 3) { // Either S, I or D - int len = ((root->nucMutation[i].mutInfo) >> 4); + int len = ((node->nucMutation[i].mutInfo) >> 4); if(primaryBlockId >= sequence.size()) { std::cout << primaryBlockId << " " << sequence.size() << std::endl; @@ -261,14 +788,14 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se if(nucGapPosition != -1) { for(int j = 0; j < len; j++) { char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 
0xF); sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); } } else { for(int j = 0; j < len; j++) { char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = newVal; mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); } @@ -278,14 +805,14 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se if(nucGapPosition != -1) { for(int j = 0; j < len; j++) { char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = newVal; mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); } } else { for(int j = 0; j < len; j++) { char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); sequence[primaryBlockId].first[nucPosition+j].first = newVal; mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); } @@ -297,14 +824,14 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se 
if(nucGapPosition != -1) { for(int j = 0; j < len; j++) { char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition + j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); } } else { for(int j = 0; j < len; j++) { char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = newVal; mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); } @@ -314,14 +841,14 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se if(nucGapPosition != -1) { for(int j = 0; j < len; j++) { char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = newVal; mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); } } else { for(int j = 0; j < len; j++) { char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + newVal = 
panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); sequence[primaryBlockId].first[nucPosition+j].first = newVal; mutationInfo.push_back(std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); } @@ -363,7 +890,7 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se } else { if(type == panmanUtils::NucMutationType::NSNPS) { // SNP Substitution - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 20) & 0xF); if(secondaryBlockId != -1) { if(nucGapPosition != -1) { char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; @@ -387,7 +914,7 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se } } else if(type == panmanUtils::NucMutationType::NSNPI) { // SNP Insertion - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 20) & 0xF); if(secondaryBlockId != -1) { if(nucGapPosition != -1) { char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; @@ -435,28 +962,28 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se } } } + // } - if(root->children.size() == 0) { + if(nodeListIndex < 0) { // Print sequence - - fout << '>' << root->identifier << std::endl; + fout << '>' << node->identifier << std::endl; int offset = 0; - if(!aligned && circularSequences.find(root->identifier) != circularSequences.end()) { + if(!aligned && circularSequences.find(node->identifier) != circularSequences.end()) { // If MSA is to be printed, offset doesn't matter - offset = circularSequences[root->identifier]; + offset = circularSequences[node->identifier]; } sequence_t sequencePrint = sequence; blockExists_t 
blockExistsPrint = blockExists; blockStrand_t blockStrandPrint = blockStrand; - if(rotationIndexes.find(root->identifier) != rotationIndexes.end() && rotationIndexes[root->identifier] != 0) { + if(rotationIndexes.find(node->identifier) != rotationIndexes.end() && rotationIndexes[node->identifier] != 0) { int ctr = -1, rotInd = 0; for(size_t i = 0; i < blockExistsPrint.size(); i++) { if(blockExistsPrint[i].first) { ctr++; } - if(ctr == rotationIndexes[root->identifier]) { + if(ctr == rotationIndexes[node->identifier]) { rotInd = i; break; } @@ -466,19 +993,15 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se rotate(blockStrandPrint.begin(), blockStrandPrint.begin() + rotInd, blockStrandPrint.end()); } - if(sequenceInverted.find(root->identifier) != sequenceInverted.end() && sequenceInverted[root->identifier]) { + if(sequenceInverted.find(node->identifier) != sequenceInverted.end() && sequenceInverted[node->identifier]) { reverse(sequencePrint.begin(), sequencePrint.end()); reverse(blockExistsPrint.begin(), blockExistsPrint.end()); reverse(blockStrandPrint.begin(), blockStrandPrint.end()); } - panmanUtils::printSequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); + panmanUtils::printSubsequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, panMATStart, panMATEnd, aligned, fout, offset); } else { - - // DFS on children - for(panmanUtils::Node* child: root->children) { - printFASTAHelper(child, sequence, blockExists, blockStrand, fout, aligned); - } + printSingleNodeHelper(nodeList, nodeListIndex, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd); } @@ -513,7 +1036,7 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se } } -void panmanUtils::Tree::printFASTA(std::ostream& fout, bool aligned) { +void panmanUtils::Tree::printFASTA(std::ostream& fout, bool aligned, bool rootSeq) { // List of blocks. 
Each block has a nucleotide list. Along with each nucleotide is a gap list. std::vector< std::pair< std::vector< std::pair< char, std::vector< char > > >, std::vector< std::vector< std::pair< char, std::vector< char > > > > > > sequence(blocks.size() + 1); std::vector< std::pair< bool, std::vector< bool > > > blockExists(blocks.size() + 1, {false, {}}); @@ -588,6 +1111,121 @@ void panmanUtils::Tree::printFASTA(std::ostream& fout, bool aligned) { } // Run depth first traversal to extract sequences - printFASTAHelper(root, sequence, blockExists, blockStrand, fout, aligned); + printFASTAHelper(root, sequence, blockExists, blockStrand, fout, aligned, rootSeq); + +} + +void panmanUtils::Tree::printSingleNode(std::ostream& fout, const sequence_t& sequenceRef, + const blockExists_t& blockExistsRef, const blockStrand_t& blockStrandRef, + std::string nodeIdentifier, std::tuple< int, int, int, int >& panMATStart, std::tuple< int, int, int, int >& panMATEnd) { + // List nodes from root to nodeIdentifier + std::vector nodeList; + Node* newNode = allNodes[nodeIdentifier]; + nodeList.push_back(newNode); + while (newNode->parent != nullptr) { + newNode = newNode->parent; + nodeList.push_back(newNode); + } + + // for (int i=nodeList.size()-1; i>=0; i--) std::cout << nodeList[i]->identifier << std::endl; + + // std::cout << std::get<0>(panMATStart) << " " << std::get<1>(panMATStart) << " " << std::get<2>(panMATStart) << " " << std::get<3>(panMATStart) << std::endl; + // std::cout << std::get<0>(panMATEnd) << " " << std::get<1>(panMATEnd) << " " << std::get<2>(panMATEnd) << " " << std::get<3>(panMATEnd) << std::endl; + + int startCoordinate = getUnalignedGlobalCoordinate(std::get<0>(panMATStart), + std::get<1>(panMATStart), + std::get<2>(panMATStart), + std::get<3>(panMATStart), + sequenceRef, + blockExistsRef, + blockStrandRef, + circularSequences[nodeIdentifier] + ); + + int endCoordinate = getUnalignedGlobalCoordinate(std::get<0>(panMATEnd), + std::get<1>(panMATEnd), + 
std::get<2>(panMATEnd), + std::get<3>(panMATEnd), + sequenceRef, + blockExistsRef, + blockStrandRef, + circularSequences[nodeIdentifier] + ); + // std::cout << startCoordinate << ":" << endCoordinate << std::endl; + + // List of blocks. Each block has a nucleotide list. Along with each nucleotide is a gap list. + std::vector< std::pair< std::vector< std::pair< char, std::vector< char > > >, std::vector< std::vector< std::pair< char, std::vector< char > > > > > > sequence(blocks.size() + 1); + std::vector< std::pair< bool, std::vector< bool > > > blockExists(blocks.size() + 1, {false, {}}); + blockStrand_t blockStrand(blocks.size() + 1, {true, {}}); + + // Assigning block gaps + for(size_t i = 0; i < blockGaps.blockPosition.size(); i++) { + sequence[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i]); + blockExists[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i], false); + blockStrand[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i], true); + } + + int32_t maxBlockId = 0; + + // Create consensus sequence of blocks + for(size_t i = 0; i < blocks.size(); i++) { + + int32_t primaryBlockId = ((int32_t)blocks[i].primaryBlockId); + int32_t secondaryBlockId = ((int32_t)blocks[i].secondaryBlockId); + + maxBlockId = std::max(maxBlockId, primaryBlockId); + + for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { + bool endFlag = false; + for(size_t k = 0; k < 8; k++) { + const int nucCode = (((blocks[i].consensusSeq[j]) >> (4*(7 - k))) & 15); + + if(nucCode == 0) { + endFlag = true; + break; + } + const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); + + if(secondaryBlockId != -1) { + sequence[primaryBlockId].second[secondaryBlockId].push_back({nucleotide, {}}); + } else { + sequence[primaryBlockId].first.push_back({nucleotide, {}}); + } + } + + if(endFlag) { + break; + } + } + + // End character to incorporate for gaps at the end + if(secondaryBlockId != -1) { + 
sequence[primaryBlockId].second[secondaryBlockId].push_back({'x', {}}); + } else { + sequence[primaryBlockId].first.push_back({'x', {}}); + } + } + + sequence.resize(maxBlockId + 1); + blockExists.resize(maxBlockId + 1); + blockStrand.resize(maxBlockId + 1); + + // Assigning nucleotide gaps in blocks + for(size_t i = 0; i < gaps.size(); i++) { + int32_t primaryBId = (gaps[i].primaryBlockId); + int32_t secondaryBId = (gaps[i].secondaryBlockId); + + for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + int len = gaps[i].nucGapLength[j]; + int pos = gaps[i].nucPosition[j]; + if(secondaryBId != -1) { + sequence[primaryBId].second[secondaryBId][pos].second.resize(len, '-'); + } else { + sequence[primaryBId].first[pos].second.resize(len, '-'); + } + } + } + // Run traversal on nodeList to extract sequences + printSingleNodeHelper(nodeList, (nodeList.size()-1), sequence, blockExists, blockStrand, fout, false, false, startCoordinate, endCoordinate); } diff --git a/src/panman.cpp b/src/panman.cpp index 88efbe2..025bc39 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -256,6 +256,128 @@ void panmanUtils::stringSplit (std::string const& s, char delim, std::vector leaves; + std::vector numOpen; + std::vector numClose; + std::vector> branchLen (128); // will be resized later if needed + size_t level = 0; + + std::vector s1; + stringSplit(newickString, ',', s1); + + numOpen.reserve(s1.size()); + numClose.reserve(s1.size()); + + + for (auto s: s1) { + size_t no = 0; + size_t nc = 0; + size_t leafDepth = 0; + + bool stop = false; + bool branchStart = false; + std::string leaf = ""; + std::string branch = ""; + + for (auto c: s) { + if (c == ':') { + stop = true; + branch = ""; + branchStart = true; + } else if (c == '(') { + no++; + level++; + if (branchLen.size() <= level) { + branchLen.resize(level*2); + } + } else if (c == ')') { + stop = true; + nc++; + // float len = (branch.size() > 0) ? std::stof(branch) : -1.0; + float len = (branch.size() > 0) ? 
std::stof(branch) : 1.0; + branchLen[level].push(len); + level--; + branchStart = false; + } else if (!stop) { + leaf += c; + branchStart = false; + leafDepth = level; + + } else if (branchStart) { + if (isdigit(c) || c == '.' || c == 'e' || c == 'E' || c == '-' || c == '+') { + branch += c; + } + } + } + leaves.push_back(std::move(leaf)); + numOpen.push_back(no); + numClose.push_back(nc); + // float len = (branch.size() > 0) ? std::stof(branch) : -1.0; + float len = (branch.size() > 0) ? std::stof(branch) : 1.0; + branchLen[level].push(len); + + // Adjusting max and mean depths + m_maxDepth = std::max(m_maxDepth, leafDepth); + m_meanDepth += leafDepth; + + } + + m_meanDepth /= leaves.size(); + + if (level != 0) { + fprintf(stderr, "ERROR: incorrect Newick format!\n"); + exit(1); + } + + m_numLeaves = leaves.size(); + + std::stack parentStack; + + for (size_t i=0; iidentifier << '\t' << newNode->branchLength << '\n'; + branchLen[level].pop(); + level++; + + allNodes[nid] = newNode; + parentStack.push(newNode); + } + Node* leafNode = new Node(leaf, parentStack.top(), branchLen[level].front()); + allNodes[leaf] = leafNode; + + branchLen[level].pop(); + for (size_t j=0; jbranchLength = 0.0; + return treeRoot; +} + +/* panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string newickString) { newickString = panmanUtils::stripString(newickString); @@ -377,11 +499,11 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new return newTreeRoot; } +*/ void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, std::vector &storedNode) { std::vector< panmanUtils::NucMut > storedNucMutation; - std::cout << root->identifier << "\tMutation: " << currentIndex << std::endl; for (auto nodeMutations: storedNode[currentIndex].getMutations()){ auto countt = 0; @@ -1076,7 +1198,6 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } if(!nonGapFound) { emptyPositions.insert(i); - std::cout 
<< "OOps" << i << std::endl; } } for(auto& u: sequenceIdsToSequences) { @@ -1124,13 +1245,10 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE nodeMutexes[mutation.first].unlock(); } posMutexes[i].lock(); - std::cout << positionCount++ << std::endl; posMutexes[i].unlock(); }); - std::cout << "Mutations computed" << std::endl; sequenceIdsToSequences.clear(); // saving memory - std::cout << "MSA deleted for saving memory" << std::endl; tbb::parallel_for_each(nonGapMutationsMSA, [&](auto& u) { // for(auto &u: nonGapMutationsMSA){ @@ -1141,8 +1259,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE for(size_t i = 1; i < u.second.size(); i++) { if(i - currentStart == 6 || std::get<0>(u.second[i]) != std::get<0>(u.second[i-1])+1 || std::get<1>(u.second[i]) != std::get<1>(u.second[i-1])) { nodeMutexes[u.first].lock(); - if (std::get<0>(u.second[currentStart]) == 0) - std::cout << u.first << std::endl; + // if (std::get<0>(u.second[currentStart]) == 0) + // std::cout << u.first << std::endl; allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, i); nodeMutexes[u.first].unlock(); currentStart = i; @@ -1190,22 +1308,45 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE // set batch size size_t memory = 128;//GB - size_t batchSize = 10000; + size_t batchSize = 100000; std::cout << "Batch size set to: " << batchSize << std::endl; std::string consensusSeq; + consensusSeq.resize(lineLength); size_t startIndex = 0; tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int,int,int,int,int > > > nonGapMutations; std::unordered_map< std::string, std::mutex > nodeMutexes; + while (true) { std::map< std::string, std::string > sequenceIdsToSequences; size_t nextStartIndex = readFastaInBatch(fin, sequenceIdsToSequences, startIndex, batchSize); std::set< size_t > emptyPositions; - + auto newStart = std::chrono::high_resolution_clock::now(); + 
tbb::parallel_for((size_t)0, nextStartIndex-startIndex, [&](size_t i) { + bool nonGapFound = false; + std::cout << startIndex+i << std::endl; + for(auto u: sequenceIdsToSequences) { + if(u.second[i] != '-') { + consensusSeq[startIndex+i] = u.second[i]; + nonGapFound = true; + break; + } + } + if(!nonGapFound) { + std::cout << "ideally should not happen\n" << std::endl; + exit(1); + // emptyPositions.insert(i); + } + }); + auto newEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds newTime = newEnd - newStart; + std::cout << "Consensus till " << nextStartIndex << " done in " << newTime.count() << " nanoseconds" << std::endl; + /* for(size_t i = 0; i < nextStartIndex-startIndex; i++) { bool nonGapFound = false; + std::cout << i << std::endl; for(auto u: sequenceIdsToSequences) { if(u.second[i] != '-') { consensusSeq += u.second[i]; @@ -1218,6 +1359,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE emptyPositions.insert(i); } } + for(auto& u: sequenceIdsToSequences) { std::string sequenceString; for(size_t i = 0; i < u.second.length(); i++) { @@ -1227,11 +1369,13 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } u.second = sequenceString; } + */ for(auto u: allNodes) { nodeMutexes[u.first]; } + newStart = std::chrono::high_resolution_clock::now(); tbb::parallel_for((size_t)0, nextStartIndex-startIndex, [&](size_t i) { std::unordered_map< std::string, int > states; std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; @@ -1248,11 +1392,12 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE for(auto mutation: mutations) { nodeMutexes[mutation.first].lock(); nonGapMutations[mutation.first].push_back(std::make_tuple(0, -1, startIndex+i, -1, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); - if (startIndex+i == 29919) std::cout << "OOps" << std::endl; nodeMutexes[mutation.first].unlock(); 
} }); - std::cout << "Processed characters from " << startIndex << " to " << nextStartIndex - 1 << std::endl; + newEnd = std::chrono::high_resolution_clock::now(); + newTime = newEnd - newStart; + std::cout << "Processed characters from " << startIndex << " to " << nextStartIndex - 1 << " in " << newTime.count() << " nanoseconds" << std::endl; startIndex = nextStartIndex; if (startIndex>=lineLength) break; } @@ -1285,6 +1430,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { // Create tree + // std::cout << mainTree.getNewick().cStr() << std::endl; root = createTreeFromNewickString(mainTree.getNewick().cStr()); // std::cout << root->identifier << std::endl; std::map< std::pair, std::vector< uint32_t > > blockIdToConsensusSeq; @@ -1518,14 +1664,13 @@ std::string panmanUtils::Tree::getNewickString(Node* node) { if (branch_length_stack.top() >= 0) { newick += ':'; - newick += branch_length_stack.top(); + newick += std::to_string(branch_length_stack.top()); } node_stack.pop(); branch_length_stack.pop(); } newick += ';'; - return newick; } @@ -2103,6 +2248,31 @@ panmanUtils::Node* panmanUtils::Tree::extractPanMATSegmentHelper(panmanUtils::No } +void panmanUtils::Tree::extractPanMATIndex(std::ostream& fout, int64_t start, int64_t end, std::string nodeIdentifier, bool single) { + sequence_t nodeSequence; + blockExists_t rootBlockExists; + blockStrand_t rootBlockStrand; + + // std::cout << "Indexing for " << nodeIdentifier << " between (" << start << ":" << end << ")" << std::endl; + + // Extract node Identifier Sequence + getSequenceFromReference(nodeSequence, rootBlockExists, rootBlockStrand, nodeIdentifier); + + // Get PanMAT coordinates from global coordinates + std::tuple< int, int, int, int > panMATStart = globalCoordinateToBlockCoordinate(start, + nodeSequence, rootBlockExists, rootBlockStrand); + std::tuple< int, int, int, int > panMATEnd = 
globalCoordinateToBlockCoordinate(end, + nodeSequence, rootBlockExists, rootBlockStrand); + + if (single) { + printSingleNode(fout, nodeSequence, rootBlockExists, rootBlockStrand, nodeIdentifier, panMATStart, panMATEnd); + } else { + + } + + return; +} + void panmanUtils::Tree::extractPanMATSegment(kj::std::StdOutputStream& fout, int64_t start, int64_t end) { sequence_t rootSequence; blockExists_t rootBlockExists; @@ -2469,9 +2639,9 @@ void panmanUtils::Tree::writeToFile(kj::std::StdOutputStream& fout, panmanUtils: } ::capnp::List::Builder gapsBuilder = treeToWrite.initGaps(gaps.size()); - std::cout << "Writing Gap List " << gaps.size() << "\n"; + // std::cout << "Writing Gap List " << gaps.size() << "\n"; for(size_t i = 0; i < gaps.size(); i++) { - std::cout << "itr: " << i << " size: " << gaps[i].nucPosition.size() << "\n"; + //std::cout << "itr: " << i << " size: " << gaps[i].nucPosition.size() << "\n"; panman::GapList::Builder gl = gapsBuilder[i]; ::capnp::List::Builder nucGapLengthBuilder = gl.initNucGapLength(gaps[i].nucPosition.size()); @@ -3556,6 +3726,7 @@ void panmanUtils::Tree::getSequenceFromReference(sequence_t& sequence, blockExis const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); if(secondaryBlockId != -1) { + std::cout << "Is it used?\n" ; sequence[primaryBlockId].second[secondaryBlockId].push_back({nucleotide, {}}); } else { sequence[primaryBlockId].first.push_back({nucleotide, {}}); @@ -4530,21 +4701,24 @@ int32_t panmanUtils::Tree::getUnalignedGlobalCoordinate(int32_t primaryBlockId, int32_t secondaryBlockId, int32_t pos, int32_t gapPos, const sequence_t& sequence, const blockExists_t& blockExists, const blockStrand_t& blockStrand, int circularOffset) { - // std::cout << "P " << primaryBlockId << " " << secondaryBlockId << " " << pos << " " << gapPos << " " << sequence[primaryBlockId].first[pos].first << std::endl; + // std::cout << "P " << sequence.size() << " " << primaryBlockId << " " << secondaryBlockId << " " << pos << " " 
<< gapPos << " " << circularOffset << " " << sequence[primaryBlockId].first[pos].first << std::endl; int ctr = 0; int ans = -1; int len = 0; for(size_t i = 0; i < blockExists.size(); i++) { + // std::cout << blockExists[i].first << " " << blockExists[i].second.size() << " " << blockStrand[i].first << " " << blockStrand[i].second.size() << std::endl; if(!blockExists[i].first) { continue; } if(blockStrand[i].first) { + // std::cout << "gap size: " << sequence[i].second.size() << std::endl; for(size_t k = 0; k < sequence[i].first.size(); k++) { for(size_t w = 0; w < sequence[i].first[k].second.size(); w++) { if(sequence[i].first[k].second[w] != '-' && sequence[i].first[k].second[w] != 'x') { if((int)i == primaryBlockId && secondaryBlockId == -1 && (int)k == pos && (int)w == gapPos) { ans = ctr; + break; } if(ans==-1) { ctr++; @@ -4555,6 +4729,7 @@ int32_t panmanUtils::Tree::getUnalignedGlobalCoordinate(int32_t primaryBlockId, if(sequence[i].first[k].first != '-' && sequence[i].first[k].first != 'x') { if((int)i == primaryBlockId && secondaryBlockId == -1 && (int)k == pos && gapPos == -1) { ans = ctr; + break; } if(ans==-1) { ctr++; @@ -4567,6 +4742,7 @@ int32_t panmanUtils::Tree::getUnalignedGlobalCoordinate(int32_t primaryBlockId, if(sequence[i].first[k].first != '-' && sequence[i].first[k].first != 'x') { if((int)i == primaryBlockId && secondaryBlockId == -1 && (int)k == pos && gapPos == -1) { ans = ctr; + break; } if(ans==-1) { ctr++; @@ -4579,6 +4755,7 @@ int32_t panmanUtils::Tree::getUnalignedGlobalCoordinate(int32_t primaryBlockId, if((int)i == primaryBlockId && secondaryBlockId == -1 && (int)k == pos && (int)w == gapPos) { ans = ctr; + break; } if(ans==-1) { ctr++; @@ -5490,11 +5667,9 @@ panmanUtils::TreeGroup::TreeGroup(std::istream& fin) { kj::std::StdInputStream kjInputStream(fin); capnp::InputStreamMessageReader messageReader(kjInputStream); - std::cout << "About to start reading root.." 
<< std::endl; panman::TreeGroup::Reader TG = messageReader.getRoot(); - std::cout << "Ending reading root.." << std::endl; for (auto treeFromTG: TG.getTrees()){ trees.emplace_back(treeFromTG); @@ -5505,9 +5680,9 @@ panmanUtils::TreeGroup::TreeGroup(std::istream& fin) { } } -void panmanUtils::TreeGroup::printFASTA(std::ofstream& fout) { +void panmanUtils::TreeGroup::printFASTA(std::ofstream& fout, bool rootSeq ) { for(auto& tree: trees) { - tree.printFASTA(fout); + tree.printFASTA(fout, rootSeq); } } @@ -5532,7 +5707,6 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { assert(nodeIndex == tree.allNodes.size()); std::string newick = tree.getNewickString(node); - // std::cout << newick << std::endl; treeToWrite.setNewick(newick); std::map< std::vector< uint32_t >, std::vector< std::pair< int64_t, bool > > > consensusSeqToBlockIds; @@ -5629,9 +5803,9 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { capnp::List::Builder complexMutBuilder = treeGroupToWrite.initComplexMutations(complexMutations.size()); size_t cmplxMutCount=0; - std::cout << "Writing Complex Mutations..." << std::endl; + // std::cout << "Writing Complex Mutations..." << std::endl; for(auto cm: complexMutations) { - std::cout << "Cmplx mutation Count:" << cmplxMutCount << "..." << std::endl; + // std::cout << "Cmplx mutation Count:" << cmplxMutCount << "..." 
<< std::endl; complexMutBuilder[cmplxMutCount++] = cm.toCapnProto(); } diff --git a/src/panman.hpp b/src/panman.hpp index d949bc4..95c523e 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -270,7 +270,7 @@ struct BlockGapList { // PanMAT tree node class Node { public: - float branchLength; + float branchLength = 0.0; size_t level; std::string identifier; Node* parent; @@ -307,7 +307,10 @@ class Tree { // Tree traversal for FASTA writer void printFASTAHelper(panmanUtils::Node* root, sequence_t& sequence, blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, - bool aligned = false); + bool aligned = false, bool rootSeq = false, std::tuple start={-1,-1,-1,-1}, std::tuple end={-1,-1,-1,-1}); + + void printSingleNodeHelper(std::vector &nodeList, int nodeListIndex, sequence_t& sequence, + blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, int panMATStart, int panMATEnd); // Merge parent and child nodes when compressing subtree void mergeNodes(Node* par, Node* chi); @@ -456,7 +459,10 @@ class Tree { // void printSummary(); void printSummary(std::ostream &out); void printBfs(Node* node = nullptr); - void printFASTA(std::ostream& fout, bool aligned = false); + void printFASTA(std::ostream& fout, bool aligned = false, bool rootSeq = false); + void printSingleNode(std::ostream& fout, const sequence_t& sequence, + const blockExists_t& blockExists, const blockStrand_t& blockStrand, + std::string nodeIdentifier, std::tuple< int, int, int, int > &panMATStart, std::tuple< int, int, int, int > &panMATEnd); void printFASTAParallel(std::ofstream& fout, bool aligned = false); void printMAF(std::ostream& fout); @@ -470,6 +476,7 @@ class Tree { // sequences are assumed to be the same as their strands in the root sequence for the // purpose of splitting the terminal blocks during extraction void extractPanMATSegment(kj::std::StdOutputStream& fout, int64_t start, int64_t end); + void 
extractPanMATIndex(std::ostream& fout, int64_t start, int64_t end, std::string nodeIdentifier, bool single=true); Node* subtreeExtractParallel(std::vector< std::string > nodeIds, const std::set< std::string >& nodeIdsToDefinitelyInclude = {}); // Node* subtreeExtractParallel(std::vector< std::string > nodeIds); @@ -707,7 +714,7 @@ class TreeGroup { TreeGroup* subnetworkExtract(std::unordered_map< int, std::vector< std::string > >& nodeIds); - void printFASTA(std::ofstream& fout); + void printFASTA(std::ofstream& fout, bool rootSeq = false); void writeToFile(kj::std::StdOutputStream& fout); void printComplexMutations(std::ostream& fout); }; diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 22ccf00..224b2cc 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -149,12 +149,14 @@ void setupOptionDescriptions() { ("create-network,k", "Create PanMAN with network of trees from single or multiple PanMAN files") ("printMutations,p", "Create PanMAN with network of trees from single or multiple PanMAN files") ("acr,q", "ACR method [fitch(default), mppa]") + ("index", "Generating indexes and print sequence (passed as reference) between x:y") + ("printRoot", "Print root sequence") //("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") - + ("low-mem-mode", "Perform Fitch Algrorithm in batch to save memory consumption") ("reference,n", po::value< std::string >(), "Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required)") - ("start,s", po::value< std::string >(), "Start coordinate of protein translation") - ("end,e", po::value< std::string >(), "End coordinate of protein translation") + ("start,s", po::value< int64_t >(), "Start coordinate of protein translation/Start coordinate for indexing") + ("end,e", po::value< int64_t >(), "End coordinate of protein translation/End coordinate for indexing") ("treeID,d", po::value< std::string >(), "Tree ID, required for 
--vcf") ("input-file,i", po::value< std::string >(), "Path to the input file, required for --subnet, --annotate, and --create-network") ("output-file,o", po::value< std::string >(), "Prefix of the output file name") @@ -451,6 +453,7 @@ void parseAndExecute(int argc, char* argv[]) { inPMATBuffer.push(inputFile); std::istream inputStream(&inPMATBuffer); + std::cout << "starting reading panman" << std::endl; TG = new panmanUtils::TreeGroup(inputStream); auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); @@ -1081,7 +1084,7 @@ void parseAndExecute(int argc, char* argv[]) { panmanUtils::TreeGroup tg = *TG; T = &tg.trees[treeID]; - if(!globalVm.count("start") || !globalVm.count("start")) { + if(!globalVm.count("start") || !globalVm.count("end")) { std::cout << "Start/End Coordinate not provided" << std::endl; return; } @@ -1222,6 +1225,95 @@ void parseAndExecute(int argc, char* argv[]) { << substitutionsTime.count() << " nanoseconds\n"; if(globalVm.count("output-file")) outputFile.close(); + } else if (globalVm.count("index")) { + // indexing + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } + + panmanUtils::TreeGroup tg = *TG; + + // Get start and end coordinate + int64_t startCoordinate = 0; + int64_t endCoordinate = -1; + if(!globalVm.count("start")) { + std::cout << "Start Coordinate not provided, setting it to 0" << std::endl; + } else { + startCoordinate = globalVm["start"].as< int64_t >(); + } + + if(!globalVm.count("end")) { + std::cout << "End Coordinate not provided, setting it to length of seqeunce - 1" << std::endl; + } else { + endCoordinate = globalVm["end"].as< int64_t >(); + } + + // get sequence + std::string reference=""; + if(!globalVm.count("reference")) { + std::cout << "Error: Reference not provided" << std::endl; + return; + } else { + reference = globalVm["reference"].as< std::string >(); + } + + auto fastaStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < tg.trees.size(); i++) { 
+ T = &tg.trees[i]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".index"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); + + + T->extractPanMATIndex(fout, startCoordinate,endCoordinate, reference); + + if(globalVm.count("output-file")) outputFile.close(); + } + + auto fastaEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; + std::cout << "\nIndexing execution time: " << fastaTime.count() << " nanoseconds\n"; + + return; + } else if(globalVm.count("printRoot")) { + // Print raw sequences to output file + + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } + + panmanUtils::TreeGroup tg = *TG; + + auto fastaStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < tg.trees.size(); i++) { + T = &tg.trees[i]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".fasta"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); + + + T->printFASTA(fout, true, true); + + if(globalVm.count("output-file")) outputFile.close(); + } + + auto fastaEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; + std::cout << "\nFASTA execution time: " << fastaTime.count() << " nanoseconds\n"; + + return; } else { return; } diff --git a/src/panmanUtils.hpp b/src/panmanUtils.hpp index 901bc7a..7f9c0b1 100644 --- a/src/panmanUtils.hpp +++ b/src/panmanUtils.hpp @@ -38,6 +38,10 @@ void printSequenceLines(const sequence_t& sequence, const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, bool aligned, std::ostream& fout, int offset = 0, bool debug = false); +void 
printSubsequenceLines(const sequence_t& sequence,\ + const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, int start, int end, + bool aligned, std::ostream& fout, int offset=0, bool debug=false); + // Remove '-' character from sequence string std::string stripGaps(std::string sequenceString); std::string getDate(); From 1521ccb0add5561d9a19f9fc77aeff2e91f4eb7a Mon Sep 17 00:00:00 2001 From: swalia Date: Mon, 28 Oct 2024 10:37:16 -0700 Subject: [PATCH 041/103] updated install script --- docs/install.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/install.md b/docs/install.md index 1e64a9a..8d0681f 100644 --- a/docs/install.md +++ b/docs/install.md @@ -7,7 +7,7 @@ 1. Clone the repository ```bash -git https://github.com/TurakhiaLab/panman.git +git clone https://github.com/TurakhiaLab/panman.git cd panman ``` 2. Run the installation script @@ -54,7 +54,7 @@ Docker container with preinstalled panmanUtils can also be built from Doc ii. Git 1. Clone the repository ```bash -git https://github.com/TurakhiaLab/panman.git +git clone https://github.com/TurakhiaLab/panman.git cd panman ``` 2. Build a docker image @@ -71,4 +71,4 @@ docker run -it panman # Insider docker container cd /home/panman/build ./panmanUtils --help -``` \ No newline at end of file +``` From ae408ca8d48787f141218b593e62b21d1ed7e4b7 Mon Sep 17 00:00:00 2001 From: swalia Date: Mon, 28 Oct 2024 10:49:45 -0700 Subject: [PATCH 042/103] updated install script --- docs/install.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/install.md b/docs/install.md index 1e64a9a..8d0681f 100644 --- a/docs/install.md +++ b/docs/install.md @@ -7,7 +7,7 @@ 1. Clone the repository ```bash -git https://github.com/TurakhiaLab/panman.git +git clone https://github.com/TurakhiaLab/panman.git cd panman ``` 2. Run the installation script @@ -54,7 +54,7 @@ Docker container with preinstalled panmanUtils can also be built from Doc ii. Git 1. 
Clone the repository ```bash -git https://github.com/TurakhiaLab/panman.git +git clone https://github.com/TurakhiaLab/panman.git cd panman ``` 2. Build a docker image @@ -71,4 +71,4 @@ docker run -it panman # Insider docker container cd /home/panman/build ./panmanUtils --help -``` \ No newline at end of file +``` From 7376e7014297b001a2d6519a66d1de15cc5f5d59 Mon Sep 17 00:00:00 2001 From: swalia Date: Mon, 28 Oct 2024 11:09:10 -0700 Subject: [PATCH 043/103] updated utils --- docs/utils.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/utils.md b/docs/utils.md index adb85fc..98a3d54 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -172,7 +172,7 @@ Annotate nodes in a PanMAN with a custom string, later searched by these annotat * Usage syntax ```bash -./panmanUtils -I --annotate --output-file=ecoli_10_annotate +./panmanUtils -I --annotate --input-file= --output-file=ecoli_10_annotate ``` * Example ```bash @@ -192,4 +192,4 @@ Extract amino acid translations from a PanMAN in TSV file. 
```bash cd $PANMAN_HOME/build ./panmanUtils -I panman/sars_20.panman --aa-translations --output_file=sars_20 -``` \ No newline at end of file +``` From 34c49f73a5944e420943c4c0a11f14d853dd2860 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Tue, 29 Oct 2024 21:06:48 -0700 Subject: [PATCH 044/103] updated annotations --- src/panman.cpp | 78 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/src/panman.cpp b/src/panman.cpp index 025bc39..0fb7ba9 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -101,8 +101,10 @@ char panmanUtils::getCodeFromNucleotide(char nuc) { return 11; case 'V': return 7; - default: + case 'N': return 15; + default: + return 0; } } @@ -531,8 +533,8 @@ void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, } for (auto nodeAnnotations: storedNode[currentIndex].getAnnotations()){ - root->annotations.push_back(nodeAnnotations); - annotationsToNodes[nodeAnnotations].push_back(root->identifier); + root->annotations.push_back(nodeAnnotations.cStr()); + annotationsToNodes[nodeAnnotations.cStr()].push_back(root->identifier); } root->nucMutation = storedNucMutation; @@ -1153,6 +1155,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE std::string currentSequence, currentSequenceId; size_t lineLength = 0; std::string consensusSeq; + + // Read MSA while(getline(fin,line,'\n')) { if(line.length() == 0) { continue; @@ -1165,6 +1169,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE std::cerr << "Error: sequence lengths don't match! 
" << currentSequenceId << std::endl; exit(-1); } + // std::cout << currentSequenceId << "\t" << currentSequence; sequenceIdsToSequences[currentSequenceId] = currentSequence; } std::vector< std::string > splitLine; @@ -1175,6 +1180,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE currentSequence += line; } } + + if(currentSequence.length()) { if(lineLength != 0 && lineLength != currentSequence.length()) { std::cerr << "Error: sequence lengths don't match!" << std::endl; @@ -1187,28 +1194,42 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } std::set< size_t > emptyPositions; - for(size_t i = 0; i < lineLength; i++) { - bool nonGapFound = false; - for(auto u: sequenceIdsToSequences) { - if(u.second[i] != '-') { - consensusSeq += u.second[i]; - nonGapFound = true; - break; - } + // std::cout << root->identifier << std::endl; + // ReRoot tree if reference provided + if (reference != "") { + if (allNodes.find(reference) == allNodes.end()) { + std::cout << reference << " is not a tip!!" 
<< std::endl; + exit(0); } - if(!nonGapFound) { - emptyPositions.insert(i); + transform(allNodes[reference]); + // std::cout << reference << "\t" << root->identifier << std::endl; + consensusSeq = sequenceIdsToSequences[reference]; + } else { + for(size_t i = 0; i < lineLength; i++) { + bool nonGapFound = false; + for(auto u: sequenceIdsToSequences) { + if(u.second[i] != '-') { + consensusSeq += u.second[i]; + nonGapFound = true; + break; + } + } + if(!nonGapFound) { + emptyPositions.insert(i); + } } - } - for(auto& u: sequenceIdsToSequences) { - std::string sequenceString; - for(size_t i = 0; i < u.second.length(); i++) { - if(emptyPositions.find(i) == emptyPositions.end()) { - sequenceString += u.second[i]; + for(auto& u: sequenceIdsToSequences) { + std::string sequenceString; + for(size_t i = 0; i < u.second.length(); i++) { + if(emptyPositions.find(i) == emptyPositions.end()) { + sequenceString += u.second[i]; + } } + u.second = sequenceString; } - u.second = sequenceString; } + // std::cout << root->identifier << std::endl; + // std::cout << consensusSeq << std::endl; blocks.emplace_back(0, consensusSeq); root->blockMutation.emplace_back(0, std::make_pair(BlockMutationType::BI, false)); // pos, start, end @@ -1236,8 +1257,10 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE states[u.first] = 1; } } - nucFitchForwardPass(root, states); - nucFitchBackwardPass(root, states, (1 << getCodeFromNucleotide(consensusSeq[i]))); + int refState = (reference=="")?-1:1< Date: Wed, 30 Oct 2024 15:17:37 -0700 Subject: [PATCH 045/103] less memory consumption --- CMakeLists.txt | 2 +- src/fasta.cpp | 241 +++++++++++++++++++++---------------------- src/fitchSankoff.cpp | 10 +- src/panman.hpp | 10 +- src/panmanUtils.cpp | 30 ++++-- src/panmanUtils.hpp | 4 +- 6 files changed, 151 insertions(+), 146 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f6a26ef..1095d7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ 
cmake_minimum_required (VERSION 3.8) project(panmanUtils) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options") set(CMAKE_INCLUDE_CURRENT_DIR ON) # if(DEFINED Protobuf_PATH) diff --git a/src/fasta.cpp b/src/fasta.cpp index a0f76bd..c1ae522 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -152,75 +152,39 @@ void panmanUtils::printSequenceLines(const sequence_t& sequence,\ } -void panmanUtils::printSubsequenceLines(const sequence_t& sequence,\ - const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, int start, int end, +void panmanUtils::printSubsequenceLines(const sequence_t& sequence, + const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, + const std::tuple& panMATStart, + const std::tuple& panMATEnd, bool aligned, std::ostream& fout, int offset, bool debug) { - // String that stores the sequence to be printed - std::string line; + int primaryBlockIdStart = std::get<0>(panMATStart); + int secondaryBlockIdStart = std::get<1>(panMATStart); + int posStart = std::get<2>(panMATStart); + int gapPosStart = std::get<3>(panMATStart); - for(size_t i = 0; i < blockExists.size(); i++) { - // Iterate through gap blocks - NOT BEING USED CURRENTLY - for(size_t j = 0; j < blockExists[i].second.size(); j++) { - // If block exists. 
Otherwise add gaps if MSA is to be printed - if(blockExists[i].second[j]) { - // If forward strand, iterare in forward direction - if(blockStrand[i].second[j]) { - // Main nucs - for(size_t k = 0; k < sequence[i].second[j].size(); k++) { - // Gap nucs - for(size_t w = 0; w < sequence[i].second[j][k].second.size(); w++) { - if(sequence[i].second[j][k].second[w] != '-') { - line += sequence[i].second[j][k].second[w]; - } else if(aligned) { - line += '-'; - } - } - // Main Nuc - if(sequence[i].second[j][k].first != '-' && sequence[i].second[j][k].first != 'x') { - line += sequence[i].second[j][k].first; - } else if(aligned) { - line += '-'; - } - } - } else { - // If reverse strand, iterate backwards - for(size_t k = sequence[i].second[j].size()-1; k+1 > 0; k--) { - // Main nuc - if(sequence[i].second[j][k].first != '-' && sequence[i].second[j][k].first != 'x') { - line += getComplementCharacter(sequence[i].second[j][k].first); - } else if(aligned) { - line += '-'; - } - // Gap nucs - for(size_t w = sequence[i].second[j][k].second.size()-1; w+1 > 0; w--) { - if(sequence[i].second[j][k].second[w] != '-') { - line += getComplementCharacter(sequence[i].second[j][k].second[w]); - } else if(aligned) { - line += '-'; - } - } + int primaryBlockIdEnd = std::get<0>(panMATEnd); + int secondaryBlockIdEnd = std::get<1>(panMATEnd); + int posEnd = std::get<2>(panMATEnd); + int gapPosEnd = std::get<3>(panMATEnd); - } - } - } else if(aligned) { - for(size_t k = 0; k < sequence[i].second[j].size(); k++) { - for(size_t w = 0; w < sequence[i].second[j][k].second.size(); w++) { - line += '-'; - } - line += '-'; - } - } - } + // String that stores the sequence to be printed + std::string line; + for(size_t i = primaryBlockIdStart; i < primaryBlockIdEnd-primaryBlockIdStart+1; i++) { + // Non-gap block - the only type being used currently if(blockExists[i].first) { // If forward strand if(blockStrand[i].first) { // Iterate through main nucs - for(size_t j = 0; j < sequence[i].first.size(); 
j++) { + size_t nucStart = (i==primaryBlockIdStart)? posStart: 0; + size_t nucEnd = (i==primaryBlockIdEnd)? posEnd + 1: sequence[i].first.size(); + for(size_t j = nucStart; j < nucEnd; j++) { // Gap nucs - for(size_t k = 0; k < sequence[i].first[j].second.size(); k++) { + size_t nucGapStart = (i==primaryBlockIdStart && j == posStart)? gapPosStart: 0; + size_t nucGapEnd = (i==primaryBlockIdEnd && j == posEnd)? gapPosEnd + 1: sequence[i].first[j].second.size(); + for(size_t k = nucGapStart; k < sequence[i].first[j].second.size(); k++) { if(sequence[i].first[j].second[k] != '-') { line += sequence[i].first[j].second[k]; } else if(aligned) { @@ -236,7 +200,9 @@ void panmanUtils::printSubsequenceLines(const sequence_t& sequence,\ } } else { // If reverse strand, iterate backwards - for(size_t j = sequence[i].first.size()-1; j+1 > 0; j--) { + size_t nucStart = (i==primaryBlockIdStart)? posStart: 0; + size_t nucEnd = (i==primaryBlockIdEnd)? posEnd: sequence[i].first.size() - 1; + for(size_t j = nucEnd; j+1 > nucStart; j--) { // Main nuc first since we are iterating in reverse direction if(sequence[i].first[j].first != '-' && sequence[i].first[j].first != 'x') { line += getComplementCharacter(sequence[i].first[j].first); @@ -245,7 +211,9 @@ void panmanUtils::printSubsequenceLines(const sequence_t& sequence,\ } // Gap nucs - for(size_t k = sequence[i].first[j].second.size()-1; k+1 > 0; k--) { + size_t nucGapStart = (i==primaryBlockIdStart && j == posStart)? gapPosStart: 0; + size_t nucGapEnd = (i==primaryBlockIdEnd && j == posEnd)? 
gapPosEnd: sequence[i].first[j].second.size() - 1; + for(size_t k = nucGapEnd; k+1 > nucGapStart; k--) { if(sequence[i].first[j].second[k] != '-') { line += getComplementCharacter(sequence[i].first[j].second[k]); } else if(aligned) { @@ -256,7 +224,9 @@ void panmanUtils::printSubsequenceLines(const sequence_t& sequence,\ } } else if(aligned) { // If aligned sequence is required, print gaps instead if block does not exist - for(size_t j = 0; j < sequence[i].first.size(); j++) { + size_t nucStart = (i==primaryBlockIdStart)? posStart: 0; + size_t nucEnd = (i==primaryBlockIdEnd)? posEnd + 1: sequence[i].first.size(); + for(size_t j = nucStart; j < nucEnd; j++) { for(size_t k = 0; k < sequence[i].first[j].second.size(); k++) { line+='-'; } @@ -266,42 +236,45 @@ void panmanUtils::printSubsequenceLines(const sequence_t& sequence,\ } - size_t ctr = 0; - - if(offset != 0) { - for(size_t i = 0; i < line.length(); i++) { - if(line[i] != '-') { - if(ctr == (size_t)offset) { - // mark starting point - ctr = i; - break; - } - ctr++; - } - } - } + std::cout << line << std::endl; + // size_t ctr = 0; + + // if(offset != 0) { + // for(size_t i = 0; i < line.length(); i++) { + // if(line[i] != '-') { + // if(ctr == (size_t)offset) { + // // mark starting point + // ctr = i; + // break; + // } + // ctr++; + // } + // } + // } - // std::cout << line << std::endl; - std::string currentLine = ""; - bool reachedEnd = false; - int newStart = (line.size()-1-ctr >= start)? ctr+start: start-line.size()-1-ctr; - int newEnd = (line.size()-1-ctr >= end)? 
ctr+end: end-line.size()-1-ctr; - - // std::cout << newStart << " " << newEnd << " " << ctr << " " << start << " " << end << std::endl; - if (newStart > newEnd) { - currentLine += line.substr(newStart, line.size()-newStart); - currentLine += line.substr(0, newEnd+1); - } else { - currentLine += line.substr(newStart, newEnd-newStart+1); - } - fout << currentLine << std::endl; + // // std::cout << line << std::endl; + // std::string currentLine = ""; + // bool reachedEnd = false; + // int newStart = (line.size()-1-ctr >= start)? ctr+start: start-line.size()-1-ctr; + // int newEnd = (line.size()-1-ctr >= end)? ctr+end: end-line.size()-1-ctr; + + // // std::cout << newStart << " " << newEnd << " " << ctr << " " << start << " " << end << std::endl; + // if (newStart > newEnd) { + // currentLine += line.substr(newStart, line.size()-newStart); + // currentLine += line.substr(0, newEnd+1); + // } else { + // currentLine += line.substr(newStart, newEnd-newStart+1); + // } + // fout << currentLine << std::endl; } // Depth first traversal FASTA writer void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& sequence, - blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, std::tuple< int, int, int, int > panMATStart, std::tuple< int, int, int, int > panMATEnd) { - + blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { + // Apply mutations + // std::cout << root->identifier << " " << std::get<0>(panMATStart) << " " << std::get<1>(panMATStart) << " " << std::get<2>(panMATStart) << " " << std::get<3>(panMATStart) <identifier << " " << std::get<0>(panMATEnd) << " " << std::get<1>(panMATEnd) << " " << std::get<2>(panMATEnd) << " " << std::get<3>(panMATEnd) < > blockMutationInfo; @@ -318,7 +291,6 @@ void 
panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se exit(0); } - // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { if(type == 1) { // insertion @@ -617,23 +589,64 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se break; } } + // std::cout << "rotating" << std::endl; rotate(sequencePrint.begin(), sequencePrint.begin() + rotInd, sequencePrint.end()); rotate(blockExistsPrint.begin(), blockExistsPrint.begin() + rotInd, blockExistsPrint.end()); rotate(blockStrandPrint.begin(), blockStrandPrint.begin() + rotInd, blockStrandPrint.end()); } if(sequenceInverted.find(root->identifier) != sequenceInverted.end() && sequenceInverted[root->identifier]) { + // std::cout << "inverting" << std::endl; reverse(sequencePrint.begin(), sequencePrint.end()); reverse(blockExistsPrint.begin(), blockExistsPrint.end()); reverse(blockStrandPrint.begin(), blockStrandPrint.end()); } - - panmanUtils::printSequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); + if (allIndex) { + // bool* checkA; + // bool* checkB; + // *checkA = false; + // *checkB = false; + // int startCoordinate = getUnalignedGlobalCoordinate(std::get<0>(panMATStart), + // std::get<1>(panMATStart), + // std::get<2>(panMATStart), + // std::get<3>(panMATStart), + // sequencePrint, + // blockExistsPrint, + // blockStrandPrint, + // circularSequences[root->identifier], + // checkA + // ); + + // int endCoordinate = getUnalignedGlobalCoordinate(std::get<0>(panMATEnd), + // std::get<1>(panMATEnd), + // std::get<2>(panMATEnd), + // std::get<3>(panMATEnd), + // sequencePrint, + // blockExistsPrint, + // blockStrandPrint, + // circularSequences[root->identifier], + // checkB + // ); + + // if (checkA) { + // startCoordinate = -1; + // } + // if (checkB) { + // endCoordinate = -1; + // } + // 
std::cout << root->identifier << " " << startCoordinate << " " << endCoordinate << " offsets " << circularSequences[root->identifier] << " " << offset << std::endl; + // std::cout << "printFASTA start" << std::get<0>(panMATStart) << " " << std::get<1>(panMATStart) << " " << std::get<2>(panMATStart) << " " << std::get<3>(panMATStart) << std::endl; + // std::cout << "printFASTA end" << std::get<0>(panMATEnd) << " " << std::get<1>(panMATEnd) << " " << std::get<2>(panMATEnd) << " " << std::get<3>(panMATEnd) << std::endl; + panmanUtils::printSubsequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, panMATStart, panMATEnd, aligned, fout, offset); + } else { + panmanUtils::printSequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); + } } else { // DFS on children for(panmanUtils::Node* child: root->children) { - printFASTAHelper(child, sequence, blockExists, blockStrand, fout, aligned, rootSeq); + printFASTAHelper(child, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); + } } @@ -667,10 +680,13 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se } } } + + // std::cout << "Done iteration for node: " << root->identifier << std::endl; + } void panmanUtils::Tree::printSingleNodeHelper(std::vector &nodeList, int nodeListIndex, sequence_t& sequence, - blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, int panMATStart, int panMATEnd) { + blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd) { panmanUtils::Node* node = nodeList[nodeListIndex--]; @@ -1036,7 +1052,7 @@ void panmanUtils::Tree::printSingleNodeHelper(std::vector &n } } -void panmanUtils::Tree::printFASTA(std::ostream& fout, bool aligned, bool rootSeq) { +void 
panmanUtils::Tree::printFASTA(std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { // List of blocks. Each block has a nucleotide list. Along with each nucleotide is a gap list. std::vector< std::pair< std::vector< std::pair< char, std::vector< char > > >, std::vector< std::vector< std::pair< char, std::vector< char > > > > > > sequence(blocks.size() + 1); std::vector< std::pair< bool, std::vector< bool > > > blockExists(blocks.size() + 1, {false, {}}); @@ -1111,7 +1127,8 @@ void panmanUtils::Tree::printFASTA(std::ostream& fout, bool aligned, bool rootSe } // Run depth first traversal to extract sequences - printFASTAHelper(root, sequence, blockExists, blockStrand, fout, aligned, rootSeq); + + printFASTAHelper(root, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); } @@ -1127,32 +1144,6 @@ void panmanUtils::Tree::printSingleNode(std::ostream& fout, const sequence_t& se nodeList.push_back(newNode); } - // for (int i=nodeList.size()-1; i>=0; i--) std::cout << nodeList[i]->identifier << std::endl; - - // std::cout << std::get<0>(panMATStart) << " " << std::get<1>(panMATStart) << " " << std::get<2>(panMATStart) << " " << std::get<3>(panMATStart) << std::endl; - // std::cout << std::get<0>(panMATEnd) << " " << std::get<1>(panMATEnd) << " " << std::get<2>(panMATEnd) << " " << std::get<3>(panMATEnd) << std::endl; - - int startCoordinate = getUnalignedGlobalCoordinate(std::get<0>(panMATStart), - std::get<1>(panMATStart), - std::get<2>(panMATStart), - std::get<3>(panMATStart), - sequenceRef, - blockExistsRef, - blockStrandRef, - circularSequences[nodeIdentifier] - ); - - int endCoordinate = getUnalignedGlobalCoordinate(std::get<0>(panMATEnd), - std::get<1>(panMATEnd), - std::get<2>(panMATEnd), - std::get<3>(panMATEnd), - sequenceRef, - blockExistsRef, - blockStrandRef, - circularSequences[nodeIdentifier] - ); - // 
std::cout << startCoordinate << ":" << endCoordinate << std::endl; - // List of blocks. Each block has a nucleotide list. Along with each nucleotide is a gap list. std::vector< std::pair< std::vector< std::pair< char, std::vector< char > > >, std::vector< std::vector< std::pair< char, std::vector< char > > > > > > sequence(blocks.size() + 1); std::vector< std::pair< bool, std::vector< bool > > > blockExists(blocks.size() + 1, {false, {}}); @@ -1226,6 +1217,6 @@ void panmanUtils::Tree::printSingleNode(std::ostream& fout, const sequence_t& se } } // Run traversal on nodeList to extract sequences - printSingleNodeHelper(nodeList, (nodeList.size()-1), sequence, blockExists, blockStrand, fout, false, false, startCoordinate, endCoordinate); + printSingleNodeHelper(nodeList, (nodeList.size()-1), sequence, blockExists, blockStrand, fout, false, false, panMATStart, panMATEnd); } diff --git a/src/fitchSankoff.cpp b/src/fitchSankoff.cpp index 41b7c05..9c754af 100644 --- a/src/fitchSankoff.cpp +++ b/src/fitchSankoff.cpp @@ -27,19 +27,23 @@ int panmanUtils::Tree::nucFitchForwardPassOpt( } int panmanUtils::Tree::nucFitchForwardPass(Node* node, - std::unordered_map< std::string, int >& states) { + std::unordered_map< std::string, int >& states, int refState) { if(node->children.size() == 0) { if(states.find(node->identifier) == states.end()) { - //std::cerr << "Node ID not found" << std::endl; + std::cerr << "Node ID not found" << std::endl; return states[node->identifier] = 0; } return states[node->identifier]; } std::vector< int > childStates; for(auto child: node->children) { - childStates.push_back(nucFitchForwardPass(child, states)); + childStates.push_back(nucFitchForwardPass(child, states, refState)); } + //for root int orStates = 0, andStates = childStates[0]; + if (node->parent==nullptr) { + return states[node->identifier] = refState; + } for(auto u: childStates) { orStates |= u; andStates &= u; diff --git a/src/panman.hpp b/src/panman.hpp index 95c523e..4099b91 100644 
--- a/src/panman.hpp +++ b/src/panman.hpp @@ -307,10 +307,10 @@ class Tree { // Tree traversal for FASTA writer void printFASTAHelper(panmanUtils::Node* root, sequence_t& sequence, blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, - bool aligned = false, bool rootSeq = false, std::tuple start={-1,-1,-1,-1}, std::tuple end={-1,-1,-1,-1}); + bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); void printSingleNodeHelper(std::vector &nodeList, int nodeListIndex, sequence_t& sequence, - blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, int panMATStart, int panMATEnd); + blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart={-1,-1,-1,-1}, const std::tuple< int, int, int, int >& panMATEnd={-1,-1,-1,-1}); // Merge parent and child nodes when compressing subtree void mergeNodes(Node* par, Node* chi); @@ -399,7 +399,7 @@ class Tree { void protoMATToTree(const panman::Tree::Reader& mainTree); // Fitch Algorithm on Nucleotide mutations - int nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states); + int nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states, int refState=-1); int nucFitchForwardPassOpt(Node* node, std::unordered_map< std::string, int >& states); // Default state is used in rerooting to a tip sequence. 
It is used to fix the state at // the root @@ -459,7 +459,7 @@ class Tree { // void printSummary(); void printSummary(std::ostream &out); void printBfs(Node* node = nullptr); - void printFASTA(std::ostream& fout, bool aligned = false, bool rootSeq = false); + void printFASTA(std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start={-1,-1,-1,-1}, const std::tuple &end={-1,-1,-1,-1}, bool allIndex = false); void printSingleNode(std::ostream& fout, const sequence_t& sequence, const blockExists_t& blockExists, const blockStrand_t& blockStrand, std::string nodeIdentifier, std::tuple< int, int, int, int > &panMATStart, std::tuple< int, int, int, int > &panMATEnd); @@ -505,7 +505,7 @@ class Tree { int32_t getUnalignedGlobalCoordinate(int32_t primaryBlockId, int32_t secondaryBlockId, int32_t pos, int32_t gapPos, const sequence_t& sequence, const blockExists_t& blockExists, const blockStrand_t& blockStrand, - int circularOffset = 0); + int circularOffset = 0, bool * check = nullptr); std::tuple< int, int, int, int > globalCoordinateToBlockCoordinate( int64_t globalCoordinate, const sequence_t& sequence, diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 224b2cc..3d35383 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -149,7 +149,7 @@ void setupOptionDescriptions() { ("create-network,k", "Create PanMAN with network of trees from single or multiple PanMAN files") ("printMutations,p", "Create PanMAN with network of trees from single or multiple PanMAN files") ("acr,q", "ACR method [fitch(default), mppa]") - ("index", "Generating indexes and print sequence (passed as reference) between x:y") + ("index",po::value< bool >(0), "Generating indexes and print sequence (passed as reference) between x:y") ("printRoot", "Print root sequence") //("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") @@ -567,6 +567,11 @@ void parseAndExecute(int argc, char* argv[]) { optimize = true; } + 
std::string reference = ""; + if (globalVm.count("reference")) { + reference = globalVm["reference"].as(); + } + std::string newickFileName = globalVm["input-newick"].as< std::string >(); std::cout << "Creating PanMAN from MSA and Newick" << std::endl; @@ -578,10 +583,10 @@ void parseAndExecute(int argc, char* argv[]) { if(!optimize) { T = new panmanUtils::Tree(inputStream, newickInputStream, - panmanUtils::FILE_TYPE::MSA); + panmanUtils::FILE_TYPE::MSA, reference); } else { T = new panmanUtils::Tree(inputStream, newickInputStream, - panmanUtils::FILE_TYPE::MSA_OPTIMIZE); + panmanUtils::FILE_TYPE::MSA_OPTIMIZE, reference); } // checkFunction(T); @@ -664,7 +669,7 @@ void parseAndExecute(int argc, char* argv[]) { std::ostream fout (buf); - T->printFASTA(fout, false); + T->printFASTA(fout, false, true); if(globalVm.count("output-file")) outputFile.close(); } @@ -1000,12 +1005,10 @@ void parseAndExecute(int argc, char* argv[]) { return; } - int treeID; - if(!globalVm.count("treeID")) { - panmanUtils::printError("TreeID not provided!"); - std::cout << globalDesc; - return; - } else treeID = std::stoi(globalVm["treeID"].as< std::string >()); + int treeID = 0; + if(globalVm.count("treeID")) { + treeID = std::stoi(globalVm["treeID"].as< std::string >()); + } panmanUtils::TreeGroup tg = *TG; T = &tg.trees[treeID]; @@ -1261,6 +1264,10 @@ void parseAndExecute(int argc, char* argv[]) { auto fastaStart = std::chrono::high_resolution_clock::now(); for(int i = 0; i < tg.trees.size(); i++) { T = &tg.trees[i]; + if (T->allNodes.find(reference) == T->allNodes.end()) { + std::cout << "Error: reference " << reference << " does not exist in PanMAN\n"; + exit(0); + } if(globalVm.count("output-file")) { std::string fileName = globalVm["output-file"].as< std::string >(); outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".index"); @@ -1270,8 +1277,9 @@ void parseAndExecute(int argc, char* argv[]) { } std::ostream fout (buf); + bool allIndex = globalVm["index"].as< bool >(); 
- T->extractPanMATIndex(fout, startCoordinate,endCoordinate, reference); + T->extractPanMATIndex(fout, startCoordinate,endCoordinate, reference, allIndex); if(globalVm.count("output-file")) outputFile.close(); } diff --git a/src/panmanUtils.hpp b/src/panmanUtils.hpp index 7f9c0b1..ea161e7 100644 --- a/src/panmanUtils.hpp +++ b/src/panmanUtils.hpp @@ -39,7 +39,9 @@ void printSequenceLines(const sequence_t& sequence, bool aligned, std::ostream& fout, int offset = 0, bool debug = false); void printSubsequenceLines(const sequence_t& sequence,\ - const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, int start, int end, + const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, + const std::tuple& panMATStart, + const std::tuple& panMATEnd, bool aligned, std::ostream& fout, int offset=0, bool debug=false); // Remove '-' character from sequence string From 0d4cc4a3501c02e8cb34e4018d305770586925cd Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 11 Nov 2024 10:03:43 -0800 Subject: [PATCH 046/103] parallel fasta write --- src/annotate.cpp | 10 +- src/fasta.cpp | 55 ++++++++ src/panman.cpp | 300 +++++++++++++++++++++++--------------------- src/panman.hpp | 2 +- src/panmanUtils.cpp | 67 +++++++++- 5 files changed, 281 insertions(+), 153 deletions(-) diff --git a/src/annotate.cpp b/src/annotate.cpp index 33ce7a2..54da675 100644 --- a/src/annotate.cpp +++ b/src/annotate.cpp @@ -29,11 +29,15 @@ void panmanUtils::Tree::annotate(std::ifstream& fin) { } if(allNodes.find(nodeId) == allNodes.end()) { - std::cout << "Node ID not found. Line: " << line << std::endl; + std::cout << "Node ID not found. 
Line: " << nodeId << " [" << line << "]" << std::endl; + // for (auto a: allNodes) { + // std::cout << a-> + // } return; } - Node* nodeToAnnotate = allNodes[nodeId]; + + std::cout << "node before annotation: " << nodeToAnnotate->identifier << " " << nodeToAnnotate->annotations.size() << std::endl; // Extract annotations for(; i < line.length(); i++) { @@ -58,6 +62,8 @@ void panmanUtils::Tree::annotate(std::ifstream& fin) { word = ""; } + std::cout << "node annotated: " << nodeToAnnotate->identifier << " " << nodeToAnnotate->annotations[0] << std::endl; + } } diff --git a/src/fasta.cpp b/src/fasta.cpp index c1ae522..5e22160 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -1220,3 +1220,58 @@ void panmanUtils::Tree::printSingleNode(std::ostream& fout, const sequence_t& se printSingleNodeHelper(nodeList, (nodeList.size()-1), sequence, blockExists, blockStrand, fout, false, false, panMATStart, panMATEnd); } + +void panmanUtils::Tree::printFASTAParallel(std::ostream& fout, bool aligned) { + + std::mutex fastaMutex; + size_t lineSize = 70; + + tbb::parallel_for_each(allNodes, [&](auto n) { + if(n.second->children.size() == 0) { + std::string sequence; + sequence = getStringFromReference(n.first, aligned); + + fastaMutex.lock(); + fout << '>' << n.first << '\n'; + for(size_t i = 0; i < sequence.size(); i+=lineSize) { + fout << sequence.substr(i, std::min(lineSize, sequence.size() - i)) << '\n'; + } + fastaMutex.unlock(); + } + + }); +} + +void panmanUtils::Tree::printFASTAFromGFA(std::ifstream& fin, std::ofstream& fout) { + std::map< std::string, std::string > nodes; + std::map< std::string, std::vector< std::string > > paths; + std::string line; + while(getline(fin, line, '\n')) { + std::vector< std::string > separatedLine; + stringSplit(line, '\t', separatedLine); + if(separatedLine[0] == "S") { + nodes[separatedLine[1]] = separatedLine[2]; + } else if(separatedLine[0] == "P") { + std::vector< std::string > v; + stringSplit(separatedLine[2], ',', 
paths[separatedLine[1]]); + } + } + for(auto p: paths) { + fout << ">" << p.first << "\n"; + std::string sequence; + for(auto s: p.second) { + char strand = s[s.length()-1]; + s.pop_back(); + if(strand == '+') { + sequence += nodes[s]; + } else { + for (std::string::reverse_iterator rit=nodes[s].rbegin(); rit!=nodes[s].rend(); ++rit) { + sequence += getComplementCharacter(*rit); + } + } + } + for(size_t i = 0; i < sequence.size(); i+=70) { + fout << sequence.substr(i,std::min((size_t)70, sequence.size() - i)) << '\n'; + } + } +} diff --git a/src/panman.cpp b/src/panman.cpp index 0fb7ba9..09060ff 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -534,6 +534,7 @@ void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, for (auto nodeAnnotations: storedNode[currentIndex].getAnnotations()){ root->annotations.push_back(nodeAnnotations.cStr()); + std::cout << root->identifier << " " << nodeAnnotations.cStr() << std::endl; annotationsToNodes[nodeAnnotations.cStr()].push_back(root->identifier); } @@ -559,6 +560,48 @@ bool panmanUtils::Tree::hasPolytomy(Node* node) { return false; } + +void readFasta(std::ifstream& fin, std::map< std::string, std::string >& sequenceIdsToSequences) { + std::string line; + std::string currentSequence, currentSequenceId; + size_t lineLength = 0; + + while(getline(fin,line,'\n')) { + if(line.length() == 0) { + continue; + } + if(line[0] == '>') { + if(currentSequence.length()) { + if(lineLength == 0) { + lineLength = currentSequence.length(); + } else if(lineLength != currentSequence.length()) { + std::cerr << "Error: sequence lengths don't match! 
" << currentSequenceId << std::endl; + exit(-1); + } + sequenceIdsToSequences[currentSequenceId] = currentSequence; + } + std::vector< std::string > splitLine; + panmanUtils::stringSplit(line,' ',splitLine); + currentSequenceId = splitLine[0].substr(1); + currentSequence = ""; + } else { + currentSequence += line; + } + } + if(currentSequence.length()) { + if(lineLength != 0 && lineLength != currentSequence.length()) { + std::cerr << "Error: sequence lengths don't match!" << std::endl; + exit(-1); + } else { + lineLength = currentSequence.length(); + } + sequenceIdsToSequences[currentSequenceId] = currentSequence; + } + +} + + + size_t readFastaInBatch(std::ifstream& fin, std::map< std::string, std::string >& sequenceIdsToSequences, size_t &startIndex, size_t batchSize) { std::string line; std::string currentSequence, currentSequenceId; @@ -603,10 +646,6 @@ size_t readFastaInBatch(std::ifstream& fin, std::map< std::string, std::string > std::cout << "Done reading till " << nextStartIndex - 1 << std::endl; - // reset file reader (very important) - fin.clear(); - fin.seekg(0); - return nextStartIndex; } @@ -1197,15 +1236,16 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE // std::cout << root->identifier << std::endl; // ReRoot tree if reference provided if (reference != "") { - if (allNodes.find(reference) == allNodes.end()) { - std::cout << reference << " is not a tip!!" << std::endl; - exit(0); - } - transform(allNodes[reference]); + // if (allNodes.find(reference) == allNodes.end()) { + // std::cout << reference << " is not a tip!!" 
<< std::endl; + // exit(0); + // } + // transform(allNodes[reference]); // std::cout << reference << "\t" << root->identifier << std::endl; consensusSeq = sequenceIdsToSequences[reference]; } else { - for(size_t i = 0; i < lineLength; i++) { + tbb::parallel_for((size_t)0, lineLength, [&](size_t i) { + // for(size_t i = 0; i < lineLength; i++) { bool nonGapFound = false; for(auto u: sequenceIdsToSequences) { if(u.second[i] != '-') { @@ -1217,7 +1257,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE if(!nonGapFound) { emptyPositions.insert(i); } - } + // } + }); for(auto& u: sequenceIdsToSequences) { std::string sequenceString; for(size_t i = 0; i < u.second.length(); i++) { @@ -1228,11 +1269,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE u.second = sequenceString; } } - // std::cout << root->identifier << std::endl; - // std::cout << consensusSeq << std::endl; - blocks.emplace_back(0, consensusSeq); - root->blockMutation.emplace_back(0, std::make_pair(BlockMutationType::BI, false)); - // pos, start, end + tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int8_t,int8_t > > > nonGapMutationsMSA; std::unordered_map< std::string, std::mutex > nodeMutexes; std::unordered_map< size_t, std::mutex > posMutexes; @@ -1248,6 +1285,30 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE int positionCount = 0; tbb::parallel_for((size_t)0, consensusSeq.length(), [&](size_t i) { + // Sankoff + std::unordered_map< std::string, std::vector< int > > stateSets; + std::unordered_map< std::string, int > states; + std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; + + for(const auto& u: sequenceIdsToSequences) { + std::vector< int > currentState(16, SANKOFF_INF); + if(u.second[i] != '-') { + currentState[getCodeFromNucleotide(u.second[i])] = 0; + } else { + currentState[0] = 0; + } + stateSets[u.first] = currentState; + } 
+ nucSankoffForwardPass(root, stateSets); + nucSankoffBackwardPass(root, stateSets, states, getCodeFromNucleotide(consensusSeq[i])); + nucSankoffAssignMutations(root, states, mutations, getCodeFromNucleotide(consensusSeq[i])); + for(auto mutation: mutations) { + nodeMutexes[mutation.first].lock(); + nonGapMutationsMSA[mutation.first].push_back(std::make_tuple(i, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); + nodeMutexes[mutation.first].unlock(); + } + // Fitch + /* std::unordered_map< std::string, int > states; std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; for(const auto& u: sequenceIdsToSequences) { @@ -1269,8 +1330,15 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } posMutexes[i].lock(); posMutexes[i].unlock(); + */ }); + // std::cout << root->identifier << std::endl; + std::cout << consensusSeq << std::endl; + blocks.emplace_back(0, consensusSeq); + root->blockMutation.emplace_back(0, std::make_pair(BlockMutationType::BI, false)); + // pos, start, end + sequenceIdsToSequences.clear(); // saving memory tbb::parallel_for_each(nonGapMutationsMSA, [&](auto& u) { @@ -1303,10 +1371,11 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE root = createTreeFromNewickString(newickString); std::string line; + std::string currentSequence, currentSequenceId; size_t lineLength = 0; + std::string consensusSeq; // Find length of MSA - std::string currentSequence; while(getline(fin,line,'\n')) { if(line.length() == 0) { continue; @@ -1315,9 +1384,14 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE if(currentSequence.length()) { if(lineLength == 0) { lineLength = currentSequence.length(); - break; + } else if(lineLength != currentSequence.length()) { + std::cerr << "Error: sequence lengths don't match! 
" << currentSequenceId << std::endl; + exit(-1); } } + std::vector< std::string > splitLine; + stringSplit(line,' ',splitLine); + currentSequenceId = splitLine[0].substr(1); currentSequence = ""; } else { currentSequence += line; @@ -1325,96 +1399,81 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } std::cout << "line length: " << lineLength << std::endl; - // reset file reader (very important) - fin.clear(); - fin.seekg(0); - - // set batch size - size_t memory = 128;//GB - size_t batchSize = 100000; - std::cout << "Batch size set to: " << batchSize << std::endl; - - std::string consensusSeq; consensusSeq.resize(lineLength); - size_t startIndex = 0; - tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int,int,int,int,int > > > nonGapMutations; + tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int8_t,int8_t > > > nonGapMutationsMSA; std::unordered_map< std::string, std::mutex > nodeMutexes; - + for(auto u: allNodes) { + nodeMutexes[u.first]; + } - while (true) { - std::map< std::string, std::string > sequenceIdsToSequences; - size_t nextStartIndex = readFastaInBatch(fin, sequenceIdsToSequences, startIndex, batchSize); - std::set< size_t > emptyPositions; + size_t startIndex = 0; + size_t memory = 128;//GB + size_t batchSize = 5000; + size_t nextStartIndex; + + while (startIndex < lineLength) { auto newStart = std::chrono::high_resolution_clock::now(); - tbb::parallel_for((size_t)0, nextStartIndex-startIndex, [&](size_t i) { - bool nonGapFound = false; - std::cout << startIndex+i << std::endl; - for(auto u: sequenceIdsToSequences) { - if(u.second[i] != '-') { - consensusSeq[startIndex+i] = u.second[i]; - nonGapFound = true; - break; - } - } - if(!nonGapFound) { - std::cout << "ideally should not happen\n" << std::endl; - exit(1); - // emptyPositions.insert(i); + std::map< std::string, std::string > sequenceIdsToSequences; + + //reset file read pointer + fin.clear(); // clear bad state 
after eof + fin.seekg(0); + + nextStartIndex = readFastaInBatch(fin, sequenceIdsToSequences, startIndex, batchSize); + + if (reference != "") { + std::cout << "writing consensus sequences from" << startIndex << " to " << nextStartIndex << std::endl; + for (int i=0; i > stateSets; std::unordered_map< std::string, int > states; std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; + for(const auto& u: sequenceIdsToSequences) { + std::vector< int > currentState(16, SANKOFF_INF); if(u.second[i] != '-') { - states[u.first] = (1 << getCodeFromNucleotide(u.second[i])); + currentState[getCodeFromNucleotide(u.second[i])] = 0; } else { - states[u.first] = 1; + currentState[0] = 0; } + stateSets[u.first] = currentState; } - nucFitchForwardPass(root, states); - nucFitchBackwardPass(root, states, (1 << getCodeFromNucleotide(consensusSeq[startIndex+i]))); - nucFitchAssignMutations(root, states, mutations, (1 << getCodeFromNucleotide(consensusSeq[startIndex+i]))); + nucSankoffForwardPass(root, stateSets); + nucSankoffBackwardPass(root, stateSets, states, getCodeFromNucleotide(consensusSeq[startIndex + i])); + nucSankoffAssignMutations(root, states, mutations, getCodeFromNucleotide(consensusSeq[startIndex + i])); for(auto mutation: mutations) { nodeMutexes[mutation.first].lock(); - nonGapMutations[mutation.first].push_back(std::make_tuple(0, -1, startIndex+i, -1, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); + nonGapMutationsMSA[mutation.first].push_back(std::make_tuple(startIndex + i, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); nodeMutexes[mutation.first].unlock(); } }); @@ -1422,20 +1481,24 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE newTime = newEnd - newStart; std::cout << "Processed characters from " << startIndex << " to " << nextStartIndex - 1 << " in " << newTime.count() << " nanoseconds" << std::endl; startIndex = nextStartIndex; - if 
(startIndex>=lineLength) break; } - std::cout << "consensus seq len" << consensusSeq.size() << std::endl; + blocks.emplace_back(0, consensusSeq); root->blockMutation.emplace_back(0, std::make_pair(BlockMutationType::BI, false)); + std::cout << consensusSeq << std::endl; - tbb::parallel_for_each(nonGapMutations, [&](auto& u) { + + tbb::parallel_for_each(nonGapMutationsMSA, [&](auto& u) { + // for(auto &u: nonGapMutationsMSA){ nodeMutexes[u.first].lock(); std::sort(u.second.begin(), u.second.end()); nodeMutexes[u.first].unlock(); size_t currentStart = 0; for(size_t i = 1; i < u.second.size(); i++) { - if(i - currentStart == 6 || std::get<0>(u.second[i]) != std::get<0>(u.second[i-1]) || std::get<2>(u.second[i]) != std::get<2>(u.second[i-1])+1 || std::get<4>(u.second[i]) != std::get<4>(u.second[i-1])) { + if(i - currentStart == 6 || std::get<0>(u.second[i]) != std::get<0>(u.second[i-1])+1 || std::get<1>(u.second[i]) != std::get<1>(u.second[i-1])) { nodeMutexes[u.first].lock(); + // if (std::get<0>(u.second[currentStart]) == 0) + // std::cout << u.first << std::endl; allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, i); nodeMutexes[u.first].unlock(); currentStart = i; @@ -1445,6 +1508,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE nodeMutexes[u.first].lock(); allNodes[u.first]->nucMutation.emplace_back(u.second, currentStart, u.second.size()); nodeMutexes[u.first].unlock(); + // } }); @@ -4664,62 +4728,6 @@ std::string panmanUtils::Tree::getSequenceFromVCF(std::string sequenceId, std::i } -void panmanUtils::Tree::printFASTAParallel(std::ofstream& fout, bool aligned) { - - std::mutex fastaMutex; - size_t lineSize = 70; - - tbb::parallel_for_each(allNodes, [&](auto n) { - if(n.second->children.size() == 0) { - std::string sequence; - sequence = getStringFromReference(n.first, aligned); - - fastaMutex.lock(); - fout << '>' << n.first << '\n'; - for(size_t i = 0; i < sequence.size(); i+=lineSize) { - fout << 
sequence.substr(i, std::min(lineSize, sequence.size() - i)) << '\n'; - } - fastaMutex.unlock(); - } - - }); -} - - -void panmanUtils::Tree::printFASTAFromGFA(std::ifstream& fin, std::ofstream& fout) { - std::map< std::string, std::string > nodes; - std::map< std::string, std::vector< std::string > > paths; - std::string line; - while(getline(fin, line, '\n')) { - std::vector< std::string > separatedLine; - stringSplit(line, '\t', separatedLine); - if(separatedLine[0] == "S") { - nodes[separatedLine[1]] = separatedLine[2]; - } else if(separatedLine[0] == "P") { - std::vector< std::string > v; - stringSplit(separatedLine[2], ',', paths[separatedLine[1]]); - } - } - for(auto p: paths) { - fout << ">" << p.first << "\n"; - std::string sequence; - for(auto s: p.second) { - char strand = s[s.length()-1]; - s.pop_back(); - if(strand == '+') { - sequence += nodes[s]; - } else { - for (std::string::reverse_iterator rit=nodes[s].rbegin(); rit!=nodes[s].rend(); ++rit) { - sequence += getComplementCharacter(*rit); - } - } - } - for(size_t i = 0; i < sequence.size(); i+=70) { - fout << sequence.substr(i,std::min((size_t)70, sequence.size() - i)) << '\n'; - } - } -} - int32_t panmanUtils::Tree::getUnalignedGlobalCoordinate(int32_t primaryBlockId, int32_t secondaryBlockId, int32_t pos, int32_t gapPos, const sequence_t& sequence, const blockExists_t& blockExists, const blockStrand_t& blockStrand, int circularOffset, bool* check) { diff --git a/src/panman.hpp b/src/panman.hpp index 4099b91..a721859 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -463,7 +463,7 @@ class Tree { void printSingleNode(std::ostream& fout, const sequence_t& sequence, const blockExists_t& blockExists, const blockStrand_t& blockStrand, std::string nodeIdentifier, std::tuple< int, int, int, int > &panMATStart, std::tuple< int, int, int, int > &panMATEnd); - void printFASTAParallel(std::ofstream& fout, bool aligned = false); + void printFASTAParallel(std::ostream& fout, bool aligned = false); void 
printMAF(std::ostream& fout); void printMAFNew(std::ostream& fout); diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 3d35383..ce25397 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -160,6 +160,7 @@ void setupOptionDescriptions() { ("treeID,d", po::value< std::string >(), "Tree ID, required for --vcf") ("input-file,i", po::value< std::string >(), "Path to the input file, required for --subnet, --annotate, and --create-network") ("output-file,o", po::value< std::string >(), "Prefix of the output file name") + ("threads", po::value< std::int32_t >(), "Number of threads") // ("complexmutation-file", po::value< std::string >(), "File path of complex mutation file for tree group") // ("tree-group", po::value< std::vector< std::string > >()->multitoken(), "File paths of PMATs to generate tree group") @@ -398,6 +399,11 @@ void parseAndExecute(int argc, char* argv[]) { .positional(globalPositionArgumentDesc).allow_unregistered().run(), globalVm); po::notify(globalVm); + int threads = 16; + if (globalVm.count("threads")) threads = globalVm["threads"].as(); + tbb::task_scheduler_init init(threads); + + // If the data structure loaded into memory is a PanMAT, it is pointed to by T panmanUtils::Tree *T = nullptr; @@ -668,8 +674,8 @@ void parseAndExecute(int argc, char* argv[]) { } std::ostream fout (buf); - - T->printFASTA(fout, false, true); + T->printFASTAParallel(fout, false); + // T->printFASTA(fout, false, false); if(globalVm.count("output-file")) outputFile.close(); } @@ -1028,6 +1034,9 @@ void parseAndExecute(int argc, char* argv[]) { auto annotateEnd = std::chrono::high_resolution_clock::now(); std::chrono::nanoseconds annotateTime = annotateEnd - annotateStart; std::cout << "Annotate time: " << annotateTime.count() << " nanoseconds\n"; + + writePanMAN(globalVm,TG); + } else if (globalVm.count("reroot")) { // Reroot the PanMAT to given sequence @@ -1323,7 +1332,58 @@ void parseAndExecute(int argc, char* argv[]) { return; } else { - return; + 
char** splitCommandArray; + + while(true) { + std::cout << "> "; + + std::string command; + std::getline (std::cin, command); + stripStringInPlace(command); + + // Split command by spaces + std::vector< std::string > splitCommand; + panmanUtils::stringSplit(command, ' ', splitCommand); + splitCommandArray = new char*[splitCommand.size()]; + for(size_t i = 0; i < splitCommand.size(); i++) { + splitCommandArray[i] = new char[splitCommand[i].length() + 1]; + strcpy(splitCommandArray[i], splitCommand[i].c_str()); + } + + try{ + if(strcmp(splitCommandArray[0], "use") == 0) { + // If command was use, select the PanMAT with the given index from the PanMAN + po::variables_map useVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(useDesc) + .run(), useVm); + + if(useVm.count("help")) { + std::cout << useDesc; + } else { + po::notify(useVm); + size_t treeIndex = useVm["index"].as< size_t >(); + if(TG == nullptr) { + std::cout << "No PanMAN loaded" << std::endl; + } else { + if(TG->trees.size() > treeIndex) { + T = &TG->trees[treeIndex]; + std::cout << "PanMAT loaded" << std::endl; + } else { + std::cout << "PanMAT with index " << treeIndex << " doesn't exist." + " There are only " << TG->trees.size() << " PanMATs." 
<< std::endl; + } + } + } + } else if (strcmp(splitCommandArray[0], "root") == 0) { + buf = std::cout.rdbuf(); + std::ostream fout (buf); + TG->trees[0].printFASTA(fout, true, true); + } + } catch (std::exception& e) { + std::cout << e.what() << std::endl; + } + } } } @@ -1467,6 +1527,5 @@ void debuggingCode() { } int main(int argc, char* argv[]) { - tbb::task_scheduler_init init(32); parseAndExecute(argc, argv); } From 473e48fc03023503e6c34713371b312af15a05c7 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 11 Nov 2024 10:20:22 -0800 Subject: [PATCH 047/103] parallel fasta write --- src/fasta.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fasta.cpp b/src/fasta.cpp index 5e22160..de2d7e9 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -1225,6 +1225,7 @@ void panmanUtils::Tree::printFASTAParallel(std::ostream& fout, bool aligned) { std::mutex fastaMutex; size_t lineSize = 70; + std::cout << tbb::this_task_arena::max_concurrency() << std::endl; tbb::parallel_for_each(allNodes, [&](auto n) { if(n.second->children.size() == 0) { From 441611376801d96c0c4011fb2eafaee6232fd338 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 11 Nov 2024 10:29:22 -0800 Subject: [PATCH 048/103] parallel MSA --- src/panmanUtils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index ce25397..17af31f 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -708,7 +708,8 @@ void parseAndExecute(int argc, char* argv[]) { std::ostream fout (buf); - T->printFASTA(fout, true); + // T->printFASTA(fout, true); + T->printFASTAParallel(fout, true); if(globalVm.count("output-file")) outputFile.close(); From 3305e542624fed3ff6ff57e641701c7c73a33769 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 11 Nov 2024 10:54:21 -0800 Subject: [PATCH 049/103] moving build script to base branch --- scripts/build_panman.sh | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100755 
scripts/build_panman.sh diff --git a/scripts/build_panman.sh b/scripts/build_panman.sh deleted file mode 100755 index 83c695b..0000000 --- a/scripts/build_panman.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -## Defines -PANMAN_HOME=/home/panman -PANMAN_BUILD=/home/panman/build -DATASET_PATH=/home/dataset -DATASET=sars_20 -PANGRAPH_HOME=/home/pangraph/pangraph.sh -PANGRAPH_OUTPUT=$PANMAN_HOME/build/pangraph -panmanUtils=$PANMAN_BUILD/panmanUtils - -cd $PANMAN_BUILD - -##### Commands generate PanGraph (JSON) and Tree Topology (Newick) from raw sequences in FASTA format #### -mkdir -p pangraph -echo "Building PanGraph..." -$PANGRAPH_HOME "$DATASET_PATH/$DATASET.fa" "$PANGRAPH_OUTPUT/$DATASET.json" 2> "$PANGRAPH_OUTPUT/$DATASET.nwk" -echo $(cat "$PANGRAPH_OUTPUT/$DATASET.nwk" | grep "tree" | awk '{split($0,a,"tree: "); print a[2]}') > $PANGRAPH_OUTPUT/$DATASET.nwk - -#### Run panmanUtils to construct PanMAN using PanGraph #### -echo "Building PanMAN..." -$panmanUtils -P $PANGRAPH_OUTPUT/$DATASET.json -N $PANGRAPH_OUTPUT/$DATASET.nwk -o $DATASET \ No newline at end of file From 9d1e4ba2398697c788cbd09596e2bcbe76b978cf Mon Sep 17 00:00:00 2001 From: swalia Date: Mon, 11 Nov 2024 11:30:52 -0800 Subject: [PATCH 050/103] updated wiki --- docs/construction.md | 39 +++++++++++++------------- docs/images/interactive_mode.png | Bin 0 -> 31828 bytes docs/index.md | 11 ++++---- docs/install.md | 8 +++--- docs/utils.md | 27 ++++++++++++++---- scripts/build_panman.sh | 46 +++++++++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 35 deletions(-) create mode 100644 docs/images/interactive_mode.png create mode 100644 scripts/build_panman.sh diff --git a/docs/construction.md b/docs/construction.md index 5de7cbb..9c70b74 100644 --- a/docs/construction.md +++ b/docs/construction.md @@ -2,7 +2,7 @@ Here, we will learn to build PanMAN from various input formats. 
-**Step 0:** The Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error +**Step 0:** The Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error ```bash # enter into the panman directory (assuming $PANMAN directs to the panman repository directory) cd $PANMAN_HOME @@ -13,14 +13,14 @@ cd $PANMAN_HOME/build ``` ### Building PanMAN from PanGraph -**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. Alternatively, users can provide custom PanGraph (JSON) and tree topology (Newick format) files to build a panman. +**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom PanGraph (JSON) and tree topology (Newick format) files to build a panman. -```bash + -**Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: +**Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: ```bash cd $PANMAN_HOME/build @@ -30,14 +30,14 @@ The above command will run panmanUtils program and build `sars_20.panman` ### Building PanMAN from GFA -**Step 1:** Check if `sars_20.gfa` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. Alternatively, users can provide custom GFA and tree topology (Newick format) files to build a panman. +**Step 1:** Check if `sars_20.gfa` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom GFA and tree topology (Newick format) files to build a panman. 
-```bash + -**Step 2:** Run panmanUtils with the following command to build a panman from GFA: +**Step 2:** Run panmanUtils with the following command to build a panman from GFA: ```bash cd $PANMAN_HOME/build @@ -47,14 +47,14 @@ The above command will run panmanUtils program and build `sars_20.panman` ### Building PanMAN from MSA (FASTA format) -**Step 1:** Check if `sars_20.msa` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. Alternatively, users can provide custom MSA (FASTA format) and tree topology (Newick format) files to build a panman. - +**Step 1:** Check if `sars_20.msa` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom MSA (FASTA format) and tree topology (Newick format) files to build a panman. + -**Step 2:** Run panmanUtils with the following command to build a panman from GFA: +**Step 2:** Run panmanUtils with the following command to build a panman from MSA: ```bash cd $PANMAN_HOME/build @@ -66,21 +66,20 @@ The above command will run panmanUtils program and build `sars_20.panman` We provide scripts to construct panmanUtils inputs (PanGraph/GFA/MSA and Newick) from raw sequences (FASTA format), followed by building a panman. !!!Note - This script uses the PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively. MashTree is not equipped when using PanGraph as input since the PanGraph tool constructs the tree topology too. + This script uses various tools such as the PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively. The script is particularly designed to be used in the docker container built from either the provided docker image or the DockerFile (instructions provided [here](install.md)). -**Step 1:** Check if the `sars_20.fa` file exists in `test` directory. Otherwise, follow the instructions to download the dataset.
Alternatively, users can provide custom raw sequences (FASTA format) to build a panman. +**Step 1:** Check if the `sars_20.fa` file exists in `test` directory. Alternatively, users can provide custom raw sequences (FASTA format) to build a panman. -```bash + **Step 2:** Run the following command to construct a panman from raw sequences. ```bash cd $PANMAN_HOME/scripts chmod +x build_panman.sh -./build_panman.sh +./build_panman.sh [pangraph or gfa or msa] ``` -!!!Note - The above script is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](install.md)) \ No newline at end of file + \ No newline at end of file diff --git a/docs/images/interactive_mode.png b/docs/images/interactive_mode.png new file mode 100644 index 0000000000000000000000000000000000000000..ef87ec3b6817d95cc4d0b42cf0f413958fb75d41 GIT binary patch literal 31828 zcma&N1z23mwl#_bcXtWy1PKm}LvVL@cXtWy9^678XmEFJ+#$HTyYo7GpL_3l|J&!k zuj^Y~tJf;8Sygk6IXXf?P67!YA07-04C#xcs1g_$R3hj;85SD!4d=Qt1qOyFXDK40 z@I^#~RKdyK%+kgb3`{a2MIA;%8Hk;ut@QB&tf-*Wfh=kQx}X#+<4r6n-A6c7sc1N% z{#d5U4J|Y@2KpBGf<LUC?j{qD5#JWS4Z8G;Y?J)G~ZqCYhUO6&iAax{VZqm zvvx?Z_9W(%y@Cv|oiA6W)!lF9Kp-uH5~l*>2SEtzfH9n*qO5}h2e1)`=liQCD#7H_ z&oD*vfaiDXDfGx42(WmI@R<-NA>zI;@Gk8SN}2*-qO?0>=DX4|)+ zo7j|c>YMbz9GBHP;kjUfYWvrw>0lN&LOH6WLtlAZa2XvEVf)d;UrFzPBbgt+dZ6`j zOhS);Q9r7k>6`|>WSTvNQ|0Dz2dP#gVmk$BFpcd_JOXAeJEiq1jXaR8HRftfK2{HZ zGhEBr8HH6gat&XXnkFwKorebN@9z=vrZh976-HOND~S;t58;NC38w}+Z+nRNl*f~x zLRWw6!%JKly7-t`fan-M4ZVj_fx3Ua^>IG9Nt^;DnxKZ8G82uL?|cjqFO99w{n|8G zfaVrJ?}W}p=PSc2-A6DzNQh9ol|=eOm{5UKF8ZqmeQtiel3VQ6mlOa zL3hEjM=1jvh({=^d0)ToJ;4cuOuzUj&ca6?C1CRfkpdc3BwR>Tdj;&i zyZyq!<2HB>&wQJ>*U_h%nvc>2l^D$ql?TrKM^uzaVOumHH9q&U5fINfP-`wbMFjF6L3VTsA3;8VdY>(!1`!)20Mh-KXHFQg&8joM8SFYDp zR4o0t>bmgivoFQ^NO3!CcTw;jRkDeJwFBvmt|GnM_>?Hk3pH0F%#s+H&!ylHM$M>92nD9BD+GHiJ= z2f{%oHzzl6!N*s{_BFkJL_c*@vt=ARU=^50E+H;1*Ift#KU^2M`-2qMQ%J30uY43f 
z7)~raW(@>Z7(MfchrPVs)nb(BK<;0Gmt2sX8&EHTq_9wmq{I}#mXps6P2;8p*xz8V zgN+p6)q-#H&~2cWg^(5zkb}f0A>@Kdx+xu?R>0ghj}9Rwgh)J)1V~|pg)LwqrzB)h zrTd5-#TUa-(8IryS4*-chMJI}iw2I0q>>g5X?;ik9Xd+}6JI!FxkEM`xFSSM#y^xh z}y;FgbHTc>}!^At3S067bVd zW)#!8YY`VGsz>P;qD&}bm((w(a{RKvj7{f`P*0v~OQcYzX zAm1CYvSf0lH3c=LH^q5GE(-a`oKPo7K2dlla?(cjQST;2ej)hcElK@F{fqARMw8Dq zaKFjN9vNE$Gwqmg|su#1uvU;9lD&bf0F0QPwD;`&sS5+^mQduwZ zlX{JPqQ{Rh6T2=fn!K{4x2&&YtAnX)#79JuK*|YKFwPfG5f2_vjlpq&Urij(moLPb z9ytsngzNRu>m)W?SQC^ZQi=vzy`P&Z0ye>igb!-jI@t$$7#cM!pX`3&yk34 z%g$pwYdy+-$2QKv#3o?4V%+oTk_9a@IX#R0F{Wr_>z9)zR(Wl?fU?L8s&dF7$Bf$y zirKswoSB%}^XOTo^4RY^ygk#sialR!Dr_|DYlb5Rzv}NAHH@_koEkZeKE|fDDJJ!! zrBkKLS`!)Ox)!?Tz7wHl+tzF|KbH1d7^hNgGmTq+-$fEx?OUaps++q1Ca!m{#aXs* zILDR1-DivVccY#U|hHzb&0*LJWz$_@AqO(tzN8COeE({ zZ6Obvt-zk|*^@i&HV&~*w||Ubi4jc2l(ooySrpYCHDnq!Vyhl%{q>l2L2W_a?#zz0 z(W{Zz9pRq+UiPeUk>|wX4*m`bB`B? z`z5Y@tnG1axsBIXL%>pC{n_Oypn~V7+`^EH4;$_`| zzxlek7~)sJMnGb~o#1KL*RJ6%7$GyEbRkzE-|ot8=V18|RN)@s`L8Qmc$XjAwDj~# zyboR`YYmSK9Su_q<9beeOgG2SEJQ8(TEe{(QWFw~253cMm+wfn ziD!_rlloXh@#941Fc+Q6^fkFsWnp$}HdDExcs$3;F{8U~#d5HCrue#;M2h@7WznhJ z^HAKD`t|dS=V9jb^9P-7=x*oy++Q+_Z4fd zk@{v~CiY`~Nf;@$^yZ|TwC?)E+{}#UC8FAi7BOVHKe{m$HLbk(l~C6@Omn>-&(lLFDJz0#H^#jGS%+v@5J(7?(o^ViWvcjyccBsMoSaSP;;|sjkeAR#j8FTX;G=W=?HQwqF%o&0Cz*MAMt9 zn=;xyiXow|rL$LgR6kudX|jC$!I>G7S!H**tkgMYVSPA%zaG`u+0N*pEmE)Q`slWG z7j!D`PI1Qd4*49mlnu=*?e4ck`Flo)grr~(p>&1hu$0Vj$@8@ z)_!?#MZJaOME&IbRB$OU6Q&orf!K(o6ky}&F@rXckO=+VP)3n6UZfACfa7WEo%gO64 zInufQ2aIpJ$M>?#HhP3_z2|Wa9Y9}QW>aREH=RqV_qobWoj0q~xHJC?p7Yx-NC5KyX_90g@N2u7E3xB=D}>RD$PMf+r2}5Zwsang&NKV85ohK8ehz0 zWx?n`_po3e!12MLKzHDvO8}hUzwgDtX}}==dJX{w7G??d;a_RwK=GeXJm~t9=kFLY zF%%32^a~wydE`O-S8AxlJjnmL2TKOM0~1mf`SJx6D;qnRn%X&A*t-}*d_V?0fOC-4 zbOr;%ru=h(e^H{i0F}RBsiNVcAuGdUY;VhCXku?<%H(0|@TVLwJ`Wzyt*xnxA*qM0 zjh!=(2S3?gDR@Ble;zZFk^Ys$#hRZ?Lso%Q#NNr2l!NIL(#;wQ6kadF^bW_EXXXL4s_vUf6PX5r@MX8y#=%*x6LO2O#tY3E|-!D#19{&yk& zRgS2sv$2zS6gmHQ71;t6QKBGXHtQ%)<1E z`M=8sW##+xlt;nR!_-Do)Y29-W}rF*SlGGv{!0Jg=sc3l`7nvk=1k3FYwfMx_gWUt 
z_;$x9Cylk<_j_jMNuu$rWA2|$iN3Ou-;zsKw5 z?Y_{MfZrPc?pVHnCXply)_LUrN9#Jsx*|>V(RQBx*R*_lu)oM2Sx5}di2v6-!|y6q%9ph1n$=qcpm0xT3@Y3_PsPcQyyJ&9&jq0cfz85Dfl-!l{R>|8cf1Hc$=3 zu7Z073>eRsTXE=;X$uSAg>nTxnXPc?;!EIy^M0t*YlnVx5`d~^sg5Tzg>P~@PStfx z7`&p++xWjoLL3qVQ=|#U_1)^|GS1-z%$s}Rx$t=T?~VDf%%Bvk?(WGvLlLw__=iGY z`X#yP=GQQZHzwUS*k%YWf!Y%Rxd(z)B0iJEt(D%38nDdH3WIUFP18~Hg1QneGUoP@RCQCKhNCoLtO2C}= z$D9?8rx?*o-k__rpn3r$&{HKUX8}x%ZwsyWg$U@*Z-S^zE{Etl<2kx@rz#0DSQ>91 zYzPfL;DU<>p#4L%Q7yQUuS_BFHB4kK5=+&Ae#3VpmM9KtQZ3tK`pG&3vKZh4`%ob1 z_u$n__|_5i;sxDirp9qgcSr60WMd547XHXlj+2O(2ovcD&r1 z|4*$6K=b&9=J9|Dl>cV*83JoxbsKA6J_zcrmk;k7CqG2FUv)9g|M2A42U7VSL#%w7Zg+)S@|H9S3yMx{}@frcTA!R)!Lh~R$MT^nl225 zE2dK|D}_QK<^n@_d&L3i4#@Wf=Ry&QMfpAKIvy+2zG4ne>sosGn|pRW7qT_BK(x^4YE_;n$M`R=`O-LLnO zAMrhV;BqWzJL;iiI|_zCOP7U(pYd_R3Jp4%G`dUed?TWkb|)*%7(EeyotR4WBB>q(=8otG zbFkt_lT-kn>O{{%)p`KdNZQX7r@c`)*Q0sWUL0nn7sz@3(@15Q}cA)x?B-w*~@PTz-TFj%%rOy6D3xWdX-RSHaybmScnr4dTLwU%RE1cC+W15R`kw-E7Ggl-hWyhRZ#H@WCb-$K zc%6dU-V57=5;!W2*wxUKvCczM3ab__qqJ=buy%j6`nyQv4$AXQ{b(^qbfk&>a~ZAc z*Z_SlftZsJ3MQ~S|5((&WNbnr8r2brZM$9NaC-Rk`JQp`VZC1(GZE65-}vQv6V6sK zcl7D#yzaO22>{b)u{e%#ec*ELfxP9XQgKa%^s9UhLvxVYz1=g9Q?!WZc;r>v^4m7- zrOdE5`P;z5q+zd%Xy#wwjt>}AV#9Y z8z7I%I}mFc@DE{QGakH3M|(2$9-`T~tP)nC8hoPB4k=jRS;iH|@n{wfR+9)KsUsVf zcPa*4H>FG^Fs4at`2-#TDP+5epXW?z!(s>R!Da~ca|XYJ8}9qAI$7MyXNqoIOY$Ii zD529O*RAUtPA5~vDl^#xjY=PGiZog3=Zv((y?Mf?;(Al*g-G^;PY7grK4_ zHK7DDu@9E@orAH4>Lt_xuQ4u%Rr7;Q;@iVmbeJFN8J=3Ps@q++%?LS4HyzNqT#qna ze0Q;|ASTK1kPEa?s284oeX_vqY!@*85$(W#)Y78bZhdbziMqiS~LqJ=auheDKst-aY;@!fpLd>7-u+H|WkC zYj0@j^K)x=BrV?tnCNiX1C=6IsncxmP@<^NE-ovXK*nveS_1`r_l_A$!dtmlESrga zS4gP9T{|L{Gi57doowzD96(}k@K|H|$#YBe<3vS{C-xpAdzyK{dYhv$tX{hoyK03v zQr;z_V!?+j9#8na*K3r~=V$wbDkz7k`Jfiwkz8ctII>K~icaC)k;x7>p3O^baqqQH^nkzc2~@Y9kH&kC1HT2ue6UOtvFOj2_Ry>85=G;o z6keQ`eQwz8!Dz7sAvot18-83)GepFL1^Hhq6%r^eg;xbfYg=V-7@|B-ULCjCbt3NA z&ikW*B#yRImDlQ(1w;l*=&45F??7}M7`a$u?a&}N|trV1O6Kq8WVlnf3w^KeTeYarQy=lH?@)ZjC z79^l8*6hCSzg`qAzAl!-+&WyU?5Wq}5@C@vuwCK4#*y8r?DwFj9y@IH!qr^+!u_n& 
zWVa1XG=9*lt6Ad|f-nyP@RY6QODTs7{2h01Jk^^EsvQ$>Naj1^UEwP2Rw`g>ma%GY z4f+l~EMQYgr%c~g8*~R1*qaWgpn2ADn2aYH@OLtj3GY}sJ&b#Y-mdj&J>LV~F>ij7 zs2#U=F8tYWcr#I=F}C|yyZ#|uIE(~DiLt&e#nuM)2lU@UYK6B4V;j3{x*2QKQ;8xG zy#EI2cv*abYDE@?>aA|JLkZ+z)#1{7e0&h4s^xh<3XLGh?oOR)x0wtN6N`@WEoKvn zry{Xauz#q+=bIiL&N#xnR;;(dm8z9hS`D5)t?vh;Y-Cok(_1Q3SOJ%K=!q-$$0+Y{ zu=cI|PnX=61{~|WnAA$*wFt3*v5bkP2%@FY!z6jrT3DZMU*YYE3hszB8%^Rnanx zR6@Y5uml_ez(Qki?X)}rl_4pYq%+U==VgzbTbpqvQ`T)S&2K>Lmf6_t+dSAx7>|u? zA0Rf+zVn&MCq%7hY3_E8>0&uL-3k*>+V!=nbG{~T?0TYm3(66@k6uI=%-1`L51yg- zRsbEP`}>HU-_WryZsq-)Lp!G)XyrxUtoH9d?pNB7UN!sPp92EBbPItX+wlv#p$C|I zDgYEZVLn}MW$u9N=RfvxCrk*~9}C)}_N|E~F&p+Gbu2R<*)D9Ma<@sg!>~|XYxd?$ zMUb4VC*LY0k`76!{On3N!Db*p^I0}w|8vQ=JZm&Nf4imfbo_?r!4ius0cY&Chbw6} z)mgsUJ6du=tm3&zT?pMZy79T;P=C3+*7yP;AuBpsM3lT|XljTW0R~K6?OBXjCcRc@ zkI9tqk-%UWi+xP3e%k~{V ze>`d@*o1V(f=A;5FlBN-W9a!Dni{i=Jlq~HeAMRCv(SZKm~@41ru$_Dunt9Z*d9Pi zS)+6zhzH+Z)8p|Jzh|_-qLZ{dbr*LBe}B16%Xs6aV|dQrnm*F1_TlCupSloD7`-Z% zQ0>*vg+CyMTD_Q+a_Dv5>+u}QS$zs#zCH*wKP(WVOP>fufvMZ!Ok|JAtglMKiMpL= zBGwVa8#DP>$aE~kUI}wqhFlNObFyVQjsxTw7s~}LTpAAwzdxX;b-q3pA3ceW5H5BC zK%4U?mvq|1dM7t`tIqVgHD;X#%#6>{-$=A{js1OfL7S`DgSBUbwLfh!u~`VxnyLG zS&0MLdXnqWLa>wRh|u)+FRO8tU*KvaE36dC@s~M&nq`Xmc>#r4;ZsOC6{c{oTp?53 z6+c0zV3_PmEC{Q#DUysbHT!B#Nvbt@qYz;Zqt~{aO=r<0?%u6V&0Lv^5feASgtc8H zWHI~uae-9WBJ|`J1arAGIcxNbE`G^WhV<~Htnc#a=x6=f3THwgPSsoeVjV2M?8dY# zqWwU73l?zgW?gdGa>dnWyPPgt-~&ftk+JSgh>SYZ4S9!4zZ9Y(%1%^?A^uv9?UeAX zv`q8CJk+pycBPbY4_!Vm?lbWATLR6*^Y)Hsf>1*FR9WN=IT_jP0ZEC1 zT~vojMP(A#oftee=I=GSM9sGkUzq}BGdT(X_gYq7cPADp#OLbTOf+%t)V|L>Y1@Nk zzpL(jAgSbE^_Z;i<>GNB768A6IRTY02{agLX|1<{4U$wO{Woosm2mn#;IB_NiiOvo zs#20UO(qR+Kl^;k*cVMGKR-tbZy{c=r#^1C2|PW1D!v_CXytqBn7;ZU&`7eF_Q~D2 z2efwO)&VQPM9xPB|36`El<<9a^RKDR_1{b6RQZGYY(JU zrpG`0RPy_vr7MjdLf2Cz#(j5F+CPSG5>6GBh3ylT*f`JJfc(HD@7s{Bm6!kRTSgbBoXj3r!tf@vpFapJTcKiTB zjo`J|?Y6L@qEZW0eUzQc9$MWCH#3<#zdl||=D6qaNh~)QQcmnnj*SGqKF_rnW^xoh zM_Hx19Eb?g9R|l!|FKr3HC*wo482q6)I(*L8xF5x66~jl`oa#L0-1Q?&j&4MqQz#L 
zI_CkJ)mlYTs*Ato%=p9mZ%NbZh;aoNe$ys)623$EOsgwXFbj)0(N9%<42I4p^K8Oz zRH7)m-loqYsaremdknnpKdZ?gdDD46S=#6O4;H7QXqUAZtLxn?Fln+@0!5?U3af8m zAmfe%MKMUTNwv8pqEoFFzDBI3F>(%LFjnN6O7`ozOs!kArCKDEF}LQ_Vxv9)7p-%5 zNn_k{ML*Kn*!!!{l(3Lh4XCHtTP0Rli@!|E66(SUu=>TLcD{YnP+_O0Nv$e0^(a;O zDe7x1s3stQ7`fB&D$spNWNR|RA?KVAHJrnlHs|JD!=%3Z=#WLCL0wnDkT_CE=IP%? zmM(xkISA28PkF{M$s~^nWbntgx^w?6;gn)kQnG4OQ9>!DOeGuo-u@gbH7k{HGI1-f zR<0(b`FjHxR`kr$B=XVFJhFlek1-Jt(I}bI>42t1Pfe2;RzT+YJ%;SD@6wnHkRkjX z=DOFa{eJ(+9B`F2^3QlLpBL=Icggt{@4FLCjfgjuLkSNJ3!0PVR`n63^TV0qL6P>m zg4Upy7MI%aa*f*o^G~#One0}u`PK_RcYyM?ahz)bUPwpV-5w#TBz1=~MPV*@Y=U9? zxnc1e2K8AfTVq3o<&tk^(-a=Zr;RaUe32sp`R#{Qs>U@+^B4;dS88tVk&O3|Chh&K zw#&t*&uTb8Sr8U9d(!6Nu%h^)kg%A>*MUJTAH?bH$yZN6kA>`3chW)=p|c3G^xX+erL?}WgQWtyHtVRG-Qn&#k4ADPsBMgn zj8EVKZwKU`&3;JU-9J%R!K7UFPK%QWFaRTWJP-DpRm(n@UZy3I{hq5A3lh63jYriP zA%OLe*@tO)$rCXMhTnV{ibtCsn21BZdJ)W>9Fy_c8hO2SeZ`P*`bXpvgca(b3hu^l zop*{1#?qeOzLESK>02Mn^*bubX5@8`~)PTcv~D-8NmJfL&J zH-48}^J+&b9Hf7nF-^>ss^h%mYk$ywb30k09G-sg3IZ8XwXUwKp&)dm&T@v%-PqXH z@$7e58jXC76(NSE-b?ixR2Wq`j79s|bk0gg>mQ|CG06QUp}g?3*^0+V+Z!ZI1fPp- zep&Hx7094BYQG1K{&ugSxpR=CQPw}<9~#iadUyeX$Z}~5dKmBl``V53v|!~+{g8vA z**#+Lyrtwb8QjnAik;$P84b5nJZj0epcNKUptzUPQ?u3Puu0bNf`7c!4>~@zD0qJm zptr~wC>2>@Eqfgd3n!~$0dYC6zJ?7(u#$_%xdGEg?%y^<6z>#ODGHyFp-?hZo&&=a z=**dQYffud{A({pf7e1$7Nq-C1^5gx3OE{3!A>bTd zJ)z*!991CB(vbARv5;x1FCoI?J24SZ{AnIzxlXdF&)&9vx zR60!xi~)~;V#@J%28}#|xRj6GOs1fr0Uif>NA2Je`PT^y)qorW%+vHygNw9@FjO>& z>UdA(nIu^BM@vb|IlSv!fKfo*&})U#-2JWmUSRL|-iPfzu4=?z{}A}vST|D0nxBuW z5JH6drBae-aK>9-0swALb%`jAch-?5%pFg|MbYU7lk;{mB*15o}-%O(Or&(y3A4 zL8&B_I^vZ_GJ~qr?>6HwJDCz0g?!-_rvq|Lb@5~Myz@8Q^@t?usE*0O_=u$9rkjvt z8<3q-Nh9&vXj=ItRxkE9bTp#6PLrQ3*zdq!{GTCM-Ef~VN(&VCeJU;!3|%@AO_-0<`Fn07+|{%3TDygvBFac7V4Tx#_RV*X#9fG9f0*9y4i>dtu4 zK;FjcPLB<(iAJ4g=@e)rT(E^IV}PVAhQF@ZiGSv2>6jw9B_#VrJHra0<*GOe=kE&k^`K?PJ)c>>Pz zuo7!UK^Wp30avNy-VmLyyI>=^(V^y>vILHd6(fDAinM{VW z(8%1xyINeWU9O++xt;Ssz6qw7NAK4{NEL#u7CD@jh5#h(hNilolud*kc)`a)(_e25 
zPQI~x`lS!3=c0?s-({PyP!$renfUf!7J!zUsl%t)!Fq+(m0S1ac}CP49u3Ay{qwPX zi9dL;dI#G-yJh3}Al9rNspWFMJBtMf#-{>znLV#Chqp}kmcuKS1bpAxlnO-QaI{!o z(^}iMj68o#kw>XJ#gL|Ph62vse_mNFR2D>Mi#a**^#L{3>DCUR%$fMu^$z7;8P6&^ zt2FM@bi_3-jnm$KUu-hr9x=YkNpf)8#67w14j>iTwR<)2C3YSG-ZSWjDSlWZ z$PY9Yir_H9#1Se=rO}c<8FM0ownsNS7)=2}Ae&Y-Ps+Y^qaA%rCP)1)qjhSz)I$!x z)YT5${0OVO(>IXsf;r@v!wz*@;kyNznA@BIC7W5;32=K;Qjf6GpC`9_Zl%fV6LE*- z@sjX4`R||B2yE`eu}{`nB-oYu5Y=x$<6gt_eR$)+#s9D|df;6Q*N1R85H#4SPL|35 zv#H}0LM?_->YP+s+|DZr!=vj|6D{ka-FfOxt`o>rp-fs_4s{RQPp4K>&m!L6lqZ+4 zu?*p)HogNB=|IMY8atHLJuw^;&(LZ}0t{mgkE>PMUA-UvIrDECpU+-q?nmdp(8yDG zN*rX<1hS>QSG{h2?Z^wTPY%WDW;J9GcqTM1weqgS=&(@4xF-pop^xgYS=?9>yzTr{ zPEjh7K7I;S3xaQ!is19p`Q7f{IV3G3Rzto0Tm!T1O?%Wb4YCx9JRGm%$i^}o6{59o zKet7@vw2dD(yK|u@7pDg;PHA=EwK|94n(Uuqy=M$m!LXg0m`=5l zV*c5#>K@gld`}_|B-QdDFNpeo_JVYe1gKXsUchvYxNg2cEw+2({Gbl(2|*C*fOvFW z&d{avh!3z!Qe8l*O96--CC4+#^0?&3&!Ry&eW~_dU}_@4fHWOW4!k*>iSnvb24JtU zp-#vhMht@^PP&~QFI1IPat(oKcXi{>1cD7O;!k&{+75d+cthW4F zz58Hr`(#DvEr*xlAsQSZj|Z$AApm;n{!Az^2r5q@zlW>xV7ROI*p7YJ`GJYFk*r7| zhwWfTQ^+%RCM6$z49(O@0@afXok_RlLwc=AXd?f#%vLOGNpVEa95U>vN?t`hVh%uvni|Wv6ERZ)4`vPHc2oLgZHh#ZSJYV~C zd0aL3Vs}98eG~`KOx;Bk?}CFsX??L>szZAnTTp5Rf!&HK6x0trZqop!-hVdRKqMbQ zqmO^+IW-AB@f@)$Z^6C(N6F_K?Ty|)uVi4t8TISY8J&W4cD!#(RWA^UktMKeTx1_N zy$>#*2$*);zHD6jJ8a8u-;!SXFYem25$kAizHLkk(k7BnyrFo#G{3^|zT^52NNTq=b7zpB2XhM)h5p;iFIJP7 zBlBfjL#8fRF2N1MRDhf9rU6(p?0@13@nQa<66NED4(5UsCi$KXa7Y7s;ym|*jqWCq zs_%B@BdJINC(Af%&+G4`u1B*<$S`=|>u4StyK%^~&+Dz{1~6EBd3B^R={$4y~ia045YS4Gn`X9c^MuKPn7BFt-7t72)i}=6siYSQx;t@HU zv$^Y!t@xtp|AlY^^v&Y!UH@kL>@O2u6>r-Qa6e=ts4* zKMl6u%Z(!Y&B?N}^uN~p2_lXF#Fp_}5l?>p&kN*VO%2FJGZ3rE1vhp2U(4mQ`y&aC z1d3-U|4CT>n_cH84jL#w=wu)ZL_}cMo$d1GF$h5G4MP@QMkdOWU+;*RVrcCCkE<7q z{Nn2$A*OcOXkLzEf`}{-JlyIhy>j?|&<8RZ^Og6<$;ST!sp(&ML02xQwZC&M1W2_^ zBXPpWL98A03Sam>2#PO}oPTYVzwS4{8Sf*S_1oRBuJ}_hj8MN~(k6<`R%pkGt>2%n zB47NI;HA)l;=k-#I1wA5LCq$xqgWLAZk{_*A6!fgm? 
z!ToFnJn_)i`(EgX{U@pY+KWQ95~XAcT~TI6M4Io*A`2`6QZT#aj9@m8>*miac}l(Q z6?D*OJ`e@ct5*rXWpRJx;J{u-QR=DJ=6qd)#xVImWRh6o1D_g2=i}4yFM~H9Wd^4m z4|u6GMkLVT5~5UcV6|BC(PSibwT^-M2_>-S@w)e?axnwk-bflsY!;to`g@a^PKyhg zlljEQ7Mz&Wwzyu``!n64bVh=emh_8WcQJ=g>=ug`jt6TZt6>YRZo&Og*k1j{Gkc5% zPosFLt_ziLfpLH%$mK>`XWAqdWATHlCF_eX?{D$+>P%@qtzVy6?hRKCSU_IoM$}-# zmOT&^3j};LmhExazkI9yWazUp7$ucb)>U9Cy*i(OKo}AI6@=q4m1|T(7#SPwFbrjJ zv$b6vm3{JB@sLbs4PlzFGyx6UC-CDL)t~TJjk9c^!>1L-7x03*H}MQ3m&;RO-QW>{ zKG{Bad||jQ(BfkJU1Fi~*N5u6J*%ZQ?%@#``%)(&(5~YU+y(NYUA&TRzTP-`0vu*+ z$Ibqa!SxaEG8S_RYp~s1_q-#uR^KYAq@5H zIR0Mn0-Yu!7l{~gpIJvV%7k5ns@3G>eY#)zVj+v$DoMVQ?6Lb@0WtZdLa!cQ9Pt^k z;9Oip!o~6u1U?0#GykCfIJwunZ*VDmjtLyostX8_PL-TR4dSL(MgC<{-H!gyD?svd*p6PdMExJ56W-oTP1<6kd7??)Mht*Q<3cAZq+$+qm# z0CP|EMWc53KoHb(t?$uA5}ki6^z!%k&Pz_q9uA=H^Xe3x$Dxt>F7aI9NRwM4SyF@C zjA@Z%&SzsP7w`Hgt~w@M*a&Dv7ha3X7qUvTJv)kfvJR!DWBcyHHRZs}B>aLA@tF01~q}E#-^~a=)Eh!FOF+`u? zACIfB@4o_E2`{#t8gQX%-1fC=-gr)vCJG*lDy$wz?K3~k- zuOmRgV;ulIP!V5GoZWX`bL3M!KnzVF)@8@<7_#5zhy8KKM*+VX;QQU*;%R!!b$<0idiY>d9ru-kARxj+M zFTNZ*@eI3X>zydQfOkT1WCE(A@rPIUgO?rzGA5Y$3SETuH4jMS0w?v>=^ye4_l4&O zyA{mXz#XL`3k6irG8AY5Vc+B~$5SHK!#7NE{q&Bb$o%AUwyQb{JBh;ZvYhu}T=R~IF^-aESfm<$b z+(|SOQt3c_qm~X<>-AdV2epjl{m1)XV}NyvrP?Ki_dg+L3`8K``}_O{m?xXpLGb_a ziJDUJ6j(<+UY>m%$nzgXnJg~;T@PO|45A!Bn1k@AS~im*FqlihCYyuhxfyA@^_dp5 zO|%lp$~-k^qXZlA^|ZUO1J8^-+2bIYV!vr$3mZS^@T?#Z!Z&gy2U}m$r((x_S6e(DF7f8-8EgZ;pp@hFa-KiD|2?_mz!J4O4;<`5WIqLgD zqw_03D~;hBX93{gszipPbqT~i0})%*$E>F7;288eddj~^<3Ea>-B-K6pjG4WxP9&1 zA6GpT2;+Nsya&WH{CxWyo(wbr5w;eX>Q{axkZ&f*Z!^@*ePDh|zoj1@vCe4Q5%r8v z+3gSS#QE-9+H-mx2iAV`Od&_Cj-33$?F)cad6o;@xbuADkG+oDUB`pqd-r_1esSke zhfs|CTbsON8%ZG3r242$5hotf6LyIWbi!kl!5-d~O2gK61qfTWDO#kMyH9akfLymi zJNY6Gm{=1(9qAprgP`(lny(aEWGq`tt=1}6TQ@q{%(v>8zX_{(k`%lvwpG8i&SDN_b22B3S$}fJxoq^D5hq=5r1Sq7-zhAl(U?X4wla0E-cc@j*H<4> z?OT(KC_xW8FYmhqR_t;ZG+AkMzQZPWzj0VBf+bR1+C=sb&~bGT%w6(_ZcA(Tf5B6Q zu4aybVfS*H()e0vx;^f&!)(M^=?0(N#drWBZo;ve4#A@Ika444HKH?86c!YqvquWo 
zhg;;+m=7ny;cu(?xGy9WN*L_laV;<*DSCd98lyb;VaQD^EDlpue>zFbYCc?^m{0QC zrDQMk_BbCl*8Mp)d(x^dyp<}T+`-*|pmoZ(+fJoMx7i8mz<|}N%IemtBwl(iHMF`b zeWo~{nodr=N&`-_#xx{N&rkDKI+HmG(U2t%G2S^PUx;6~$(X0@NaA2UH6E7gX*BB5v2L1TF=6RoRm;>=X(wwl zlOL#KNz^MAr8k|k$_Fipy=(Rc`I$Mp4SkSF_}v)JJY$df`K`0d?-wd{A!|?v#q$Nh zcd9Z5)`fQL{RsWJ)QY4&`eP0EUxpQK+_X4uQ+++}yMrdg$>uA&K{rx{@?$0@JL4ON;B_I>YTS- zipN%a6+VG$nsMXpKsuu5b>r@3zUp&?LP22LW%PZzky@93qJ7tGDjB(lsXVjzApBkzoJcLbm)JeuJxdEE}LdW(@md=9^$QT_PA7vK9{l!7sxW@lRW z%aJrJsslaIWJ>P!U!GB>n)LWuEqZ!Rbn1*dV)W-#-VPQe2ff#q6J4C%w@M28RMWfM zFD^+`QahL1u}_a9h}09#&$j{|x)A?%*GQ{XG|f)? z9J^7U82-@cEtfeY7N;Jby!nxq+cdjT8d6`Tp5!NBi)(I|bN%T&@@gpCopy&?LA!eL z3lX;;#5xLo#?_AwlAFT&V1yPQ_g6R0j(!IpPCHkDjmZT8LPrJi7!lb;udq*|ZeeI%x1r-qsMQTJrM1&xMC@mBb5D<`FBLa%jkzNu|5Tr;4 z>AeXFy+fi%M|zVQ>AfVB6hb(g_xsK`_x&Bdaqqu(?0?qC-r0L)t+}4L_MG#1g5p+= z3N8~7gi~z4)=PvUF&9D`YOLdN@^YhJr2EP_Oq_#i?pU`<7#`iL-fGi-E$_?$0he2r znrwvyKT{;f3I>p;suzk?^xpQ0z0{ZRKt&d@I0|dIW(AZ9FWKWk(BVCi%7ZuG$TbqxZiaWG{jP{3ux7qTJZd){d5$F`JWEAMq;GXPS(%6)< z_dD7cDu|1>zzFnVU2>c|#Px=CYvRA^-G9J^|3+KW5=*4KGpZU=JEP75wLsq&!HS?X zx-^)fUYxPPP@#cVb_UOt<2Q%ZAw-CgVRTclrHa`=^A{N!t^Q$L!rNDrrhv=__Vd3G z18t5HNLP*)#6lV!OXjokIVxSN(%d?kA8;W$I8DR`BL&2ueg6Gu0YYbRO6)e=g@@94 z6TT6F9^YG)dr7uu!HF5PVanSv)Er^WU)g`!j_^2!2%1#5>;=(CM;*2PDmGp2lPyu5 z=sbYoF2pn=uoc6ubFE=Qc0)?(wgt{SvT2oEYa_CV@kK%;e~72(^{PPYIU}|<=GX@% zlfF=Xc8mwpoXUo%BDD19UBz()JC@_5LtW+LFNeuTy%3?TEp(-gV_Sf{|7pha{$$&U zI>zNw4~XI}tK4=6TbXw<*T0LC)>A4qr=FZ8b;Pw0#K_Bt@CE-%wP))VHV8CCf7Rs4ARU>_v~hkxaD!t zg?~@RMlUJ8<{{PoT*L2xg-MgqOr(6z{R9^px0f7~wZ0U8d~G2`28T`|2-FRFsN1p$ zFwC6V7gb~Jt*T&SNfZ5$yZRw@;2jDg~Vu7iS=m@mi98q5E*+HLFR35o;kHogU zqYnALk?xXPx7iUkkWuE`Hx0fcsDw(8yfD4GS7kh@>3*%Viw4+K%6a^(oY3=d91>yz z*btwd5!kc%wH8&cxQJd{;J@I!F~Y$*`!l>Y0^~F(1Y5uHI2g%VVJ(cjP>f$MP;hZ8 zTzZ1QEB7lXOzz-}u#(kATfdqjAfEI;_|5Ok)0>dfovs0S=FeMK!??t7ZrgvU=Ix78 z(OZ#dvERglgW_6(_u5_e5h=`OTRt+rHIzNT)L(h^&(#=W2&;}3y2&zoNud{ zh+}cl`31f(%lw`MyKrU+H%48uEP z_-XSZO&v;L-uyZR)JYDZg#QfEavGhH9PV7ZAH`8qb(2(~z(A#abX+^ayq4fL4OgG1 
zVZ8XV#{{+hauE+%3KD+0ww|LRX!)B|=CbrcGko&Rw^+yc+&OT5#1r@j3d4qbQ(Aph zV>br=_~rK87X@4_n^?@G2QH{iElEsI&?MaHCmmz_Man7Wg;bx-C-K6pTU!mh;l+%w zR&1lybNioBIhkrr#O`X0rri{J>1AAzsc6FBCM)EN2R@LTfb}B$dC-r(3BDht5_do6 zSJA0DzCMS{jb6i~ib}|bxHD^8wpWoO`^RPdtC=9ng^=)JqNQZ9>ZS-YDnJm%yld6b6zO)otf3fMP(ZS4`)YK0vA-rqv zMmeR76bM7FwH z5`P3jNMpcZMmG@41J!e=Pub^tt_=5A1!Ed3sP@eyXn?Kd%3%C5iJIZ9< zTJL5ndwPWW(BnP$50F82B>I)T>Uh0S_yseIdCKz|V;b*m+qb~%g2U(KOiWc1WFyI2 zufL!hN5~z)m-5JC*i98qqS3IB+bT3>+^F(RSbqFHo%aDThPvLriA(mHaaAR$l7;w= z@54qi1Rse9cHa2S@@z$zmS`$s`+y-<)R4t#MP_2G(lr#Xdk#L7 zB5t^0250sM_)X3rw@^nAlhk#jxsFnaV7R;dnzkk$C<`#giBgsXlr(DoZl3CLGW6JZ z;k0)Nmamj@Crr*L@SV|KrP0V7a@gyNo0CU+2QNa;75ANv-R#0ymg*!{Qk0YzgLQLX&M8TA`)tE840N z3`>AW@Ob8Q#-8QCnE*cO1D-QrdvrX#(veh@Ag*_%hF>mbbhWlVoHOYrxPlUCp}{VC z>J;c7Er5e7ZZ6~m&@JN4T+|q`J>`-4=Vf?gX3KrzP%$1Q1r7_I2R6$FRR za%7PG(b~II$qTTgjaD~ilJzJ_Wo>;psoI!S_FgUMt~@dn*h%QmEZGweoYQykr#*A6 z4s+^1qp?UfC>#Dowqz<$2RMvr9Bb3QZSs{r?%Tshp>+SUqaVKnPNVpu+G-&-;b+pj z3NKK0r`^l*M#16k7i-+H&>H80+v0vN=Myee{l?q^fTAC(n7Dp>?7Bi(QN#*R7H1Sn z%XI0Qd6{QeAx{#oPb9XS6;rh_5wb50l5UDaw{*pYl(|9}q-OdD%lx9y@`~YKP*3(F zCGY)I-?NAR9x>xR5Xq5`nTNq}D|wj$S=6|fLoShuaapc`(O1>Pmz55%Fuu;=0Ta8? z8>j%7`o`rokvR2rfFIp6!NKLrApgD_{h z+{W&1Wh$)7#*Dl7DMm!}35kZsOd5*UiEQU%xQ{$Nuss8l?e0;gy$ z2cdG6BAYg9YOFCGo?BPp2Vvw|l(z@PTSa3v(crxnNM6jAp(f&fA6Mii$u3r1I0Fbg zukj%u&+07r3j0+)O8ZP9YT-u*#d4i%V;`2jkrMV9ARc)kV+VeUb89k`@hW>>(gE@O z0sfOhSKOB}|H!Kn0-TF&*kLQ&l#8A4ozM&ap4qJ1$1}@OR~1oBOMgS89T#A7oxN;! 
zq7n|F$V_D4H>rGCIh(M*=6u#?H<MM*i@V)yH}f~7C9zlNp9hjQ3*jll9Ll84NSgzs}9BkD*3Pbm1XJ$jZTv#L&`YGCV9&yx4Pwl#ADfVDj zbCtx=SP34E1(*_s+-Za@NPu*(6;3JLB8OU+bE!veFB>2!F%0$IqY|01nuSqCjplv?*Ii;`Afz`g;Pw(Pw)EWjh z+w36J{fgfKrGV5WbtQH0pZ-6Gdf6p+AXUrB$Tm(N-+x&kY5}x_IU<*@gtOorl_zwu zGAu3!bt9jLeIn%*a3d)*AE8hN4@YvC>+iH!1>4sblB$_Hz>T=zA8UEgN&e4RjYpc( zJ#K}Z5PV+05#Ow(rT(8gUidq^r=f9a%}u6uZBQKVV0HzZ8Nzj1NdKA5`+TwBjml3Rzl8k26_vjyYbfdp)!b>QASs5&FRISPN zeJ^&i+A+|V(Z{8+jpNF&^^o(fWVNU1=XSDA;hQl|tZcBY8@X{EyR)GtyJLM_#T#+d z46j-rduosn9?J6Y9za<}^zH2CemSxTJ2&NHpw66Bloz5QTa5h}m^MDkyJN-(D+t+z z33YcI$b$P(W8*XS#OTq-)mt>d$A}N&IvQY&!%3NW=kbX-;y*LGCig%d8T<#uoRgVl zTR-wXs7g^c^o4JGoWRUkK+7}KrM<39sXsFpyn>^xXRjj7yvr5(b zG5PcS>ic=Hh9mrL6g=U{pk+}gxs&NR)ab6`?DkGCp>twK%w}pe4&sp<+Q9p45`=r( zx|CtA7$7MwZDT)lSMzOZib9!e$0W3@PQ);Ylu2;!`_Ozz&wh|Z|6HRgnk`7C=I|t)ZZV>y+8EqP zOlA=e#RG^tNTNbSU$VcjGfaExvSLuOhFhBs?b$XAk{oDerA)PfEr64n<;G2s2m?D;IEF3-w-XTSn5oG4HSeTZNvgN1c0^cl2F%>rd_V z--(BT<}Vi>j~c%Ct+MdB+W>I~E0 zhh|5Vno35}xK{@NrRLkxCPBM?4T+{4j1K~p6owt@|mb&08?|>Zyp0a)W0cRtG)GlCJ zxi)ll;fUmhQt{w`xywYmD#aS4S(1yqU+EQ^npEHcMvdS`hjaB7z_p1A?eO@*xUWMg zB1o=>DU{8RH!ah6{3bY3^P8tmR|ZVmQq_X`7K!(Q#l%6nA|d8< zr?pqTDjJE^kRy%uvaM0fnZ5l0_7i$xTlCV;Bbc>oytu)v zbnvM?#A7BrsrCyc7J=3?PhQoC5%f?28UGJnVlJ^JU%N4q4Jo@N@ zvC=f8^|WgD>So;KOS^5J_aN|x=^=FGkEE2d+^B6);vSIv94`oAG{*Q@$4S>VGh(dhwd&)xQpReajzWK#s(l8}t*A%J(vx0lc3rB1HWF5b279 zX1!61_pWX5ZfYER^rE(iDb8nzN$_QJ97SfSB#kL?f6XJb(<1*-pk@Q+w=+h`MwYgHRab7@_6PCxP*LQ2e@kp8P>xgyc zW~=Rl@oJ~7CZTcq!o`Ap4)Hnxb(acC*90nT&dNqHvLNmoS{+>dE1z=$- z?z4q&0}FQj;m=+S?(IG-ea3#h7>%Jf^${@h?%=H&N!2+pgr#py!e-I%F7r-+%3CJM z?fPvYd;wf)`NA$7XsE{pHeW6$pEacb1!n-c##LH;rD8j?`U^ z-FPU=tm0*k&b%D9EkL5|`ob~|D)wx2>2!Z|-czZ`&xOV{OqOGxO9flxaAi%qhKiZ3GjL>ntFD)WBcXTl2M{a z0hY*QE#Ar@?yCKM6j2Ch>*xW4MTT$)$^0E%31C)zr#bby<@$8e`sm;dWA%wId%Nuy zcJ^YV)2-*81+Exk6?@Mhi~)TaRX6wzY`GWY29EglW9smMA9}(5@86%9YCytecXEiN z;>K?*?!1~kRA5ANv|6zCS?5XKoV8Bc;OV1Jfe+q<{(HMjzI~dLKdPfi)r-h!UZQkI z;Q>dk*eU4Z3XdtjBA)(WU(`nvS}(1tXGgj}DlqMQN^KE{|1T6VggyItvk>y~wRHoK 
zhB2*m*!#{0ZmnZD=4|Dwh^P{c6v&qSv0mDY9Yb%&g^OomndwUmfsh-msf9>I~6kyp^wgzETcd*^FSB?WF_m=T5w-t5cjGEk5$EE$8f+(>HEC}$MIksgH=t6HW z1j^X)49~c$sr0ME3UMEF2_U*5>tv8((OoIxg;U zyQedA%v(Jhyw}YZV!?0Ny88l*x=E6KZfK5T&#QcJwyw6Hq{bBC#`u9J#ke=#A&=RV zVI$IM;Ooew2PxbzdpE9YF!_!`dMm_0Xt0hu{sApW`YZ016mw%F^TRq+_6lRAow4F$ zjUjQ5)Io`l$GZ-iVAK9(3An#bx1ngupMu`%D?3X`4TFEP!n-lhAXEG(lzI1_?)Us; zl*3O2rmrc|h=cRHEqc(}nEqBx*J3#kx_fl+AegM#WG5Bt#=@f(rA@RU2r=_KAf@VB z&|){GenZl4V7oriI2bm9vR$`ff9;7`9^V)q3@+x9V6XIf)l(1E->v>UkQJLE=)UYR ze!L~@ni^XCsK*=M8-AlT($V5y4ob}!7v z7wQd{VAC$Ah&M4v`txgj4sE^v12gY_1L_zg>8(FsX*G83-d`>?{MJ<9hEcT~koS1c z$zj8MOk`sgH2NGk*1~b?_C3Qhp8Ly}(PT-r`D-aaJ-*zgWUZ!4^cfE{9Ua>vASCUn z64Ot858e_YYhjTcccwcM9o!DRgsXPx(v^JG8;Wk0u(9v+rE-wsWDe_4i3k4#p%0PI zv~r%Gm0A|!C3)~5x%=u&*sK`f8i`7+KMW-`5Imd~?qE`CVlo3g@_nd&R?Fkb7RTud z$J;hz$qsj~a}+V6+~~8_qIK_NeO@V%61UOhgfgO^$Ieg=y-MiX#G0MG5!ki9JT;RJ@ckU3x8q=F)7 zt>;--+7lJJyw=la>+$X;bRqB(tE5M^8D`|4YB7SKDnLHYN`WS?1qy{d;FMk&)C*&w zpI{TWx)RDRZQN}XP$o-b)~V<0EE3c5K-zWvo6<8eQTJ~~KJ772Xl=$SF3{h(_z6h) z1$k@7{YYvwUP`)q1r!uqcUj%C=S=D6Qu(t~occMC=1zUFKCG#G`m{Qv&rJW6!uBam z=TCo~2;U?)H)mJ}GL_Bst6dRyNyinsAKbSsGMWNrdNw57l_pK~x3|y^KvA>io8=NS zU)_>hNT86YgwK7MM_KtAS&!W|2zLeLL(}JH;hYhB%c3XoEd6HFrAm=FwI~L0m(5U` zd{}lIb{-@ywADLSK722qow}k&1c7?n%b6comcgN_6wuen8a0<;JHF2SbvbxV-uN3O zg)(8UQK*aXKSM89+~7|>L}HnP$p)Y%7Z819p-H3o`ednmMPIIogiN7UP3T%zR7(h# z_tJ|CQ}2K$x9}yZ#s-E@Q#AXsW!a=(tCw`BIZeWM%FM`sj6|W%r?7!KI~&P! zqd1X)8P96-?GmZeM{m}ALUaj|#SD1KYQrt;95rX^nrF@TRP^`uG~U3CTF!GyK8ByH z_4y^wJHSN2z+ZpWs25U!fi{e}NC=vm`I;ZJJ=B3&d6H-0Id@vP?;uRy z%->`^FsO5PeC)Vh(+)Pty)^WrHb&H1_fHtjwm{o2ze=9C>6MYWjF6UKM&^YD-{3Sd zEP??;pJ<2%Px#YOfpr6Q z^5z;eZOj5Ob8TYOxpM?l$Hyq)IQIW6=H^sBqvW;qh5qW;Efxt?^q#;Gz-7w+OK-!x zMmNX&sawMJZvJk0w!CfMjEF^TZspR@t%Py${~{?WYwBfjVW6HlAv|f#ARrc0twNFfi3#EW%BR8v?4Oxd65js$Tb7W&k6X0y zK9JzV0D2>Q|3xuW`G;cIc#(sUy{9+V<}S8|E)4}c0`6rE%fTszV%-90^Vjs-kplTg zfFeh~p@G{UQ-`1xeUPM+w(Np`da;?EYu-j1tGcsWApF;}Fym#z4Tp#oak_eIOz7oW z?MO@Oap%V2?*<%&6-oX07&|K}tIx3pthVaBh58RYKLuWPQEdx-L{t*ZS`$QQy&an! 
z9lw=unMuTYR5_8?8WjnZf_sKolw%?AgOj z4}H|*_pEqx-!yltaEt)_pr?E$brnXQLth=GhDmoZ%9Q7^f(vn+2EW1j4SzOR(`R|5 zj1VNFOBnk0af3yL%QSKn6P2Fd|!dGYgcv z(!9x5aV*D*@iXU(*g>!Y?(?Md z)zjux6tHu=erVIIl=Gs|^Ep7yG$!>SyxfdDd3(rtxOh1d_95ksc=II*c)9i?M^Mocd`uhTIiGTuebX8Eu2*GPJHJzLz^F@WwlkM<&~*TOO^O>4pG;lkGdvxsJ9)73Lg8lOR*(tNkgOpt+vCOJXcXu&gLm* zVstgTH9?{_THT_ycs}@9;aly_eqC#U-sLz2-4_G89};reQ4EYZyN4AZ65&1}$?>aY8%f*%Z}cS7aNl?AVwdhCfV+~eG}(tT7qFns`R%5clRFT19iMkHSI z!4G8E3cqps=8el#sIir(ur)!>z575?t`C>2J^uw(@=8pwE%nuN%Uj|4#zo zxaAj11z6x;`T$>3H^(G0a{GMHmoPtJ%T77c?#ffI3QdVecXY$%=oKP2-}zQyo=w#t z>vC^apRceS=93yXMOo|lcPjEtawc;5c=k)$(uFCYR-B%zYr}$0R}Ak1K|P1X-z+Zt zA6Ck8s03h*vjn?clRq3N+Re7-o~pbc)tC{N=}9P2aOWsIo2bIe6r9SQudUpfgT@M%hsX z0^L8~ECqHe5Y*}XyCZxkPiO!fRE=)RldxVf@8Y{Ab(j9qC?9;S?P&D1^LEWrp&c*Q zxnVLi&mVzlp$h_3NNnQwDjcqQ+bd`B&neIU_<7o4cG=PS?bX7%+6(uL&rfx6PgS*t^t~l-`T37a#S+9K$sl1OqapFH{a=v-z0dYFz$e9D{syrL+nEz6O_a{dD;W{Q4x>I=ei&U)s zTmJpR4p*uCe^U|H4dVMa|BL*0Z4m&X|I*5e|5GlQ(WvM`IRu?Lb#ycqwg5YIif-pv z7k^_))`ne=g=X9Sxd0gXWH=^;%MKSJn@#N{EVpr=8ZX1PU_8-n?j-i^BgHJ3JS*#c>Qy} zz7;yL*CIMHTu;mwfS$iPvFCl_z%xRDNKcP%zv_Sd;^Xehtettp5OpBtd>4P`YKv-H z3f$1n_Q58RS*r(DNldCeD%Y#f58tb%s5MZSCl$LYb4&p0%XPd-yqAr?oYD~N0b5tIH3N(M@H zO~iOmW98Kp`1lp1vOOgf0!RDXq=wxyTmX7h9Gq<{9Z2p-BisH@AKAN8p55ctE7gzg zi7bndTq*T{oM;k>_tPViOFb!LrsqQ6oHY7|yvG;JU+iEJN<3-utx|mDoLZy!x!?Y< zx^sfoIn+5Wlg zEN*npMzNzUGoryVZp7~-i_|EWnUTW{47~i^{kxg@GJ83|7u* z>v{($DJ6^|Ab1W6h^c(dOOs;U372M?sI~`*=#laxjQOfNPI|6YB+y?c%pwj7ZY#a5 z-U`BaT5}A)-Kym^U7gyT?ZRWw$VT8lk)jGU9?_gMk+uvs3L_+aNr!Cq&Dbo(IY5sd zB!ZBH&CLYP5z{F5H_7uUYvWq5tcc{7qkm5N1r-LbGUc2=nAPS1Rsdcc-4oGPMT<-}DBzaBBPl$Q4%M%jm&$_?xT2H61}4_yco zgaZcdaY1}Tb0H$MVw#&Bk+JC~{WhdTh1?19x+ zDX;zJ5L#Nt?Y}>Vs~C)ARI?x zTDYF6kIQ!10EjV4g;(_ENdtC9_snxJH@$JOaqYEm4w7zz8?iAq_Ym(RE!cRlBk zez7Abnl<9YNApwj2h5jk%{Xn0SGsQwl{?OmiTcLmtRneChPHlOQ<#so%iG4rVA^>IJ(56CCa7vbXKJImr?j&i*B< zExc`Va`L@bFz>|Fk##h0 z-KY*aBCn4Teowg-TZ8u1`iV4iYAReFlRKctsw68Id~v2%@{U?RVe20d#qK;@kvK*% z`vHcqBN#CnLS%zLqETAMNcg`mvwlqhAX_mE2noSMK;B3C01{Q66Bm( 
z{^s63>8qmD*BPS#783M+vey$7A?I}ECqC{`)S1gC4l*C(T~j4f)8Kl~G)Wk#EpN7u z>}@llyM)LX)m7l~=x%y=5;&ia;X4M$G;a@&T0NO*-9<-L$v8P&v;Vo1Ou_d@F10yh zlo&_B;?{X*aTza*N#Bk72~%-972A$DqO3 zt7fouLk!M#vBQ$1PqB>!e+nZ0Vgzs07?DgOmm8nkSakDq_2bOWASoUjQ=zomCzgNg zE8_Di=pT1UnxuOZbL_WeS--=9M$#fVOk2rL?)*!ps3ZppA2_TBqu`JT{`jlR{CIgD zmBeJP{bQ`oMAk=T$GxCaQLg*PR#oDQ&2D!c_Yn3R@UTntrJ|l%Oi|UXEaDeWoT8!I zfCHV`Oxv*hA04EYmXh66=M%FNzpu@m9$|d7;@wGZ<7S^WFcrDT(;CmTm zlRlpaVrq;UN_MNAPwf7G^jN>=7dxg>U*tXG+S#c-TBS`to|_J+2{jUOe7NL*<&Mp0 zzF3fI6a*Y;bpuOFa5f|)mLY0wqUp_v!*t3|`7A>eFXJxk#F0>~^_9cLfJ+8XTIRk# zvX6biqsd>l={t$_fl*#}|J%!@k#oBBrkLDbQPtlU z@h5gl0hsTQU=OJi$J_8uNd~atq;h`$R0sk;C`O$+-nXZbhk5>z=72*>k96zU#?&dR r*BlNVfun%m2S%x%d>6ol;hK@f0cd>f`?crx@|f literal 0 HcmV?d00001 diff --git a/docs/index.md b/docs/index.md index 0438bf9..6750fab 100644 --- a/docs/index.md +++ b/docs/index.md @@ -37,6 +37,12 @@ PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](ht ## Video Tutorial TBA +## Contributions +We welcome contributions from the community to enhance the capabilities of PanMAN and panmanUtils. If you encounter any issues or have suggestions for improvement, please open an issue on [PanMAN GitHub page](https://github.com/TurakhiaLab/panman). For general inquiries and support, reach out to our team. + +## Citing PanMAN +If you use the PanMANs or panmanUtils in your research or publications, we kindly request that you cite the following paper:
+* Sumit Walia, Harsh Motwani, Kyle Smith, Russell Corbett-Detig, Yatish Turakhia, "Compressive Pangenomics Using Mutation-Annotated Networks", bioRxiv 2024.07.02.601807; doi: [10.1101/2024.07.02.601807](https://doi.org/10.1101/2024.07.02.601807) -## Contributions -We welcome contributions from the community to enhance the capabilities of PanMAN and panmanUtils. If you encounter any issues or have suggestions for improvement, please open an issue on [PanMAN GitHub page](https://github.com/TurakhiaLab/panman). For general inquiries and support, reach out to our team. -## Citing PanMAN -If you use the PanMANs or panmanUtils in your research or publications, we kindly request that you cite the following paper:
-* Sumit Walia, Harsh Motwani, Kyle Smith, Russell Corbett-Detig, Yatish Turakhia, "Compressive Pangenomics Using Mutation-Annotated Networks", bioRxiv 2024.07.02.601807; doi: [10.1101/2024.07.02.601807](https://doi.org/10.1101/2024.07.02.601807) diff --git a/docs/install.md b/docs/install.md index 8d0681f..b0cd119 100644 --- a/docs/install.md +++ b/docs/install.md @@ -15,7 +15,7 @@ cd panman chmod +x install/installationUbuntu.sh ./install/installationUbuntu.sh ``` -3. Run panmanUtils +3. Run panmanUtils ```bash cd build ./panmanUtils --help @@ -37,14 +37,14 @@ docker pull swalia14/panman:latest ```bash docker run -it swalia14/panman:latest ``` -3. Run panmanUtils +3. Run panmanUtils ```bash # Insider docker container cd /home/panman/build ./panmanUtils --help ``` !!!Note - The docker image comes with preinstalled panmanUtils and other tools such as PanGraph, PGGB, and RIVET. + The docker image comes with preinstalled panmanUtils and other tools such as PanGraph, PGGB, and RIVET. ## Using DockerFile Docker container with preinstalled panmanUtils can also be built from DockerFile by following these steps @@ -66,7 +66,7 @@ docker build -t panman . ```bash docker run -it panman ``` -4. Run panmanUtils +4. Run panmanUtils ```bash # Insider docker container cd /home/panman/build diff --git a/docs/utils.md b/docs/utils.md index 98a3d54..a9fa86e 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -2,13 +2,13 @@ Here, we will learn to use exploit various functionalities provided in panmanUtils software for downstream applications in epidemiological, microbiological, metagenomic, ecological, and evolutionary studies. -**Step 0:** The Steps below require panmanUtils and a PanMAN. If not done so far, refer to [installation guide](install.md) to install panmanUtils and [construction](construction.md) instructions to build a PanMAN. 
Alternatively, users can download pre-built PanMANs using the following command -```bash +**Step 0:** The Steps below require panmanUtils and a PanMAN. We provide a pre-built panman (`sars_20.panman`), othewise, refer to [installation guide](install.md) to install panmanUtils and [construction](construction.md) instructions to build a PanMAN. + ### Functionalities in panmanUtils All panmanUtils functionality commands manipulate the input PanMAN file. @@ -16,8 +16,7 @@ All panmanUtils functionality commands manipulate the input PanMAN file. cd $PANMAN_HOME/build ./panmanUtils -I {opt} ``` - - +
Table 1: List of functionalities supported by panmanUtils
| **Option** | **Description** | |----------------------------------|-------------------------------------------------------------------------------------------------------------------| @@ -45,7 +44,8 @@ cd $PANMAN_HOME/build | `-o`, `--output-file` | Prefix of the output file name | -> **NOTE:** When output-file argument is optional and is not provided to panmanUtils, the output will be printed in the terminal. + +> **Important:** When output-file argument is optional and is not provided to panmanUtils, the output will be printed in the terminal. !!!Note For all the examples below, `sars_20.panman` will be used as input panman. Alternatively, users can provide custom build panman using the instructions provided [here](construction.md). @@ -193,3 +193,18 @@ Extract amino acid translations from a PanMAN in TSV file. cd $PANMAN_HOME/build ./panmanUtils -I panman/sars_20.panman --aa-translations --output_file=sars_20 ``` + +### panmanUtils Interactive mode +**Step 1:** Users can enter panmanUtils's interactive mode by passing input panman as input using the following command: + +```bash +./panmanUtils -I +## Example +./panmanUtils -I panman/sars_20.panman +``` + +!!!Note + The interactive mode should look like the image attached below +
+ +**Step 2:** Use the commands listed in [Table 1](#table1) to perform desired operation diff --git a/scripts/build_panman.sh b/scripts/build_panman.sh new file mode 100644 index 0000000..197f57e --- /dev/null +++ b/scripts/build_panman.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +## Defines +PANMAN_HOME=/home/panman +PANMAN_BUILD=/home/panman/build +DATASET_PATH=/home/dataset +DATASET=sars_20 +PANGRAPH_HOME=/home/pangraph/pangraph.sh +PANGRAPH_OUTPUT=$PANMAN_HOME/build/pangraph +panmanUtils=$PANMAN_BUILD/panmanUtils + +# method +method=$1 + +cd $PANMAN_BUILD + +if [[ $method=="pangraph" ]] then + ##### Commands generate PanGraph (JSON) and Tree Topology (Newick) from raw sequences in FASTA format #### + mkdir -p pangraph + echo "Building PanGraph..." + $PANGRAPH_HOME "$DATASET_PATH/$DATASET.fa" "$PANGRAPH_OUTPUT/$DATASET.json" 2> "$PANGRAPH_OUTPUT/$DATASET.nwk" + echo $(cat "$PANGRAPH_OUTPUT/$DATASET.nwk" | grep "tree" | awk '{split($0,a,"tree: "); print a[2]}') > $PANGRAPH_OUTPUT/$DATASET.nwk + + #### Run panmanUtils to construct PanMAN using PanGraph #### + echo "Building PanMAN from PanGraph..." + $panmanUtils -P $PANGRAPH_OUTPUT/$DATASET.json -N $PANGRAPH_OUTPUT/$DATASET.nwk -o $DATASET +elif [[ $method=="msa" ]] then + ##### Commands generate GFA (JSON) and Tree Topology (Newick) from raw sequences in FASTA format #### + + + #### Run panmanUtils to construct PanMAN using PanGraph #### + echo "Building PanMAN from MSA..." + $panmanUtils -M $PANGRAPH_OUTPUT/$DATASET.msa -N $PANGRAPH_OUTPUT/$DATASET.nwk -o $DATASET +elif [[ $method=="gfa" ]] then + ##### Commands generate GFA (JSON) and Tree Topology (Newick) from raw sequences in FASTA format #### + + + #### Run panmanUtils to construct PanMAN using PanGraph #### + echo "Building PanMAN from GFA..." 
+ $panmanUtils -G $PANGRAPH_OUTPUT/$DATASET.gfa -N $PANGRAPH_OUTPUT/$DATASET.nwk -o $DATASET +else + echo "Error: Either pass pangraph or gfa or msa as an argument"; +fi + + + From 85631d5c97adfd5348c0e13c3f9e3a45d0f529e1 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 11 Nov 2024 11:36:48 -0800 Subject: [PATCH 051/103] updated wiki --- docs/utils.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/utils.md b/docs/utils.md index a9fa86e..34748cf 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -81,12 +81,12 @@ Extract network in Extended Newick format. * Usage syntax ```bash -./panmanUtils -I ----extended-newick --output-file= (optional) +./panmanUtils -I --extended-newick --output-file= (optional) ``` * Example ```bash cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman ----extended-newick --output-file=sars_20 +./panmanUtils -I panman/sars_20.panman --extended-newick --output-file=sars_20 ``` #### Tip/internal node sequences extract From 247fc0ebf3a11f31f35f0a94dd45a7b842183b17 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 11 Nov 2024 11:55:13 -0800 Subject: [PATCH 052/103] updated readme --- README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ba982ab..204da84 100644 --- a/README.md +++ b/README.md @@ -121,15 +121,10 @@ cd /home/panman/build ``` ## PanMAN Construction -Once the package is installed, PanMANs can be constructed from PanGraph [or GFA or MSA] and Tree topology (Newick format) using panmanUtils. Here we provide examples for constructing PanMANs from PanGraph (JSON), users can follow the instructions provided in [wiki](https://turakhia.ucsd.edu/panman/) for other methods. -### Building PanMAN from the provided dataset +Once the package is installed, PanMANs can be constructed from PanGraph [or GFA or MSA] and Tree topology (Newick format) using panmanUtils. 
Here we provide examples for constructing PanMANs from PanGraph (JSON) and custom dataset. Alternatively, users can follow the instructions provided in [wiki](https://turakhia.ucsd.edu/panman/) for other methods. +### Building PanMAN from PanGraph -**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Otherwise, follow the instructions to download the dataset. - -```bash -cd $PANMAN_HOME/dataset -TODO -``` +**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. **Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: From b01bf3f95f6666f3c309dc93beeab822158744f8 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 13:40:06 -0800 Subject: [PATCH 053/103] construction methods --- docs/construction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/construction.md b/docs/construction.md index 9c70b74..f873f3b 100644 --- a/docs/construction.md +++ b/docs/construction.md @@ -54,7 +54,7 @@ cd $PANMAN_HOME/dataset TODO ``` --> -**Step 2:** Run panmanUtils with the following command to build a panman from GFA: +**Step 2:** Run panmanUtils to build a panman from GFA using the following commands: ```bash cd $PANMAN_HOME/build From b857d4910487425623f0b0d155c7d0612d541081 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 13:45:23 -0800 Subject: [PATCH 054/103] construction methods --- docs/construction.md | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/docs/construction.md b/docs/construction.md index f873f3b..dac26d5 100644 --- a/docs/construction.md +++ b/docs/construction.md @@ -15,11 +15,6 @@ cd $PANMAN_HOME/build **Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom PanGraph (JSON) and tree topology (Newick format) files to build a panman. 
- - **Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: ```bash @@ -32,11 +27,6 @@ The above command will run panmanUtils program and build `sars_20.panman` **Step 1:** Check if `sars_20.gfa` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom GFA and tree topology (Newick format) files to build a panman. - - **Step 2:** Run panmanUtils with the following command to build a panman from GFA: ```bash @@ -48,11 +38,6 @@ The above command will run panmanUtils program and build `sars_20.panman` ### Building PanMAN from MSA (FASTA format) **Step 1:** Check if `sars_20.msa` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom MSA (FASTA format) and tree topology (Newick format) files to build a panman. - **Step 2:** Run panmanUtils to build a panman from GFA using the following commands: @@ -66,20 +51,15 @@ The above command will run panmanUtils program and build `sars_20.panman` We provide scripts to construct panmanUtils inputs (PanGraph/GFA/MSA and Newick) from raw sequences (FASTA format), followed by building a panman. !!!Note - This script uses various tools such as PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively. The script is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](install.md)). ß + This script uses various tools such as PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively. The script is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](install.md)). **Step 1:** Check if the `sars_20.fa` file exists in `test` directory. Alternatively, users can provide custom raw sequences (FASTA format) to build a panman. 
- - **Step 2:** Run the following command to construct a panman from raw sequences. ```bash cd $PANMAN_HOME/scripts chmod +x build_panman.sh -./build_panman.sh [pangraph or gfa or msa] +./build_panman.sh pangraph/gfa/msa ``` \ No newline at end of file From 4acc4746faf70823386694b7bef6c898fb62f917 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 13:50:10 -0800 Subject: [PATCH 055/103] updated navigation --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index cb762c0..c746f0f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -11,7 +11,7 @@ theme: - content.tabs.link - toc.follow - toc.integrate - - navigation.sections + # - navigation.sections - navigation.path - navigation.tabs - navigation.top From 537332a7a13a25bc04beb7d92cca16614e65e85d Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 13:51:29 -0800 Subject: [PATCH 056/103] updated navigation --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index c746f0f..a5528fa 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,6 +12,7 @@ theme: - toc.follow - toc.integrate # - navigation.sections + - navigation.expand - navigation.path - navigation.tabs - navigation.top From d706bbdfb298e0cef1643597acb36d6c7cbc2607 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 13:54:10 -0800 Subject: [PATCH 057/103] updated navigation --- mkdocs.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index a5528fa..8adc438 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -14,7 +14,7 @@ theme: # - navigation.sections - navigation.expand - navigation.path - - navigation.tabs + # - navigation.tabs - navigation.top - navigation.footer - search.highlight @@ -23,7 +23,6 @@ theme: # - content.action.view # - content.tooltips # - navigation.tabs.sticky - # - navigation.expand # - navigation.instant.prefetch # - navigation.tracking # - search.share From 
768bff04783fa9f764810e83b45ea314743c7717 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 14:05:07 -0800 Subject: [PATCH 058/103] updated images --- .../{interactive_mode.png => interactiveMode.png} | Bin docs/images/representpower.svg | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/images/{interactive_mode.png => interactiveMode.png} (100%) diff --git a/docs/images/interactive_mode.png b/docs/images/interactiveMode.png similarity index 100% rename from docs/images/interactive_mode.png rename to docs/images/interactiveMode.png diff --git a/docs/images/representpower.svg b/docs/images/representpower.svg index 9854bdc..ceb0ad4 100644 --- a/docs/images/representpower.svg +++ b/docs/images/representpower.svg @@ -1,4 +1,4 @@ -
VG
GFA
GBZ
PanGraph
UShER-MAT
tskit
PanMAN (This work)
Lossless Sequence Encoding
Genomic Variation / m-WGA
Phylogenetic Relationship
Single-nucleotide Substitutions
Small Indels
Structural Mutations
Complex Mutations
\ No newline at end of file +
VG
VG
GFA
GFA
GBZ
GBZ
PanGraph
PanGraph
UShER-MAT
UShER-MAT
tskit
tskit
PanMAN (This work)
PanMAN (This...
Lossless Sequence Encoding
Lossless Sequence Encoding
Genomic Variation / m-WGA
Genomic Variation / m-WGA
Phylogenetic Relationship
Phylogenetic Relationship
Single-nucleotide Substitutions
Single-nucleotide Substitutions
Small Indels
Small Indels
Structural Mutations
Structural Mutations
Complex Mutations
Complex Mutations
Mutations
Mutations
\ No newline at end of file From 20e9c8a326a49b5ee416aa57bfae2a7f253cffa9 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 14:06:41 -0800 Subject: [PATCH 059/103] updated images --- docs/images/utility.svg | 2 +- docs/utils.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/images/utility.svg b/docs/images/utility.svg index 9610b84..7d68216 100644 --- a/docs/images/utility.svg +++ b/docs/images/utility.svg @@ -1,4 +1,4 @@ -
ii) Raw Sequences

 >S1
 TACACGCA 
 >S2
 TGC--GCT 
 >S3
 TGC--G-A
 >S4
 TGC--GCA

iii) MSA

 >S1
 TACACGCA 
 >S2
 TGCGCT 
 >S3
 TGCGA
 >S4
 TGCGCA

vii) MAF
 ##maf

 a
 s S1 0 8 TACACGCA
 s S2 0 6 TGC--GCT 
 s S3 0 5 TGC--G-A
 s S4 0 6 TGC--GCA
  ...
 #VCF
 #reference=S2
 POS REF ALT S1 S2 S3
 1   G   T   1  0  0
 2   C   CAC 1  0  0
 4   C   -   0  1  0
 5   T   A   1  1  1
v) VCF
 opt:--fasta
 opt:--fasta-aligned
 opt:--maf 
 opt:--vcf -n ref
N1
S1
S2
$3, #3
$4
$0, $1, #0, #1
 TreeID NodeID
 0      S1
 0  S2
nodes.tsv
iv) Subnetwork
 opt:--subnet -i nodes.tsv
 Average Height
 Max Height
 # Nodes
 # Substitutions
 # Deletions
 # Insertions
 ...
i) Summary
 opt:--summary
panmanUtils

$2, #2
N1
N2
N3
S3
S1
S2
S4
$1, #1
$3, #3
$4
$5
$6
$0, #0
Block Mutation: #
Nuc Mutation: $
PanMAN
 input.panman
 S N1 T
 S N2 G
 S N3 A
 S N4 C
 S N5 AC
 S N6 GC
 P1 S1 N1 N3 N4 N5 N6 N3
 P2 S2 N1 N2 N4 N6 N1
 P3 S3 N1 N2 N4 N2 N3
 P4 S4 N1 N2 N4 N6 N3
vi) GFA
 opt:--gfa
viii) Annotate
 opt:--annotate -i info.txt
N1
N2
N3
S3
S1
S2
S4
A.1
A.2
A.3
 N2 A.1
 N3 A.2
 S4 A.3
 info.txt
 Node-ID  aa-mutations codon-change
 S3       S:0:Y;       TGC>TAC;

 opt:--aa-mutations
ix) Amino acid translations


 Command: ./panmanUtils -I input.panman {opt}
 ((S1, S2) N2, (S3, S4) N3) N1;
 opt:--newick
x) Newick
\ No newline at end of file +
 Node-ID  aa-mutations codon-change
 S3       S:0:Y;       TGC>TAC;

Node-ID  aa-mutations codon-change...
 opt:--aa-mutations
 opt:--aa-mutations
x) Amino acid translations
x) Amino acid translations
 ((S1, S2) N2, (S3, S4) N3) N1;
 ((S1, S2) N2, (S3, S4) N3) N1;
 opt:--newick
 opt:--newick
xi) Newick
xi) Newick
 #VCF
 #reference=S2
 POS REF ALT S1 S2 S3
 1   G   T   1  0  0
 2   C   CAC 1  0  0
 4   C   -   0  1  0
 5   T   A   1  1  1
#VCF...
vi) VCF
vi) VCF
 opt:--vcf -n ref
 opt:--vcf -n ref

 >S1
 TACACGCA 
 >S2
 TGC--GCT 
 >S3
 TGC--G-A
 >S4
 TGC--GCA

>S1...
iii) MSA
iii) MSA
 opt:--fasta-aligned
 opt:--fasta-aligned
N1
N1
S1
S1
S2
S2
$3, #3
$3, #3
$4
$4
$0, $1, #0, #1
$0, $1, #0, #1
 TreeID NodeID
 0      S1
 0  S2
TreeID NodeID...
nodes.tsv
nodes.tsv
v) Subnetwork
v) Subnetwork
 opt:--subnet -i nodes.tsv
 opt:--subnet -i nodes.tsv
ix) Annotate
ix) Annotate
 opt:--annotate -i info.txt
 opt:--annotate -i info.txt
N1
N1
N2
N2
N3
N3
S3
S3
S1
S1
S2
S2
S4
S4
A.1
A.1
A.2
A.2
A.3
A.3
 N2 A.1
 N3 A.2
 S4 A.3
N2 A.1...
 info.txt
 info.txt
viii) MAF
viii) MAF
 ##maf

 a
 s S1 0 8 TACACGCA
 s S2 0 6 TGC--GCT 
 s S3 0 5 TGC--G-A
 s S4 0 6 TGC--GCA
  ...
##maf...
 opt:--maf 
 opt:--maf 
 S N1 T
 S N2 G
 S N3 A
 S N4 C
 S N5 AC
 S N6 GC
 P1 S1 N1 N3 N4 N5 N6 N3
 P2 S2 N1 N2 N4 N6 N1
 P3 S3 N1 N2 N4 N2 N3
 P4 S4 N1 N2 N4 N6 N3
S N1 T...
vii) GFA
vii) GFA
 opt:--gfa
 opt:--gfa
 >S1
 CACG 
 >S2
 C--G 
 >S3
 C--G
 >S4
 C--G
>S1...
opt:--index --reference S1 
--start 2 --end 5
opt:--index --reference S1...
iv) Range query
iv) Range query
 Average Height
 Max Height
 # Nodes
 # Substitutions
 # Deletions
 # Insertions
 ...
Average Height...
i) Summary
i) Summary
 opt:--summary
 opt:--summary
ii) Raw Sequences
ii) Raw Sequences

 >S1
 TACACGCA 
 >S2
 TGCGCT 
 >S3
 TGCGA
 >S4
 TGCGCA

>S1...
 opt:--fasta
 opt:--fasta
panmanUtils

panmanUtils
$2, #2
$2, #2
N1
N1
N2
N2
N3
N3
S3
S3
S1
S1
S2
S2
S4
S4
$1, #1
$1, #1
$3, #3
$3, #3
$4
$4
$5
$5
$6
$6
$0, #0
$0, #0
Block Mutation: #
Block Mutation: #
Nuc Mutation: $
Nuc Mutation: $
PanMAN
PanMAN
 input.panman
 input.panman


 Command: ./panmanUtils -I input.panman {opt}
Command: ./panmanUtils -I input.panman {opt}...
\ No newline at end of file diff --git a/docs/utils.md b/docs/utils.md index 34748cf..36e3c32 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -205,6 +205,6 @@ cd $PANMAN_HOME/build !!!Note The interactive mode should look like the image attached below -
+
**Step 2:** Use the commands listed in [Table 1](#table1) to perform desired operation From 83a35e71a9837da284bf9e196463b3da87f9e779 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 14:12:06 -0800 Subject: [PATCH 060/103] added interative mode figure --- docs/utils.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/utils.md b/docs/utils.md index 36e3c32..6e933b7 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -203,8 +203,9 @@ cd $PANMAN_HOME/build ./panmanUtils -I panman/sars_20.panman ``` -!!!Note +!!! Note The interactive mode should look like the image attached below -
+ + ![Interactive Mode](images/interactiveMode.png) **Step 2:** Use the commands listed in [Table 1](#table1) to perform desired operation From 0b82ede706278caa5bf71d7755ea18ddd84dab66 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 17 Nov 2024 14:28:47 -0800 Subject: [PATCH 061/103] test dataset --- test/sars_20.fa | 11953 ++++++++++++++++++++++++++++++++++++++++++++ test/sars_20.json | 1 + test/sars_20.nwk | 1 + 3 files changed, 11955 insertions(+) create mode 100644 test/sars_20.fa create mode 100644 test/sars_20.json create mode 100644 test/sars_20.nwk diff --git a/test/sars_20.fa b/test/sars_20.fa new file mode 100644 index 0000000..78f85b0 --- /dev/null +++ b/test/sars_20.fa @@ -0,0 +1,11953 @@ +>Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAA +CACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG +GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGC +TACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTAT +ACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCT +GAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA +ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATT +CCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGC 
+TTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAA +CTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGC +ACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCA +AAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG +GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACC +ATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCG +CTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT +CTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAA +TATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCAT +CTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT +TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAA +TACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGA +TCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTT +ACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGA +GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTA +GTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTG +ATTGGCTTGAAGAGAAGTTTAAGGAAGGTATAGAGTTTCTTAGAGACGGT +TGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGG +ACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCT +TTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATT +GGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAA +ACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGA +TTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGG +TTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGAC +ACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACA +CTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGT +TGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTG +TCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATT +GATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATG 
+AGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACT +CAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT +GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAG +GACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTA +GTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATT +GTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAA +TGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA +CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGA +CCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTC +AACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTT +GCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTT +AAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTG +ATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATG +ATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA +ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAA +TCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCT +TAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTT +TTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTT +ACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTG +CTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAA +TGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC +TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACT +AAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACTAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTT +GTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGC +TGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTT +CACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAA +ACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTA +AGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTT 
+GAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACC +TCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGT +CCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTG +TTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG +TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTG +CATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTA +CAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTA +GAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAA +AGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCT +TAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT +TTAAGAAAGGTGTTCAGATACCTTGTACTTGTGGTAAACAAGCTACAAAA +TATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTG +GTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTG +TATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCC +TATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAAC +CAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTA +AGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACT +GGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGA +CTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTT +TTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAAC +AATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGA +AGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA +CCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCT +TGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAAC +CAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTA +ATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG +CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTT +CTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAA +CCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAAT +TGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCG 
+ACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATA +TTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTA +CTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTG +CAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTA +GATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTT +TGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCA +ATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAG +CTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAA +AGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG +TTATAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATG +GTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGC +AAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAG +TACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACA +GTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC +TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAG +CTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGT +AAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAG +TCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTG +ATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATG +TCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGAT +GTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGA +CATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACA +AAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGT +GCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAA +TACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCA +ACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGG +TGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTG +TGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTC +ATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA 
+ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTAT +ACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGT +GGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATG +GTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATC +TGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGC +TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGC +CAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT +GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTAT +TCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTT +TTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGT +GTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTA +ATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCA +TCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTA +CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTG +CCTTTAATACTTTACTATTCCTTATTTCATTCACTGTACTCTGTTTAACA +CCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGA +TGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATC +ATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAA +GAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG +CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGT +GATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAG +CTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGT +TCTGATGTTCTTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTT +GCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTT +GTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT +GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATT +TCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATG +CAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACC +TAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAG +CTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCC +AATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGG +AATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTAT +GGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAAC 
+TATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAG +ACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTT +GTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG +CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTG +GGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACA +ATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTA +CACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTC +CAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT +ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTG +TCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC +ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCG +TATTATGACATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGC +TAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATG +ACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTAT +GAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATC +AAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTATTCA +GGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTG +TGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTA +TAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGC +CTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGA +TTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTAC +TCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTG +GGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAAT +GTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAAC +TCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACAC +AATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGT +TTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACA +AGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCC +TCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGA +AGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAA +AGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCA +GCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAAT +GTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTA +TGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTC +AACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAAT +ACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACA +CATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTG 
+TGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG +TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAA +CAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGT +CCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGC +TTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTA +GGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGA +TTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACC +TTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTAT +ACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGT +TTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGC +CAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAG +CTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTT +AAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACC +GGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGT +ACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTA +AAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGG +TTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTT +ATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGAT +GCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACA +CCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACAT +CTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTT +GTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTT +GTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTA +TAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGT +TTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACT +AAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGG +TAATTGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATG +ATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGAT +ATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAATCTTTGTT +AAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTG +TACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGT +GATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTA +TTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAG +AGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTG +TTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTT +TAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATG +ACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTG +TTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGG 
+TGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTG +TACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAA +TTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCT +ATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACA +ATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT +GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATT +AAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATG +ACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTA +TTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTG +TATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTT +TTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGT +TATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCC +TACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAG +CTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAG +TTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGT +AGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAA +CTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCT +AAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGT +TCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATA +GATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGC +GGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAAC +TGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATG +TTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTC +CGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGT +TGACACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCT +CAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTAT +GCATCTCAAGGTCTATTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTA +TTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTG +ACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTT +AAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAAT +CCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACAC +TTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACT +AAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATA +CATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATT +CTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTT +TATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTG +TGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTA +GACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCA 
+CATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTG +TGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATT +GTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAA +GTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGA +CTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTT +TAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTC +AAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACG +TGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAAC +CTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT +AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTA +TGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACAGATTAAATGTTG +GTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCT +ACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAAC +ACTCAATATCTCAGATGATTTTTCTAGCAATGTTGCAAATTATCAAAAGG +TTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAG +AGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGT +GTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCAT +TAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCT +CGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTA +TGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTG +TCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAAT +GCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATT +ACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATT +TCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTC +GGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTT +GGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCT +TTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATT +AACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGC +TTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAG +CCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGC +TCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTC +TTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCA +TACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACA +AGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGT +AACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTA +CACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGT +TTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACT +CATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTA 
+ACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATT +GGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAA +TTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTAC +CTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGT +GCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTA +CAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAA +GTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCA +CATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGA +GCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTT +CAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTAT +AATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACA +AAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTA +GTTGTGATGCAATCATGACTAGGTGTTTAGCTGTCCACGAGTGCTTTGTT +AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAA +GATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCAT +TATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCT +ATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACA +GCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATG +CCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGC +AATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAG +AGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATG +TAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTT +AATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTC +TCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTG +CTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCAT +GCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGC +TGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGA +ACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTA +AATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCAT +TAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTG +AAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAG +CGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGT +GGACATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAG +CACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAA +CCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGT +TGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTA +CAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCT +AGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAA 
+TTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACT +TTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAA +ATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATT +AGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTC +AGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAA +TCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAA +CTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTG +TTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGAT +TTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAAT +TTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAA +AATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTT +TACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGG +TGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATA +CTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAAT +ATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGG +TACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATT +CAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT +TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATAT +GTACGACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGG +GTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGA +GGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTA +TAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGA +ATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAA +CCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTG +GAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGA +GTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAA +GGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTAT +AATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACA +ACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAG +TCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTA +ATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCA +GTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTAC +TTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTG +ATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAG +AAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAA +GACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAG +TCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCAC +AAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGC 
+GAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTG +AAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAAT +ATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGT +GCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGC +CAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGA +AGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGC +AGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATA +ATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTC +TCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTA +TCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTC +CTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGA +TTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGC +TGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTT +ATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTAT +GCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGG +GCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTA +CAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGT +GGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACC +TTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAACACACCTT +GTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGT +TTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACT +TTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGT +CTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTA +ACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCA +ACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCAC +AGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGT +GTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCA +GGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTA +CTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGT +GCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGA +CATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATT +CTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACT +ATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGC +CATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGT +CTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCA +ACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATT +AAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAG +AAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGAT 
+TTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAG +CAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAG +ATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCT +AGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACC +TTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGG +GTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATA +CCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACA +GAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTG +CTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGA +AAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGT +TAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATA +TCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTG +ATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAAT +TAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGT +CAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGC +TATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTT +GCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTG +CCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTT +TCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACA +AATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAA +TAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGAC +TCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGA +TGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTC +AAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCT +CTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC +ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGG +TGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGC +TGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCC +AGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTT +GTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAA +TCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCG +ATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCT +TGCTGTTTTTCATAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGC +AACTATCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTG +TTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGC +CCCTTTTCTTTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACT +TTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAA +AACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTG 
+TTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTA +CTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATT +GGTGGTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATT +ACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAATTGA +GTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATT +GTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATC +CGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGACGA +CTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC +TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCT +TTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTG +CGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTA +AAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAG +AGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCT +GTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCG +TTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC +CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAA +TAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAG +TAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTATAGAATAAATTGGATC +ACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCT +CAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGT +GGTCATTCAATCCAGAAACTAACATCCTTCTCAACGTGCCACTCCATGGC +ACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT +GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTG +ACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTT +TCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTT +TGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACC +ATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACA +GATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAA +TTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA +AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTC +TCAATTAGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAA +TTATTCTTTTCTTGGCACTGATAACACTCGCTATTTGTGAGCTTTATCAC +TACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTC +TTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACA +AATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGAC +GGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT +GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTA +TTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAG 
+ACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCC +TTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACT +TGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAAT +TTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAA +TGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC +GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAA +AATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCA +CCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTT +TACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTT +CGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGAT +TTCATCTAAACGAACAAACTAAAATGTCTGATAATGGACCCCAAAATCAG +CGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG +TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCC +AAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACAT +GGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACAC +CAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGAC +GAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTAT +TTCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAA +CAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA +AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTA +CAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAG +CAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACA +GTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGA +ATGGCTGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAG +ATTGAACCAGCTTGAGAGCAAAATCTCTGGTAAAGGCCAACAACAACAAG +GCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG +CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAG +ACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCA +GACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCC +AGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACC +TTCGGGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAG +ATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCA +TACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGAC +TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGA +CTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAA +TCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACAC +AAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATA +GTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGAT 
+GTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTA +GGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC +GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAG +CCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNN +>Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28 +AGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCT +GCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTT +GACAGGACACGAGTAACTCTTCTATCTTCTGCAGGCTGCTTACGGTTTCG +TCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTGAC +CGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACAC +GTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGG +CTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTA +AAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTCTTGCCTCAA +CTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACC +TCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGT +ACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAA +ATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGG +AGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCG +ACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACT +AAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGG +GGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACC +CTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGC +ACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTG +CTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAA +AGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTT +GACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCAT +AATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTA +TGGGTAGAATTCGATCTGTCTATCCAGTTKCGTCACCAAATGAATGCAAC +CAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTC +ATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTG +AGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAAT +GCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACC +TGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTC +TTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTAT +GTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAA +CATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTA +ATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATT 
+GTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTT +TTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATA +AAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAA +GGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACT +GAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAA +TTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAG +AAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACT +CATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTG +TAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTA +ACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTG +GCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGG +AAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAA +ATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAA +GCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTG +GAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCA +AAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACT +CATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACAC +TTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTA +CAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGG +TACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAG +AAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTC +ACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGT +GATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATG +AAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAA +CTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCAT +AAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGATT +TAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAG +TTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGA +TGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAAT +ATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGGT +GCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTT +AGATGATGATAGTCAACAAACTGTTGGTCAAMRRRRVRKHRKKKWRRRMA +RWMAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAG +ATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGG +TTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGG +AAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATGTT +TACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTAA +CAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCAC 
+TTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACAC +TGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACT +TCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCAC +CATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGA +GTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATAA +AAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAAA +AGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCA +TTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAA +GAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTA +AGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTT +CATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAA +GAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAA +CTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAATGCTA +GCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATWWDWHSRMWWRMSM +RRRWMAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTA +AAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGAG +AAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGC +ACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAG +CCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAG +GGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAAC +TGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTA +CAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGCT +CGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCACC +TGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACAC +CTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGAT +TGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAG +AGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAG +ATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGA +GAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCCA +CACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCAA +CTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACAT +GAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGA +GGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACA +TGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGGT +TTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATT +GTTAACACTCCAACAAATAGAGTTGAAGTTTAAWMMMMCWRMWYTAMAAR +ATSCWTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTT +ATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGA 
+AACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAG +TCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAAG +GGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTAA +GAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATC +TAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAG +TATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAA +TTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATT +GCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATT +ACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAGT +TACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTTGG +ACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGAT +CTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTT +TGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTT +ATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTA +AATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAA +GAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATG +CAACTAATAAAGCCACGTATAAACCAAATACTTGGTGTATACGTTGTCTT +TGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTC +AGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAG +TCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAG +TGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGC +AAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGG +CTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTA +TCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGT +TAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTA +ACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGT +GTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTG +TACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTA +CTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCT +TCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTAT +AATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAA +CCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGT +ACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAAC +CTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATT +CTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCT +TTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGC +ATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCA +TGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGG 
+CTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTAT +GGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTT +ATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTAC +AAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGT +TAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAGAGGCTTTTGCAAAC +TACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACA +TTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACC +AATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGA +AGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTAT +GAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAA +TAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAAT +CAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAG +CTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGATGT +TGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATA +CGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTT +GCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTT +ATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAG +AAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATA +GAAGTTACTGGCGATAGTTGTAATAACTACATGCTCACCTATAACAAAGT +TGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGC +GTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATATGG +AACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACG +TAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTA +CTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGT +AAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTT +CCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGT +CTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGAT +GGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAA +ACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTA +ATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGGT +TTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGA +CTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTT +ACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGT +GTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGT +ACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAA +GTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAA +TTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGA +TTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTT 
+GTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCT +TTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATAT +GTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTA +TAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTACTAT +TTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTT +TAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCAG +TTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACA +TTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGT +TATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTT +GTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGA +CGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCT +GTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATG +TGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAG +TACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGC +TTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTG +ATGTTCTTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTTGCAG +AGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTAT +GGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGATG +ACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTT +AACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTT +GGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAA +ATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAG +TATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTG +TTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCCAATT +TCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTT +AACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATT +ACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGAC +CTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTATT +ACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAG +GTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGG +CTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTA +GGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTC +ATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTA +GTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATGC +TCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACACA +CCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGA +GTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCT +TTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAA 
+ACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTG +TAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATT +ATGACATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAA +AGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAG +CAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAAT +GTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGRWYWWKY +YWWTTYYWTKTKGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTG +TAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTT +GAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAAT +GCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCT +TTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTAC +TTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCC +ACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTG +TTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCA +GATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAG +AGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATG +ACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCA +CTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCT +TTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAG +AGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCT +TATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTT +GAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCA +TGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTAT +AAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCA +GACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACA +ACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCT +CTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATA +TAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGG +AAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAA +ATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGC +TTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTG +TTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGC +ACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTT +TGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCC +CTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACCTTGT +AGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTT +TATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAG +CTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAAT 
+TCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTA +CAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGA +TGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAA +GCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTTTACTG +CCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAG +GTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTT +ACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGG +CTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCAC +AATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGT +GCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTAC +AATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTWRDHK +HWWYYWWKWWWWKKWYRAAGATGACAATTTAATTGATTCTTACTTTGTAG +TTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAAT +TTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAG +AATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAAT +ACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAAT +TGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGA +TTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATAT +TACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAA +ACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACT +GACATTAGATAACCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATT +TCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTAT +TCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTC +ACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAA +AATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAA +TATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAG +ATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCC +CACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTT +CCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACA +TAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTAC +TTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTA +CTAGATAAACGCACTWCGTGCTTTTCAGTAGCTGCACTTACTAACAATGT +TGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACT +TTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAA +CACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTA +CTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTG +TAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATT +AATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCC 
+ATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATG +AGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACT +ATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCG +CACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTC +ATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTA +ATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGT +TTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAAT +GTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTT +GCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATT +AGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTT +CACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCT +TATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAA +TGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCA +ATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGAC +ACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAAT +GATGATACTYTSTGWYKWWSCTRTTKTGTGTTTCAATAGCACTTATGCAT +CTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTAT +CAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCT +TACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAAC +AGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTA +GGGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTAT +GATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAAC +ATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATA +AGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGT +TATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATG +AGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTT +CTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACC +ATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATA +AATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGAT +GTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAA +ATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTT +TTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTT +AATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGC +TAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAG +CTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTAMGTKAA +KTGYTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAG +ACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAA +ACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGT 
+GATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGA +TTATTTTGTGCTGACATCACATACCGTAATGCCATTAAGTGCACCTACAC +TAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTC +AATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGG +TATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTC +ATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTAT +ACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAA +ATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTG +TAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTC +TTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTT +TGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCA +GATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCT +GCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAA +TTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAA +CTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTT +TATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAA +AATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACA +GGCCACAAATAGGCGTGGTAAGAGAATTYYTTACAMGTAACCCTGCTTGG +AGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTC +AAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAG +AATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGT +AATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACT +TTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTC +TTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACA +GGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACA +GGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTAT +GTGTTGACATACCTGGCATACCTAAGGACATTACCTATAGAAGACTCATC +TCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACAT +GTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCT +TCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTA +CCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTAC +AGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTA +AACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAA +GGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGA +CACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATG +GCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGC +ACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGA +CACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATC 
+CGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGC +AACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTG +TGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGC +GTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATT +AATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATT +AGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTA +AGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCT +TGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCAC +ACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATG +TCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTG +CTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAA +TAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATT +TAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCAT +GGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTAC +GTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTA +ATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGC +TTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACAC +TTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATA +ARRKWYWYWWWRWTKKAMAAMAGGGTGAAGTACCAGTTTCTATCATTAAT +AACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAA +TAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCA +ACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGAC +ATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACA +TATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAA +CTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGAT +GGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGA +AGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTC +TTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTAT +TATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTAC +TCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTG +ATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAA +GGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTT +AGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCAC +CTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTAT +TTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTAT +TGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTAT +CTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCA +TTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATT 
+ACAATCTAGTCAAGCGTGGCAAMYRGKYRWKSSTRKSMCTAATCTTTACA +AAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGAT +AGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCA +ACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGA +GAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACA +GCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGA +TCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTG +CAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTAC +GACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTT +TTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTT +CCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAG +CTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGC +CTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCAC +GCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGG +AATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAA +ATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTC +AAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATT +AGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTA +AACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAG +TGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTC +TTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTT +TACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGG +TTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAA +CCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGT +CTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACC +CAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTG +TGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAA +ACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAAT +AATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGG +AAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTG +ATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGT +GATCTCCCTCAGGGTTTTTCGGTTTTAGAACCATTGGTAGATTTGCCAAT +AGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTT +ATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCT +TATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGA +AAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAG +AAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAA +ACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAA 
+TATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAAMGCCACCAGATTTG +CATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGAT +TATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGG +AGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAG +ATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAA +ACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGG +CTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTA +ATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTT +GAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAA +TGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCC +AACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCT +TTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTAC +TAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAG +GCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAA +TTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGAC +ACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTA +TAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGT +GTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCC +TACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAG +GCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATA +CCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCC +TCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGT +CACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATA +CCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTAT +GACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTG +AATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAAC +CGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGT +TTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTG +GTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAG +AGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGC +TGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAG +ACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTG +CTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTAC +AATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCAT +TTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAAT +GTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTAT +TGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAAC +TTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAA 
+CAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCT +TTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCA +CAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGA +GCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGA +GTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATC +ATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCAT +GTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCAT +TTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAA +ATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATC +ATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGG +AATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCAT +TCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTT +GATTTAGGTGACATCTCTGGCATTAATGCTTCATTTGTAAACATTCAAAA +AGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCA +TCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGG +TACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGAC +AATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTT +GTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTG +CTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTT +ATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAA +GGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCGATAC +AAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCT +GTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACT +AGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTG +TAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCT +TTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGT +AAGAATAATAATGARRYTTTRRYTTTGMTGGAAATGCCGTTCCAAAAACC +CATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTAC +GACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTC +AGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTG +GTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACAC +AGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAGTAC +AGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTG +ATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGA +GTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTAC +TAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCAT +TCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTT +CTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCT 
+TCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAAC +CTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTT +CCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTT +GGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGA +AGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTCCTAT +TCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGG +TTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAAC +TTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCG +GTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTGAGC +TACTTCATTGCTTCTTTCAGACTGTTTGCGCBDWYKYBKKYYWYKKKKBY +WTTCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTA +TTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATC +CTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACAT +CAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTT +ATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCT +GCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTC +CAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATG +TTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTAT +TATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACC +TCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAA +TTAGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTAT +TCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACC +AAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCT +GGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATT +TGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCG +TAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTC +ATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGT +TGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAG +AATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTC +TGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAA +CTGCAAGATCATAATGAAWYTTDTYWYRYYTAAACGAACATGAAATTTCT +TGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTA +GTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGT +CCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATC +AGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCA +TTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACA +ATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTT +CTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCA 
+TCTAAACGAACAAACTAAAATGTCTGATAATGGACCCCAAAATCAGCGAA +ATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAAC +CAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGG +TTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCA +AGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAAT +AGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAAT +TCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCT +ACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAA +GACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGA +TCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAAC +TTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGA +GGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTC +AAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGG +CTGGCAATGGCGGTGATGCTGCTCTTGTTTTGCTGCTGCTTGACAGATTG +AACCAGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCA +AACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAA +AACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGT +GGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACA +AGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCG +CTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCG +GGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCC +AAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACA +AAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTGAT +GAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCT +TCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCA +TGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGG +CAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCT +ACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAATTAGATGTAG +TTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGA +GGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACGATC +GAGTGTACAGTGAARVDHDDVBVNVVDKDVNR +>England/NORW-30F5277/2021|OV313316.1|2021-11-17 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTTTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC 
+ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAA +CACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG +GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGC +TACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTAT +ACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCT +GAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA +ATTTGACACCTTTAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATT +CCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGC +TTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAA +CTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGC +ACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCA +AAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG +GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACC +ATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCG +CTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT +CTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAA +TATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCAT +CTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT +TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAA +TACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGA +TCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTT +ACAGAAGGCCGCTATAACAATATTAGATGGAATTTCACAGTATTCACTGA +GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTA +GTTGTAATGGCCTACATTACAGGTGGTGTTGTCCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTG +ATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGT 
+TGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGG +ACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCT +TTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCGAACTCTATCATTATT +GGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAA +ACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGA +TTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGG +TTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGAC +ACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACA +CTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGT +TGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTG +TCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTAGGCATT +GATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATG +AGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACT +CAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT +GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAG +GACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTA +GTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATT +GTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAA +TGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA +CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGA +CCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTC +AACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTT +GCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTT +AAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTG +ATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATG +ATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA +ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAA +TCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCT 
+TAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTT +TTAACTGCTGTGGTTATACCTACTAAAAAGTCTGGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTT +ACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTG +CTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAA +TGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC +TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACT +AAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTT +GTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGC +TGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTT +CACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAA +ACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTA +AGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTT +GAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACC +TCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGT +CCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTG +TTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG +TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTG +CATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTA +CAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTA +GAGAAACAATGAGTTACTTGTTTCAACATACCAATTTAGATTCTTGCAAA +AGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCT +TAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT +TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAA +TATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTG +GTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTG +TATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCC +TATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAAC +CAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT 
+TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTA +AGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACT +GGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGA +CTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTT +TTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAAC +AATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGA +AGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA +CTAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCT +TGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAAC +CAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTA +ATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG +CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTT +CTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAA +CCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAAT +TGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCG +ACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATA +TTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTA +CTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTG +CAACCTACTGTACTGGTTCTATATCTTGTAGTGTTTGTCTTAGTGGTTTA +GATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTT +TGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCA +ATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAG +CTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAA +AGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG +TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATG +GTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGC +AAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAG +TACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACA +GTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC +TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAG +CTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGT 
+AAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTATTACAG +TCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTG +ATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATG +TCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGAT +GTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGA +CATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACA +AAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGT +GCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAA +TACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCA +ACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGG +TGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTG +TGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTC +ATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA +ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTAT +ACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGT +GGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATG +GTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATC +TGTTACACACCATCAAAACTTATAGAGTACACTGATTTTGCAACATCAGC +TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGC +CATTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT +GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTAT +TCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTT +TTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGT +GTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTA +ATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCA +TCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTA +CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTG +CCTTTAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACA +CCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGA +TGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATC +ATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAA +GAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG 
+CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGT +GATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAG +CTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGT +TCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTT +GCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTT +GTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT +GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATT +TCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATG +CAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACC +TAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAG +CTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCC +AATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGG +AATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTAT +GGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAAC +TATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAG +ACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTT +GTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG +CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTG +GGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACA +ATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTA +CACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTC +CAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT +ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTG +TCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC +GCTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCG +TATTATGACATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGC +TAAAAGACTGTGTTATGTATGCATCAGCTGTGGTGTTACTAATCCTTATG +ACAGCAAGAACTGTTTATGATGATGGTGCTAGGAGAGTGTGGACACTTAT +GAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATC +AAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCA +GGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTG +TGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTA +TAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGC +CTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGA 
+TTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTAC +TCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTG +GGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAAT +GTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAAC +TCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACAC +AATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGT +TTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACA +AGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCC +TCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGA +AGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAA +AGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCA +GCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAAT +GTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTA +TGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTC +AACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAAT +ACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACA +CATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTG +TGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG +TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAA +CAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGT +CCTGTTGCACTACGACATATGTCTTGTGCTGCCGGTACTACACAAACTGC +TTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTA +GGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGA +TTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACC +TTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTAT +ACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGT +TTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGC +CAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAG +CTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTT +AAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACC +GGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGT +ACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTA +AAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGG +TTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTT +ATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGAT +GCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACA +CCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACAT +CTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTT 
+GTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTT +GTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTA +TAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGT +TTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACT +AAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGG +TAATTGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATG +ATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGAT +ATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTT +AAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTG +TACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGT +GATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTA +TTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAG +AGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTG +TTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTT +TAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATG +ACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTG +TTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGG +TGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTG +TACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAA +TTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCT +ATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACA +ATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT +GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATT +AAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATG +ACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTA +TTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTG +TATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTT +TTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGT +TATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCC +TACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAG +CTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAG +TTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGT +AGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAA +CTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCT +AAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGT +TCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATA +GATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGC +AGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAAC 
+TGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATG +TTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTC +CGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGT +TGACACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCT +CAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTAT +GCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTA +TTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTG +ACCTTACTAAAGGACCTCATGAATTTTGCTCKCAACATACAATGCTAGTT +AAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAAT +CCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACAC +TTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACT +AAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATA +CATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATT +CTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTT +TATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTG +TGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTA +GACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCA +CATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTG +TGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATT +GTAAATCACATAAACTACCCATTAGTTTTCCATTGTGTGCTAATGGACAA +GTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGA +CTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTT +TAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTC +AAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACG +TGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAAC +CTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT +AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTA +TGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTG +GTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCT +ACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAAC +ACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGG +TTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAG +AGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGT +GTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCAT +TAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCT +CGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTA +TGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTG +TCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAAT 
+GCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATT +ACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATT +TCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTC +GGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTT +GGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCT +TTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATT +AACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGC +TTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAG +CCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGC +TCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTC +TTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCA +TACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACA +AGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGT +AACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTA +CACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGT +TTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACT +CATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTA +ACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATT +GGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAA +TTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTAC +CTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGT +GCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTA +CAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAA +GTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCA +CATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGA +GCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTT +CAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTAT +AATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACA +AAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTA +GTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT +AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAA +GATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCAT +TATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCT +ATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACA +GCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATG +CCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGC +AATGTCGATAGATATCCTGTTAATTCCATTGTTTGTAGATTTGACACTAG +AGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATG 
+TAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTT +AATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTC +TCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTG +CTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCAT +GCTAATGAGTACAGATTGTATCTCGATGCNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNACAAACAATTTGATACTTATAACCTCTGGA +ACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTA +AATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCAT +TAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTG +AAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAG +CGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGT +GGACATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAG +CACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAA +CCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGT +TGATGGTCAAGTATACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTA +CAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCT +AGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAA +TTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACT +TTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAA +ATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATT +AGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTC +AGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAA +TCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAA +CTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTG +TTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGAT +TTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAAT +TTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAA +AATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTT +TACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGG +TGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATA +CTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAAT +ATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGG +TACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATT +CAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT +TGTGCAATTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATAT +GTACGACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGG +GTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGA +GGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTA 
+TAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGA +ATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAA +CCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTG +GAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGA +GTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAA +GGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTAT +AATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACA +ACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAG +TCAGTGTGTTAATCTTAGAACCAGAACTCAATTACCCCCTGCATACACTA +ATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCA +GTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTAC +TTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTG +ATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAG +AAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAA +GACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAG +TCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGRTGTTTATTACCAC +AAAAACAACAAAAGTTGGATGGAAAGTGGAGTTTATTCTAGTGCGAATAA +TTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAA +AACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGAT +GGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGA +TCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAG +GTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTAT +TTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTA +TTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAA +ATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAA +ACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAAC +TTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATA +TTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCA +TCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTA +TTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAG +TGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGAT +TCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAAC +TGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCT +GCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAAT +TATAATTACCGGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGA +GAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCAAACCTTGTAATG +GTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAA +CCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTT 
+TGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTA +ATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGC +ACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATT +TGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACAC +TTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATA +ACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGT +TAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTA +CTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGC +TGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACC +CATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCGTC +GGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCA +CTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACC +CACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGA +CCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAA +TGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCG +TGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTT +TTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGT +GGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAG +GTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTG +GCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGAC +CTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCT +CACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAA +TCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTT +GCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGT +TCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTG +GCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTT +CAARATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACA +ACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTT +CACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACA +GGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGC +TGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGT +GTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCAT +CTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGT +GACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTT +GTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAAT +GGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCAT +TACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAA +TTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTC 
+AAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGA +TTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAG +AAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATC +GATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTA +CATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAA +TTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGT +TCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCT +CAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTAT +GAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGTTGAAATCAAGG +ATGCTACTCCTTTAGATTTTGTTCGCGCTACTGCAACGATACCGATACAA +GCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGT +TTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAG +CACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTA +ACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTT +TCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAA +GAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCA +TTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGA +CTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAG +GTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGT +TATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAG +TTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAG +ACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGAT +GAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGT +TGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTA +GCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTC +GTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCT +TGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTC +GATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCT +TCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCC +TGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGG +AACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAG +AGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTCCTATTC +CTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTT +TTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTT +TAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGT +GGAATTGCTACCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTA +CTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCAT +TCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATT 
+CTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCT +TCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCA +AGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTAT +TACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGC +ATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCA +GTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTT +TCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTA +TGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACCTC +ATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATT +AGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTC +TTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAA +GAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGG +AACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTG +CACTGACTTTCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTA +AAACACGTCTATCAGTTACGTGACAGATCAGCTTCACCTAAACTGTTCAT +CAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTTTTG +CGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGATAGAA +TGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTG +CTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACT +GCAAGATCATAATGAAATTTGTCACGCCTAAACGAACATGAAATTTCTTG +TTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGT +TTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCC +TATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAG +CACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATT +CAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAAT +TAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCT +ATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAATCTAAACG +AACAAACTAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACCCC +GCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGA +GAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAA +TAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGGCC +TTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGTCCA +GATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGGTGG +TGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAG +GAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATC +ATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGG +CACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAG +GAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGT 
+CAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAATTC +AACTCCAGGCAGCAGTATGGGAACTTCTCCTGCTAGAATGGCTGGCAATG +GCTGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTT +GAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCAC +TAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTG +CCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAA +CAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGA +TTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGT +TCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGG +TTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAA +AGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCC +CACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTTATGAAACTCAA +GCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGC +TGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTG +CTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGC +TATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTG +CAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTA +ATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAA +AGAGCCACCACATTTTCACCGAGGCCACTCGGAGTACTATCGAGTGTACA +GTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAA +AATTAATTTTAGTAGTGCTATCCNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +>Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29 +ACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTG +TGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATA +ACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAG +GCTGCTTACGGTTTCGTCCGTTTTGCAGCCGATCATCAGCACATCTAGGT +TTTGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTT +CAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCG +ACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAG +GCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAA +AGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGG +ATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAA +CTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGT +CCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTA +AGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAG +TCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCA +AGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGC 
+GTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGT +GGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGC +TGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTA +AGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTAC +ACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAA +ATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTAT +TTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAA +AAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTC +ACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATC +ATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGC +GAATTTTGTGGCACTGAGAGTTTGACTAAAGAAGGTGCCACTACTTGTGG +TTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACA +ATTCAGAAGTAGGACTTGAGCATAGTCTTGCCGAATACCATAATGAATCT +GGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGG +CTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTC +CACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAA +GGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAA +AGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCA +TTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTG +AAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAA +TTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTG +AACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCT +CGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTC +TGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCAC +AGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCT +ACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTT +GACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCA +AACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTT +CTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGA +AATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTG +TTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGAC +TCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAAC +ATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAG +AAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTC +TTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTT +GAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTG +AAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTC +GAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGT 
+AACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTT +TTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATC +ACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTC +TGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTG +TGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACA +CCACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATT +TGATGAGTCTGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTT +ACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTT +GAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAA +ACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGC +AAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAA +GACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGT +TCAACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAG +TGAATAGTTTTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAAA +AATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGT +TAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCT +TAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATA +GCTACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACA +CAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAG +GTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCAC +GAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCC +TATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACT +TAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTG +GAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAA +AGAGGAAGTTAAGCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGA +GAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACA +ACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGA +CATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTG +ACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTT +CAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGTCTGGTGG +CACTACTGAAATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATT +ATATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAG +GCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATC +TATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATT +TGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTC +TGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGG +TATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTT +ACACCAGTAAAACAACTGTAGCGTCACTTATCAACACACTTAACGATCTA 
+AATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAAA +TTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAG +TTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACT +TCTTCTTCTAAAATACCTGAAGAACATTTTATTGAAACCATCTCACTTGC +TGGTTTCTATAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGTA +TAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCT +ACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGAC +ACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAG +ACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGA +CAACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAA +ACCTCATAATTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGATG +ACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGT +TTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATA +CCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTT +ATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAAT +CCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGC +TAACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGT +TAGGTGATGTTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTA +GATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACA +GCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTT +CTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAA +CAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTC +AGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTA +GTGAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCT +AAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGA +ATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAA +CAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAA +ATTGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCAC +AGAGCAACCAATTGATCTTGTACCAAACCAACCATATCCAAACGCAAGCT +TCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTA +AACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTAC +ATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACACT +ACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTT +TGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTG +GTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGT +TTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGC +GAAGATCTAAAACTAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACA +GAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACA 
+TTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGC +CACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAA +GAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTC +ATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTAT +GCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTAC +ACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTT +TATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAA +GCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAA +ATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTA +AACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGT +TCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGG +CATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTA +ATGTCACTATTGCAACCTACTGTACTGGTTCTATATCTTGTAGTGTTTGT +CTTAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACA +AATTACCATTTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTG +CAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTT +GGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTT +TATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGG +CCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTAT +TATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAAC +TTGTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAA +CTATTGTTAATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGT +AAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGATACATT +CTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCAC +TACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTT +GATAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGC +TGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAG +ACAACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATA +GTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTC +TGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGG +CATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTT +GATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAA +ACTCAAAACACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGT +CCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTT +GTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTC +ACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGC +TCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGT +ATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAA 
+CATTGCTTTGATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAAC +TACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAG +TTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGAT +AGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTA +AAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACA +CCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGG +ATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATA +CTTGTTTTGCTAACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGT +GGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCAT +AACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTAC +GCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCA +GTTGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGATTT +TGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATG +CTTCTGGTAAGCCATTACCATATTGTTATGATACCAATGTACTAGAAGGT +TCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGA +TGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAG +TGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGA +TCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAA +TGATTATTACAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAA +ATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTTG +GACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAAC +ATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACA +GTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCACTGTA +CTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTAT +TTACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCAC +ATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACA +ATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAG +TAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTT +TTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTA +AAGTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTT +AGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTA +GCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTC +AGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAATCTCTATCAC +CTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTA +AAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAAC +GGTCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCAC +CTCTGAAGACATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGT +CTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATT 
+GGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAA +TCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTT +TTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGT +GCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATG +TGGTAGTGTTGGTTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACA +TGCACCATATGGAATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAA +GGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGG +TACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTG +TTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAAT +GACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGA +CCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTT +TAGATATGTGTGCTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGA +CGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGA +TGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAA +CAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTT +TTAGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGA +AAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTG +CAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTA +CCTTCTCTTGCCGCTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAG +TTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGT +CTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTGGTGTTA +CTAATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGT +GTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTA +ATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACT +TCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTAT +TGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATA +CACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACT +TGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCT +TGGTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATT +CACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAAC +ATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGT +ACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAG +TTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGT +GTCCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTT +TGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTG +TAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTA +CAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGC +TACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAG 
+TTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTT +GACCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGC +TATGACCCAAATGTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAG +TTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGAT +AATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCC +CTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATAC +CAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTAT +GCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAAT +TGTTCAACTTAGTGAAATTAGTATGGACAATTCACCTAATTTAGCATGGC +CTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAAT +AATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTAC +TACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAA +CAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTG +AAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGA +ACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAG +TGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATG +GTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAAC +AGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAG +ATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATC +ACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAAT +AACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCAT +CGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGA +TTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAA +TGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTA +TGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTT +CAGTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCA +GCCCGTCTTACACCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAG +GGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAA +AAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATT +GATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGA +AGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATG +ACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGT +CAACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCA +TTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTCACATACA +ATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTA +GAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACG +CCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTG +GTATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGG 
+TATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGT +TGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGG +CTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATT +AAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTT +TGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTA +ACTGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTA +TTCTCTACAGTGTTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAAT +ATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAG +AGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTT +AGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGC +TTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTG +CACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAAC +AAAGACTTCTATGACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAG +TTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTA +TCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATC +AGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTA +CGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACA +AATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTAT +GATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACG +TAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCCATTAGTG +CAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATG +ACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAG +AGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACA +ACATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGT +TGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTTAGAATTAT +GGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCAC +ACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATG +GTCATGTGTGGCAGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGG +AGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTG +TCACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCC +GATAAGTATGTCCGCAATTTACAACACAGACTTTATGAGTGTCTCTATAG +AAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCATATTTGC +GTAAACATTTCTCAATGATGATACTTTCTGACGATGCTGTTGTGTGTTTC +AATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAA +GTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTT +GGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACAT +ACAATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGA +TCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAAAAA 
+CAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCT +TACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTT +GTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGT +TAGACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGG +GAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGC +TGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTG +CTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTC +ATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAA +TGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTA +TGAGCTATTATTGTAAATCACATAAACTACCCATTAGTTTTCCATTGTGT +GCTAATGGACAAGTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGA +TAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTG +GTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCA +GCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTAT +TGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGG +AAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGT +TATCGTGTAACTAAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGA +AAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACA +AATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCA +TTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGG +CTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAA +ATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCT +GGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTC +TGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTAT +GTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATA +CCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAAC +ATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAG +CAGATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTG +AGTGTTGTCAATGCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGA +CCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAG +AACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCA +GACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACAC +TGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAAT +CAGCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTT +TCATCTGCAATTAACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTAC +ACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCAC +AGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGAT +TCATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGA 
+AACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAG +CAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAG +TTGCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACA +AGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTG +GGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTC +AAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGAC +CTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTA +ATGGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTA +CGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGC +TGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACC +TAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTT +TCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCAT +ACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTG +TACAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTT +GTCTTATGGGCACATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAA +AATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCT +TTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTT +GATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTAC +AGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATG +CACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCAC +GAGTGCTTTGTTAAGCGTGTTGACTGGACTATTGAATATCCTATAATTGG +TGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTG +TTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGT +AACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTT +CTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTAT +TCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTA +TTTTGGAATTGCAATGTCGATAGATATCCTGTTAATTCCATTGTTTGTAG +ATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTG +GNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNGCTTGTGGGTTTACAAACAATTTGATACTT +ATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCT +TTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACC +AGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATG +TAGAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAG 
+CTTTGGGCTAAGCGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAA +TAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGACTACAAAA +GAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGAC +ATAGCCAAGAAACCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTT +TGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATG +GTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGT +CCCAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAA +AACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTAC +CTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGG +AGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGA +ACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATT +TTAGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAA +CGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAG +TACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGT +GTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATA +AAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGA +CTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAA +CATTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCT +ATGCCTAATCTTTACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCT +TCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATG +TCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCT +GTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGG +AGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGC +TGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACT +TTGATTGGTGATTGTGCAACTGTACATACAGCTAATAAATGGGATCTCAT +TATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAAGAAAATG +ACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAG +CTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAA +TGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTG +TTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAAT +TATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAA +TTACATATTTTGGAGGAATACAAATCCAATCCAGTTGTCTTCCTATTCTT +TATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATG +TCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAA +AGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATG +TTCTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCC +ACTAGTCTCTAGTCAGTGTGTTAATCTTAGAACCAGAACTCAATTACCCC +CTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTT 
+TTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTT +TTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTA +CTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTT +GCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTAC +TTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATG +TTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGT +GTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGGAGTTTATTC +TAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGG +ACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTT +AAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAA +TTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAG +ATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTA +CATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGG +TGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAA +AATATAATGAAAATGGAACTATTACAGATGCTGTAGACTGTGCACTTGAC +CCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGG +AATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTA +GATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCC +ACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTG +TGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTA +AGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAAT +GTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGC +TCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATG +ATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAG +GTTGGTGGTAATTATAATTACCGGTATAGATTGTTTAGGAAGTCTAATCT +CAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCA +AACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCA +TATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGT +AGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTA +AAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAAT +GGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCC +TTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTG +ATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGT +GTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCT +TTATCAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATC +AACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAA +ACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGA +GTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGA 
+CTAATTCTCGTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCC +TACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTC +TATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTAC +CAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGT +GATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTAC +ACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACA +CCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATT +AAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAA +ACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACAC +TTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATT +GCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTT +GCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGT +TAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTA +CAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGT +TACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTA +ATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCA +CTTGGAAAACTTCAAAATGTGGTCAACCAAAATGCACAAGCTTTAAACAC +GCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAA +ATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGAT +AGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACA +ATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTA +AAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGA +AAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGT +CTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTG +CTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTC +TTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGA +ACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATG +TTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAA +TTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATC +ACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAA +ACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAAT +GAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAA +ATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAG +TAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTC +AAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTC +TGAGCCATTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTAT +GGATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAG +GTGAAATCAAGGATGCTACTCCTTTAGATTTTGTTCGCGCTACTGCAACG 
+ATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGC +ACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGA +GATGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTG +TTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCT +TGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTA +TAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGT +TCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATAC +TAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTG +TCATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTAC +CAGATTGGTGGTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGT +TGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACTC +AATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAAT +AAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGG +TTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGA +CGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTT +ATGTACTCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGT +ACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCC +TTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGT +CTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTC +TTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGT +TTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTA +TTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATA +GGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAA +CAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTAT +GGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAAT +TGGATCACCGGTGGAATTGCTACCGCAATGGCTTGTCTTGTAGGCTTGAT +GTGGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTT +CCATGTGGTCATTCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTC +CATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGG +AGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGAC +GCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGA +ACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTC +AGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACA +CAGACCATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTG +ACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATAT +TACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTAC +ATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAA +ATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTGATTAAACGAACA 
+TGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTT +TATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACC +TTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTG +ATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGT +CCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGCTTCACC +TAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTT +TTCTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAA +AGAAAGATAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTT +TTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTT +CTCACTTGAACTGCAAGATCATAATGAAATTTGTCACGCCTAAACGAACA +TGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCAC +CAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGA +TGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAG +CTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCT +AAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTT +ACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGC +GTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTT +TTAATCTAAACGAACAAACTAAATGTCTGATAATGGACCCCAAAATCAGC +GAAATGCACCTCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGT +AACCAGAATGGAGAACGCAGTGGGGCGCGATCAAGACAACGTCGGCCCCA +AGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATG +GCAAGGAAGGCCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACC +AATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACG +AATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATT +TCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAAC +AAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAA +AGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTAC +AACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGC +AGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAG +TTCAAGAAATTCAACTCCAGGCAGCAGTATGGGAACTTCTCCTGCTAGAA +TGGCTGGCAATGGCTGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGA +TTGAACCAGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGG +CCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGC +AAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGA +CGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAG +ACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCA +GCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCT +TCTGGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGA 
+TCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCAT +ACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCT +TATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGAC +TCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAAT +CCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACA +AGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAG +TCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATG +TAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAG +GGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACTCGGAGTACG +ATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGC +CCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAAT +>England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTTTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAGACTTAGGCGACGAG +CTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACA +TAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCAT +ACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTT +GAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTT +GTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCC +GTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGC +TATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACAC +CTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCA +AGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGT +AGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAAT +GTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGC +AGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAAT 
+TTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGT +TGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGC +ATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGT +AAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGG +TTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAG +GTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGAC +AACTTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGG +TGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTG +CTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCA +TTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAA +AGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTC +CTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTC +TCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGC +CGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTG +ATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATG +GCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAA +CATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTG +AAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATT +GTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGT +CACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTG +TAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCT +AAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGG +ATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGC +CTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCC +ACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACC +ATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACAC +CAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAG +TACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACT +CAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAG +AAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGG +ATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGG +TACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAA +CTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGATTTAGAT +GAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAA +ATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAG +AAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAG +TATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCAC +TTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATG 
+ATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAG +ACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGA +ACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATT +TAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAA +GCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCT +TAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTAACAATG +CCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAA +GTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCT +TCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTA +AGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTA +TTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTG +TGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATAAAAATC +TCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAAAAGCAA +GTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTAT +AACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAA +TCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTC +CTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCC +AGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAG +ATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAACTGCT +GTGGTTATACCTACTAAAAAGTCTGGTGGCACTACTGAAATGCTAGCGAA +AGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTC +AGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAG +TGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCA +AGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATG +CAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATA +GTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGT +GGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAG +CGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATG +CCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTA +TATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCACCTGATG +CTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAA +GAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTC +CTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTG +ATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGT +GAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGT +GAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCCACACGC +AAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCAACTTAT +TTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGG 
+TAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTT +TTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCA +GCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAAC +TTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAA +CACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCT +TATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTT +AGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAA +TGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTG +AACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGT +AGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTAAGAAAG +GTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTA +CAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGA +ACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACC +AGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATA +GACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGA +TGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAGTTACTT +ATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTTGGACAAT +TATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGT +ACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTAT +GTGATAATATCAAATTTTCTGATGATTTAAACCAGTTAACTGGTTATAAG +AAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGG +TGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAG +GAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATGCAACT +AATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAG +CACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGG +ACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACTAGTCTCT +GAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAA +TGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATA +ATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCT +TATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAG +AGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATA +GTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAA +GTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTG +TACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTGTACTT +TTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATA +GCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATT +TAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTT +GGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCT 
+GCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGG +TTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAACCTACT +GTACTGGTTCTATATCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCTTTA +GACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAA +ATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATA +TTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAA +TTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTAT +GTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTA +GAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTG +CATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACG +TAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAA +GGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACAC +AATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTAT +TAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACCAATAA +ATCCTACTGACCAGTCTTCTTACATTGTTGATAGTGTTACAGTGAAGAAT +GGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAG +ACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACA +CTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAA +TGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTAT +GTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGATGTTGGTG +ATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATACGTTT +TCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAAC +TGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTA +CTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACT +AAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGT +TACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAA +ACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGCGTCAT +ATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATATGGAACGT +TAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTG +CTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGA +CAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAAT +TGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTT +TTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAA +CATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGG +TGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATG +CTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGAC +AAAGCTTGCCCATTGATTGCTGCAGYCATAACAAGAGAAGTGGGTTTTGT +CGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGACTTTT 
+TGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACA +CCATCAAAACTTATAGAGTACACTGATTTTGCAACATCAGCTTGTGTTTT +GGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCATTACCAT +ATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTA +CGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCC +TAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTG +AGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTTGTGTA +TCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACC +AGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTA +CACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTA +GCTGGTGGTATTGTAGCTATCGTAGTAACATTCCTTGCCTACTATTTTAT +GAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATA +CTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTAC +TCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTA +TCTTACTAATGATATTTCTTTTTTAGCACATATTCAGTGGATGGTTATGT +TCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATT +TCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGT +AGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCTGTGCA +CCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTA +TTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAA +GTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTT +GTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTT +CTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTTGCAGAGTGG +TTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTATGGTAC +AAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGATGACGTA +GTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAACCC +TAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTAC +AGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGT +GTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAA +GTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACA +ATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCCAATTTCACT +ATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACAT +AGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAA +CTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTT +GTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTATTACAGT +TAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGT +TTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATG +AAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGACC 
+TCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAA +AAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCT +TTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGG +TGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACACACCACT +GGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACT +CAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGC +TATGGGTATTATTGCTATGTCTGCTTTTGTAATGATGTTTGTCAAACATA +AGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCGCTGTAGCT +TATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGAC +ATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAAAGACT +GTGTTATGTATGCATCAGCTGTGGTGTTACTAATCCTTATGACAGCAAGA +ACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTT +GACACTCTTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTT +CCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTT +ACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTA +TTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAG +TTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGT +TTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGT +TTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCA +AGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGT +GGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGT +AAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAG +AATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATT +CTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACT +TTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTG +AAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTT +AGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGA +GCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGA +AGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAA +CGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACA +GGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAA +TGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATT +ATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTAC +AACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAA +ATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATC +CAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAG +TATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAA +GGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCA 
+CTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGA +TGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTAC +TTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAG +AGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACCTTGTAGGTT +TGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTA +AAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCC +ACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAAC +TGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAG +ATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTG +TGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAA +TATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTT +GCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAG +TATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACT +TAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTA +GTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAACCG +TTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGC +ACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGA +TAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCC +AAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAG +AGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACT +TAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAG +ACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACA +ATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGA +CACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATT +TCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGC +GTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGT +ACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACAT +TAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATA +CAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATT +GTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATG +TTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATAT +GACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTG +GGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCA +TTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTT +ACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCGTT +TGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATC +AGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTG +TATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGA 
+TAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTT +TTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCT +GTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTT +CTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATC +GTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTT +GAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGC +TAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTA +ATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGAT +CAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAAC +TCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCG +TAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAA +AAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGG +AACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATA +GTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGAT +AGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCG +CAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTA +ATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCAGTTCACTA +TATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGC +TAATAGTGTTTWTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCAC +TTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTA +CAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGA +CTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGA +TACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAA +GGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAA +CAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTA +AAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGT +GATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGC +CGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTG +AACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCT +AATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAA +GCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGC +TTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCT +ATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTG +CAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCT +TATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTA +GTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCAC +AGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCAC +ATAAACTACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGT 
+TTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGC +AATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACA +CCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACT +GAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCT +GTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCAC +CACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGT +AAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGC +TGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATT +TTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTG +CCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATAT +CTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGC +AAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTT +GCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGC +TTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATT +TGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAG +TGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTG +TACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATG +AAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTA +CGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACC +ACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAG +TGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGT +CGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA +TAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGT +TTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCA +CAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAA +AGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGA +TTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATAT +GACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGT +AAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCA +TAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAA +ATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACT +CTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCAC +CTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTT +GACATACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTAT +GATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATTTTTA +TCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGAT +GTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTT +ACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTT 
+ATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCA +CCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACT +TCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACAC +TTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTT +GAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTG +TTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTT +ATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTT +ATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCA +TGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATG +CAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTT +GACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGC +GGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAG +ACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGT +GTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAG +TGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATT +CTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGAT +AGATATCCTGTTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATC +TAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAAC +ATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAA +CAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAA +ACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTA +TAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAG +TACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAG +CTTATGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTA +CAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGA +CACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACAC +TGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAA +CAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATT +AAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGC +TGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATAT +CTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGAA +ACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCA +AGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTA +GTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAAT +GGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAA +GAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGA +GTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTC +TTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTA 
+TGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTG +GTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTT +GAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCAT +AACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATT +TATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTA +GTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTAT +GCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAAT +CTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATG +CAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGC +AACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGT +GTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTT +ATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGT +TTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTA +ATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACT +GTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCC +TAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCA +TTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTG +GCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCAT +GGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCAT +CATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAA +CAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATAC +AAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTC +CCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATC +AATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGA +AAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGA +ACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGT +TAATCTTAGAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCA +CACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACAT +TCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCA +TGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTG +TCCTACCATTTAATGATGGTGTTTATTTTGCTTCCATTGAGAAGTCTAAC +ATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTC +CCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAAT +TTCAATTTTGTAATGATCCATTTTTGGATGTTTATTACCACAAAAACAAC +AAAAGTTGGATGGAAAGTGGAGTTTATTCTAGTGCGAATAATTGCACTTT +TGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTA +ATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTT +AAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCA 
+GGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACA +TCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCT +GGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGG +TTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCA +TTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGT +ACGTTGAAATCCTTCACTGTAGAAAAAGGAATTTATCAAACTTCTAACTT +TAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACT +TGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTAT +GCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCT +ATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTA +CTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTA +ATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGAT +TGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAG +CTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTAC +CGGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATAT +TTCAACTGAAATCTATCAGGCCGGTAGCAAACCTTGTAATGGTGTTGAAG +GTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAAT +GGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCT +ACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTA +AAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTT +CTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGA +CATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTC +TTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGA +ACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCAC +AGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTG +TTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATA +GGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGC +AGGTATATGCGCTAGTTATCAGACTCACACTAATTCTCGTCGGCGGGCAC +GTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCA +GAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTT +TACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACAT +CAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAAT +CTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAAC +TGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAG +TCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAAT +TTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTAT +TGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCA +AACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGT 
+GCACAAAAGTTTAACGGCCTTATTGTTTTGCCACCTTTGCTCACAGATGA +AATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTG +GTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAA +ATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGA +GAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTC +AAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAAATGTG +GTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTC +CAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTG +ACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTT +CAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAAT +CAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTG +GACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCC +TTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGT +CCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATG +GAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACAC +TGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGA +CAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACA +ACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAG +TTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGA +CATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACC +GCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAA +GAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCT +AGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTT +GCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGA +TCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGT +CAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTT +CACAATTGGAACTGTAACTTTGAAGCAAGGTGAAACCAAGGATGCTACTC +CTTTAGATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTC +TCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAG +CGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCA +AGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTAC +TCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCCCCTTTTCTCTA +TCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAA +TAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTT +TATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTG +TATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATG +GCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACT +GAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTT 
+CACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACACTG +GTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAACCT +GAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAA +TCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGC +CTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCG +GAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTT +CGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGT +GTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTT +TACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCT +TCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTT +AATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTA +AAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTCCTATTCCTTACA +TGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTA +TATAATTAAGTTAATTTTTCTCTGGCTGTTATGGCCAGTAACTTTAGCTT +GTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATT +GCTACCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCAT +TGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATC +CAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACC +AGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGG +ACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACC +TTCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAA +TTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAG +TCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCA +GTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCT +CGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGAGGA +CTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATT +AAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGA +AGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCT +TGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGT +GTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATA +CGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGA +CTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACAC +GTCTATCAGTTACGTGCCAGATCAGCTTCACCTAAACTGTTCATCAGACA +AGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAA +TAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGATAGAATGATTG +AACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGCTATTC +CTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGA +TCATAATGAAATTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCT 
+TAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAG +TCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCA +CTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTT +TAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTAC +ATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTG +CCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAG +ACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAATCTAAACGAACAAA +CTAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTA +CGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGAGAACGC +AGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATAC +TGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGGCCTTAAAT +TCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGAC +CAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGG +TAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTG +GGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGG +GTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCG +CAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAA +CATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCC +TCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCC +AGGCAGCAGTATGGGAACTTCTCCTGCTAGAATGGCTGGCAATGGCTGTG +ATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGC +AAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAA +ATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTA +AAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACC +CAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAA +ACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCG +GAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACC +TACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCA +AGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAA +CAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTTATGAAACTCAAGCCTTA +CCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGA +TTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACT +CAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATA +AACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAAT +GAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCA +CATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCC +ACCACATTTTCACCGAGGCCACTCGGAGTACGATCGAGTGTACAGTGAAC +AATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAA 
+TTTTAGTANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +>England/MILK-338D3D9/2022|OV784995.1|2022-01-21 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAA +CACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG +GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGC +TACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTAT +ACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCT +GAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA +ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATT +CCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGC +TTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAA +CTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGC +ACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCA +AAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG +GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACC +ATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCG +CTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT +CTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAA +TATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCAT +CTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT 
+TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAA +TACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGA +TCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTT +ACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGA +GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTA +GTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTG +ATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGT +TGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGG +ACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCT +TTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATT +GGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCTCCAAAAGAAATTATCTTCTTAGAGGGAGAA +ACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGA +TTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGG +TTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGAC +ACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACA +CTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAGGTGCTCTGCCTATACAGT +TGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTG +TCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATT +GATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATG +AGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACT +CAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT +GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAG +GACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTA +GTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATT +GTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAA +TGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA +CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGA +CCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTC 
+AACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTT +GCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTT +AAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTG +ATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATG +ATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA +ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAA +TCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCT +TAAAGAAAGATGCTTCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTT +TTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTT +ACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTG +CTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAA +TGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC +TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACT +AAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTT +GTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGC +TGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTT +CACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAA +ACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTA +AGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTT +GAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACC +TCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGT +CCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTG +TTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG +TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTG +CATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTA +CAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCGGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTA +GAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAA +AGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCT 
+TAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT +TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAA +TATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTG +GTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTG +TATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCC +TATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAAC +CAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTA +AGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACT +GGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGA +CTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTT +TTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAAC +AATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGA +AGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA +CCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCT +TGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAAC +CAGCAAATAATATAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATG +GCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATT +ATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTG +TTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTT +AACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCG +TGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGT +GTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACT +ACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGC +TTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTA +TAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCA +ACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTG +TACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAA +CCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGAT +TCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATC +TTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGG +CATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATC +ATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTG +GCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTA +TGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGT 
+TATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTA +CAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTG +TTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAA +CTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTAC +ATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGAC +CAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTG +AAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTA +TGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTA +ATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAA +TCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCA +GCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGATG +TTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAAT +ACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGT +TGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCT +TATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTA +GAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACAT +AGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAG +TTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCG +CGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTACTTTGATATG +GAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATAC +GTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACT +ACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGG +TAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGT +TCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATG +TCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGA +TGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACA +AACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACT +AATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGG +TTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTG +ACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGT +TACACACCATCAAAACTTATAGAGTACACTGATTTTGCAACATCAGCTTG +TGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAG +TACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAA +AGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCA +ATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTG +ATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTT +TGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATC +TTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATA 
+TGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCT +ATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTACTA +TTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCT +TTAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCA +GTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGAC +ATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGG +TTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATT +TGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAG +ACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGC +TGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGAT +GTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAA +GTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTG +CTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCT +GATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTTGCA +GAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTA +TGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGAT +GACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCT +TAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCT +TGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAA +AATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAA +GTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTT +GTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCACAAT +TTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTT +TAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAAT +TACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGA +CCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTAT +TACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACA +GGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTG +GCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACT +AGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTT +CATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGT +AGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATG +CTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACAC +ACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAG +AGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACC +TTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCA +AACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACT +GTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTAT 
+TATGACATGGTTGGATATGGTTGATACTAGTTTTAAGCTAAAAGACTGTG +TTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAACT +GTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGAC +ACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCA +TGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACA +ACTGTCATGTTTTTGGCCAGAGGTGTTGTTTTTATGTGTGTTGAGTATTG +CCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTT +ATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTA +CTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTC +TACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAAGA +ATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGC +AAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAA +GTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAGAAT +CATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTC +TTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTC +TGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAG +AAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGT +TCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCA +GGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGT +CTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACGT +AAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGC +TAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGC +TTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATC +AACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAAC +AGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATA +CGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCAA +CAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTAT +GGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGG +CCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTA +CGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGA +CAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACTTG +CACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGT +GATGGAACTGGTACTATCTATACAGAACTGGAACCACCTTGTAGGTTTGT +TACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAG +GATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACA +GTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGT +ATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGATT +ATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTGT 
+ACACACACTGGTACTGGTCAGGCAATAACAGTCACACCGGAAGCCAATAT +GGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCC +ACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTAT +GTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAA +AAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTT +GTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTTT +TTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACA +GGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAA +AGTAGCTAGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAG +AAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGA +CACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAA +GGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACG +GTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATG +GCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACAC +ATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCA +ATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTA +TACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGTACA +ATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAG +ATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAA +ACCATGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTT +AATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTG +ACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGAC +TTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTGGGA +TCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATTC +TGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTACA +AGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGT +AGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGG +ATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTAT +GCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAA +ACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTTC +AAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTG +TCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTT +CTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTT +ATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAA +GTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCTAA +CCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATA +AATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAA +GATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCA 
+AATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAG +CTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAA +TTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGGAAC +AAGCAAATTCTATGGTGGTTGGCACAATATGTTAAAAACTGTTTATAGTG +ATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGA +GCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAA +ACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATG +AGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATAT +GTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAA +TAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTTT +TATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAA +CACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTT +TGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATAC +TCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGT +CTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAA +TGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAG +GACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGAT +GATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGG +CTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAAC +GGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAAT +CAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAAGCT +ACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTA +CTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATG +TACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAA +TTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTAT +GTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTC +TTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCACAGA +TGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACATA +AACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTA +TATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAAT +TGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCT +GTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAG +GAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTC +TGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCAC +TTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAA +GTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGT +TGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTG +TGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGCCA 
+CAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATCTC +AGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAA +AGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCT +ATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTG +CTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGC +CTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGT +TTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTGTAC +TGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGAAA +TTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGT +GCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACG +CACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGT +GTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGG +CGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAA +TAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTTT +ATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAA +ATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGC +TGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTT +TGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGAC +TATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAA +CAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAA +TGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAATT +CCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTT +TAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTA +CACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGAC +GTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTATGAT +GGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCA +CCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTC +GAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACA +GCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATG +TTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCG +CCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACTTCC +TTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTTA +AAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAG +TTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTG +TCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATG +CCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATG +ATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGA +TCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCAA 
+TCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGAC +TGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGC +TTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACA +AATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTA +CCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGTGA +CAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTG +ACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGA +TATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAA +CCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATG +CATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAA +TTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAAACA +AGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATAA +CACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTAC +AGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTT +GTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAA +GACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACAC +TTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGT +TTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACAA +CATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAA +CCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGC +TAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTA +CTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGAAACG +ATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAGT +AGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTG +TTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGA +GTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAA +AGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTA +GAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTA +GAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTATGC +CTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTT +TACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAA +TTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAAC +AGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTAT +TACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTT +TCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTATGCT +TTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCTA +GTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAA +AGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAAC 
+ATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTC +AATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATA +CATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTT +AAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAATG +ACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTA +CATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAA +GACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTT +ACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCT +ATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGGG +ACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCAT +CTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAA +ATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAA +TCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCC +TTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAAT +GATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGAAAA +CAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAACA +ATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAA +TCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACAC +GTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCA +ACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGT +TATCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCAT +TTAATGATGGTGTTTATTTTGCTTCCATTGAGAAGTCTAACATAATAAGA +GGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTAT +TGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTT +GTAATGATCCATTTTTGGACCACAAAAACAACAAAAGTTGGATGGAAAGT +GAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTC +TCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATC +TTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCT +AAGCACACGCCTATTATAGTGCGTGAGCCAGAAGATCTCCCTCAGGGTTT +TTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTA +GGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGAT +TCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCT +TCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAG +ATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTG +AAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGT +CCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCC +CTTTTGATGAAGTTTTTAACGCCACCAAATTTGCATCTGTTTATGCTTGG +AACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAA 
+TCTCGCACCATTTTTCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAAT +TAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGA +GGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAATATTGCTGA +TTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGA +ATTCTAACAAGCTTGATTCTAAGGTTAGTGGTAATTATAATTACCTGTAT +AGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAAC +TGAAATCTATCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTTTTA +ATTGTTACTTTCCTTTACGATCATATAGTTTCCGACCCACTTATGGTGTT +GGTCACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGC +ACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACA +AATGTGTCAATTTCAACTTCAATGGTTTAAAAGGCACAGGTGTTCTTACT +GAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGC +TGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACA +TTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAAT +ACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGT +CCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATT +CTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCT +GAATATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTAT +ATGCGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTG +TAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAAT +TCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTAT +TAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAG +ATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTG +TTGCAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAAT +AGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAAC +AAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCA +CAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGA +TCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAAT +ATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAA +AAGTTTAAAGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGAT +TGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGA +CCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCT +TATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCA +AAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACT +CACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAAC +CATAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATT +TGGTGCAATTTCAAGTGTTTTAAATGATATCTTTTCACGTCTTGACAAAG +TTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGT 
+TTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGC +TTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAAT +CAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCT +CAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGC +ACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAG +CACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTT +GTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACAC +ATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAG +TTTATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGAT +AAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTC +TGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCA +ATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTT +GGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTT +TATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTA +TGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGC +TGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATT +ACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAAT +TGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAG +ATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTC +GGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTC +CAAAATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTG +TTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACAC +CTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGC +TTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGC +TTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCC +AACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTA +CAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAA +GTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGG +GAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGA +CTATTACCAGCTGTACTCAACTCAATTGAGTACAGACACTGGTGTTGAAC +ATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACAT +GTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAAT +GGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAG +CACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATA +GGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATT +CTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACT +GCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTAC +TCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTA 
+AACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGC +CATGGCAGGTTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCC +TTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGT +CTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAA +GTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGC +TTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCA +ATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTT +CAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTA +ACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTT +CTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCG +TATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAG +AAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCT +TCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAG +GATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATA +TTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTT +TCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAG +TTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTA +TCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACC +AATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGA +TAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGT +ACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAA +TTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTA +GCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAG +TTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGT +TCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTA +TAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCAT +TAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTA +ATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGA +AACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCA +TCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACT +CAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTC +TAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAAT +TGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATC +GGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACC +TAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAG +AGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTT +AAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACTCCGCATTACG +TTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGCG 
+ATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGT +TCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGA +CAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTA +CTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAG +ATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT +GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGA +GGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTA +ACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAA +GGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTC +CTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTA +AACGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTT +GCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG +TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTG +AGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAAT +GTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTT +TGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGC +AAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGC +ATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGC +CATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC +TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAA +AAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACA +GAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATT +TCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCC +TAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGC +TTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTA +ACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT +TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTC +ACCGAGGCCACGCGGAGTACGATCCAGTGTACAGTGAACAATGCTAGGGA +GAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTANNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNN +>USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03 +CTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCG +TTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTT +CGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTG +ACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACAC +ACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGT +GGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCT +TAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTC 
+AACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCA +CCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAATGCATTCA +GTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCG +AAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAA +GGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGACTTAGG +CGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACA +CTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGA +GGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTA +CCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCAT +GCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATAC +TGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGA +AAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAAT +TTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCC +ATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTT +TATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCA +ACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACT +TCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCAC +TGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAA +ATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGA +CCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCAT +TCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTT +ATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCT +AACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCT +TAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATA +TTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCT +TTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTA +TAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAA +AAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATA +CTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATC +AATTTTCTCCCGCACTCTTGAAACTGCTAAAAATTCTGTGCGTGTTTTAC +AGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGA +CTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGT +TGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGC +TAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGAT +TGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTG +GGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGAC +AAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTT +AAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGG 
+TGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACT +CAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTA +CTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAAC +ACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATT +TACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTT +GGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACAC +AGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCT +TCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACT +GTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAACTTGA +TGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTG +AACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTC +ATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGA +TCTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTG +AGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAG +GATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCA +ATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTG +GTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGG +TTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGA +CAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAG +AGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGT +GGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGT +GGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATG +TTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACT +AACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACC +ACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAAC +ACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAA +CTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGC +ACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAA +GAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGAT +AAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGA +AAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGC +CATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGAT +AAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAAC +TAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATC +TTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTA +AAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTT +AACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAATGC +TAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTAC 
+CCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCT +TAAAAAGTGTAAAAGTGCTTTTTACATTCTACCATCTATTATCTCTAATG +AGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTT +GCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAA +AGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAG +AGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACA +ACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGT +TACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTG +CTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCA +CCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAAC +ACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAG +ATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAG +AGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCT +AGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGA +GAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTC +CACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCC +AACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCAC +ATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTT +GAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTA +CATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATG +GTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCA +TTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACA +AGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCAC +TTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGA +GAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAG +AGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTA +AGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTT +AAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATA +TCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTC +AGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGT +AATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTA +TTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTA +TTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCA +GTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTT +GGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTG +ATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAG +TTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGG +TTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACT 
+TAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTT +AAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAA +TGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTC +TTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAG +TCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACC +AGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTG +AGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCA +GCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAAT +GGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAAT +TATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCT +GTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCT +TAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACC +GTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTG +TGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGAC +TACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGG +CTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATT +ATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTC +AACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACT +GTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCA +ACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGA +TTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCAT +CTTTTAAATGGGATTTAACTGCTTTNNNNNNNNNNNNNGAGTGGTTTTTG +GCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAAT +CATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTT +GGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCT +ATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAG +TTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTT +ACAAACGTAATAGAGCAACAAGAGTTGAATGTACAACTATTGTTAATGGT +GTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAA +ACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTA +CATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGA +CCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGT +GAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTT +ATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCT +AATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAA +ATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTC +AGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGAT +GTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAA 
+TACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAG +TTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTC +TTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGT +AGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACA +TAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAA +GTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGC +GCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATAT +GGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATA +CGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAAC +TACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTG +GTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTG +TTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCAT +GTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTG +ATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAAC +AAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATAC +TAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGG +GTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGT +GACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTG +TTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTT +GTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCA +GTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGA +AAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTC +AATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTT +GATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGT +TTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGAT +CTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTAAT +ATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATC +TATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTGCCTACT +ATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCC +TTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACACC +AGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGA +CATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATG +GTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCAT +TTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGA +GACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCG +CTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGA +TGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATA +AGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCT 
+GCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTC +TGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTTGC +AGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGT +ATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGA +TGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATATGC +TTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTC +TTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCA +AAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTA +AGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCT +TGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACACAA +TTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTT +TTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAA +TTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGG +ACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTA +TTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGAC +AGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGT +GGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATAC +TAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCT +TCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGG +TAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAAT +GCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACA +CACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCA +GAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTAC +CTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTC +AAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCAC +TGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTA +TTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAAGACTGT +GTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAAC +TGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGA +CACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCC +ATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTAC +AACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATT +GCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTT +TATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTT +ACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTT +CTACACAGGAGTTTAGATATATGAATTCACAGGGACTATTCCCACCCAAG +AATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGG +CAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAA 
+AGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAGAA +TCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCT +CTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTT +CTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAA +GAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAG +TTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAAC +AGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGAAG +TCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACG +TAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGG +CTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATG +CTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTAT +CAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAA +CAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAAT +ACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCA +ACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTA +TGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGG +GCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACT +ACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATG +ACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACTT +GCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAG +TGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTTTG +TTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAA +GGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCAC +AGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTG +TATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGAT +TATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTG +TACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATA +TGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGC +CACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTA +TGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTA +AAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGT +TGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTT +TTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCAC +AGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATA +AAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAA +GAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAG +ACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTA +AGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGAC 
+GGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAAT +GGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACA +CATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTC +AATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGT +ATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGTAC +AATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTA +GATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACA +AACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGT +TAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTT +GACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAACATGA +CTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTGGG +ATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATT +CTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTAC +AAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTG +TAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAG +GATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTA +TGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATA +AACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTT +CAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGT +GTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCT +TCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGT +TATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGA +AGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCTA +ACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAAT +AAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCA +AGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTC +AAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTA +GCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAA +ATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGGAA +CAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGT +GATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAG +AGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCA +AACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAAT +GAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATA +TGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTA +ATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTT +TTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACA +ACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACT 
+TTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATA +CTTTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGG +TCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACA +ATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAA +GGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGA +TGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCG +GCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAA +CGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAA +TCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAAGC +TACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTT +ACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTAT +GTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCA +ATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTA +TGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGT +CTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCACAG +ATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACAT +AAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTT +ATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAA +TTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACC +TGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGA +GGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGT +CTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCA +CTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAA +AGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTG +TTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTT +GTGCTGACATCACATACAGTAATACCATTAAGTGCACCTACACTAGTGCC +ACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATCT +CAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAA +AAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGC +TATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTT +GCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTG +CCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTG +TTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTGTA +CTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGAA +ATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTATG +TGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCAC +GCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTG +TGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCG 
+GCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATA +ATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTT +TATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACA +AATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAG +CTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATT +TTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGA +CTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAA +ACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATA +ATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAAT +TCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCT +TTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCT +ACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGA +CGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTATGA +TGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATC +ACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGT +CGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTAC +AGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTAT +GTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACC +GCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACTTC +CTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTT +AAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGA +GTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTT +GTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTAT +GCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTAT +GATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATG +ATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCA +ATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGA +CTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGG +CTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGAC +AAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGT +ACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGTG +ACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCT +GACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAG +ATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTA +ACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACAT +GCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACA +ATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAAAC +AAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATA 
+ACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTA +CAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCT +TGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACA +AGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACA +CTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTG +TTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACA +ACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAA +ACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTG +CTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCT +ACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAATTGAAAC +GATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAG +TAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTAGT +GTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGG +AGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGA +AAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGT +AGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTT +AGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTATG +CCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGT +TTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGA +ATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAA +CAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTA +TTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGT +TTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTATGC +TTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCT +AGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCA +AAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAA +CATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGT +CAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTAT +ACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTT +TAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAAT +GACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGT +ACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTA +AGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACT +TACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGC +TATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGG +GACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCA +TCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACA +AATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAA 
+ATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCC +CTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAA +TGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGAAA +ACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAAC +AATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTA +ATCTTATAACGAGAACTCAATCATACACTAATTCTTTCACACGTGGTGTT +TATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGA +CTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATCTCTG +GGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGAT +GGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGAT +TTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATA +ACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGAT +CCATTTTTGGATGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAG +TGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCT +CTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAAT +CTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTC +TAAGCACACGCCTATTAATTTAGGGCGTGATCTCCCTCAGGGTTTTTCGG +CTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTT +CAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTC +TTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAAC +CTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCT +GTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATC +CTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAAC +CAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTT +GATGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAG +GAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTTCG +CACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAAT +GATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTAA +TGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTATA +ATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCT +AACAAGCTTGATTCTACGGTTGGTGGTAATTATAATTACCGGTATAGATT +GTTTAGGAAGTCTAAACTCAAACCTTTTGAGAGAGATATTTCAACTGAAA +TCTATCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTGTTAATTGT +TACTTTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTTGGTCA +CCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAG +CAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGT +GTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTC +TAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACA 
+CTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACA +CCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTC +TAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCCTG +TTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACA +GGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAATA +TGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCG +CTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTGTAGCT +AGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGT +TGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTG +TTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGT +ACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCA +ATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAATAGCTG +TTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATT +TACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCACAAAT +ATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTAC +TTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGT +GATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTT +TAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTC +AATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTT +GGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAG +GTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAAT +TGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTT +TCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCATAA +TGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATTTGGTG +CAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAG +GCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCA +GACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTG +CTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAA +AGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTC +AGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAG +AAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACAC +TTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACATACTGGTTTGTAAC +ACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTG +TGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTAT +GATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGATAAATA +TTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCA +TTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAG +GTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAA 
+GTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAG +CTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACC +AGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAA +ATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATT +ACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAA +CTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTT +GTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATG +GCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAA +TCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTGTTCAC +TTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTT +GCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAG +TCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTTGG +CTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTA +TTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATA +GTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAAGTCCT +ATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATC +TGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATT +ACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAACATGTT +ACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCA +AATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAAC +CAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAA +GCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATAGGTAC +GTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTTGTGGTATTCTTGC +TAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGC +AATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCG +TGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGA +ACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGG +CAAATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAA +GAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCT +ACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAA +TTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTGCT +GCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGC +TTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTTCAGAC +TGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATT +CTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGA +AAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTG +CTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATC +ACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCA 
+GCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTG +GCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCT +TTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGG +TTACTATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCC +ATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAA +GTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGG +AGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACA +CTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAAC +AGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCAC +CATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACT +CAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACG +TGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAG +AACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTATAACA +CTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATT +GACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTAATTAT +GCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTT +GTCACGCCTAAATGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACA +ACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACA +TCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAAT +GGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGC +GTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAA +TTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAAT +TGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTAT +CATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAATG +TCTGATAATGGACCCCAAAATCAGCGAAATGCACTTCGCATTACGTTTGG +TGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGCGATCAA +AACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACC +GCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGG +CGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACC +GAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTC +AGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGACT +TCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGATGGAG +CCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAAT +GCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTT +CTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCAT +CACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAACGA +ACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTTGCTTT +GCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGGTAAAG 
+GCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCT +TCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAAC +ACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGG +ACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATT +GCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGG +CATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGCCATCA +AATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAAT +AAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGA +CAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGA +AACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCC +AAACAATTGCAACAATCCATGAGCCGTGCTGACTCAACTCAGGCCTAAAC +TCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTC +CGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTAC +ATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAAT +CAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCTA +CAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGT +AAAATTA +>USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30 +ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTTGATCTC +TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAA +CACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG +GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGC +TACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTAT +ACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCT +GAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA 
+ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATT +CCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGC +TTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAA +CTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGC +ACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCA +AAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG +GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACC +ATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCG +CTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT +CTTAATGACAACCTTCTTGAAATACTTCAAAAAGAGAAAGTCAACATCAA +TATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCAT +CTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT +TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAA +TACTGAGCCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGA +TCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTT +ACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGA +GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTA +GTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTG +ATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGT +TGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGG +ACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCT +TTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATT +GGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAA +ACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGA +TTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGG +TTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGAC +ACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACA +CTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGT +TGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTG +TCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATT 
+GATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATG +AGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACT +CAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT +GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAG +GACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTA +GTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATT +GTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAA +TGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA +CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGA +CCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTC +AACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTT +GCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTT +AAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTG +ATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATG +ATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA +ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAA +TCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCT +TAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTT +TTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTT +ACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTG +CTTAAAAAGTGTAAAAGTGCTTTTTACATTCTACCATCTATTATCTCTAA +TGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC +TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACT +AAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTT +GTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGC +TGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTT +CACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAA +ACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTA 
+AGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTT +GAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACC +TCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGT +CCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTG +TTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG +TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTG +CATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTA +CAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTA +GAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAA +AGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCT +TAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT +TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAA +TATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTG +GTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTG +TATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCC +TATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAAC +CAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTA +AGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACT +GGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGA +CTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTT +TTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAAC +AATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGA +AGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA +CCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCT +TGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAAC +CAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACMCAGATCTA +ATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG +CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTT +CTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAA 
+CCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAAT +TGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCG +ACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATA +TTATAATTTGGTTTTTACTATTAAGTGTGTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTA +CTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTG +CAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTA +GATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTT +TGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCA +ATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAG +CTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAA +AGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG +TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATG +GTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGC +AAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAG +TACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACA +GTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC +TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAG +CTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGT +AAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAG +TCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTG +ATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATG +TCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGAT +GTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGA +CATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACA +AAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGT +GCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAA +TACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCA +ACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGG +TGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTG +TGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTC 
+ATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA +ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTAT +ACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGT +GGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATG +GTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATC +TGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGC +TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGC +CAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT +GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTAT +TCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTT +TTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGT +GTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTA +ATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCA +TCTATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTGCCTA +CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTG +CCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACA +CCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGA +TGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATC +ATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAA +GAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG +CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGT +GATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAG +CTGCTTGTTGTCATCTTGCAAAGGCTCTCAATGACTTCAGTAACTCAGGT +TCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTT +GCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTT +GTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT +GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATT +TCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATG +CAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACC +TAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAG +CTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACAC +AATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGG 
+AATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTAT +GGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAAC +TATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAG +ACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTT +GTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG +CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTG +GGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACA +ATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTA +CACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTC +CAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT +ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTG +TCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC +ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCG +TATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAAGACT +GTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGA +ACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTT +GACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTT +CCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTT +ACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTA +TTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAG +TTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGT +TTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGT +TTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCA +AGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGT +GGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGT +AAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAG +AATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATT +CTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACT +TTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTG +AAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTT +AGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGA +ACAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGA +AGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAA +CGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACA +GGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAA +TGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATT +ATCAACAATGCAAGAGATGGTTGTGTTCCCTTAAACATAATACCTCTTAC 
+AACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAA +ATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATC +CAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAG +TATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAA +GGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCA +CTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGA +TGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTAC +TTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAG +AGTGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTT +TGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTA +AAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCC +ACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAAC +TGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAG +ATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTG +TGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAA +TATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTT +GCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAG +TATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACT +TAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTA +GTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCG +TTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGC +ACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGA +TAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCC +AAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAG +AGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACT +TAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAG +ACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACA +ATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGA +CACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATT +TCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGC +GTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGT +ACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACAT +TAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATA +CAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATT +GTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATG +TTGACACTGATTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATAT +GACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTG +GGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCA 
+TTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTT +ACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATT +TGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATC +AGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTG +TATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGA +TAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTT +TTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCT +GTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTT +CTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATC +GTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTT +GAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGC +TAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTA +ATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGAT +CAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAAC +TCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCG +TAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAA +AAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGG +AACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATA +GTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGAT +AGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCG +CAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTA +ATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTA +TATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGC +TAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCAC +TTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTA +CAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGA +CTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGA +TACTTTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAA +GGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAA +CAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTA +AAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGT +GATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGC +CGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTG +AACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCT +AATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAA +GCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGC +TTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCT +ATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTG 
+CAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCT +TATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTA +GTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCAC +AGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCAC +ATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGT +TTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGC +AATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACA +CCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACT +GAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCT +GTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCAC +CACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGT +AAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGC +TGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATT +TTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTG +CCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATAT +CTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGC +AAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTT +GCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGC +TTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATT +TGCCTATAGATAAATGTAGTAGAATTGTACCTGCACGTGCTCGTGTAGAG +TGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTG +TACTGTAAATGCATTGCCTGAGACGACAGTAGATATAGTTGTCTTTGATG +AAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTA +TGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACC +ACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAG +TGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGT +CGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA +TAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGT +TTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCA +CAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAA +AGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGA +TTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATAT +GACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGT +AAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCA +TAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAA +ATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACT +CTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCAC +CTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTT 
+GACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTAT +GATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTA +TCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGAT +GTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTT +ACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTT +ATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCA +CCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACT +TCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACAC +TTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTT +GAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTG +TTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTT +ATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTT +ATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCA +TGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATG +CAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTT +GACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGC +GGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAG +ACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGT +GTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAG +TGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATT +CTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGAT +AGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATC +TAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAAC +ATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAA +CAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAA +ACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTA +TAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAG +TACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAG +CTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTA +CAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGA +CACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACAC +TGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAA +CAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATT +AAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGC +TGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATAT +CTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAATTGAA +ACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCA +AGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTA 
+GTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAAT +GGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAA +GAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGA +GTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTC +TTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTA +TGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTG +GTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTT +GAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCAT +AACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATT +TATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTA +GTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTAT +GCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAAT +CTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTATAAAATG +CAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGC +AACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGT +GTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTT +ATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGT +TTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTA +ATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACT +GTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCC +TAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCA +CTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTG +GCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCAT +GGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCAT +CATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAA +CAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATAC +AAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTC +CCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATC +AATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGA +AAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGA +ACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGT +TAATCTTATAACCAGAACTCAATCATACACTAATTCTTTCACACGTGGTG +TTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAG +GACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGTTTCCATGCTATCTC +TGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATG +ATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGG +ATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAA +TAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATG 
+ATCCATTTTTGGATGTTTATTACCACAAAAACAACAAAAGTTGGATGGAA +AGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGT +CTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAA +ATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATAT +TCTAAGCACACGCCTATTAATTTAGGGCGTGATCTCCCTCAGGGTTTTTC +GGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGT +TTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCT +TCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCA +ACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATG +CTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAA +TCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCA +ACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTT +TTGATGAAGTTTTTAACGCCACCACATTTGCATCTGTTTATGCTTGGAAC +AGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTT +CGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAA +ATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGT +AATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTA +TAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATT +CTAACAAGCTTGATTCTAAGGTTGGTGGTAATTATAATTACCGGTATAGA +TTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGA +AATCTATCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTGTTAATT +GTTACTTTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTTGGT +CACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACC +AGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAAT +GTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAG +TCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGA +CACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTA +CACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACT +TCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCC +TGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTA +CAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAA +TATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATG +CGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTGTAG +CTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCA +GTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAG +TGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATT +GTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTG +CAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAATAGC 
+TGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAA +TTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCACAA +ATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCT +ACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATG +GTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAG +TTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGC +TCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCT +TTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTAT +AGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAA +ATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCAC +TTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAT +AATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATTTGG +TGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTG +AGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTG +CAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTC +TGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAA +AAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAG +TCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACA +AGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCAC +ACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTA +ACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATT +TGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTT +ATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGATAAA +TATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGG +CATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATG +AGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGA +AAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTAT +AGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGA +CCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGC +AAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACA +TTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGG +AACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATT +TTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGA +TGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAA +AATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTGTTC +ACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTT +TTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTT +AGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTT 
+GGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAAC +TATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAA +TAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAAGTC +CTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAA +TCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTA +TTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAACATG +TTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTC +CAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGA +ACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCAC +AAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATAGGT +ACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTT +GCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCT +GCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCT +CGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAAC +GAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCAT +GGCAAATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTG +AAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTT +CTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTT +AATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTG +CTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATG +GCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTTCAG +ACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACA +TTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTA +GAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTAT +TGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAA +TCACTGTTGCTACGTCACGAACGCTTTCTTATTACAAATTGGGAGCTTCG +CAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAGGAT +TGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTG +CTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCA +GGTTACTATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTT +CCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCT +AAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAAT +GGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAA +CACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACA +ACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTC +ACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCA +CTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTA +CGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCA 
+AGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTATAA +CACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAA +TTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTAATT +ATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAAC +TTGTCACGCCTAAATGAACATGAAATTTCTTGTTTTCTTAGGAATCATCA +CAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAA +CATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAA +ATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGT +GCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGT +AATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAA +ATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGT +ATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAA +TGTCTGATAATGGACCCCAAAATCAGCGAAATGCATTCCGCATTACGTTT +GGTGGGCCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGCGATC +AAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCA +CCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAA +GGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTA +CCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATC +TCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGA +CTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGG +AGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACA +ATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGC +TTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTC +ATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAAC +GAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTTGCT +TTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGGTAA +AGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGG +CTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTA +ACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGG +GGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAA +TTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATT +GGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGCCAT +CAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGA +ATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAG +GACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAA +GAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCT +CCAAACAATTGCAACAATCCATGAGCCGTGCTGACTCAACTCAGGCCTAA +ACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTT 
+TCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACT +ACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTA +ATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACC +TACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGT +GTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTT +AGGAGAATAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24 +TCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTC +ACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTA +CTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTA +CGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCG +GGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGA +AAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTC +GTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCA +ACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTT +TGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGA +ACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGG +CATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATG +TGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGT +AATAAAGGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGA +CTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACT +GGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTT +AACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGA +TGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAG +CTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGT +GTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACG +TTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAA +AGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTA +AATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGA +TGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATG +AATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGT +GAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTG +TGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTAC +CCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAA +GTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAA +AATCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGT +TCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCT +AGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGA 
+AGGTCTTAATGACAACCTTCTTGAAATACTTCAAAAAGAGAAAGTCAACA +TCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTG +GCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTT +GGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAG +TTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAA +TCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGT +ACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTG +TTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCA +CTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAA +TCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGC +AGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTC +CTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGA +CGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCG +GTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACA +TTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCAT +TATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCA +CGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACT +GGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGG +AGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTG +GTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCA +TTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAA +AGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACA +ATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGAT +GACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGA +ACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATA +CAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGAT +GCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGG +CATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGT +CTGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCA +GATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATC +AACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGG +AATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAA +GATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAG +TGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTC +AATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGT +TTTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGA +CATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAG +CCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAG 
+GCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAA +TGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTG +CTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGAC +ATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCT +ACTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATT +CTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTC +TTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAA +GAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAG +TTAAGCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAA +GATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGA +AGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATG +GCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACT +TTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGG +TGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTG +AAATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACC +ACTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGAC +AGTGCTTAAAAAGTGTAAAAGTGCTTTTTACATTCTACCATCTATTATCT +CTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAA +ATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGA +AACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAA +TACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGT +AAAACAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAAC +TCTTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAG +AAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTT +TCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTC +TAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCT +ATAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTT +CTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATT +CCACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTT +CTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATT +AACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTT +TGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATA +ATTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTA +CGTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGG +TAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAG +TTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCC +ACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGC +TCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTT 
+GTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGAT +GTTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTG +CAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAA +CCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAA +CAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTAC +AAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCAC +CTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTAC +ACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAAC +TTTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAG +GTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATA +AAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCC +TAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAAC +CAATTGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAAT +TTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTT +AACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCC +CTGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCC +TCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGT +TAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATAC +GTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGCTGTA +CTGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCT +AAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACG +TTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTT +AAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGA +TCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTA +ATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTA +GCTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCC +TTTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTT +TAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTA +CAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTAT +GCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTC +TAGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATA +AATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAAT +CTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTT +CTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACT +ATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGG +TTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCA +TTTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGG +TTTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGC 
+TGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTA +ATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATT +TCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATG +GAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGA +TGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTT +AATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTT +TTGCAAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTG +GTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTT +AAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGT +TACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAA +AGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTG +AGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGA +TGGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACT +ACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTG +TCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTA +CGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAA +CACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGAC +AATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTC +AGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAAT +CTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTAT +AACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTG +TAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTT +TGATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAA +CAAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATG +TGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTA +AGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACA +CTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCA +TGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGG +CTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTT +GCTAACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAG +TTATACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAG +AAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACT +AATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAA +CATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACAT +CAGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGT +AAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGC +TTATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTA +TTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACA 
+ACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGC +TGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATT +ACAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTT +ACTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATC +AGCATCTATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTG +CCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTA +GTTGCCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTT +AACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGT +ACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAG +TGGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTA +TATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACC +TAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAA +GCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCG +TAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTT +ATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGA +GAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTC +AGGTTCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTG +TTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAG +GGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTG +GCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAG +ATATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCAT +AATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTC +TATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGA +CACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTG +TTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAG +ACACAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTG +TTGGTTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCAT +ATGGAATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTT +TTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACA +CAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAAT +GGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAA +CCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTG +ACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATG +TGTGCTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCAT +ATTGGGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTA +GACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAG +GGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTT +AGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCT 
+TTTTACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATG +TTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCT +TGCCACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGA +TGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAA +GACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGC +AAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATG +TCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCC +ATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGT +AGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTG +AGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATG +CTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTT +TTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACT +TAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCA +CCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGT +TGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAG +ATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGA +GTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGA +CATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCAC +TACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTT +TGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGA +GTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTT +ATGAACAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTG +AAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCAT +GCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATA +AACAAGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAG +ACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAA +CATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTC +TTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATAT +AAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGA +AATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAA +TTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCT +TTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGT +TGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCA +CTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTT +GTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCC +TAAGAGTGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTA +GGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTT +ATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGC 
+TGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATT +CAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTAC +AAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGAT +GTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAG +CCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGC +CGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGG +TAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTA +CACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGC +TGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACA +ATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTG +CGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACA +ATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGC +TTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGT +TAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATT +TACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGA +ATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATA +CACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATT +GTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGAT +TATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATT +ACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAA +CAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTG +ACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTT +CATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATT +CATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCA +CATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAA +ATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAAT +ATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGA +TGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCC +ACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTC +CATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACAT +AATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACT +TGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTAC +TAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTT +GCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTT +TGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAAC +ACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTAC +TATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGT +AGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTA 
+ATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCA +TTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGA +GGATCAAGATGTACTTTTCGCATATACAAAACGTAATGTCATCCCTACTA +TAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGC +ACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCA +TCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAA +TTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTT +TATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATG +TGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTG +CTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTA +GCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTC +ACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTT +ATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAAT +GCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAA +TTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACA +CAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATG +ATGATACTTTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATC +TCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATC +AAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTT +ACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACA +GGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAG +GGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATG +ATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACA +TCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAA +GAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTT +ATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGA +GGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTC +TTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCA +TTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAA +ATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATG +TCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAA +TCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTT +TGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTA +ATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCT +AACACCTGTAATGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGC +TACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAG +TGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGA +CCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAA 
+CAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTG +ATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGAT +TATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACT +AGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCA +ATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGT +ATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCA +TTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATA +CAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAA +TATTTGCCTATAGATAAATGTAGTAGAATTATACCAGCACGTGCTCGTGT +AGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCT +TTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTT +GATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAG +ATTATGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTG +CACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAAT +TCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAAC +TTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTT +ATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAA +ATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAG +GCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGA +GAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCA +AAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGA +ATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTA +ATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTT +TGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCT +TGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAG +GACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAG +GCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATG +TGTTGACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCT +CTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATG +TTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTT +CGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTAC +CTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACA +GGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAA +ACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAG +GACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGAC +ACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGG +CTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCA +CCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGAC 
+ACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCC +GTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCA +ACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGT +GATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCG +TGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTA +ATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTA +GCAGATAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAA +GTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTT +GTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACA +CATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGT +CGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGC +TATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAAT +AAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTT +AAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATG +GAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACG +TGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAA +TGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCT +TTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACT +TTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAA +GGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATA +ACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAAT +AAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAA +CATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACA +TTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACAT +ATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAAT +TGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATG +GTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAG +GGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCT +TAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATT +ATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACT +CAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGA +TTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAG +GCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTA +GGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACC +TTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATT +TCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATT +GATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATC +TGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCAT 
+TTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTA +CAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAA +AATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATA +GTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAA +CTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAG +AGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAG +CTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGAT +CTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGC +AACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACG +ACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTT +TTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTC +CGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGC +TCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCG +TCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACG +CGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGA +ATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAA +TTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCA +AATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTA +GAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAA +ACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGT +GTGTTAATCTTATAACCAGAACTCAATCATACACTAATTCTTTCACACGT +GGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAAC +TCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTA +TCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTT +AATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGG +CTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTG +TTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGT +AATGATCCATTTTTGGATGTTTATTACCACAAAAACAACAAAAGTTGGAT +GGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAAT +ATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTC +AAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAAT +ATATTCTAAGCACACGCCTATTAATTTAGGGCGTGATCTCCCTCAGGGTT +TTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACT +AGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGA +TTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATC +TTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACA +GATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTT +GAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAG 
+TCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGC +CCTTTTGATGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTG +GAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATA +ATTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAA +TTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAG +AGGTAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTG +ATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGG +AATTCTAACAAGCTTGATTCTAAGGTTGGTGGTAATTATAATTACCGGTA +TAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAA +CTGAAATCTATCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTGTT +AATTGTTACTTTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGT +TGGTCACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATG +CACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAAC +AAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTAC +TGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTG +CTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGAC +ATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAA +TACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAG +TCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTAT +TCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC +TGAATATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTA +TATGCGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGT +GTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAA +TTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTA +TTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTA +GATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTT +GTTGCAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAA +TAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAA +CAAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTC +ACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAG +ATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAA +TATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACA +AAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGA +TTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGG +ACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGC +TTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACC +AAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGAC +TCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAA 
+CCATAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAAT +TTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAA +GTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAG +TTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAG +CTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAA +TCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCC +TCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTG +CACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAA +GCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTT +TGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACA +CATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACA +GTTTATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGA +TAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCT +CTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTC +AATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACT +TGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTT +TTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGT +ATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTG +CTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAAT +TACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAA +TTGGAACTGTAATTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCA +GATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTT +CGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTT +CCAAAATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGT +GTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACA +CCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATG +CTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGG +CTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGC +CAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTT +ACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACA +AGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATG +GGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAG +ACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAA +CATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACA +TGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAA +TGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAA +GCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGAT +AGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT 
+TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTAC +TGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTA +CTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCT +AAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAG +CCATGGCAAATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTC +CTTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTG +TCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA +AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTG +CTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGC +AATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTT +TCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACT +AACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCT +TCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTC +GTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACTTGCCTAAA +GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGC +TTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACA +GGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAAT +ATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACT +TTCAGGTTACTATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAA +GTTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTT +ATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC +CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTG +ATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGG +TACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATATGAGGGCA +ATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTT +AGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCA +GTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAG +TTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT +ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCA +TTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTT +AATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATG +AAACTTGTCACGCCTAAATGAACATGAAATTTCTTGTTTTCTTAGGAATC +ATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTAC +TCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATT +CTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA +TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATAT +CGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAAC +CTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTA 
+GAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACT +TAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACTCCGCATTAC +GTTTGGTGGGCCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGC +GATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGG +TTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGG +ACAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCT +ACTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAA +GATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGC +TGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTG +AGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCT +AACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAA +AGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTT +CCTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGT +AAACGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCT +TGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTG +GTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCT +GAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAA +TGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATT +TTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCG +CAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCG +CATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTG +CCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTG +CTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAA +AAAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGAC +AGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGAT +TTCTCCAAACAATTGCAACAATCCATGAGCCGTGCTGACTCAACTCAGGC +CTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCG +CTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGT +AACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATC +TTTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTT +CACCTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTA +ATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATAGCT +TCTTAGGAGAATAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31 +AACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT +GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAAT +AACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCA +GGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGG 
+TTTTGTCCGGGTGTGACCGAAAGNNNNNNNGGAGAGCCTTGTCCCTGGTT +TCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGC +GACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGA +GGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAA +AAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCG +GATGCTCGAACTGCACCTCATGGTCATNNNNTGGTTGAGCTGGTAGCAGA +ACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTG +TCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGT +AAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAA +GTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTC +AAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATG +CGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTG +TGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTG +CTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACT +AAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTA +CACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTA +AATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTA +TTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAA +AAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGT +CACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGAT +CATTGTGGTGAAACTTCGTGGCAGACGGGCGATTTTGTTAAAGCCACTTG +CGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTG +GTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCAC +AATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATC +TGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAG +GCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTT +CCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGA +AGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGA +AAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCC +ATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTNNNNNNNNN +NNNNNNNNNNNATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTA +ATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGT +GAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGC +TCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATT +CTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCA +CAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGC +TACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGT +TGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTC 
+AAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTT +TCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTG +AAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGT +GTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGA +CTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAA +CATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGA +GAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTT +CTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCT +TGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTT +GAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCT +CGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGG +TAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACT +TTTGGTGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCT +CTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTT +GTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTAC +ACCACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTAT +TTGATGAGTCTGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTT +TACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTT +TGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTA +AACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAG +CAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACA +AGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGG +TTCAACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAA +GTGAATAGTTTTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAA +AAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTG +TTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCC +TTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACAT +AGCTACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGAC +ACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAA +GGTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCA +TGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACC +CTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTAC +TTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTT +GGAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTTCTA +AAGAGGAAGTTAAGCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAG +AGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAAC +AACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTG 
+ACATTAATGGCAATCNNNNNNNNNNNNNNNNNNNNNTTGTTAGTGACATT +GACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGT +TCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTG +GCACTACTGAAATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAAT +TATATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGA +GGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCAT +CTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAAT +TTGCGAGAAATGCTTGNNNNNNNNNNNNNNNCACGCAAATTAATGCCTGT +CTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGG +GTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTT +TACACCAGTAAAACAACTGTAGCGTCACTTATCAACACACTTAACGATCT +AAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAA +ATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACA +GTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTAC +TTCTTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTG +CTGGTTCCTNNAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGT +ATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCC +TACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGA +CACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTA +GACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGG +ACAACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAA +AACCTCATAATTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGAT +GACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAG +TNNTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAAT +ACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGT +TATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAA +TCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTG +CTAACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGANNNNNNNNNNN +NNNNNNNNNNTTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTT +AGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAAC +AGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTT +TCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAA +ACAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGT +CAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCT +AGTGAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTC +TAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAG +AATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACA +ACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGA 
+AATTGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCA +CAGAGCAACCAATTGATCTTGTACCAAACCAACCATATCCGAACGCAAGC +TTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTT +AAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTA +CATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACAC +TACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGT +TTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCT +GGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCG +TTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTG +CGAAGATCTAANACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATAC +AGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGAC +ATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGG +CCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTA +AGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGTTACT +CATGNNNNNNCTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTA +TGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTA +CACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACT +TTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAA +AGCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTA +AATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCT +AAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGG +TTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAG +GCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACT +AATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTG +TCTTGGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATAC +AAATTACCATTTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTT +GCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACT +TGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATT +TTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATG +GCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTA +TTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAA +CTTGTATGATGTGTTANNNNNNNNNNNNNNNNNNNAGAGTCGAATGTACA +ACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGG +TAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGATACAT +TCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCA +CTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGT +TNNTAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAG +CTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTA 
+GACAACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTAT +AGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGT +CTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAG +GCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTT +TGATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAA +AACTCAAAACACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTG +TCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTT +TGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGT +CACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATG +CTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTG +TATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACA +ACATTGCTTTGATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAA +CTACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAA +GTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGA +TAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATT +AAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAAC +ACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAG +GATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGAT +ACTTGTTTTGCTAACAAACATGCTGATTTTGACACATGGNNNNNNNNNNN +NNNNNNNNNNNNNNNTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCA +TAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTA +CGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGC +AGTTGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACT +TTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGAT +GCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGG +TTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGG +ATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGA +GTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAG +ATCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACA +ATGATTATTACAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTA +AATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTT +GGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAA +CATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATAC +AGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCACTGT +ACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTA +TTTACTTGTACTTGACATTTTATCTTGCTAATGATGTTTCTTTTTTAGCA +CATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAAC +AATTGTTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTA 
+GTAATTACCTAAAGAGCCGTGTAGTCTTTAATGGTGTTTCCTTTAGTACT +TTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCT +AAAGTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACT +TAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACT +AGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTT +CAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAACCTCTATCA +CCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGT +AAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAA +CGGTCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCA +CCTCTGAAGACATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAG +TCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTAT +TGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCA +ATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACT +TTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATG +TGCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCAT +GTGGTAGTGTTGGTTTTAACATAGATTATGACTGTGTCTCTTTTTGTTAC +ATGCACCATATGGAATTACCAACTGGAGTTCATGCTGGCACAGACTTAGA +AGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTG +GTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCT +GTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNAGTACAATTATGAACCTCTAACACAAG +ACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTT +TTAGATATGTGTGCTTCATTAAAAGAATTACTGCAAAATGGTATGAATGG +ACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTG +ATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGA +ACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACT +TTTAGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATG +AAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTT +GCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTT +ACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTA +GTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTG +AAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCT +TATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACAC +TTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTA +GATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTA +CTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTA +TGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAG +TGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTT 
+TGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTT +ATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGA +CTACTCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATT +GTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTA +AAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAA +CAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTT +ACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAA +TGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATA +AACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTAT +AGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTC +AAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTT +AAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGA +TGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCC +AAATGTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGT +GCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGC +ACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACA +TAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTAT +AACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGC +ATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAAC +TTAGTGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATT +GTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCT +TAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAA +CTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGA +GGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGC +TAGATTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAAC +CACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTAT +TTATACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGG +TAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGC +CTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCT +AAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTG +TGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTA +CACCGGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGT +CTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGA +CTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTG +TGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAA +GGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGC +TGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCT +TACACCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTG 
+ACATCTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAAT +TGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTA +CTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAA +TTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTT +AAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCT +TACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATG +AAGGTAATTGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGT +GATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCC +AGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTT +TGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTT +GGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTT +CGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATT +CTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACT +GCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGA +TTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTT +ATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTG +GATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTAC +AGTGTTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTG +ATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGT +GTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAA +GGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTA +ATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACT +AACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTT +CTATGACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTG +AATTAAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGAT +TATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACT +ACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTG +GCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCT +GGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAAT +GAGTTATGAGGATCAAGATGCACTTTTCGTATATACAAAACGTAATGTCA +TCCCTACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAAT +AGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAG +ACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTA +CTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTA +AAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTA +TCCTAAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCAC +TTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTC +TATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTG 
+TGGCAGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCA +CAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCC +AATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTA +TGTCCGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAG +ATGTTGACACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACAT +TTCTCAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCAC +TTATGCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTC +TTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAG +ACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCT +AGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAA +GAATCCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGT +ACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACT +TACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTAC +AATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATG +TATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGA +GTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGG +CTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATA +CGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAAC +ATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAG +GTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTAT +TATTGTAAATCACATAAACTACCCATTAGTTTTCCATTGTGTGCTAATGG +ACAAGTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTA +CTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTAC +ATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAAC +GCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTG +TACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGT +AAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGT +AACTAAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTG +ACTATGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAAT +GTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGC +ACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACC +CAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAA +AAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGG +TAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCA +TAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAG +GCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACG +TGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAAC +AGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATA 
+GTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGT +CAATGCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTC +AATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAA +TATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTT +CCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTG +CTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAA +TGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGC +AATTAACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACC +CTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCT +GTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACA +GGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTC +ACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTA +GGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATT +TACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAA +ATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACAT +CCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGA +AGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAA +GACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTAC +CCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATG +GATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTA +CCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCT +GTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGT +TAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTA +TGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATG +TTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATG +GGCACATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGAC +CTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACT +GCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGT +CTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACC +TACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTA +GCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTT +TGTTAAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAAC +TGAAGATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCT +GCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAA +AGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATG +CACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCT +TATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAA +TTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACA 
+CTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTG +TATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTT +TGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTG +AGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAG +TCTGCTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACA +TCATGCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCT +CAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTC +TGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGT +TGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTA +TCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTG +TTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGC +TAAGCGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGG +GTGTGGACATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCT +CCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAA +GAAACCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTA +GAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTT +ATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACA +AGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGT +TCAATTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACT +TACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAAT +GGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATA +AATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCAT +AGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAA +GGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTA +AAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGT +TCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCA +AGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAG +AAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTAC +CCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAA +TCTTTACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATT +ATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAA +TATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTA +TAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCAC +CAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTC +GATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGG +TGATTGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTG +ATATGTACGATCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAA +GAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCT 
+TGNNNNNNNCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATC +TTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAAT +GTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGG +CAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATAT +TTTGGAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGAC +ATGAGTAAATTTCCCCTTAAATTAAGNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAAAGGTAGAC +TTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTT +AACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCT +CTAGTCAGTGTGTTAATCGTACAACCAGAACTCAATTACCCCCTGCATAC +ACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATC +CTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATG +TTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGG +TTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCAT +TGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATT +CGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATT +AAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTTATTA +CCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGCGTTTATTCTA +GTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGAC +CTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAA +GAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATT +TAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGAT +TTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACA +TAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTG +CTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAA +TATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCC +TCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAA +TCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGA +TTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCAC +CAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTG +TTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAG +TGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGT +CTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTC +CAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGAT +TTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAACTTGATTCTAAGGT +TGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCA +AACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACA +CCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATA 
+TGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAG +TACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAA +AAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGG +TTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTT +TCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGAT +CCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGT +CAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTT +ATCAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAA +CTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAAC +ACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGT +GTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACT +AATTCTCATCGGCGGGCACGTAGTGTAGTTAGTCAATCCATTATTGCCTA +CACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTA +TTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCA +GTGTCTATGACCAAGACAGCAGTAGATTGTACAATGTACATTTGTGGTGA +TTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACAC +AATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACC +CAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAA +AGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAAC +CAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTT +GCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGC +TGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGC +CACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTA +GCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACA +AATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTA +CACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAAT +AGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACT +TGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAATACGC +TTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAAT +GATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAG +GTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAAT +TAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTATTAAA +ATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAA +GGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCT +TCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCT +CCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTT +TGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAAC +CACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTT 
+GTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATT +AGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCAC +CAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAAC +ATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGA +ATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAAT +GGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTA +ATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAA +GGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTG +AGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGG +ATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGT +GAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGAT +ACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCAC +TTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGA +TGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTT +GTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTG +AAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATA +AACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTC +CAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTA +ATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTC +ATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCA +GATTGGTGGTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTG +TATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAA +TTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAA +AATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTT +CATCCGGAGTTATCCAGTAATGGAACCAATTTATGATGAACCGACGACGA +CTACTAGCGTGCCTTTGTAAGTACAAGCTGATGAGTACGAACTTATGTAC +TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCT +TTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTG +CGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTA +AAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAG +AGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCT +GTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCG +TTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC +CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAA +TAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAG +TAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATC +ACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCT +CAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGT 
+GGTCATTCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGC +ACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT +GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTG +ACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTT +TCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTT +TGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACC +ATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACA +GATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAA +TTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA +AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTC +TCAATTAGATGAAGAGCAACCAATGGAGATTGATTAAATGAACATGAAAA +TTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCAC +TACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTC +TTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACA +AATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGAC +GGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT +GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTA +TTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAG +ACAGAATGATTGAACTTTTATTAATTGACTTCTATTTGTGCTTTTTAGCC +TTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACT +TGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAAT +TTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAA +TGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC +GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAAAA +AATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCA +CCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTT +TACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTT +CGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGAT +TTCATCTAAACGAACAAACTAAATGTCTGATAATGGACCCCAAAATCAGC +GAAATGCACCCCGCATTACGTTTGGTGGGCCCTCAGATTCAACTGGCAGT +AACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCA +AGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATG +GCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACC +AATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACG +AATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATT +TCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAAC +AAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAA +AGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTAC 
+AACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGC +AGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAG +TTTAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAA +TGGCTGGCTATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGA +TTGAACCAGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGG +CCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGC +AAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGA +CGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAG +ACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCA +GCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCT +TCGGGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGA +TCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCAT +ACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCT +GATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGAC +TCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAAT +CCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACA +AGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAG +TCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATG +TAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAG +GGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACG +ATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGC +CCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAA +>USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07 +AGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCT +GCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTT +GACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCG +TCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTGAC +CGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACAC +GTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGG +CTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTA +AAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAA +CTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACC +TCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGT +ACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAA +ATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGG +AGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCG +ACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACT +AAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGG 
+GGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACC +CTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGC +ACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTG +CTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAA +AGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTT +GACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCAT +AATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTA +TGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAAC +CAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTC +ATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTG +AGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAAT +GCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACC +TGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTC +TTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTAT +GTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAA +CATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTA +ATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATT +GTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTT +TTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATA +AAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAA +GGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACT +GAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAA +TTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAG +AAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACT +CATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTG +TAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTA +ACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTG +GCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGG +AAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAA +ATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAA +GCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTG +GAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCA +AAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACT +CATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACAC +TTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTA +CAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGG +TACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAG +AAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTC 
+ACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGT +GATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATG +AAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAA +CTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCAT +AAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGATT +TAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAG +TTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGA +TGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAAT +ATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGGT +GCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTT +AGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGACA +ATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAG +ATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGG +TTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGG +AAGAAGCTAAAAAGGTAAAACCAGCAGTGGTTGTTAATGCAGCCAATGTT +TACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTAA +CAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCAC +TTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACAC +TGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACT +TCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCAC +CATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGA +GTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATAA +AAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAAA +AGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCA +TTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAA +GAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTA +AGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTT +CATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAA +GAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAA +CTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAATGCTA +GCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCC +GGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTA +AAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGAG +AAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGC +ACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAG +CCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAG +GGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAAC +TGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTA 
+CAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGCT +CGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCACC +TGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACAC +CTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGAT +TGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAG +AGGTGATAAAAGTGTATATTACATTAGTAATCCTACCACATTCCACCTAG +ATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGA +GAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCCA +CACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCAA +CTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACAT +GAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGA +GGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTTTGGGTAGGTACA +TGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGGT +TTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATT +GTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACAAG +ATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTT +ATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGA +AACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAG +TCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAAG +GGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTAA +GAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATC +TAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAG +TATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAA +TTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATT +GCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATT +ACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAGT +TACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTTGG +ACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGAT +CTTGTACCAAACCAACCATATCCAAACGCAAGTTTCGATAATTTTAAGTT +TGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTT +ATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTA +AATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAA +GAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATG +CAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTT +TGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTC +AGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAG +TCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAG +TGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGC 
+AAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGG +CTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTA +TCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGT +TAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTG +ACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGT +GTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTG +TACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTA +CTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCT +TCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTAT +AATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAA +CCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGT +ACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAAC +CTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATT +CTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCT +TTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGC +ATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCA +TGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGG +CTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTAT +GGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTT +ATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTAC +AAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGT +TAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAAC +TACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACA +TTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACC +AATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGA +AGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTAT +GAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAA +TAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAAT +CAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAG +CTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGATGT +TGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATA +CGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTT +GCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTT +ATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAG +AAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATA +GAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGT +TGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGC +GTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATATGG 
+AACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACG +TAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTA +CTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGT +AAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTT +CCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGT +CTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGAT +GGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAA +ACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTA +ATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGGT +TTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGA +CTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTT +ACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGT +GTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGT +ACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAA +GTTTACGCCCTGACATACGTTATGTGCTCATGGATGGCTCTATTATTCAA +TTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGA +TTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTT +GTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCT +TTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATAT +GTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTA +TAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTACTAT +TTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTT +TAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCAG +TTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACA +TTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGT +TATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTT +GTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGA +CGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCT +GTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATG +TGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAG +TACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGC +TTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTG +ATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTTGCAG +AGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTAT +GGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGATG +ACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTT +AACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTT +GGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAA 
+ATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAG +TATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTG +TTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCCAATT +TCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTT +AACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATT +ACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGAC +CTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTATT +ACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAG +GTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGG +CTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTA +GGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTC +ATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTA +GTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATGC +TCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACACA +CCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGA +GTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCT +TTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAA +ACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTG +TAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATT +ATGACATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAA +AGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAG +CAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAAT +GTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCGAGC +CATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTG +TAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTT +GAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAAT +GCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCT +TTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTAC +TTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCC +ACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTG +TTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCA +GATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAG +AGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATG +ACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCA +CTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCT +TTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAG +AGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCT +TATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTT 
+GAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCA +TGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTAT +AAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCA +GACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACA +ACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCT +CTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATA +TAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGG +AAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAA +ATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGC +TTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTG +TTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGC +ACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTT +TGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCC +CTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACCTTGT +AGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTT +TATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAG +CTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAAT +TCTACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTA +CAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGA +TGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAA +GCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTG +CCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAG +GTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTT +ACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGG +CTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCAC +AATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGT +GCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTAC +AATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCG +CTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAG +TTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAAT +TTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAG +AATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAAT +ACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAAT +TGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGA +TTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATAT +TACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAA +ACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACT +GACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATT 
+TCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTAT +TCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTC +ACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAA +AATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAA +TATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAG +ATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCC +CACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTT +CCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACA +TAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTAC +TTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTA +CTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGT +TGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACT +TTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAA +CACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTA +CTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTG +TAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATT +AATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCC +ATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATG +AGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACT +ATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCG +CACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTC +ATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTA +ATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGT +TTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAAT +GTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTT +GCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATT +AGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTT +CACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCT +TATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAA +TGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCA +ATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGAC +ACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAAT +GATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCAT +CTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTAT +CAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCT +TACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAAC +AGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTA +GGGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTAT 
+GATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAAC +ATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATA +AGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGT +TATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATG +AGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTT +CTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACC +ATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATA +AATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGAT +GTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAA +ATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTT +TTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTT +AATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGC +TAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAG +CTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAA +GTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAG +ACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAA +ACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGT +GATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGA +TTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACAC +TAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTC +AATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGG +TATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTC +ATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTAT +ACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAA +ATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTG +TAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTC +TTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTT +TGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCA +GATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCT +GCACCACGCACATTGCTAACTAAGGGCACACTAGAATCAGAATATTTCAA +TTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAA +CTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTT +TATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAA +AATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACA +GGTCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGG +AGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTC +AAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAG +AATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGT 
+AATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACT +TTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTC +TTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACA +GGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACA +GGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTAT +GTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATC +TCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACAT +GTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCT +TCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTA +CCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTAC +AGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTA +AACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAA +GGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGA +CACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATG +GCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGC +ACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGA +CACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATC +CGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGC +AACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTG +TGATGCAATCATGACTAGGTGTTTAGCTGTCCACGAGTGCTTTGTTAAGC +GTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATT +AATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATT +AGCAGACAAATTCCCAGTTCTTCACGACATCGGTAACCCTAAAGCTATTA +AGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCT +TGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCAC +ACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATG +TCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTG +CTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAA +TAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATT +TAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCAT +GGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTAC +GTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTA +ATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGC +TTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACAC +TTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATA +AGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAAT +AACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAA +TAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCA 
+ACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGAC +ATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACA +TATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAA +CTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGAT +GGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGA +AGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTC +TTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTTAATTAT +TATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTAC +TCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTG +ATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAA +GGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTT +AGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATTAC +CTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTAT +TTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTAT +TGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTAT +CTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCA +TTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATT +ACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACA +AAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGAT +AGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCA +ACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGA +GAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACA +GCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGA +TCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTG +CAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTAC +GACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTT +TTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTT +CCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAG +CTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGC +GTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCAC +GCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGG +AATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAA +ATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTC +AAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATT +AGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTA +AACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAG +TGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTC +TTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTT 
+TACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGG +TTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAA +CCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCATTGAGAAGT +CTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACC +CAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTG +TGAATTTCAATTTTGTAATGATCCATTTTTGGGTGTTACTTCTAACCACA +AAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCG +AATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGA +AGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATA +TTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTG +CGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCC +AATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAA +GTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCA +GCTTATTATGTGGGTTATCTTCAACCTAGGACATTTCTATTAAAATATAA +TGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCT +CAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTAT +CAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCC +TAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAAAT +TTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCT +GATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTA +TGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATG +CAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGG +CAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTAC +AGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTG +GTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCT +TTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTG +TAATGGTGTTAAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTT +TCCAACCCACTTATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTT +TCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTC +TACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAA +CAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAA +CAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACA +GACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTG +TTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAG +GGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTAC +TCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTG +CAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGAC +ATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTC 
+TCATCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTA +TGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCC +ATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTC +TATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAA +CTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTA +AACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGA +AGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATT +TTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGC +AAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGA +TGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTA +GAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCT +TTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGG +TACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATAC +CATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAG +AATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGC +TATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAA +AACTTCAAAATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTT +AAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATAT +CCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGA +TCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATT +AGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTC +AGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCT +ATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTG +CATGTGACTTATGTCCCTGCACAAGAAAAGAATTTCACAACTGCTCCTGC +CATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTT +CAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAA +ATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAAT +AGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACT +CATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGAT +GTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCA +AAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTC +TCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCA +TGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGT +GACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCT +GTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCA +GTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTG +TTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAAT +CAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCGA 
+TACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTT +GCTGTTTTTCATAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCA +ACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGT +TTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCC +CCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTT +TGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAA +ACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGT +TACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTAC +TTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTG +GTGGTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTA +CACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAG +TACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTG +TTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCC +GGAGTTATCCATTAATGGAACCAATTTATGATGAACCGACGACGACTACT +AGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATT +CGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTC +TTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTT +CGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACC +TTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTC +CTGATCTTCTGGTCTAAACGAACTAAATATTATATTTGTTTTTCTGTTTG +GAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGAA +GAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTCCTATT +CCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGT +TTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACT +TTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGG +TGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCT +ACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCA +TTCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTAT +TCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCC +TTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATC +AAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTA +TTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTG +CATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCC +AGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGT +TTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATT +ATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACCT +CATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAAT +TAGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATT 
+CTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCA +AGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTG +GAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTT +GCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGT +AAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCA +TCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTT +GCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAGA +ATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCT +GCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAAC +TGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTT +GTTTTCTTAGGAATCATCAAAACTGTAGCTGCATTTCACCAAGAATGTAG +TTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTT +CTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCA +GCACCTTTAATTGAATTGTGCGTGGATGAGGCTGAAATCACCCATTCAGT +ACATCGATATCGGTAATTATACAGTTTCCTGTTTANNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTCTAAA +CGAACAAACTATAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCAC +CCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAAT +GGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACC +CAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAG +ACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGT +CCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG +TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACC +TAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGATGGC +ATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACAT +TGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTC +AAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGC +AGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAA +TTCAACTCCAGGCAGCAGTAGGGGAATTTCTCCTGCTAGAATGGCTGGCA +ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAG +CTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGT +CACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTA +CTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCA +GAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAAC +TGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAG +CGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG +TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTT 
+CAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACAT +TCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTGATGAAACT +CAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCC +TGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCA +GTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATG +GGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT +GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACT +TTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGACTT +GAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGCACGATCGAGTGT +ACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTG +TAAAATTAATTTTAGTAGTGCTATCC +>USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17 +TAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTTGATCTCTT +GTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGG +CTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCG +TTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTT +CGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTG +ACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACAC +ACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGT +GGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCT +TAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTC +AACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCA +CCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCA +GTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCG +AAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAA +GGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGACTTAGG +CGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACA +CTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGA +GGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTA +CCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCAT +GCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATAC +TGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGA +AAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAAT +TTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCC +ATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTT +TATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCA +ACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACT +TCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCAC 
+TGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAA +ATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGA +CCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCAT +TCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTT +ATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCT +AACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCT +TAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATA +TTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCT +TTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTA +TAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAA +AAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATA +CTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATC +AATTTTCTCCCGCACTCTTGAAACTGCTAAAAATTCTGTGCGTGTTTTAC +AGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGA +CTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGT +TGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGC +TAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGAT +TGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTG +GGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGAC +AAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTT +AAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGG +TGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACT +CAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTA +CTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAAC +ACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTYTTGAAAACTGGTGATT +TACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTT +GGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACAC +AGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCT +TCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACT +GTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAACTTGA +TGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTG +AACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTC +ATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGA +TCTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTG +AGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAG +GATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCA +ATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTG +GTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGG 
+TTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGA +CAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAG +AGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGT +GGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGT +GGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATG +TTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACT +AACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACC +ACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAAC +ACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAA +CTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGC +ACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAA +GAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGAT +AAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGA +AAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGC +CATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGAT +AAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAAC +TAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATC +TTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTA +AAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTT +AACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAATGC +TAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTAC +CCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCT +TAAAAAGTGTAAAAGTGCTTTTTACATTCTACCATCTATTATCTCTAATG +AGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTT +GCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAA +AGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAG +AGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACA +ACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGT +TACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTG +CTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCA +CCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAAC +ACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAG +ATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAG +AGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCT +AGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGA +GAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTC +CACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCC +AACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCAC 
+ATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTT +GAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTA +CATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATG +GTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCA +TTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACA +AGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCAC +TTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGA +GAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAG +AGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTA +AGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTT +AAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATA +TCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTC +AGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGT +AATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTA +TTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTA +TTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCA +GTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTT +GGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTG +ATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAG +TTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGG +TTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACT +TAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTT +AAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAA +TGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTC +TTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAG +TCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACC +AGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTG +AGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCA +GCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAAT +GGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAAT +TATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCT +GTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCT +TAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACC +GTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTG +TGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGAC +TACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGG +CTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATT +ATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTC 
+AACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACT +GTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCA +ACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGA +TTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCAT +CTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTG +GCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAAT +CATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTT +GGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCT +ATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAG +TTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTT +ACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGT +GTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAA +ACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTA +CATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGA +CCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGT +GAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTT +ATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCT +AATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAA +ATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTC +AGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGAT +GTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAA +TACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAG +TTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTC +TTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGT +AGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACA +TAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAA +GTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGC +GCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATAT +GGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATA +CGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAAC +TACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTG +GTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTG +TTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCAT +GTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTG +ATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAAC +AAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATAC +TAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGG +GTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGT 
+GACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTG +TTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTT +GTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCA +GTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGA +AAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTC +AATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTT +GATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGT +TTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGAT +CTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTAAT +ATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATC +TATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTGCCTACT +ATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCC +TTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACACC +AGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGA +CATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATG +GTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCAT +TTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGA +GACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCG +CTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGA +TGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATA +AGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCT +GCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTC +TGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTTGC +AGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGT +ATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGA +TGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATATGC +TTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTC +TTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCA +AAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTA +AGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCT +TGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACACAA +TTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTT +TTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAA +TTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGG +ACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTA +TTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGAC +AGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGT +GGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATAC 
+TAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCT +TCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGG +TAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAAT +GCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACA +CACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCA +GAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTAC +CTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTC +AAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCAC +TGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTA +TTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAAGACTGT +GTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAAC +TGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGA +CACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCC +ATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTAC +AACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATT +GCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTT +TATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTT +ACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTT +CTACACAGGAGTTTAGATATATGAATTCACAGGGACTATTCCCACCCAAG +AATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGG +CAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAA +AGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAGAA +TCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCT +CTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTT +CTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAA +GAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAG +TTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAAC +AGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGAAG +TCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACG +TAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGG +CTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATG +CTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTAT +CAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAA +CAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAAT +ACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCA +ACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTA +TGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGG +GCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACT 
+ACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATG +ACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACTT +GCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAG +TGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTTTG +TTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAA +GGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCAC +AGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTG +TATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGAT +TATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTG +TACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATA +TGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGC +CACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTA +TGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTA +AAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGT +TGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTT +TTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCAC +AGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATA +AAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAA +GAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAG +ACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTA +AGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGAC +GGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAAT +GGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACA +CATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTC +AATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGT +ATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGTAC +AATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTA +GATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACA +AACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGT +TAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTT +GACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAACATGA +CTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTGGG +ATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATT +CTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTAC +AAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTG +TAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAG +GATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTA +TGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATA 
+AACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTT +CAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGT +GTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCT +TCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGT +TATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGA +AGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCTA +ACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAAT +AAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCA +AGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTC +AAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTA +GCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAA +ATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGGAA +CAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGT +GATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAG +AGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCA +AACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAAT +GAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATA +TGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTA +ATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTT +TTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACA +ACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACT +TTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATA +CTTTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGG +TCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACA +ATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAA +GGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGA +TGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCG +GCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAA +CGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAA +TCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAAGC +TACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTT +ACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTAT +GTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCA +ATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTA +TGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGT +CTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCACAG +ATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACAT +AAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTT 
+ATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAA +TTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACC +TGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGA +GGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGT +CTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCA +CTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAA +AGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTG +TTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTT +GTGCTGACATCACATACAGTAATACCATTAAGTGCACCTACACTAGTGCC +ACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATCT +CAGATGAGTTTTCTAGCAATGTTGCAAGTTATCAAAAGGTTGGTATGCAA +AAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGC +TATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTT +GCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTG +CCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTG +TTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTGTA +CTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGAA +ATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTATG +TGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCAC +GCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTG +TGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCG +GCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATA +ATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTT +TATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACA +AATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAG +CTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATT +TTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGA +CTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAA +ACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATA +ATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAAT +TCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCT +TTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCT +ACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGA +CGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTATGA +TGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATC +ACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGT +CGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTAC +AGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTAT 
+GTTGATACACCTAATAATACAGATTTTTCCAGATTTAGTGCTAAACCACC +GCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACTTC +CTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTT +AAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGA +GTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTT +GTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTAT +GCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTAT +GATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATG +ATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCA +ATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGA +CTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGG +CTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGAC +AAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGT +ACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGTG +ACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCT +GACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAG +ATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTA +ACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACAT +GCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACA +ATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAAAC +AAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATA +ACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTA +CAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCT +TGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACA +AGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACA +CTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTG +TTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACA +ACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAA +ACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTG +CTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCT +ACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAATTGAAAC +GATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAG +TAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTAGT +GTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGG +AGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGA +AAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGT +AGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTT +AGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTATG 
+CCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGT +TTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGA +ATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAA +CAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTA +TTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGT +TTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTATGC +TTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCT +AGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCA +AAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAA +CATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGT +CAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTAT +ACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTT +TAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAAT +GACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGT +ACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTA +AGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACT +TACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGC +TATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGG +GACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCA +TCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACA +AATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAA +ATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCC +CTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAA +TGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGAAA +ACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAAC +AATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTA +ATCTTATAACGAGAACTCAATCATACACTAATTCTTTCACACGTGGTGTT +TATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCGGGA +CTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATCTCTG +GGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGAT +GGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGAT +TTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATA +ACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGAT +CCATTTTTGGATGTTTATTACCACATAAACAACAAAAGTTGGATGGAAAG +TGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCT +CTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAAT +CTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTC +TAAGCACACGCCTATTAATTTAGGGCGTGATCTCCCTCAGGGTTTTTCGG 
+CTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTT +CAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTC +TTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAAC +CTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCT +GTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATC +CTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAAC +CAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTT +GATGAAGTTTTTAACGCCACCACATTTGCATCTGTTTATGCTTGGAACAG +GAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTTCG +CACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAAT +GATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTAA +TGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTATA +ATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCT +AACAAGCTTGATTCTACGGTTGGTGGTAATTATAATTACCGGTATAGATT +GTTTAGGAAGTCTAAACTCAAACCTTTTGAGAGAGATATTTCAACTGAAA +TCTATCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTGTTAATTGT +TACTTTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTTGGTCA +CCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAG +CAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGT +GTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTC +TAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACA +CTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACA +CCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTC +TAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCCTG +TTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACA +GGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAATA +TGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCG +CTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTGTAGCT +AGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGT +TGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTG +TTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGT +ACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCA +ATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAATAGCTG +TTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATT +TACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCACAAAT +ATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTAC +TTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGT +GATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTT 
+TAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTC +AATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTT +GGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAG +GTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAAT +TGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTT +TCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCATAA +TGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATTTGGTG +CAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAG +GCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCA +GACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTG +CTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAA +AGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTC +AGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAG +AAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACAC +TTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAAC +ACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTG +TGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTAT +GATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGATAAATA +TTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCA +TTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAG +GTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAA +GTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAG +CTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACC +AGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAA +ATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATT +ACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAA +CTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTT +GTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATG +GCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAA +TCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTGTTCAC +TTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTT +GCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAG +TCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTTGG +CTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTA +TTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATA +GTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAAGTCCT +ATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATC +TGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATT 
+ACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAACATGTT +ACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCA +AATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAAC +CAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAA +GCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATAGGTAC +GTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGC +TAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGC +AATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCG +TGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGA +ACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGG +CAAATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAA +GAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCT +ACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAA +TTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTGCT +GCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGC +TTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTTCAGAC +TGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATT +CTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGA +AAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTG +CTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATC +ACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCA +GCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTG +GCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCT +TTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGG +TTACTATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCC +ATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAA +GTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGG +AGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACA +CTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAAC +AGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCAC +CATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACT +CAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACG +TGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAG +AACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTATAACA +CTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATT +GACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTAATTAT +GCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTT +GTCACGCCTAAATGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACA 
+ACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACA +TCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAAT +GGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGC +GTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAA +TTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAAT +TGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTAT +CATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAATG +TCTGATAATGGACCCCAAAATCAGCGAAATGCACTTCGCATTACGTTTGG +TGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGCGATCAA +AACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACC +GCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGG +CGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACC +GAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTC +AGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGACT +TCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAATTGATGGAG +CCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAAT +GCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTT +CTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCAT +CACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAACGA +ACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTTGCTTT +GCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGGTAAAG +GCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCT +TCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAAC +ACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGG +ACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATT +GCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGG +CATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGCCATCA +AATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAAT +AAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGA +CAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGA +AACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCC +AAACAATTGCAACAATCCATGAGCCGTGCTGACTCAACTCAGGCCTAAAC +TCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTC +CGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTAC +ATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAAT +CAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCTA +CAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGT +AAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAG 
+GAGAATGACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>England/QEUH-326F56B2/2023|OX452944.1|2023-03-01 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAGAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAA +CACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG +GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGC +TACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTAT +ACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCT +GAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA +ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATT +CCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGC +TTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAA +CTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGC +ACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCA +AAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG +GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACC +ATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCG +CTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT +CTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAA +TATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCAT +CTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT +TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC 
+AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAA +TACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGA +TCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTT +ACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGA +GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTA +GTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTG +ATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGT +TGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGG +ACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCT +TTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATT +GGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAA +ACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGA +TTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGG +TTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGAC +ACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACA +CTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGT +TGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTG +TCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATT +GATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATG +AGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACT +CAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT +GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAG +GACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTA +GTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATT +GTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAA +TGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA +CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGA +CCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTC +AACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTT 
+GCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTT +AAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTG +ATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATG +ATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA +ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAA +TCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCT +TAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTT +TTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTT +ACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTG +CTTAAAAAGTGTAAAAGTGCTTTTTACATTCTACCATCTATTATCTCTAA +TGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC +TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACT +AAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTT +GTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGC +TGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTT +CACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAA +ACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTA +AGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTT +GAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACC +TCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGT +CCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTG +TTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG +TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTG +CATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTA +CAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTA +GAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAA +AGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCT +TAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT 
+TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAA +TATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTG +GTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTG +TATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCC +TATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAAC +CAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTA +AGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACT +GGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGA +CTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTT +TTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAAC +AATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGA +AGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA +CCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCT +TGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAAC +CAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTA +ATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG +CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTT +CTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAA +CCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAAT +TGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCG +ACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATA +TTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTA +CTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTG +CAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTA +GATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTT +TGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCA +ATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAG +CTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAA +AGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG 
+TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATG +GTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGC +AAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAG +TACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACA +GTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC +TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAG +CTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGT +AAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAG +TCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTG +ATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATG +TCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGAT +GTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGA +CATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACA +AAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGT +GCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAA +TACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCA +ACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGG +TGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTG +TGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTC +ATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA +ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTAT +ACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGT +GGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATG +GTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATC +TGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGC +TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGC +CAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT +GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTAT +TCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTT +TTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGT +GTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTA +ATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCA 
+TCTATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTGCCTA +CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTG +CCTTTAATACTTTACTATTCCTTATGTCATTTATTGTACTCTGTTTAACA +CCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGA +TGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATC +ATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAA +GAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG +CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGT +GATGTGCTATTACCTTTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAG +CTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGT +TCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTT +GCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTT +GTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT +GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATT +TCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATG +CAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACC +TAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAG +CTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACAC +AATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGG +AATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTAT +GGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAAC +TATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAG +ACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTT +GTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG +CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTG +GGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACA +ATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTA +CACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTC +CAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT +ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTG +TCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC +ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCG +TATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAAGACT 
+GTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGA +ACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTT +GACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTT +CCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTT +ACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTA +TTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAG +TTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGT +TTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGT +TTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCA +AGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGT +GGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGT +AAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAG +AATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATT +CTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACT +TTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTG +AAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTT +AGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGA +GCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGA +AGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAA +CGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACA +GGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAA +TGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATT +ATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTAC +AACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAA +ATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATC +CAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAG +TATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAA +GGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCA +CTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGA +TGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTAC +TTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAG +AGTGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTT +TGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTA +AAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCC +ACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAAC +TGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAG +ATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTG +TGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAA 
+TATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTT +GCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAG +TATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACT +TAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTA +GTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCG +TTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGC +ACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGA +TAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCC +AAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAG +AGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACT +TAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAG +ACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACA +ATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGA +CACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATT +TCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGC +GTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGT +ACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACAT +TAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATA +CAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATT +GTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATG +TTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATAT +GACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTG +GGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCA +TTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTT +ACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATT +TGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATC +AGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTG +TATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGA +TAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAATAATGTTGCTT +TTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCT +GTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTT +CTTCTTCGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATC +GTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTT +GAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGC +TAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTA +ATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGAT +CAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAAC +TCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCG 
+TAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAA +AAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGG +AACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATA +GTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGAT +AGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCG +CAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTA +ATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCAGTTCACTA +TATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGC +TAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCAC +TTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTA +CAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGA +CTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGA +TACTTTCTGACGATGCTGTTGTGTGTTTTAATAGCACTTATGCATCTCAA +GGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAA +CAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTA +AAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGT +GATGATTATGTGTACCTTCCTTACCCAGACCCATCAAGAATCCTAGGGGC +CGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTG +AACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCT +AATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAA +GCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGC +TTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCT +ATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTG +CAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCT +TATGTTGTAAATGCTGTTACGACCATGTCATACCAACATCACATAAATTA +GTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCAC +AGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCAC +ATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGT +TTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGC +AATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACA +CCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACT +GAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCT +GTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCAC +CACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGT +AAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGC +TGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATT +TTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTG +CCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATAT 
+CTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGC +AAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTT +GCTATTGGCCTAGCCCTCTACTACCCTTCTGCTCGCATAGTGTATACAGC +TTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATT +TGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAG +TGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTG +TACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATG +AAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTA +TGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACC +ACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAG +TGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGT +CGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA +TAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGT +TTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCA +CAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAA +AGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGA +TTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATAC +GACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGT +AAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCA +TAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAA +ATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACT +CTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCAC +CTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTT +GACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTAT +GATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTA +TCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGAT +GTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTT +ACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTT +ATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCA +CCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACT +TCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACAC +TTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTT +GAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTG +TTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTT +ATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTT +ATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCA +TGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATG +CAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTT 
+GACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGC +GGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAG +ACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGT +GTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAG +TGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATT +CTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGAT +AGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATC +TAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAAC +ATGCATTCCACACACCGGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAA +CAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAA +ACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTA +TAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAG +TACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAG +CTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTA +CAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGA +CACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACAC +TGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAA +CAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATT +AAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGC +TGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATAT +CTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAATTGAA +ACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCA +AGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTA +GTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAAT +GGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAA +GAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGA +GTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTC +TTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTA +TGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTG +GTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTT +GAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCAT +AACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATT +TATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTA +GTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTAT +GCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAAT +CTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATG +CAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGC +AACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGT 
+GTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTT +ATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGT +TTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTA +ATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACT +GTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCC +TAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCA +CTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTG +GCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCAT +GGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCAT +CATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAA +CAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATAC +AAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTC +CCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATC +AATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGA +AAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGA +ACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGT +TAATCTTATAACCAGAACTCAATCATACACTAATTCTTTCACACGTGGTG +TTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAG +GACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACA +TGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGCCCTACCAT +TTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGA +GGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTAT +TGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTT +GTAATGATCCATTTTTGGATGTTTACCAAAAAAACAACAAAAGTTGGATG +GAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATA +TGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAAGAGGGTAATTTCA +AAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATA +TATTCTAAGCACACGCCTATTAATTTAGAGCGTGATCTCCCTCAGGGTTT +TTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTA +GGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGTTGAT +TCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCT +TCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAG +ATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTG +AAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGT +CCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCC +CTTTTCATGAAGTTTTTAACGCCACCACATTTGCATCTGTTTATGCTTGG +AACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCATATATAA +TTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAAT 
+TAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGA +GGTAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGA +TTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGA +ATTCTAACAAGCTTGATTCTAAGCCTAGTGGTAATTATAATTACCTGTAT +AGATTGTTTAGGAAGTCTAAGCTCAAACCTTTTGAGAGAGATATTTCAAC +TGAAATCTATCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTCCTA +ATTGTTACTCTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTT +GGTCACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGC +ACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACA +AATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACT +GAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGC +TGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACA +TTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAAT +ACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGT +CCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATT +CTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCT +GAATATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTAT +ATGCGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTG +TAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAAT +TCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTAT +TAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAG +ATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTG +TTGCAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAAT +AGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAAC +AAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCA +CAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGA +TCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAAT +ATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAA +AAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGAT +TGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGA +CCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCT +TATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCA +AAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAGTTCAAGACT +CACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAAC +CATAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATT +TGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAG +TTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGT +TTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGC 
+TTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAAT +CAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCT +CAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGC +ACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAG +CACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTT +GTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACAC +ATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAG +TTTATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGAT +AAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTC +TGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCA +ATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTT +GGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTT +TATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTA +TGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGC +TGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATT +ACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTTACAAT +TGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAG +ATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTC +GGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTC +CAAAATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTG +TTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACAC +CTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGC +TTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGC +TTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCC +AACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTA +CAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAA +GTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGG +GAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGA +CTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAAC +ATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACAT +GTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAAT +GGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAG +CACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATA +GGTGCGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATT +CTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACT +GCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTAC +TCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTA +AACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGC 
+CATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCC +TTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGT +CTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAA +GTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGC +TTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCA +ATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTT +CAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTTAATCCAGAAACTA +ACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTT +TTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCG +TATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAG +AAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCT +TCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAG +GATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATA +TTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTT +TCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAG +TTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTA +TCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACC +AATGGAGATTCTCTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGA +TAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGT +ACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAA +TTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTA +GCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAG +TTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGT +TCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTA +TAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCAT +TAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTA +ATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGA +AACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTATGAATCA +TCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACT +CAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTC +TAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAAT +TGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATC +GGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACC +TAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAG +AGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTT +AAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACTCCGCATTACG +TTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGCG +ATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGT 
+TCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGA +CAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTA +CTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAG +ATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT +GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGA +GGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTA +ACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAA +GGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTC +CTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTA +AACGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTT +GCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG +TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTG +AGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAAT +GTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTT +TGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGC +AAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGC +ATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGC +CATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC +TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAA +AAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACA +GAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATT +TCTCCAAACAATTGCAACAATCCATGAGCCGTGCTGACTCAACTCAGGCC +TAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGC +TTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTA +ACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT +TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTC +ACCTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAA +TGTGTAAAATTAATTTTAGTANNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +>USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03 +CTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCA +CTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTAC +TGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTAC +GGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG +GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAA +AACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCG +TACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAA +CATCTTAGAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTT 
+GCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAA +CTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGC +ATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGT +GGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTA +ATAAAGGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGAC +TTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTG +GAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTA +ACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGAT +GGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGC +TTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTG +TATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGT +TCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAA +GAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAA +ATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGAT +GGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGA +ATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTG +AAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGT +GGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACC +CCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAG +TAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAA +ACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTT +CTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTA +GCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAA +GGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACAT +CAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGG +CATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTG +GATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGT +TACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAAT +CAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTA +CGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGT +TTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCAC +TGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAAT +CTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCA +GTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCC +TTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGAC +GGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGG +TGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACAT +TCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATT 
+ATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCAC +GCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTG +GCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGA +GAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGG +TGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCAT +TGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAA +GACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAA +TACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATG +ACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAA +CTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATAC +AGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATG +CTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGC +ATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTC +TGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAG +ATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCA +ACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGA +ATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAG +ATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGT +GAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCA +ATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTT +TTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGAC +ATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGC +CAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGG +CTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAAT +GGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGC +TAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACA +TTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTA +CTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTC +TTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCT +TTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAG +AGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGT +TAAGCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAG +ATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAA +GAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGG +CAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTT +TCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGT +GTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGA +AATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCA 
+CTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACA +GTGCTTAAAAAGTGTAAAAGTGCTTTTTACATTCTACCATCTATTATCTC +TAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAA +TGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAA +ACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAAT +ACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTA +AAACAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACT +CTTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGA +AGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTT +CTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCT +AAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTA +TAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTC +TTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTC +CACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTC +TTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTA +ACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTT +GGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAA +TTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTAC +GTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGT +AGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGT +TAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCA +CTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCT +CTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTG +TGCACTTATCTTAGCCTACTGTAACAAGACAGTAGGTGAGTTAGGTGATG +TTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGC +AAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAAC +CCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAAC +AATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACA +AAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACC +TGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACA +CTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACT +TTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGG +TCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAA +AACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCT +AAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACC +AATTGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATT +TTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTA +ACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCC 
+TGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCT +CTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTT +AACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACG +TTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTTGTTTGATGTAC +TGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTA +AAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGT +TCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTA +AACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGAT +CTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAA +TGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAG +CTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCT +TTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTT +AAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTAC +AATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATG +CCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCT +AGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAA +ATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATC +TACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCATTC +TTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTA +TTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGT +TTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCAT +TTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGT +TTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCT +GCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAA +TTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTT +CAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGG +AAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGAT +GTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTA +ATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTT +TGCAAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGG +TAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTA +AAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTT +ACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAA +GACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGA +GAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGAT +GGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTA +CAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGT +CTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTAC 
+GTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAAC +ACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACA +ATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCA +GATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATC +TGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATA +ACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGT +AGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTT +GATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAAC +AAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGT +GCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAA +GGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACAC +TTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCAT +GTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGC +TATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTG +CTAACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGT +TATACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGA +AGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTA +ATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAAC +ATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATC +AGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTA +AGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCT +TATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTAT +TATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAA +CTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCT +GGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTA +CAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTA +CTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCA +GCATCTATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTGC +CTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAG +TTGCCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTA +ACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTA +CTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGT +GGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTAT +ATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCT +AAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAG +CTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGT +AGTGATGTGCTATTACCTTTTACGCAATATAATAGATACTTAGCTCTTTA +TAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAG 
+AAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCA +GGTTCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGT +TTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGG +GTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGG +CTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGA +TATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATA +ATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCT +ATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGAC +ACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGT +TAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGA +CACAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGT +TGGTTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATA +TGGAATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTT +TATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACAC +AACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATG +GAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAAC +CTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGA +CATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGT +GTGCTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATA +TTGGGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAG +ACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGG +GTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTA +GTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTT +TTTACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGT +TTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTT +GCCACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGAT +GCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAAG +ACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCA +AGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGT +CTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCA +TTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTA +GTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGA +GTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGC +TAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTT +TGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTT +AGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTATTCCCAC +CCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTT +GGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGA 
+TGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAG +TAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGAT +ATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACT +ACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTT +GTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAG +TTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTA +TGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGA +AGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATG +CAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAA +ACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGA +CAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAAC +ATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCT +TACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATA +AAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAA +ATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAAT +TAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTT +TAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTT +GCACTACGACAGATGTCTTGTGCAGCCGGTACTACACAAACTGCTTGCAC +TGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTG +TACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCT +AAGAGTGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAG +GTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTA +TTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCT +GCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTC +AACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACA +AAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATG +TTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGC +CAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCC +GTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGT +AAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTAC +ACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCT +GTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAA +TCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGC +GGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAA +TGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCT +TCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTT +AAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTT +ACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAA 
+TAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATAC +ACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTG +TGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATT +ATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTA +CGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAAC +AGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGA +CATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTC +ATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTC +ATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCAC +ATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAA +TATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATA +TTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGAT +GCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCA +CTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCC +ATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATA +ATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTT +GTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACT +AGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTG +CTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTT +GCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACA +CTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACT +ATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTG +GTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAA +TGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCAT +TTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAG +GATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTAT +AACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCA +CCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCAT +CAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAAT +TGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTT +ATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGT +GATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGC +TCGCAAACACACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAG +CTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCAGTTCA +CTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTA +TGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATG +CACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAAT +TTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACAC 
+AGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGA +TGATACTTTCTGACGATGCTGTTGTGTGTTTTAATAGCACTTATGCATCT +CAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCA +AAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTA +CTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAG +GGTGATGATTATGTGTACCTTCCTTACCCAGACCCATCAAGAATCCTAGG +GGCCGGCTGTTTTGTAGATGATATCATAAAAACAGATGGTACACTTATGA +TTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACAT +CCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAG +AAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTA +TGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAG +GCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCT +TTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCAT +TCTTATGTTGTAAATGCTGTTACGACCATGTCATACCAACATCACATAAA +TTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGT +CACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAAT +CACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTT +GGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAA +TGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTA +ACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCT +ACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGT +GCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGAC +CACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAAC +AGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGA +TGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATT +ATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTA +GTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAA +TATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTA +TGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCAT +TTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATAC +AGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAAT +ATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTA +GAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTT +TTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTG +ATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGA +TTATGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGC +ACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATT +CAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACT 
+TGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTA +TGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAA +TGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGG +CCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAG +AAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAA +AGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAA +TACGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAA +TGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTT +GCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTT +GAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGG +ACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGG +CACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGT +GTTGACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTC +TATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGT +TTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTC +GATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACC +TTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAG +GTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAA +CCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGG +ACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACA +CACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGC +TTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCAC +CTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCATACA +CTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCG +TTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAA +CCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTG +ATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGT +GTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAA +TGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAG +CAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAG +TGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTG +TAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACAC +ATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTC +GATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCT +ATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATA +AACATGCATTCCACACACCGGCTTTTGATAAAAGTGCTTTTGTTAATTTA +AAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGG +AAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGT 
+GTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAAT +GAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTT +TAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTT +TTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAG +GGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAA +CACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATA +AAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAAC +ATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACAT +TGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATA +TATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAATT +GAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGG +TCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGG +GTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTT +AATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTA +TAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTC +AGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGAT +TTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGG +CTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAG +GTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCT +TTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTT +CATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTG +ATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCT +GTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATT +TATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTAC +AATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAA +ATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAG +TGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAAC +TGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGA +GTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGC +TGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATC +TTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCA +ACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGA +CCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTT +TCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCC +GTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCT +CATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGT +CATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGC +GAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAA 
+TACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAAT +TTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAA +ATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAG +AGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAA +CGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTG +TGTTAATCTTATAACCAGAACTCAATCATACACTAATTCTTTCACACGTG +GTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACT +CAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTAT +ACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGCCCTAC +CATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATA +AGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACT +TATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAAT +TTTGTAATGATCCATTTTTGGATGTTTACCAAAAAAACAACAAAAGTTGG +ATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGA +ATATGTCTCTCAGCCTTTTCTTATGGACCTTGTAGGAAAAGAGGGTAATT +TCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAA +ATATATTCTAAGCACACGCCTATTAATTTAGAGCGTGATCTCCCTCAGGG +TTTTTCTGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCA +CTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGTT +GATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTA +TCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTA +CAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACG +TTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAG +AGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGT +GCCCTTTTCATGAAGTTTTTAACGCCACCACATTTGCATCTGTTTATGCT +TGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCATATA +TAATTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTA +AATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATT +AGAGGTAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGC +TGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTT +GGAATTCTAACAAGCTTGATTCTAAGCCTAGTGGTAATTATAATTACCTG +TATAGATTGTTTAGGAAGTCTAAGCTCAAACCTTTTGAGAGAGATATTTC +AACTGAAATCTATCAGGCCGGTAACAGACCTTGTAATGGTGTTGCAGGTC +CTAATTGTTACTCTCCTTTACAATCATATGGTTTCCGACCCACTTATGGT +GTTGGTCACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACA +TGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAA +ACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTT +ACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACAT 
+TGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTG +ACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACA +AATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGA +AGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTT +ATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGG +GCTGAATATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGG +TATATGCGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTA +GTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAA +AATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTAC +TATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAG +TAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTT +TTGTTGCAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGG +AATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCA +AACAAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTT +TCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGA +AGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAAC +AATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCA +CAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAAT +GATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTT +GGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATG +GCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAA +CCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAG +ACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTC +AACCATAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAA +ATTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACA +AAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAA +AGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAG +AGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGAC +AATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTC +CCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCC +TGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAA +AAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGG +TTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAA +CACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACA +CAGTTTATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTA +GATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACAT +CTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCC +TCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAA 
+CTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGG +TTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCT +GTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCC +TGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAA +ATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTTAC +AATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTT +CAGATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCT +TTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGC +TTCCAAAATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGG +GTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCA +CACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTA +TGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGA +GGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGAT +GCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACC +TTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAA +CAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAA +TGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTC +AGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTG +AACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAA +CATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGT +AATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGT +AAGCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAG +ATAGGTGCGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGT +ATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGT +ACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTT +TACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGT +CTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTT +AGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGC +TCCTTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATT +TGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAAT +TAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTG +TGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATC +GCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTC +TTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTTAATCCAGAAA +CTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCG +CTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCT +TCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTA +AAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTAGGA 
+GCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTA +CAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACA +ATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGA +CTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTA +AAGTTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAAT +TTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCA +ACCAATGGAGATTCTCTAAATGAACATGAAAATTATTCTTTTCTTGGCAC +TGATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGA +GGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGG +CAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCT +TTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTAT +CAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGA +AGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGT +TTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTT +CATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTT +TTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAA +TGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTATGAA +TCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGT +ACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTA +TTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTG +AATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGAT +ATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGA +ACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTT +TAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAA +CTTAAATGTCTGATAATGGACCCCAAAACCAGCGAAATGCACTCCGCATT +ACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGC +GCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTT +GGTTCACCGCTCTCACTCAGCATGGCAAGGAAGACCTTAAATTCCCTCGA +GGACAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGG +CTACTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGA +AAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAA +GCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAAC +TGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTG +CTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCA +AAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCG +TTCCTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCA +GTAAACGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCT +CTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTC 
+TGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTG +CTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATAC +AATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAA +TTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGC +CGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCG +CGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGG +TGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTT +TGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCT +AAAAAGGATAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAG +ACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATG +ATTTCTCCAAACAATTGCAACAATCCATGAGCCGTGCTGACTCAACTCAG +GCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTT +CGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTC +GTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAA +TCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATT +TTCACCTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCC +TAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATAG +CTTCTTAGG +>USA/LA-EVTL19995/2023|OR649055.1|2023-09-03 +GGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACG +AACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCA +CGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACT +CGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATC +ATCAGCACATCTAGGTTTTGTCCGGGTGTGACCGAAAGGTAAGATGGAGA +GCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCT +GTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGA +GGAGGTCTTATCAGAGGCACGTCAACATCTTAGAGATGGCACTTGTGGCT +TAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTG +TTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGT +TGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGA +CACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGC +AAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGGTA +CGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATC +CTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTT +ACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGT +CGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAG +ACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTG +GACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCA +TGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGA 
+CACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAA +TGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACC +AAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTG +TCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACT +CTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTT +TGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAG +GTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTAT +TGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGA +ATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCA +CTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAG +TGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATAC +AGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAA +TACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTT +AATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGC +TTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTG +TTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGT +GCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATT +TGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTG +AAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATA +CTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTT +CACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAG +GTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACT +GTTTATGAAAAACTCAAACCCTTCCTTGATTGGCTTGAAGAGAAGTTTAA +GGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCT +CAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAG +GAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTT +GGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCT +TGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAG +TGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCC +AAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAA +CAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCT +ACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAA +CGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTG +CACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCA +CCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTA +CAAGAGTGTGAATATCATTTTTGAACTTGATGAAAGGATTGATAAAGTAC +TTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAAT +GAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGT 
+ATCTGAATTACTTACACCACTGGGCATTGATTTAGATGAGTGGAGTATGG +CTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATTGGCTTCACAT +ATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTG +TGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAG +ATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTT +CAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACA +AACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTC +AAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACTTACACCAGTT +GTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGA +CAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAA +AACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGA +GGTGTTGCAGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGA +ATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAGTT +GTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGC +CCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGA +AAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTA +TTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTT +CGCACAAATGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACT +TGTTTCAAGCTTTTTGGAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGA +TCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTAAA +CCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGT +TGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACT +TGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACT +CTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATAT +AGTGGGTGATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTA +CTAAAAAGGCTAGTGGCACTACTGAAATGCTAGCGAAAGCTTTGAGAAAA +GTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGG +TTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCT +TTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGA +ACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACG +CAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATAC +AGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGT +GCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAA +CACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATG +TAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTC +AAAGTGCCAGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTA +TAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAGAACATTTTATTG +AAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAA 
+TCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATA +TTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCT +TTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAG +GTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACAT +GTCAATGACATATGGACAACAGTTTGGTCCAACTTATTTGGATGGAGCTG +ATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAAAACATTTTAT +GTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCA +CACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACA +CTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGG +GCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAAT +AGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAA +GGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGCCTACTGTAAT +AAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTT +TCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTA +AAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATG +TACATGGGCACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACC +TTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTCAC +CTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGT +ACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTA +TAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTAC +TTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAA +GAAAACAGTTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGG +TGTTGTTTGTACAGAAATTGACCCTAAGTTGGACAATTATTATAAGAAAG +ACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACCAAACCAACCA +TATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAA +ATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAA +GAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCT +ATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTT +ACATAAACCTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGT +ATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTT +GAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAAT +GGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGG +AAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACC +GAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAAT +TACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATT +CTAGTCTTACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTG +AAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGA +TACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAA 
+CTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATG +CCTTATTTCTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTAC +AAATTCTAGAATTAAAGCATCTATGCCGACTACTATAGCAAAGAATACTG +TTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAG +TCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATT +AAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTT +TAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGC +TATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTAT +ACCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCTTTAGACACCTATCCTT +CTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATGGGATTTAACT +GCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAG +GTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCT +ATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATT +AATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTT +CTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACG +GTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAATAGAGCAACA +AGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGT +CTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTG +TTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTT +GCGAGAGACTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCA +GTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCATC +TTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCT +CATTTTGTTAACTTAGACAACCTGAGAGCTAATAACACTAAAGGTTCATT +GCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCAT +CTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATA +CTGTTACTAGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGT +TGCAGTTAAAATGTTTGATGCTTACGTTAATACGTTTTCATCAACTTTTA +ACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTGAA +CTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGC +AGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTG +AATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGT +TGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCG +TGACCTTGGTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGG +TAGCAAAAAGTCACAACATTGCTTTGATATGGAACGTTAAAGATTTCATG +TCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAA +TAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATG +TTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGG +TTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTAT 
+TTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTT +CAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGAC +ATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACAC +ATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCAT +TGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTG +CCTGGCACGATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACC +TAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACACCATCAAAACTTA +TAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGT +ACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATAC +CAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACAC +GTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTT +GAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCA +CGGCACTTGTGAAAGATCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTA +GATGGGTACTTAACAATGATTATTACAGATCTTTACCAGGAGTTTTCTGT +GGTGTAGATGCTGTAAATTTATTTACTAATATGTTTACACCACTAATTCA +ACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTG +TGGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGA +GCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCT +TATGTCATTCATTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTG +GTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCTTACTAATGAT +GTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGT +ACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATT +TCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGT +GTTTCCTTTAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAA +TAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTTTTACGC +AATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGA +GCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAA +GGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCAC +CACAAATCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATG +GCATTCCCATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGG +TACAACTACACTTAACGGTCTTTGGCTTGATGACGTAGTTTACTGTCCAA +GACATGTGATCTGCACCTCTGAAGATATGCTTAACCCTAATTATGAAGAT +TTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGT +TCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTA +AGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATT +CAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATC +TGGTGTTTACCAATGTGCTATGAGACACAATTTCACTATTAAGGGTTCAT +TCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACATAGATTATGACTGT 
+GTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGC +TGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAA +CAGCACAAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCT +TGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATT +TACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATG +AACCTCTAACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAA +ACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCA +AAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATG +AATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAA +AGTGCAGTGAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCAC +AATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTCAATGGTCTTTGT +TCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATT +GCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCT +CTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGG +TCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATG +GTTGATACTAGTTTGAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGT +AGTGTTACTAATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTA +GGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTAT +TATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTC +TGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCA +GAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACT +GGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTT +TTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGAC +TGACTCTTGGTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATAT +ATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAA +ACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAG +CCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTA +CTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGC +TCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTG +AAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAG +GGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGC +AACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAG +CTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGAT +TCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATC +TGAATTTGACCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTG +ATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTGAGGACAAGAGG +GCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAA +GTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTT 
+GTGTTCCCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTT +GTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATT +TACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATA +GTAAAATTGTTCAACTTAGTGAAATTAGTATGGACAATTCACCTAATTTA +GCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATT +ACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTG +CCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTAC +AACACAACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACA +GGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATTT +ATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGT +CCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAG +AGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTA +ATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTT +GCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACA +ACCAATCACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTC +AGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGGT +GGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCC +TAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTT +GTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTC +TGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACC +CATGCTTCAGTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGT +AAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAGTACTGATGTCG +TATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAA +TTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAA +TTTAATTGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACC +AACATGAAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCT +AAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATAT +ATCACGTCAACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTT +TAAGGCATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTC +ACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGA +TTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAAC +GTGTACGCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGA +AATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGG +TAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAG +TTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTG +ACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCC +TTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAA +AACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAAT 
+TGTGTTAACTGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAA +TGTTTTATTCTCTACAGTGTTCCCACTTACAAGTTTTGGACCACTAGTGA +GAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCAC +TTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTC +TAGACTTAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGC +ACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCA +GTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAA +TTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGGTTTCTTTAAGG +AAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAAT +GCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTG +TGATATCAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTG +ATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAAC +CTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACT +TTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATA +CAAAACGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCC +ATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAG +TACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCG +CCACTAGAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGT +TGGCACAACATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCT +TATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTTA +GAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGC +TTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAG +TGAAATGGTCATGTGTGGCAGTTCACTATATGTTAAACCAGGTGGAACCT +CATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGT +CAAGCTGTCACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAA +AATTGCCGATAAGTATGTCCGCAATTTACAACACAGACTTTATGAGTGTC +TCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCA +TATTTGCGTAAACATTTCTCAATGATGATACTTTCTGACGATGCTGTTGT +GTGTTTTAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAGA +ACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCA +AAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTC +TCAACATACAATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTT +ACCCAGACCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATC +GTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTAT +AGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCT +TTCATTTGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGA +CACATGTTAGACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAG +GTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCT 
+TACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGA +TGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGA +CCATGTCATACCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATG +TTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTA +GGAGGTATGAGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCC +ATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAATACATGTGTTG +GTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACA +AATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCT +TTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTT +ATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTT +TCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTT +TACTGGTTATCGTGTAACTAAAAACAGTAAAGTACAAATAGGAGAGTACA +CCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACA +ACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGT +AATGCCATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAA +TTACTGGCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAAT +GTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGG +ACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACT +ACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGAT +GCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAG +AATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGA +ATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAG +ACGACAGCAGATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTA +TGATTTGAGTGTTGTCAATGCCAGATTATGTGCTAAGCACTATGTGTACA +TTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGC +ACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTAT +AGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTG +TTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAA +GACAAATCAGCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCA +TGATGTTTCATCTGCAATTAACAGGCCACAAATAGGCGTGGTAAGAGAAT +TCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTAT +AATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAAC +TGTTGATTCATCACAGGGCTCAGAATACGACTATGTCATATTCACTCAAA +CCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATT +ACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTA +TGACAAGTTGCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAA +CTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTA +ATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACAC 
+TAAATTCAAAACTGAAGGTTTATGTGTTGACGTACCTGGCATACCTAAGG +ACATGACCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTAT +CAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAG +ACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTA +GAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGT +GTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATAC +AGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAAC +ACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATA +AAGATTGTACAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGT +CGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATCTATGAAGTATT +TTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCC +ACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTAT +TGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGG +GTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCAT +GGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGC +TGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGGACTATTGAATATCCTA +TAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACAC +ATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGA +CATTGGTAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAAT +GGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAA +GAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGT +ATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTG +TTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGT +GATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCACACACCGGCTTT +TGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACT +CTGACAGTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGAT +TATGTACCACTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGGTGG +TGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCTT +ATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTT +GATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAA +TGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTG +AAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGT +GTTGATGTAGAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGC +ATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGTACCAGAGGTGAAAA +TACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGAC +TACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTAT +GACTGACATAGCCAAGAAACCAATTGAAACGATTTGTGCACCACTCACTG +TCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCC 
+CGTAATGGTGTTCTTATTACAGAGGGTAGTGTTAAAGGTTTACAACCATC +TGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAG +CCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAA +CAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAA +ACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAAT +TCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTAT +GGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACT +AGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTA +TGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCA +TCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGA +AATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGA +CTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCAT +GTAGAAACATTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGG +TGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCTATTAGAAAAGT +GTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATG +ATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAAC +ATTAGCTGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTG +ATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACG +GGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGA +TTCAACTTTGATTGGTGATTGTGCAACTGTACATACAGCTAATAAATGGG +ATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAA +GAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACA +ACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATT +CTTGGAATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACA +GCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGG +ATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGC +ATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCC +TATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGC +TGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTC +TTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCT +AGTGATGTTCTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTT +TATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTATAACCAGAACTCAA +TCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTT +CAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTT +CCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACT +AAGAGGTTTGATAACCCTGCCCTACCATTTAATGATGGTGTTTATTTTGC +TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTT +TAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTT 
+GTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGATGT +TTACCAAAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATT +CTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATG +GACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTT +TAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTA +ATTTAGAGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTA +GATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTT +ACATAGAAGTTATTTGACTCCTGTTGATTCTTCTTCAGGTTGGACAGCTG +GTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTA +AAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGA +CCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAG +GAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTT +AGATTTCCTAATATTACAAACTTGTGCCCTTTTCATGAAGTTTTTAACGC +CACCACATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACT +GTGTTGCTGATTATTCTGTCATATATAATTTCGCACCATTTTTCGCTTTT +AAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAA +TGTCTATGCAGATTCATTTGTAATTAGAGGTAATGAAGTCAGCCAAATCG +CTCCAGGGCAAACTGGAAATATTGCTGATTATAATTATAAATTACCAGAT +GATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAGCTTGATTCTAA +GCCTAGTGGTAATTATAATTACCTGTATAGATTTCTTAGGAAGTCTAAGC +TCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAAC +AAACCTTGTAATGGTGTTGCAGGTCCTAATTGTTACTCTCCTTTACAATC +ATATGGTTTCCGACCCACTTATGGTGTTGGTCACCAACCATACAGAGTAG +TAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCT +AAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAA +TGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGC +CTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGT +GATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGG +TGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTC +TTTATCAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGAT +CAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCA +AACACGTGCAGGCTGTTTAATAGGGGCTGAATATGTCAACAACTCATATG +AGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAG +ACTAAGTCTCATCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGC +CTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACT +CTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTA +CCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGG +TGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTA 
+CACAATTAAAACGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAAC +ACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAAT +TAAATATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAA +AACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACA +CTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATAT +TGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTT +TGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTG +TTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATT +ACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAG +TTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTT +AATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGC +ACTTGGAAAACTTCAAGATGTGGTCAACCATAATGCACAAGCTTTAAACA +CGCTTGTTAAACAACTTAGCTCCAAATTTGGTGCAATTTCAAGTGTTTTA +AATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGA +TAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAAC +AATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACT +AAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGG +AAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAG +TCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACT +GCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGT +CTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATG +AACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGAT +GTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGA +ATTAGATTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACAT +CACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTA +AACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAA +TGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAA +AATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATA +GTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCT +CAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACT +CTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTA +TGGATTTGTTTATGAGAATCTTTACAATTGGAACTGTAACTTTGAAGCAA +GGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAAC +GATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTG +CACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACTCTCAAAAAG +AGATGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCT +GTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCC +TTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGT 
+ATAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCG +TTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATA +CTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATT +GTCATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTA +CCAGATTGGTGGTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTG +TTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACT +CAATTGAGTACAGACATTGGTGTTGAACATGTTACCTTCTTCATCTACAA +TAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACG +GTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCG +ACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACT +TATGTACTCATTCGTTTCGGAAGAGATAGGTGCGTTAATAGTTAATAGCG +TACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATC +CTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAG +TCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATT +CTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAG +TTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACT +ATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAAGAATGGAACCTAGTAAT +AGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCA +ACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTA +TGGCCAGTAACTTTAACTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAA +TTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGA +TGTGGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGT +TCCATGTGGTCATTTAATCCAGAAACTAACATTCTTCTCAACGTGCCACT +CCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCG +GAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGA +CGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACG +AACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACT +CAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAAC +ACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGT +GACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATA +TTACTAATTATTATGCGGACTTTTAAAGTTTCCATTTGGAATCTTGATTA +CATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATA +AATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTCTCTAAACGAAC +ATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCT +TTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAAC +CTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCT +GATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTG +TCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCAC 
+CTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATT +TTTCTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAA +AAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTT +TTTAGCCTTTCTGTTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGT +TCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAAC +ATGAAATTTCTTGTTTTCTTATGAATCATCACAACTGTAGCTGCATTTCA +CCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTG +ATGACTCGTGTTCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGA +GCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTC +TAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTT +TACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTG +CGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGT +TTTAGATTTCATCTAAACGAACAAACTTAAATGTCTGATAATGGACCCCA +AAATCAGCGAAATGCACTCCGCATTACGTTTGGTGGACCCTCAGATTCAA +CTGGCAGTAACCAGAATGGTGGGGCGCGATCAAAACAACGTCGGCCCCAA +GGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGG +CAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCA +ATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGA +ATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTT +CTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACA +AAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAA +GATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACA +ACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCA +GAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGT +TCAAGAAATTCAACTCCAGGCAGCAGTAAACGAACTTCTCCTGCTAGAAT +GGCTGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGAT +TGAACCAGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGC +CAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCA +AAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGAC +GTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGA +CAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAG +CGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTT +CGGGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGAT +CCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATA +CAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTG +ATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACT +CTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATC +CATGAGCCGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAA 
+GGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGT +CTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGT +AGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGG +GAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACGA +TCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCC +CTAATGTGTAAAATTAATTTTAGTAGTGCTATCC +>USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09 +TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAGAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAA +CACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG +GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGC +TACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTAT +ACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCT +GAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA +ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATT +CCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGC +TTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAA +CTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGC +ACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCA +AAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG +GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACC +ATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCG +CTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT 
+CTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAA +TATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCAT +CTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT +TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAA +TACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGA +TCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTT +ACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGA +GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTA +GTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTG +ATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGT +TGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGG +ACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCT +TTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATT +GGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAA +ACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGA +TTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGG +TTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGAC +ACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACA +CTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGT +TGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTG +TCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATT +GATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATG +AGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACT +CAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT +GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAG +GACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTA +GTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATT +GTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAA +TGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA 
+CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGA +CCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTC +AACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTT +GCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTT +AAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTG +ATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATG +ATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA +ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAA +TCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCT +TAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTT +TTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTT +ACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTG +CTTAAAAAGTGTAAAAGTGCTTTTTACATTCTACCATCTATTATCTCTAA +TGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC +TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACT +AAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTT +GTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGC +TGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTT +CACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAA +ACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTA +AGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTT +GAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACC +TCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGT +CCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTG +TTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG +TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTG +CATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTA +CAAGATGCTTATTATAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC 
+ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTA +GAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAA +AGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCT +TAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT +TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAA +TATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTG +GTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTG +TATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCC +TATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAAC +CAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTA +AGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACT +GGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGA +CTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTT +TTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAAC +AATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGA +AGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA +CCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCT +TGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAAC +CAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTA +ATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG +CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTT +CTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAA +CCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAAT +TGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCG +ACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATA +TTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTA +CTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTG +CAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTA +GATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTT +TGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCA 
+ATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAG +CTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAA +AGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG +TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATG +GTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGC +AAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAG +TACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACA +GTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC +TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAG +CTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGT +AAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAG +TCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTG +ATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATG +TCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGAT +GTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGA +CATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACA +AAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGT +GCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAA +TACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCA +ACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGG +TGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTG +TGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTC +ATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA +ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTAT +ACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGT +GGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATG +GTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATC +TGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGC +TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGC +CAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT +GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTAT +TCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTT 
+TTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGT +GTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTA +ATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCA +TCTATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTGCCTA +CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTG +CCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACA +CCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGA +TGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATC +ATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAA +GAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG +CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGT +GATGTGCTATTACCTTTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAG +CTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGT +TCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTT +GCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTT +GTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT +GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATT +TCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATG +CAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACC +TAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAG +CTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACAC +AATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGG +AATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTAT +GGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAAC +TATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAG +ACAGGTGGTTTCTCAATCGACTTACCACAACTCTTAATGACTTTAACCTT +GTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG +CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTG +GGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACA +ATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTA +CACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTC +CAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT 
+ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTG +TCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC +ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCG +TATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAAGACT +GTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGA +ACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTT +GACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTT +CCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTT +ACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTA +TTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAG +TTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGT +TTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGT +TTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCA +AGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGT +GGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGT +AAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAG +AATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATT +CTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACT +TTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTG +AAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTT +AGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGA +GCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGA +AGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAA +CGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACA +GGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAA +TGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATT +ATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTAC +AACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAA +ATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATC +CAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAG +TATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAA +GGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCA +CTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGA +TGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTAC +TTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAG +AGTGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTT +TGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTA +AAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCC 
+ACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAAC +TGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAG +ATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTG +TGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAA +TATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTT +GCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAG +TATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACT +TAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTA +GTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCG +TTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGC +ACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGA +TAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCC +AAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAG +AGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACT +TAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAG +ACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACA +ATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGA +CACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATT +TCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGC +GTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGT +ACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACAT +TAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATA +CAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATT +GTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATG +TTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATAT +GACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTG +GGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCA +TTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTT +ACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATT +TGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATC +AGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTG +TATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGA +TAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTT +TTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCT +GTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTT +CTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATC +GTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTT +GAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGC 
+TAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTA +ATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGAT +CAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAAC +TCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCG +TAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAA +AAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGG +AACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATA +GTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGAT +AGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCG +CAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTA +ATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCAGTTCACTA +TATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGC +TAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCAC +TTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTA +CAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGA +CTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGA +TACTTTCTGACGATGCTGTTGTGTGTTTTAATAGCACTTATGCATCTCAA +GGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAATTCTTTATTATCAAAA +CAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTA +AAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGT +GATGATTATGTGTACCTTCCTTACCCAGACCCATCAAGAATCCTAGGGGC +CGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTG +AACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCT +AATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAA +GCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGC +TTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCT +ATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTG +CAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCT +TATGTTGTAAATGCTGTTACGACCATGTCATACCAACATCACATAAATTA +GTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCAC +AGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCAC +ATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGT +TTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGC +AATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACA +CCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACT +GAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCT +GTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCAC +CACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGT 
+AAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGC +TGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATT +TTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTG +CCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATAT +CTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGC +AAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTT +GCTATTGGCCTAGCCCTCTACTACCCTTCTGCTCGCATAGTGTATACAGC +TTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATT +TGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAG +TGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTG +TACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATG +AAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTA +TGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACC +ACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAG +TGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGT +CGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA +TAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGT +TTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCA +CAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAA +AGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGA +TTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATAC +GACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGT +AAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCA +TAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAA +ATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACT +CTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCAC +CTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTT +GACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTAT +GATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTA +TCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGAT +GTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTT +ACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTT +ATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCA +CCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTATAAAGGACT +TCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACAC +TTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTT +GAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTG +TTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTT 
+ATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTT +ATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCA +TGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATG +CAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTT +GACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGC +GGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAG +ACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGT +GTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAG +TGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATT +CTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGAT +AGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATC +TAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAAC +ATGCATTCCACACACCGGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAA +CAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAA +ACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTA +TAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAG +TACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAG +CTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTA +CAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGA +CACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACAC +TGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAA +CAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATT +AAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGC +TGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATAT +CTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAATTGAA +ACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCA +AGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTA +GTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAAT +GGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAA +GAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGA +GTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTC +TTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTA +TGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTG +GTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTT +GAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCAT +AACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATT +TATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTA +GTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTAT 
+GCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAAT +CTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATG +CAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGC +AACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGT +GTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTT +ATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGT +TTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTA +ATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACT +GTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCC +TAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCA +CTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTG +GCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCAT +GGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCAT +CATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAA +CAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATAC +AAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTC +CCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATC +AATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGA +AAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGA +ACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGT +TAATCTTATAACCAGAACTCAATCATACACTAATTCTTTCACACGTGGTG +TTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAG +GACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACA +TGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGCCCTACCAT +TTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGA +GGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTAT +TGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTT +GTAATGATCCATTTTTGGATGTTTACCAAAAAAACAACAAAAGTTGGATG +GAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATA +TGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAAGAGGGTAATTTCA +AAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATA +TATTCTAAGCACACGCCTATTAATTTAGAGCGTGATCTCCCTCAGGGTTT +TTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTA +GGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGTTGAT +TCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCT +TCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAG +ATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTG +AAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGT 
+CCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCC +CTTTTCATGAAGTTTTTAACGCCACCACATTTGCATCTGTTTATGCTTGG +AACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCATATATAA +TTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAAT +TAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGA +GGTAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGA +TTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGA +ATTCTAACAAGCTTGATTCTAAGCCTAGTGGTAATTATAATTACCTGTAT +AGATTGTTTAGGAAGTCTAAGCTCAAACCTTTTGAGAGAGATATTTCAAC +TGAAATCTATCAGGCCGGTAACAGACCTTGTAATGGTGTTGCAGGTCCTA +ATTGTTACTCTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTT +GGTCACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGC +AACAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACA +AATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACT +GAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGATATTGC +TGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACA +TTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAAT +ACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGT +CCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATT +CTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCT +GAATATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTAT +ATGCGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTG +TAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAAT +TCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTAT +TAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAG +ATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTG +TTGCAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAAT +AGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAAC +AAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCA +CAAATATTACCAGATCCATCAAAACCAAACAAGAGGTCATTTATTGAAGA +TCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAAT +ATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAA +AAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGAT +TGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGA +CCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCT +TATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCA +AAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACT +CACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAAC 
+CATAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATT +TGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAG +TTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGT +TTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGC +TTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAAT +CAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCT +CAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGC +ACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAG +CACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTT +GTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACAC +ATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAG +TTTATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGAT +AAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTC +TGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCA +ATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTT +GGAAAGTATGAGCAGTACATAAAATGGCCATGGTACATTTGGCTAGGTTT +TATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTA +TGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGC +TGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATT +ACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTTACAAT +TGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAG +ATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTC +GGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTC +CAAAATCATAACTCTCAAAAAGAGATGGCAAATAGCACTCTCCAAGGGTG +TTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACAC +CTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGC +TTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGC +TTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCC +AACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTA +CAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAA +GTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGG +GAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGA +CTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAAC +ATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACAT +GTCCAAATTCACACAATTGACGGTTCATCCGGAGTTGTTAATCCAGTAAT +GGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAG +CACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATA +GGTGCGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATT 
+CTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACT +GCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTAC +TCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTA +AACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGC +CATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCC +TTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGT +CTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAA +GTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGC +TTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCA +ATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTT +CAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTTAATCCAGAAACTA +ACATTCTTCTTAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTT +CTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCG +TATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAG +AAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCT +TCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAG +GATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATA +TTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTT +TCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAG +TTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTA +TCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACC +AATGGAGATTCTCTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGA +TAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGT +ACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAA +TTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTA +GCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAG +TTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGT +TCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTA +TAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCAT +TAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTA +ATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGA +AACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTATGAATCA +TCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACT +CAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTC +TAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAAT +TGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCTGTACATCGATATC +GGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACC +TAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAG 
+AGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTT +AAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACTCCGCATTACG +TTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGCG +ATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGT +TCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGA +CAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTA +CTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAG +ATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT +GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGA +GGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTA +ACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAA +GGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTC +CTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTA +AACGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTT +GCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG +TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTG +AGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAAT +GTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTT +TGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGC +AAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGC +ATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGC +CATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC +TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAA +AAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACA +GAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATT +TCTCCAAACAATTGCAACAATCCATGAGCCGTGCTGACTCAACTCAGGCC +TAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGC +TTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTA +ACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT +TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTC +ACCTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAA +TGTGTAAAATTAATTTTAGTA +>USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27 +AAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGT +ATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTA +TCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGC +ACATCTAGGTTTTGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTG +TCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTA +CAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGT 
+CTTATCAGAGGCACGTCAACATCTTAGAGATGGCACTTGTGGCTTAGTAG +AAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATC +AAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCT +GGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTG +GTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTT +CTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGGTACGGCGC +CGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATG +AAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGT +GAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAA +CAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTC +TAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTT +ATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAAT +TGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTT +TTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCA +AATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAGGGGT +TGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATC +CAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATG +AAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAA +AGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCA +CTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCA +GCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCA +TAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTG +CCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCC +TATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGT +TGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCC +AAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAA +GAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGT +GGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAAT +CCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGG +AATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATC +AGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTG +CTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATATTAGAT +GGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATC +TGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTG +TTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTAT +GAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGCGAAGTTTAAGGAAGG +TGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCT +GTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATT 
+AAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTT +GTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATT +TAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTT +AAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGA +AATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGG +AAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGT +GAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCT +TATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTA +ATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACA +AAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAG +TGTGAATATCATTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATG +AGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTC +GCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGA +ATTACTTACACCACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACAT +ACTACTTATTTGATGAGTCTGGTGAGTTTAAATTGGCTTCACATATGTAT +TGTTCTTTTTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGA +AGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATT +ACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCT +GAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGT +TGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAA +TTGTTGAGGTTCAACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAG +ACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGACAATGT +ATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAA +CAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTT +GCAGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGA +TGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTT +TAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAAT +GTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTT +TAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTG +GTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACA +AATGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTC +AAGCTTTTTGGAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTG +AGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTAAACCTTCA +GTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGA +AGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTAC +TTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTT +AGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGG +TGATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAA 
+AGGCTAGTGGCACTACTGAAATGCTAGCGAAAGCTTTGAGAAAAGTGCCA +ACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACAC +TGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCTTTTTACA +TTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTT +TCTTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATT +AATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTA +AATATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGA +TTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAACACACT +TAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACAC +ATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTG +CCAGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGG +TTATCTTACTTCTTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCA +TCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACA +CAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACAC +TAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACA +ATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTT +ACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAAT +GACATATGGACAACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTA +CTAAAATAAAACCTCATAATTCACATGAAGGTAAAACATTTTATGTTTTA +CCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAAC +TGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAA +AGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGAT +AACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTT +GAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTG +GTGAAGCTGCTAACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACA +GTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTTTCAACA +TGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTT +GTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATG +GGCACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTAC +GTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTG +TTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTT +ACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTATAAACA +TATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAA +AGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAAC +AGTTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGT +TTGTACAGAAATTGACCCTAAGTTGGACAATTATTATAAGAAAGACAATT +CTTATTTCACAGAGCAACCAATTGATCTTGTACCAAACCAACCATATCCA +AACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGC 
+TGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGC +TTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGAT +TATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAA +ACCTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAAC +CAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACA +TCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGATAA +TCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATC +CTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTT +GTAGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGA +AGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTC +TTACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACC +CTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGATACTAT +AGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTA +ACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTAT +TTCTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTC +TAGAATTAAAGCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGA +GTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCT +AATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGT +TTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGT +CTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTG +AACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTG +TAGTGTTTGTCTTAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAG +AAACTATACAAATTACCATTTCATCTTTTAAATGGGATTTAACTGCTTTT +GGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTT +CTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTG +CAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTT +GTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGC +ATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTA +ATTCATCAACTTGTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTC +GAATGTACAACTATTGTTAATGGTGTTAGAAGGTCTTTTTATGTCTATGC +TAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATT +GTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGA +GACTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTC +TTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACT +TTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTT +GTTAACTTAGACAACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTAT +TAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAA +AATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTA 
+CTAGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGT +TAAAATGTTTGATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTAC +CAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTGAACTTGCA +AAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCG +GCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTC +TTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAAT +AACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCT +TGGTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAA +AAAGTCACAACATTGCTTTGATATGGAACGTTAAAGATTTCATGTCATTG +TCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTT +ACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAA +CAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAG +CAGTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTA +TTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTG +AAATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCA +TCTACAGACACTTGTTTTGCTAACAAACATGCTGATTTTGACACATGGTT +TAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTG +CTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGC +ACGATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGT +TTTTAGTGCAGTTGGTAACATCTGTTACACACCATCAAAACTTATAGAGT +ACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATT +TTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGT +ACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATG +TGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGT +TCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGACACGGCAC +TTGTGAAAGATCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGG +TACTTAACAATGATTATTACAGATCTTTACCAGGAGTTTTCTGTGGTGTA +GATGCTGTAAATTTATTTACTAATATGTTTACACCACTAATTCAACCTAT +TGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTGGCTA +TCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTT +GGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTC +ATTCATTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTT +ATTCTGTTATTTACTTGTACTTGACATTTTATCTTACTAATGATGTTTCT +TTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTT +CTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATT +GGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCC +TTTAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGA +AATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTTTTACGCAATATA 
+ATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATG +GATACAACTAGCTACAGAGAAGCCGCTTGTTGTCATCTCGCAAAGGCTCT +CAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAA +TCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTC +CCATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAAC +TACACTTAACGGTCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATG +TGATCTGCACCTCTGAAGATATGCTTAACCCTAATTATGAAGATTTACTC +ATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACT +CAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTG +ATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCA +GGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGT +TTACCAATGTGCTATGAGACACAATTTCACTATTAAGGGTTCATTCCTTA +ATGGTTCATGTGGTAGTGTTGGTTTTAACATAGATTATGACTGTGTCTCT +TTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGCTGGCAC +AGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCAC +AAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTG +TACGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCAC +AACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTC +TAACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGA +ATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCAAAATGG +TATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTA +CACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCA +GTGAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTT +GACTTCACTTTTAGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTT +TTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATG +TCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTT +GTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATA +TGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGAT +ACTAGTTTGAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTT +ACTAATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAG +TGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGT +AATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTAC +TTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTA +TTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAAT +ACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTAC +TTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTC +TTGGTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAAT +TCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAA 
+CATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTG +TACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCA +GTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATG +TGTCCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGCCT +TTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCT +GTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTT +ACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTG +CTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAA +GTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATT +TGACCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAG +CTATGACCCAAATGTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAA +GTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGA +TAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTC +CCTTGAACATAATACCTCTTACAACAGCATCCAAACTAATGGTTGTCATA +CCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTA +TGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAA +TTGTTCAACTTAGTGAAATTAGTATGGACAATTCACCTAATTTAGCATGG +CCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAA +TAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTA +CTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACA +ACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTT +GAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATTTATACAG +AACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAA +GTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAGAGGTAT +GGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAA +CAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTA +GATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAAT +CACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAA +TAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCA +TCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGG +ATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTA +ATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGT +ATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCT +TCAGTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGC +AGCCCGTCTTACACCGTGCGGCACAGGCACTAGTACTGATGTCGTATACA +GGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTA +AAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAAT +TGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATG 
+AAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACAT +GACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACG +TCAACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGC +ATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTCACATAC +AATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGT +AGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTAC +GCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCT +GGTATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTG +GTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTG +TTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGG +GCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACAT +TAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCT +TTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTT +AACTGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTT +ATTCTCTACAGTGTTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAA +TATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGA +GAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACT +TAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTG +CTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCT +GCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAA +CAAAGACTTCTATGACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAA +GTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCT +ATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATAT +CAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTT +ACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGAC +AAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTA +TGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAAC +GTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCCATTAGT +GCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTAT +GACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTA +GAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCAC +AACATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGG +TTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTTAGAATTA +TGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCA +CACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAAT +GGTCATGTGTGGCAGTTCACTATATGTTAAACCAGGTGGAACCTCATCAG +GAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCT +GTCACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGC 
+CGATAAGTATGTCCGCAATTTACAACACAGACTTTATGAGTGTCTCTATA +GAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCATATTTG +CGTAAACATTTCTCAATGATGATACTTTCTGACGATGCTGTTGTGTGTTT +TAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTA +AGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGT +TGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACA +TACAATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAG +ACCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAAAA +ACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGC +TTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATT +TGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATG +TTAGACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTG +GGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGG +CTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGT +GCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGT +CATACCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCA +ATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGT +ATGAGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCCATTGTG +TGCTAATGGACAAGTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCG +ATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCT +GGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGC +AGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTA +TTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGG +GAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGG +TTATCGTGTAACTAAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTG +AAAAAGGTGACTATGGTGGTGCTGTTGTTTACCGAGGTACAACAACTTAC +AAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCC +ATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTG +GCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCA +AATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACC +TGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCCCTCTACTACCCTT +CTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTA +TGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTAT +ACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAA +CATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACA +GCAGATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTT +GAGTGTTGTCAATGCCAGATTATGTGCTAAGCACTATGTGTACATTGGCG +ACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTA 
+GAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCC +AGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACA +CTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAA +TCAGCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGT +TTCATCTGCAATTAACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTA +CACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCA +CAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGA +TTCATCACAGGGCTCAGAATACGACTATGTCATATTCACTCAAACCACTG +AAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGA +GCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAA +GTTGCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTAC +AAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACT +GGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATT +CAAAACTGAAGGTTTATGTGTTGACGTACCTGGCATACCTAAGGACATGA +CCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTT +AATGGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGT +ACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAG +CTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAAC +CTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTT +TTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCA +TACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATT +GTACAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATT +TGTCTTATGGGCACATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGA +AAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGC +TTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATT +TGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTA +CAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAAT +GCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCA +CGAGTGCTTTGTTAAGCGTGTTGACTGGACTATTGAATATCCTATAATTG +GTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTT +GTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGG +TAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGT +TCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTA +TTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCT +ATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTA +GATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGT +GGCAGTTTGTATGTAAATAAACATGCATTCCACACACCGGCTTTTGATAA +AAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACA 
+GTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTA +CCACTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGT +CTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCTTATAACA +TGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACT +TATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGC +TTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTAC +CAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGAT +GTAGAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGA +GCTTTGGGCTAAGCGCAACATTAAACCAGTACCAGAGGTGAAAATACTCA +ATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGACTACAAA +AGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGA +CATAGCCAAGAAACCAATTGAAACGATTTGTGCACCACTCACTGTCTTTT +TTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAAT +GGTGTTCTTATTACAGAGGGTAGTGTTAAAGGTTTACAACCATCTGTAGG +TCCCAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAA +AAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTA +CCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAG +GAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTG +AACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGAT +TTTAGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAA +ACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACA +GTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAG +TGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAAT +AAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTG +ACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAA +ACATTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGC +TATGCCTAATCTTTACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACC +TTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAAT +GTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGC +TGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAG +GAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACG +CTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAAC +TTTGATTGGTGATTGTGCAACTGTACATACAGCTAATAAATGGGATCTCA +TTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAAGAAAAT +GACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAA +GCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGA +ATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTT +GTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAA 
+TTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAA +ATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCT +TTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTAT +GTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTA +AAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGAT +GTTCTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGC +CACTAGTCTCTAGTCAGTGTGTTAATCTTATAACCAGAACTCAATCATAC +ACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATC +CTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATG +TTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGG +TTTGATAACCCTGCCCTACCATTTAATGATGGTGTTTATTTTGCTTCCAC +TGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATT +CGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATT +AAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGATGTTTACAA +AAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTG +CGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTT +GAAGGAAAAGAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAA +TATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAG +AGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTG +CCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAG +AAGTTATTTGACTCCTGTTGATTCTTCTTCAGGTTGGACAGCTGGTGCTG +CAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATAT +AATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCT +CTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCT +ATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTT +CCTAATATTACAAACTTGTGCCCTTTTCATGAAGTTTTTAACGCCACCAC +ATTTGCATCTGTTTATGCTTGGAATAGGAAGAGAATCAGCAACTGTGTTG +CTGATTATTCTGTCATATATAATTTCGCACCATTTTTCGCTTTTAAGTGT +TATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTA +TGCAGATTCATTTGTAATTAGAGGTAATGAAGTCAGCCAAATCGCTCCAG +GGCAAACTGGAAATATTGCTGATTATAATTATAAATTACCAGATGATTTT +ACAGGCTGCGTTATAGCTTGGAATTCTAACAAGCTTGATTCTAAGCCTAG +TGGTAATTATAATTACCTGTATAGATTTCTTAGGAAGTCTAAGCTCAAAC +CTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAACAAACCT +TGTAATGGTGTTGCAGGTCCTAATTGTTACTCTCCTTTACAATCATATGG +TTTCCGACCCACTTATGGTGTTGGTCACCAACCATACAGAGTAGTAGTAC +TTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAG +TCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTT 
+AACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCC +AACAATTTGGCAGAGACATTGCTGACACTATTGATGCTGTCCGTGATCCA +CAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAG +TGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATC +AGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTT +ACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACG +TGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTG +ACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAAG +TCTCATCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACAC +TATGTCACTTGGTGCAGAAAATTTAGTTGCTTACTCTAATAACTCTATTG +CCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTG +TCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTC +AACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAAT +TAAAACGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAA +GAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAATA +TTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAA +GCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCA +GATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGC +TAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCAC +CTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCG +GGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAAT +ACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACAC +AGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGT +GCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGG +AAAACTTCAAGATGTGGTCAACCATAATGCACAAGCTTTAAACACGCTTG +TTAAACAACTTAGCTCCAAATTTGGTGCAATTTCAAGTGTTTTAAATGAT +ATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTT +GATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAA +TTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATG +TCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGG +CTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCT +TGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCT +GCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGT +TTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCAC +AAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTA +ATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGA +TTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAG +ATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATT 
+CAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATC +TCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGC +CATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATG +GTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGG +CTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGC +CAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATT +TGTTTATGAGAATCTTTACAATTGGAACTGTAACTTTGAAGCAAGGTGAA +ATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACC +GATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTC +TTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACTCTCAAAAAGAGATGG +CAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTT +GTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAG +CCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTANNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAACTCAATTG +AGTACAAACATTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAAT +TGTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCAT +CCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGACG +ACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTA +CTCATTCGTTTCGGAAGAGATAGGTGCGTTAATAGTTAATAGCGTACTTC +TTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACT +GCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGT +AAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTA +GAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTC +TGTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACC +GTTGAAGAGCTTAAAAAGCTCCTTGAAGAATGGAACCTAGTAATAGGTTT +CCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGA +ATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCA +GTAACTTTAACTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGAT +CACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGC +TCAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATG +TGGTCATTTAATCCAGAAACTAACATTCTTCTCAACGTGCCACTTCATGG +CACTATTTTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTG +TGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGT 
+GACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCT +TTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTT +TTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGAC +CATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAAC +AGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTA +ATTATTATGCGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCAT +AAACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATT +CTCAATTAGATGAAGAGCAACCAATGGAGATTCTCTAAACGAACATGAAA +ATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCA +CTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCT +CTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAAC +AAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGA +CGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAAC +TGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTT +ATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTTAAAAGAAA +GACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGC +CTTTCTGTTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCAC +TTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAA +TTTCTTGTTTTCTTATGAATCATCACAACTGTAGCTGCATTTCACCAAGA +ATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACC +CGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGA +AAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATC +ACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTT +TTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGT +TCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGA +TTTCATCTAAACGAACAAACTTAAATGTCTGATAATGGACTCCAAAACCA +GCGAAATGCACTCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCA +GTAACCAGAATGGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTA +CCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGA +AGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCA +GTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGT +GGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTA +CCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGATG +GCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCAC +ATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCC +TCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCG +GCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGA +AATTCAACTCCAGGCAGCAGTAAACGAACTTCTCCTGCTAGAATGGCTGG 
+CAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACC +AGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACT +GTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACG +TACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTC +CAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGA +ACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTC +AGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAA +CGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAAT +TTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAAC +ATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTGATGAAA +CTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTT +CCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAG +CCGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGA +TGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTC +TTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAA +CTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGAC +TTGAAAGAGCCACCACATTTTCACCTACAGTGAATAATGCTAGGGAGAGC +TGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTAT +CCCCATGTGATTTTAAT +>England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAGAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAA +CACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG +GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGC +TACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTAT 
+ACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCT +GAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA +ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATT +CCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGC +TTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAA +CTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGC +ACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCA +AAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG +GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACC +ATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGTG +CTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT +CTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAA +TATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCAT +CTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT +TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAA +TACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGA +TCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTT +ACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGA +GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTA +GTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTG +ATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGT +TGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGG +ACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCT +TTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATT +GGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAA +ACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGA +TTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGG +TTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGAC +ACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACA +CTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGT 
+TGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTG +TCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATT +GATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATG +AGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACT +CAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT +GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAG +GACAATCATACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTA +GTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATT +GTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAA +TGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA +CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGA +CCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTC +AACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTT +GCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTT +AAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTG +ATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATG +ATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA +ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAA +TCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCT +TAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTT +TTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGATAATTATATAACCACTT +ACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTG +CTTAAAAAGTGTAAAAGTGCTTTTTACATTCTACCATCTATTATCTCTAA +TGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC +TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACT +AAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTT +GTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGC +TGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTT +CACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAA 
+ACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTA +AGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTT +GAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACC +TCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGT +CCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTG +TTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG +TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTG +CATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTA +CAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTA +GAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAA +AGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCT +TAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT +TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAA +TATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTG +GTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTG +TATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCC +TATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAAC +CAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTA +AGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACT +GGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGA +CTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTT +TTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAAC +AATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGA +AGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA +CCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCT +TGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAAC +CAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTAGCCACACAGATCTA +ATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG 
+CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTT +CTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAA +CCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAAT +TGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCG +ACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATA +TTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTA +CTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTG +CAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTA +GATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTT +TGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCA +ATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAG +CTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAA +AGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG +TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATG +GTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGC +AAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAG +TACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACA +GTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC +TTATGAAAGACATTCTCTGTCTCATTTTGTTAACTTAGACAACCTGAGAG +CTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGT +AAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAG +TCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTG +ATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATG +TCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGAT +GTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGA +CATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACA +AAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGT +GCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAA +TACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCA +ACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGG 
+TGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTG +TGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTC +ATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA +ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTAT +ACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGT +GGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATG +GTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATC +TGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGC +TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGC +CAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT +GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTAT +TCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTT +TTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGT +GTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTA +ATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCA +TCTATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTGCCTA +CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTG +CCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACA +CCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGA +TGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATC +ATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAA +GAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG +CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGT +GATGTGCTATTACCTTTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAG +CTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGT +TCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTT +GCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTT +GTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT +GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATT +TCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATG +CAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACC +TAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAG +CTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACAC 
+AATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGG +AATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTAT +GGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAAC +TATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAG +ACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTT +GTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG +CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTG +GGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACA +ATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTA +CACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTC +CAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT +ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTG +TCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC +ACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCG +TATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAAGACT +GTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGA +ACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTT +GACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTT +CCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTT +ACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTA +TTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAG +TTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGT +TTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGT +TTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCA +AGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGT +GGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGT +AAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAG +AATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATT +CTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACT +TTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTG +AAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTT +AGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGA +GCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGA +AGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAA +CGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACA +GGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAA 
+TGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATT +ATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTAC +AACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAA +ATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATC +CAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAG +TATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAA +GGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCA +CTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGA +TGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTAC +TTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAG +AGTGATGGAACTGGTACTGTTTATACAGAACTGGAACCACCTTGTAGGTT +TGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTA +AAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCC +ACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAAC +TGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAG +ATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTG +TGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAA +TATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTT +GCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAG +TATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACT +TAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTA +GTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCG +TTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGC +ACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGA +TAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCC +AAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAG +AGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACT +TAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAG +ACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACA +ATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGA +CACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATT +TCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGC +GTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGT +ACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACAT +TAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATA +CAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATT +GTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATG +TTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATAT 
+GACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTG +GGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCA +TTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTT +ACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATT +TGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATC +AGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTG +TATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGA +TAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTT +TTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCT +GTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTT +CTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTATTATC +GTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTT +GAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGC +TAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTA +ATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGAT +CAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAAC +TCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCG +TAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAA +AAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGG +AACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATA +GTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGAT +AGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCG +CAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTA +ATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCAGTTCACTA +TATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGC +TAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCAC +TTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTA +CAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGA +CTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGA +TACTTTCTGACGATGCTGTTGTGTGTTTTAATAGCACTTATGCATCTCAA +GGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAA +CAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTA +AAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGT +GATGATTATGTGTACCTTCCTTACCCAGACCCATCAAGAATCCTAGGGGC +CGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTG +AACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCT +AATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAA +GCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGC 
+TTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCT +ATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTG +CAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCT +TATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTA +GTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCAC +AGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCAC +ATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGT +TTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGC +AATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACA +CCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACT +GAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCT +GTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCAC +CACTTAACCGAAATTATGTCTTTACTGGTTATCGTATAACTAAAAACAGT +AAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGC +TGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATT +TTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTG +CCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATAT +CTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGC +AAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTT +GCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGC +TTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATT +TGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAG +TGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTG +TACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATG +AAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTA +TGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACC +ACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAG +TGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGT +CGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA +TAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGT +TTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCA +CAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAA +AGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGA +TTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATAC +GACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGT +AAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCA +TAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAA +ATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACT 
+CTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCAC +CTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTT +GACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTAT +GATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTA +TCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGAT +GTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTT +ACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTT +ATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCA +CCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACT +TCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACAC +TTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTT +GAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTG +TTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTT +ATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTT +ATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCA +TGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATG +CAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTT +GACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGC +GGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAG +ACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGT +GTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAG +TGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATT +CTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGAT +AGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATC +TAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAAC +ATGCATTCCACACACCGGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAA +CAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAA +ACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTA +TAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAG +TACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAG +CTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTA +CAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGA +CACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACAC +TGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAA +CAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATT +AAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGC +TGCTAATACTGTGATCTGGGACTATAAAAGAGATGCTCCAGCACATATAT +CTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAATTGAA 
+ACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCA +AGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTA +GTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAAT +GGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAA +GAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGA +GTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTC +TTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTA +TGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTG +GTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTT +GAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCAT +AACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATT +TATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTA +GTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTAT +GCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAAT +CTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATG +CAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGC +AACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGT +GTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTT +ATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGT +TTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTA +ATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACT +GTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCC +TAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCA +CTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTG +GCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCAT +GGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCAT +CATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAA +CAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATAC +AAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTC +CCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATC +AATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGA +AAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGA +ACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGT +TAATCTTATAACCAGAACTCAATCATACACTAATTCTTTCACACGTGGTG +TTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCGG +GACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACA +TGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGCCCTACCAT +TTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGA 
+GGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTAT +TGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTT +GTAATGATCCATTTTTGGATGTTTACCAAAAAAACAACAAAAGTTGGATG +GAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATA +TGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAATGAGGGTAATTTCA +AAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATA +TATTCTAAGCACACGCCTATTAATTTAGAGCGTGATCTCCCTCAGGGTTT +TTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTA +GGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGGT +TCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCT +TCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAG +ATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTG +AAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGT +CCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCC +CTTTTCATGAAGTTTTTAACGCCACCACATTTGCATCTGTTTATGCTTGG +AACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCATATATAA +TTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAAT +TAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGA +GGTAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGA +TTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGA +ATTCTAACAAGCTTGATTCTAAGCCTAGTGGTAATTATGATTACCTGTAT +AGATTGTTTAGGAAGTCTAAGCTCAAACCTTTTGAGAGAGATATTTCAAC +TGAAATCTATCAGGCCGGTAACAGACCTTGTAATGGTGTTGCAGGTCCTA +ATTGTTACTCTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTT +GGTCACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGC +ATCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACA +AATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACT +GAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGC +TGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACA +TTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAAT +ACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGT +CCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATT +CTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCT +GAATATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTAT +ATGCGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTG +TAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAAT +TCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTAT +TAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAG 
+ATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTG +TTGCAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAAT +AGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAAC +AAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCA +CAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGA +TCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAAT +ATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAA +AAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGAT +TGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGA +CCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCT +TATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCA +AAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACT +CACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAAC +CATAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATT +TGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAG +TTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGT +TTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGC +TTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAAT +CAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCT +CAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGC +ACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAG +CACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTT +GTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACAC +ATTTGCGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAG +TTTATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGAT +AAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTC +TGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCA +ATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTT +GGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTT +TATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTA +TGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGC +TGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATT +ACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTTACAAT +TGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAG +ATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTT +GGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTC +CAAAATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTG +TTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACAC 
+CTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGC +TTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGC +TTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCC +AACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTA +CAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAA +GTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGG +GAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGA +CTATTACCAGCTGTACTCAACTCAATTGAGTACAGACATTGGTGTTGAAC +ATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACAT +GTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAAT +GGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAG +CACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATA +GGTGCGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATT +CTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACT +GCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTAC +TCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTA +AACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGC +CATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCC +TTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGT +CTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAA +GTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGC +TTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCA +ATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTT +CAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTTAATCCAGAAACTA +ACATTCTTCTCAATGTGCCACTCCATGGCACTATTCTGACCAGACCGCTT +CTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCG +TATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAG +AAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCT +TCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAG +GATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATA +TTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTT +TCAGGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAG +TTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTA +TCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACC +AATGGAGATTCTCTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGA +TAACACTCGTTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGT +ACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAA +TTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTA 
+GCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAG +TTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGT +TCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTA +TAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCAT +TAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTA +ATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGA +AACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCA +TCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACT +CAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTC +TAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAAT +TGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATC +GGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACC +TAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATCAAGACTTTTTAG +AGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTT +AAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACTCCGCATTACG +TTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGNNNNNNNNNG +TGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTG +CGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTC +CCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCA +AATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTA +AAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGG +CCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGT +TGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCA +ATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACA +TTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTC +TTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAG +GCAGCAGTAAACGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGAT +GCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAA +AATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAAT +CTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAA +GCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCA +AGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAAC +ATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGA +ATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTA +CACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAG +TCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACA +GAGCCTAAAAAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACC +GCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATT 
+TGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCCGTGCTGACTCA +ACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAA +CGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGA +ATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACA +TAGCAATCTTTAATCAGTGTGTAACATTAGGGAGTACTTGAAAGAGCCAC +CACATTTTCACCNNNNNNNNNNNNNNNNNNNNNNNNNNTACAGTGAACAA +TGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATT +TTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +>Denmark/DCGC-129060/2021|OV917631.1|2021-07-11 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCGATCTC +TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTTTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATT +CAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATA +AAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA +GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAA +CACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACG +GAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGC +TACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTAT +ACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCT +GAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA +ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATT +CCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGC +TTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAA +CTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGC +ACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCA +AAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG 
+GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACC +ATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCG +CTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGT +CTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAA +TATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCAT +CTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT +TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAA +TACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGA +TCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTT +ACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGA +GACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTA +GTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTG +ATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGT +TGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGG +ACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCT +TTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATT +GGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC +TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAA +ACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGA +TTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGG +TTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGAC +ACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACA +CTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT +GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGT +TGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTG +TCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATT +GATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATG +AGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACT +CAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT +TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATT +GGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAG +GACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT 
+AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTA +GTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATT +GTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAA +TGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA +CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGA +CCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTC +AACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTT +GCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTT +AAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTG +ATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGT +GAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATG +ATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAA +ACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAA +TCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCT +TAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTT +TTAACTGCTGTGGTTATACCTACTAAAAAGTCTGGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTT +ACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTG +CTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAA +TGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGC +TTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACT +AAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAA +CAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTT +GTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGC +TGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTT +CACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAA +ACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTA +AGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCAC +CTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTT +GAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACC +TCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGT +CCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTG +TTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGG 
+TACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAA +TGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTG +CATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTA +CAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTA +GAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAA +AGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCT +TAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAAT +TTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAA +TATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTG +GTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTG +TATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCC +TATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAAC +CAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAG +TTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTA +AGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACT +GGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGA +CTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTT +TTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAAC +AATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGA +AGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAA +CTAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCT +TGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAAC +CAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTA +ATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTG +CTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTT +CTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAA +CCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAAT +TGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCG +ACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATA +TTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTAC +TCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTA +CTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTG 
+CAACCTACTGTACTGGTTCTATATCTTGTAGTGTTTGTCTTAGTGGTTTA +GATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTT +TGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCA +ATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTC +TTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAG +CTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAA +AGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG +TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATG +GTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGC +AAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAG +TACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAA +GACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACA +GTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC +TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAG +TTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGT +AAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAG +TCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTG +ATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTT +AATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATG +TCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGAT +GTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGA +CATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACA +AAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGT +GCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAA +TACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCA +ACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGG +TGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTG +TGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTC +ATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTA +ACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTAT +ACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGT +GGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATG +GTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATC +TGTTACACACCATCAAAACTTATAGAGTACACTGATTTTGCAACATCAGC 
+TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGC +CATTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTAT +GAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTAT +TCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTT +TTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGT +GTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTA +ATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCA +TCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTA +CTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTG +CCTTTAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACA +CCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGA +TGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATC +ATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAA +GAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTG +CGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGT +GATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAG +CTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGT +TCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTT +GCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTT +GTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTT +GATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATT +TCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATG +CAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACC +TAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAG +CTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCC +AATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGG +AATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTAT +GGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAAC +TATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAG +ACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTT +GTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTG +CTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTG 
+GGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACA +ATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTA +CACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTC +CAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT +ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTG +TCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCC +GCTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCG +TATTATGACATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGC +TAAAAGACTGTGTTATGTATGCATCAGCTGTGGTGTTACTAATCCTTATG +ACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTAT +GAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATC +AAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCA +GGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTG +TGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTA +TAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGC +CTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGA +TTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTAC +TCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTG +GGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAAT +GTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAAC +TCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACAC +AATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGT +TTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACA +AGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCC +TCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGA +AGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAA +AGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCA +GCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAAT +GTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTA +TGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTC +AACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAAT +ACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACA +CATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTG +TGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG +TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAA +CAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGT +CCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGC +TTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTA 
+GGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGA +TTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACC +TTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTAT +ACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGT +TTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGC +CAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAG +CTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTT +AAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACC +GGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGT +ACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTA +AAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGG +TTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTT +ATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGAT +GCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACA +CCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACAT +CTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTT +GTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTT +GTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTA +TAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGT +TTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACT +AAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGG +TAATTGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATG +ATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGAT +ATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTT +AAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTG +TACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGT +GATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTA +TTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAG +AGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTG +TTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTT +TAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATG +ACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTG +TTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGG +TGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTG +TACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAA +TTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCT +ATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACA +ATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT 
+GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATT +AAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATG +ACTACTATCGTTATAATCTACCAACAATTTGTGATATCAGACAACTACTA +TTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTG +TATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTT +TTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGT +TATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCC +TACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAG +CTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAG +TTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGT +AGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAA +CTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCT +AAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGT +TCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATA +GATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGC +AGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAAC +TGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATG +TTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTC +CGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGT +TGACACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCT +CAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTAT +GCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTA +TTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTG +ACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTT +AAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAAT +CCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACAC +TTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACT +AAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATA +CATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATT +CTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTT +TATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTG +TGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTA +GACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCA +CATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTG +TGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATT +GTAAATCACATAAACTACCCATTAGTTTTCCATTGTGTGCTAATGGACAA +GTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGA +CTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTT 
+TAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTC +AAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACG +TGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAAC +CTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT +AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTA +TGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTG +GTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCT +ACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAAC +ACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGG +TTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAG +AGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGT +GTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGATAAGGCAT +TAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCT +CGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTA +TGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTG +TCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAAT +GCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATT +ACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATT +TCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTC +GGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTT +GGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCT +TTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATT +AACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGC +TTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAG +CCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGC +TCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTC +TTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCA +TACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACA +AGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGT +AACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTA +CACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGT +TTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACT +CATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTA +ACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATT +GGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAA +TTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTAC +CTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGT +GCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTA 
+CAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAA +GTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCA +CATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGA +GCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTT +CAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTAT +AATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACA +AAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTA +GTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT +AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAA +GATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCAT +TATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCT +ATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACA +GCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATG +CCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGC +AATGTCGATAGATATCCTGTTAATTCCATTGTTTGTAGATTTGACACTAG +AGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATG +TAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTT +AATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTC +TCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTG +CTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCAT +GCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGC +TGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGA +ACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTA +AATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCAT +TAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTG +AAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAG +CGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGT +GGACATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAG +CACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAA +CCAACTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGT +TGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTA +CAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCT +AGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAA +TTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACT +TTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAA +ATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATT +AGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTC +AGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAA 
+TCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAA +CTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTG +TTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGAT +TTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAAT +TTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAA +AATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTT +TACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGG +TGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATA +CTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAAT +ATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGG +TACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATT +CAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT +TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATAT +GTACGACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGG +GTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGA +GGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTA +TAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGA +ATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAA +CCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTG +GAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGA +GTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAA +GGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTAT +AATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACA +ACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAG +TCAGTGTGTTAATCTTAGAACCAGAACTCAATTACCCCCTGCATACACTA +ATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCA +GTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNCTCTGGGACCAATGGTACNNNNNNNNNNN +NNNACNNNNNNNNNNNATTTAATGATGNNNNTTATTTTGCTTCCATTGAG +AAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAA +GACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTATTACCAC +AAAAACAACAAAAGTTGGATGGAAAGTGGAGTTTATTCTAGTGCGAATAA +TTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAA +AACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGAT +GGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGA +TCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAG +GTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTAT 
+TTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTA +TTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAA +ATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAA +ACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAAC +TTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATA +TTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCA +TCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTA +TTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAG +TGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGAT +TCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAAC +TGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCT +GCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAAT +TATAATTACCGGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGA +GAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCAAACCTTGTAATG +GTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAA +CCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTT +TGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTA +ATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGC +ACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATT +TGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACAC +TTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATA +ACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGT +TAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTA +CTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGC +TGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACC +CATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCGTC +GGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCA +CTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACC +CACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGA +CCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAA +TGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCG +TGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTT +TTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGT +GGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAG +GTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTG +GCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGAC +CTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCT +CACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAA 
+TCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTT +GCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGT +TCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTG +GCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTT +CAAAATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACA +ACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTT +CACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACA +GGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGC +TGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGT +GTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCAT +CTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGT +GACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTT +GTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAAT +GGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCAT +TACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAA +TTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTC +AAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGA +TTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAG +AAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATC +GATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTA +CATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAA +TTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGT +TCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCT +CAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTAT +GAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGG +ATGCTACTCCTTTAGATTTTGTTCGCGCTACTGCAACGATACCGATACAA +GCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGT +TTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAG +CACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTA +ACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTT +TCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAA +GAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCA +TTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGA +CTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAG +GTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGT +TATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAG +TTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAG +ACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGAT 
+GAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGT +TGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTA +GCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTC +GTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCT +TGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTC +GATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCT +TCTTTTTACTTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCC +TGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGG +AACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAG +AGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTCCTATTC +CTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTT +TTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTT +TAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGT +GGAATTGCTACCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTA +CTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCAT +TCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATT +CTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCT +TCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCA +AGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTAT +TACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGC +ATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCA +GTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTT +TCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTA +TGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACCTC +ATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATT +AGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTC +TTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAA +GAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGG +AACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTG +CACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTA +AAACACGTCTATCAGTTACGTGCCAGATCAGCTTCACCTAAACTGTTCAT +CAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTG +CGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGATAGAA +TGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTG +CTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACT +GCAAGATCATAATGAAATTTGTCACGCCTAAACGAACATGAAATTTCTTG +TTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGT +TTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCC 
+TATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAG +CACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATT +CAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAAT +TAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCT +ATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAATCTAAACG +AACAAACTAAATGTCTGATAATGGACCTCAAAATCAGCGAAATGCACCCC +GCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGA +GAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAA +TAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGGCC +TTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGTCCA +GATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGGTGG +TGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAG +GAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATC +ATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGG +CACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAG +GAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGT +CAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAATTC +AACTCCAGGCAGCAGTATGGGAACTTCTCCTGCTAGAATGGCTGGCAATG +GCTGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTT +GAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCAC +TAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTG +CCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAA +CAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGA +TTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGT +TCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGG +TTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAA +AGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCC +CACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTTATGAAACTCAA +GCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGC +TGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTG +CTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGC +TATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTG +CAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTA +ATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAA +AGAGCCACCACATTTTCACCGAGGCCACTCGGAGTACGATCGAGTGTACA +GTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAA +AATTAATTTTAGTAGTGCTATCCNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
+>England/ALDP-337E0B6/2022|OV828376.1|2022-01-20 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC +GGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGT +CGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGT +TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTG +TGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC +GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACAT +CTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCC +TCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTG +CACCTCATGGTCATGTTGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAG +TACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGA +AATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAG +GAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGC +GACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACAC +TAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAG +GGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTAC +CCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATG +CACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACT +GCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAA +AAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATT +TGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCA +TAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTT +ATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAA +CCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTT +CATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACT +GAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAA +TGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGAC +CTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATT +CTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTA +TGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTA +ACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTT +AATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATAT +TGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTT +TTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTAT +AAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAA +AGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATAC 
+TGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCA +ATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACA +GAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGAC +TCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTT +GTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCT +AACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATT +GGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGG +GAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACA +AATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTA +AGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGT +GGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTC +AAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTAC +TCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACA +CTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTT +ACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTG +GTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACA +GAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTT +CACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTG +TGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGAT +GAAAGGATTGATAAAGTACTTAATGAGAGGTGCTCTGCCTATACAGTTGA +ACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCA +TAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGAT +TTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGA +GTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGG +ATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAA +TATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGG +TGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGT +TAGATGATGATAGTCAACAAACTGTTGGTCAACAAGATGGCAGTGAGGAC +AATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGA +GATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTG +GTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTG +GAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATGT +TTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTA +ACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCA +CTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACA +CTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAAC +TTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCA +CCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAG 
+AGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATA +AAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAA +AAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCC +ATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATA +AGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACT +AAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCT +TCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAA +AGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTA +ACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAATGCT +AGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACC +CGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTT +AAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGA +GAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTG +CACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAA +GCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGA +GGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAA +CTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTT +ACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGC +TCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCAC +CTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACA +CCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGA +TTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGA +GAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTA +GATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAG +AGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCC +ACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCA +ACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACA +TGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTG +AGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTAC +ATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGG +TTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCAT +TGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACAA +GATGCTTATTACAGAGCAAGGGCTGGTGAAGCGGCTAACTTTTGTGCACT +TATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAG +AAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGA +GTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAA +TGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTA +AGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATAT 
+CTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCA +GTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTA +ATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTAT +TGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTAT +TACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAG +TTACTTATAAATTGGATGGTATTGTTTGTACAGAAATTGACCCTAAGTTG +GACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGA +TCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGT +TTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGT +TATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTT +AAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTA +AGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAAT +GCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCT +TTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGT +CAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCA +GTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGA +GTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAG +CAAATAATATAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCT +GCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATC +TAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTA +ATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAAC +AAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGTGT +TTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTGTA +CTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACT +ATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTC +ATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAA +TTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACC +GCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTAC +TGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAACCT +ACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCT +TTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTT +TAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCAT +ATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATG +CAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGGCT +TATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGG +TTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTTAT +GTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAA +ACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTA 
+GAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTA +CACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACATT +TATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACCAA +TAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAG +AATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGA +AAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATA +ACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCA +AAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCT +TATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGATGTTG +GTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATACG +TTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGC +AACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTAT +CTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAA +ACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGA +AGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTG +AAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGCGT +CATATTAATGCGCAGGTAGCAAAAAGTCACAACATTACTTTGATATGGAA +CGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTA +GTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACT +AGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAA +AATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCC +TTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCT +AAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGG +TGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAAC +ATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAAT +GACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTT +TGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGACT +TTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTAC +ACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGT +TTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTAC +CATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGT +TTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATT +TCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATT +CTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTTGT +GTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTT +ACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGT +TTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATA +GTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTACTATTT 
+TATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTA +ATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCAGTT +TACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATT +TTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTA +TGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGT +ATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACG +TGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCTGT +GCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTG +CTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTA +CAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTT +GTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGAT +GTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTTGCAGAG +TGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTATGG +TACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGATGAC +GTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAA +CCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTTTTGG +TACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAAT +TGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTA +TAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTT +ACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCACAATTTC +ACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAA +CATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTAC +CAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCT +TTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTATTAC +AGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGT +GGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCT +ATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGG +ACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCAT +TAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGT +GCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATGCTC +AGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACACACC +ACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGT +ACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTT +TGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAAC +ATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTA +GCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTAT +GACATGGTTGGATATGGTTGATACTAGTTTTAAGCTAAAAGACTGTGTTA +TGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAACTGTG 
+TATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACT +CGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGT +GGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACT +GTCATGTTTTTGGCCAGAGGTGTTGTTTTTATGTGTGTTGAGTATTGCCC +TATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATT +GTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTC +AACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTAC +ACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAAGAATA +GCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAA +CCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAAGTG +CACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAGAATCAT +CATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTA +GCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGT +TTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAA +TGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCC +CTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGC +TGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTT +TGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACGTAAG +TTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAG +ATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTT +TCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATCAAC +AATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAACAGC +AGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGT +GTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCAACAG +GTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGA +CAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCA +ATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGA +CAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAA +TGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACTTGCAC +TGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGAT +GGAACTGGTACTATCTATACAGAACTGGAACCACCTTGTAGGTTTGTTAC +AGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGAT +TAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTA +CGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATT +ATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGATTATC +TAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTGTACA +CACACTGGTACTGGTCAGGCAATAACAGTCACACCGGAAGCCAATATGGA +TCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACA 
+TAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTA +CAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAA +CACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTG +ATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTTTTTA +AACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGC +ACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGT +AGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAA +AGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGACAC +ACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGA +TTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTG +ACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCA +GACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACACATT +AAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATA +AAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTATAC +GCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGTACAATT +CTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATA +ATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACC +ACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAAT +GCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACA +CTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTC +ACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTGGGATCA +GACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATTCTGC +ATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTACAAGT +TTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGT +TTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATG +TAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTATGCT +GCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACG +CACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTTCAAA +CTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCT +AAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTT +TGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATA +ATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAAGTT +GTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCTAACCA +AGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAAT +GGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGAT +GCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCAAAT +GAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTG +GTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTA 
+TTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGGAACAAG +CAAATTCTATGGTGGTTGGCACAATATGTTAAAAACTGTTTATAGTGATG +TAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCC +ATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACA +TACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGT +GTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTT +AAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAG +TGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTTTTAT +CTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACAC +AGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGT +GAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATACTCT +CTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGTCTA +GTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGT +TTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGAC +CTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGAT +TATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTG +TTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGT +TCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAATCAG +GAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAAGCTACA +TGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTA +ATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTAC +ACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTC +ACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTT +GTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTG +TCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCACAGATGT +GACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACATAAAC +CACCCATTAGTTTTCCATTGTGTGCCAATGGACAAGTTTTTGGTTTATAT +AAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGC +AACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCTGTA +CTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAG +ACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGA +CAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCACTTA +ACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTA +CAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGT +TTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGC +TGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGCCACAA +GAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATCTCAGA +TGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGT 
+ATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATT +GGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTC +TCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTA +TAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTT +GATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTGTACTGT +AAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGAAATTT +CAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGTGCT +AAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCAC +ATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTA +GACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGT +TGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAA +GCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTTTATA +AGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATA +GGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGT +CTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGG +GACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGACTAT +GTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAG +ATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGT +CTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAATTCCA +CGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAA +AGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACAC +ACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGACGTA +CCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTATGATGGG +TTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCC +GCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAG +GGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCT +AGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTG +ATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCT +GGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACTTCCTTG +GAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTTAAAA +ATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTG +ACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCT +ATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCT +GTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATT +GATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCT +GTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCAATCA +TGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGG +ACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTG 
+TAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAAT +TCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTACCT +CAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGTGACAA +AGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACA +AATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATAT +CCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCT +TAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCAT +TCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTA +CCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAAACAAGT +AGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATAACAC +GTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGA +TTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTGTG +GGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAAGAC +TTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTT +GATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTA +CACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACAACAT +TACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCA +GTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAA +TACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTACTA +TTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGAAACGATT +TGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGA +CTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTGTTA +AAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTC +ACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGT +TGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAA +ATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTAGAA +TTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTATGCCTT +CGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTAC +ATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTA +GAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGA +TGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTAC +TTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCT +AAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTATGCTTTG +GTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCTAGTC +AAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGA +ATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATT +ACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTCAAT +ATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATACAT 
+TTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAG +ACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAATGACT +TTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTACAT +ACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGAC +TAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACA +TTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATA +AAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGGGACA +CTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTG +AAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATA +GATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCC +AATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTA +AATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGAT +ATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGAAAACAA +CAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAACAATG +TTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCT +TACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTG +GTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACT +CAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGTTAT +CTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTA +ATGATGGTGTTTATTTTGCTTCCATTGAGAAGTCTAACATAATAAGAGGC +TGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGT +TAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTA +ATGATCCATTTTTGGACCACAAAAACAACAAAAGTTGGATGGAAAGTGAG +TTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCA +GCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTA +GGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAG +CACACGCCTATTATAGTGCGTGAGCCAGAAGATCTCCCTCAGGGTTTTTC +GGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGT +TTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCT +TCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCA +ACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATG +CTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAA +TCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCA +ACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTT +TTGATGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAAC +AGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATCT +CGCACCATTTTTCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAA +ATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGT 
+GATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTA +TAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATT +CTAACAAGCTTGATTCTAAGGTTAGTGGTAATTATAATTACCTGTATAGA +TTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGA +AATCTATCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTTTTAATT +GTTACTTTCCTTTACGATCATATAGTTTCCGACCCACTTATGGTGTTGGT +CACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACC +AGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAAT +GTGTCAATTTCAACTTCAATGGTTTAAAAGGCACAGGTGTTCTTACTGAG +TCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGA +CACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTA +CACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACT +TCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCC +TGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTA +CAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAA +TATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATG +CGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTGTAG +CTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGTAGAAAATTCA +GTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAG +TGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATT +GTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTG +CAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAATAGC +TGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAA +TTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCACAA +ATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCT +ACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATG +GTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAG +TTTAAAGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGC +TCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCT +TTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTAT +AGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAA +ATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCAC +TTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAT +AATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATTTGG +TGCAATTTCAAGTGTTTTAAATGATATCTTTTCACGTCTTGACAAAGTTG +AGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTG +CAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTC +TGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAA 
+AAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAG +TCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACA +AGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCAC +ACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTA +ACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATT +TGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTT +ATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGATAAA +TATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGG +CATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATG +AGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGA +AAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTAT +AGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGA +CCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGC +AAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACA +TTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGG +AACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATT +TTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGA +TGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAA +AATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTGTTC +ACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTT +TTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTT +AGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTT +GGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAAC +TATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAA +TAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAAGTC +CTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAA +TCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTA +TTACCAGCTGTACTCAACTCAATTGAGTACAGACACTGGTGTTGAACATG +TTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTC +CAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGA +ACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCAC +AAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATAGGT +ACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTT +GCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCT +GCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCT +CGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAAC +GAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCAT +GGCAGGTTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTG 
+AAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTT +CTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTT +AATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTG +CTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATG +GCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTTCAG +ACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACA +TTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTA +GAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTAT +TGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAA +TCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCG +CAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAGGAT +TGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTG +CTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCA +GGTTACTATAGCAGAGATATTACTAATTATTATGCGGACTTTTAAAGTTT +CCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCT +AAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAAT +GGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAA +CACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACA +ACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTC +ACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCA +CTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTA +CGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCA +AGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTATAA +CACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAA +TTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTAATT +ATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAAC +TTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCA +CAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAA +CATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAA +ATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGT +GCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGT +AATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAA +ATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGT +ATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAA +TGTCTGATAATGGACCCCAAAATCAGCGAAATGCACTCCGCATTACGTTT +GGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGCGATC +AAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCA +CCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAA 
+GGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTA +CCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATC +TCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGA +CTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGG +AGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACA +ATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGC +TTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTC +ATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAAC +GAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTTGCT +TTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGGTAA +AGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGG +CTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTA +ACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGG +GGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAA +TTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATT +GGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGCCAT +CAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGA +ATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAG +GACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAA +GAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCT +CCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAA +ACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTT +TCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACT +ACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTA +ATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACC +GAGGCCACGCGGAGTACGATCGAGTGTACAGTGAACAATGCTAGGGAGAG +CTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTANNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNN diff --git a/test/sars_20.json b/test/sars_20.json new file mode 100644 index 0000000..090ab3b --- /dev/null +++ b/test/sars_20.json @@ -0,0 +1 @@ 
+{"paths":[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","offset":null,"circular":false,"position":[1,19179,19453,25619,25914,29599,29657],"blocks":[{"id":"XUJZJMSKTM","name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},{"id":"GSXRICVFNW","name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true}]},{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","offset":null,"circular":false,"position":[1,19244,19518,25699,25994,29683,29799],"blocks":[{"id":"XUJZJMSKTM","name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},{"id":"GSXRICVFNW","name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true}]},{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","offset":null,"circular":false,"position":[1,19290,19564,25739,26034,29721,29890],"blocks":[{"id":"XUJZJMSKTM","name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},{"id":"KQ
YCZWLRQK","name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},{"id":"GSXRICVFNW","name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true}]},{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","offset":null,"circular":false,"position":[1,19231,19505,25674,25969,29721],"blocks":[{"id":"XUJZJMSKTM","name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true}]},{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","offset":null,"circular":false,"position":[1,19234,19508,25677,25972,29759],"blocks":[{"id":"XUJZJMSKTM","name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true}]},{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","offset":null,"circular":false,"position":[1,101,19279,19553,25719,26014,29699,29840],"blocks":[{"id":"IYZCSMYUUN","name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},{"id":"XUJZJMSKTM","name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand
":true},{"id":"NXVBNGKFGP","name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},{"id":"GSXRICVFNW","name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true}]},{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","offset":null,"circular":false,"position":[1,19259,19533,25702,25997,29784],"blocks":[{"id":"XUJZJMSKTM","name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true}]},{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","offset":null,"circular":false,"position":[1,19281,19555,25724,26019,29847],"blocks":[{"id":"XUJZJMSKTM","name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true}]},{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","offset":null,"circular":false,"position":[1,19290,19571,25739,26034,29721,29890],"blocks":[{"id":"XUJZJMSKTM","name":
"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},{"id":"GSXRICVFNW","name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true}]},{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","offset":null,"circular":false,"position":[1,19235,19509,25675,25970,29655,29798],"blocks":[{"id":"XUJZJMSKTM","name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},{"id":"GSXRICVFNW","name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true}]},{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","offset":null,"circular":false,"position":[1,19290,19564,25745,26040,29734,29903],"blocks":[{"id":"XUJZJMSKTM","name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|
2020-11-12","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},{"id":"GSXRICVFNW","name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true}]},{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","offset":null,"circular":false,"position":[1,19252,19526,25701,25996,29683,29800],"blocks":[{"id":"XUJZJMSKTM","name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},{"id":"VCWRFXPALK","name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},{"id":"GSXRICVFNW","name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true}]},{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","offset":null,"circular":false,"position":[1,19236,19510,25691,25986,29732],"blocks":[{"id":"XUJZJMSKTM","name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true}]},{"name":"England/ALDP-337E0B6/2
022|OV828376.1|2022-01-20","offset":null,"circular":false,"position":[1,19275,19549,25721,26016,29736,29870],"blocks":[{"id":"XUJZJMSKTM","name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},{"id":"LVHHZKTHFV","name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true}]},{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","offset":null,"circular":false,"position":[1,19281,19555,25724,26019,29748,29882],"blocks":[{"id":"XUJZJMSKTM","name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},{"id":"LVHHZKTHFV","name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true}]},{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","offset":null,"circular":false,"position":[1,19278,19552,25724,26019,29739,29873],"blocks":[{"id":"XUJZJMSKTM","name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"Englan
d/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},{"id":"LVHHZKTHFV","name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true}]},{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","offset":null,"circular":false,"position":[1,19281,19555,25721,26016,29701,29844],"blocks":[{"id":"XUJZJMSKTM","name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},{"id":"GSXRICVFNW","name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true}]},{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","offset":null,"circular":false,"position":[1,19203,19477,25646,25941,29717],"blocks":[{"id":"XUJZJMSKTM","name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},{"id":"YYKDESFHEY","name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true}]},{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","offset":null,"circular":false,"position":[1,19281,19555,25733,26028,29715,29884],"blocks":[{"id":"XUJZJMSKTM","name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},{"id":"NXVBNGKFGP",
"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},{"id":"GSXRICVFNW","name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true}]},{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","offset":null,"circular":false,"position":[1,19236,19510,25694,25989,29776],"blocks":[{"id":"XUJZJMSKTM","name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},{"id":"NXVBNGKFGP","name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},{"id":"ZOTARKVMSA","name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},{"id":"KQYCZWLRQK","name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},{"id":"CIFRKDCUUV","name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true}]}],"blocks":[{"id":"CIFRKDCUUV","sequence":"AACTCAATTGAGTACAGACATTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAAGAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAACTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCG
TACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTTAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACTCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCG
CAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAAACGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCCGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACC","gaps":{"3685":170,"2322":9,"2232":1},"mutate":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[[2069,"T"],[843,"T"],[1343,"C"],[1344,"T"],[1876,"T"],[236,"G"],[1345,"C"],[1220,"C"],[100,"T"],[819,"T"]]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[[868,"T"],[2609,"T"],[17,"A"],[2258,"C"],[2251,"T"],[855,"T"],[1343,"C"],[1344,"T"],[1876,"T"],[236,"G"],[1345,"C"],[1702,"T"],[1220,"C"],[819,"T"]]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[[21,"C"],[3462,"A"],[1220,"C"],[491,"G"]]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[1392,"T"],[1343,"C"],[3658,"T"],[1344,"T"],[846,"T"],[236,"G"],[1345,"C"],[2170,"C"],[1220,"C"],[819,"T"]]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[[21,"C"],[3462,"A"],[1220,"C"],[491,"G"]]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[[3338,"T"],[2258,"C"],[1343,"C"],[1026,"A"],[1344,"T"],[1350,"T"],[236,"G"],[1345,"C
"],[1876,"T"],[2399,"G"],[1220,"C"],[819,"T"]]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[[1343,"C"],[1960,"T"],[1344,"T"],[1876,"T"],[236,"G"],[1345,"C"],[1966,"T"],[1220,"C"],[819,"T"]]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[[883,"T"],[1343,"C"],[1344,"T"],[1876,"T"],[236,"G"],[1345,"C"],[1220,"C"],[819,"T"]]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[[799,"W"],[2835,"G"],[797,"B"],[2272,"C"],[2834,"G"],[1843,"R"],[803,"B"],[807,"Y"],[808,"W"],[811,"K"],[798,"D"],[1845,"Y"],[762,"G"],[804,"K"],[670,"G"],[814,"B"],[1841,"W"],[2232,"A"],[815,"Y"],[21,"C"],[805,"K"],[816,"W"],[1844,"Y"],[2833,"G"],[806,"Y"],[812,"K"],[3597,"T"],[1838,"D"],[809,"Y"],[2884,"T"],[1842,"Y"],[802,"Y"],[800,"Y"],[801,"K"],[810,"K"],[3462,"A"],[1768,"C"],[1834,"W"],[231,"C"],[813,"K"],[1840,"Y"],[1835,"Y"],[538,"C"]]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[[2204,"N"],[2180,"N"],[2199,"N"],[2108,"N"],[2272,"C"],[2834,"G"],[2183,"N"],[2120,"N"],[2158,"N"],[2214,"N"],[2156,"N"],[2182,"N"],[2142,"N"],[2208,"N"],[2165,"N"],[2207,"N"],[2191,"N"],[2187,"N"],[2164,"N"],[2193,"N"],[2115,"N"],[2202,"N"],[2198,"N"],[2123,"N"],[2174,"N"],[2232,"A"],[2179,"N"],[2211,"N"],[2173,"N"],[2833,"G"],[2132,"N"],[2126,"N"],[2138,"N"],[2177,"N"],[2184,"N"],[2161,"N"],[2119,"N"],[2145,"N"],[2206,"N"],[2185,"N"],[2205,"N"],[2125,"N"],[2162,"N"],[1886,"A"],[2128,"N"],[2133,"N"],[2131,"N"],[2154,"N"],[2169,"N"],[2196,"N"],[2188,"N"],[2189,"N"],[2139,"N"],[2141,"N"],[2609,"T"],[1768,"C"],[2201,"N"],[2140,"N"],[2147,"N"],[2107,"N"],[2129,"N"],[2149,"N"],[2134,"N"],[2136,"N"],[2124,"N"],[2144,"N"],[2835,"G"],[2152,"N"],[2167,"N"],[2127,"N"],[2110,"N"],[2200,"N"],[2151,"N"],[2839,"T"],[2143,"N"],[2168,"N"],[2175,"N"],[2155,"N"],[2150,"N"],[2112,"N"],[2148,"N"],[670,"G"],[453,"T"],[2159,"N"],[2166,"N"],[2170,"N"],[2172,"N"],[2116,"N"],
[128,"T"],[2192,"N"],[2195,"N"],[2215,"N"],[21,"C"],[2117,"N"],[2146,"N"],[2121,"N"],[2157,"N"],[2130,"N"],[2213,"N"],[2186,"N"],[2194,"N"],[2118,"N"],[2122,"N"],[2163,"N"],[2176,"N"],[2153,"N"],[2137,"N"],[1966,"T"],[2135,"N"],[2160,"N"],[2114,"N"],[2203,"N"],[2109,"N"],[2111,"N"],[2233,"T"],[3462,"A"],[2181,"N"],[2171,"N"],[2190,"N"],[231,"C"],[2209,"N"],[2113,"N"],[2210,"N"],[2178,"N"],[2197,"N"],[538,"C"],[2212,"N"]]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[[1850,"T"],[274,"T"],[2273,"T"],[490,"A"],[2633,"T"]]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[2835,"G"],[2862,"T"],[2272,"C"],[670,"G"],[2806,"T"],[2834,"G"],[3462,"A"],[1730,"T"],[1768,"C"],[231,"C"],[21,"C"],[1350,"T"],[2009,"A"],[183,"T"],[538,"C"],[2291,"G"],[2833,"G"]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[377,"T"],[1599,"C"],[2252,"T"],[2413,"G"],[2835,"G"],[2272,"C"],[670,"G"],[1713,"T"],[2834,"G"],[3462,"A"],[1768,"C"],[231,"C"],[21,"C"],[728,"C"],[1835,"T"],[2868,"T"],[3354,"T"],[538,"C"],[2833,"T"]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[[973,"T"],[1850,"T"],[1474,"T"],[490,"A"],[2291,"G"]]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[[1850,"T"],[2271,"T"],[490,"A"],[2291,"G"],[999,"G"]]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[2835,"G"],[2272,"C"],[670,"G"],[696,"T"],[2834,"G"],[3351,"A"],[3462,"A"],[1768,"C"],[231,"C"],[21,"C"],[837,"C"],[1395,"T"],[538,"C"],[2833,"G"],[2927,"C"]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[975,"T"],[1599,"C"],[2835,"G"],[2413,"G"],[2272,"C"],[670,"G"],[1713,"T"],[2834,"G"],[3462,"A"],[1768,"C"],[70,"A"],[231,"C"],[21,"C"],[728,"C"],[1835,"T"],[2868,"T"],[3354,"T"],[538,"C"],[642,"T"],[2833,"T"]]],[{"name":"USA/CA-CDC-VSX-
A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[[1850,"T"],[2273,"T"],[2629,"T"],[490,"A"],[2633,"T"]]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[2338,"G"],[1599,"C"],[2835,"G"],[2413,"G"],[2272,"C"],[670,"G"],[1713,"T"],[2834,"G"],[3462,"A"],[3206,"T"],[1768,"C"],[231,"C"],[21,"C"],[728,"C"],[2273,"T"],[1835,"T"],[2868,"T"],[3354,"T"],[538,"C"],[2833,"T"]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[2835,"G"],[1599,"C"],[2413,"G"],[2272,"C"],[670,"G"],[1713,"T"],[2834,"G"],[1527,"T"],[3462,"A"],[1768,"C"],[1664,"T"],[231,"C"],[21,"C"],[728,"C"],[1835,"T"],[2868,"T"],[3354,"T"],[538,"C"],[2833,"T"],[1590,"A"]]]],"insert":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[[[3685,26],"TACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTA"]]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[[[3685,26],"TACAGTGAATAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAAT"]]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[[[3685,26],"GAGGCCACGCGGAGTACGATCGAGTGTACAGTGAA"]]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[[3685,26],"NNNNNNNNNNNNNNNNNNNNNNNNNNTACAGTGAA"],[[2322,0],"NNNNNNNNN"]]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[[[3685,26],"GAGGCCACGCGGAGTACGATCCAGTGTACAGTGAA"]]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[[[3685,26],"TACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGG"]]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[[[3685,0],"GAGGCCACGCGGAGTACGATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCC"]]],[{"name":"En
gland/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[[[3685,26],"TACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"]]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[[[3685,26],"GAGGCCACGCGGAGTACGATCGAGTGTACAGTGAARVDHDDVBVNVVDKDVNR"],[[2322,0],"GAGAACGCA"]]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[[[3685,26],"GAGGCCACGCGGAGCACGATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCC"],[[2322,0],"GAGAACGCA"]]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[[2322,0],"GAGAACGCA"]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[[2322,0],"GAGAACGCA"]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[[2322,0],"GAGAACGCA"],[[2232,0],"A"]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[[2322,0],"GAGAACGCA"]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[[2322,0],"GAGAACGCA"]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[[2322,0],"GAGAACGCA"]]]],"delete":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20
","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[[2051,5],[119,4]]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[2231,1],[119,4]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[2231,1],[2209,6]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[2231,1]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[2231,1],[2209,6]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[2231,1],[2209,6]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[2231,1],[2209,6]]]],"positions":[[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[26019,29738]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[26016,29735]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","
number":1,"strand":true},[25914,29598]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[25996,29682]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[25941,0]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[26016,29700]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[26034,29720]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[26019,29747]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[25972,0]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[26014,29698]],[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[25969,0]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[25994,29682]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[26019,0]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[25997,0]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[26028,29714]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[25986,0]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[26034,29720]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[26040,29733]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[25989,0]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[25970,29654]]]},{"id":"NXVBNGKFGP","sequence":"CAGTTTGTATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATAACACGTT
GCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTG","gaps":{"274":41},"mutate":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[[37,"G"]]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[[37,"G"]]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[[37,"G"]]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[[37,"G"]]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[37,"G"]]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[[37,"G"]]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[274,"A"]]]],"insert":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"st
rand":true},[]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[[274,0],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[]]],"delete":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[]],[{"name":"
England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[241,34]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[]]],"positions":[[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[19244,19517]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[19281,19554]],[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[19231,19504]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[19290,19563]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[19281,19554]],[{"name":"USA/NY-CDC-LC
0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[19179,19452]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[19281,19554]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[19234,19507]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[19279,19552]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[19290,19570]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[19290,19563]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[19259,19532]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[19278,19551]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[19275,19548]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[19236,19509]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[19203,19476]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[19236,19509]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[19235,19508]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[19281,19554]]]},{"id":"XUJZJMSKTM","sequence":"AGATCTGTTCTCTAAACGTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACC
AGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACAC
CAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATG
CCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAATTGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAG
AGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTT
TTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTGGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAATCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGACACAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTT
GTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGACAA
TTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATTTATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGTACGCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACGCAC
TACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCAGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATACTTTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCTGTACTG
AAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTATGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGACGTACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACA
TCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGG","gaps":{"0":54,"11229":9,"20":58,"18":4,"11224":9,"19":23},"mutate":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[[17792,"C"],[9808,"T"],[4263,"T"],[5307,"T"],[15671,"T"],[16275,"C"],[347,"G"],[15726,"A"],[15872,"C"],[10663,"C"],[18434,"T"],[17057,"C"]]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[[17792,"C"],[9808,"T"],[1915,"T"],[4263,"T"],[9894,"C"],[1066,"G"],[9162,"A"],[15671,"T"],[347,"G"],[16275,"C"],[15872,"C"],[2105,"C"],[7506,"T"],[8679,"C"],[12400,"T"],[17057,"C"],[16789,"G"]]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[[8335,"A"],[9286,"C"],[12813,"C"],[13128,"C"],[11470,"G"],[2774,"G"],[15384,"G"],[15647,"C"],[17343,"C"],[10140,"C"],[2732,"C"],[10194,"T"],[612,"T"],[15173,"T"],[3183,"T"],[5496,"T"],[9366,"A"],[11229,"T"],[5866,"A"],[16424,"C"],[5328,"G"],[9476,"C"],[4126,"G"],[10389,"G"]]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[17792,"C"],[4176,"T"],[1491,"T"],[9808,"T"],[4263,"T"],[3201,"T"],[15671,"T"],[1,"N"],[347,"G"],[7761,"G"],[14738,"T"],[15872,"C"],[16728,"A"],[12811,"G"],[6478,"A"]]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[[13503,"A"],[8335,"A"],[2412,"T"],[4057,"
T"],[9286,"C"],[12813,"C"],[13128,"C"],[11470,"G"],[2774,"G"],[15384,"G"],[15647,"C"],[17343,"C"],[10140,"C"],[2732,"C"],[612,"T"],[15173,"T"],[9366,"A"],[8928,"T"],[11229,"T"],[5328,"G"],[9476,"C"],[4126,"G"],[10389,"G"],[14050,"T"]]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[[5,"T"],[16,"T"],[9808,"T"],[4263,"T"],[11683,"T"],[8,"A"],[15671,"T"],[1,"C"],[19,"C"],[12663,"A"],[6,"G"],[9,"G"],[6278,"T"],[14,"G"],[6986,"A"],[3,"C"],[17792,"C"],[7,"T"],[14789,"G"],[16275,"C"],[347,"G"],[11889,"T"],[15872,"C"],[15,"T"],[2,"T"],[10,"A"],[18,"T"],[5364,"C"],[18636,"T"],[15915,"A"],[15299,"C"]]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[[5,"A"],[2086,"T"],[9808,"T"],[12,"A"],[8,"A"],[17,"A"],[15671,"T"],[1,"G"],[6,"C"],[11,"C"],[9,"A"],[14,"C"],[3,"T"],[17792,"C"],[7,"A"],[16275,"C"],[347,"G"],[4,"A"],[13,"A"],[15,"C"],[15872,"C"],[18,"C"]]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[[5,"N"],[16,"N"],[20,"N"],[9808,"T"],[4263,"T"],[12,"N"],[9474,"T"],[8,"N"],[17,"N"],[15671,"T"],[1,"N"],[19,"N"],[6,"N"],[11,"N"],[14699,"C"],[9,"N"],[14,"N"],[3,"N"],[17792,"C"],[7,"N"],[16275,"C"],[347,"G"],[4,"N"],[13,"N"],[15,"N"],[2,"N"],[10,"N"],[18,"N"],[15872,"C"],[17057,"C"],[14582,"T"]]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[[15660,"R"],[4186,"W"],[3198,"W"],[11399,"K"],[4196,"M"],[11388,"Y"],[13537,"K"],[13551,"K"],[13553,"Y"],[14000,"C"],[15654,"K"],[612,"T"],[3188,"K"],[16862,"C"],[11385,"W"],[4191,"W"],[13541,"Y"],[1126,"K"],[14553,"W"],[3186,"H"],[4201,"M"],[5299,"S"],[15652,"W"],[16641,"Y"],[5290,"Y"],[13545,"K"],[18096,"A"],[4189,"R"],[12813,"C"],[13542,"Y"],[4197,"R"],[5286,"W"],[5301,"W"],[13546,"W"],[3179,"R"],[11384,"W"],[4198,"R"],[15384,"G"],[17343,"C"],[5296,"R"],[3185,"K"],[7532,"G"],[11386,"K"],[10140,"C"],[4193,"R"],[13535,"D"],[9366,"A"],[11387,"Y"],[3192,"R"],[13540,"W"],[5289,"W"
],[116,"T"],[3180,"R"],[4126,"G"],[4185,"D"],[5287,"R"],[9971,"C"],[13552,"W"],[11383,"Y"],[13554,"R"],[4187,"H"],[3189,"K"],[3195,"M"],[3199,"M"],[4200,"W"],[5280,"W"],[6228,"T"],[15655,"W"],[15649,"S"],[3181,"R"],[15656,"W"],[17666,"Y"],[17673,"M"],[17667,"Y"],[3187,"R"],[5283,"M"],[2732,"C"],[3182,"R"],[4184,"W"],[3193,"R"],[11395,"W"],[11393,"Y"],[3194,"R"],[4195,"S"],[4199,"R"],[8226,"C"],[9476,"C"],[13538,"H"],[11381,"R"],[13182,"T"],[13548,"W"],[4188,"S"],[3178,"M"],[5284,"M"],[4194,"M"],[9286,"C"],[5282,"M"],[13550,"K"],[13543,"W"],[16635,"K"],[16638,"K"],[15663,"K"],[3190,"K"],[13534,"R"],[13549,"W"],[3191,"W"],[15647,"Y"],[387,"C"],[3184,"R"],[3197,"R"],[5288,"M"],[11382,"W"],[4183,"W"],[16632,"M"],[13547,"W"],[4190,"M"],[13544,"W"],[11397,"K"],[3183,"V"],[4192,"W"],[10391,"C"],[11229,"T"],[11390,"W"],[11389,"W"],[13533,"W"],[13539,"W"],[15653,"Y"],[15657,"S"],[11394,"Y"],[5281,"M"],[5293,"M"],[10389,"G"],[13536,"H"],[18119,"T"]]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[[17424,"T"],[18096,"A"],[9286,"C"],[12813,"C"],[9062,"T"],[11384,"G"],[5979,"T"],[3370,"G"],[12990,"T"],[15384,"G"],[15647,"C"],[17343,"C"],[17640,"T"],[5134,"T"],[6646,"G"],[18810,"T"],[10140,"C"],[2732,"C"],[4820,"T"],[612,"T"],[9366,"A"],[10391,"C"],[11229,"T"],[18968,"C"],[9476,"C"],[10389,"G"],[4126,"G"]]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[[1873,"A"],[7170,"N"],[4263,"T"],[7172,"N"],[2896,"C"],[7176,"N"],[7179,"N"],[11683,"T"],[14190,"C"],[7171,"N"],[7470,"T"],[7181,"N"],[487,"T"],[15384,"G"],[7178,"N"],[12093,"A"],[7177,"N"],[7175,"N"],[16868,"A"],[7174,"N"],[7182,"N"],[7173,"N"],[7180,"N"]]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[9635,"T"],[7450,"N"],[5389,"N"],[2709,"N"],[8743,"N"],[8732,"N"],[5375,"N"],[2693,"N"],[8734,"N"],[2688,"N"],[8725,"N"],[2692,"N"],[7447,"N"],[9696,"C"],[10688,"N"],[6588,"N"],[4001,"N"],[2719,"N"],[6585,"N"],[53
76,"N"],[4013,"N"],[612,"T"],[2694,"N"],[3999,"N"],[10694,"N"],[2700,"N"],[4348,"N"],[7453,"N"],[6589,"N"],[8727,"N"],[8728,"N"],[5386,"N"],[207,"N"],[3826,"T"],[7451,"N"],[2728,"N"],[10693,"N"],[4360,"N"],[10673,"N"],[4005,"N"],[1689,"N"],[8740,"N"],[5383,"N"],[2722,"N"],[10686,"N"],[12813,"C"],[4011,"N"],[18096,"A"],[2704,"N"],[10680,"N"],[7446,"N"],[7455,"N"],[8724,"N"],[17343,"C"],[1673,"N"],[2702,"N"],[4357,"N"],[2703,"N"],[6586,"N"],[6575,"T"],[2698,"N"],[10140,"C"],[5388,"N"],[7459,"N"],[4004,"N"],[10685,"N"],[7456,"N"],[208,"N"],[10678,"N"],[8721,"N"],[10696,"N"],[3997,"N"],[4346,"N"],[10666,"N"],[1676,"N"],[9971,"C"],[10683,"N"],[7682,"N"],[4353,"N"],[3995,"N"],[5371,"N"],[5380,"N"],[2729,"N"],[8741,"N"],[1684,"N"],[8722,"N"],[2689,"N"],[5378,"N"],[2687,"N"],[8731,"N"],[1197,"G"],[2732,"C"],[10682,"N"],[5132,"N"],[10684,"N"],[5377,"N"],[5381,"N"],[2726,"N"],[10689,"N"],[3630,"T"],[7448,"N"],[5384,"N"],[1688,"N"],[8723,"N"],[2695,"N"],[10697,"N"],[10698,"N"],[209,"N"],[5387,"N"],[1680,"N"],[2714,"N"],[1690,"N"],[7452,"N"],[8720,"N"],[6341,"N"],[4003,"N"],[2711,"N"],[4008,"N"],[7463,"N"],[9556,"G"],[4010,"N"],[2706,"N"],[4015,"N"],[8736,"N"],[206,"N"],[4355,"N"],[4347,"N"],[6584,"N"],[7084,"G"],[8733,"N"],[2712,"N"],[15647,"C"],[10691,"N"],[4354,"N"],[2727,"N"],[4002,"N"],[5373,"N"],[4356,"N"],[1674,"N"],[10667,"N"],[2691,"N"],[1681,"N"],[3996,"N"],[10676,"N"],[8738,"N"],[2696,"N"],[2708,"N"],[8742,"N"],[5372,"N"],[7461,"N"],[1682,"N"],[10692,"N"],[10695,"N"],[2723,"N"],[4350,"N"],[5374,"N"],[8719,"N"],[10681,"N"],[5385,"N"],[4012,"N"],[5970,"G"],[8739,"N"],[4009,"N"],[2701,"N"],[457,"N"],[460,"N"],[2715,"N"],[4739,"N"],[4000,"N"],[8744,"N"],[205,"N"],[1672,"N"],[2725,"N"],[10669,"N"],[10671,"N"],[2707,"N"],[10674,"N"],[2713,"N"],[1685,"N"],[2710,"N"],[5379,"N"],[4358,"N"],[2716,"N"],[1671,"N"],[7457,"N"],[2724,"N"],[204,"N"],[2717,"N"],[1678,"N"],[7458,"N"],[203,"N"],[2699,"N"],[10699,"N"],[4006,"N"],[2697,"N"],[10672,"N"],[9366,"A"],[5131,"N"],[10679,"N"],[
2730,"N"],[4126,"G"],[458,"N"],[16399,"T"],[10702,"N"],[8737,"N"],[14959,"T"],[5370,"N"],[2718,"N"],[7462,"N"],[8730,"N"],[1679,"N"],[10675,"N"],[4359,"N"],[4352,"N"],[4007,"N"],[5369,"N"],[5382,"N"],[4740,"N"],[2705,"N"],[2721,"N"],[1675,"N"],[6587,"N"],[10700,"N"],[2720,"N"],[9476,"C"],[1683,"N"],[2690,"N"],[4014,"N"],[1686,"N"],[7454,"N"],[7460,"N"],[7464,"N"],[8729,"N"],[4349,"N"],[9286,"C"],[10670,"N"],[8726,"N"],[10677,"N"],[3998,"N"],[459,"N"],[1687,"N"],[10690,"N"],[10687,"N"],[8735,"N"],[10391,"C"],[10701,"N"],[7449,"N"],[1677,"N"],[4351,"N"],[7681,"N"],[10389,"G"],[10668,"N"]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[16399,"T"],[19153,"T"],[18096,"A"],[9286,"C"],[12813,"C"],[7066,"T"],[11143,"G"],[17126,"T"],[4123,"T"],[15647,"C"],[17343,"C"],[10140,"C"],[2732,"C"],[612,"T"],[14762,"T"],[152,"T"],[8928,"T"],[6344,"T"],[9366,"A"],[10391,"C"],[8995,"T"],[9476,"C"],[10389,"G"],[4126,"G"],[7793,"T"],[11265,"G"]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[[1569,"T"],[12243,"A"],[4263,"T"],[14950,"T"],[1391,"T"],[6284,"C"],[18944,"T"],[16549,"A"],[17174,"A"],[15384,"G"],[12093,"A"]]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[[9909,"T"],[1800,"C"],[1569,"T"],[4263,"T"],[6484,"M"],[17169,"G"],[12375,"A"],[17272,"T"],[6921,"G"],[14153,"T"],[15384,"G"],[12093,"A"]]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[9971,"C"],[15699,"T"],[18096,"A"],[9286,"C"],[12813,"C"],[2122,"A"],[7446,"T"],[5571,"T"],[15384,"G"],[15647,"C"],[17343,"C"],[16952,"T"],[16822,"G"],[18810,"T"],[10140,"C"],[2732,"C"],[612,"T"],[13926,"T"],[9366,"A"],[10391,"C"],[9468,"T"],[11430,"T"],[9476,"C"],[10389,"G"],[4126,"G"],[4485,"T"]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[16399,"T"],[13390,"C"],[19153,"T"],[15454,"W"],[18096,"A"],[9286,"C"],[12813,"C"]
,[7066,"T"],[7677,"T"],[9565,"A"],[18188,"T"],[11143,"G"],[1555,"T"],[4123,"T"],[15647,"C"],[17343,"C"],[6019,"T"],[9383,"T"],[10140,"C"],[2732,"C"],[612,"T"],[152,"T"],[8928,"T"],[6344,"T"],[9366,"A"],[10391,"C"],[11350,"T"],[11081,"T"],[14390,"G"],[8995,"T"],[8777,"Y"],[9476,"C"],[4126,"G"],[10389,"G"],[11265,"G"]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[[2478,"Y"],[1873,"A"],[4263,"T"],[11683,"T"],[2896,"C"],[14190,"C"],[16868,"A"],[18378,"T"],[15384,"G"],[16972,"G"],[12093,"A"]]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[19153,"T"],[4694,"T"],[18096,"A"],[9286,"C"],[12813,"C"],[7066,"T"],[11143,"G"],[4736,"T"],[4123,"T"],[17343,"C"],[10140,"C"],[2732,"C"],[612,"T"],[152,"T"],[8928,"T"],[6344,"T"],[1250,"G"],[9366,"A"],[10391,"C"],[1346,"T"],[8995,"T"],[9476,"C"],[10389,"G"],[4126,"G"],[11265,"G"],[16399,"T"]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[19153,"T"],[18096,"A"],[9286,"C"],[12813,"C"],[12651,"T"],[5422,"A"],[7066,"T"],[11143,"G"],[11298,"T"],[2025,"C"],[2277,"G"],[4123,"T"],[15647,"C"],[17343,"C"],[7887,"T"],[2886,"A"],[1005,"T"],[2732,"C"],[1915,"T"],[612,"T"],[10140,"C"],[15815,"K"],[152,"T"],[8928,"T"],[6344,"T"],[2278,"A"],[9366,"A"],[10391,"C"],[8995,"T"],[9476,"C"],[10389,"G"],[4126,"G"],[11265,"G"],[16399,"T"]]]],"insert":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[[[18,0],"AACT"],[[0,46],"TTGT"]]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[[[0,0],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTGT"],[[18,0],"AACT"]]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[[0,0],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"],[[18,0],"AACT"]]],[{"name"
:"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[[[0,0],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTGT"],[[18,0],"AACT"]]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[[[20,0],"AAACGAACTTT"]]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[[[20,0],"TCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTT"]]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[[[20,0],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTGTAGATCTGTTCTCTAAACGAACTTT"]]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[[[18,0],"AACT"],[[11224,0],"GTTTGTCTG"]]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[[[18,0],"AACT"],[[11224,0],"GTTTGTCTG"]]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[[0,0],"AACTTTCGATCTCTTG"],[[20,0],"AAAATCTGTGTGGCTGTCACTCGG"],[[19,0],"AGATCTGTTCTCTAAACGAACTT"]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[[0,0],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTCGATCTCTTGT"],[[11229,0],"TCTGGTTTT"],[[18,0],"AACT"]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[[[0,0],"TCTCTTGTAGATCTGT"],[[20,0],"GTGGCTGTCACTCGG"],[[19,0],"CTCTAAACGAACTTTAAAATCTG"]]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[[[0,0],"ATTAAAGGTTTATACC"],[[20,0],"TTTGATCTCTTGTAGATCTGTTCTCTAAACGAACTTT"],[[19,0],"TCCCAGGTAACAAACCAACCAAC"]]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[[0,0],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"],[[11229,0],"TCTGGTTTT"],[[18,0],"AACT"]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[[0,0],"NNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTGT"],[[11229,0],"TCTGGTTTT"],[[18,0],"AACT"]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[[0,0],"ACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACT"],[[11229,0],"TCTGGTTTT"]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[[0,0],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"],[[11229,0],"TCTGGTTTT"],[[18,0],"AACT"]]]],"delete":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[[1,20]]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[[6455,3],[460,3]]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[[6455,3]]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[[1,44]]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[21,24],[1,18]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[[21,24],[1,18]]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":t
rue},[[1,18]]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[628,9]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[[1,44]]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[1,18]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[]]],"positions":[[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[1,19258]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[1,19235]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[1,19235]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[1,19274]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[1,19233]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[1,19280]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[1,19280]],[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[1,19230]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[1,19251]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[1,19202]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[1,19243]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[1,19277]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[1,19280]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[1,19234]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07
-11","number":1,"strand":true},[1,19289]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[1,19280]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[101,19278]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[1,19178]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[1,19289]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[1,19289]]]},{"id":"YYKDESFHEY","sequence":"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN","gaps":{},"mutate":[[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[]]],"insert":[[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[]]],"delete":[[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[]]],"positions":[[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[25646,25940]]]},{"id":"KQYCZWLRQK","sequence":"TAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATCTGGAGTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTC","gaps":{},"mutate":[[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{
"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[[32,"R"],[33,"Y"],[25,"R"],[27,"Y"],[38,"M"],[31,"R"],[26,"R"]]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[]]],"insert":[[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","
number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[]]],"delete":[[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","numb
er":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[]]],"positions":[[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[25677,25971]],[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[25674,25968]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[25724,26018]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[25745,26039]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[25719,26013]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[25739,26033]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[25619,25913]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[25724,26018]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[25721,26015]],[{"name":"England/QEUH-326F56B2/2023|OX45294
4.1|2023-03-01","number":1,"strand":true},[25724,26018]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[25675,25969]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[25694,25988]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[25701,25995]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[25691,25985]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[25699,25993]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[25721,26015]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[25739,26033]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[25733,26027]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[25702,25996]]]},{"id":"VCWRFXPALK","sequence":"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCTTG","gaps":{},"mutate":[[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[]]],"insert":[[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[]]],"delete":[[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[]]],"positions":[[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[19252,19525]]]},{"id":"GSXRICVFNW","sequence":"GAGGCCACCGGAGTACGATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATT
AATTTTAGTAGTGCTATCCCCATGTGATTTTAA","gaps":{"8":1,"116":118},"mutate":[[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[17,"T"]]]],"insert":[[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[[8,0],"G"]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[[8,0],"T"],[[116,51],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[[[116,0],"TAGCTTCTTAGGAGAATAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"]]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[[[116,0],"TAGCTTCTTAGGAGAATAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"]]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[[8,0],"G"],[[116,51],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":
true},[[[116,1],"NNNNNNNNN"],[[8,0],"T"],[[116,51],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[[[116,0],"TAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"]]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[[8,0],"T"],[[116,0],"T"]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[[116,1],"GTGCTATCC"],[[8,0],"T"],[[116,51],"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"]]]],"delete":[[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[[85,32],[1,25]]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[103,14]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[[1,25]]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[[1,25]]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[103,14]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[94,23]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[[1,25]]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[94,23]]]],"positions":[[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[29699,0]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[29701,0]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[29599,0]],[{"name":"Germany/Molecular_surveillanc
e_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[29683,0]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[29683,0]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[29721,0]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[29721,0]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[29655,0]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[29734,0]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[29715,0]]]},{"id":"LVHHZKTHFV","sequence":"CAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN","gaps":{"59":37},"mutate":[[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]]],"insert":[[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[[59,0],"GTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAG"]]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]]],"delete":[[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[99,37]]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[]]],"positions":[[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[29739,0]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[29736,0]],[{"name":"England/CLIMB-CM
7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[29748,0]]]},{"id":"IYZCSMYUUN","sequence":"TAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTTGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGG","gaps":{},"mutate":[[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]]],"insert":[[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]]],"delete":[[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]]],"positions":[[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[1,100]]]},{"id":"ZOTARKVMSA","sequence":"TGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAATTGAAACGATTTGTGCACCACTCACTGTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAGGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCA
GGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTATAACCAGAACTCAATCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTGGATGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGATGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTTCGCACCATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTAATGAAGTCAGCCAAATCGCTCCAGGGCAAACTGGAAATATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAAGCTTGATTCTAAGGTTGGTGGTAATT
ATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCGACCCACTTATGGTGTTGGTCACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAATATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAATATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCATAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATT
ACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTA","gaps":{"2421":1,"2069":9,"2632":9,"2423":3,"6127":3},"mutate":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[[2628,"A"],[3446,"C"],[3324,"C"],[3459,"C"],[5844,"T"],[3423,"G"],[3323,"C"],[6031,"A"],[2745,"T"],[3370,"G"],[2537,"G"],[3694,"T"],[4428,"A"],[3447,"C"],[3027,"C"],[3326,"A"],[2238,"C"],[3005,"C"],[5617,"C"],[2428,"A"],[3092,"A"],[3551,"A"]]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[[2537,"G"],[2628,"A"],[3446,"C"],[3324,"C"],[2529,"T"],[3459,"C"],[3447,"C"],[2653,"T"],[3027,"C"],[2238,"C"],[3323,"C"],[3326,"A"],[3423,"G"],[5844,"T"],[3005,"C"],[2745,"T"],[2428,"A"],[3092,"A"],[3370,"G"]]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[[2628,"A"],[3446,"C"],[3324,"C"],[3355,"T"],[3459,"C"],[3447,"C"],[3027,"C"],[3326,"A"],[2238,"C"],[3323,"C"],[3356,"C"],[5844,"T"],[3005,"C"],[2745,"T"],[2428,"A"],[3092,"A"],[3370,"G"]]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[[2537,"G"],[2628,"A"],[3446,"C"],[3324,"C"],[3459,"C"],[4790,"G"],[3447,"C"],[3027,"C"],[3326,"A"],[2238,"
C"],[3323,"C"],[5844,"T"],[3005,"C"],[2745,"T"],[2428,"A"],[3092,"A"],[3370,"G"]]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[[4376,"G"],[3102,"C"],[392,"C"],[1118,"K"],[2655,"T"],[3423,"C"],[1119,"Y"],[3203,"G"],[1124,"S"],[3214,"A"],[3953,"C"],[3483,"A"],[1121,"W"],[4032,"C"],[106,"K"],[109,"M"],[112,"M"],[3241,"G"],[94,"R"],[1123,"S"],[3491,"A"],[3006,"G"],[107,"K"],[1127,"K"],[3503,"T"],[4897,"T"],[1692,"C"],[102,"W"],[3114,"C"],[1126,"R"],[104,"W"],[1116,"R"],[101,"W"],[1129,"M"],[4852,"A"],[2415,"G"],[4027,"T"],[6012,"C"],[492,"A"],[1122,"K"],[3310,"T"],[4282,"C"],[99,"Y"],[98,"W"],[96,"W"],[3420,"G"],[95,"K"],[1114,"M"],[3107,"T"],[1128,"S"],[103,"R"],[3019,"M"],[3116,"A"],[3441,"A"],[93,"R"],[2055,"C"],[5428,"C"],[5516,"T"],[97,"Y"],[1120,"R"],[100,"W"],[1115,"Y"]]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[[4931,"T"],[392,"C"],[2274,"T"],[3326,"A"],[3101,"C"],[4092,"T"],[3116,"A"],[3630,"A"],[492,"A"],[2055,"C"],[2190,"T"],[3203,"G"],[3214,"A"],[3476,"A"],[4558,"A"],[3468,"G"]]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[2628,"A"],[3446,"C"],[3324,"C"],[3459,"C"],[2145,"G"],[5355,"C"],[5844,"T"],[3423,"G"],[3323,"C"],[3338,"G"],[2536,"T"],[3370,"G"],[5949,"T"],[2537,"G"],[321,"T"],[3027,"C"],[3326,"A"],[2238,"C"],[3447,"C"],[3005,"C"],[2748,"G"],[2428,"A"],[3092,"A"],[3551,"T"]]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[[4931,"T"],[392,"C"],[2274,"T"],[3027,"A"],[3101,"C"],[3326,"A"],[3116,"A"],[3630,"A"],[492,"A"],[2055,"C"],[2190,"T"],[3203,"G"],[3214,"A"],[3476,"A"],[4558,"A"],[3468,"G"]]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[[2628,"A"],[3446,"C"],[3324,"C"],[3355,"T"],[3459,"C"],[3708,"T"],[3052,"T"],[5844,"T"],[3323,"C"],[3356,"C"],[3953,"C"],[2745,"T"],[3370,"G"],[2537,"G"],[3447,"C"],[3027,"C"],[3326,"A"],[2238,"C"],[4
101,"T"],[3005,"C"],[2426,"A"],[2428,"A"],[3092,"A"]]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[[4376,"G"],[3102,"C"],[392,"C"],[4852,"A"],[2415,"G"],[4027,"T"],[3423,"C"],[4838,"A"],[5212,"T"],[6012,"C"],[492,"A"],[3203,"G"],[3214,"A"],[3310,"T"],[3483,"A"],[3953,"C"],[4282,"C"],[5991,"T"],[585,"T"],[3241,"G"],[3420,"G"],[2274,"T"],[3006,"G"],[3503,"T"],[3027,"A"],[3107,"T"],[3440,"A"],[3441,"A"],[3116,"A"],[4897,"T"],[5428,"C"],[839,"T"],[2055,"C"],[3114,"C"],[2812,"A"]]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[[2628,"G"],[3446,"G"],[5291,"T"],[3321,"C"],[2059,"G"],[3345,"G"],[3370,"A"]]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[1906,"N"],[1860,"N"],[1881,"N"],[1923,"N"],[3102,"C"],[1897,"N"],[5070,"T"],[1910,"N"],[1900,"N"],[3423,"C"],[1879,"N"],[1912,"N"],[1882,"N"],[1867,"N"],[1878,"N"],[3006,"G"],[4897,"T"],[2464,"C"],[1587,"N"],[1863,"N"],[3114,"C"],[1899,"N"],[1901,"N"],[1865,"N"],[1590,"N"],[2415,"G"],[1877,"N"],[1895,"N"],[1874,"N"],[1917,"N"],[1921,"N"],[1886,"N"],[6012,"C"],[1591,"N"],[1494,"T"],[1903,"N"],[3420,"G"],[2274,"T"],[1894,"N"],[1915,"N"],[3107,"T"],[4193,"G"],[1592,"N"],[2055,"C"],[1914,"N"],[1883,"N"],[1884,"N"],[4376,"G"],[392,"C"],[1905,"N"],[1909,"N"],[1887,"N"],[1864,"N"],[3203,"G"],[3214,"A"],[1902,"N"],[3483,"A"],[3953,"C"],[1871,"N"],[4870,"T"],[1862,"N"],[1888,"N"],[3241,"G"],[3491,"A"],[1904,"N"],[1872,"N"],[1861,"N"],[1907,"N"],[1911,"N"],[1869,"N"],[3503,"T"],[1918,"N"],[1868,"N"],[1892,"N"],[1896,"N"],[1880,"N"],[1875,"N"],[1893,"N"],[1913,"N"],[1873,"N"],[4852,"A"],[2052,"G"],[1870,"N"],[1920,"N"],[4027,"T"],[1922,"N"],[492,"A"],[3310,"A"],[4282,"C"],[1876,"N"],[1889,"N"],[1586,"N"],[4066,"T"],[1588,"N"],[1885,"N"],[1589,"N"],[1866,"N"],[3441,"A"],[1898,"N"],[3116,"A"],[5428,"C"],[4053,"T"],[1891,"N"],[1919,"N"],[1890,"N"],[1908,"N"],[1916,"N"]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|
2021-07-11","number":1,"strand":true},[[3102,"C"],[2180,"N"],[2199,"N"],[2375,"N"],[2183,"N"],[2392,"N"],[2230,"N"],[4838,"A"],[2231,"N"],[2182,"N"],[2237,"N"],[2239,"N"],[4032,"G"],[2191,"N"],[2187,"N"],[2236,"N"],[2193,"N"],[2256,"N"],[2406,"N"],[2259,"N"],[3006,"G"],[2377,"N"],[4897,"T"],[2198,"N"],[2238,"N"],[2371,"N"],[2174,"N"],[2380,"N"],[2179,"N"],[2386,"N"],[3114,"C"],[2374,"N"],[2173,"N"],[2234,"N"],[2401,"N"],[2390,"N"],[2398,"N"],[2184,"N"],[2220,"N"],[2177,"N"],[2223,"N"],[2396,"N"],[5897,"T"],[2415,"N"],[2244,"N"],[2185,"N"],[2385,"N"],[6012,"C"],[2394,"N"],[2416,"N"],[2383,"N"],[2384,"N"],[2226,"N"],[2402,"N"],[2169,"N"],[3420,"G"],[2196,"N"],[2188,"N"],[2189,"N"],[2227,"N"],[2274,"T"],[2242,"N"],[2382,"N"],[3107,"T"],[2369,"N"],[2387,"N"],[2055,"G"],[2388,"N"],[2407,"N"],[2224,"N"],[2411,"N"],[2243,"N"],[2221,"N"],[4376,"G"],[392,"C"],[2228,"N"],[2241,"N"],[2397,"N"],[2408,"N"],[3203,"G"],[2381,"N"],[3214,"A"],[3953,"C"],[3483,"A"],[2168,"N"],[2175,"N"],[2373,"N"],[2225,"N"],[2376,"N"],[3241,"G"],[3491,"A"],[2405,"N"],[2229,"N"],[2400,"N"],[2170,"N"],[2172,"N"],[2192,"N"],[2219,"N"],[2393,"N"],[2414,"N"],[3503,"T"],[2195,"N"],[2417,"N"],[2379,"N"],[2389,"N"],[4852,"A"],[2186,"N"],[2218,"N"],[2368,"N"],[2412,"N"],[2194,"N"],[2222,"N"],[2176,"N"],[492,"A"],[2240,"N"],[2404,"N"],[2410,"N"],[2378,"N"],[3310,"T"],[4027,"T"],[4282,"C"],[2395,"N"],[2399,"N"],[2413,"N"],[2403,"N"],[2370,"N"],[3441,"A"],[2181,"N"],[2257,"N"],[2258,"N"],[3116,"A"],[2171,"N"],[2190,"N"],[3345,"G"],[2391,"N"],[2418,"N"],[5428,"C"],[2178,"N"],[2197,"N"],[2235,"N"],[2409,"N"],[2372,"N"]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[[2628,"G"],[3446,"G"],[2430,"T"],[2145,"G"],[3321,"C"],[3027,"C"],[2059,"G"],[3345,"G"],[3370,"A"]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[4376,"G"],[3102,"C"],[392,"C"],[4852,"A"],[5897,"T"],[2415,"R"],[4027,"T"],[4838,"R"],[6012,"C"],[492,"A"],[3203,"G"],[3214
,"A"],[3310,"T"],[3483,"A"],[3953,"C"],[4282,"C"],[451,"T"],[4032,"G"],[3241,"G"],[3491,"A"],[3420,"G"],[3006,"G"],[3503,"T"],[3441,"A"],[3107,"T"],[3345,"G"],[4897,"T"],[3116,"A"],[5428,"C"],[5873,"T"],[2055,"G"],[3114,"C"],[1445,"T"]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[[2628,"G"],[3446,"G"],[5861,"T"],[3345,"G"]]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[4376,"G"],[3102,"C"],[392,"C"],[4852,"A"],[2415,"G"],[4027,"T"],[3423,"C"],[6012,"C"],[492,"A"],[3203,"G"],[3214,"A"],[3310,"T"],[3483,"A"],[3953,"C"],[4282,"C"],[5991,"T"],[4032,"C"],[3241,"G"],[3491,"A"],[3006,"G"],[3503,"T"],[3441,"A"],[3107,"T"],[4897,"T"],[3116,"A"],[5428,"C"],[6138,"T"],[2055,"C"],[3114,"C"],[6034,"T"]]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[4376,"G"],[3102,"C"],[392,"C"],[4852,"A"],[5897,"T"],[2415,"G"],[4027,"T"],[1806,"C"],[4838,"A"],[6012,"C"],[492,"A"],[3203,"G"],[3214,"A"],[3310,"T"],[3483,"A"],[3953,"C"],[4282,"C"],[5780,"T"],[4032,"G"],[3241,"G"],[3491,"A"],[3420,"G"],[3006,"G"],[2842,"T"],[3441,"A"],[3107,"T"],[3345,"G"],[3503,"T"],[3116,"A"],[4897,"T"],[5428,"C"],[2055,"G"],[3114,"C"]]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[[2182,"T"],[2628,"G"],[3446,"G"],[1140,"T"],[3027,"C"],[3345,"G"]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[4376,"G"],[3102,"C"],[392,"C"],[4566,"T"],[4852,"A"],[5897,"T"],[4027,"T"],[5879,"C"],[4838,"A"],[6012,"C"],[492,"A"],[3203,"G"],[3214,"A"],[3310,"T"],[3483,"A"],[3953,"C"],[4282,"C"],[5944,"T"],[4032,"G"],[3241,"G"],[3491,"A"],[3420,"G"],[4021,"C"],[2274,"T"],[3006,"G"],[1547,"T"],[3441,"A"],[3107,"T"],[3345,"G"],[3503,"T"],[3116,"A"],[4897,"T"],[5428,"C"],[2055,"G"],[3114,"C"],[2926,"T"]]]],"insert":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09
","number":1,"strand":true},[]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[[[2069,0],"TACCCCCTG"],[[2423,0],"ATT"]]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[[[2069,0],"TACCCCCTG"],[[2632,0],"GAGCCAGAA"]]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[[[2069,0],"TACCCCCTG"],[[2632,0],"GAGCCAGAA"]]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[[[2421,0],"C"],[[2069,0],"TACCCCCTG"],[[2423,0],"CTA"]]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[[[2069,0],"TACCCCCTG"]]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[[2069,0],"TACCCCCTG"]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[[2069,0],"TACCCCCTG"]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[[[2069,0],"TACCCCCTG"]]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[[2069,0],"TACCCCCTG"]]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[]]
,[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[[2069,0],"TACCCCCTG"],[[6127,0],"CCC"]]]],"delete":[[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[[2419,3]]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[[2419,3]]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[[2419,3]]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[[2419,3]]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[[2419,3]]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[[2193,6],[2415,9],[2622,3]]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[[2419,3]]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[[2193,6],[2415,9],[2622,3]]],[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[[2419,3]]],[{"name":"USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07","number":1,"strand":true},[[2419,1]]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[[2193,6]]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[[2457,6]]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[[2193,6]]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[[2457,6],[1,7]]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[[2193,6]]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[[2457,6]]],[{"name":"USA/FL
-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[[2193,6]]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[[2457,6]]]],"positions":[[{"name":"USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27","number":1,"strand":true},[19477,25645]],[{"name":"England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09","number":1,"strand":true},[19555,25723]],[{"name":"England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05","number":1,"strand":true},[19555,25732]],[{"name":"England/NORW-30F5277/2021|OV313316.1|2021-11-17","number":1,"strand":true},[19571,25738]],[{"name":"Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29","number":1,"strand":true},[19526,25700]],[{"name":"England/MILK-338D3D9/2022|OV784995.1|2022-01-21","number":1,"strand":true},[19552,25723]],[{"name":"Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12","number":1,"strand":true},[19564,25744]],[{"name":"USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31","number":1,"strand":true},[19518,25698]],[{"name":"England/QEUH-326F56B2/2023|OX452944.1|2023-03-01","number":1,"strand":true},[19555,25723]],[{"name":"USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24","number":1,"strand":true},[19509,25674]],[{"name":"USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09","number":1,"strand":true},[19505,25673]],[{"name":"USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03","number":1,"strand":true},[19508,25676]],[{"name":"USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03","number":1,"strand":true},[19453,25618]],[{"name":"Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28","number":1,"strand":true},[19510,25690]],[{"name":"USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17","number":1,"strand":true},[19553,25718]],[{"name":"England/ALDP-337E0B6/2022|OV828376.1|2022-01-20","number":1,"strand":true},[19549,25720]],[{"name":"Denmark/DCGC-129060/2021|OV917631.1|2021-07-11","number":1,"strand":true},[19564,25738]],[{"name":"USA/ME-CDC-QDX257043
39/2021|OK252479.1|2021-06-07","number":1,"strand":true},[19510,25693]],[{"name":"USA/LA-EVTL19995/2023|OR649055.1|2023-09-03","number":1,"strand":true},[19533,25701]],[{"name":"USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30","number":1,"strand":true},[19555,25720]]]}]} \ No newline at end of file diff --git a/test/sars_20.nwk b/test/sars_20.nwk new file mode 100644 index 0000000..fe4ce7d --- /dev/null +++ b/test/sars_20.nwk @@ -0,0 +1 @@ +((((USA/NY-CDC-LC0906971/2022|OP671333.1|2022-10-03,USA/CA-CDC-VSX-A078011/2023|OQ582387.1|2023-02-17),(USA/MA-CDCBI-CRSP_DVHKMC6PPRXWPTXC/2022|OP606163.1|2022-09-24,(USA/FL-CDC-STM-H5KCN9XBD/2022|OP732003.1|2022-09-30,USA/CA-CDC-FG-019030/2021|OK212520.1|2021-03-31))),((Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12,Denmark/DCGC-129060/2021|OV917631.1|2021-07-11),(England/NORW-30F5277/2021|OV313316.1|2021-11-17,(Germany/Molecular_surveillance_of_SARS-CoV-2_in_Germany/2021|OV412394.1|2021-09-29,England/MILK-2D4CB8C/2021|OV443913.1|2021-12-05)))),(((Germany/IMS-10023-CVDP-00098/2021|OU080963.1|2021-01-28,USA/ME-CDC-QDX25704339/2021|OK252479.1|2021-06-07),(England/ALDP-337E0B6/2022|OV828376.1|2022-01-20,(England/MILK-338D3D9/2022|OV784995.1|2022-01-21,England/CLIMB-CM7YMICA/2023|OY753779.1|2023-10-09))),((USA/CA-LACPHL-AY03056/2023|OR736613.1|2023-09-27,USA/CA-CDC-QDX86168728/2023|OR708425.1|2023-10-09),(USA/UT-UPHL-230926445744/2023|OR622733.1|2023-09-03,(USA/LA-EVTL19995/2023|OR649055.1|2023-09-03,England/QEUH-326F56B2/2023|OX452944.1|2023-03-01))))); From 5aac6a52353e657b6426c6987114c73b79889cbf Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 18 Nov 2024 12:09:28 -0800 Subject: [PATCH 062/103] code with interative mode --- src/annotate.cpp | 2 +- src/fasta.cpp | 462 +++++++++++ src/fitchSankoff.cpp | 2 +- src/panman.cpp | 51 +- src/panman.hpp | 4 + src/panmanUtils.cpp | 1860 ++++++++++++++++++++++-------------------- 6 files changed, 1477 insertions(+), 904 deletions(-) diff --git a/src/annotate.cpp 
b/src/annotate.cpp index 54da675..ff219bf 100644 --- a/src/annotate.cpp +++ b/src/annotate.cpp @@ -29,7 +29,7 @@ void panmanUtils::Tree::annotate(std::ifstream& fin) { } if(allNodes.find(nodeId) == allNodes.end()) { - std::cout << "Node ID not found. Line: " << nodeId << " [" << line << "]" << std::endl; + // std::cout << "Node ID not found. Line: " << nodeId << " [" << line << "]" << std::endl; // for (auto a: allNodes) { // std::cout << a-> // } diff --git a/src/fasta.cpp b/src/fasta.cpp index de2d7e9..1cc08bf 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -1276,3 +1276,465 @@ void panmanUtils::Tree::printFASTAFromGFA(std::ifstream& fin, std::ofstream& fou } } } + +void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, sequence_t& sequence, + blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { + + // For reversing block mutations - primary block id, secondary block id, old mutation, old strand, new mutation, new strand + std::vector< std::tuple< int32_t, int32_t, bool, bool, bool, bool > > blockMutationInfo (root->blockMutation.size()); + + // Block Mutations + tbb::parallel_for((size_t)0, root->blockMutation.size(), [&](size_t i) { + // for(int i=0; iblockMutation.size(); i++) { + auto mutation = root->blockMutation[i]; + int32_t primaryBlockId = mutation.primaryBlockId; + int32_t secondaryBlockId = mutation.secondaryBlockId; + bool type = mutation.blockMutInfo; + bool inversion = mutation.inversion; + + if (secondaryBlockId != -1) { + std::cout << "Error: Block Secondary ID is not -1" << std::endl; + exit(0); + } + + if(type == 1) { + // insertion + bool oldStrand; + bool oldMut; + if(secondaryBlockId != -1) { + oldStrand = blockStrand[primaryBlockId].second[secondaryBlockId]; + oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; + 
blockExists[primaryBlockId].second[secondaryBlockId] = true; + + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId].second[secondaryBlockId] = !inversion; + } else { + oldStrand = blockStrand[primaryBlockId].first; + oldMut = blockExists[primaryBlockId].first; + blockExists[primaryBlockId].first = true; + + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId].first = !inversion; + } + blockMutationInfo[i] = std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, true, !inversion); + + + } else { + bool oldMut; + bool oldStrand; + if(inversion) { + // This means that this is not a deletion, but instead an inversion + if(secondaryBlockId != -1) { + oldStrand = blockStrand[primaryBlockId].second[secondaryBlockId]; + oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; + blockStrand[primaryBlockId].second[secondaryBlockId] = !oldStrand; + } else { + oldStrand = blockStrand[primaryBlockId].first; + oldMut = blockExists[primaryBlockId].first; + blockStrand[primaryBlockId].first = !oldStrand; + } + if(oldMut != true) { + std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; + } + blockMutationInfo[i] = std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, oldMut, !oldStrand); + } else { + // Actually a deletion + + if(secondaryBlockId != -1) { + oldStrand = blockStrand[primaryBlockId].second[secondaryBlockId]; + oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; + blockExists[primaryBlockId].second[secondaryBlockId] = false; + + // resetting strand to true during deletion + blockStrand[primaryBlockId].second[secondaryBlockId] = true; + } else { + oldStrand = blockStrand[primaryBlockId].first; + oldMut = blockExists[primaryBlockId].first; + blockExists[primaryBlockId].first = false; + + // resetting strand to true during deletion + blockStrand[primaryBlockId].first = true; + } + } + blockMutationInfo[i] = std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, false, true); + + } + }); + + // For backtracking. primaryBlockId, secondaryBlockId, pos, gapPos, (oldVal, newVal) in substitution, ('-', newVal) in insertion, (oldVal, '-') in deletion + std::vector< std::vector > > mutationInfo(root->nucMutation.size()); + + // Nuc mutations + tbb::parallel_for((size_t)0, root->nucMutation.size(), [&](size_t i) { + int32_t primaryBlockId = root->nucMutation[i].primaryBlockId; + int32_t secondaryBlockId = root->nucMutation[i].secondaryBlockId; + + // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { + int32_t nucPosition = root->nucMutation[i].nucPosition; + int32_t nucGapPosition = root->nucMutation[i].nucGapPosition; + uint32_t type = (root->nucMutation[i].mutInfo & 0x7); + char newVal = '-'; + + if(type < 3) { + // Either S, I or D + int len = ((root->nucMutation[i].mutInfo) >> 4); + mutationInfo[i].resize(len); + if(primaryBlockId >= sequence.size()) { + std::cout << primaryBlockId << " " << sequence.size() << 
std::endl; + } + + if(type == panmanUtils::NucMutationType::NS) { + // Substitution + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = newVal; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal); + } + + } + } else { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].first[nucPosition+j].first = newVal; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal); + } + } + } + } else if(type == panmanUtils::NucMutationType::NI) { + // Insertion + 
if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition + j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = newVal; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal); + } + + } + } else { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId].first[nucPosition+j].first = newVal; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal); + } + } + } + } else if(type == panmanUtils::NucMutationType::ND) { + // Deletion + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + for(int j = 0; j < len; 
j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j]; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-'); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = '-'; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-'); + } + + } + } else { + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = '-'; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-'); + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; + sequence[primaryBlockId].first[nucPosition+j].first = '-'; + mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-'); + } + } + } + } + } else { + mutationInfo[i].resize(1); + if(type == panmanUtils::NucMutationType::NSNPS) { + // SNP Substitution + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = newVal; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); + } else { + char oldVal = 
sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = newVal; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); + } + } else { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = newVal; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); + } else { + char oldVal = sequence[primaryBlockId].first[nucPosition].first; + sequence[primaryBlockId].first[nucPosition].first = newVal; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); + } + } + } else if(type == panmanUtils::NucMutationType::NSNPI) { + // SNP Insertion + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = newVal; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); + } else { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = newVal; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); + } + } else { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = newVal; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, 
nucPosition, nucGapPosition, oldVal, newVal); + } else { + char oldVal = sequence[primaryBlockId].first[nucPosition].first; + sequence[primaryBlockId].first[nucPosition].first = newVal; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); + } + } + } else if(type == panmanUtils::NucMutationType::NSNPD) { + // SNP Deletion + if(secondaryBlockId != -1) { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = '-'; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-'); + } else { + char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; + sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = '-'; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-'); + } + } else { + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; + sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = '-'; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-'); + } else { + char oldVal = sequence[primaryBlockId].first[nucPosition].first; + sequence[primaryBlockId].first[nucPosition].first = '-'; + mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-'); + } + } + } + } + }); + // } + + if(root->children.size() == 0 || rootSeq) { + // Print sequence + + fout << '>' << root->identifier << std::endl; + + int offset = 0; + if(!aligned && circularSequences.find(root->identifier) != circularSequences.end()) { + // If MSA is to be printed, offset doesn't matter + offset = circularSequences[root->identifier]; + } + sequence_t 
sequencePrint = sequence; + blockExists_t blockExistsPrint = blockExists; + blockStrand_t blockStrandPrint = blockStrand; + + if(rotationIndexes.find(root->identifier) != rotationIndexes.end() && rotationIndexes[root->identifier] != 0) { + int ctr = -1, rotInd = 0; + for(size_t i = 0; i < blockExistsPrint.size(); i++) { + if(blockExistsPrint[i].first) { + ctr++; + } + if(ctr == rotationIndexes[root->identifier]) { + rotInd = i; + break; + } + } + // std::cout << "rotating" << std::endl; + rotate(sequencePrint.begin(), sequencePrint.begin() + rotInd, sequencePrint.end()); + rotate(blockExistsPrint.begin(), blockExistsPrint.begin() + rotInd, blockExistsPrint.end()); + rotate(blockStrandPrint.begin(), blockStrandPrint.begin() + rotInd, blockStrandPrint.end()); + } + + if(sequenceInverted.find(root->identifier) != sequenceInverted.end() && sequenceInverted[root->identifier]) { + // std::cout << "inverting" << std::endl; + reverse(sequencePrint.begin(), sequencePrint.end()); + reverse(blockExistsPrint.begin(), blockExistsPrint.end()); + reverse(blockStrandPrint.begin(), blockStrandPrint.end()); + } + if (allIndex) { + panmanUtils::printSubsequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, panMATStart, panMATEnd, aligned, fout, offset); + } else { + panmanUtils::printSequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); + } + } else { + // DFS on children + for(panmanUtils::Node* child: root->children) { + printFASTAHelperNew(child, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); + + } + } + + + // Undo block mutations when current node and its subtree have been processed + // for(auto it = blockMutationInfo.rbegin(); it != blockMutationInfo.rend(); it++) { + tbb::parallel_for((size_t)0, blockMutationInfo.size(), [&](size_t i) { + auto mutation = blockMutationInfo[i]; + if(std::get<1>(mutation) != -1) { + blockExists[std::get<0>(mutation)].second[std::get<1>(mutation)] 
= std::get<2>(mutation); + blockStrand[std::get<0>(mutation)].second[std::get<1>(mutation)] = std::get<3>(mutation); + } else { + blockExists[std::get<0>(mutation)].first = std::get<2>(mutation); + blockStrand[std::get<0>(mutation)].first = std::get<3>(mutation); + } + }); + + // Undo nuc mutations when current node and its subtree have been processed + // for(auto it = mutationInfo.rbegin(); it != mutationInfo.rend(); it++) { + tbb::parallel_for((size_t)0, mutationInfo.size(), [&](size_t i) { + auto mutationList = mutationInfo[i]; + for(auto jt = mutationList.rbegin(); jt != mutationList.rend(); jt++) { + auto mutation = *jt; + if(std::get<1>(mutation) != -1) { + if(std::get<3>(mutation) != -1) { + sequence[std::get<0>(mutation)].second[std::get<1>(mutation)][std::get<2>(mutation)].second[std::get<3>(mutation)] = std::get<4>(mutation); + } else { + sequence[std::get<0>(mutation)].second[std::get<1>(mutation)][std::get<2>(mutation)].first = std::get<4>(mutation); + } + } else { + if(std::get<3>(mutation) != -1) { + sequence[std::get<0>(mutation)].first[std::get<2>(mutation)].second[std::get<3>(mutation)] = std::get<4>(mutation); + } else { + sequence[std::get<0>(mutation)].first[std::get<2>(mutation)].first = std::get<4>(mutation); + } + } + } + }); +} + + +void panmanUtils::Tree::printFASTANew(std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { + // List of blocks. Each block has a nucleotide list. Along with each nucleotide is a gap list. 
+ std::vector< std::pair< std::vector< std::pair< char, std::vector< char > > >, std::vector< std::vector< std::pair< char, std::vector< char > > > > > > sequence(blocks.size() + 1); + std::vector< std::pair< bool, std::vector< bool > > > blockExists(blocks.size() + 1, {false, {}}); + blockStrand_t blockStrand(blocks.size() + 1, {true, {}}); + + // Assigning block gaps + // for(size_t i = 0; i < blockGaps.blockPosition.size(); i++) { + tbb::parallel_for((size_t)0, blockGaps.blockPosition.size(), [&](size_t i) { + sequence[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i]); + blockExists[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i], false); + blockStrand[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i], true); + }); + // } + + int32_t maxBlockId = 0; + + // Create consensus sequence of blocks + // for(size_t i = 0; i < blocks.size(); i++) { + tbb::parallel_for((size_t)0, blocks.size(), [&](size_t i) { + int32_t primaryBlockId = ((int32_t)blocks[i].primaryBlockId); + int32_t secondaryBlockId = ((int32_t)blocks[i].secondaryBlockId); + + // maxBlockId = std::max(maxBlockId, primaryBlockId); + + for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { + bool endFlag = false; + for(size_t k = 0; k < 8; k++) { + const int nucCode = (((blocks[i].consensusSeq[j]) >> (4*(7 - k))) & 15); + + if(nucCode == 0) { + endFlag = true; + break; + } + const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); + + if(secondaryBlockId != -1) { + sequence[primaryBlockId].second[secondaryBlockId].push_back({nucleotide, {}}); + } else { + sequence[primaryBlockId].first.push_back({nucleotide, {}}); + } + } + + if(endFlag) { + break; + } + } + + // End character to incorporate for gaps at the end + if(secondaryBlockId != -1) { + sequence[primaryBlockId].second[secondaryBlockId].push_back({'x', {}}); + } else { + sequence[primaryBlockId].first.push_back({'x', {}}); + } + }); + // } + // std::cout << maxBlockId << " 
" << blocks.size() << std::endl; + // sequence.resize(maxBlockId + 1); + // blockExists.resize(maxBlockId + 1); + // blockStrand.resize(maxBlockId + 1); + + // Assigning nucleotide gaps in blocks + // for(size_t i = 0; i < gaps.size(); i++) { + tbb::parallel_for((size_t)0, gaps.size(), [&](size_t i) { + int32_t primaryBId = (gaps[i].primaryBlockId); + int32_t secondaryBId = (gaps[i].secondaryBlockId); + + for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + int len = gaps[i].nucGapLength[j]; + int pos = gaps[i].nucPosition[j]; + if(secondaryBId != -1) { + sequence[primaryBId].second[secondaryBId][pos].second.resize(len, '-'); + } else { + sequence[primaryBId].first[pos].second.resize(len, '-'); + } + } + }); + // } + + // Run depth first traversal to extract sequences + + printFASTAHelperNew(root, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); + +} \ No newline at end of file diff --git a/src/fitchSankoff.cpp b/src/fitchSankoff.cpp index 9c754af..022d52d 100644 --- a/src/fitchSankoff.cpp +++ b/src/fitchSankoff.cpp @@ -30,7 +30,7 @@ int panmanUtils::Tree::nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states, int refState) { if(node->children.size() == 0) { if(states.find(node->identifier) == states.end()) { - std::cerr << "Node ID not found" << std::endl; + // std::cerr << "Node ID not found" << std::endl; return states[node->identifier] = 0; } return states[node->identifier]; diff --git a/src/panman.cpp b/src/panman.cpp index 09060ff..7242dc5 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -3377,18 +3377,51 @@ void panmanUtils::Tree::printMutations(std::ostream& fout) { void panmanUtils::Tree::printNodePaths(std::ostream& fout) { - for (auto &u: allNodes) { - Node* it = u.second; + // for (auto &u: allNodes) { + // Node* it = u.second; - while(it != root) { - std::cout << it->identifier << "\t"; - it = it->parent; - if (it != root) std::cout << "<\t"; - else std::cout << "\n"; - } + 
// while(it != root) { + // std::cout << it->identifier << "\t"; + // it = it->parent; + // if (it != root) std::cout << "<\t"; + // else std::cout << "\n"; + // } + // } + string name; + std::cout << "Enter sequence name:"; + std::cin >> name; + + std::string positionString; + int position; + std::cout << "Enter position:"; + std::cin >> positionString; + position = std::stoi(positionString); + + Node * currentNode = allNodes[name]; + while (true){ + for (auto &n: currentNode->nucMutation){ + if (n.nucPosition==position){ + std::cout << " >> " << currentNode->identifier << ": " << (getNucleotideFromCode(n.nucs&0xF)) << std::endl; + break; + } else if (position > n.nucGapPosition && position - n.nucPosition < 6) { + int len = (n.mutInfo>>4)&0xF; + if (n.nucPosition+len>position) { + int itr = position - n.nucPosition; + int nuc = n.nucs; + while (itr>0) { + nuc = nuc>>4; + itr--; + } + std::cout << " >(" << n.nucPosition << ", " << len << ", " << (NucMutationType)(n.mutInfo&0xF) << ")" << currentNode->identifier << ": " << (getNucleotideFromCode(nuc&0xF)) << std::endl; + } + } + } + if (currentNode == root) break; + currentNode = currentNode->parent; } - + std::cout << "\n"; + return; } diff --git a/src/panman.hpp b/src/panman.hpp index a721859..9f3530b 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -308,6 +308,9 @@ class Tree { void printFASTAHelper(panmanUtils::Node* root, sequence_t& sequence, blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); + void printFASTAHelperNew(panmanUtils::Node* root, sequence_t& sequence, + blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, + bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); void printSingleNodeHelper(std::vector &nodeList, 
int nodeListIndex, sequence_t& sequence, blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart={-1,-1,-1,-1}, const std::tuple< int, int, int, int >& panMATEnd={-1,-1,-1,-1}); @@ -460,6 +463,7 @@ class Tree { void printSummary(std::ostream &out); void printBfs(Node* node = nullptr); void printFASTA(std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start={-1,-1,-1,-1}, const std::tuple &end={-1,-1,-1,-1}, bool allIndex = false); + void printFASTANew(std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start={-1,-1,-1,-1}, const std::tuple &end={-1,-1,-1,-1}, bool allIndex = false); void printSingleNode(std::ostream& fout, const sequence_t& sequence, const blockExists_t& blockExists, const blockStrand_t& blockStrand, std::string nodeIdentifier, std::tuple< int, int, int, int > &panMATStart, std::tuple< int, int, int, int > &panMATEnd); diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 17af31f..34be6ba 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -81,6 +81,8 @@ void checkFunction(panmanUtils::Tree *T) { return; } + + // program option description for building/loading a PanMAT into memory po::options_description globalDesc("panmanUtils Command Line Arguments"); po::positional_options_description globalPositionArgumentDesc; @@ -90,36 +92,37 @@ po::options_description summaryDesc("Summary Command Line Arguments"); po::options_description useDesc("Use Command Line Arguments"); po::options_description fastaDesc("FASTA Command Line Arguments"); po::positional_options_description fastaPositionArgumentDesc; -po::options_description mafDesc("MAF Writer Command Line Arguments"); -po::positional_options_description mafPositionArgumentDesc; -po::options_description writeDesc("MAT Writer Command Line Arguments"); -po::positional_options_description writePositionArgumentDesc; 
-po::options_description subtreeDesc("Subtree Extract Command Line Arguments"); -po::positional_options_description subtreePositionArgumentDesc; +po::options_description fastaAlignDesc("FASTA Command Line Arguments"); +po::positional_options_description fastaAlignPositionArgumentDesc; +po::options_description subnetDesc("Subnetwork Command Line Arguments"); +po::positional_options_description subnetPositionArgumentDesc; po::options_description vcfDesc("VCF writer Command Line Arguments"); po::positional_options_description vcfPositionArgumentDesc; -po::options_description annotateDesc("MAT Annotate Command Line Arguments"); +po::options_description gfaDesc("GFA writer Command Line Arguments"); +po::positional_options_description gfaPositionArgumentDesc; +po::options_description mafDesc("MAF Writer Command Line Arguments"); +po::positional_options_description mafPositionArgumentDesc; +po::options_description newickDesc("Newick Writer Command Line Arguments"); +po::positional_options_description newickPositionArgumentDesc; +po::options_description extendNewickDesc("Extended Newick Writer Command Line Arguments"); +po::positional_options_description extedNewickDescPositionArgumentDesc; +po::options_description annotateDesc("Annotate Command Line Arguments"); po::positional_options_description annotatePositionArgumentDesc; -po::options_description searchDesc("Search by annotation Command Line Arguments"); -po::positional_options_description searchPositionArgumentDesc; -po::options_description generateGFADesc("Generate GFA Command Line Arguments"); -po::positional_options_description generateGFAArgumentDesc; po::options_description rerootDesc("Reroot Command Line Arguments"); po::positional_options_description rerootArgumentDesc; -po::options_description substitutionsDesc("Substitutions Command Line Arguments"); -po::positional_options_description substitutionsArgumentDesc; -po::options_description aaTranslationDesc("Amino Acid Translation Command Line Arguments"); 
+po::options_description aaDesc("Amino Acid Translation Command Line Arguments"); po::positional_options_description aaTranslationArgumentDesc; -po::options_description segmentExtractDesc("Segment Extract Command Line Arguments"); -po::positional_options_description segmentExtractArgumentDesc; -po::options_description GFAToFASTADesc("GFA to Fasta writer Command Line Arguments"); -po::positional_options_description GFAToFASTAArgumentDesc; -po::options_description groupWriteDesc("Group MAT Writer Command Line Arguments"); -po::positional_options_description groupWritePositionArgumentDesc; -po::options_description sequenceExtractDesc("Sequence Extract Command Line Arguments"); -po::positional_options_description sequenceExtractPositionArgumentDesc; -po::options_description groupFastaDesc("Tree Group FASTA writer Command Line Arguments"); -po::positional_options_description groupFastaPositionArgumentDesc; +po::options_description createNetDesc("Create Network Command Line Arguments"); +po::positional_options_description createNetDescTranslationArgumentDesc; +po::options_description printMutDesc("Print Mutations Command Line Arguments"); +po::positional_options_description printMutPositionArgumentDesc; +po::options_description printPathDesc("Print Paths Command Line Arguments"); +po::positional_options_description printPathsArgumentDesc; +po::options_description indexDesc("Indexing Command Line Arguments"); +po::positional_options_description indexArgumentDesc; +po::options_description printRootDesc("Root Printer Command Line Arguments"); +po::positional_options_description printRootPositionArgumentDesc; + void setupOptionDescriptions() { // Global option descriptions @@ -151,19 +154,19 @@ void setupOptionDescriptions() { ("acr,q", "ACR method [fitch(default), mppa]") ("index",po::value< bool >(0), "Generating indexes and print sequence (passed as reference) between x:y") ("printRoot", "Print root sequence") - //("printNodePaths", "Create PanMAN with network of trees 
from single or multiple PanMAN files") + ("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") ("low-mem-mode", "Perform Fitch Algrorithm in batch to save memory consumption") ("reference,n", po::value< std::string >(), "Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required)") ("start,s", po::value< int64_t >(), "Start coordinate of protein translation/Start coordinate for indexing") ("end,e", po::value< int64_t >(), "End coordinate of protein translation/End coordinate for indexing") ("treeID,d", po::value< std::string >(), "Tree ID, required for --vcf") + ("tree-group", po::value< std::vector< std::string > >()->multitoken(), "File paths of PMATs to generate tree group") ("input-file,i", po::value< std::string >(), "Path to the input file, required for --subnet, --annotate, and --create-network") ("output-file,o", po::value< std::string >(), "Prefix of the output file name") ("threads", po::value< std::int32_t >(), "Number of threads") // ("complexmutation-file", po::value< std::string >(), "File path of complex mutation file for tree group") - // ("tree-group", po::value< std::vector< std::string > >()->multitoken(), "File paths of PMATs to generate tree group") // ("panman-in", po::value< std::string >(), "Input file path for PanMAT Group") ; @@ -177,158 +180,78 @@ void setupOptionDescriptions() { ("index", po::value< size_t >()->required(), "PanMAT index") ; + summaryDesc.add_options() + ("output-file,o", po::value< std::string >(), "Output file name"); + // FASTA option descriptions fastaDesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ("aligned", "print in aligned format (MSA)") - ("parallel", "Whether we should execute in parallel or not") - ; - - // Adding output file as positional argument (doesn't require the --output-file tag) - 
fastaPositionArgumentDesc.add("output-file,o", -1); - - // MAF option descriptions - mafDesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ; - - // Adding output file as positional argument (doesn't require the --output-file tag) - mafPositionArgumentDesc.add("output-file,o", -1); - - // MAT Writer option descriptions - writeDesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ; - - // Adding output file as positional argument (doesn't require the --output-file tag) - writePositionArgumentDesc.add("output-file,o", -1); - - // Subtree Extract option descriptions - subtreeDesc.add_options() - ("help", "produce help message") - ("newick", po::value< bool >()->default_value(false), "just print newick string") - ("input-file", po::value< std::string >(), "Input file name if reading node IDs from file") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ("node-ids", po::value< std::vector< std::string > >()->multitoken(), "Node \ -IDs to extract") - ; - - // Adding output file as positional argument - subtreePositionArgumentDesc.add("output-file,o", -1); - - // Sequence Extract option descriptions - sequenceExtractDesc.add_options() - ("help", "produce help message") - ("list", po::value< std::vector< std::string > >()->multitoken()->required(), "Sequence names") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ; + ("output-file,o", po::value< std::string >(), "Output file name"); + + fastaAlignDesc.add_options() + ("output-file,o", po::value< std::string >(), "Output file name"); - // Adding output file as positional argument (doesn't require the --output-file tag) - sequenceExtractPositionArgumentDesc.add("output-file,o", -1); + subnetDesc.add_options() + ("input-file", po::value< std::string >(), "Input file name") + ("output-file,o", 
po::value< std::string >(), "Output file name"); - // VCF Writer option descriptions vcfDesc.add_options() - ("help", "produce help message") - ("reference", po::value< std::string >()->required(), "Sequence ID of the reference \ -sequence") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ("fasta-file", po::value< std::string >(), "FASTA file name if it should also be created \ - from VCF File. Mainly used to verify the correctness of VCF file") - ; - - // Adding output file as positional argument - vcfPositionArgumentDesc.add("output-file,o", -1); + ("treeID", po::value< std::int64_t >(), "Tree ID [default 0]") + ("reference", po::value< std::string >(), "Reference name") + ("output-file,o", po::value< std::string >(), "Output file name"); - // MAT Annotate option descriptions - annotateDesc.add_options() - ("help", "produce help message") - ("input-file", po::value< std::string >()->required(), "Name of the file containing \ -annotation info") - ; - - // Adding input file as positional argument - annotatePositionArgumentDesc.add("input-file", -1); - - // Search by annotation option descriptions - searchDesc.add_options() - ("help", "produce help message") - ("keywords", po::value< std::vector< std::string > >()->multitoken(), "list of keywords to \ -search for") - ; - - searchPositionArgumentDesc.add("keywords", -1); + gfaDesc.add_options() + ("treeID", po::value< std::int64_t >(), "Tree ID [default 0]") + ("output-file,o", po::value< std::string >(), "Output file name"); - // Generate GFA option descriptions - generateGFADesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ; + // MAF option descriptions + mafDesc.add_options() + ("treeID", po::value< std::int64_t >(), "Tree ID [default 0]") + ("output-file,o", po::value< std::string >(), "Output file name"); - generateGFAArgumentDesc.add("output-file,o", -1); + newickDesc.add_options() + 
("output-file,o", po::value< std::string >(), "Output file name"); - // GFA to FASTA option descriptions - GFAToFASTADesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ("input-file", po::value< std::string >()->required(), "Input file name") - ; + extendNewickDesc.add_options() + ("output-file,o", po::value< std::string >(), "Output file name"); - GFAToFASTAArgumentDesc.add("output-file,o", -1); + annotateDesc.add_options() + ("treeID", po::value< std::int64_t >(), "Tree ID [default 0]") + ("input-file", po::value< std::string >(), "Input file name") + ("output-file,o", po::value< std::string >(), "Output file name"); rerootDesc.add_options() - ("help", "produce help message") - ("sequence-name", po::value< std::string >()->required(), "Name of sequence to reroot to") - ; - - rerootArgumentDesc.add("sequence-name", -1); - - substitutionsDesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), "Name of the output file") - ; - - substitutionsArgumentDesc.add("output-file,o", -1); - - aaTranslationDesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), - "Name of output file to store tsv file") - ("start", po::value< int64_t >()->required(), "Root coordinate to start transcription") - ("end", po::value< int64_t >()->required(), "Root coordinate to end transcription") - ; - - aaTranslationArgumentDesc.add("output-file,o", -1); - - segmentExtractDesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), - "Name of output file to store tsv file") - ("start", po::value< int64_t >()->required(), "Root coordinate to start extraction") - ("end", po::value< int64_t >()->required(), "Root coordinate to end extraction") - ; - - segmentExtractArgumentDesc.add("output-file,o", -1); - - // Tree Group FASTA option descriptions - 
groupFastaDesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ; - - // Adding output file as positional argument - groupFastaPositionArgumentDesc.add("output-file,o", -1); - - // Group MAT Writer option descriptions - groupWriteDesc.add_options() - ("help", "produce help message") - ("output-file,o", po::value< std::string >()->required(), "Output file name") - ; - - // Adding output file as positional argument - groupWritePositionArgumentDesc.add("output-file,o", -1); + ("treeID", po::value< std::int64_t >(), "Tree ID [default 0]") + ("reference", po::value< std::string >(), "Reference name") + ("output-file,o", po::value< std::string >(), "Output file name"); + + aaDesc.add_options() + ("treeID", po::value< std::int64_t >(), "Tree ID [default 0]") + ("start,s", po::value< int64_t >(), "Start coordinate of protein translation/Start coordinate for indexing") + ("end,e", po::value< int64_t >(), "End coordinate of protein translation/End coordinate for indexing") + ("output-file,o", po::value< std::string >(), "Output file name"); + + createNetDesc.add_options() + ("input-file", po::value< std::string >(), "File containing complex mutations") + ("tree-group", po::value< int64_t >(), "List of PanMATs") + ("output-file,o", po::value< std::string >(), "Output file name"); + + printMutDesc.add_options() + ("treeID", po::value< std::int64_t >(), "Tree ID [default 0]") + ("output-file,o", po::value< std::string >(), "Output file name"); + + printPathDesc.add_options() + ("treeID", po::value< std::int64_t >(), "Tree ID [default 0]") + ("output-file,o", po::value< std::string >(), "Output file name"); + + indexDesc.add_options() + ("reference", po::value< std::string >(), "Reference name") + ("start,s", po::value< int64_t >(), "Start coordinate of protein translation/Start coordinate for indexing") + ("end,e", po::value< int64_t >(), "End coordinate of protein translation/End coordinate for 
indexing") + ("index",po::value< bool >(0), "Generating indexes and print sequence (passed as reference) between x:y") + ("output-file,o", po::value< std::string >(), "Output file name"); + + printRootDesc.add_options() + ("output-file,o", po::value< std::string >(), "Output file name"); } void writePanMAN(po::variables_map &globalVm, panmanUtils::TreeGroup *TG) { @@ -388,600 +311,697 @@ void writePanMAN(po::variables_map &globalVm, panmanUtils::Tree *T) { } -void parseAndExecute(int argc, char* argv[]) { +void summary(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // If command was summary, print the summary of the PanMAT + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } - // Setup boost::program_options - setupOptionDescriptions(); + panmanUtils::TreeGroup tg = *TG; - // Initial command line arguments consisting of input file types - po::variables_map globalVm; - po::store(po::command_line_parser(argc, argv).options(globalDesc) - .positional(globalPositionArgumentDesc).allow_unregistered().run(), globalVm); - po::notify(globalVm); + auto summaryStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < tg.trees.size(); i++) { + panmanUtils::Tree *T = &tg.trees[i]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".summary"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); + T->printSummary(fout); - int threads = 16; - if (globalVm.count("threads")) threads = globalVm["threads"].as(); - tbb::task_scheduler_init init(threads); + if(globalVm.count("output-file")) outputFile.close(); + } + auto summaryEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds summaryTime = summaryEnd - summaryStart; + std::cout << "\nSummary creation time: " << summaryTime.count() << " 
nanoseconds\n"; +} - // If the data structure loaded into memory is a PanMAT, it is pointed to by T - panmanUtils::Tree *T = nullptr; +void fasta(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Print raw sequences to output file + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } - // If the data structure loaded into memory is a PanMAN, it is pointed to by TG - panmanUtils::TreeGroup *TG = nullptr; + panmanUtils::TreeGroup tg = *TG; - if(globalVm.count("help")) { - std::cout << globalDesc; - return; - } else if(globalVm.count("input-panmat")) { - // Load PanMAT file directly into memory + auto fastaStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < tg.trees.size(); i++) { + panmanUtils::Tree *T = &tg.trees[i]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".fasta"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); - std::string fileName = globalVm["input-panmat"].as< std::string >(); - std::ifstream inputFile(fileName, std::ios_base::in | std::ios_base::binary); - boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; + // T->printFASTAParallel(fout, false); + T->printFASTANew(fout, false, false); - auto treeBuiltStart = std::chrono::high_resolution_clock::now(); + if(globalVm.count("output-file")) outputFile.close(); + } - inPMATBuffer.push(boost::iostreams::lzma_decompressor()); - inPMATBuffer.push(inputFile); - std::istream inputStream(&inPMATBuffer); + auto fastaEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; + std::cout << "\nFASTA execution time: " << fastaTime.count() << " nanoseconds\n"; +} - T = new panmanUtils::Tree(inputStream); +void fastaAligned(panmanUtils::TreeGroup *TG, 
po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Print multiple sequence alignment to output file + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } - auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; + panmanUtils::TreeGroup tg = *TG; - std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; + auto fastaStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < tg.trees.size(); i++) { + panmanUtils::Tree *T = &tg.trees[i]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".msa"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); - std::vector tg; - tg.push_back(T); - TG = new panmanUtils::TreeGroup(tg); + // T->printFASTA(fout, true); + T->printFASTAParallel(fout, true); - inputFile.close(); + if(globalVm.count("output-file")) outputFile.close(); + } - writePanMAN(globalVm, TG); + auto fastaEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; + std::cout << "\nFASTA execution time: " << fastaTime.count() << " nanoseconds\n"; +} - std::filesystem::create_directory("./info"); +void subnetwork(panmanUtils::Tree *T, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Extract subnet of PanMAN to new file + if(T == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } + if(!globalVm.count("output-file")) { + panmanUtils::printError("Output file not provided!"); + std::cout << globalDesc; + return; + } - } else if(globalVm.count("input-panman")) { - // Load PanMAN file directly into memory + // List of node identifiers that need to be extracted from the tree + std::vector< std::string > nodeIds; + 
std::string nodeId; - std::string fileName = globalVm["input-panman"].as< std::string >(); - std::ifstream inputFile(fileName); - boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; + if(globalVm.count("input-file")) { + std::string inputFileName = globalVm["input-file"].as< std::string >(); + std::ifstream fin(inputFileName); + while(fin >> nodeId) { + nodeIds.push_back(nodeId); + } + fin.close(); + } else { + panmanUtils::printError("No source of node ids provided"); + exit(0); + } - auto treeBuiltStart = std::chrono::high_resolution_clock::now(); - inPMATBuffer.push(boost::iostreams::lzma_decompressor()); - // inPMATBuffer.push(boost::iostreams::gzip_decompressor()); - inPMATBuffer.push(inputFile); - std::istream inputStream(&inPMATBuffer); + if(nodeIds.size() == 0) { + std::cout << "No node identifiers provided!" << std::endl; + } - std::cout << "starting reading panman" << std::endl; - TG = new panmanUtils::TreeGroup(inputStream); + std::string outputFileName = globalVm["output-file"].as< std::string >(); + std::filesystem::create_directory("./panman"); + std::ofstream outputFiles("./panman/" + outputFileName + ".panman"); + boost::iostreams::filtering_streambuf< boost::iostreams::output> outPMATBuffer; - auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; + auto subtreeStart = std::chrono::high_resolution_clock::now(); - std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; - inputFile.close(); + // outPMATBuffer.push(boost::iostreams::gzip_compressor()); + boost::iostreams::lzma_params params; + params.level = 9; // Highest compression level + outPMATBuffer.push(boost::iostreams::lzma_compressor(params)); + outPMATBuffer.push(outputFiles); + std::ostream outstream(&outPMATBuffer); + kj::std::StdOutputStream outputStream(outstream); + T->writeToFile(outputStream, T->subtreeExtractParallel(nodeIds)); + 
boost::iostreams::close(outPMATBuffer); + outputFiles.close(); - std::filesystem::create_directory("./info"); + auto subtreeEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds subtreeTime = subtreeEnd - subtreeStart; - } else if(globalVm.count("input-gfa")) { - // Create PanMAT from GFA and Newick files + std::cout << "\nParallel Subtree Extract execution time: " + << subtreeTime.count() << " nanoseconds\n"; +} - std::string fileName = globalVm["input-gfa"].as< std::string >(); - if(!globalVm.count("input-newick")) { - panmanUtils::printError("File containing newick string not provided!"); - return; - } - if(!globalVm.count("output-file")) { - panmanUtils::printError("Output file not provided!"); - std::cout << globalDesc; - return; - } - std::string newickFileName = globalVm["input-newick"].as< std::string >(); +void subnet(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Extract the subnetwork consisting of given node IDs from PanMAN - std::cout << "Creating PanMAN from GFA and Newick" << std::endl; + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } - std::ifstream inputStream(fileName); - std::ifstream newickInputStream(newickFileName); + std::string outputFileName; + if(!globalVm.count("output-file")) { + panmanUtils::printError("Output file not provided!"); + std::cout << globalDesc; + return; + } else outputFileName = globalVm["output-file"].as< std::string >(); + + // List of node identifiers that need to be extracted from the tree + std::unordered_map< int, std::vector< std::string > > nodeIds; + std::string nodeId; + + if(globalVm.count("input-file")) { + std::string inputFileName = globalVm["input-file"].as< std::string >(); + std::ifstream fin(inputFileName); + std::string line; + int treeId; + while(std::getline(fin, line)) { + std::stringstream ss(line); + ss >> treeId; + while(ss >> nodeId) { + nodeIds[treeId].push_back(nodeId); + 
} + } + fin.close(); + } else { + panmanUtils::printError("Input file not provided!"); + std::cout << globalDesc; + return; + } - auto treeBuiltStart = std::chrono::high_resolution_clock::now(); + if(nodeIds.size() == 0) { + std::cout << "No node identifiers selected!" << std::endl; + } - T = new panmanUtils::Tree(inputStream, newickInputStream, panmanUtils::FILE_TYPE::GFA); + std::filesystem::create_directory("./panman"); + std::ofstream outputFiles("./panman/" + outputFileName + ".panman"); + boost::iostreams::filtering_streambuf< boost::iostreams::output> + outPMATBuffer; - std::vector tg; - tg.push_back(T); + auto subtreeStart = std::chrono::high_resolution_clock::now(); - TG = new panmanUtils::TreeGroup(tg); + // outPMATBuffer.push(boost::iostreams::gzip_compressor()); + boost::iostreams::lzma_params params; + params.level = 9; // Highest compression level + outPMATBuffer.push(boost::iostreams::lzma_compressor(params)); + outPMATBuffer.push(outputFiles); + std::ostream outstream(&outPMATBuffer); + kj::std::StdOutputStream outputStream(outstream); + panmanUtils::TreeGroup* subnetwork = TG->subnetworkExtract(nodeIds); + subnetwork->writeToFile(outputStream); - auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; - std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; + boost::iostreams::close(outPMATBuffer); + outputFiles.close(); - newickInputStream.close(); - inputStream.close(); + auto subtreeEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds subtreeTime = subtreeEnd - subtreeStart; - writePanMAN(globalVm, TG); + std::cout << "\nParallel Subnetwork Extract execution time: " + << subtreeTime.count() << " nanoseconds\n"; +} - } else if(globalVm.count("input-pangraph")) { - // Create PanMAT from PanGraph and Newick files +void vcf(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * 
buf) { + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } + int treeID = 0; + if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); - std::string fileName = globalVm["input-pangraph"].as< std::string >(); - if(!globalVm.count("input-newick")) { - panmanUtils::printError("File containing newick string not provided!"); - std::cout << globalDesc; - return; - } - if(!globalVm.count("output-file")) { - panmanUtils::printError("Output file not provided!"); - std::cout << globalDesc; - return; - } + panmanUtils::TreeGroup tg = *TG; + panmanUtils::Tree * T = &tg.trees[treeID]; - std::string newickFileName = globalVm["input-newick"].as< std::string >(); - std::string referenceSequence; - if(globalVm.count("reference")) { - referenceSequence = globalVm["reference"].as< std::string >(); + std::string reference; + if(!globalVm.count("reference")) { + for (auto &n: T->allNodes) { + reference = n.first; + break; } + } else reference = globalVm["reference"].as< std::string >(); - std::cout << "Creating PanMAN from PanGraph and Newick" << std::endl; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + ".vcf"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); - std::ifstream inputStream(fileName); - std::ifstream newickInputStream(newickFileName); + auto vcfStart = std::chrono::high_resolution_clock::now(); - auto treeBuiltStart = std::chrono::high_resolution_clock::now(); + T->printVCFParallel(reference, fout); - T = new panmanUtils::Tree(inputStream, newickInputStream, - panmanUtils::FILE_TYPE::PANGRAPH, referenceSequence); + auto vcfEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds vcfTime = vcfEnd - vcfStart; + std::cout << "\nVCF execution time: " << vcfTime.count() << " nanoseconds\n"; + if(globalVm.count("output-file")) outputFile.close(); 
+} - std::vector tg; - tg.push_back(T); - - TG = new panmanUtils::TreeGroup(tg); - - auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; - std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; - - newickInputStream.close(); - inputStream.close(); - - writePanMAN(globalVm, TG); - - } else if(globalVm.count("input-msa")) { - // Create PanMAT from MSA and Newick files - - std::string fileName = globalVm["input-msa"].as< std::string >(); - if(!globalVm.count("input-newick")) { - panmanUtils::printError("File containing newick string not provided!"); - return; - } - - if(!globalVm.count("output-file")) { - panmanUtils::printError("Output file not provided!"); - std::cout << globalDesc; - return; - } - - bool optimize = false; - if(globalVm.count("low-mem-mode")) { - optimize = true; - } - - std::string reference = ""; - if (globalVm.count("reference")) { - reference = globalVm["reference"].as(); - } +void gfa(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // If GFA is to be extracted from PanMAN - std::string newickFileName = globalVm["input-newick"].as< std::string >(); + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } - std::cout << "Creating PanMAN from MSA and Newick" << std::endl; + int treeID = 0; + if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); - std::ifstream inputStream(fileName); - std::ifstream newickInputStream(newickFileName); + panmanUtils::TreeGroup tg = *TG; + panmanUtils::Tree * T = &tg.trees[treeID]; - auto treeBuiltStart = std::chrono::high_resolution_clock::now(); + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + ".gfa"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream 
fout (buf); - if(!optimize) { - T = new panmanUtils::Tree(inputStream, newickInputStream, - panmanUtils::FILE_TYPE::MSA, reference); - } else { - T = new panmanUtils::Tree(inputStream, newickInputStream, - panmanUtils::FILE_TYPE::MSA_OPTIMIZE, reference); - } + auto generateVGStart = std::chrono::high_resolution_clock::now(); - // checkFunction(T); + T->convertToGFA(fout); - std::vector tg; - tg.push_back(T); + auto generateVGEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds generateVGTime = generateVGEnd - generateVGStart; - TG = new panmanUtils::TreeGroup(tg); + std::cout << "GFA generation time: " << generateVGTime.count() + << " nanoseconds\n"; + if(globalVm.count("output-file")) outputFile.close(); +} - auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; - std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; +void maf(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + if(TG == nullptr) { + std::cout << "No PanMAN selected. Try groupFasta for FASTA of the whole" + " PanMAN" << std::endl; + return; + } - newickInputStream.close(); - inputStream.close(); + int treeID = 0; + if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); - writePanMAN(globalVm, TG); + panmanUtils::TreeGroup tg = *TG; + panmanUtils::Tree * T = &tg.trees[treeID]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + ".maf"); + buf = outputFile.rdbuf(); } else { - panmanUtils::printError("Incorrect Format"); - std::cout << globalDesc; - return; + buf = std::cout.rdbuf(); } + std::ostream fout (buf); - // If only one function needs to be performed on the loaded PanMAT/PanMAN, do not start the - // command line utility. 
- std::ofstream outputFile; - std::streambuf * buf; + auto mafStart = std::chrono::high_resolution_clock::now(); - if(globalVm.count("summary")) { - // If command was summary, print the summary of the PanMAT - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } + T->printMAF(fout); - panmanUtils::TreeGroup tg = *TG; + auto mafEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds mafTime = mafEnd - mafStart; + std::cout << "\nMAF execution time: " << mafTime.count() << " nanoseconds\n"; + if(globalVm.count("output-file")) outputFile.close(); +} - auto summaryStart = std::chrono::high_resolution_clock::now(); - for(int i = 0; i < tg.trees.size(); i++) { - T = &tg.trees[i]; +void newick (panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Print newick string of the PanMAT or PanMAN loaded into memory + if(TG) { + int index = 0; + for(auto& t: TG->trees) { if(globalVm.count("output-file")) { std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".summary"); + outputFile.open("./info/" + fileName + "_" + std::to_string(index) + ".newick"); buf = outputFile.rdbuf(); } else { buf = std::cout.rdbuf(); } std::ostream fout (buf); - T->printSummary(fout); - + fout << t.getNewickString(t.root) << std::endl; if(globalVm.count("output-file")) outputFile.close(); } + } else { + std::cout << "No PanMAN selected" << std::endl; + return; + } +} + +void extendNewick(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Print Extended Newick String + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } + + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + ".extended-newick"); + buf = outputFile.rdbuf(); + } 
else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); + + + auto writeStart = std::chrono::high_resolution_clock::now(); + + for (auto& tree: TG->trees) { + fout << tree.getNewickString(tree.root) << std::endl; + } + + TG->printComplexMutations(fout); + + if(globalVm.count("output-file")) outputFile.close(); - auto summaryEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds summaryTime = summaryEnd - summaryStart; - std::cout << "\nSummary creation time: " << summaryTime.count() << " nanoseconds\n"; + auto writeEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds writeTime = writeEnd - writeStart; + std::cout << "\nExtended Newick execution time: " << writeTime.count() + << " nanoseconds\n"; +} +void annotate(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Annotate nodes of PanMAT + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; return; - } else if(globalVm.count("fasta")) { - // Print raw sequences to output file + } - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } + int treeID = 0; + if(globalVm.count("treeID")) { + treeID = std::stoi(globalVm["treeID"].as< std::string >()); + } - panmanUtils::TreeGroup tg = *TG; + panmanUtils::TreeGroup tg = *TG; + panmanUtils::Tree * T = &tg.trees[treeID]; - auto fastaStart = std::chrono::high_resolution_clock::now(); - for(int i = 0; i < tg.trees.size(); i++) { - T = &tg.trees[i]; - if(globalVm.count("output-file")) { - std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".fasta"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); + if(!globalVm.count("input-file")) { + panmanUtils::printError("Input file not provided!"); + std::cout << globalDesc; + return; + } - T->printFASTAParallel(fout, false); - // 
T->printFASTA(fout, false, false); + std::string fileName = globalVm["input-file"].as< std::string >(); + std::ifstream fin(fileName); + auto annotateStart = std::chrono::high_resolution_clock::now(); - if(globalVm.count("output-file")) outputFile.close(); - } + T->annotate(fin); - auto fastaEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; - std::cout << "\nFASTA execution time: " << fastaTime.count() << " nanoseconds\n"; + auto annotateEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds annotateTime = annotateEnd - annotateStart; + std::cout << "Annotate time: " << annotateTime.count() << " nanoseconds\n"; + writePanMAN(globalVm,TG); +} + +void reroot(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Reroot the PanMAT to given sequence + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; return; - } else if(globalVm.count("fasta-aligned")) { - // Print multiple sequence alignment to output file + } - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } + int treeID; + if(!globalVm.count("treeID")) { + panmanUtils::printError("TreeID not provided!"); + std::cout << globalDesc; + return; + } else treeID = std::stoi(globalVm["treeID"].as< std::string >()); - panmanUtils::TreeGroup tg = *TG; + panmanUtils::TreeGroup tg = *TG; + panmanUtils::Tree * T = &tg.trees[treeID]; - auto fastaStart = std::chrono::high_resolution_clock::now(); - for(int i = 0; i < tg.trees.size(); i++) { - T = &tg.trees[i]; - if(globalVm.count("output-file")) { - std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".msa"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); + if(!globalVm.count("reference")) { + panmanUtils::printError("Refence ID not provided!"); + std::cout 
<< globalDesc; + return; + } + std::string sequenceName = globalVm["reference"].as< std::string >(); - // T->printFASTA(fout, true); - T->printFASTAParallel(fout, true); + auto rerootStart = std::chrono::high_resolution_clock::now(); + T->reroot(sequenceName); - if(globalVm.count("output-file")) outputFile.close(); - } + auto rerootEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds rerootTime = rerootEnd - rerootStart; + std::cout << "\nReroot execution time: " << rerootTime.count() + << " nanoseconds\n"; - auto fastaEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; - std::cout << "\nFASTA execution time: " << fastaTime.count() << " nanoseconds\n"; + TG->trees[treeID] = *T; + + writePanMAN(globalVm, TG); +} + +void aa(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Extract amino acid translations in tsv file + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; return; - } else if(globalVm.count("subnetwork")) { // for PanMAT -> Old - // Extract subnet of PanMAN to new file + } - if(T == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } - if(!globalVm.count("output-file")) { - panmanUtils::printError("Output file not provided!"); - std::cout << globalDesc; - return; - } + int treeID; + if(!globalVm.count("treeID")) { + panmanUtils::printError("TreeID not provided!"); + std::cout << globalDesc; + return; + } else treeID = std::stoi(globalVm["treeID"].as< std::string >()); - // List of node identifiers that need to be extracted from the tree - std::vector< std::string > nodeIds; - std::string nodeId; + panmanUtils::TreeGroup tg = *TG; + panmanUtils::Tree * T = &tg.trees[treeID]; - if(globalVm.count("input-file")) { - std::string inputFileName = globalVm["input-file"].as< std::string >(); - std::ifstream fin(inputFileName); - while(fin >> nodeId) { - nodeIds.push_back(nodeId); 
- } - fin.close(); - } else { - panmanUtils::printError("No source of node ids provided"); - std::cout << subtreeDesc; - } + if(!globalVm.count("start") || !globalVm.count("end")) { + std::cout << "Start/End Coordinate not provided" << std::endl; + return; + } - if(nodeIds.size() == 0) { - std::cout << "No node identifiers provided!" << std::endl; - } + int64_t startCoordinate = globalVm["start"].as< int64_t >(); + int64_t endCoordinate = globalVm["end"].as< int64_t >(); + + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + ".tsv"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); + + auto aaStart = std::chrono::high_resolution_clock::now(); - std::string outputFileName = globalVm["output-file"].as< std::string >(); - std::filesystem::create_directory("./panman"); - std::ofstream outputFile("./panman/" + outputFileName + ".panman"); - boost::iostreams::filtering_streambuf< boost::iostreams::output> outPMATBuffer; - - auto subtreeStart = std::chrono::high_resolution_clock::now(); - - // outPMATBuffer.push(boost::iostreams::gzip_compressor()); - boost::iostreams::lzma_params params; - params.level = 9; // Highest compression level - outPMATBuffer.push(boost::iostreams::lzma_compressor(params)); - outPMATBuffer.push(outputFile); - std::ostream outstream(&outPMATBuffer); - kj::std::StdOutputStream outputStream(outstream); - T->writeToFile(outputStream, T->subtreeExtractParallel(nodeIds)); - boost::iostreams::close(outPMATBuffer); - outputFile.close(); - - auto subtreeEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds subtreeTime = subtreeEnd - subtreeStart; - - std::cout << "\nParallel Subtree Extract execution time: " - << subtreeTime.count() << " nanoseconds\n"; + T->extractAminoAcidTranslations(fout, startCoordinate, endCoordinate); + + auto aaEnd = std::chrono::high_resolution_clock::now(); + 
std::chrono::nanoseconds aaTime = aaEnd - aaStart; + std::cout << "\nAmino Acid translate execution time: " << aaTime.count() + << " nanoseconds\n"; + if(globalVm.count("output-file")) outputFile.close(); +} + +void createNet(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Create PanMAN from list of PanMAT files and a complex mutation file listing the complex + // mutations relating these PanMATs + + std::vector< std::string > fileNames; + + std::string mutationFileName; + if(!globalVm.count("input-file")) { + panmanUtils::printError("Input File containing complex mutations not provided!"); return; - } else if(globalVm.count("subnet")) { - // Extract the subnetwork consisting of given node IDs from PanMAN + } - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } + fileNames = globalVm["tree-group"].as< std::vector< std::string > >(); + mutationFileName = globalVm["input-file"].as< std::string >(); - std::string outputFileName; - if(!globalVm.count("output-file")) { - panmanUtils::printError("Output file not provided!"); - std::cout << globalDesc; - return; - } else outputFileName = globalVm["output-file"].as< std::string >(); - - // List of node identifiers that need to be extracted from the tree - std::unordered_map< int, std::vector< std::string > > nodeIds; - std::string nodeId; - - if(globalVm.count("input-file")) { - std::string inputFileName = globalVm["input-file"].as< std::string >(); - std::ifstream fin(inputFileName); - std::string line; - int treeId; - while(std::getline(fin, line)) { - std::stringstream ss(line); - ss >> treeId; - while(ss >> nodeId) { - nodeIds[treeId].push_back(nodeId); - } - } - fin.close(); - } else { - panmanUtils::printError("Input file not provided!"); - std::cout << subtreeDesc; - return; - } + std::ifstream mutationFile(mutationFileName); - if(nodeIds.size() == 0) { - std::cout << "No node identifiers selected!" 
<< std::endl; - } + std::vector< std::ifstream > files; + for(auto u: fileNames) { + files.emplace_back(u); + } - std::filesystem::create_directory("./panman"); - std::ofstream outputFile("./panman/" + outputFileName + ".panman"); - boost::iostreams::filtering_streambuf< boost::iostreams::output> - outPMATBuffer; + auto treeBuiltStart = std::chrono::high_resolution_clock::now(); - auto subtreeStart = std::chrono::high_resolution_clock::now(); + TG = new panmanUtils::TreeGroup(files, mutationFile); - // outPMATBuffer.push(boost::iostreams::gzip_compressor()); - boost::iostreams::lzma_params params; - params.level = 9; // Highest compression level - outPMATBuffer.push(boost::iostreams::lzma_compressor(params)); - outPMATBuffer.push(outputFile); - std::ostream outstream(&outPMATBuffer); - kj::std::StdOutputStream outputStream(outstream); - panmanUtils::TreeGroup* subnetwork = TG->subnetworkExtract(nodeIds); - subnetwork->writeToFile(outputStream); + auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; + std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; - boost::iostreams::close(outPMATBuffer); - outputFile.close(); + mutationFile.close(); + for(auto& u: files) { + u.close(); + } +} - auto subtreeEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds subtreeTime = subtreeEnd - subtreeStart; +void printMut(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } - std::cout << "\nParallel Subnetwork Extract execution time: " - << subtreeTime.count() << " nanoseconds\n"; - } else if(globalVm.count("vcf")) { - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } + int treeID = 0; + if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); + 
panmanUtils::TreeGroup tg = *TG; + panmanUtils::Tree * T = &TG->trees[treeID]; + // T = &tg.trees[treeID]; - int treeID = 0; - if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); - panmanUtils::TreeGroup tg = *TG; - T = &tg.trees[treeID]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + ".mutations"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); - std::string reference; - if(!globalVm.count("reference")) { - for (auto &n: T->allNodes) { - reference = n.first; - break; - } - } else reference = globalVm["reference"].as< std::string >(); - if(globalVm.count("output-file")) { - std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + ".vcf"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); + auto substitutionsStart = std::chrono::high_resolution_clock::now(); + + std::cout << T->root->identifier << std::endl; - auto vcfStart = std::chrono::high_resolution_clock::now(); + // T->printMutations(fout); + T->printMutationsNew(fout); - T->printVCFParallel(reference, fout); + auto substitutionsEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds substitutionsTime = substitutionsEnd - substitutionsStart; + std::cout << "\nMutation extract execution time: " + << substitutionsTime.count() << " nanoseconds\n"; - auto vcfEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds vcfTime = vcfEnd - vcfStart; - std::cout << "\nVCF execution time: " << vcfTime.count() << " nanoseconds\n"; - if(globalVm.count("output-file")) outputFile.close(); + if(globalVm.count("output-file")) outputFile.close(); +} +void printPath(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + if(TG == nullptr) { + std::cout << 
"No PanMAN selected" << std::endl; return; - } else if(globalVm.count("gfa")) { - // If GFA is to be extracted from PanMAN + } + int treeID = 0; + if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } + panmanUtils::TreeGroup tg = *TG; + panmanUtils::Tree * T = &TG->trees[treeID]; + // T = &tg.trees[treeID]; - int treeID = 0; - if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); - panmanUtils::TreeGroup tg = *TG; - T = &tg.trees[treeID]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + ".mutations"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); - if(globalVm.count("output-file")) { - std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + ".gfa"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); - auto generateVGStart = std::chrono::high_resolution_clock::now(); + auto substitutionsStart = std::chrono::high_resolution_clock::now(); - T->convertToGFA(fout); + std::cout << T->root->identifier << std::endl; - auto generateVGEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds generateVGTime = generateVGEnd - generateVGStart; + // T->printMutations(fout); + T->printNodePaths(fout); - std::cout << "GFA generation time: " << generateVGTime.count() - << " nanoseconds\n"; - if(globalVm.count("output-file")) outputFile.close(); + auto substitutionsEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds substitutionsTime = substitutionsEnd - substitutionsStart; + std::cout << "\nMutation extract execution time: " + << substitutionsTime.count() << " nanoseconds\n"; + + if(globalVm.count("output-file")) outputFile.close(); +} + 
+void index(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // indexing + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; return; - } else if(globalVm.count("maf")) { - if(TG == nullptr) { - std::cout << "No PanMAN selected. Try groupFasta for FASTA of the whole" - " PanMAN" << std::endl; - return; - } + } + + panmanUtils::TreeGroup tg = *TG; - int treeID = 0; - if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); + // Get start and end coordinate + int64_t startCoordinate = 0; + int64_t endCoordinate = -1; + if(!globalVm.count("start")) { + std::cout << "Start Coordinate not provided, setting it to 0" << std::endl; + } else { + startCoordinate = globalVm["start"].as< int64_t >(); + } + + if(!globalVm.count("end")) { + std::cout << "End Coordinate not provided, setting it to length of seqeunce - 1" << std::endl; + } else { + endCoordinate = globalVm["end"].as< int64_t >(); + } - panmanUtils::TreeGroup tg = *TG; - T = &tg.trees[treeID]; + // get sequence + std::string reference=""; + if(!globalVm.count("reference")) { + std::cout << "Error: Reference not provided" << std::endl; + return; + } else { + reference = globalVm["reference"].as< std::string >(); + } + auto fastaStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < tg.trees.size(); i++) { + panmanUtils::Tree * T = &tg.trees[i]; + if (T->allNodes.find(reference) == T->allNodes.end()) { + std::cout << "Error: reference " << reference << " does not exist in PanMAN\n"; + exit(0); + } if(globalVm.count("output-file")) { std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + ".maf"); + outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".index"); buf = outputFile.rdbuf(); } else { buf = std::cout.rdbuf(); } std::ostream fout (buf); - auto mafStart = std::chrono::high_resolution_clock::now(); + bool allIndex = 
globalVm["index"].as< bool >(); - T->printMAF(fout); + T->extractPanMATIndex(fout, startCoordinate,endCoordinate, reference, allIndex); - auto mafEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds mafTime = mafEnd - mafStart; - std::cout << "\nMAF execution time: " << mafTime.count() << " nanoseconds\n"; if(globalVm.count("output-file")) outputFile.close(); - return; - } else if(globalVm.count("newick")) { - // Print newick string of the PanMAT or PanMAN loaded into memory - if(TG) { - int index = 0; - for(auto& t: TG->trees) { - if(globalVm.count("output-file")) { - std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + "_" + std::to_string(index) + ".newick"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); - fout << t.getNewickString(t.root) << std::endl; - if(globalVm.count("output-file")) outputFile.close(); - } - } else { - std::cout << "No PanMAN selected" << std::endl; - return; - } - return; + } - } else if(globalVm.count("extended-newick")) { - // Print Extended Newick String + auto fastaEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; + std::cout << "\nIndexing execution time: " << fastaTime.count() << " nanoseconds\n"; +} - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } +void printRoot(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Print raw sequences to output file + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } + panmanUtils::TreeGroup tg = *TG; + + auto fastaStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < tg.trees.size(); i++) { + panmanUtils::Tree * T = &tg.trees[i]; if(globalVm.count("output-file")) { std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + 
fileName + ".extended-newick"); + outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".fasta"); buf = outputFile.rdbuf(); } else { buf = std::cout.rdbuf(); @@ -989,348 +1009,304 @@ void parseAndExecute(int argc, char* argv[]) { std::ostream fout (buf); - auto writeStart = std::chrono::high_resolution_clock::now(); - - for (auto& tree: TG->trees) { - fout << tree.getNewickString(tree.root) << std::endl; - } - - TG->printComplexMutations(fout); + T->printFASTA(fout, true, true); if(globalVm.count("output-file")) outputFile.close(); + } - auto writeEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds writeTime = writeEnd - writeStart; - std::cout << "\nExtended Newick execution time: " << writeTime.count() - << " nanoseconds\n"; - } else if (globalVm.count("annotate")) { - // Annotate nodes of PanMAT + auto fastaEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; + std::cout << "\nFASTA execution time: " << fastaTime.count() << " nanoseconds\n"; +} - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } - - int treeID = 0; - if(globalVm.count("treeID")) { - treeID = std::stoi(globalVm["treeID"].as< std::string >()); - } +void parseAndExecute(int argc, char* argv[]) { - panmanUtils::TreeGroup tg = *TG; - T = &tg.trees[treeID]; + // Setup boost::program_options + setupOptionDescriptions(); - if(!globalVm.count("input-file")) { - panmanUtils::printError("Input file not provided!"); - std::cout << globalDesc; - return; - } + // Initial command line arguments consisting of input file types + po::variables_map globalVm; + po::store(po::command_line_parser(argc, argv).options(globalDesc) + .positional(globalPositionArgumentDesc).allow_unregistered().run(), globalVm); + po::notify(globalVm); - std::string fileName = globalVm["input-file"].as< std::string >(); - std::ifstream fin(fileName); - auto annotateStart = 
std::chrono::high_resolution_clock::now(); + int threads = 64; + if (globalVm.count("threads")) threads = globalVm["threads"].as(); + tbb::task_scheduler_init init(threads); - T->annotate(fin); - auto annotateEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds annotateTime = annotateEnd - annotateStart; - std::cout << "Annotate time: " << annotateTime.count() << " nanoseconds\n"; + // If the data structure loaded into memory is a PanMAT, it is pointed to by T + panmanUtils::Tree *T = nullptr; - writePanMAN(globalVm,TG); + // If the data structure loaded into memory is a PanMAN, it is pointed to by TG + panmanUtils::TreeGroup *TG = nullptr; - } else if (globalVm.count("reroot")) { - // Reroot the PanMAT to given sequence + if(globalVm.count("help")) { + std::cout << globalDesc; + return; + } else if(globalVm.count("input-panmat")) { + // Load PanMAT file directly into memory - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } + std::string fileName = globalVm["input-panmat"].as< std::string >(); + std::ifstream inputFile(fileName, std::ios_base::in | std::ios_base::binary); + boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; - int treeID; - if(!globalVm.count("treeID")) { - panmanUtils::printError("TreeID not provided!"); - std::cout << globalDesc; - return; - } else treeID = std::stoi(globalVm["treeID"].as< std::string >()); + auto treeBuiltStart = std::chrono::high_resolution_clock::now(); - panmanUtils::TreeGroup tg = *TG; - T = &tg.trees[treeID]; + inPMATBuffer.push(boost::iostreams::lzma_decompressor()); + inPMATBuffer.push(inputFile); + std::istream inputStream(&inPMATBuffer); - if(!globalVm.count("reference")) { - panmanUtils::printError("Refence ID not provided!"); - std::cout << globalDesc; - return; - } + T = new panmanUtils::Tree(inputStream); - std::string sequenceName = globalVm["reference"].as< std::string >(); + auto treeBuiltEnd = 
std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; - auto rerootStart = std::chrono::high_resolution_clock::now(); + std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; - T->reroot(sequenceName); + std::vector tg; + tg.push_back(T); - auto rerootEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds rerootTime = rerootEnd - rerootStart; - std::cout << "\nReroot execution time: " << rerootTime.count() - << " nanoseconds\n"; + TG = new panmanUtils::TreeGroup(tg); - TG->trees[treeID] = *T; + inputFile.close(); writePanMAN(globalVm, TG); - return; - - } else if (globalVm.count("aa-mutations")) { - // Extract amino acid translations in tsv file - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; - } + std::filesystem::create_directory("./info"); - int treeID; - if(!globalVm.count("treeID")) { - panmanUtils::printError("TreeID not provided!"); - std::cout << globalDesc; - return; - } else treeID = std::stoi(globalVm["treeID"].as< std::string >()); - panmanUtils::TreeGroup tg = *TG; - T = &tg.trees[treeID]; + } else if(globalVm.count("input-panman")) { + // Load PanMAN file directly into memory - if(!globalVm.count("start") || !globalVm.count("end")) { - std::cout << "Start/End Coordinate not provided" << std::endl; - return; - } + std::string fileName = globalVm["input-panman"].as< std::string >(); + std::ifstream inputFile(fileName); + boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; - int64_t startCoordinate = globalVm["start"].as< int64_t >(); - int64_t endCoordinate = globalVm["end"].as< int64_t >(); + auto treeBuiltStart = std::chrono::high_resolution_clock::now(); + inPMATBuffer.push(boost::iostreams::lzma_decompressor()); + // inPMATBuffer.push(boost::iostreams::gzip_decompressor()); + inPMATBuffer.push(inputFile); + std::istream inputStream(&inPMATBuffer); - if(globalVm.count("output-file")) { 
- std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + ".tsv"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); + std::cout << "starting reading panman" << std::endl; + TG = new panmanUtils::TreeGroup(inputStream); - auto aaStart = std::chrono::high_resolution_clock::now(); + auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; - T->extractAminoAcidTranslations(fout, startCoordinate, endCoordinate); + std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; + inputFile.close(); - auto aaEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds aaTime = aaEnd - aaStart; - std::cout << "\nAmino Acid translate execution time: " << aaTime.count() - << " nanoseconds\n"; - if(globalVm.count("output-file")) outputFile.close(); - return; - } else if(globalVm.count("create-network")) { - // Create PanMAN from list of PanMAT files and a complex mutation file listing the complex - // mutations relating these PanMATs + std::filesystem::create_directory("./info"); - std::vector< std::string > fileNames; + } else if(globalVm.count("input-gfa")) { + // Create PanMAT from GFA and Newick files - std::string mutationFileName; - if(!globalVm.count("input-file")) { - panmanUtils::printError("Input File containing complex mutations not provided!"); + std::string fileName = globalVm["input-gfa"].as< std::string >(); + if(!globalVm.count("input-newick")) { + panmanUtils::printError("File containing newick string not provided!"); return; } + if(!globalVm.count("output-file")) { + panmanUtils::printError("Output file not provided!"); + std::cout << globalDesc; + return; + } + std::string newickFileName = globalVm["input-newick"].as< std::string >(); - fileNames = globalVm["tree-group"].as< std::vector< std::string > >(); - mutationFileName = 
globalVm["input-file"].as< std::string >(); - - std::ifstream mutationFile(mutationFileName); + std::cout << "Creating PanMAN from GFA and Newick" << std::endl; - std::vector< std::ifstream > files; - for(auto u: fileNames) { - files.emplace_back(u); - } + std::ifstream inputStream(fileName); + std::ifstream newickInputStream(newickFileName); auto treeBuiltStart = std::chrono::high_resolution_clock::now(); - TG = new panmanUtils::TreeGroup(files, mutationFile); + T = new panmanUtils::Tree(inputStream, newickInputStream, panmanUtils::FILE_TYPE::GFA); + + std::vector tg; + tg.push_back(T); + + TG = new panmanUtils::TreeGroup(tg); auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; - mutationFile.close(); - for(auto& u: files) { - u.close(); - } - } else if(globalVm.count("printMutations")) { + newickInputStream.close(); + inputStream.close(); + + writePanMAN(globalVm, TG); + } else if(globalVm.count("input-pangraph")) { + // Create PanMAT from PanGraph and Newick files - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; + std::string fileName = globalVm["input-pangraph"].as< std::string >(); + if(!globalVm.count("input-newick")) { + panmanUtils::printError("File containing newick string not provided!"); + std::cout << globalDesc; return; } - - - int treeID = 0; - if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); - - panmanUtils::TreeGroup tg = *TG; - T = &TG->trees[treeID]; - // T = &tg.trees[treeID]; - - - if(globalVm.count("output-file")) { - std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + ".mutations"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); + if(!globalVm.count("output-file")) { + panmanUtils::printError("Output file not provided!"); + std::cout << globalDesc; 
+ return; } - std::ostream fout (buf); - - - auto substitutionsStart = std::chrono::high_resolution_clock::now(); - - std::cout << T->root->identifier << std::endl; - - // T->printMutations(fout); - T->printMutationsNew(fout); - auto substitutionsEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds substitutionsTime = substitutionsEnd - substitutionsStart; - std::cout << "\nMutation extract execution time: " - << substitutionsTime.count() << " nanoseconds\n"; - - if(globalVm.count("output-file")) outputFile.close(); - } else if(globalVm.count("printNodePaths")) { - - - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; + std::string newickFileName = globalVm["input-newick"].as< std::string >(); + std::string referenceSequence; + if(globalVm.count("reference")) { + referenceSequence = globalVm["reference"].as< std::string >(); } + std::cout << "Creating PanMAN from PanGraph and Newick" << std::endl; - int treeID = 0; - if(globalVm.count("treeID")) treeID = std::stoi(globalVm["treeID"].as< std::string >()); + std::ifstream inputStream(fileName); + std::ifstream newickInputStream(newickFileName); - panmanUtils::TreeGroup tg = *TG; - T = &TG->trees[treeID]; - // T = &tg.trees[treeID]; + auto treeBuiltStart = std::chrono::high_resolution_clock::now(); + T = new panmanUtils::Tree(inputStream, newickInputStream, + panmanUtils::FILE_TYPE::PANGRAPH, referenceSequence); - if(globalVm.count("output-file")) { - std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + ".mutations"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); + std::vector tg; + tg.push_back(T); + TG = new panmanUtils::TreeGroup(tg); - auto substitutionsStart = std::chrono::high_resolution_clock::now(); + auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; + std::cout << 
"Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; - std::cout << T->root->identifier << std::endl; + newickInputStream.close(); + inputStream.close(); - // T->printMutations(fout); - T->printNodePaths(fout); + writePanMAN(globalVm, TG); - auto substitutionsEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds substitutionsTime = substitutionsEnd - substitutionsStart; - std::cout << "\nMutation extract execution time: " - << substitutionsTime.count() << " nanoseconds\n"; + } else if(globalVm.count("input-msa")) { + // Create PanMAT from MSA and Newick files - if(globalVm.count("output-file")) outputFile.close(); - } else if (globalVm.count("index")) { - // indexing - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; + std::string fileName = globalVm["input-msa"].as< std::string >(); + if(!globalVm.count("input-newick")) { + panmanUtils::printError("File containing newick string not provided!"); return; } - panmanUtils::TreeGroup tg = *TG; - - // Get start and end coordinate - int64_t startCoordinate = 0; - int64_t endCoordinate = -1; - if(!globalVm.count("start")) { - std::cout << "Start Coordinate not provided, setting it to 0" << std::endl; - } else { - startCoordinate = globalVm["start"].as< int64_t >(); + if(!globalVm.count("output-file")) { + panmanUtils::printError("Output file not provided!"); + std::cout << globalDesc; + return; } - if(!globalVm.count("end")) { - std::cout << "End Coordinate not provided, setting it to length of seqeunce - 1" << std::endl; - } else { - endCoordinate = globalVm["end"].as< int64_t >(); + bool optimize = false; + if(globalVm.count("low-mem-mode")) { + optimize = true; } - // get sequence - std::string reference=""; - if(!globalVm.count("reference")) { - std::cout << "Error: Reference not provided" << std::endl; - return; - } else { - reference = globalVm["reference"].as< std::string >(); + std::string reference = ""; + if (globalVm.count("reference")) { + reference = 
globalVm["reference"].as(); } - - auto fastaStart = std::chrono::high_resolution_clock::now(); - for(int i = 0; i < tg.trees.size(); i++) { - T = &tg.trees[i]; - if (T->allNodes.find(reference) == T->allNodes.end()) { - std::cout << "Error: reference " << reference << " does not exist in PanMAN\n"; - exit(0); - } - if(globalVm.count("output-file")) { - std::string fileName = globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".index"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); - bool allIndex = globalVm["index"].as< bool >(); - - T->extractPanMATIndex(fout, startCoordinate,endCoordinate, reference, allIndex); + std::string newickFileName = globalVm["input-newick"].as< std::string >(); - if(globalVm.count("output-file")) outputFile.close(); - } + std::cout << "Creating PanMAN from MSA and Newick" << std::endl; - auto fastaEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; - std::cout << "\nIndexing execution time: " << fastaTime.count() << " nanoseconds\n"; + std::ifstream inputStream(fileName); + std::ifstream newickInputStream(newickFileName); - return; - } else if(globalVm.count("printRoot")) { - // Print raw sequences to output file + auto treeBuiltStart = std::chrono::high_resolution_clock::now(); - if(TG == nullptr) { - std::cout << "No PanMAN selected" << std::endl; - return; + if(!optimize) { + T = new panmanUtils::Tree(inputStream, newickInputStream, + panmanUtils::FILE_TYPE::MSA, reference); + } else { + T = new panmanUtils::Tree(inputStream, newickInputStream, + panmanUtils::FILE_TYPE::MSA_OPTIMIZE, reference); } - panmanUtils::TreeGroup tg = *TG; + // checkFunction(T); - auto fastaStart = std::chrono::high_resolution_clock::now(); - for(int i = 0; i < tg.trees.size(); i++) { - T = &tg.trees[i]; - if(globalVm.count("output-file")) { - std::string fileName = 
globalVm["output-file"].as< std::string >(); - outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".fasta"); - buf = outputFile.rdbuf(); - } else { - buf = std::cout.rdbuf(); - } - std::ostream fout (buf); + std::vector tg; + tg.push_back(T); + TG = new panmanUtils::TreeGroup(tg); - T->printFASTA(fout, true, true); + auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; + std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; - if(globalVm.count("output-file")) outputFile.close(); - } + newickInputStream.close(); + inputStream.close(); - auto fastaEnd = std::chrono::high_resolution_clock::now(); - std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; - std::cout << "\nFASTA execution time: " << fastaTime.count() << " nanoseconds\n"; + writePanMAN(globalVm, TG); + } else { + panmanUtils::printError("Incorrect Format"); + std::cout << globalDesc; + return; + } + + // If only one function needs to be performed on the loaded PanMAT/PanMAN, do not start the + // command line utility. 
+ std::ofstream outputFile; + std::streambuf * buf; + + if(globalVm.count("summary")) { + summary(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("fasta")) { + fasta(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("fasta-aligned")) { + fastaAligned(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("subnetwork")) { // for PanMAT -> Old + subnetwork(T, globalVm, outputFile, buf); + return; + } else if(globalVm.count("subnet")) { + subnet(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("vcf")) { + vcf(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("gfa")) { + gfa(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("maf")) { + maf(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("newick")) { + newick(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("extended-newick")) { + extendNewick(TG, globalVm, outputFile, buf); + return; + } else if (globalVm.count("annotate")) { + annotate(TG, globalVm, outputFile, buf); + return; + } else if (globalVm.count("reroot")) { + reroot(TG, globalVm, outputFile, buf); + return; + } else if (globalVm.count("aa-mutations")) { + aa(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("create-network")) { + createNet(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("printMutations")) { + printMut(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("printNodePaths")) { + printPath(TG, globalVm, outputFile, buf); + return; + } else if (globalVm.count("index")) { + index(TG, globalVm, outputFile, buf); + return; + } else if(globalVm.count("printRoot")) { + printRoot(TG, globalVm, outputFile, buf); return; } else { char** splitCommandArray; @@ -1352,35 +1328,133 @@ void parseAndExecute(int argc, char* argv[]) { } try{ - if(strcmp(splitCommandArray[0], "use") == 0) { - // If command was use, select the PanMAT with the given index from 
the PanMAN - po::variables_map useVm; - po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) - .options(useDesc) - .run(), useVm); - - if(useVm.count("help")) { - std::cout << useDesc; - } else { - po::notify(useVm); - size_t treeIndex = useVm["index"].as< size_t >(); - if(TG == nullptr) { - std::cout << "No PanMAN loaded" << std::endl; - } else { - if(TG->trees.size() > treeIndex) { - T = &TG->trees[treeIndex]; - std::cout << "PanMAT loaded" << std::endl; - } else { - std::cout << "PanMAT with index " << treeIndex << " doesn't exist." - " There are only " << TG->trees.size() << " PanMATs." << std::endl; - } - } - } - } else if (strcmp(splitCommandArray[0], "root") == 0) { + if(strcmp(splitCommandArray[0], "help") == 0) { + std::cout << globalDesc; + } + else if (strcmp(splitCommandArray[0], "root") == 0) { buf = std::cout.rdbuf(); std::ostream fout (buf); TG->trees[0].printFASTA(fout, true, true); } + else if(strcmp(splitCommandArray[0], "summary") == 0) { + po::variables_map summaryVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(summaryDesc) + .run(), summaryVm); + + summary(TG, summaryVm, outputFile, buf); + } else if(strcmp(splitCommandArray[0], "fasta") == 0) { + po::variables_map fastaVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(fastaDesc) + .run(), fastaVm); + + fasta(TG, fastaVm, outputFile, buf); + } else if(strcmp(splitCommandArray[0], "fasta-aligned") == 0) { + po::variables_map fastaAlignVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(fastaAlignDesc) + .run(), fastaAlignVm); + + fastaAligned(TG, fastaAlignVm, outputFile, buf); + } else if(strcmp(splitCommandArray[0], "subnet") == 0) { + po::variables_map subnetVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(subnetDesc) + .run(), subnetVm); + + subnet(TG, subnetVm, outputFile, buf); + } 
else if(strcmp(splitCommandArray[0], "vcf") == 0) { + po::variables_map vcfVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(vcfDesc) + .run(), vcfVm); + vcf(TG, vcfVm, outputFile, buf); + + } else if(strcmp(splitCommandArray[0], "gfa") == 0) { + po::variables_map gfaVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(gfaDesc) + .run(), gfaVm); + gfa(TG, gfaVm, outputFile, buf); + } else if(strcmp(splitCommandArray[0], "maf") == 0) { + po::variables_map mafVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(mafDesc) + .run(), mafVm); + + maf(TG, mafVm, outputFile, buf); + } else if(strcmp(splitCommandArray[0], "newick") == 0) { + po::variables_map newickVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(newickDesc) + .run(), newickVm); + newick(TG, newickVm, outputFile, buf); + + } else if(strcmp(splitCommandArray[0], "extended-newick") == 0) { + po::variables_map extendNewickVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(extendNewickDesc) + .positional(fastaPositionArgumentDesc).run(), extendNewickVm); + extendNewick(TG, extendNewickVm, outputFile, buf); + } else if (strcmp(splitCommandArray[0], "annotate") == 0) { + po::variables_map annotateVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(annotateDesc) + .run(), annotateVm); + annotate(TG, annotateVm, outputFile, buf); + + } else if (strcmp(splitCommandArray[0], "reroot") == 0) { + po::variables_map rerootVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(rerootDesc) + .run(), rerootVm); + reroot(TG, rerootVm, outputFile, buf); + + } else if (strcmp(splitCommandArray[0], "aa-mutations") == 0) { + po::variables_map aaVm; + po::store(po::command_line_parser((int)splitCommand.size(), 
splitCommandArray) + .options(aaDesc) + .run(), aaVm); + aa(TG, aaVm, outputFile, buf); + + } else if(strcmp(splitCommandArray[0], "create-network") == 0) { + po::variables_map createNetVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(createNetDesc) + .run(), createNetVm); + createNet(TG, createNetVm, outputFile, buf); + + } else if(strcmp(splitCommandArray[0], "printMutations") == 0) { + po::variables_map printMutVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(printMutDesc) + .run(), printMutVm); + printMut(TG, printMutVm, outputFile, buf); + + } else if(strcmp(splitCommandArray[0], "printNodes") == 0) { + po::variables_map printNodeVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(printPathDesc) + .run(), printNodeVm); + printPath(TG, printNodeVm, outputFile, buf); + } else if (strcmp(splitCommandArray[0], "index") == 0) { + po::variables_map indexVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(indexDesc) + .run(), indexVm); + index(TG, indexVm, outputFile, buf); + } else if(strcmp(splitCommandArray[0], "printRoot") == 0) { + po::variables_map printRootVm; + po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + .options(printRootDesc) + .run(), printRootVm); + printRoot(TG, printRootVm, outputFile, buf); + } else if (strcmp(splitCommandArray[0], "exit") == 0 || strcmp(splitCommandArray[0], "q") == 0) { + return; + } else { + std::cout << "type exit or q to exit" << std::endl; + } } catch (std::exception& e) { std::cout << e.what() << std::endl; } From ffa7ba4506dad38c385aab97a626eb5b2c0d6599 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 18 Nov 2024 14:14:15 -0800 Subject: [PATCH 063/103] updated scripts --- CMakeLists.txt | 2 +- install/installationUbuntu.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 1095d7d..473ea1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required (VERSION 3.8) project(panmanUtils) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options") set(CMAKE_INCLUDE_CURRENT_DIR ON) # if(DEFINED Protobuf_PATH) diff --git a/install/installationUbuntu.sh b/install/installationUbuntu.sh index 9b3fed7..223a694 100755 --- a/install/installationUbuntu.sh +++ b/install/installationUbuntu.sh @@ -1,5 +1,5 @@ # Install dependencies - +sudo apt install -y git build-essential cmake wget curl zip unzip tar libboost-all-dev pkg-config capnproto # Build startDir=$pwd cd $(dirname "$0") @@ -15,7 +15,7 @@ wget https://github.com/oneapi-src/oneTBB/archive/2019_U9.tar.gz tar -xvzf 2019_U9.tar.gz -cmake -DTBB_DIR=${PWD}/oneTBB-2019_U9 -DCMAKE_PREFIX_PATH=${PWD}/oneTBB-2019_U9/cmake -DProtobuf_PROTOC_EXECUTABLE=/usr/bin/protoc -DCMAKE_TOOLCHAIN_FILE=${PWD}/vcpkg/scripts/buildsystems/vcpkg.cmake .. +cmake -DTBB_DIR=${PWD}/oneTBB-2019_U9 -DCMAKE_PREFIX_PATH=${PWD}/oneTBB-2019_U9/cmake -DCapnProto_PATH=/usr/bin/capnp -DCMAKE_TOOLCHAIN_FILE=${PWD}/vcpkg/scripts/buildsystems/vcpkg.cmake .. 
make -j cd $startDir From 50fd9bf6e07b9da29e312a24d68622f1b3ba3c11 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 18 Nov 2024 14:23:48 -0800 Subject: [PATCH 064/103] deployment working --- .github/workflows/cmake.yml | 1 - install/installationUbuntu.sh | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 6ea29f2..2609183 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -33,7 +33,6 @@ jobs: - name: install pre-reqs and build run: | - sudo apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev pkg-config chmod +x install/installationUbuntu.sh sudo ./install/installationUbuntu.sh - name: test diff --git a/install/installationUbuntu.sh b/install/installationUbuntu.sh index 223a694..def824a 100755 --- a/install/installationUbuntu.sh +++ b/install/installationUbuntu.sh @@ -1,5 +1,8 @@ # Install dependencies sudo apt install -y git build-essential cmake wget curl zip unzip tar libboost-all-dev pkg-config capnproto + +which capnp + # Build startDir=$pwd cd $(dirname "$0") From af716d8032d412bbce4d5b831fd3595e736b72e3 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 18 Nov 2024 14:27:20 -0800 Subject: [PATCH 065/103] new changes --- .github/workflows/cmake.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 04cbbbf..7517cfe 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -41,8 +41,6 @@ jobs: - name: install pre-reqs and build run: | - sudo apt install -y git build-essential cmake wget curl zip unzip tar protobuf-compiler libboost-all-dev pkg-config capnproto - chmod +x install/installationUbuntu.sh sudo ./install/installationUbuntu.sh - name: test From 5e7628e50a22e306bea53be021fb73b4093d801c Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 18 Nov 2024 14:34:49 -0800 Subject: [PATCH 066/103] solving workflow 
issue --- install/installationUbuntu.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/install/installationUbuntu.sh b/install/installationUbuntu.sh index b1079a3..56bc58c 100755 --- a/install/installationUbuntu.sh +++ b/install/installationUbuntu.sh @@ -2,6 +2,8 @@ sudo apt install -y git build-essential cmake wget curl zip unzip tar libboost-all-dev pkg-config capnproto +which capnp + # Build startDir=$pwd cd $(dirname "$0") @@ -16,7 +18,7 @@ sudo apt-get install pkg-config wget https://github.com/oneapi-src/oneTBB/archive/2019_U9.tar.gz tar -xvzf 2019_U9.tar.gz -cmake -DTBB_DIR=${PWD}/oneTBB-2019_U9 -DCMAKE_PREFIX_PATH=${PWD}/oneTBB-2019_U9/cmake -DCapnProto_PATH=/usr/bin/capnp -DCMAKE_TOOLCHAIN_FILE=${PWD}/vcpkg/scripts/buildsystems/vcpkg.cmake .. +cmake -DTBB_DIR=${PWD}/oneTBB-2019_U9 -DCMAKE_PREFIX_PATH=${PWD}/oneTBB-2019_U9/cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/vcpkg/scripts/buildsystems/vcpkg.cmake .. make -j From c01bfaddb2c675356602fd50b1eac144326d85cf Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 18 Nov 2024 14:49:43 -0800 Subject: [PATCH 067/103] solving workflow issue --- CMakeLists.txt | 9 ++++----- install/installationUbuntu.sh | 12 ++++++++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 473ea1d..8f0a05c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,11 +18,10 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) # include_directories(${Protobuf_INCLUDE_DIRS}) # Adding capn proto -if(DEFINED CapnProto_PATH) - find_package(CapnProto REQUIRED HINTS ${CapnProto_PATH}) -else() - find_package(CapnProto REQUIRED) -endif() + +find_program(CAPNP_EXECUTABLE capnp REQUIRED) +find_program(CAPNPC_CXX_EXECUTABLE capnpc-c++ REQUIRED) +find_package(CapnProto REQUIRED) if(CapnProto_FOUND) message(STATUS "Using Capn Proto ${CapnProto_VERSION}") diff --git a/install/installationUbuntu.sh b/install/installationUbuntu.sh index 56bc58c..cf0629c 100755 --- a/install/installationUbuntu.sh +++ 
b/install/installationUbuntu.sh @@ -1,8 +1,7 @@ # Install dependencies -sudo apt install -y git build-essential cmake wget curl zip unzip tar libboost-all-dev pkg-config capnproto +sudo apt install -y git build-essential cmake wget curl zip unzip tar libboost-all-dev pkg-config -which capnp # Build startDir=$pwd @@ -10,6 +9,15 @@ cd $(dirname "$0") mkdir -p ../build cd ../build +# install capnp +curl -O https://capnproto.org/capnproto-c++-1.0.2.tar.gz +tar zxf capnproto-c++-1.0.2.tar.gz +cd capnproto-c++-1.0.2 +./configure +make -j6 check +sudo make install +which capnp + git clone https://github.com/microsoft/vcpkg.git sudo apt-get install pkg-config ./vcpkg/bootstrap-vcpkg.sh From 51c2376746e1f1c660dcaccba8293752ee4d8fdf Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 18 Nov 2024 14:51:52 -0800 Subject: [PATCH 068/103] solving workflow issue --- install/installationUbuntu.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/install/installationUbuntu.sh b/install/installationUbuntu.sh index cf0629c..cb1955c 100755 --- a/install/installationUbuntu.sh +++ b/install/installationUbuntu.sh @@ -17,6 +17,7 @@ cd capnproto-c++-1.0.2 make -j6 check sudo make install which capnp +cd ../ git clone https://github.com/microsoft/vcpkg.git sudo apt-get install pkg-config From 3582f5d733715614a260a6c54456c0f2b47c779e Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 21 Nov 2024 11:06:55 -0800 Subject: [PATCH 069/103] less memory consumption --- CMakeLists.txt | 14 +- src/fasta.cpp | 505 +++++++++++++++++++------------------------ src/fitchSankoff.cpp | 9 +- src/panman.cpp | 111 +++++----- src/panman.hpp | 7 +- src/panmanUtils.cpp | 3 +- src/panmanUtils.hpp | 4 + 7 files changed, 309 insertions(+), 344 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f0a05c..558bee0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required (VERSION 3.8) -project(panmanUtils) +project(panmanUtilsNew) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options") set(CMAKE_INCLUDE_CURRENT_DIR ON) @@ -46,13 +46,13 @@ find_package(jsoncpp CONFIG REQUIRED) file(GLOB PANMAT_SRCS "src/panmanUtils.cpp" "src/panman.cpp" "src/panmanUtils.hpp" "src/panman.hpp") if(DEFINED CapnProto_PATH) - add_executable(panmanUtils + add_executable(panmanUtilsNew ${PANMAT_SRCS} ) capnp_generate( LANGUAGE cpp - TARGET panmanUtils + TARGET panmanUtilsNew PROTOS panman.capnp) else() @@ -60,7 +60,7 @@ else() CAPNP_SRCS CAPNP_HDRS panman.capnp) - add_executable(panmanUtils + add_executable(panmanUtilsNew ${PANMAT_SRCS} ${CAPNP_SRCS} ${CAPNP_HDRS} @@ -68,7 +68,7 @@ else() endif() -TARGET_COMPILE_OPTIONS(panmanUtils PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) +TARGET_COMPILE_OPTIONS(panmanUtilsNew PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) -TARGET_LINK_LIBRARIES(panmanUtils PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) -target_include_directories(panmanUtils PUBLIC "${PROJECT_BINARY_DIR}") +TARGET_LINK_LIBRARIES(panmanUtilsNew PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) +target_include_directories(panmanUtilsNew PUBLIC "${PROJECT_BINARY_DIR}") diff --git a/src/fasta.cpp b/src/fasta.cpp index 1cc08bf..fcb9b54 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -152,6 +152,106 @@ void panmanUtils::printSequenceLines(const sequence_t& sequence,\ } +void panmanUtils::printSequenceLinesNew(const std::vector>>>& sequence, + const std::vector& blockExists, + const std::vector& blockStrand, size_t lineSize, bool aligned, std::ostream& fout, int offset, bool debug) { + + // String that stores the sequence to be printed + std::string line; + + for(size_t i = 0; i < blockExists.size(); i++) { + // 
Non-gap block - the only type being used currently + if(blockExists[i]) { + // If forward strand + if(blockStrand[i]) { + // Iterate through main nucs + for(size_t j = 0; j < sequence[i].size(); j++) { + // Gap nucs + for(size_t k = 0; k < sequence[i][j].second.size(); k++) { + if(sequence[i][j].second[k] != '-') { + line += sequence[i][j].second[k]; + } else if(aligned) { + line += '-'; + } + } + // Main nuc + if(sequence[i][j].first != '-' && sequence[i][j].first != 'x') { + line += sequence[i][j].first; + } else if(aligned) { + line += '-'; + } + } + } else { + // If reverse strand, iterate backwards + for(size_t j = sequence[i].size()-1; j+1 > 0; j--) { + // Main nuc first since we are iterating in reverse direction + if(sequence[i][j].first != '-' && sequence[i][j].first != 'x') { + line += getComplementCharacter(sequence[i][j].first); + } else if(aligned) { + line += '-'; + } + + // Gap nucs + for(size_t k = sequence[i][j].second.size()-1; k+1 > 0; k--) { + if(sequence[i][j].second[k] != '-') { + line += getComplementCharacter(sequence[i][j].second[k]); + } else if(aligned) { + line += '-'; + } + } + } + } + } else if(aligned) { + // If aligned sequence is required, print gaps instead if block does not exist + for(size_t j = 0; j < sequence[i].size(); j++) { + for(size_t k = 0; k < sequence[i][j].second.size(); k++) { + line+='-'; + } + line+='-'; + } + } + + } + + size_t ctr = 0; + + if(offset != 0) { + for(size_t i = 0; i < line.length(); i++) { + if(line[i] != '-') { + if(ctr == (size_t)offset) { + // mark starting point + ctr = i; + break; + } + ctr++; + } + } + } + + std::string currentLine = ""; + // From offset to end + for(size_t i = ctr; i < line.length(); i++) { + currentLine += line[i]; + if(currentLine.length() == lineSize) { + fout << currentLine << '\n'; + currentLine = ""; + } + } + // From beginning to offset + for(size_t i = 0; i < ctr; i++) { + currentLine += line[i]; + if(currentLine.length() == lineSize) { + fout << currentLine << '\n'; + 
currentLine = ""; + } + } + if(currentLine.length()) { + fout << currentLine << '\n'; + currentLine = ""; + } + +} + void panmanUtils::printSubsequenceLines(const sequence_t& sequence, const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, const std::tuple& panMATStart, @@ -1277,46 +1377,32 @@ void panmanUtils::Tree::printFASTAFromGFA(std::ifstream& fin, std::ofstream& fou } } -void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, sequence_t& sequence, - blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { +void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, + std::vector>>>& sequence, + std::vector& blockExists, + std::vector& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { // For reversing block mutations - primary block id, secondary block id, old mutation, old strand, new mutation, new strand - std::vector< std::tuple< int32_t, int32_t, bool, bool, bool, bool > > blockMutationInfo (root->blockMutation.size()); + std::vector< std::tuple< int32_t, bool, bool, bool, bool > > blockMutationInfo; // Block Mutations - tbb::parallel_for((size_t)0, root->blockMutation.size(), [&](size_t i) { - // for(int i=0; iblockMutation.size(); i++) { - auto mutation = root->blockMutation[i]; + for(auto mutation: root->blockMutation) { int32_t primaryBlockId = mutation.primaryBlockId; - int32_t secondaryBlockId = mutation.secondaryBlockId; bool type = mutation.blockMutInfo; bool inversion = mutation.inversion; - if (secondaryBlockId != -1) { - std::cout << "Error: Block Secondary ID is not -1" << std::endl; - exit(0); - } - if(type == 1) { // insertion + bool oldStrand; bool oldMut; - if(secondaryBlockId != -1) { - oldStrand = 
blockStrand[primaryBlockId].second[secondaryBlockId]; - oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; - blockExists[primaryBlockId].second[secondaryBlockId] = true; + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = true; - // if insertion of inverted block takes place, the strand is backwards - blockStrand[primaryBlockId].second[secondaryBlockId] = !inversion; - } else { - oldStrand = blockStrand[primaryBlockId].first; - oldMut = blockExists[primaryBlockId].first; - blockExists[primaryBlockId].first = true; - - // if insertion of inverted block takes place, the strand is backwards - blockStrand[primaryBlockId].first = !inversion; - } - blockMutationInfo[i] = std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, true, !inversion); + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId] = !inversion; + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, true, !inversion) ); } else { @@ -1324,48 +1410,37 @@ void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, sequence_t& bool oldStrand; if(inversion) { // This means that this is not a deletion, but instead an inversion - if(secondaryBlockId != -1) { - oldStrand = blockStrand[primaryBlockId].second[secondaryBlockId]; - oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; - blockStrand[primaryBlockId].second[secondaryBlockId] = !oldStrand; - } else { - oldStrand = blockStrand[primaryBlockId].first; - oldMut = blockExists[primaryBlockId].first; - blockStrand[primaryBlockId].first = !oldStrand; - } + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockStrand[primaryBlockId] = !oldStrand; + if(oldMut != true) { std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; } - blockMutationInfo[i] = std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, oldMut, !oldStrand); + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); } else { // Actually a deletion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = false; - if(secondaryBlockId != -1) { - oldStrand = blockStrand[primaryBlockId].second[secondaryBlockId]; - oldMut = blockExists[primaryBlockId].second[secondaryBlockId]; - blockExists[primaryBlockId].second[secondaryBlockId] = false; - - // resetting strand to true during deletion - blockStrand[primaryBlockId].second[secondaryBlockId] = true; - } else { - oldStrand = blockStrand[primaryBlockId].first; - oldMut = blockExists[primaryBlockId].first; - blockExists[primaryBlockId].first = false; - - // resetting strand to true during deletion - blockStrand[primaryBlockId].first = true; - } + // resetting strand to true during deletion + blockStrand[primaryBlockId] = true; } - blockMutationInfo[i] = std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, false, true); + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, false, true) ); } - }); + + // } + + + } // For backtracking. 
primaryBlockId, secondaryBlockId, pos, gapPos, (oldVal, newVal) in substitution, ('-', newVal) in insertion, (oldVal, '-') in deletion - std::vector< std::vector > > mutationInfo(root->nucMutation.size()); + std::vector< std::tuple< int32_t, int, int, char, char > > mutationInfo; // Nuc mutations - tbb::parallel_for((size_t)0, root->nucMutation.size(), [&](size_t i) { + for(size_t i = 0; i < root->nucMutation.size(); i++) { int32_t primaryBlockId = root->nucMutation[i].primaryBlockId; int32_t secondaryBlockId = root->nucMutation[i].secondaryBlockId; @@ -1378,192 +1453,100 @@ void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, sequence_t& if(type < 3) { // Either S, I or D int len = ((root->nucMutation[i].mutInfo) >> 4); - mutationInfo[i].resize(len); + if(primaryBlockId >= sequence.size()) { std::cout << primaryBlockId << " " << sequence.size() << std::endl; } if(type == panmanUtils::NucMutationType::NS) { // Substitution - if(secondaryBlockId != -1) { - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal); - } - } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = newVal; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal); - } - + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = 
sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); } } else { - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = newVal; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal); - } - } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId].first[nucPosition+j].first = newVal; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal); - } + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition+j].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); } } } else if(type == panmanUtils::NucMutationType::NI) { // Insertion - if(secondaryBlockId != -1) { - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition + j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - 
sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal); - } - } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = newVal; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal); - } - + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); } } else { - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = newVal; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal); - } - } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId].first[nucPosition+j].first = newVal; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal); - } + for(int j = 0; j < len; j++) { + char oldVal = 
sequence[primaryBlockId][nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition+j].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); } } } else if(type == panmanUtils::NucMutationType::ND) { // Deletion - if(secondaryBlockId != -1) { - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j]; - sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-'); - } - } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first; - sequence[primaryBlockId].second[secondaryBlockId][nucPosition + j].first = '-'; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-'); - } - + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-')); } } else { - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j]; - sequence[primaryBlockId].first[nucPosition].second[nucGapPosition+j] = '-'; - mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-'); - } - } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId].first[nucPosition+j].first; - sequence[primaryBlockId].first[nucPosition+j].first = '-'; - 
mutationInfo[i][j] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-'); - } + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + sequence[primaryBlockId][nucPosition+j].first = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-')); } } } } else { - mutationInfo[i].resize(1); if(type == panmanUtils::NucMutationType::NSNPS) { // SNP Substitution newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); - if(secondaryBlockId != -1) { - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; - sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = newVal; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); - } else { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; - sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = newVal; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); - } + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); } else { - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; - sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = newVal; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); - } else { - char oldVal = sequence[primaryBlockId].first[nucPosition].first; - sequence[primaryBlockId].first[nucPosition].first = 
newVal; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); - } + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); } } else if(type == panmanUtils::NucMutationType::NSNPI) { // SNP Insertion newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); - if(secondaryBlockId != -1) { - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; - sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = newVal; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); - } else { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; - sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = newVal; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); - } + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); } else { - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; - sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = newVal; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); - } else { - char oldVal = sequence[primaryBlockId].first[nucPosition].first; - sequence[primaryBlockId].first[nucPosition].first = newVal; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, 
secondaryBlockId, nucPosition, nucGapPosition, oldVal, newVal); - } + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); } } else if(type == panmanUtils::NucMutationType::NSNPD) { // SNP Deletion - if(secondaryBlockId != -1) { - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition]; - sequence[primaryBlockId].second[secondaryBlockId][nucPosition].second[nucGapPosition] = '-'; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-'); - } else { - char oldVal = sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first; - sequence[primaryBlockId].second[secondaryBlockId][nucPosition].first = '-'; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-'); - } + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); } else { - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId].first[nucPosition].second[nucGapPosition]; - sequence[primaryBlockId].first[nucPosition].second[nucGapPosition] = '-'; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-'); - } else { - char oldVal = sequence[primaryBlockId].first[nucPosition].first; - sequence[primaryBlockId].first[nucPosition].first = '-'; - mutationInfo[i][0] = std::make_tuple(primaryBlockId, secondaryBlockId, nucPosition, nucGapPosition, oldVal, '-'); - } + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = '-'; + 
mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); } } } - }); + } // } if(root->children.size() == 0 || rootSeq) { @@ -1576,14 +1559,14 @@ void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, sequence_t& // If MSA is to be printed, offset doesn't matter offset = circularSequences[root->identifier]; } - sequence_t sequencePrint = sequence; - blockExists_t blockExistsPrint = blockExists; - blockStrand_t blockStrandPrint = blockStrand; + std::vector< std::vector< std::pair< char, std::vector< char > > > > sequencePrint = sequence; + std::vector< bool > blockExistsPrint = blockExists; + std::vector< bool > blockStrandPrint = blockStrand; if(rotationIndexes.find(root->identifier) != rotationIndexes.end() && rotationIndexes[root->identifier] != 0) { int ctr = -1, rotInd = 0; for(size_t i = 0; i < blockExistsPrint.size(); i++) { - if(blockExistsPrint[i].first) { + if(blockExistsPrint[i]) { ctr++; } if(ctr == rotationIndexes[root->identifier]) { @@ -1603,12 +1586,10 @@ void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, sequence_t& reverse(blockExistsPrint.begin(), blockExistsPrint.end()); reverse(blockStrandPrint.begin(), blockStrandPrint.end()); } - if (allIndex) { - panmanUtils::printSubsequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, panMATStart, panMATEnd, aligned, fout, offset); - } else { - panmanUtils::printSequenceLines(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); - } + + panmanUtils::printSequenceLinesNew(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); } else { + // DFS on children for(panmanUtils::Node* child: root->children) { printFASTAHelperNew(child, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); @@ -1618,66 +1599,43 @@ void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, sequence_t& // Undo block mutations when current node and 
its subtree have been processed - // for(auto it = blockMutationInfo.rbegin(); it != blockMutationInfo.rend(); it++) { - tbb::parallel_for((size_t)0, blockMutationInfo.size(), [&](size_t i) { - auto mutation = blockMutationInfo[i]; - if(std::get<1>(mutation) != -1) { - blockExists[std::get<0>(mutation)].second[std::get<1>(mutation)] = std::get<2>(mutation); - blockStrand[std::get<0>(mutation)].second[std::get<1>(mutation)] = std::get<3>(mutation); - } else { - blockExists[std::get<0>(mutation)].first = std::get<2>(mutation); - blockStrand[std::get<0>(mutation)].first = std::get<3>(mutation); - } - }); + for(auto it = blockMutationInfo.rbegin(); it != blockMutationInfo.rend(); it++) { + auto mutation = *it; + blockExists[std::get<0>(mutation)] = std::get<1>(mutation); + blockStrand[std::get<0>(mutation)] = std::get<2>(mutation); + + } // Undo nuc mutations when current node and its subtree have been processed - // for(auto it = mutationInfo.rbegin(); it != mutationInfo.rend(); it++) { - tbb::parallel_for((size_t)0, mutationInfo.size(), [&](size_t i) { - auto mutationList = mutationInfo[i]; - for(auto jt = mutationList.rbegin(); jt != mutationList.rend(); jt++) { - auto mutation = *jt; - if(std::get<1>(mutation) != -1) { - if(std::get<3>(mutation) != -1) { - sequence[std::get<0>(mutation)].second[std::get<1>(mutation)][std::get<2>(mutation)].second[std::get<3>(mutation)] = std::get<4>(mutation); - } else { - sequence[std::get<0>(mutation)].second[std::get<1>(mutation)][std::get<2>(mutation)].first = std::get<4>(mutation); - } - } else { - if(std::get<3>(mutation) != -1) { - sequence[std::get<0>(mutation)].first[std::get<2>(mutation)].second[std::get<3>(mutation)] = std::get<4>(mutation); - } else { - sequence[std::get<0>(mutation)].first[std::get<2>(mutation)].first = std::get<4>(mutation); - } - } + for(auto it = mutationInfo.rbegin(); it != mutationInfo.rend(); it++) { + auto mutation = *it; + if(std::get<2>(mutation) != -1) { + 
sequence[std::get<0>(mutation)][std::get<1>(mutation)].second[std::get<2>(mutation)] = std::get<3>(mutation); + } else { + sequence[std::get<0>(mutation)][std::get<1>(mutation)].first = std::get<3>(mutation); } - }); + + } + + // std::cout << "Done iteration for node: " << root->identifier << std::endl; } void panmanUtils::Tree::printFASTANew(std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { // List of blocks. Each block has a nucleotide list. Along with each nucleotide is a gap list. - std::vector< std::pair< std::vector< std::pair< char, std::vector< char > > >, std::vector< std::vector< std::pair< char, std::vector< char > > > > > > sequence(blocks.size() + 1); - std::vector< std::pair< bool, std::vector< bool > > > blockExists(blocks.size() + 1, {false, {}}); - blockStrand_t blockStrand(blocks.size() + 1, {true, {}}); + std::vector< std::vector< std::pair< char, std::vector< char > > > > sequence(blocks.size() + 1); + std::vector< bool > blockExists(blocks.size() + 1, false, {}); + std::vector< bool > blockStrand(blocks.size() + 1, true, {}); - // Assigning block gaps - // for(size_t i = 0; i < blockGaps.blockPosition.size(); i++) { - tbb::parallel_for((size_t)0, blockGaps.blockPosition.size(), [&](size_t i) { - sequence[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i]); - blockExists[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i], false); - blockStrand[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i], true); - }); - // } int32_t maxBlockId = 0; // Create consensus sequence of blocks - // for(size_t i = 0; i < blocks.size(); i++) { - tbb::parallel_for((size_t)0, blocks.size(), [&](size_t i) { + for(size_t i = 0; i < blocks.size(); i++) { + int32_t primaryBlockId = ((int32_t)blocks[i].primaryBlockId); - int32_t secondaryBlockId = ((int32_t)blocks[i].secondaryBlockId); - // 
maxBlockId = std::max(maxBlockId, primaryBlockId); + maxBlockId = std::max(maxBlockId, primaryBlockId); for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { bool endFlag = false; @@ -1690,11 +1648,7 @@ void panmanUtils::Tree::printFASTANew(std::ostream& fout, bool aligned, bool roo } const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); - if(secondaryBlockId != -1) { - sequence[primaryBlockId].second[secondaryBlockId].push_back({nucleotide, {}}); - } else { - sequence[primaryBlockId].first.push_back({nucleotide, {}}); - } + sequence[primaryBlockId].push_back({nucleotide, {}}); } if(endFlag) { @@ -1703,35 +1657,24 @@ void panmanUtils::Tree::printFASTANew(std::ostream& fout, bool aligned, bool roo } // End character to incorporate for gaps at the end - if(secondaryBlockId != -1) { - sequence[primaryBlockId].second[secondaryBlockId].push_back({'x', {}}); - } else { - sequence[primaryBlockId].first.push_back({'x', {}}); - } - }); - // } - // std::cout << maxBlockId << " " << blocks.size() << std::endl; - // sequence.resize(maxBlockId + 1); - // blockExists.resize(maxBlockId + 1); - // blockStrand.resize(maxBlockId + 1); + sequence[primaryBlockId].push_back({'x', {}}); + } + + sequence.resize(maxBlockId + 1); + blockExists.resize(maxBlockId + 1); + blockStrand.resize(maxBlockId + 1); // Assigning nucleotide gaps in blocks - // for(size_t i = 0; i < gaps.size(); i++) { - tbb::parallel_for((size_t)0, gaps.size(), [&](size_t i) { + for(size_t i = 0; i < gaps.size(); i++) { int32_t primaryBId = (gaps[i].primaryBlockId); int32_t secondaryBId = (gaps[i].secondaryBlockId); for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { int len = gaps[i].nucGapLength[j]; int pos = gaps[i].nucPosition[j]; - if(secondaryBId != -1) { - sequence[primaryBId].second[secondaryBId][pos].second.resize(len, '-'); - } else { - sequence[primaryBId].first[pos].second.resize(len, '-'); - } + sequence[primaryBId][pos].second.resize(len, '-'); } - }); - // } + } // Run depth 
first traversal to extract sequences diff --git a/src/fitchSankoff.cpp b/src/fitchSankoff.cpp index 022d52d..61afc4a 100644 --- a/src/fitchSankoff.cpp +++ b/src/fitchSankoff.cpp @@ -30,7 +30,7 @@ int panmanUtils::Tree::nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states, int refState) { if(node->children.size() == 0) { if(states.find(node->identifier) == states.end()) { - // std::cerr << "Node ID not found" << std::endl; + std::cerr << "Node ID " << node->identifier << " not found" << std::endl; return states[node->identifier] = 0; } return states[node->identifier]; @@ -41,7 +41,7 @@ int panmanUtils::Tree::nucFitchForwardPass(Node* node, } //for root int orStates = 0, andStates = childStates[0]; - if (node->parent==nullptr) { + if (node->parent==nullptr && refState != -1) { return states[node->identifier] = refState; } for(auto u: childStates) { @@ -102,10 +102,14 @@ void panmanUtils::Tree::nucFitchBackwardPass(Node* node, } if(node == root) { // The root sequence should take any of its values and not care about the parent state + // check for non "-" states first int currentState = 1; while(!(states[node->identifier] & currentState)) { currentState <<= 1; + // condition for "-" state + // if (currentState == (1<<16)) currentState = 1; } + states[node->identifier] = currentState; } else if(parentState & states[node->identifier]) { states[node->identifier] = parentState; @@ -411,6 +415,7 @@ void panmanUtils::Tree::nucSankoffBackwardPass(Node* node, int minVal = SANKOFF_INF; int minPtr = -1; for(int i = 0; i < 16; i++) { + std::cout << stateSets[node->identifier][i] << " " << SANKOFF_INF << std::endl; if(stateSets[node->identifier][i] < minVal) { minVal = stateSets[node->identifier][i]; minPtr = i; diff --git a/src/panman.cpp b/src/panman.cpp index 7242dc5..5651a52 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -1198,6 +1198,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE // Read MSA 
while(getline(fin,line,'\n')) { if(line.length() == 0) { + std::cout << "here"; continue; } if(line[0] == '>') { @@ -1208,57 +1209,57 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << std::endl; exit(-1); } - // std::cout << currentSequenceId << "\t" << currentSequence; - sequenceIdsToSequences[currentSequenceId] = currentSequence; + std::vector< std::string > splitLine; + stringSplit(currentSequenceId,'\r',splitLine); + sequenceIdsToSequences[splitLine[0]] = currentSequence; } std::vector< std::string > splitLine; stringSplit(line,' ',splitLine); currentSequenceId = splitLine[0].substr(1); currentSequence = ""; } else { - currentSequence += line; + std::vector< std::string > splitLine; + stringSplit(line,'\r',splitLine); + currentSequence += splitLine[0]; } } - if(currentSequence.length()) { if(lineLength != 0 && lineLength != currentSequence.length()) { std::cerr << "Error: sequence lengths don't match!" << std::endl; exit(-1); } else { lineLength = currentSequence.length(); - std::cout << lineLength << std::endl; } sequenceIdsToSequences[currentSequenceId] = currentSequence; } + + + std::cout << lineLength << std::endl; std::set< size_t > emptyPositions; - // std::cout << root->identifier << std::endl; - // ReRoot tree if reference provided + if (reference != "") { - // if (allNodes.find(reference) == allNodes.end()) { - // std::cout << reference << " is not a tip!!" 
<< std::endl; - // exit(0); - // } - // transform(allNodes[reference]); - // std::cout << reference << "\t" << root->identifier << std::endl; consensusSeq = sequenceIdsToSequences[reference]; } else { - tbb::parallel_for((size_t)0, lineLength, [&](size_t i) { - // for(size_t i = 0; i < lineLength; i++) { + // tbb::parallel_for((size_t)0, lineLength, [&](size_t i) { + consensusSeq.resize(lineLength); + int countEmpty=0; + for(size_t i = 0; i < lineLength; i++) { bool nonGapFound = false; for(auto u: sequenceIdsToSequences) { if(u.second[i] != '-') { - consensusSeq += u.second[i]; + consensusSeq[i] = u.second[i]; nonGapFound = true; break; } } if(!nonGapFound) { + countEmpty++; emptyPositions.insert(i); } - // } - }); + } + // }); for(auto& u: sequenceIdsToSequences) { std::string sequenceString; for(size_t i = 0; i < u.second.length(); i++) { @@ -1269,7 +1270,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE u.second = sequenceString; } } - + tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int8_t,int8_t > > > nonGapMutationsMSA; std::unordered_map< std::string, std::mutex > nodeMutexes; std::unordered_map< size_t, std::mutex > posMutexes; @@ -1284,44 +1285,53 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } int positionCount = 0; - tbb::parallel_for((size_t)0, consensusSeq.length(), [&](size_t i) { - // Sankoff - std::unordered_map< std::string, std::vector< int > > stateSets; - std::unordered_map< std::string, int > states; - std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; + - for(const auto& u: sequenceIdsToSequences) { - std::vector< int > currentState(16, SANKOFF_INF); - if(u.second[i] != '-') { - currentState[getCodeFromNucleotide(u.second[i])] = 0; - } else { - currentState[0] = 0; - } - stateSets[u.first] = currentState; - } - nucSankoffForwardPass(root, stateSets); - nucSankoffBackwardPass(root, stateSets, states, 
getCodeFromNucleotide(consensusSeq[i])); - nucSankoffAssignMutations(root, states, mutations, getCodeFromNucleotide(consensusSeq[i])); - for(auto mutation: mutations) { - nodeMutexes[mutation.first].lock(); - nonGapMutationsMSA[mutation.first].push_back(std::make_tuple(i, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); - nodeMutexes[mutation.first].unlock(); - } + // tbb::parallel_for((size_t)0, consensusSeq.length(), [&](size_t i) { + for(int i=0; i > stateSets; + // std::unordered_map< std::string, int > states; + // std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; + + // for(const auto& u: sequenceIdsToSequences) { + // std::vector< int > currentState(16, SANKOFF_INF); + // if(u.second[i] != '-') { + // currentState[getCodeFromNucleotide(u.second[i])] = 0; + // } else { + // currentState[0] = 0; + // } + // stateSets[u.first] = currentState; + // } + // nucSankoffForwardPass(root, stateSets); + // nucSankoffBackwardPass(root, stateSets, states, getCodeFromNucleotide(consensusSeq[i])); + // nucSankoffAssignMutations(root, states, mutations, getCodeFromNucleotide(consensusSeq[i])); + // for(auto mutation: mutations) { + // nodeMutexes[mutation.first].lock(); + // nonGapMutationsMSA[mutation.first].push_back(std::make_tuple(i, mutation.second.first, getCodeFromNucleotide(mutation.second.second))); + // nodeMutexes[mutation.first].unlock(); + // } // Fitch - /* + std::unordered_map< std::string, int > states; std::unordered_map< std::string, std::pair< panmanUtils::NucMutationType, char > > mutations; for(const auto& u: sequenceIdsToSequences) { if(u.second[i] != '-') { - states[u.first] = (1 << getCodeFromNucleotide(u.second[i])); + states.insert({u.first, (1 << getCodeFromNucleotide(u.second[i]))}); + // states[u.first] = (1 << getCodeFromNucleotide(u.second[i])); } else { - states[u.first] = 1; + states.insert({u.first, 1}); + // states[u.first] = 1; } - } + } + // exit(0); int refState = 
(reference=="")?-1:1<identifier] << std::endl; + nucFitchBackwardPass(root, states, (1 << getCodeFromNucleotide(consensusSeq[i]))); nucFitchAssignMutations(root, states, mutations, (1 << getCodeFromNucleotide(consensusSeq[i]))); for(auto mutation: mutations) { nodeMutexes[mutation.first].lock(); @@ -1330,8 +1340,9 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } posMutexes[i].lock(); posMutexes[i].unlock(); - */ - }); + + // }); + } // std::cout << root->identifier << std::endl; std::cout << consensusSeq << std::endl; @@ -1363,8 +1374,6 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE nodeMutexes[u.first].unlock(); // } }); - - } else if(ftype == panmanUtils::FILE_TYPE::MSA_OPTIMIZE) { std::string newickString; secondFin >> newickString; diff --git a/src/panman.hpp b/src/panman.hpp index 9f3530b..15e0580 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -308,8 +308,11 @@ class Tree { void printFASTAHelper(panmanUtils::Node* root, sequence_t& sequence, blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); - void printFASTAHelperNew(panmanUtils::Node* root, sequence_t& sequence, - blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, + void printFASTAHelperNew(panmanUtils::Node* root, + std::vector>>>& sequence, + std::vector& blockExists, + std::vector& blockStrand, + std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); void printSingleNodeHelper(std::vector &nodeList, int nodeListIndex, sequence_t& sequence, diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 34be6ba..f94cd15 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -364,6 +364,7 @@ void fasta(panmanUtils::TreeGroup 
*TG, po::variables_map &globalVm, std::ofstrea // T->printFASTAParallel(fout, false); T->printFASTANew(fout, false, false); + // T->printFASTA(fout,false,false); if(globalVm.count("output-file")) outputFile.close(); } @@ -1031,7 +1032,7 @@ void parseAndExecute(int argc, char* argv[]) { .positional(globalPositionArgumentDesc).allow_unregistered().run(), globalVm); po::notify(globalVm); - int threads = 64; + int threads = 32; if (globalVm.count("threads")) threads = globalVm["threads"].as(); tbb::task_scheduler_init init(threads); diff --git a/src/panmanUtils.hpp b/src/panmanUtils.hpp index ea161e7..79ff833 100644 --- a/src/panmanUtils.hpp +++ b/src/panmanUtils.hpp @@ -37,6 +37,10 @@ char getComplementCharacter(char nuc); void printSequenceLines(const sequence_t& sequence, const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, bool aligned, std::ostream& fout, int offset = 0, bool debug = false); +void printSequenceLinesNew(const std::vector>>>& sequence, + const std::vector& blockExists, + const std::vector& blockStrand, size_t lineSize, + bool aligned, std::ostream& fout, int offset = 0, bool debug = false); void printSubsequenceLines(const sequence_t& sequence,\ const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, From 7b35612328f30f9edfc5540f3fe24a0ab2948fe6 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 2 Dec 2024 07:40:40 -0800 Subject: [PATCH 070/103] added snakemake workflow --- workflows/Snakefile | 54 ++++++++++++++++++++++++++++++++++ workflows/envs/panmanUtils.yml | 4 +++ 2 files changed, 58 insertions(+) create mode 100644 workflows/Snakefile create mode 100644 workflows/envs/panmanUtils.yml diff --git a/workflows/Snakefile b/workflows/Snakefile new file mode 100644 index 0000000..3a7ad72 --- /dev/null +++ b/workflows/Snakefile @@ -0,0 +1,54 @@ + +rule all: + input: + config["RUNTYPE"] + +rule pangraph: + input: + config["FASTA"] + output: + output/pangraph.json + output/pangraph.nwk + conda: + 
envs/panamanUtils.yml + threads: + threads = 32 + shell: + ''' + echo "Building Alignment with PanGraph..." + export JULIA_NUM_THREADS={threads} + mkdir -p output + pangraph {input[0]} > {output[0]} 2> {output[1]} + echo $(cat {output[1]} | grep "tree" | awk '{split($0,a,"tree: "); print a[2]}') > {output[1]} + ''' + +rule mashtree: + input: + config["FASTA"] + output: + output/mashtree.nwk + threads: + threads = 32 + shell: + ''' + echo "Building Tree with MashTree..." + mkdir -p output + mashtree --numcpus {threads} {input[0]} > {output[0]} + ''' + +rule msa: + input: + config["FASTA"] + output/mashtree.nwk + output: + output/mafft.aln + threads: + threads = 32 + shell: + ''' + echo "Building Alignment with MAFFT..." + mafft {input[0]} > {output[0]} + ''' + + + diff --git a/workflows/envs/panmanUtils.yml b/workflows/envs/panmanUtils.yml new file mode 100644 index 0000000..71e6ea3 --- /dev/null +++ b/workflows/envs/panmanUtils.yml @@ -0,0 +1,4 @@ +channels: + - defaults + - bioconda + - conda-forge \ No newline at end of file From 15947a2da8cf95cb8f590c506a5d50f06d5719a6 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 2 Dec 2024 08:53:41 -0800 Subject: [PATCH 071/103] added scripts for mashtree workflow --- workflows/Snakefile | 29 +++++++++-------- workflows/scripts/splitSeqs.py | 37 +++++++++++++++++++++ workflows/scripts/updateNewick.py | 53 +++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 14 deletions(-) create mode 100644 workflows/scripts/splitSeqs.py create mode 100644 workflows/scripts/updateNewick.py diff --git a/workflows/Snakefile b/workflows/Snakefile index 3a7ad72..6a747d5 100644 --- a/workflows/Snakefile +++ b/workflows/Snakefile @@ -12,29 +12,32 @@ rule pangraph: conda: envs/panamanUtils.yml threads: - threads = 32 + thread = 32 shell: ''' echo "Building Alignment with PanGraph..." 
- export JULIA_NUM_THREADS={threads} + export JULIA_NUM_THREADS={thread} mkdir -p output pangraph {input[0]} > {output[0]} 2> {output[1]} echo $(cat {output[1]} | grep "tree" | awk '{split($0,a,"tree: "); print a[2]}') > {output[1]} ''' rule mashtree: - input: - config["FASTA"] - output: - output/mashtree.nwk + input: + config["FASTA"] + output: + output/mashtree.nwk threads: - threads = 32 - shell: - ''' + thread = 32 + shell: + ''' echo "Building Tree with MashTree..." mkdir -p output - mashtree --numcpus {threads} {input[0]} > {output[0]} - ''' + python3 scripts/splitSeqs.py {input[0]} temp + mashtree --numcpus {thread} temp/* > {output[0]} + python3 scripts/updateNewick.py {output[0]} /temp {output[0]} + rm -r temp /temp + ''' rule msa: input: @@ -43,12 +46,10 @@ rule msa: output: output/mafft.aln threads: - threads = 32 + thread = 32 shell: ''' echo "Building Alignment with MAFFT..." mafft {input[0]} > {output[0]} ''' - - diff --git a/workflows/scripts/splitSeqs.py b/workflows/scripts/splitSeqs.py new file mode 100644 index 0000000..18ad9be --- /dev/null +++ b/workflows/scripts/splitSeqs.py @@ -0,0 +1,37 @@ +from Bio import SeqIO +import os + + +def splitFasta(input_file, output_dir): + + seqNameMap="" + ff = open("/temp", "w") + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + count=1 + + + for record in SeqIO.parse(input_file, "fasta"): + filename = f"{count}.fasta" + seqNameMap = str(count) + "," + record.id + ff.write(seqNameMap) + ff.write("\n") + output_path = os.path.join(output_dir, filename) + + with open(output_path, "w") as output_file: + SeqIO.write(record, output_file, "fasta") + count+=1 + print(f"Saved: {output_path}") + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Split sequences in a FASTA file into individual files.") + parser.add_argument("inp", help="Path to the input FASTA file.") + parser.add_argument("outDir", help="Directory to save the split FASTA files.") + + args = 
parser.parse_args() + + splitFasta(args.inp, args.outDir) + diff --git a/workflows/scripts/updateNewick.py b/workflows/scripts/updateNewick.py new file mode 100644 index 0000000..37d14d1 --- /dev/null +++ b/workflows/scripts/updateNewick.py @@ -0,0 +1,53 @@ +from ete3 import Tree +import csv + +def read_csv_mapping(csv_file): + """ + Reads a CSV file mapping sequence numbers to actual names. + Expects two columns: 'number' and 'name'. + :param csv_file: Path to the CSV file. + :return: Dictionary mapping sequence numbers to names. + """ + mapping = {} + with open(csv_file, mode='r') as file: + reader = csv.reader(file) + for row in reader: + if len(row) != 2: + continue + number, name = row + mapping[number] = name + return mapping + +def replace_names_in_newick(newick_file, mapping, output_file): + """ + Replaces sequence numbers with actual names in a Newick file. + :param newick_file: Path to the Newick file. + :param mapping: Dictionary mapping numbers to names. + :param output_file: Path to save the modified Newick file. 
+ """ + # Load the Newick tree + tree = Tree(newick_file, format=1) + + # Replace node names based on the mapping + for leaf in tree: + if leaf.name in mapping: + leaf.name = mapping[leaf.name] + + # Write the modified tree to a new file + tree.write(outfile=output_file, format=1) + print(f"Modified Newick tree saved to: {output_file}") + +if __name__ == "__main__": + import argparse + + # Parse command-line arguments + parser = argparse.ArgumentParser(description="Replace sequence numbers in a Newick file with actual names.") + parser.add_argument("newick_file", help="Path to the input Newick file.") + parser.add_argument("csv_file", help="Path to the CSV file containing number-to-name mapping.") + parser.add_argument("output_file", help="Path to save the modified Newick file.") + + args = parser.parse_args() + + # Read the mapping and process the Newick file + mapping = read_csv_mapping(args.csv_file) + replace_names_in_newick(args.newick_file, mapping, args.output_file) From 170803cdfff8e5ae38c84a4d391fce87bca353bb Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 2 Dec 2024 11:24:42 -0800 Subject: [PATCH 072/103] snakemake workflow verified --- workflows/Snakefile | 93 +++++++++++++++++++++++++++------- workflows/envs/panmanUtils.yml | 5 +- 2 files changed, 78 insertions(+), 20 deletions(-) diff --git a/workflows/Snakefile b/workflows/Snakefile index 6a747d5..82c5fe0 100644 --- a/workflows/Snakefile +++ b/workflows/Snakefile @@ -1,55 +1,110 @@ +''' +This is a snakemake workflow for building PanMAN from PanGraph, GFA, or MSA. 
+ +Users can run workflow as: + +Building PanMAN from PanGraph Alignment + snakemake --use-conda --cores [num threads] --config RUNTYPE="pangraph" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] +Building PanMAN from PGGB Alignment + snakemake --use-conda --cores [num threads] --config RUNTYPE="gfa" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] +Building PanMAN from MAFFT Alignment + snakemake --use-conda --cores [num threads] --config RUNTYPE="msa" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] + +Note: This workflow uses MashTree to build tree input for panmanUtils when building panman from gfa or msa. +''' + +def config_select(config): + target_rule = config.get("RUNTYPE", None) + + if target_rule == "pangraph": + return "output/pangraph.aln" + elif target_rule == "gfa": + return "output/pggb.aln" + elif target_rule == "msa": + return "output/mafft.aln" + else: + raise ValueError(f"Unknown target rule '{target_rule}'. Choose from 'pangraph', 'gfa', or 'msa'.") rule all: input: - config["RUNTYPE"] + config_select(config) rule pangraph: input: config["FASTA"] output: - output/pangraph.json - output/pangraph.nwk - conda: - envs/panamanUtils.yml + aln = "output/pangraph.aln", + newick = "output/pangraph.nwk", + panman = "panman/out.panman" threads: - thread = 32 + threads = 32 shell: ''' echo "Building Alignment with PanGraph..." - export JULIA_NUM_THREADS={thread} + export JULIA_NUM_THREADS={threads} mkdir -p output - pangraph {input[0]} > {output[0]} 2> {output[1]} - echo $(cat {output[1]} | grep "tree" | awk '{split($0,a,"tree: "); print a[2]}') > {output[1]} + pangraph build {input[0]} > {output.aln} 2> {output.newick} + awk '/tree/ {{split($0,a,"tree: "); print a[2]}}' {output.newick} > temp.newick && mv temp.newick {output.newick} + + echo "Building PanMAN from Pangraph alignment..." 
+ ../build/panmanUtils -P {output.aln} -N {output.newick} -o out ''' rule mashtree: input: config["FASTA"] output: - output/mashtree.nwk + newick = "output/mashtree.nwk" threads: - thread = 32 + threads = 32 shell: ''' echo "Building Tree with MashTree..." mkdir -p output python3 scripts/splitSeqs.py {input[0]} temp - mashtree --numcpus {thread} temp/* > {output[0]} - python3 scripts/updateNewick.py {output[0]} /temp {output[0]} + mashtree --numcpus {threads} temp/* > {output.newick} + python3 scripts/updateNewick.py {output.newick} /temp {output.newick} rm -r temp /temp ''' -rule msa: +rule mafft: input: - config["FASTA"] - output/mashtree.nwk + config["FASTA"], + newick = "output/mashtree.nwk" output: - output/mafft.aln + aln = "output/mafft.aln", + panman = "panman/out.panman" threads: - thread = 32 + threads = 32 shell: ''' echo "Building Alignment with MAFFT..." - mafft {input[0]} > {output[0]} + mafft --auto {input.newick} > {output.aln} + + echo "Building PanMAN from MAFFT alignment..." + ../build/panmanUtils -M {output.aln} -N {input.newick} -o out + ''' + +rule pggb: + input: + fasta = config["FASTA"], + newick = "output/mashtree.nwk" + output: + aln = "output/pggb.aln", + panman = "panman/out.panman" + threads: + threads = 32 + params: + sq = config["SEQ_COUNT"] + shell: + ''' + echo "Building Alignment with PGGB..." + samtools faidx {input.fasta} + pggb --threads {threads} -i {input.fasta} -o pggb_temp -n {params.sq} + mv pggb_temp/*final.gfa {output.aln} + rm -r pggb_temp + + echo "Building PanMAN from PGGB alignment..." 
+ ../build/panmanUtils -G {output.aln} -N {input.newick} -o out ''' diff --git a/workflows/envs/panmanUtils.yml b/workflows/envs/panmanUtils.yml index 71e6ea3..2972d67 100644 --- a/workflows/envs/panmanUtils.yml +++ b/workflows/envs/panmanUtils.yml @@ -1,4 +1,7 @@ channels: - defaults - bioconda - - conda-forge \ No newline at end of file + - conda-forge +dependencies: + - biopython + - mashtree \ No newline at end of file From fc394719169ac965617f53b9c255c204dc853502 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Mon, 2 Dec 2024 17:20:53 -0800 Subject: [PATCH 073/103] panman to usher --- src/panman.cpp | 40 ++- src/panman.hpp | 1 + src/panman2usher.cpp | 604 +++++++++++++++++++++++++++++++++++++++++++ src/panmanUtils.cpp | 41 +++ src/panmanUtils.hpp | 5 +- src/rotation.cpp | 2 +- 6 files changed, 678 insertions(+), 15 deletions(-) create mode 100644 src/panman2usher.cpp diff --git a/src/panman.cpp b/src/panman.cpp index 5651a52..4f4e956 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -31,6 +31,7 @@ #include "annotate.cpp" #include "reroot.cpp" #include "aaTrans.cpp" +#include "panman2usher.cpp" #include "panmanUtils.hpp" @@ -747,10 +748,10 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE secondFin >> newickString; Json::Value pangraphData; fin >> pangraphData; - + root = createTreeFromNewickString(newickString); auto start = std::chrono::high_resolution_clock::now(); - panmanUtils::Pangraph pg(pangraphData); + panmanUtils::Pangraph pg(pangraphData, root); auto end = std::chrono::high_resolution_clock::now(); std::chrono::nanoseconds timing = end -start; @@ -767,7 +768,6 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE std::unordered_map< std::string, std::vector< int > > alignedStrandSequences = pg.getAlignedStrandSequences(topoArray); - root = createTreeFromNewickString(newickString); // Check if tree is a polytomy to check if Sankoff algorithm needs to be applied bool polytomy = 
hasPolytomy(root); @@ -792,8 +792,11 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE tbb::concurrent_unordered_map< size_t, std::unordered_map< std::string, std::pair< BlockMutationType, bool > > > globalBlockMutations; - std::cout << "Inferring mutations..." << std::endl; + + + std::cout << "Inferring Block mutations..." << std::endl; tbb::parallel_for((size_t)0, topoArray.size(), [&](size_t i) { + // for(size_t i=0; i states; @@ -881,6 +884,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE blockSankoffAssignMutations(root, states, mutations, 0); globalBlockMutations[i] = mutations; } + // } }); std::unordered_map< std::string, std::mutex > nodeMutexes; @@ -921,10 +925,13 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE tbb::concurrent_unordered_map< std::string, std::vector< std::tuple< int,int,int,int,int,int > > > gapMutations; + std::cout << "Inferring Nuc mutations..." << std::endl; tbb::parallel_for((size_t)0, topoArray.size(), [&](size_t i) { + // for(size_t i=0; i > > sequence(consensusSeq.size()+1, - {'-', {}}); + std::vector< std::pair< char, std::vector< char > > > sequence(consensusSeq.size()+1,{'-', {}}); + std::vector< std::pair< char, std::vector< char > > > dumysequence(consensusSeq.size()+1,{'-', {}}); + for(size_t j = 0; j < consensusSeq.length(); j++) { sequence[j].first = consensusSeq[j]; } @@ -932,15 +939,19 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE sequence[pg.stringIdToGaps[pg.intIdToStringId[topoArray[i]]][j].first] .second.resize(pg.stringIdToGaps[pg.intIdToStringId[topoArray[i]]][j].second, '-'); + dumysequence[pg.stringIdToGaps[pg.intIdToStringId[topoArray[i]]][j].first] + .second.resize(pg.stringIdToGaps[pg.intIdToStringId[topoArray[i]]][j].second, + '-'); } tbb::concurrent_unordered_map< std::string, std::vector< std::pair< char, std::vector< char > > > > individualSequences; 
tbb::parallel_for_each(alignedSequences, [&](const auto& u) { + std::vector< std::pair< char, std::vector< char > > > currentSequence = sequence; if(u.second[i] == -1) { + // individualSequences[u.first] = dumysequence; return; } - std::vector< std::pair< char, std::vector< char > > > currentSequence = sequence; for(const auto& v: pg.substitutions[pg.intIdToStringId[topoArray[i]]][u.first][blockCounts[u.first][i]]) { currentSequence[v.first-1].first = v.second[0]; @@ -958,6 +969,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE individualSequences[u.first] = currentSequence; }); + tbb::parallel_for((size_t) 0, sequence.size(), [&](size_t j) { tbb::parallel_for((size_t)0, sequence[j].second.size(), [&](size_t k) { if(!polytomy) { @@ -1144,6 +1156,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } }); }); + tbb::parallel_for_each(nonGapMutations, [&](auto& u) { nodeMutexes[u.first].lock(); @@ -5304,7 +5317,7 @@ std::vector< size_t > panmanUtils::GfaGraph::getTopologicalSort() { } -panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData) { +panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData, panmanUtils::Node* root) { // load paths bool circular=false; for(size_t i = 0; i < pangraphData["paths"].size(); i++) { @@ -5405,7 +5418,7 @@ panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData) { bool invert = false; sample_new= rotate_sample(sample_base, sample_dumy, strandPaths[p.first], blockNumbers[p.first], blockSizeMap, rotation_index, invert); - std::cout << p.first << "\n"; + // std::cout << p.first << "\n"; // std::vector temp1({"a","b","c","d","e","f"}); // std::vector temp2({"a","b","c","d","g","h"}); // std::vector temp3({1,1,1,1,1,1}); @@ -5461,7 +5474,7 @@ panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData) { std::vector intSequenceConsensus= {}; std::vector intSequenceSample= {}; std::vector intSequenceConsensusNew= {}; - + std::cout << "Resolving rearrangements and 
duplications..." << std::endl; for(const auto& p: paths) { if (seqCount == 0) { // Load first sequence path @@ -5473,6 +5486,7 @@ panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData) { intSequenceConsensus.push_back(numNodes); numNodes++; } + // std::cout << "Len of consensus: " << consensus.size() << std::endl; } else { intSequenceSample.clear(); intSequenceConsensusNew.clear(); @@ -5505,10 +5519,10 @@ panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData) { for (auto &b: intSequenceConsensusNew) { intSequenceConsensus.push_back(b); } - + std::cout << "Len of consensus: " << consensus.size() << std::endl; } seqCount++; - // std::cout << seqCount << " " << intSequenceConsensusNew.size() << endl; + std::cout << seqCount << " " << intSequenceConsensusNew.size() << endl; } // re-assigning IDs in fixed order @@ -5525,7 +5539,9 @@ panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData) { for (auto &s: m.second) { s = order_map[s]; } + // std::cout << m.first << " " << m.first.size() << std::endl; } + } std::unordered_map< std::string,std::vector< int > > panmanUtils::Pangraph::getAlignedStrandSequences(const std::vector< size_t >& topoArray) { diff --git a/src/panman.hpp b/src/panman.hpp index 15e0580..6cc73e2 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -1,3 +1,4 @@ +#pragma once #include #include #include diff --git a/src/panman2usher.cpp b/src/panman2usher.cpp new file mode 100644 index 0000000..4f3bcf8 --- /dev/null +++ b/src/panman2usher.cpp @@ -0,0 +1,604 @@ +#include "panmanUtils.hpp" + +void getCoordMap(panmanUtils::Tree* panmanTree, std::vector>>> &globalCoords_t) { + const std::vector &blocks = panmanTree->blocks; + const std::vector &gaps = panmanTree->gaps; + + // blocks + for (size_t block_id = 0; block_id < blocks.size(); block_id++) { + int32_t blockId = ((int32_t)blocks[block_id].primaryBlockId); + for (size_t nuc_pos = 0; nuc_pos < blocks[block_id].consensusSeq.size(); nuc_pos++) { + bool endFlag = false; + for (size_t k = 0; k < 8; 
k++) { + const int nucCode = (((blocks[block_id].consensusSeq[nuc_pos]) >> (4 * (7 - k))) & 15); + if (nucCode == 0) { + endFlag = true; + break; + } + globalCoords_t[blockId].push_back({0, {}}); + } + if (endFlag){ + break; + } + } + globalCoords_t[blockId].push_back({0, {}}); // do I need this? + } + + // nuc gaps + for (size_t i = 0; i < gaps.size(); i++) { + int32_t blockId = (gaps[i].primaryBlockId); + for (size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + int len = gaps[i].nucGapLength[j]; + int pos = gaps[i].nucPosition[j]; + globalCoords_t[blockId][pos].second.resize(len, 0); + } + } + + // Assigning coordinates + int index = 1; + for (size_t blockId = 0; blockId < globalCoords_t.size(); blockId++){ + for (size_t j = 0; j < globalCoords_t[blockId].size(); j++){ + for (size_t k = 0; k < globalCoords_t[blockId][j].second.size(); k++){ + globalCoords_t[blockId][j].second[k] = index; + index++; + } + globalCoords_t[blockId][j].first = index; + index++; + } + } + return; + +} + +void getPseudoRoot(panmanUtils::Tree* panmanTree, std::vector>>> &pseudoRoot) { + const std::vector &blocks = panmanTree->blocks; + const std::vector &gaps = panmanTree->gaps; + + // blocks + for (size_t block_id = 0; block_id < blocks.size(); block_id++) { + int32_t blockId = ((int32_t)blocks[block_id].primaryBlockId); + for (size_t nuc_pos = 0; nuc_pos < blocks[block_id].consensusSeq.size(); nuc_pos++) { + bool endFlag = false; + for (size_t k = 0; k < 8; k++) { + const int nucCode = (((blocks[block_id].consensusSeq[nuc_pos]) >> (4 * (7 - k))) & 15); + if (nucCode == 0) { + endFlag = true; + break; + } + const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); + pseudoRoot[blockId].push_back({nucleotide, {}}); + } + if (endFlag){ + break; + } + } + pseudoRoot[blockId].push_back({'x', {}}); // do I need this? 
+ } + + // nuc gaps + for (size_t i = 0; i < gaps.size(); i++) { + int32_t blockId = (gaps[i].primaryBlockId); + for (size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + int len = gaps[i].nucGapLength[j]; + int pos = gaps[i].nucPosition[j]; + pseudoRoot[blockId][pos].second.resize(len, '-'); + } + } + + return; + +} + +int8_t get_nuc_id (char nuc) { + int8_t ret = 0b1111; + switch(nuc) { + case 'a': + case 'A': + ret = 0b1; + break; + case 'c': + case 'C': + ret = 0b10; + break; + case 'g': + case 'G': + ret = 0b100; + break; + case 't': + case 'T': + ret = 0b1000; + break; + case 'R': + ret = 0b101; + break; + case 'Y': + ret = 0b1010; + break; + case 'S': + ret = 0b110; + break; + case 'W': + ret = 0b1001; + break; + case 'K': + ret = 0b1100; + break; + case 'M': + ret = 0b11; + break; + case 'B': + ret = 0b1110; + break; + case 'D': + ret = 0b1101; + break; + case 'H': + ret = 0b1011; + break; + case 'V': + ret = 0b111; + case 'n': + case 'N': + default: + ret = 0b1111; + break; + } + return ret; +} + +// Sets bits at positions specified by nuc_vec to 1 in int8 +int8_t get_nuc_id (std::vector nuc_vec) { + int8_t ret = 0; + int8_t one = 1; + for (auto nuc: nuc_vec) { + assert((nuc >= 0) && (nuc <=3)); + ret += (one << nuc); + } + return ret; +} + +// Convert nuc_id back to IUPAC base +char get_nuc (int8_t nuc_id) { + char ret = 'N'; + //assert ((nuc_id >= 1) && (nuc_id <= 15)); + switch(nuc_id) { + case 1: + ret = 'A'; + break; + case 2: + ret = 'C'; + break; + case 3: + ret = 'M'; + break; + case 4: + ret = 'G'; + break; + case 5: + ret = 'R'; + break; + case 6: + ret = 'S'; + break; + case 7: + ret = 'V'; + break; + case 8: + ret = 'T'; + break; + case 9: + ret = 'W'; + break; + case 10: + ret = 'Y'; + break; + case 11: + ret = 'H'; + break; + case 12: + ret = 'K'; + break; + case 13: + ret = 'D'; + break; + case 14: + ret = 'B'; + break; + default: + ret = 'N'; + break; + } + return ret; +} + +// A:0, C:1, G:2, T:3 +int8_t get_nt (int8_t nuc_id) { + int8_t 
ret = 0; + switch(nuc_id) { + case 1: + ret = 0; + break; + case 2: + ret = 1; + break; + case 4: + ret = 2; + break; + case 8: + ret = 3; + break; + default: + ret = -1; + break; + } + return ret; +} + +std::vector get_nuc_vec (char c) { + switch (c) { + case 'a': + case 'A': + return std::vector {0}; + case 'c': + case 'C': + return std::vector {1}; + case 'g': + case 'G': + return std::vector {2}; + case 't': + case 'T': + return std::vector {3}; + case 'R': + return std::vector {0,2}; + case 'Y': + return std::vector {1,3}; + case 'S': + return std::vector {1,2}; + case 'W': + return std::vector {0,3}; + case 'K': + return std::vector {2,3}; + case 'M': + return std::vector {0,1}; + case 'B': + return std::vector {1,2,3}; + case 'D': + return std::vector {0,2,3}; + case 'H': + return std::vector {0,1,3}; + case 'V': + return std::vector {0,1,2}; + case 'n': + case 'N': + return std::vector {0,1,2,3}; + default: + return std::vector {0,1,2,3}; + } +} +std::vector get_nuc_vec_from_id (int8_t nuc_id) { + return get_nuc_vec(get_nuc(nuc_id)); +} + +void getNodeDFS(Parsimony::data &data, panmanUtils::Node* node, + std::vector>>> &globalCoords_t, + std::vector>>> &pseudoRoot, + std::vector>>> &sequence, + std::vector< bool > &blockExists, + std::vector< bool > &blockStrand){ + // write nuc mutations + auto mutation_list = data.add_node_mutations(); + + std::vector< std::tuple< int32_t, bool, bool, bool, bool > > blockMutationInfo; + + // Block Mutations + for(auto mutation: node->blockMutation) { + int32_t primaryBlockId = mutation.primaryBlockId; + bool type = mutation.blockMutInfo; + bool inversion = mutation.inversion; + if(type == 1) { + // insertion + bool oldStrand; + bool oldMut; + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = true; + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId] = !inversion; + blockMutationInfo.push_back( 
std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, true, !inversion) ); + } else { + bool oldMut; + bool oldStrand; + if(inversion) { + // This means that this is not a deletion, but instead an inversion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockStrand[primaryBlockId] = !oldStrand; + + if(oldMut != true) { + std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; + } + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); + } else { + // Actually a deletion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = false; + + // resetting strand to true during deletion + blockStrand[primaryBlockId] = true; + } + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, false, true) ); + + } + } + + // For backtracking. primaryBlockId, secondaryBlockId, pos, gapPos, (oldVal, newVal) in substitution, ('-', newVal) in insertion, (oldVal, '-') in deletion + std::vector< std::tuple< int32_t, int, int, char, char > > mutationInfo; + + for (int i=0; inucMutation.size(); i++) { + int32_t primaryBlockId = node->nucMutation[i].primaryBlockId; + int32_t secondaryBlockId = node->nucMutation[i].secondaryBlockId; + int32_t nucPosition = node->nucMutation[i].nucPosition; + int32_t nucGapPosition = node->nucMutation[i].nucGapPosition; + uint32_t type = (node->nucMutation[i].mutInfo & 0x7); + char newVal = '-'; + + if(type < 3) { + // Either S, I or D + int len = ((node->nucMutation[i].mutInfo) >> 4); + + if(primaryBlockId >= sequence.size()) { + std::cout << primaryBlockId << " " << sequence.size() << std::endl; + } + + if(type == panmanUtils::NucMutationType::NS) { + // Substitution + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + auto mut = mutation_list->add_mutation(); + char oldVal = 
sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition].second[nucGapPosition+j]); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].second[nucGapPosition+j])); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } else { + for(int j = 0; j < len; j++) { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition+j].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition+j].first); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition+j].first)); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } + } else if(type == panmanUtils::NucMutationType::NI) { + // Insertion + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + 
mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition].second[nucGapPosition+j]); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].second[nucGapPosition+j])); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } else { + for(int j = 0; j < len; j++) { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition+j].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition+j].first); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition+j].first)); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } + } else if(type == panmanUtils::NucMutationType::ND) { + // Deletion + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-')); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition].second[nucGapPosition+j]); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].second[nucGapPosition+j])); + for 
(auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } else { + for(int j = 0; j < len; j++) { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + sequence[primaryBlockId][nucPosition+j].first = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-')); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition+j].first); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition+j].first)); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } + } + } else { + if(type == panmanUtils::NucMutationType::NSNPS) { + // SNP Substitution + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 20) & 0xF); + if(nucGapPosition != -1) { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition].second[nucGapPosition]); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].second[nucGapPosition])); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } else { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + + 
mut->set_position(globalCoords_t[primaryBlockId][nucPosition].first); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].first)); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } else if(type == panmanUtils::NucMutationType::NSNPI) { + // SNP Insertion + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 20) & 0xF); + if(nucGapPosition != -1) { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition].second[nucGapPosition]); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].second[nucGapPosition])); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } else { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = newVal; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition].first); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].first)); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } else if(type == panmanUtils::NucMutationType::NSNPD) { + // SNP Deletion + if(nucGapPosition != -1) { + auto mut = 
mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition].second[nucGapPosition]); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].second[nucGapPosition])); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } else { + auto mut = mutation_list->add_mutation(); + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = '-'; + mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); + + mut->set_position(globalCoords_t[primaryBlockId][nucPosition].first); + mut->set_par_nuc(panmanUtils::getCodeFromNucleotide(oldVal)); + mut->set_ref_nuc(panmanUtils::getCodeFromNucleotide(pseudoRoot[primaryBlockId][nucPosition].first)); + for (auto nuc: get_nuc_vec_from_id(panmanUtils::getCodeFromNucleotide(newVal))) { + mut->add_mut_nuc(nuc); + } + } + } + } + } + + for(panmanUtils::Node* child: node->children) { + getNodeDFS(data, child, globalCoords_t, pseudoRoot, sequence, blockExists, blockStrand); + } + + // Undo block mutations when current node and its subtree have been processed + for(auto it = blockMutationInfo.rbegin(); it != blockMutationInfo.rend(); it++) { + auto mutation = *it; + blockExists[std::get<0>(mutation)] = std::get<1>(mutation); + blockStrand[std::get<0>(mutation)] = std::get<2>(mutation); + + } + + // Undo nuc mutations when current node and its subtree have been processed + for(auto it = mutationInfo.rbegin(); it != mutationInfo.rend(); it++) { + auto mutation = *it; + if(std::get<2>(mutation) != -1) { + 
sequence[std::get<0>(mutation)][std::get<1>(mutation)].second[std::get<2>(mutation)] = std::get<3>(mutation); + } else { + sequence[std::get<0>(mutation)][std::get<1>(mutation)].first = std::get<3>(mutation); + } + + } +} + +void panmanUtils::panmanToUsher(panmanUtils::Tree* panmanTree, std::string refName, std::string filename,std::string refSeq) { + std::vector>>> globalCoords_t; + std::vector>>> pseudoRoot; + + getCoordMap(panmanTree, globalCoords_t); + getPseudoRoot(panmanTree, pseudoRoot); + + std::vector>>> sequence = pseudoRoot; + + std::vector< bool > blockExists(panmanTree->blocks.size() + 1, false, {}); + std::vector< bool > blockStrand(panmanTree->blocks.size() + 1, true, {}); + + panmanUtils::Node* root = panmanTree->root; + + // Write Usher + Parsimony::data data; + data.set_newick(panmanTree->getNewickString(root)); + + getNodeDFS(data, root, globalCoords_t, pseudoRoot, sequence, blockExists, blockStrand); + + std::ofstream outfile(filename, std::ios::out | std::ios::binary); + boost::iostreams::filtering_streambuf< boost::iostreams::output> outbuf; + + if (filename.find(".gz\0") != std::string::npos) { + try { + outbuf.push(boost::iostreams::gzip_compressor()); + outbuf.push(outfile); + std::ostream outstream(&outbuf); + data.SerializeToOstream(&outstream); + boost::iostreams::close(outbuf); + outfile.close(); + } catch(const boost::iostreams::gzip_error& e) { + std::cout << e.what() << '\n'; + } + } else { + data.SerializeToOstream(&outfile); + outfile.close(); + } + + return; +} \ No newline at end of file diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index f94cd15..75de866 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -155,6 +155,7 @@ void setupOptionDescriptions() { ("index",po::value< bool >(0), "Generating indexes and print sequence (passed as reference) between x:y") ("printRoot", "Print root sequence") ("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") + ("toUsher", "Convert 
a PanMAT in PanMAN to Usher-MAT") ("low-mem-mode", "Perform Fitch Algrorithm in batch to save memory consumption") ("reference,n", po::value< std::string >(), "Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required)") @@ -1021,6 +1022,43 @@ void printRoot(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofs } +void toUsher(panmanUtils::TreeGroup *TG, po::variables_map &globalVm) { + // Print raw sequences to output file + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } + + panmanUtils::TreeGroup tg = *TG; + + auto fastaStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < tg.trees.size(); i++) { + panmanUtils::Tree * T = &tg.trees[i]; + std::string fileName; + if(globalVm.count("output-file")) { + fileName = globalVm["output-file"].as< std::string >(); + } else { + std::cout << "Output File not provided" << std::endl; + return; + } + std::string refName; + if(globalVm.count("reference")) { + refName = globalVm["reference"].as< std::string >(); + } else { + std::cout << "Reference not provided" << std::endl; + return; + } + + panmanUtils::panmanToUsher(T, refName, fileName); + + } + + auto fastaEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; + std::cout << "\nUsher Conversion time: " << fastaTime.count() << " nanoseconds\n"; + +} + void parseAndExecute(int argc, char* argv[]) { // Setup boost::program_options @@ -1309,6 +1347,9 @@ void parseAndExecute(int argc, char* argv[]) { } else if(globalVm.count("printRoot")) { printRoot(TG, globalVm, outputFile, buf); return; + } else if(globalVm.count("toUser")) { + toUsher(TG, globalVm); + return; } else { char** splitCommandArray; diff --git a/src/panmanUtils.hpp b/src/panmanUtils.hpp index 79ff833..78879e1 100644 --- a/src/panmanUtils.hpp +++ b/src/panmanUtils.hpp @@ -13,6 +13,7 @@ #include #include "panman.capnp.h" +#include 
"usher.pb.h" #include "panman.hpp" @@ -55,7 +56,7 @@ std::string stripString(std::string s); void stringSplit (std::string const& s, char delim, std::vector& words); - +void panmanToUsher(panmanUtils::Tree* panmanTree, std::string refName, std::string filename, std::string refSeq=""); // Represents input PanGraph information for PanMAT generation @@ -106,7 +107,7 @@ class Pangraph { tbb::concurrent_unordered_map< size_t, std::vector< std::pair< size_t, size_t > > > > > deletions; - Pangraph(Json::Value& pangraphData); + Pangraph(Json::Value& pangraphData, panmanUtils::Node* node=nullptr); std::vector< size_t > getTopologicalSort(); std::unordered_map< std::string,std::vector< int > > getAlignedSequences(const std::vector< size_t >& topoArray); diff --git a/src/rotation.cpp b/src/rotation.cpp index c72f484..43bd017 100644 --- a/src/rotation.cpp +++ b/src/rotation.cpp @@ -58,7 +58,7 @@ std::pair rotate_alignment(const std::vector& consensus, } - cout << max_.first << " " << max_.second << " "; + // cout << max_.first << " " << max_.second << " "; return max_; From e788da3732f8f8369fd1ff51c0d88bc9894657d5 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sat, 14 Dec 2024 03:17:52 -0800 Subject: [PATCH 074/103] snakemake workflow documentation --- docs/construction.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/construction.md b/docs/construction.md index dac26d5..7ec48fb 100644 --- a/docs/construction.md +++ b/docs/construction.md @@ -47,19 +47,17 @@ cd $PANMAN_HOME/build ``` The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. -### Building PanMAN from raw genome sequences -We provide scripts to construct panmanUtils inputs (PanGraph/GFA/MSA and Newick) from raw sequences (FASTA format), followed by building a panman. 
+### Building PanMAN from raw genome sequences (Snakemake Workflow) +We provide a Snakemake workflow to construct PanMANs from raw sequences (FASTA format). !!!Note - This script uses various tools such as PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively. The script is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](install.md)). + The Snakemake workflow uses various tools such as PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively and it is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](install.md)). -**Step 1:** Check if the `sars_20.fa` file exists in `test` directory. Alternatively, users can provide custom raw sequences (FASTA format) to build a panman. - -**Step 2:** Run the following command to construct a panman from raw sequences. +**Step 1:** Run the following command to construct a panman from raw sequences. 
```bash -cd $PANMAN_HOME/scripts -chmod +x build_panman.sh -./build_panman.sh pangraph/gfa/msa +cd $PANMAN_HOME/workflows +conda activate snakemake +snakemake --use-conda --cores [num threads] --config RUNTYPE="[pangraph/gfa/msa]" FASTA="[user_fasta]" SEQ_COUNT=[haplotype_count] ``` \ No newline at end of file From 86f3fa429420b49e4f07be7def1d274bd9445b9a Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Fri, 27 Dec 2024 10:04:38 -0800 Subject: [PATCH 075/103] updated summary --- src/chaining.cpp | 73 +++++++++++++++- src/fitchSankoff.cpp | 2 +- src/panman.cpp | 102 +++++++++++++--------- src/panman.hpp | 7 +- src/panmanUtils.cpp | 48 ++++++++--- src/summary.cpp | 198 ++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 369 insertions(+), 61 deletions(-) diff --git a/src/chaining.cpp b/src/chaining.cpp index 5e32a04..604d5b0 100644 --- a/src/chaining.cpp +++ b/src/chaining.cpp @@ -12,6 +12,8 @@ #include #include +#include "panman.hpp" + std::pair origin (-1,-1); std::pair base (0,0); @@ -19,7 +21,6 @@ std::pair base (0,0); using namespace std; - struct hashPair { template size_t operator()(const pair& p) const { @@ -151,7 +152,7 @@ void find_chain(NodeRangeTree* root, std::pair point, std::unordered_ma std::vector> chaining (std::vector &consensus, std::vector &sample) { std::vector> chain; - int K = 500; + int K = 4000; std::vector> points; // std::cout << "Finding seeds sequencial "; @@ -307,3 +308,71 @@ void chain_align ( } } + +void buildConsensusTree ( + std::vector> &chain, + std::vector &consensus, + std::vector &sample, + std::vector &consensus_new +) { + + int prev_consensus_coord = -1; + int prev_sample_coord = -1; + for (vector>::reverse_iterator i = chain.rbegin(); i != chain.rend(); ++i ) { + int consensus_coord = i->first; + int sample_coord = i->second; + + for (auto j = prev_consensus_coord + 1; j < consensus_coord; ++j) { + consensus_new.push_back(consensus[j]); + } + for (auto j = prev_sample_coord + 1; j < sample_coord; ++j) { + 
consensus_new.push_back(sample[j]); + } + consensus_new.push_back(consensus[consensus_coord]); + prev_consensus_coord = consensus_coord; + prev_sample_coord = sample_coord; + } + + for (auto j = prev_consensus_coord + 1; j < (int)consensus.size(); ++j) { + consensus_new.push_back(consensus[j]); + } + + for (auto j = prev_sample_coord + 1; j < (int)sample.size(); ++j) { + consensus_new.push_back(sample[j]); + } +} + +std::vector dfs( + panmanUtils::Node* node, + std::unordered_map< std::string, std::vector< std::string > >& paths) { + std::vector consensus; + if (node->children.size() == 0){ + std::vector path = paths[node->identifier]; + std::vector nodeConsensus(path.size()); + for (int i=0; i< path.size(); i++) { + nodeConsensus[i] = path[i]; + } + return nodeConsensus; + } + + for (auto &n: node->children) { + std::vector childConsensus = dfs(n, paths); + std::vector consensus_new; + std::vector> chain = chaining(consensus, childConsensus); + buildConsensusTree (chain, consensus, childConsensus, consensus_new); + std::cout << node->identifier << " consensus size: " << consensus_new.size() << "(" << childConsensus.size() << "," << consensus.size() << ")" << std::endl; + consensus.resize(consensus_new.size()); + for (int i=0; i >& paths) { + std::vector consensus = dfs(node, paths); + std::cout << "Size of conensus using tree structure: " << consensus.size() << std::endl; + return; +} diff --git a/src/fitchSankoff.cpp b/src/fitchSankoff.cpp index 61afc4a..b7c8629 100644 --- a/src/fitchSankoff.cpp +++ b/src/fitchSankoff.cpp @@ -30,7 +30,7 @@ int panmanUtils::Tree::nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states, int refState) { if(node->children.size() == 0) { if(states.find(node->identifier) == states.end()) { - std::cerr << "Node ID " << node->identifier << " not found" << std::endl; + // std::cerr << "Node ID " << node->identifier << " " << node->identifier.size() << " not found" << std::endl; return states[node->identifier] = 0; } 
return states[node->identifier]; diff --git a/src/panman.cpp b/src/panman.cpp index 4f4e956..7bfa0fc 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -18,6 +18,8 @@ #include #include #include +#include + #include "chaining.cpp" #include "rotation.cpp" @@ -32,7 +34,6 @@ #include "reroot.cpp" #include "aaTrans.cpp" #include "panman2usher.cpp" - #include "panmanUtils.hpp" char panmanUtils::getNucleotideFromCode(int code) { @@ -261,7 +262,6 @@ void panmanUtils::stringSplit (std::string const& s, char delim, std::vector leaves; @@ -272,7 +272,7 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new std::vector s1; stringSplit(newickString, ',', s1); - + numOpen.reserve(s1.size()); numClose.reserve(s1.size()); @@ -317,9 +317,11 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new } } } + leaves.push_back(std::move(leaf)); numOpen.push_back(no); numClose.push_back(nc); + // float len = (branch.size() > 0) ? std::stof(branch) : -1.0; float len = (branch.size() > 0) ? 
std::stof(branch) : 1.0; branchLen[level].push(len); @@ -340,35 +342,45 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new m_numLeaves = leaves.size(); std::stack parentStack; + // std::cout << "branchLen " << branchLen[level].size() << " " <identifier << '\t' << newNode->branchLength << '\n'; + branchLen[level].pop(); + level++; - for (size_t i=0; iidentifier << '\t' << newNode->branchLength << '\n'; - branchLen[level].pop(); - level++; - - allNodes[nid] = newNode; - parentStack.push(newNode); - } - Node* leafNode = new Node(leaf, parentStack.top(), branchLen[level].front()); - allNodes[leaf] = leafNode; + Node* leafNode = new Node(leaf, parentStack.top(), branchLen[level].front()); + allNodes[leaf] = leafNode; - branchLen[level].pop(); - for (size_t j=0; jidentifier << std::endl; std::map< std::pair, std::vector< uint32_t > > blockIdToConsensusSeq; @@ -5601,7 +5612,7 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg) { } panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg, std::ifstream& mutationFile) { - + // std::cout << "I am here" << std::endl; for (auto& t: tg) { trees.push_back(*t); } @@ -5610,7 +5621,10 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg, std::ifstream& mutat std::string line; while(getline(mutationFile, line, '\n')) { std::vector< std::string > tokens; - stringSplit(line, ' ', tokens); + stringSplit(line, '\t', tokens); + for (auto a: tokens) { + std::cout << a << std::endl; + } char mutationType = tokens[0][0]; size_t treeIndex1 = std::stoll(tokens[1]); std::string sequenceId1 = tokens[2]; @@ -5624,6 +5638,8 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg, std::ifstream& mutat std::string sequenceId3 = tokens[10]; bool splitOccurred = false; + std::cout << sequenceId1 << ", " << sequenceId2 << ": " << sequenceId3 << std::endl; + if(treeIndex3 == treeIndex1 && treeIndex3 == treeIndex2) { // If all three sequences are from the same tree, split this tree std::cout 
<< "Performing Split" << std::endl; @@ -5679,10 +5695,16 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg, std::ifstream& mutat panmanUtils::TreeGroup::TreeGroup(std::vector< std::ifstream >& treeFiles, std::ifstream& mutationFile) { for(size_t i = 0; i < treeFiles.size(); i++) { boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; - inPMATBuffer.push(boost::iostreams::gzip_decompressor()); + + inPMATBuffer.push(boost::iostreams::lzma_decompressor()); inPMATBuffer.push(treeFiles[i]); std::istream inputStream(&inPMATBuffer); + // boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; + // inPMATBuffer.push(boost::iostreams::gzip_decompressor()); + // inPMATBuffer.push(treeFiles[i]); + // std::istream inputStream(&inPMATBuffer); + trees.emplace_back(inputStream); } @@ -5762,12 +5784,14 @@ panmanUtils::TreeGroup::TreeGroup(std::istream& fin) { panman::TreeGroup::Reader TG = messageReader.getRoot(); - + int count=0; for (auto treeFromTG: TG.getTrees()){ + // std::cout << "Tree " << count++ << ".." << std::endl; trees.emplace_back(treeFromTG); } - + count=0; for (auto compMutFromTG: TG.getComplexMutations()){ + // std::cout << "Complex Mutation " << count++ << ".." << std::endl; complexMutations.emplace_back(compMutFromTG); } } @@ -5787,7 +5811,7 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { // std::cout << "Writing Trees..." << std::endl; for(auto& tree: trees) { - // std::cout << "Tree Count:" << treesCount << "..." << std::endl; + std::cout << "Tree Count:" << treesCount << "..." << std::endl; panman::Tree::Builder treeToWrite = treestoWriteBuilder[treesCount++]; Node* node = tree.root; @@ -5897,8 +5921,10 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { size_t cmplxMutCount=0; // std::cout << "Writing Complex Mutations..." << std::endl; for(auto cm: complexMutations) { - // std::cout << "Cmplx mutation Count:" << cmplxMutCount << "..." 
<< std::endl; - complexMutBuilder[cmplxMutCount++] = cm.toCapnProto(); + panman::ComplexMutation::Builder cmBuilder = complexMutBuilder[cmplxMutCount++]; + cm.toCapnProto(cmBuilder); + + } // ToDo check if the write was successful diff --git a/src/panman.hpp b/src/panman.hpp index 6cc73e2..2449d61 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -299,6 +299,8 @@ class Tree { // Get the total number of mutations of given type int getTotalParsimonyParallel(NucMutationType nucMutType, BlockMutationType blockMutType = NONE); + + void getBlockMutationsParallel(); // Run tree traversal to extract mutations in range panmanUtils::Node* extractPanMATSegmentHelper(panmanUtils::Node* node, @@ -635,8 +637,7 @@ struct ComplexMutation { nucGapPositionEnd2 = (cm.getNucGapExistEnd2()? (cm.getNucGapPositionEnd2()) : -1); } - panman::ComplexMutation::Builder toCapnProto() { - panman::ComplexMutation::Builder cm(nullptr); + void toCapnProto(panman::ComplexMutation::Builder& cm) { cm.setMutationType(mutationType == 'H'); cm.setTreeIndex1(treeIndex1); cm.setTreeIndex2(treeIndex2); @@ -701,7 +702,7 @@ struct ComplexMutation { cm.setNucGapPositionEnd2(nucGapPositionEnd2); } - return cm; + // return cm; } }; diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 75de866..c0c1e33 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -134,6 +134,7 @@ void setupOptionDescriptions() { ("input-gfa,G", po::value< std::string >(), "Input GFA file to build a PanMAN") ("input-msa,M", po::value< std::string >(), "Input MSA file (FASTA format) to build a PanMAN") ("input-newick,N", po::value< std::string >(), "Input tree topology as Newick string") + ("create-network,K",po::value< std::vector>(), "Create PanMAN with network of trees from single or multiple PanMAN files") // ("optimize", "currently UNSUPPORTED: whether given msa file should be optimized or not") @@ -149,7 +150,6 @@ void setupOptionDescriptions() { ("reroot,r", "Reroot a PanMAT in a PanMAN based on the input sequence 
id (--reference)") ("aa-translation,v", "Extract amino acid translations in tsv file") ("extended-newick,e", "Print PanMAN's network in extended-newick format") - ("create-network,k", "Create PanMAN with network of trees from single or multiple PanMAN files") ("printMutations,p", "Create PanMAN with network of trees from single or multiple PanMAN files") ("acr,q", "ACR method [fitch(default), mppa]") ("index",po::value< bool >(0), "Generating indexes and print sequence (passed as reference) between x:y") @@ -815,7 +815,7 @@ void aa(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream & if(globalVm.count("output-file")) outputFile.close(); } -void createNet(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { +void createNet(po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { // Create PanMAN from list of PanMAT files and a complex mutation file listing the complex // mutations relating these PanMATs @@ -827,19 +827,33 @@ void createNet(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofs return; } - fileNames = globalVm["tree-group"].as< std::vector< std::string > >(); + fileNames = globalVm["create-network"].as< std::vector< std::string > >(); mutationFileName = globalVm["input-file"].as< std::string >(); std::ifstream mutationFile(mutationFileName); std::vector< std::ifstream > files; for(auto u: fileNames) { + std::cout << u << std::endl; files.emplace_back(u); } auto treeBuiltStart = std::chrono::high_resolution_clock::now(); - TG = new panmanUtils::TreeGroup(files, mutationFile); + // Currently handle only one file + boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; + inPMATBuffer.push(boost::iostreams::lzma_decompressor()); + inPMATBuffer.push(files[0]); + std::istream inputStream(&inPMATBuffer); + panmanUtils::TreeGroup* TG = new panmanUtils::TreeGroup(inputStream); + + + std::vector< panmanUtils::Tree* > tg; + for 
(int i=0; itrees.size(); i++) { + tg.push_back(&TG->trees[i]); + } + + panmanUtils::TreeGroup* TG_new = new panmanUtils::TreeGroup(tg, mutationFile); auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; @@ -847,8 +861,11 @@ void createNet(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofs mutationFile.close(); for(auto& u: files) { - u.close(); + u.close(); } + + writePanMAN(globalVm,TG_new); + } void printMut(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { @@ -1282,6 +1299,12 @@ void parseAndExecute(int argc, char* argv[]) { writePanMAN(globalVm, TG); + } else if (globalVm.count("create-network")) { + std::cout << "Entering here" << std::endl; + std::ofstream outputFile; + std::streambuf * buf; + createNet(globalVm, outputFile, buf); + return; } else { panmanUtils::printError("Incorrect Format"); std::cout << globalDesc; @@ -1332,9 +1355,6 @@ void parseAndExecute(int argc, char* argv[]) { } else if (globalVm.count("aa-mutations")) { aa(TG, globalVm, outputFile, buf); return; - } else if(globalVm.count("create-network")) { - createNet(TG, globalVm, outputFile, buf); - return; } else if(globalVm.count("printMutations")) { printMut(TG, globalVm, outputFile, buf); return; @@ -1460,12 +1480,12 @@ void parseAndExecute(int argc, char* argv[]) { .run(), aaVm); aa(TG, aaVm, outputFile, buf); - } else if(strcmp(splitCommandArray[0], "create-network") == 0) { - po::variables_map createNetVm; - po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) - .options(createNetDesc) - .run(), createNetVm); - createNet(TG, createNetVm, outputFile, buf); + // } else if(strcmp(splitCommandArray[0], "create-network") == 0) { + // po::variables_map createNetVm; + // po::store(po::command_line_parser((int)splitCommand.size(), splitCommandArray) + // .options(createNetDesc) + // .run(), createNetVm); + // 
createNet(TG, createNetVm, outputFile, buf); } else if(strcmp(splitCommandArray[0], "printMutations") == 0) { po::variables_map printMutVm; diff --git a/src/summary.cpp b/src/summary.cpp index 0ebb5be..d057237 100644 --- a/src/summary.cpp +++ b/src/summary.cpp @@ -64,6 +64,196 @@ int panmanUtils::Tree::getTotalParsimonyParallel(NucMutationType nucMutType, Blo } +std::tuple getBlockMutationsParallelHelper(panmanUtils::Node* root) { + std::tuple muts(0,0,0); + + std::get<0>(muts) += tbb::parallel_reduce(tbb::blocked_range(0, root->blockMutation.size()), 0, [&](tbb::blocked_range r, int init) -> int{ + for(int i = r.begin(); i != r.end(); i++) { + if(root->blockMutation[i].blockMutInfo == panmanUtils::BlockMutationType::BI) { + init++; + } + } + return init; + }, [&](int x, int y) { + return x + y; + }); + + std::get<1>(muts) += tbb::parallel_reduce(tbb::blocked_range(0, root->blockMutation.size()), 0, [&](tbb::blocked_range r, int init) -> int{ + for(int i = r.begin(); i != r.end(); i++) { + if(root->blockMutation[i].inversion == false && root->blockMutation[i].blockMutInfo == panmanUtils::BlockMutationType::BD) { + init++; + } + } + return init; + }, [&](int x, int y) { + return x + y; + }); + + std::get<2>(muts) += tbb::parallel_reduce(tbb::blocked_range(0, root->blockMutation.size()), 0, [&](tbb::blocked_range r, int init) -> int{ + for(int i = r.begin(); i != r.end(); i++) { + if(root->blockMutation[i].inversion == true && root->blockMutation[i].blockMutInfo == panmanUtils::BlockMutationType::BD) { + init++; + } + } + return init; + }, [&](int x, int y) { + return x + y; + }); + + + for (auto i=0; i< root->children.size(); i++) { + std::tuple child_muts = getBlockMutationsParallelHelper(root->children[i]); + std::get<0>(muts) += std::get<0>(child_muts); + std::get<1>(muts) += std::get<1>(child_muts); + std::get<2>(muts) += std::get<2>(child_muts); + } + + return muts; +} + +std::tuple getOtherBlockMutationsParallelHelper( + panmanUtils::Node* root, + std::vector< 
bool >& blockExists, + std::vector< bool >& blockStrand, + std::vector>& dups, + std::vector& dupsPos) { + + std::tuple muts(0,0); + std::vector< bool > blockExistsParent = blockExists; + // For reversing block mutations - primary block id, secondary block id, old mutation, old strand, new mutation, new strand + std::vector< std::tuple< int32_t, bool, bool, bool, bool > > blockMutationInfo; + + // Block Mutations + for(auto mutation: root->blockMutation) { + int32_t primaryBlockId = mutation.primaryBlockId; + bool type = mutation.blockMutInfo; + bool inversion = mutation.inversion; + + // std::vector copies = map_[blocks[]] + + if(type == 1) { + // insertion + bool oldStrand; + bool oldMut; + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = true; + + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId] = !inversion; + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, true, !inversion) ); + + } else { + bool oldMut; + bool oldStrand; + if(inversion) { + // This means that this is not a deletion, but instead an inversion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockStrand[primaryBlockId] = !oldStrand; + + if(oldMut != true) { + std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; + } + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); + } else { + // Actually a deletion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = false; + + // resetting strand to true during deletion + blockStrand[primaryBlockId] = true; + } + blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, false, true) ); + } + } + + // std::cout << (blockExists == blockExistsParent) << std::endl; + + for(auto mutation: root->blockMutation) { + int32_t primaryBlockId = mutation.primaryBlockId; + bool type = mutation.blockMutInfo; + + if (type==1) { + std::vector localDups = dups[dupsPos[primaryBlockId]]; + for (auto d: localDups){ + if (d!=primaryBlockId & blockExists[d] & blockExistsParent[d]) { + std::get<0>(muts) += 1; + break; + } + + if (d!=primaryBlockId & !blockExists[d] & blockExistsParent[d]) { + std::get<1>(muts) += 1; + break; + } + } + } + } + + for(panmanUtils::Node* child: root->children) { + std::tuple mutsChild = getOtherBlockMutationsParallelHelper(child, blockExists, blockStrand, dups, dupsPos); + std::get<0>(muts) += std::get<0>(mutsChild); + std::get<1>(muts) += std::get<1>(mutsChild); + } + + // Undo block mutations when current node and its subtree have been processed + for(auto it = blockMutationInfo.rbegin(); it != blockMutationInfo.rend(); it++) { + auto mutation = *it; + blockExists[std::get<0>(mutation)] = std::get<1>(mutation); + blockStrand[std::get<0>(mutation)] = std::get<2>(mutation); + } + + return muts; +} + +struct VectorHash { + std::size_t operator()(const std::vector& vec) const { + std::size_t hash = 0; + for (uint32_t num : vec) { + hash ^= std::hash()(num) + 0x9e3779b9 + (hash << 6) + (hash >> 2); + } + return hash; + } +}; + +void panmanUtils::Tree::getBlockMutationsParallel() { + //insertions, deletions, inversions + std::tuple muts = 
getBlockMutationsParallelHelper(root); + std::cout << "Total Block Insertoins: " << std::get<0>(muts) << std::endl; + std::cout << "Total Block Deletions: " << std::get<1>(muts) << std::endl; + std::cout << "Total Block Inversion: " << std::get<2>(muts) << std::endl; + + // get duplicate blocks mapping (consensus to blockIDs) + std::unordered_map, std::vector, VectorHash> map_; + for (auto i=0; i consensus = blocks[i].consensusSeq; + map_[consensus].push_back(blocks[i].primaryBlockId); + } + + std::unordered_map, int, VectorHash> mapIndex; + std::vector> dups(map_.size()); + std::vector dupsPos(blocks.size()); + + int index = 0; + for(auto &a: map_){ + mapIndex[a.first] = index; + for (auto &b: a.second){ + dupsPos[b] = index; + } + dups[index] = a.second; + index++; + } + + // List of blocks. Each block has a nucleotide list. Along with each nucleotide is a gap list. + std::vector< bool > blockExists(blocks.size(), false); + std::vector< bool > blockStrand(blocks.size(), true); + + std::tuple otherMuts = getOtherBlockMutationsParallelHelper(root, blockExists, blockStrand, dups, dupsPos); + std::cout << "Total Block Duplications: " << std::get<0>(otherMuts) << std::endl; + std::cout << "Total Block Translocation: " << std::get<1>(otherMuts) << std::endl; +} + void panmanUtils::Tree::printSummary(std::ostream &out) { out << "Total Nodes in Tree: " << m_currInternalNode + m_numLeaves << std::endl; @@ -72,10 +262,12 @@ void panmanUtils::Tree::printSummary(std::ostream &out) { out << "Total Insertions: " << getTotalParsimonyParallel(panmanUtils::NucMutationType::NI, panmanUtils::BlockMutationType::BI) << std::endl; out << "Total Deletions: " << getTotalParsimonyParallel(panmanUtils::NucMutationType::ND, panmanUtils::BlockMutationType::BD) << std::endl; out << "Total Inversions: " << getTotalParsimonyParallel(panmanUtils::NucMutationType::NNONE, panmanUtils::BlockMutationType::BIn) << std::endl; - out << "Total SNP Substitutions: " << 
getTotalParsimonyParallel(panmanUtils::NucMutationType::NSNPS) << std::endl; - out << "Total SNP Insertions: " << getTotalParsimonyParallel(panmanUtils::NucMutationType::NSNPI) << std::endl; - out << "Total SNP Deletions: " << getTotalParsimonyParallel(panmanUtils::NucMutationType::NSNPD) << std::endl; + // out << "Total SNP Substitutions: " << getTotalParsimonyParallel(panmanUtils::NucMutationType::NSNPS) << std::endl; + // out << "Total SNP Insertions: " << getTotalParsimonyParallel(panmanUtils::NucMutationType::NSNPI) << std::endl; + // out << "Total SNP Deletions: " << getTotalParsimonyParallel(panmanUtils::NucMutationType::NSNPD) << std::endl; out << "Max Tree Depth: " << m_maxDepth << std::endl; out << "Mean Tree Depth: " << m_meanDepth << std::endl; + getBlockMutationsParallel(); + } From 73a4fce6e3a690adf5f8b99b7bb11ccaac98dfec Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sat, 28 Dec 2024 05:23:29 -0800 Subject: [PATCH 076/103] usher proto --- usher.proto | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 usher.proto diff --git a/usher.proto b/usher.proto new file mode 100644 index 0000000..3c7532a --- /dev/null +++ b/usher.proto @@ -0,0 +1,31 @@ +syntax = "proto3"; +package Parsimony; + +message mut { + int32 position = 1; // Position in the chromosome + /* All nucleotides are encoded as integers (0:A, 1:C, 2:G, 3:T) */ + int32 ref_nuc = 2; // Reference nucleotide at this position + int32 par_nuc = 3; // Nucleotide of parent at this position + repeated int32 mut_nuc = 4; // Mutated nucleotide in this node at this position + string chromosome = 5; // Chromosome string. Currently unused. 
+} + +message mutation_list { + repeated mut mutation = 1; +} + +message condensed_node { + string node_name = 1; // The node name as given in the newick tree + repeated string condensed_leaves = 2; // A list of strings for the names of identical sequences all of which are represented by the node above +} + +message node_metadata { + repeated string clade_annotations = 1; +} + +message data { + string newick = 1; // Newick tree string. May contain distances, but note that these may be distinct from distances as calculated with UShER + repeated mutation_list node_mutations = 2; // Mutations_list object for each node of this tree, in the order that nodes are encountered in a preorder traversal of the tree in the newick string + repeated condensed_node condensed_nodes = 3; // A dictionary-like object mapping names in the newick tree to a larger set of identical nodes that have been collapsed into this single node + repeated node_metadata metadata = 4; // Clade annotations on a per-node basis, in the order that nodes are encountered in a preorder traversal of the tree +} \ No newline at end of file From a463256ffd148279f635f4296864723657b8ffb1 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sat, 28 Dec 2024 05:24:58 -0800 Subject: [PATCH 077/103] usher proto --- CMakeLists.txt | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 558bee0..941fdd5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,23 +1,24 @@ cmake_minimum_required (VERSION 3.8) -project(panmanUtilsNew) +project(panmanUtils) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options") set(CMAKE_INCLUDE_CURRENT_DIR ON) -# if(DEFINED Protobuf_PATH) -# find_package(Protobuf REQUIRED HINTS ${Protobuf_PATH}) -# else() -# find_package(Protobuf REQUIRED) -# endif() -# Print version 
messages -# if(Protobuf_FOUND) -# message(STATUS "Using Protocol Buffers ${Protobuf_VERSION}") -# endif() -# include_directories(${Protobuf_INCLUDE_DIRS}) +# add google proto +if(DEFINED Protobuf_PATH) + find_package(Protobuf REQUIRED HINTS ${Protobuf_PATH}) +else() + find_package(Protobuf REQUIRED) +endif() +if(Protobuf_FOUND) + message(STATUS "Using Protocol Buffers ${Protobuf_VERSION}") +endif() +include_directories(${Protobuf_INCLUDE_DIRS}) + -# Adding capn proto +# add capn proto find_program(CAPNP_EXECUTABLE capnp REQUIRED) find_program(CAPNPC_CXX_EXECUTABLE capnpc-c++ REQUIRED) @@ -46,29 +47,40 @@ find_package(jsoncpp CONFIG REQUIRED) file(GLOB PANMAT_SRCS "src/panmanUtils.cpp" "src/panman.cpp" "src/panmanUtils.hpp" "src/panman.hpp") if(DEFINED CapnProto_PATH) - add_executable(panmanUtilsNew + add_executable(panmanUtils ${PANMAT_SRCS} ) capnp_generate( LANGUAGE cpp - TARGET panmanUtilsNew + TARGET panmanUtils PROTOS panman.capnp) + + protouf_generate( + LANGUAGE cpp + TARGET panmanUtils + PROTOS usher.proto) else() capnp_generate_cpp( CAPNP_SRCS CAPNP_HDRS panman.capnp) - add_executable(panmanUtilsNew + protobuf_generate_cpp( + PROTO_SRCS PROTO_HDRS + usher.proto) + + add_executable(panmanUtils ${PANMAT_SRCS} ${CAPNP_SRCS} ${CAPNP_HDRS} + ${PROTO_SRCS} + ${PROTO_HDRS} ) endif() -TARGET_COMPILE_OPTIONS(panmanUtilsNew PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) +TARGET_COMPILE_OPTIONS(panmanUtils PRIVATE -DTBB_SUPPRESS_DEPRECATED_MESSAGES) -TARGET_LINK_LIBRARIES(panmanUtilsNew PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) -target_include_directories(panmanUtilsNew PUBLIC "${PROJECT_BINARY_DIR}") +TARGET_LINK_LIBRARIES(panmanUtils PRIVATE stdc++ JsonCpp::JsonCpp ${Boost_LIBRARIES} ${TBB_IMPORTED_TARGETS} ${CAPNP_LIBRARIES} ${Protobuf_LIBRARIES}) #${Protobuf_LIBRARIES} ${Boost_LIBRARIES} ) # OpenMP::OpenMP_CXX) 
+target_include_directories(panmanUtils PUBLIC "${PROJECT_BINARY_DIR}") From 912d480a62202855a9ea52880af6c5e605d8110b Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 29 Dec 2024 09:35:01 -0800 Subject: [PATCH 078/103] ultra fast fasta write --- src/fasta.cpp | 1118 ++++++++++++++++++++++++++++++++++--------- src/panman.cpp | 21 +- src/panman.hpp | 27 +- src/panmanUtils.cpp | 53 +- src/panmanUtils.hpp | 8 +- src/vcf.cpp | 180 ++++++- 6 files changed, 1151 insertions(+), 256 deletions(-) diff --git a/src/fasta.cpp b/src/fasta.cpp index fcb9b54..ae3f5fc 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -152,9 +152,10 @@ void panmanUtils::printSequenceLines(const sequence_t& sequence,\ } -void panmanUtils::printSequenceLinesNew(const std::vector>>>& sequence, +std::string panmanUtils::printSequenceLinesNew(const std::vector>>>& sequence, + std::unordered_map& blockLengths, const std::vector& blockExists, - const std::vector& blockStrand, size_t lineSize, bool aligned, std::ostream& fout, int offset, bool debug) { + const std::vector& blockStrand, size_t lineSize, bool aligned, int offset, bool debug) { // String that stores the sequence to be printed std::string line; @@ -162,6 +163,7 @@ void panmanUtils::printSequenceLinesNew(const std::vector>>>& sequence, - std::vector& blockExists, - std::vector& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { +// void panmanUtils::Tree::printFASTAHelperNew(panmanUtils::Node* root, +// std::vector>>>& sequence, +// std::vector& blockExists, +// std::vector& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { - // For reversing block mutations - primary block id, secondary block id, old mutation, old strand, new mutation, new strand - std::vector< std::tuple< int32_t, 
bool, bool, bool, bool > > blockMutationInfo; +// // For reversing block mutations - primary block id, secondary block id, old mutation, old strand, new mutation, new strand +// std::vector< std::tuple< int32_t, bool, bool, bool, bool > > blockMutationInfo; + +// // Block Mutations +// for(auto mutation: root->blockMutation) { +// int32_t primaryBlockId = mutation.primaryBlockId; +// bool type = mutation.blockMutInfo; +// bool inversion = mutation.inversion; + +// if(type == 1) { +// // insertion + +// bool oldStrand; +// bool oldMut; +// oldStrand = blockStrand[primaryBlockId]; +// oldMut = blockExists[primaryBlockId]; +// blockExists[primaryBlockId] = true; + +// // if insertion of inverted block takes place, the strand is backwards +// blockStrand[primaryBlockId] = !inversion; +// blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, true, !inversion) ); + + +// } else { +// bool oldMut; +// bool oldStrand; +// if(inversion) { +// // This means that this is not a deletion, but instead an inversion +// oldStrand = blockStrand[primaryBlockId]; +// oldMut = blockExists[primaryBlockId]; +// blockStrand[primaryBlockId] = !oldStrand; + +// if(oldMut != true) { +// std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; +// } +// blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); +// } else { +// // Actually a deletion +// oldStrand = blockStrand[primaryBlockId]; +// oldMut = blockExists[primaryBlockId]; +// blockExists[primaryBlockId] = false; - // Block Mutations - for(auto mutation: root->blockMutation) { - int32_t primaryBlockId = mutation.primaryBlockId; - bool type = mutation.blockMutInfo; - bool inversion = mutation.inversion; +// // resetting strand to true during deletion +// blockStrand[primaryBlockId] = true; +// } +// blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, false, true) ); - if(type == 1) { - // insertion +// } - bool oldStrand; - bool oldMut; - oldStrand = blockStrand[primaryBlockId]; - oldMut = blockExists[primaryBlockId]; - blockExists[primaryBlockId] = true; +// // } - // if insertion of inverted block takes place, the strand is backwards - blockStrand[primaryBlockId] = !inversion; - blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, true, !inversion) ); + +// } + +// // For backtracking. 
primaryBlockId, secondaryBlockId, pos, gapPos, (oldVal, newVal) in substitution, ('-', newVal) in insertion, (oldVal, '-') in deletion +// std::vector< std::tuple< int32_t, int, int, char, char > > mutationInfo; + +// // Nuc mutations +// for(size_t i = 0; i < root->nucMutation.size(); i++) { +// int32_t primaryBlockId = root->nucMutation[i].primaryBlockId; +// int32_t secondaryBlockId = root->nucMutation[i].secondaryBlockId; + +// // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { +// int32_t nucPosition = root->nucMutation[i].nucPosition; +// int32_t nucGapPosition = root->nucMutation[i].nucGapPosition; +// uint32_t type = (root->nucMutation[i].mutInfo & 0x7); +// char newVal = '-'; + +// if(type < 3) { +// // Either S, I or D +// int len = ((root->nucMutation[i].mutInfo) >> 4); + +// if(primaryBlockId >= sequence.size()) { +// std::cout << primaryBlockId << " " << sequence.size() << std::endl; +// } + +// if(type == panmanUtils::NucMutationType::NS) { +// // Substitution +// if(nucGapPosition != -1) { +// for(int j = 0; j < len; j++) { +// char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; +// newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); +// sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); +// } +// } else { +// for(int j = 0; j < len; j++) { +// char oldVal = sequence[primaryBlockId][nucPosition+j].first; +// newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); +// sequence[primaryBlockId][nucPosition+j].first = newVal; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); +// } +// } +// } else if(type == 
panmanUtils::NucMutationType::NI) { +// // Insertion +// if(nucGapPosition != -1) { +// for(int j = 0; j < len; j++) { +// char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; +// newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); +// sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); +// } +// } else { +// for(int j = 0; j < len; j++) { +// char oldVal = sequence[primaryBlockId][nucPosition+j].first; +// newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); +// sequence[primaryBlockId][nucPosition+j].first = newVal; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); +// } +// } +// } else if(type == panmanUtils::NucMutationType::ND) { +// // Deletion +// if(nucGapPosition != -1) { +// for(int j = 0; j < len; j++) { +// char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; +// sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-')); +// } +// } else { +// for(int j = 0; j < len; j++) { +// char oldVal = sequence[primaryBlockId][nucPosition+j].first; +// sequence[primaryBlockId][nucPosition+j].first = '-'; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-')); +// } +// } +// } +// } else { +// if(type == panmanUtils::NucMutationType::NSNPS) { +// // SNP Substitution +// newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); +// if(nucGapPosition != -1) { +// char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; +// sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; +// 
mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); +// } else { +// char oldVal = sequence[primaryBlockId][nucPosition].first; +// sequence[primaryBlockId][nucPosition].first = newVal; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); +// } +// } else if(type == panmanUtils::NucMutationType::NSNPI) { +// // SNP Insertion +// newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); +// if(nucGapPosition != -1) { +// char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; +// sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); +// } else { +// char oldVal = sequence[primaryBlockId][nucPosition].first; +// sequence[primaryBlockId][nucPosition].first = newVal; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); +// } +// } else if(type == panmanUtils::NucMutationType::NSNPD) { +// // SNP Deletion +// if(nucGapPosition != -1) { +// char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; +// sequence[primaryBlockId][nucPosition].second[nucGapPosition] = '-'; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); +// } else { +// char oldVal = sequence[primaryBlockId][nucPosition].first; +// sequence[primaryBlockId][nucPosition].first = '-'; +// mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); +// } +// } +// } +// } +// // } + +// if(root->children.size() == 0 || rootSeq) { +// // Print sequence + +// fout << '>' << root->identifier << std::endl; + +// int offset = 0; +// if(!aligned && circularSequences.find(root->identifier) != circularSequences.end()) { +// // If MSA is to be printed, offset doesn't matter +// offset = 
circularSequences[root->identifier]; +// } +// std::vector< std::vector< std::pair< char, std::vector< char > > > > sequencePrint = sequence; +// std::vector< bool > blockExistsPrint = blockExists; +// std::vector< bool > blockStrandPrint = blockStrand; + +// if(rotationIndexes.find(root->identifier) != rotationIndexes.end() && rotationIndexes[root->identifier] != 0) { +// int ctr = -1, rotInd = 0; +// for(size_t i = 0; i < blockExistsPrint.size(); i++) { +// if(blockExistsPrint[i]) { +// ctr++; +// } +// if(ctr == rotationIndexes[root->identifier]) { +// rotInd = i; +// break; +// } +// } +// // std::cout << "rotating" << std::endl; +// rotate(sequencePrint.begin(), sequencePrint.begin() + rotInd, sequencePrint.end()); +// rotate(blockExistsPrint.begin(), blockExistsPrint.begin() + rotInd, blockExistsPrint.end()); +// rotate(blockStrandPrint.begin(), blockStrandPrint.begin() + rotInd, blockStrandPrint.end()); +// } + +// if(sequenceInverted.find(root->identifier) != sequenceInverted.end() && sequenceInverted[root->identifier]) { +// // std::cout << "inverting" << std::endl; +// reverse(sequencePrint.begin(), sequencePrint.end()); +// reverse(blockExistsPrint.begin(), blockExistsPrint.end()); +// reverse(blockStrandPrint.begin(), blockStrandPrint.end()); +// } + +// panmanUtils::printSequenceLinesNew(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); +// } else { +// // DFS on children +// for(panmanUtils::Node* child: root->children) { +// printFASTAHelperNew(child, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); - } else { - bool oldMut; - bool oldStrand; - if(inversion) { - // This means that this is not a deletion, but instead an inversion - oldStrand = blockStrand[primaryBlockId]; - oldMut = blockExists[primaryBlockId]; - blockStrand[primaryBlockId] = !oldStrand; - - if(oldMut != true) { - std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; - } - blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); - } else { - // Actually a deletion - oldStrand = blockStrand[primaryBlockId]; - oldMut = blockExists[primaryBlockId]; - blockExists[primaryBlockId] = false; +// } +// } - // resetting strand to true during deletion - blockStrand[primaryBlockId] = true; - } - blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, false, true) ); - } +// // Undo block mutations when current node and its subtree have been processed +// for(auto it = blockMutationInfo.rbegin(); it != blockMutationInfo.rend(); it++) { +// auto mutation = *it; +// blockExists[std::get<0>(mutation)] = std::get<1>(mutation); +// blockStrand[std::get<0>(mutation)] = std::get<2>(mutation); - // } +// } - - } +// // Undo nuc mutations when current node and its subtree have been processed +// for(auto it = mutationInfo.rbegin(); it != mutationInfo.rend(); it++) { +// auto mutation = *it; +// if(std::get<2>(mutation) != -1) { +// sequence[std::get<0>(mutation)][std::get<1>(mutation)].second[std::get<2>(mutation)] = std::get<3>(mutation); +// } else { +// sequence[std::get<0>(mutation)][std::get<1>(mutation)].first = std::get<3>(mutation); +// } - // For backtracking. 
primaryBlockId, secondaryBlockId, pos, gapPos, (oldVal, newVal) in substitution, ('-', newVal) in insertion, (oldVal, '-') in deletion - std::vector< std::tuple< int32_t, int, int, char, char > > mutationInfo; +// } - // Nuc mutations - for(size_t i = 0; i < root->nucMutation.size(); i++) { - int32_t primaryBlockId = root->nucMutation[i].primaryBlockId; - int32_t secondaryBlockId = root->nucMutation[i].secondaryBlockId; +// // std::cout << "Done iteration for node: " << root->identifier << std::endl; +// } - // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { - int32_t nucPosition = root->nucMutation[i].nucPosition; - int32_t nucGapPosition = root->nucMutation[i].nucGapPosition; - uint32_t type = (root->nucMutation[i].mutInfo & 0x7); - char newVal = '-'; - if(type < 3) { - // Either S, I or D - int len = ((root->nucMutation[i].mutInfo) >> 4); +// void panmanUtils::Tree::printFASTANew(std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { +// // List of blocks. Each block has a nucleotide list. Along with each nucleotide is a gap list. 
+// std::vector< std::vector< std::pair< char, std::vector< char > > > > sequence(blocks.size() + 1); +// std::vector< bool > blockExists(blocks.size() + 1, false, {}); +// std::vector< bool > blockStrand(blocks.size() + 1, true, {}); - if(primaryBlockId >= sequence.size()) { - std::cout << primaryBlockId << " " << sequence.size() << std::endl; + +// int32_t maxBlockId = 0; + +// // Create consensus sequence of blocks +// for(size_t i = 0; i < blocks.size(); i++) { + +// int32_t primaryBlockId = ((int32_t)blocks[i].primaryBlockId); + +// maxBlockId = std::max(maxBlockId, primaryBlockId); + +// for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { +// bool endFlag = false; +// for(size_t k = 0; k < 8; k++) { +// const int nucCode = (((blocks[i].consensusSeq[j]) >> (4*(7 - k))) & 15); + +// if(nucCode == 0) { +// endFlag = true; +// break; +// } +// const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); + +// sequence[primaryBlockId].push_back({nucleotide, {}}); +// } + +// if(endFlag) { +// break; +// } +// } + +// // End character to incorporate for gaps at the end +// sequence[primaryBlockId].push_back({'x', {}}); +// } + +// sequence.resize(maxBlockId + 1); +// blockExists.resize(maxBlockId + 1); +// blockStrand.resize(maxBlockId + 1); + +// // Assigning nucleotide gaps in blocks +// for(size_t i = 0; i < gaps.size(); i++) { +// int32_t primaryBId = (gaps[i].primaryBlockId); +// int32_t secondaryBId = (gaps[i].secondaryBlockId); + +// for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { +// int len = gaps[i].nucGapLength[j]; +// int pos = gaps[i].nucPosition[j]; +// sequence[primaryBId][pos].second.resize(len, '-'); +// } +// } + +// // Run depth first traversal to extract sequences + +// printFASTAHelperNew(root, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); + +// } + +void getNodesFromTipToRoot(panmanUtils::Node* node, std::vector &nodesFromTipToRoot){ + panmanUtils::Node* currentNode = 
node; + while(currentNode->parent != nullptr){ + nodesFromTipToRoot.push_back(currentNode); + currentNode = currentNode->parent; + } + nodesFromTipToRoot.push_back(currentNode); + + reverse(nodesFromTipToRoot.begin(), nodesFromTipToRoot.end()); + + return; +} + +void getBlockSequence(std::vector &nodesFromTipToRoot, + std::vector< bool >& blockExists){ + // panmanUtils::Node* node; + for (auto node: nodesFromTipToRoot){ + // node = nodesFromTipToRoot[i]; + for(auto mutation: node->blockMutation) { + int32_t primaryBlockId = mutation.primaryBlockId; + bool type = mutation.blockMutInfo; + bool inversion = mutation.inversion; + if(type == 1) { + // insertion + blockExists[primaryBlockId] = true; + } else { + // deletion + if(!inversion) { + blockExists[primaryBlockId] = false; + } } + } + } - if(type == panmanUtils::NucMutationType::NS) { - // Substitution - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); - } +} + +std::string panmanUtils::Tree::printFASTAUltraFastHelper( + const std::vector& blockSequence, + std::unordered_map& blockLengths, + const std::vector& nodesFromTipToRoot, + std::vector>>>& sequence, + std::vector& blockExists, + std::vector& blockStrand, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { + + + for (auto node: nodesFromTipToRoot){ + // Block Mutations + for(auto mutation: node->blockMutation) { + int32_t primaryBlockId = mutation.primaryBlockId; + bool type = mutation.blockMutInfo; + bool inversion = mutation.inversion; + if (blockSequence[primaryBlockId]) { + if(type == 1) { + // 
insertion + bool oldStrand; + bool oldMut; + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = true; + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId] = !inversion; } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId][nucPosition+j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId][nucPosition+j].first = newVal; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); + bool oldMut; + bool oldStrand; + if(inversion) { + // This means that this is not a deletion, but instead an inversion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockStrand[primaryBlockId] = !oldStrand; + if(oldMut != true) { + std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; + } + } else { + // Actually a deletion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = false; + // resetting strand to true during deletion + blockStrand[primaryBlockId] = true; } } - } else if(type == panmanUtils::NucMutationType::NI) { - // Insertion - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, newVal)); - } - } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId][nucPosition+j].first; - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); - 
sequence[primaryBlockId][nucPosition+j].first = newVal; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, newVal)); + } + } + + // Nuc mutations + for(size_t i = 0; i < node->nucMutation.size(); i++) { + int32_t primaryBlockId = node->nucMutation[i].primaryBlockId; + int32_t secondaryBlockId = node->nucMutation[i].secondaryBlockId; + + if (blockSequence[primaryBlockId]) { + // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { + int32_t nucPosition = node->nucMutation[i].nucPosition; + int32_t nucGapPosition = node->nucMutation[i].nucGapPosition; + uint32_t type = (node->nucMutation[i].mutInfo & 0x7); + char newVal = '-'; + + if(type < 3) { + // Either S, I or D + int len = ((node->nucMutation[i].mutInfo) >> 4); + + if(primaryBlockId >= sequence.size()) { + std::cout << primaryBlockId << " " << sequence.size() << std::endl; } - } - } else if(type == panmanUtils::NucMutationType::ND) { - // Deletion - if(nucGapPosition != -1) { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; - sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition+j, oldVal, '-')); + + if(type == panmanUtils::NucMutationType::NS) { + // Substitution + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 
(4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition+j].first = newVal; + } + } + } else if(type == panmanUtils::NucMutationType::NI) { + // Insertion + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition+j].first = newVal; + } + } + } else if(type == panmanUtils::NucMutationType::ND) { + // Deletion + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + sequence[primaryBlockId][nucPosition+j].first = '-'; + } + } } } else { - for(int j = 0; j < len; j++) { - char oldVal = sequence[primaryBlockId][nucPosition+j].first; - sequence[primaryBlockId][nucPosition+j].first = '-'; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition + j, nucGapPosition, oldVal, '-')); + if(type == panmanUtils::NucMutationType::NSNPS) { + // SNP Substitution + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 20) & 0xF); + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; + } else { + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = newVal; + } + } else if(type == 
panmanUtils::NucMutationType::NSNPI) { + // SNP Insertion + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 20) & 0xF); + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; + } else { + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = newVal; + } + } else if(type == panmanUtils::NucMutationType::NSNPD) { + // SNP Deletion + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = '-'; + } else { + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = '-'; + } } } } - } else { - if(type == panmanUtils::NucMutationType::NSNPS) { - // SNP Substitution - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; - sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); - } else { - char oldVal = sequence[primaryBlockId][nucPosition].first; - sequence[primaryBlockId][nucPosition].first = newVal; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); - } - } else if(type == panmanUtils::NucMutationType::NSNPI) { - // SNP Insertion - newVal = panmanUtils::getNucleotideFromCode(((root->nucMutation[i].nucs) >> 20) & 0xF); - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; - sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); - } 
else { - char oldVal = sequence[primaryBlockId][nucPosition].first; - sequence[primaryBlockId][nucPosition].first = newVal; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, newVal)); - } - } else if(type == panmanUtils::NucMutationType::NSNPD) { - // SNP Deletion - if(nucGapPosition != -1) { - char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; - sequence[primaryBlockId][nucPosition].second[nucGapPosition] = '-'; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); - } else { - char oldVal = sequence[primaryBlockId][nucPosition].first; - sequence[primaryBlockId][nucPosition].first = '-'; - mutationInfo.push_back(std::make_tuple(primaryBlockId, nucPosition, nucGapPosition, oldVal, '-')); - } + } + } + + + // Store sequence + panmanUtils::Node* tipNode = nodesFromTipToRoot[nodesFromTipToRoot.size()-1]; + std::string line=""; + line += '>' + tipNode->identifier + '\n'; + + + int offset = 0; + if(!aligned && circularSequences.find(tipNode->identifier) != circularSequences.end()) { + // If MSA is to be printed, offset doesn't matter + offset = circularSequences[tipNode->identifier]; + } + std::vector< std::vector< std::pair< char, std::vector< char > > > > sequencePrint = sequence; + std::vector< bool > blockExistsPrint = blockExists; + std::vector< bool > blockStrandPrint = blockStrand; + + if(rotationIndexes.find(tipNode->identifier) != rotationIndexes.end() && rotationIndexes[tipNode->identifier] != 0) { + int ctr = -1, rotInd = 0; + for(size_t i = 0; i < blockExistsPrint.size(); i++) { + if(blockExistsPrint[i]) { + ctr++; + } + if(ctr == rotationIndexes[tipNode->identifier]) { + rotInd = i; + break; } } + // std::cout << "rotating" << std::endl; + rotate(sequencePrint.begin(), sequencePrint.begin() + rotInd, sequencePrint.end()); + rotate(blockExistsPrint.begin(), blockExistsPrint.begin() + rotInd, blockExistsPrint.end()); + 
rotate(blockStrandPrint.begin(), blockStrandPrint.begin() + rotInd, blockStrandPrint.end()); } - // } - if(root->children.size() == 0 || rootSeq) { - // Print sequence + if(sequenceInverted.find(tipNode->identifier) != sequenceInverted.end() && sequenceInverted[tipNode->identifier]) { + // std::cout << "inverting" << std::endl; + reverse(sequencePrint.begin(), sequencePrint.end()); + reverse(blockExistsPrint.begin(), blockExistsPrint.end()); + reverse(blockStrandPrint.begin(), blockStrandPrint.end()); + } + + line += panmanUtils::printSequenceLinesNew(sequencePrint, blockLengths, blockExistsPrint, blockStrandPrint, 70, aligned, offset, false); + return line; +} - fout << '>' << root->identifier << std::endl; +void panmanUtils::Tree::printFASTAUltraFast(std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { - int offset = 0; - if(!aligned && circularSequences.find(root->identifier) != circularSequences.end()) { - // If MSA is to be printed, offset doesn't matter - offset = circularSequences[root->identifier]; + std::unordered_map< std::string, std::mutex > nodeMutexes; + for(auto u: allNodes) { + if (u.second->children.size() == 0) { + nodeMutexes[u.first]; } - std::vector< std::vector< std::pair< char, std::vector< char > > > > sequencePrint = sequence; - std::vector< bool > blockExistsPrint = blockExists; - std::vector< bool > blockStrandPrint = blockStrand; + } - if(rotationIndexes.find(root->identifier) != rotationIndexes.end() && rotationIndexes[root->identifier] != 0) { - int ctr = -1, rotInd = 0; - for(size_t i = 0; i < blockExistsPrint.size(); i++) { - if(blockExistsPrint[i]) { - ctr++; + std::mutex printMutex; + + // for (auto &keyValue: allNodes) { + tbb::parallel_for_each(allNodes.begin(), allNodes.end(), [&](const std::pair& keyValue) { + panmanUtils::Node* node = keyValue.second; + + // Create a stringstream for each thread to avoid race 
conditions on fout + if (node->children.size() != 0) { + return; + } + // Get block sequnece of the Tip + std::vector< bool > blockSequence(blocks.size() + 1, false, {}); + std::vector nodesFromTipToRoot; + getNodesFromTipToRoot(node, nodesFromTipToRoot); + getBlockSequence(nodesFromTipToRoot, blockSequence); + + // Blocks length + std::unordered_map blockLengths; + + // Expanding blocks only if exist in tip + std::vector< std::vector< std::pair< char, std::vector< char > > > > sequence(blocks.size() + 1); + std::vector< bool > blockExists(blocks.size() + 1, false, {}); + std::vector< bool > blockStrand(blocks.size() + 1, true, {}); + + + int32_t maxBlockId = 0; + + // Create consensus sequence of blocks + for(size_t i = 0; i < blocks.size(); i++) { + int32_t primaryBlockId = ((int32_t)blocks[i].primaryBlockId); + blockLengths[primaryBlockId] = 0; + maxBlockId = std::max(maxBlockId, primaryBlockId); + if (blockSequence[primaryBlockId]) { + int len = 0; + for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { + bool endFlag = false; + for(size_t k = 0; k < 8; k++) { + const int nucCode = (((blocks[i].consensusSeq[j]) >> (4*(7 - k))) & 15); + + if(nucCode == 0) { + endFlag = true; + break; + } + len++; + const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); + sequence[primaryBlockId].push_back({nucleotide, {}}); + } + + if(endFlag) { + break; + } } - if(ctr == rotationIndexes[root->identifier]) { - rotInd = i; - break; + // End character to incorporate for gaps at the end + sequence[primaryBlockId].push_back({'x', {}}); + blockLengths[primaryBlockId] += len; + } else { + int len = 0; + for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { + bool endFlag = false; + for(size_t k = 0; k < 8; k++) { + const int nucCode = (((blocks[i].consensusSeq[j]) >> (4*(7 - k))) & 15); + if(nucCode == 0) { + endFlag = true; + break; + } + len++; + const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); + } + + if(endFlag) { + break; + } } + 
blockLengths[primaryBlockId] += len; } - // std::cout << "rotating" << std::endl; - rotate(sequencePrint.begin(), sequencePrint.begin() + rotInd, sequencePrint.end()); - rotate(blockExistsPrint.begin(), blockExistsPrint.begin() + rotInd, blockExistsPrint.end()); - rotate(blockStrandPrint.begin(), blockStrandPrint.begin() + rotInd, blockStrandPrint.end()); } - if(sequenceInverted.find(root->identifier) != sequenceInverted.end() && sequenceInverted[root->identifier]) { - // std::cout << "inverting" << std::endl; - reverse(sequencePrint.begin(), sequencePrint.end()); - reverse(blockExistsPrint.begin(), blockExistsPrint.end()); - reverse(blockStrandPrint.begin(), blockStrandPrint.end()); + sequence.resize(maxBlockId + 1); + blockExists.resize(maxBlockId + 1); + blockStrand.resize(maxBlockId + 1); + + // Assigning nucleotide gaps in blocks + for(size_t i = 0; i < gaps.size(); i++) { + int32_t primaryBId = (gaps[i].primaryBlockId); + int32_t secondaryBId = (gaps[i].secondaryBlockId); + if (blockSequence[primaryBId]){ + for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + int len = gaps[i].nucGapLength[j]; + int pos = gaps[i].nucPosition[j]; + sequence[primaryBId][pos].second.resize(len, '-'); + blockLengths[primaryBId] += len; + } + } else { + int len=0; + for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + len += gaps[i].nucGapLength[j]; + } + blockLengths[primaryBId] += len; + } } - - panmanUtils::printSequenceLinesNew(sequencePrint, blockExistsPrint, blockStrandPrint, 70, aligned, fout, offset); - } else { - // DFS on children - for(panmanUtils::Node* child: root->children) { - printFASTAHelperNew(child, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); + std::string line = printFASTAUltraFastHelper(blockSequence, blockLengths, nodesFromTipToRoot, sequence, blockExists, blockStrand, aligned, rootSeq, panMATStart, panMATEnd, allIndex); + // nodeMutexes[node->identifier].lock(); + std::lock_guard 
guard(printMutex); + fout << line << "\n"; + // nodeMutexes[node->identifier].unlock(); + // break; + }); + // } +} - } +std::string panmanUtils::Tree::extractSequenceHelper( + const std::vector& blockSequence, + std::unordered_map& blockLengths, + const std::vector& nodesFromTipToRootIn, + std::vector>>>& sequence, + std::vector& blockExists, + std::vector& blockStrand, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { + + // reverse traversal of nodes + std::vector nodesFromTipToRoot(nodesFromTipToRootIn.size()); + for (int i = 0; i < nodesFromTipToRoot.size(); i++){ + nodesFromTipToRoot[i] = nodesFromTipToRootIn[nodesFromTipToRootIn.size() - 1- i]; } + for (auto node: nodesFromTipToRoot){ + // Block Mutations + for(auto mutation: node->blockMutation) { + int32_t primaryBlockId = mutation.primaryBlockId; + bool type = mutation.blockMutInfo; + bool inversion = mutation.inversion; + if (blockSequence[primaryBlockId]) { + if(type == 1) { + // insertion + bool oldStrand; + bool oldMut; + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = true; + // if insertion of inverted block takes place, the strand is backwards + blockStrand[primaryBlockId] = !inversion; + } else { + bool oldMut; + bool oldStrand; + if(inversion) { + // This means that this is not a deletion, but instead an inversion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockStrand[primaryBlockId] = !oldStrand; + if(oldMut != true) { + std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; + } + } else { + // Actually a deletion + oldStrand = blockStrand[primaryBlockId]; + oldMut = blockExists[primaryBlockId]; + blockExists[primaryBlockId] = false; + // resetting strand to true during deletion + blockStrand[primaryBlockId] = true; + } + } + } + } - // Undo block mutations when current node and its subtree have been processed - for(auto it = blockMutationInfo.rbegin(); it != blockMutationInfo.rend(); it++) { - auto mutation = *it; - blockExists[std::get<0>(mutation)] = std::get<1>(mutation); - blockStrand[std::get<0>(mutation)] = std::get<2>(mutation); + // Nuc mutations + for(size_t i = 0; i < node->nucMutation.size(); i++) { + int32_t primaryBlockId = node->nucMutation[i].primaryBlockId; + int32_t secondaryBlockId = node->nucMutation[i].secondaryBlockId; - } + if (blockSequence[primaryBlockId]) { + // if (rootSeq && (primaryBlockId>=std::get<0>(panMATStart) && primaryBlockId<=std::get<0>(panMATEnd)) && (secondaryBlockId<=std::get<1>(panMATStart) && secondaryBlockId<=std::get<1>(panMATEnd)) ) { + int32_t nucPosition = node->nucMutation[i].nucPosition; + int32_t nucGapPosition = node->nucMutation[i].nucGapPosition; + uint32_t type = (node->nucMutation[i].mutInfo & 0x7); + char newVal = '-'; - // Undo nuc mutations when current node and its subtree have been processed - for(auto it = mutationInfo.rbegin(); it != mutationInfo.rend(); it++) { - auto mutation = *it; - if(std::get<2>(mutation) != -1) { - sequence[std::get<0>(mutation)][std::get<1>(mutation)].second[std::get<2>(mutation)] = std::get<3>(mutation); - } else { - sequence[std::get<0>(mutation)][std::get<1>(mutation)].first = std::get<3>(mutation); + if(type < 3) { + // Either S, I or D + int len = ((node->nucMutation[i].mutInfo) >> 4); + + if(primaryBlockId >= sequence.size()) { + std::cout << primaryBlockId << " " << sequence.size() << std::endl; + } + + if(type == panmanUtils::NucMutationType::NS) { + // Substitution + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { 
+ char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition+j].first = newVal; + } + } + } else if(type == panmanUtils::NucMutationType::NI) { + // Insertion + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = newVal; + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> (4*(5-j))) & 0xF); + sequence[primaryBlockId][nucPosition+j].first = newVal; + } + } + } else if(type == panmanUtils::NucMutationType::ND) { + // Deletion + if(nucGapPosition != -1) { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition+j]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition+j] = '-'; + } + } else { + for(int j = 0; j < len; j++) { + char oldVal = sequence[primaryBlockId][nucPosition+j].first; + sequence[primaryBlockId][nucPosition+j].first = '-'; + } + } + } + } else { + if(type == panmanUtils::NucMutationType::NSNPS) { + // SNP Substitution + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 20) & 0xF); + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = 
newVal; + } else { + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = newVal; + } + } else if(type == panmanUtils::NucMutationType::NSNPI) { + // SNP Insertion + newVal = panmanUtils::getNucleotideFromCode(((node->nucMutation[i].nucs) >> 20) & 0xF); + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = newVal; + } else { + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = newVal; + } + } else if(type == panmanUtils::NucMutationType::NSNPD) { + // SNP Deletion + if(nucGapPosition != -1) { + char oldVal = sequence[primaryBlockId][nucPosition].second[nucGapPosition]; + sequence[primaryBlockId][nucPosition].second[nucGapPosition] = '-'; + } else { + char oldVal = sequence[primaryBlockId][nucPosition].first; + sequence[primaryBlockId][nucPosition].first = '-'; + } + } + } + } } + } + + // Store sequence + panmanUtils::Node* tipNode = nodesFromTipToRoot[nodesFromTipToRoot.size()-1]; + std::string line=""; + + int offset = 0; + if(!aligned && circularSequences.find(tipNode->identifier) != circularSequences.end()) { + // If MSA is to be printed, offset doesn't matter + offset = circularSequences[tipNode->identifier]; + } + std::vector< std::vector< std::pair< char, std::vector< char > > > > sequencePrint = sequence; + std::vector< bool > blockExistsPrint = blockExists; + std::vector< bool > blockStrandPrint = blockStrand; + + if(rotationIndexes.find(tipNode->identifier) != rotationIndexes.end() && rotationIndexes[tipNode->identifier] != 0) { + int ctr = -1, rotInd = 0; + for(size_t i = 0; i < blockExistsPrint.size(); i++) { + if(blockExistsPrint[i]) { + ctr++; + } + if(ctr == rotationIndexes[tipNode->identifier]) { + rotInd = i; + break; + } + } + // std::cout << "rotating" << std::endl; + rotate(sequencePrint.begin(), sequencePrint.begin() + 
rotInd, sequencePrint.end()); + rotate(blockExistsPrint.begin(), blockExistsPrint.begin() + rotInd, blockExistsPrint.end()); + rotate(blockStrandPrint.begin(), blockStrandPrint.begin() + rotInd, blockStrandPrint.end()); } - // std::cout << "Done iteration for node: " << root->identifier << std::endl; + if(sequenceInverted.find(tipNode->identifier) != sequenceInverted.end() && sequenceInverted[tipNode->identifier]) { + // std::cout << "inverting" << std::endl; + reverse(sequencePrint.begin(), sequencePrint.end()); + reverse(blockExistsPrint.begin(), blockExistsPrint.end()); + reverse(blockStrandPrint.begin(), blockStrandPrint.end()); + } + + line += panmanUtils::printSequenceLinesNew(sequencePrint, blockLengths, blockExistsPrint, blockStrandPrint, 70, aligned, offset, false); + return line; } +std::string panmanUtils::Tree::extractSingleSequence(panmanUtils::Node* node, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { -void panmanUtils::Tree::printFASTANew(std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { - // List of blocks. Each block has a nucleotide list. Along with each nucleotide is a gap list. 
+ // Create a stringstream for each thread to avoid race conditions on fout + if (node->children.size() != 0) { + std::cerr << "Node is not a tip" << std::endl; + exit(0); + } + // Get block sequnece of the Tip + std::vector< bool > blockSequence(blocks.size() + 1, false, {}); + std::vector nodesFromTipToRoot; + getNodesFromTipToRoot(node, nodesFromTipToRoot); + getBlockSequence(nodesFromTipToRoot, blockSequence); + + // Blocks length + std::unordered_map blockLengths; + + // Expanding blocks only if exist in tip std::vector< std::vector< std::pair< char, std::vector< char > > > > sequence(blocks.size() + 1); std::vector< bool > blockExists(blocks.size() + 1, false, {}); std::vector< bool > blockStrand(blocks.size() + 1, true, {}); @@ -1632,32 +2252,52 @@ void panmanUtils::Tree::printFASTANew(std::ostream& fout, bool aligned, bool roo // Create consensus sequence of blocks for(size_t i = 0; i < blocks.size(); i++) { - int32_t primaryBlockId = ((int32_t)blocks[i].primaryBlockId); - + blockLengths[primaryBlockId] = 0; maxBlockId = std::max(maxBlockId, primaryBlockId); + if (blockSequence[i]) { + int len = 0; + for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { + bool endFlag = false; + for(size_t k = 0; k < 8; k++) { + const int nucCode = (((blocks[i].consensusSeq[j]) >> (4*(7 - k))) & 15); + + if(nucCode == 0) { + endFlag = true; + break; + } + len++; + const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); - for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { - bool endFlag = false; - for(size_t k = 0; k < 8; k++) { - const int nucCode = (((blocks[i].consensusSeq[j]) >> (4*(7 - k))) & 15); + sequence[primaryBlockId].push_back({nucleotide, {}}); + } - if(nucCode == 0) { - endFlag = true; + if(endFlag) { break; } - const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); - - sequence[primaryBlockId].push_back({nucleotide, {}}); } + // End character to incorporate for gaps at the end + sequence[primaryBlockId].push_back({'x', 
{}}); + blockLengths[primaryBlockId] += len; + } else { + int len = 0; + for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { + bool endFlag = false; + for(size_t k = 0; k < 8; k++) { + const int nucCode = (((blocks[i].consensusSeq[j]) >> (4*(7 - k))) & 15); + if(nucCode == 0) { + endFlag = true; + break; + } + len++; + } - if(endFlag) { - break; + if(endFlag) { + break; + } } + blockLengths[primaryBlockId] += len; } - - // End character to incorporate for gaps at the end - sequence[primaryBlockId].push_back({'x', {}}); } sequence.resize(maxBlockId + 1); @@ -1668,16 +2308,22 @@ void panmanUtils::Tree::printFASTANew(std::ostream& fout, bool aligned, bool roo for(size_t i = 0; i < gaps.size(); i++) { int32_t primaryBId = (gaps[i].primaryBlockId); int32_t secondaryBId = (gaps[i].secondaryBlockId); - - for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { - int len = gaps[i].nucGapLength[j]; - int pos = gaps[i].nucPosition[j]; - sequence[primaryBId][pos].second.resize(len, '-'); + if (blockSequence[primaryBId]){ + for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + int len = gaps[i].nucGapLength[j]; + int pos = gaps[i].nucPosition[j]; + sequence[primaryBId][pos].second.resize(len, '-'); + blockLengths[primaryBId] += len; + } + } else { + int len=0; + for(size_t j = 0; j < gaps[i].nucPosition.size(); j++) { + len += gaps[i].nucGapLength[j]; + } + blockLengths[primaryBId] += len; } } - // Run depth first traversal to extract sequences - - printFASTAHelperNew(root, sequence, blockExists, blockStrand, fout, aligned, rootSeq, panMATStart, panMATEnd, allIndex); - + std::string line = extractSequenceHelper(blockSequence, blockLengths, nodesFromTipToRoot, sequence, blockExists, blockStrand, aligned, rootSeq, panMATStart, panMATEnd, allIndex); + return line; } \ No newline at end of file diff --git a/src/panman.cpp b/src/panman.cpp index 7bfa0fc..2b7f7b1 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -588,7 +588,8 @@ void readFasta(std::ifstream& fin, 
std::map< std::string, std::string >& sequenc if(lineLength == 0) { lineLength = currentSequence.length(); } else if(lineLength != currentSequence.length()) { - std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << std::endl; + std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << + "Expected: " << lineLength << "Produced:" << currentSequence.length() << std::endl; exit(-1); } sequenceIdsToSequences[currentSequenceId] = currentSequence; @@ -603,7 +604,8 @@ void readFasta(std::ifstream& fin, std::map< std::string, std::string >& sequenc } if(currentSequence.length()) { if(lineLength != 0 && lineLength != currentSequence.length()) { - std::cerr << "Error: sequence lengths don't match!" << std::endl; + std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << + "Expected: " << lineLength << "Produced:" << currentSequence.length() << std::endl; exit(-1); } else { lineLength = currentSequence.length(); @@ -631,7 +633,8 @@ size_t readFastaInBatch(std::ifstream& fin, std::map< std::string, std::string > if(lineLength == 0) { lineLength = currentSequence.length(); } else if(lineLength != currentSequence.length()) { - std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << std::endl; + std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << + "Expected: " << lineLength << "Produced:" << currentSequence.length() << std::endl; exit(-1); } size_t lengthStr = startIndex+batchSize>currentSequence.size() ? currentSequence.size()-startIndex: batchSize; @@ -647,7 +650,8 @@ size_t readFastaInBatch(std::ifstream& fin, std::map< std::string, std::string > } if(currentSequence.length()) { if(lineLength != 0 && lineLength != currentSequence.length()) { - std::cerr << "Error: sequence lengths don't match!" << std::endl; + std::cerr << "Error: sequence lengths don't match! 
" << currentSequenceId << + "Expected: " << lineLength << "Produced:" << currentSequence.length() << std::endl; exit(-1); } else { lineLength = currentSequence.length(); @@ -1231,7 +1235,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE if(lineLength == 0) { lineLength = currentSequence.length(); } else if(lineLength != currentSequence.length()) { - std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << std::endl; + std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << + "Expected: " << lineLength << "Produced:" << currentSequence.length() << std::endl; exit(-1); } std::vector< std::string > splitLine; @@ -1251,7 +1256,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE if(currentSequence.length()) { if(lineLength != 0 && lineLength != currentSequence.length()) { - std::cerr << "Error: sequence lengths don't match!" << std::endl; + std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << + "Expected: " << lineLength << "Produced:" << currentSequence.length() << std::endl; exit(-1); } else { lineLength = currentSequence.length(); @@ -1419,7 +1425,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE if(lineLength == 0) { lineLength = currentSequence.length(); } else if(lineLength != currentSequence.length()) { - std::cerr << "Error: sequence lengths don't match! " << currentSequenceId << std::endl; + std::cerr << "Error: sequence lengths don't match! 
" << currentSequenceId << + "Expected: " << lineLength << "Produced:" << currentSequence.length() << std::endl; exit(-1); } } diff --git a/src/panman.hpp b/src/panman.hpp index 2449d61..28da979 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -14,13 +14,14 @@ #include "panman.capnp.h" #include "common.hpp" +#include + #include #include #include namespace panmanUtils { - enum NucMutationType { // Nucleotide Substutution NS = 0, @@ -317,7 +318,27 @@ class Tree { std::vector& blockStrand, std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); - + + std::string printFASTAUltraFastHelper( + const std::vector& blockSequence, + std::unordered_map& blockLengths, + const std::vector& nodesFromTipToRoot, + std::vector>>>& sequence, + std::vector& blockExists, + std::vector& blockStrand, + bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); + + std::string extractSequenceHelper( + const std::vector& blockSequence, + std::unordered_map& blockLengths, + const std::vector& nodesFromTipToRoot, + std::vector>>>& sequence, + std::vector& blockExists, + std::vector& blockStrand, + bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); + + std::string extractSingleSequence(panmanUtils::Node* node, bool aligned=false, bool rootSeq=false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); + void printSingleNodeHelper(std::vector &nodeList, int nodeListIndex, sequence_t& sequence, blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart={-1,-1,-1,-1}, const std::tuple< int, int, int, int >& panMATEnd={-1,-1,-1,-1}); @@ -470,6 +491,7 @@ class Tree 
{ void printBfs(Node* node = nullptr); void printFASTA(std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start={-1,-1,-1,-1}, const std::tuple &end={-1,-1,-1,-1}, bool allIndex = false); void printFASTANew(std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start={-1,-1,-1,-1}, const std::tuple &end={-1,-1,-1,-1}, bool allIndex = false); + void printFASTAUltraFast(std::ostream& fout, bool aligned = false, bool rootSeq = false, const std::tuple &start={-1,-1,-1,-1}, const std::tuple &end={-1,-1,-1,-1}, bool allIndex = false); void printSingleNode(std::ostream& fout, const sequence_t& sequence, const blockExists_t& blockExists, const blockStrand_t& blockStrand, std::string nodeIdentifier, std::tuple< int, int, int, int > &panMATStart, std::tuple< int, int, int, int > &panMATEnd); @@ -479,6 +501,7 @@ class Tree { void printMAFNew(std::ostream& fout); void generateSequencesFromMAF(std::ifstream& fin, std::ofstream& fout); void printVCFParallel(std::string reference, std::ostream& fout); + void printVCFParallel(panmanUtils::Node* node, std::ostream& fout); void extractAminoAcidTranslations(std::ostream& fout, int64_t start, int64_t end); // Extract PanMAT representing a segment of the genome. 
The start and end coordinates diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index c0c1e33..d5ed8cd 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -141,6 +141,7 @@ void setupOptionDescriptions() { ("summary,s", "Print PanMAN summary") ("newick,t", "Print newick string of all trees in a PanMAN") ("fasta,f", "Print tip/internal sequences (FASTA format)") + // ("fasta-fast", "Print tip/internal sequences (FASTA format)") ("fasta-aligned,m", "Print MSA of sequences for each PanMAT in a PanMAN (FASTA format)") ("subnet,b", "Extract subnet of given PanMAN to a new PanMAN file based on the list of nodes provided in the input-file") ("vcf,v", "Print variations of all sequences from any PanMAT in a PanMAN (VCF format)") @@ -363,9 +364,7 @@ void fasta(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstrea } std::ostream fout (buf); - // T->printFASTAParallel(fout, false); - T->printFASTANew(fout, false, false); - // T->printFASTA(fout,false,false); + T->printFASTAUltraFast(fout, false, false); if(globalVm.count("output-file")) outputFile.close(); } @@ -397,8 +396,40 @@ void fastaAligned(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std:: std::ostream fout (buf); - // T->printFASTA(fout, true); - T->printFASTAParallel(fout, true); + T->printFASTAUltraFast(fout, true); + + + if(globalVm.count("output-file")) outputFile.close(); + } + + auto fastaEnd = std::chrono::high_resolution_clock::now(); + std::chrono::nanoseconds fastaTime = fastaEnd - fastaStart; + std::cout << "\nFASTA execution time: " << fastaTime.count() << " nanoseconds\n"; +} + +void fastaFast(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { + // Print multiple sequence alignment to output file + if(TG == nullptr) { + std::cout << "No PanMAN selected" << std::endl; + return; + } + + panmanUtils::TreeGroup tg = *TG; + + auto fastaStart = std::chrono::high_resolution_clock::now(); + for(int i = 0; i < 
tg.trees.size(); i++) { + panmanUtils::Tree *T = &tg.trees[i]; + if(globalVm.count("output-file")) { + std::string fileName = globalVm["output-file"].as< std::string >(); + outputFile.open("./info/" + fileName + "_" + std::to_string(i) + ".fasta"); + buf = outputFile.rdbuf(); + } else { + buf = std::cout.rdbuf(); + } + std::ostream fout (buf); + + + T->printFASTAUltraFast(fout, false, false); if(globalVm.count("output-file")) outputFile.close(); @@ -566,7 +597,14 @@ void vcf(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream auto vcfStart = std::chrono::high_resolution_clock::now(); - T->printVCFParallel(reference, fout); + panmanUtils::Node* refNode; + for (auto &n: T->allNodes) { + if (n.first == reference) { + refNode = n.second; + break; + } + } + T->printVCFParallel(refNode, fout); auto vcfEnd = std::chrono::high_resolution_clock::now(); std::chrono::nanoseconds vcfTime = vcfEnd - vcfStart; @@ -1370,6 +1408,9 @@ void parseAndExecute(int argc, char* argv[]) { } else if(globalVm.count("toUser")) { toUsher(TG, globalVm); return; + // } else if(globalVm.count("fasta-fast")){ + // fastaFast(TG, globalVm, outputFile, buf); + // return; } else { char** splitCommandArray; diff --git a/src/panmanUtils.hpp b/src/panmanUtils.hpp index 78879e1..b20fe3e 100644 --- a/src/panmanUtils.hpp +++ b/src/panmanUtils.hpp @@ -35,14 +35,16 @@ char getComplementCharacter(char nuc); // Given a sequence and block presence/strand information, print the sequence in FASTA format // where each line has length lineSize + void printSequenceLines(const sequence_t& sequence, const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, bool aligned, std::ostream& fout, int offset = 0, bool debug = false); -void printSequenceLinesNew(const std::vector>>>& sequence, + +std::string printSequenceLinesNew(const std::vector>>>& sequence, + std::unordered_map& blockLengths, const std::vector& blockExists, const std::vector& blockStrand, size_t lineSize, - bool 
aligned, std::ostream& fout, int offset = 0, bool debug = false); - + bool aligned, int offset = 0, bool debug = false); void printSubsequenceLines(const sequence_t& sequence,\ const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, const std::tuple& panMATStart, diff --git a/src/vcf.cpp b/src/vcf.cpp index 824d7b4..35d88ea 100644 --- a/src/vcf.cpp +++ b/src/vcf.cpp @@ -23,9 +23,9 @@ void panmanUtils::Tree::printVCFParallel(std::string reference, std::ostream& fo } std::string currentRefString, currentAltString; - int currentCoordinate = 0; + int currentCoordinate = 1; - int diffStart = 0; + int diffStart = 1; for(size_t i = 0; i < referenceSequence.length(); i++) { @@ -105,6 +105,182 @@ void panmanUtils::Tree::printVCFParallel(std::string reference, std::ostream& fo } }); + std::cout << vcfMap.size() << std::endl; + + std::mutex sequenceIdsMutex; + std::map< std::string, size_t > sequenceIds; + tbb::parallel_for_each(allNodes, [&](auto& u) { + if(u.second->children.size() == 0 && u.first != reference) { + sequenceIdsMutex.lock(); + sequenceIds[u.first] = 0; + sequenceIdsMutex.unlock(); + } + }); + + + fout << "##fileformat=VCFv" << VCF_VERSION << '\n'; + fout << "##fileDate=" << panmanUtils::getDate() << '\n'; + fout << "##source=PanMATv" << PMAT_VERSION << '\n'; + fout << "##reference=" << reference << '\n'; + fout << "#CHROM\t" << "POS\t" << "ID\t" << "REF\t" << "ALT\t" << "QUAL\t" << "FILTER\t" << "INFO\t" << "FORMAT\t"; + + // fout << std::left << std::setw(20) << "#CHROM " << std::setw(20) << "POS " << std::setw(20) << "ID " << std::setw(20) << "REF " << std::setw(20) << "ALT " << std::setw(20) << "QUAL " << std::setw(20) << "FILTER " << std::setw(20) << "INFO " << std::setw(20) << "FORMAT "; + for(auto u: sequenceIds) { + if(u.first != sequenceIds.rbegin()->first) { + fout << u.first + "\t"; + } else { + fout << u.first; + } + } + fout << '\n'; + + for(auto u: vcfMap) { + for(auto v: u.second) { + if(v.first == "") { + fout << 
reference << "\t" << u.first << "\t" << recordID++ << "\t" << ".\t"; + } else { + fout << reference << "\t" << u.first << "\t" << recordID++ << "\t" << v.first << "\t"; + } + + std::map< std::string, size_t > tempSequenceIds = sequenceIds; + + int ctr = 1; + std::string altStrings; + + for(auto w: v.second) { + altStrings += (w.first == "" ? ".": w.first); + altStrings += ","; + for(auto uu: w.second) { + tempSequenceIds[uu] = ctr; + } + ctr++; + } + + altStrings.pop_back(); + + fout << altStrings << "\t.\t.\t.\t.\t"; + + for(auto w: tempSequenceIds) { + if(w.first != sequenceIds.rbegin()->first) { + fout << w.second << "\t"; + } else { + fout << w.second; + } + } + + fout << '\n'; + } + } +} + + +void panmanUtils::Tree::printVCFParallel(panmanUtils::Node* refnode, std::ostream& fout) { + + std::string reference = refnode->identifier; + std::string referenceSequence = extractSingleSequence(refnode, true); + + if(referenceSequence == "Error: Reference sequence with matching name not found!") { + std::cerr << referenceSequence << std::endl; + return; + } + + size_t recordID = 0; + + std::mutex vcfMapMutex; + std::map< int, std::map< std::string, std::map< std::string, std::vector< std::string > > > > vcfMap; + + tbb::parallel_for_each(allNodes, [&](auto& n) { + if(n.second->children.size() == 0 && n.first != refnode->identifier) { + std::string altSequence = extractSingleSequence(n.second, true); + if(altSequence.length() != referenceSequence.length()) { + std::cerr << "Logic error. 
String lengths don't match: " << referenceSequence.length() << " " << altSequence.length() << std::endl; + return; + } + + std::string currentRefString, currentAltString; + int currentCoordinate = 1; + + int diffStart = 1; + + for(size_t i = 0; i < referenceSequence.length(); i++) { + + if(referenceSequence[i] == '-' && altSequence[i] == '-') { + continue; + } else if(referenceSequence[i] != '-' && altSequence[i] == '-') { + if(currentRefString == "" && currentAltString == "") { + diffStart = currentCoordinate; + } + + currentRefString += referenceSequence[i]; + } else if(referenceSequence[i] == '-' && altSequence[i] != '-') { + if(currentRefString == "" && currentAltString == "") { + diffStart = currentCoordinate; + } + + currentAltString += altSequence[i]; + } else if(referenceSequence[i] != altSequence[i]) { + if(currentRefString == "" && currentAltString == "") { + diffStart = currentCoordinate; + } + if(currentRefString == currentAltString) { + currentRefString = ""; + currentAltString = ""; + diffStart = currentCoordinate; + } + currentRefString += referenceSequence[i]; + currentAltString += altSequence[i]; + } else if(referenceSequence[i] == altSequence[i]) { + if(currentRefString == currentAltString) { + // Reset + diffStart = currentCoordinate; + currentRefString = ""; + currentRefString += referenceSequence[i]; + currentAltString = currentRefString; + } else { + // Create VCF record at position i + if(currentRefString == "") { + currentRefString += referenceSequence[i]; + currentAltString += altSequence[i]; + diffStart = currentCoordinate; + vcfMapMutex.lock(); + vcfMap[diffStart][currentRefString][currentAltString].push_back(n.first); + vcfMapMutex.unlock(); + diffStart = currentCoordinate+1; + currentRefString = ""; + currentAltString = ""; + } else { + vcfMapMutex.lock(); + vcfMap[diffStart][currentRefString][currentAltString].push_back(n.first); + vcfMapMutex.unlock(); + + // Reset + diffStart = currentCoordinate; + currentRefString = ""; + 
currentRefString += referenceSequence[i]; + currentAltString = currentRefString; + } + } + } + + if(referenceSequence[i] != '-') { + currentCoordinate++; + } + } + + if(currentRefString != currentAltString) { + vcfMapMutex.lock(); + vcfMap[diffStart][currentRefString][currentAltString].push_back(n.first); + vcfMapMutex.unlock(); + + // Reset + diffStart = referenceSequence.size(); + currentRefString = ""; + currentAltString = currentRefString; + } + } + }); + + std::mutex sequenceIdsMutex; std::map< std::string, size_t > sequenceIds; tbb::parallel_for_each(allNodes, [&](auto& u) { From 7e1d35d00fe672052961b20dbef13ea8754b5e00 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sun, 29 Dec 2024 10:02:42 -0800 Subject: [PATCH 079/103] ultra fast fasta write --- src/fasta.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fasta.cpp b/src/fasta.cpp index ae3f5fc..a9431bf 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -1953,7 +1953,7 @@ void panmanUtils::Tree::printFASTAUltraFast(std::ostream& fout, bool aligned, bo blockLengths[primaryBlockId] = 0; maxBlockId = std::max(maxBlockId, primaryBlockId); if (blockSequence[primaryBlockId]) { - int len = 0; + // int len = 0; for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { bool endFlag = false; for(size_t k = 0; k < 8; k++) { @@ -1963,7 +1963,7 @@ void panmanUtils::Tree::printFASTAUltraFast(std::ostream& fout, bool aligned, bo endFlag = true; break; } - len++; + // len++; const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); sequence[primaryBlockId].push_back({nucleotide, {}}); } @@ -1974,7 +1974,7 @@ void panmanUtils::Tree::printFASTAUltraFast(std::ostream& fout, bool aligned, bo } // End character to incorporate for gaps at the end sequence[primaryBlockId].push_back({'x', {}}); - blockLengths[primaryBlockId] += len; + // blockLengths[primaryBlockId] += len; } else { int len = 0; for(size_t j = 0; j < blocks[i].consensusSeq.size(); j++) { @@ -2010,7 +2010,7 @@ void 
panmanUtils::Tree::printFASTAUltraFast(std::ostream& fout, bool aligned, bo int len = gaps[i].nucGapLength[j]; int pos = gaps[i].nucPosition[j]; sequence[primaryBId][pos].second.resize(len, '-'); - blockLengths[primaryBId] += len; + // blockLengths[primaryBId] += len; } } else { int len=0; From ad265cdef65c789b568ce6152a11e9f46830cfbc Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 2 Jan 2025 22:17:25 -0800 Subject: [PATCH 080/103] google proto to capn proto --- src/fitchSankoff.cpp | 2 +- src/panman.cpp | 617 ++++++++++++++++++++++++++++++++++--------- src/panman.hpp | 145 +++++++++- src/panmanUtils.cpp | 23 +- 4 files changed, 659 insertions(+), 128 deletions(-) diff --git a/src/fitchSankoff.cpp b/src/fitchSankoff.cpp index b7c8629..0006120 100644 --- a/src/fitchSankoff.cpp +++ b/src/fitchSankoff.cpp @@ -415,7 +415,7 @@ void panmanUtils::Tree::nucSankoffBackwardPass(Node* node, int minVal = SANKOFF_INF; int minPtr = -1; for(int i = 0; i < 16; i++) { - std::cout << stateSets[node->identifier][i] << " " << SANKOFF_INF << std::endl; + // std::cout << stateSets[node->identifier][i] << " " << SANKOFF_INF << std::endl; if(stateSets[node->identifier][i] < minVal) { minVal = stateSets[node->identifier][i]; minPtr = i; diff --git a/src/panman.cpp b/src/panman.cpp index 2b7f7b1..61e90a8 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include "chaining.cpp" @@ -246,12 +247,29 @@ panmanUtils::Block::Block(int32_t pBlockId, int32_t sBlockId, const std::vector< } void panmanUtils::stringSplit (std::string const& s, char delim, std::vector& words) { - size_t start_pos = 0, end_pos = 0; + size_t start_pos = 0, end_pos = 0, temp_pos = 0; while ((end_pos = s.find(delim, start_pos)) != std::string::npos) { if (end_pos >= s.length()) { break; } - words.emplace_back(s.substr(start_pos, end_pos-start_pos)); + std::string sub; + if (temp_pos == 0) { + sub = s.substr(start_pos, end_pos-start_pos); + if 
(std::count(sub.begin(), sub.end(), '\'') % 2 == 1) { + temp_pos = start_pos; + } + else { + words.emplace_back(sub); + } + } + else { + sub = s.substr(temp_pos, end_pos-temp_pos); + if (std::count(sub.begin(), sub.end(), '\'') % 2 == 0) { + temp_pos = 0; + words.emplace_back(sub); + } + } + // words.emplace_back(s.substr(start_pos, end_pos-start_pos)); start_pos = end_pos+1; } auto last = s.substr(start_pos, s.size()-start_pos); @@ -260,19 +278,34 @@ void panmanUtils::stringSplit (std::string const& s, char delim, std::vector leaves; std::vector numOpen; std::vector numClose; std::vector> branchLen (128); // will be resized later if needed size_t level = 0; + // std::cout << newickString << std::endl; std::vector s1; stringSplit(newickString, ',', s1); - + numOpen.reserve(s1.size()); numClose.reserve(s1.size()); @@ -284,11 +317,20 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new bool stop = false; bool branchStart = false; + bool nameZone = false; + bool hasApo = false; std::string leaf = ""; std::string branch = ""; for (auto c: s) { - if (c == ':') { + if (nameZone) { + leaf += c; + if (c == '\'') nameZone = false; + } else if (c == '\'' && !nameZone) { + nameZone = true; + hasApo = true; + leaf += c; + } else if (c == ':') { stop = true; branch = ""; branchStart = true; @@ -303,6 +345,7 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new nc++; // float len = (branch.size() > 0) ? std::stof(branch) : -1.0; float len = (branch.size() > 0) ? std::stof(branch) : 1.0; + if (len == 0) len = 1.0; branchLen[level].push(len); level--; branchStart = false; @@ -312,18 +355,17 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new leafDepth = level; } else if (branchStart) { - if (isdigit(c) || c == '.' 
|| c == 'e' || c == 'E' || c == '-' || c == '+') { + if (isdigit(c) || c == '.') { branch += c; } } } - + if (hasApo && leaf[0] == '\'' && leaf[leaf.length()-1] == '\'') leaf = leaf.substr(1, leaf.length()-2); leaves.push_back(std::move(leaf)); numOpen.push_back(no); numClose.push_back(nc); - - // float len = (branch.size() > 0) ? std::stof(branch) : -1.0; float len = (branch.size() > 0) ? std::stof(branch) : 1.0; + if (len == 0) len = 1.0; branchLen[level].push(len); // Adjusting max and mean depths @@ -332,8 +374,10 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new } + m_meanDepth /= leaves.size(); + // std::cout << m_meanDepth << " " << level << std::endl; if (level != 0) { fprintf(stderr, "ERROR: incorrect Newick format!\n"); exit(1); @@ -342,45 +386,41 @@ panmanUtils::Node* panmanUtils::Tree::createTreeFromNewickString(std::string new m_numLeaves = leaves.size(); std::stack parentStack; - // std::cout << "branchLen " << branchLen[level].size() << " " <identifier << '\t' << newNode->branchLength << '\n'; - branchLen[level].pop(); - level++; - - allNodes[nid] = newNode; - parentStack.push(newNode); + int cc = 0; + for (size_t i=0; ibranchLength = 0.0; + std::cout << "Tree created with " << m_numLeaves << " leaves and " << allNodes.size() << " nodes\n"; return treeRoot; } @@ -547,7 +588,7 @@ void panmanUtils::Tree::assignMutationsToNodes(Node* root, size_t& currentIndex, for (auto nodeAnnotations: storedNode[currentIndex].getAnnotations()){ root->annotations.push_back(nodeAnnotations.cStr()); - std::cout << root->identifier << " " << nodeAnnotations.cStr() << std::endl; + // std::cout << root->identifier << " " << nodeAnnotations.cStr() << std::endl; annotationsToNodes[nodeAnnotations.cStr()].push_back(root->identifier); } @@ -623,7 +664,7 @@ size_t readFastaInBatch(std::ifstream& fin, std::map< std::string, std::string > size_t lineLength = 0; size_t nextStartIndex = startIndex; - std::cout << "starting reading for " << 
nextStartIndex << std::endl; + // std::cout << "starting reading for " << nextStartIndex << std::endl; while(getline(fin,line,'\n')) { if(line.length() == 0) { continue; @@ -661,7 +702,7 @@ size_t readFastaInBatch(std::ifstream& fin, std::map< std::string, std::string > nextStartIndex += lengthStr; } - std::cout << "Done reading till " << nextStartIndex - 1 << std::endl; + // std::cout << "Done reading till " << nextStartIndex - 1 << std::endl; return nextStartIndex; } @@ -706,7 +747,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE std::string newickString; - secondFin >> newickString; + // secondFin >> newickString; + std::getline(secondFin, newickString); root = createTreeFromNewickString(newickString); std::unordered_map< std::string, std::vector< int64_t > > pathIdToSequence; @@ -761,7 +803,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE }); } else if(ftype == panmanUtils::FILE_TYPE::PANGRAPH) { std::string newickString; - secondFin >> newickString; + // secondFin >> newickString; + std::getline(secondFin, newickString); Json::Value pangraphData; fin >> pangraphData; root = createTreeFromNewickString(newickString); @@ -1214,7 +1257,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } else if(ftype == panmanUtils::FILE_TYPE::MSA) { std::string newickString; - secondFin >> newickString; + // secondFin >> newickString; + std::getline(secondFin, newickString); root = createTreeFromNewickString(newickString); @@ -1227,7 +1271,6 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE // Read MSA while(getline(fin,line,'\n')) { if(line.length() == 0) { - std::cout << "here"; continue; } if(line[0] == '>') { @@ -1266,7 +1309,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } - std::cout << lineLength << std::endl; + // std::cout << lineLength << std::endl; std::set< size_t > emptyPositions; @@ -1376,7 
+1419,7 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } // std::cout << root->identifier << std::endl; - std::cout << consensusSeq << std::endl; + // std::cout << consensusSeq << std::endl; blocks.emplace_back(0, consensusSeq); root->blockMutation.emplace_back(0, std::make_pair(BlockMutationType::BI, false)); // pos, start, end @@ -1407,7 +1450,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE }); } else if(ftype == panmanUtils::FILE_TYPE::MSA_OPTIMIZE) { std::string newickString; - secondFin >> newickString; + // secondFin >> newickString; + std::getline(secondFin, newickString); root = createTreeFromNewickString(newickString); std::string line; @@ -1463,8 +1507,8 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE nextStartIndex = readFastaInBatch(fin, sequenceIdsToSequences, startIndex, batchSize); + std::cout << "writing consensus sequences from" << startIndex << " to " << nextStartIndex << std::endl; if (reference != "") { - std::cout << "writing consensus sequences from" << startIndex << " to " << nextStartIndex << std::endl; for (int i=0; iblockMutation.emplace_back(0, std::make_pair(BlockMutationType::BI, false)); - std::cout << consensusSeq << std::endl; + // std::cout << consensusSeq << std::endl; tbb::parallel_for_each(nonGapMutationsMSA, [&](auto& u) { @@ -1556,10 +1600,21 @@ panmanUtils::Tree::Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE } } +int doPreOrderLoop(panmanUtils::Node* node){ + int c = 1; + if (node->children.size() == 0) return c; + for (auto &n: node->children){ + c += doPreOrderLoop(n); + } + return c; +} + void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { // Create tree root = createTreeFromNewickString(mainTree.getNewick().cStr()); - // std::cout << root->identifier << std::endl; + // std::cout << "Size of nodes: " << allNodes.size() << std::endl; + // std::cout << doPreOrderLoop(root) << 
std::endl; + std::map< std::pair, std::vector< uint32_t > > blockIdToConsensusSeq; int countt = 0; @@ -1586,6 +1641,7 @@ void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { countt++; } + // std::cout << "Assigning nodes" << std::endl; std::vector storedNodes; for (auto nodesFromTree: mainTree.getNodes()){ storedNodes.push_back(nodesFromTree); @@ -1593,14 +1649,17 @@ void panmanUtils::Tree::protoMATToTree(const panman::Tree::Reader& mainTree) { size_t initialIndex = 0; + // std::cout << "Assigning mutations to nodes" << std::endl; assignMutationsToNodes(root, initialIndex, storedNodes); // Block sequence + // std::cout << "Assigning Blocks" << std::endl; for(auto u: blockIdToConsensusSeq) { blocks.emplace_back(u.first.first, u.first.second, u.second); } // Gap List + // std::cout << "Assigning Gap List" << std::endl; for (auto i=0; i< mainTree.getGaps().size(); i++){ panmanUtils::GapList tempGaps; for (auto j=0; j& nodes) { + std::vector< panmanUtils::NucMut > storedNucMutation; + for(int i = 0; i < nodes[currentIndex].mutations_size(); i++) { + for(auto nucMut: nodes[currentIndex].mutations(i).nucmutation()) { + storedNucMutation.push_back( panmanUtils::NucMut(nucMut, + nodes[currentIndex].mutations(i).blockid(), + nodes[currentIndex].mutations(i).blockgapexist())); + } + } + std::vector< panmanUtils::BlockMut > storedBlockMutation; + for(int i = 0; i < nodes[currentIndex].mutations_size(); i++) { + panmanUtils::BlockMut tempBlockMut; + if(nodes[currentIndex].mutations(i).blockmutexist()) { + tempBlockMut.loadFromProtobuf(nodes[currentIndex].mutations(i)); + storedBlockMutation.push_back(tempBlockMut); + } + } + for(int i = 0; i < nodes[currentIndex].annotations_size(); i++) { + root->annotations.push_back(nodes[currentIndex].annotations(i)); + annotationsToNodes[nodes[currentIndex].annotations(i)].push_back(root->identifier); + } + root->nucMutation = storedNucMutation; + root->blockMutation = storedBlockMutation; + for(auto child: 
root->children) { + currentIndex++; + assignMutationsToNodes(child, currentIndex, nodes); + } +} + +void panmanUtils::Tree::protoMATToTree(const panmanOld::tree& mainTree) { + // Create tree + root = createTreeFromNewickString(mainTree.newick()); + std::map< std::pair, std::vector< uint32_t > > blockIdToConsensusSeq; + for(int i = 0; i < mainTree.consensusseqmap_size(); i++) { + std::vector< uint32_t > seq; + for(int j = 0; j < mainTree.consensusseqmap(i).consensusseq_size(); j++) { + seq.push_back(mainTree.consensusseqmap(i).consensusseq(j)); + } + for(int j = 0; j < mainTree.consensusseqmap(i).blockid_size(); j++) { + std::pair< int32_t, int32_t > blockId; + blockId.first = (mainTree.consensusseqmap(i).blockid(j) >> 32); + if(mainTree.consensusseqmap(i).blockgapexist(j)) { + blockId.second = (mainTree.consensusseqmap(i).blockid(j) & 0xFFFFFFFF); + } else { + blockId.second = -1; + } + blockIdToConsensusSeq[blockId] = seq; + } + } + std::vector< panmanOld::node > storedNodes; + for(int i = 0; i < mainTree.nodes_size(); i++) { + storedNodes.push_back(mainTree.nodes(i)); + } + size_t initialIndex = 0; + assignMutationsToNodes(root, initialIndex, storedNodes); + // Block sequence + for(auto u: blockIdToConsensusSeq) { + blocks.emplace_back(u.first.first, u.first.second, u.second); + } + // Gap List + for(int i = 0; i < mainTree.gaps_size(); i++) { + panmanUtils::GapList tempGaps; + tempGaps.primaryBlockId = (mainTree.gaps(i).blockid() >> 32); + tempGaps.secondaryBlockId = (mainTree.gaps(i).blockgapexist() ? 
(mainTree.gaps(i).blockid() & 0xFFFF): -1); + for(int j = 0; j < mainTree.gaps(i).nucposition_size(); j++) { + tempGaps.nucPosition.push_back(mainTree.gaps(i).nucposition(j)); + tempGaps.nucGapLength.push_back(mainTree.gaps(i).nucgaplength(j)); + } + gaps.push_back(tempGaps); + } + // Circular offsets + for(int i = 0; i < mainTree.circularsequences_size(); i++) { + circularSequences[mainTree.circularsequences(i).sequenceid()] = mainTree.circularsequences(i).offset(); + } + // Rotation Indexes + for(int i = 0; i < mainTree.rotationindexes_size(); i++) { + rotationIndexes[mainTree.rotationindexes(i).sequenceid()] = mainTree + .rotationindexes(i).blockoffset(); + } + // Sequence inverted + for(int i = 0; i < mainTree.sequencesinverted_size(); i++) { + sequenceInverted[mainTree.sequencesinverted(i).sequenceid()] = mainTree + .sequencesinverted(i).inverted(); + } + // Block gap list + for(int i = 0; i < mainTree.blockgaps().blockposition_size(); i++) { + blockGaps.blockPosition.push_back(mainTree.blockgaps().blockposition(i)); + blockGaps.blockGapLength.push_back(mainTree.blockgaps().blockgaplength(i)); + } +} +panmanUtils::Tree::Tree(const panmanOld::tree& mainTree) { + protoMATToTree(mainTree); +} +////////////////////////////////////////////////////////////////////// void panmanUtils::Tree::printBfs(Node* node) { if(node == nullptr) { @@ -1704,6 +1863,11 @@ std::string panmanUtils::Tree::getNewickString(Node* node) { std::string newick; + if (traversal.size() == 1) { + newick += node->identifier; + return newick; + } + size_t level_offset = node->level-1; size_t curr_level = 0; bool prev_open = true; @@ -2414,7 +2578,7 @@ void panmanUtils::Tree::extractPanMATSegment(kj::std::StdOutputStream& fout, int std::tuple< int, int, int, int > panMATEnd = globalCoordinateToBlockCoordinate(end, rootSequence, rootBlockExists, rootBlockStrand); - std::cout << std::get<0>(panMATStart) << " " << std::get<2>(panMATStart) << " " << std::get<3>(panMATStart) << std::endl; + // 
std::cout << std::get<0>(panMATStart) << " " << std::get<2>(panMATStart) << " " << std::get<3>(panMATStart) << std::endl; panmanUtils::Node* newRoot = extractPanMATSegmentHelper(root, panMATStart, panMATEnd, rootBlockStrand); @@ -2619,7 +2783,7 @@ void panmanUtils::Tree::extractPanMATSegment(kj::std::StdOutputStream& fout, int } void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder& nodesBuilder, size_t& nodeIndex) { - // std::cout << nodeIndex << std::endl; + // std::cout << nodeIndex << " " << root->identifier << std::endl; panman::Node::Builder n = nodesBuilder[nodeIndex++]; std::map< std::pair< int32_t, int32_t >, std::pair< std::vector< panman::NucMut::Builder >, int > > blockToMutations; std::map< std::pair< int32_t, int32_t >, bool > blockToInversion; @@ -2652,7 +2816,6 @@ void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder mutationsBuilder = n.initMutations(blockToMutations.size()); - // std::cout << "Mutations\n"; size_t blockToMutationsCount=0; for(auto &u: blockToMutations) { panman::Mutation::Builder mutation = mutationsBuilder[blockToMutationsCount++]; @@ -2686,7 +2849,6 @@ void panmanUtils::Tree::getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder annotationsBuilder = n.initAnnotations(root->annotations.size()); for(size_t i = 0; i < root->annotations.size(); i++) { annotationsBuilder.set(i,root->annotations[i]); @@ -3146,7 +3308,7 @@ void panmanUtils::Tree::printMutations(std::ostream& fout) { tbb::parallel_for_each(allNodes, [&](auto u) { // for (auto &u: allNodes) { std::map< std::tuple< std::string, int, int, int >, char > seqChar; - std::cout << u.first << "\t" << "\t" << countNodeID++; + // std::cout << u.first << "\t" << "\t" << countNodeID++; sequence_t st; blockExists_t bt; blockStrand_t bst; @@ -4592,17 +4754,6 @@ std::string panmanUtils::Tree::getStringFromReference(std::string reference, boo } -std::string panmanUtils::stripString(std::string s) { - 
while(s.length() && s[s.length() - 1] == ' ') { - s.pop_back(); - } - for(size_t i = 0; i < s.length(); i++) { - if(s[i] != ' ') { - return s.substr(i); - } - } - return s; -} std::string panmanUtils::stripGaps(const std::string sequenceString) { std::string result; @@ -4805,13 +4956,16 @@ int32_t panmanUtils::Tree::getUnalignedGlobalCoordinate(int32_t primaryBlockId, int32_t secondaryBlockId, int32_t pos, int32_t gapPos, const sequence_t& sequence, const blockExists_t& blockExists, const blockStrand_t& blockStrand, int circularOffset, bool* check) { - // std::cout << "P " << sequence.size() << " " << primaryBlockId << " " << secondaryBlockId << " " << pos << " " << gapPos << " " << circularOffset << " " << sequence[primaryBlockId].first[pos].first << std::endl; + std::cout << "P " << primaryBlockId << " " << secondaryBlockId << " " << pos << " " << gapPos << " " << circularOffset << " " << blockExists.size() << std::endl; *check = false; int ctr = 0; int ans = -1; int len = 0; for(size_t i = 0; i < blockExists.size(); i++) { - // std::cout << blockExists[i].first << " " << blockExists[i].second.size() << " " << blockStrand[i].first << " " << blockStrand[i].second.size() << std::endl; + std::cout << blockExists[i].first << std::endl; + std::cout << blockExists[i].second.size() << std::endl; + std::cout << blockStrand[i].first << std::endl; + std::cout << blockStrand[i].second.size() << std::endl; if(!blockExists[i].first) { continue; } @@ -4871,7 +5025,7 @@ int32_t panmanUtils::Tree::getUnalignedGlobalCoordinate(int32_t primaryBlockId, } } - // std::cout << "ANS: " << ans << " " << circularOffset << std::endl; + std::cout << "ANS: " << ans << " " << circularOffset << std::endl; ans -= circularOffset; if (ans == -1) { *check = true; @@ -5064,6 +5218,7 @@ panmanUtils::Tree::Tree(Node* newRoot, const std::vector< Block >& b, Node* current = q.front(); q.pop(); allNodes[current->identifier] = current; + // std::cout << current->identifier << std::endl; 
if(cs.find(current->identifier) != cs.end()) { circularSequences[current->identifier] = cs[current->identifier]; } @@ -5255,7 +5410,7 @@ panmanUtils::GfaGraph::GfaGraph(const std::vector< std::string >& pathNames, con intSequenceConsensus.push_back(b); } } - std::cout << seqCount << " " << intSequenceConsensus.size() << endl; + // std::cout << seqCount << " " << intSequenceConsensus.size() << endl; } // re-assigning IDs in fixed order @@ -5540,7 +5695,7 @@ panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData, panmanUtils::Node* ro std::cout << "Len of consensus: " << consensus.size() << std::endl; } seqCount++; - std::cout << seqCount << " " << intSequenceConsensusNew.size() << endl; + // std::cout << seqCount << " " << intSequenceConsensusNew.size() << endl; } // re-assigning IDs in fixed order @@ -5618,20 +5773,180 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg) { } } +size_t getNodeIDHelper(panmanUtils::Node* currentNode, panmanUtils::Node* node, bool found) { + size_t nodeID = 1; + if (currentNode->children.size() == 0) { + return 0; + } + if (currentNode->identifier == node->identifier) { + found = true; + return 1; + } + if (currentNode->isComMutHead) { + return 0; + } + if (found) return 0; + for (auto &n:currentNode->children) { + nodeID += getNodeIDHelper(n, node, found); + } + return nodeID; +} + +size_t getNodeID(panmanUtils::Node* currentNode, panmanUtils::Node* node){ + size_t nodeID = 1; + bool found = false; + for (auto &n:currentNode->children) { + nodeID += getNodeIDHelper(n, node, found); + } + return nodeID; +} + +std::pair newTreeIDNodeID(panmanUtils::Node* node){ + // std::cout << "handling node: " << node->identifier << std::endl; + panmanUtils::Node* currentNode = node->parent; + std::pair treeIDNodeID= std::make_pair("", -1); + + while(currentNode != nullptr){ + if (currentNode->isComMutHead){ + // std::cout << "Found Head: " << currentNode->identifier << " " << currentNode->treeIndex << std::endl; + // treeIDNodeID.first 
= "node_" + std::to_string(getNodeID(currentNode, node)); + treeIDNodeID.second = currentNode->treeIndex; + return treeIDNodeID; + } + currentNode = currentNode->parent; + } +} + +bool checkCorrectness(const std::unordered_map allNodes ,std::string sequenceId1_, std::string sequenceId2_){ + bool correct = true; + panmanUtils::Node* node1; + for(auto a: allNodes){ + if (a.first == sequenceId1_){ + node1 = a.second; + break; + } + } + + panmanUtils::Node* node2; + for(auto a: allNodes){ + if (a.first == sequenceId2_){ + node2 = a.second; + break; + } + } + while (node1->parent != nullptr){ + if (node1->identifier == node2->identifier){ + correct = false; + break; + } + node1 = node1->parent; + } + return correct; + +} + + + panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg, std::ifstream& mutationFile) { // std::cout << "I am here" << std::endl; for (auto& t: tg) { trees.push_back(*t); } + // std::cout << doPreOrderLoop(trees[0].root) << std::endl; + // std::cout << trees[0].allNodes.size() << std::endl; + + // Predetermine tree ids + // std::vector mutationType_; + // std::vector treeIndex1_; + // std::vector treeIndex2_; + // std::vector treeIndex3_; + // std::vector sequenceId1_; + // std::vector sequenceId2_; + // std::vector sequenceId3_; + // std::vector startPoint1_; + // std::vector endPoint1_; + // std::vector startPoint2_; + // std::vector endPoint2_; + + // int cMutCount = 0; + // int treeCount = 0; + // unordered_map< std::string, std::pair> treeIndexMap; + // std::string line; + + // // set root at head + // tg[0]->root->isComMutHead = true; + // tg[0]->root->treeIndex = treeCount; + + // while(getline(mutationFile, line, '\n')) { + // std::vector< std::string > tokens; + // stringSplit(line, '\t', tokens); + + + // try { + // mutationType_.push_back(tokens[0][0]); + // treeIndex1_.push_back(std::stoi(tokens[1])); + // sequenceId1_.push_back(tokens[2]); + // treeIndex2_.push_back(std::stoi(tokens[3])); + // sequenceId2_.push_back(tokens[4]); + 
// startPoint1_.push_back(std::stoi(tokens[5])); + // endPoint1_.push_back(std::stoi(tokens[6])); + // startPoint2_.push_back(std::stoi(tokens[7])); + // endPoint2_.push_back(std::stoi(tokens[8])); + // treeIndex3_.push_back(std::stoi(tokens[9])); + // sequenceId3_.push_back(tokens[10]); + // } catch (const std::invalid_argument& e) { + // std::cerr << "Invalid argument: " << e.what() << " in line: " << line << std::endl; + // exit; // Skip this line and continue with the next one + // } catch (const std::out_of_range& e) { + // std::cerr << "Out of range: " << e.what() << " in line: " << line << std::endl; + // exit; // Skip this line and continue with the next one + // } + + // // if sequenceId_1 is child of seqeuenceId_3, then the mutation is not correct + // bool correct = true; + // if (treeIndex1_[cMutCount] == treeIndex3_[cMutCount]) { + // bool correct1 = checkCorrectness(trees[treeIndex1_[cMutCount]].allNodes , sequenceId1_[cMutCount], sequenceId3_[cMutCount]); + // if (!correct1) correct = correct1; + // std::cout << correct << std::endl; + // } + // if (treeIndex2_[cMutCount] == treeIndex3_[cMutCount]) { + // bool correct2 = checkCorrectness(trees[treeIndex2_[cMutCount]].allNodes , sequenceId2_[cMutCount], sequenceId3_[cMutCount]); + // if (!correct2) correct = correct2; + // std::cout << correct << std::endl; + // } + + + // cMutCount++; + // treeCount++; + // tg[0]->allNodes[tokens[10]]->isComMutHead = true; + // tg[0]->allNodes[tokens[10]]->treeIndex = treeCount; + // if (treeCount >=2){ + // std::pair treeIDNodeID1 = newTreeIDNodeID(tg[0]->allNodes[sequenceId1_[treeCount-1]]); + // // sequenceId1_[treeCount-1] = treeIDNodeID1.first; + // treeIndex1_[treeCount-1] = treeIDNodeID1.second; + + // std::pair treeIDNodeID2 = newTreeIDNodeID(tg[0]->allNodes[sequenceId2_[treeCount-1]]); + // // sequenceId2_[treeCount-1] = treeIDNodeID2.first; + // treeIndex2_[treeCount-1] = treeIDNodeID2.second; + + // std::pair treeIDNodeID3 = 
newTreeIDNodeID(tg[0]->allNodes[sequenceId3_[treeCount-1]]); + // // sequenceId3_[treeCount-1] = treeIDNodeID3.first; + // treeIndex3_[treeCount-1] = treeIDNodeID3.second; + // } + // std::cout << mutationType_[cMutCount-1] << " " << treeIndex1_[cMutCount-1] << " " << sequenceId1_[cMutCount-1] << " " << treeIndex2_[cMutCount-1] << " " << sequenceId2_[cMutCount-1] << " " << startPoint1_[cMutCount-1] << " " << endPoint1_[cMutCount-1] << " " << startPoint2_[cMutCount-1] << " " << endPoint2_[cMutCount-1] << " " << treeIndex3_[cMutCount-1] << " " << sequenceId3_[cMutCount-1] << std::endl; + // } + + // exit(0); + // mutation file format: mutation type (H or R), tree_1 index, sequence_1 name, tree_2 index, sequence_2 name, start_point_1, end_point_1, start_point_2, end_point_2, tree_3 index (child tree), sequence_3 (child sequence) name std::string line; while(getline(mutationFile, line, '\n')) { std::vector< std::string > tokens; stringSplit(line, '\t', tokens); - for (auto a: tokens) { - std::cout << a << std::endl; - } + // for (auto a: tokens) { + // std::cout << a << std::endl; + // } char mutationType = tokens[0][0]; size_t treeIndex1 = std::stoll(tokens[1]); std::string sequenceId1 = tokens[2]; @@ -5645,11 +5960,25 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg, std::ifstream& mutat std::string sequenceId3 = tokens[10]; bool splitOccurred = false; - std::cout << sequenceId1 << ", " << sequenceId2 << ": " << sequenceId3 << std::endl; + // std::cout << sequenceId1 << ", " << sequenceId2 << ": " << sequenceId3 << std::endl; + + // for (int i = 0; i < cMutCount; i++) { + // std::cout << i << std::endl; + // char mutationType = mutationType_[i]; + // size_t treeIndex1 = treeIndex1_[i]; + // std::string sequenceId1 = sequenceId1_[i]; + // size_t treeIndex2 = treeIndex2_[i]; + // std::string sequenceId2 = sequenceId2_[i]; + // size_t startPoint1 = startPoint1_[i]; + // size_t endPoint1 = endPoint1_[i]; + // size_t startPoint2 = startPoint2_[i]; + // size_t 
endPoint2 = endPoint2_[i]; + // size_t treeIndex3 = treeIndex3_[i]; + // std::string sequenceId3 = sequenceId3_[i]; + // bool splitOccurred = false; if(treeIndex3 == treeIndex1 && treeIndex3 == treeIndex2) { // If all three sequences are from the same tree, split this tree - std::cout << "Performing Split" << std::endl; std::pair< panmanUtils::Tree, panmanUtils::Tree > parentAndChild = trees[treeIndex1].splitByComplexMutations(sequenceId3); splitOccurred = true; trees[treeIndex1] = parentAndChild.first; @@ -5669,8 +5998,26 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg, std::ifstream& mutat trees[treeIndex2] = parentAndChild.first; trees.push_back(parentAndChild.second); treeIndex3 = trees.size()-1; + } else if (!trees[treeIndex3].allNodes[sequenceId3]->isComMutHead) { + // If child is not a head + std::pair< panmanUtils::Tree, panmanUtils::Tree > parentAndChild = trees[treeIndex3].splitByComplexMutations(sequenceId3); + splitOccurred = true; + trees[treeIndex3] = parentAndChild.first; + trees.push_back(parentAndChild.second); + treeIndex3 = trees.size()-1; + } else if (trees[treeIndex3].allNodes[sequenceId3]->isComMutHead) { + // If child is a head + continue; } + // for (auto a: trees){ + // std::cout << a.allNodes.size() << std::endl; + // } + + // for (auto a: trees[1].allNodes){ + // std::cout << a.first << std::endl; + // } + sequence_t sequence1, sequence2; blockExists_t blockExists1, blockExists2; blockStrand_t blockStrand1, blockStrand2; @@ -5696,6 +6043,12 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< Tree* >& tg, std::ifstream& mutat complexMutations.emplace_back(mutationType, treeIndex1, treeIndex2, treeIndex3, sequenceId1, sequenceId2, sequenceId3, t_start1, t_end1, t_start2, t_end2); } + + // std::cout << doPreOrderLoop(trees[0].root) << std::endl; + + // std::cout << doPreOrderLoop(trees[1].root) << std::endl; + + // exit(0); } @@ -5735,7 +6088,7 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< std::ifstream >& treeFiles, std:: 
if(treeIndex3 == treeIndex1 && treeIndex3 == treeIndex2) { // If all three sequences are from the same tree, split this tree - std::cout << "Performing Split" << std::endl; + // std::cout << "Performing Split" << std::endl; std::pair< panmanUtils::Tree, panmanUtils::Tree > parentAndChild = trees[treeIndex1].splitByComplexMutations(sequenceId3); splitOccurred = true; trees[treeIndex1] = parentAndChild.first; @@ -5784,22 +6137,35 @@ panmanUtils::TreeGroup::TreeGroup(std::vector< std::ifstream >& treeFiles, std:: } } -panmanUtils::TreeGroup::TreeGroup(std::istream& fin) { - kj::std::StdInputStream kjInputStream(fin); - capnp::InputStreamMessageReader messageReader(kjInputStream); +panmanUtils::TreeGroup::TreeGroup(std::istream& fin, bool isOld) { + if (!isOld) { + kj::std::StdInputStream kjInputStream(fin); + capnp::InputStreamMessageReader messageReader(kjInputStream); - panman::TreeGroup::Reader TG = messageReader.getRoot(); + panman::TreeGroup::Reader TG = messageReader.getRoot(); - int count=0; - for (auto treeFromTG: TG.getTrees()){ - // std::cout << "Tree " << count++ << ".." << std::endl; - trees.emplace_back(treeFromTG); - } - count=0; - for (auto compMutFromTG: TG.getComplexMutations()){ - // std::cout << "Complex Mutation " << count++ << ".." << std::endl; - complexMutations.emplace_back(compMutFromTG); + int count=0; + for (auto treeFromTG: TG.getTrees()){ + // std::cout << "Tree " << count++ << ".." << std::endl; + trees.emplace_back(treeFromTG); + } + count=0; + for (auto compMutFromTG: TG.getComplexMutations()){ + // std::cout << "Complex Mutation " << count++ << ".." 
<< std::endl; + complexMutations.emplace_back(compMutFromTG); + } + } else { + panmanOld::treeGroup TG; + if(!TG.ParseFromIstream(&fin)) { + throw std::invalid_argument("Could not read tree group from input file."); + } + for(int i = 0; i < TG.trees_size(); i++) { + trees.emplace_back(TG.trees(i)); + } + for(int i = 0; i < TG.complexmutations_size(); i++) { + complexMutations.emplace_back(TG.complexmutations(i)); + } } } @@ -5816,16 +6182,16 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { capnp::List::Builder treestoWriteBuilder = treeGroupToWrite.initTrees(trees.size()); size_t treesCount = 0; - // std::cout << "Writing Trees..." << std::endl; + std::cout << "Writing Trees..." << std::endl; for(auto& tree: trees) { std::cout << "Tree Count:" << treesCount << "..." << std::endl; panman::Tree::Builder treeToWrite = treestoWriteBuilder[treesCount++]; Node* node = tree.root; - capnp::List::Builder nodesBuilder = treeToWrite.initNodes(tree.allNodes.size()); + capnp::List::Builder nodesBuilder = treeToWrite.initNodes(tree.allNodes.size()+1); size_t nodeIndex=0; - // std::cout << "Printting Nodes\n"; + std::cout << tree.allNodes.size() << std::endl; tree.getNodesPreorder(node, nodesBuilder, nodeIndex); assert(nodeIndex == tree.allNodes.size()); @@ -5926,7 +6292,7 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { capnp::List::Builder complexMutBuilder = treeGroupToWrite.initComplexMutations(complexMutations.size()); size_t cmplxMutCount=0; - // std::cout << "Writing Complex Mutations..." << std::endl; + std::cout << "Writing Complex Mutations..." 
<< std::endl; for(auto cm: complexMutations) { panman::ComplexMutation::Builder cmBuilder = complexMutBuilder[cmplxMutCount++]; cm.toCapnProto(cmBuilder); @@ -5943,6 +6309,7 @@ void panmanUtils::TreeGroup::writeToFile(kj::std::StdOutputStream& fout) { void panmanUtils::TreeGroup::printComplexMutations(std::ostream& fout) { for(const auto& u: complexMutations) { + // std::cout << "Printing Complex Mutations: " << u.mutationType << std::endl; sequence_t s1, s2; blockExists_t b1, b2; blockStrand_t str1, str2; @@ -5961,23 +6328,27 @@ void panmanUtils::TreeGroup::printComplexMutations(std::ostream& fout) { co2 = trees[u.treeIndex2].circularSequences[u.sequenceId2]; } - fout << u.mutationType - << " " << u.treeIndex1 - << " " << u.sequenceId1 - << " " << u.treeIndex2 - << " " << u.sequenceId2 - << " " << trees[u.treeIndex1].getUnalignedGlobalCoordinate(u.primaryBlockIdStart1, + fout << trees[u.treeIndex1].getUnalignedGlobalCoordinate(u.primaryBlockIdStart1, u.secondaryBlockIdStart1, u.nucPositionStart1, u.nucGapPositionStart1, s1, b1, - str1, co1) - << " " << trees[u.treeIndex1].getUnalignedGlobalCoordinate(u.primaryBlockIdEnd1, - u.secondaryBlockIdEnd1, u.nucPositionEnd1, u.nucGapPositionEnd1, s1, b1, - str1, co1) - << " " << trees[u.treeIndex2].getUnalignedGlobalCoordinate(u.primaryBlockIdStart2, - u.secondaryBlockIdStart2, u.nucPositionStart2, u.nucGapPositionStart2, s2, b2, - str2, co2) - << " " << trees[u.treeIndex2].getUnalignedGlobalCoordinate(u.primaryBlockIdEnd2, - u.secondaryBlockIdEnd2, u.nucPositionEnd2, u.nucGapPositionEnd2, s2, b2, - str2, co2) - << " " << u.treeIndex3 << " " << u.sequenceId3 << "\n"; + str1, co1); + + // fout << u.mutationType + // << " " << u.treeIndex1 + // << " " << u.sequenceId1 + // << " " << u.treeIndex2 + // << " " << u.sequenceId2 + // << " " << trees[u.treeIndex1].getUnalignedGlobalCoordinate(u.primaryBlockIdStart1, + // u.secondaryBlockIdStart1, u.nucPositionStart1, u.nucGapPositionStart1, s1, b1, + // str1, co1) + // << " " << 
trees[u.treeIndex1].getUnalignedGlobalCoordinate(u.primaryBlockIdEnd1, + // u.secondaryBlockIdEnd1, u.nucPositionEnd1, u.nucGapPositionEnd1, s1, b1, + // str1, co1) + // << " " << trees[u.treeIndex2].getUnalignedGlobalCoordinate(u.primaryBlockIdStart2, + // u.secondaryBlockIdStart2, u.nucPositionStart2, u.nucGapPositionStart2, s2, b2, + // str2, co2) + // << " " << trees[u.treeIndex2].getUnalignedGlobalCoordinate(u.primaryBlockIdEnd2, + // u.secondaryBlockIdEnd2, u.nucPositionEnd2, u.nucGapPositionEnd2, s2, b2, + // str2, co2) + // << " " << u.treeIndex3 << " " << u.sequenceId3 << "\n"; } } diff --git a/src/panman.hpp b/src/panman.hpp index 28da979..90e04ec 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -12,6 +12,7 @@ #include #include "panman.capnp.h" +#include "panman.pb.h" #include "common.hpp" #include @@ -185,6 +186,26 @@ struct NucMut { } } + NucMut(panmanOld::nucMut mutation, int64_t blockId, bool blockGapExist) { + nucPosition = mutation.nucposition(); + primaryBlockId = (blockId >> 32); + mutInfo = (mutation.mutinfo() & 0xFF); + nucs = (mutation.mutinfo() >> 8); + nucs = ((nucs) << (24 - (mutInfo >> 4)*4)); + + if(blockGapExist) { + secondaryBlockId = (blockId & 0xFFFFFFFF); + } else { + secondaryBlockId = -1; + } + + if(mutation.nucgapexist()) { + nucGapPosition = mutation.nucgapposition(); + } else { + nucGapPosition = -1; + } + } + }; @@ -214,6 +235,19 @@ struct BlockMut { inversion = mutation.getBlockInversion(); } + void loadFromProtobuf(panmanOld::mutation mutation) { + primaryBlockId = (mutation.blockid() >> 32); + if(mutation.blockgapexist()) { + secondaryBlockId = (mutation.blockid() & 0xFFFFFFFF); + } else { + secondaryBlockId = -1; + } + blockMutInfo = mutation.blockmutinfo(); + // Whether the mutation is a block inversion or not. 
Inversion is marked by + // `blockMutInfo = deletion` and `inversion = true` + inversion = mutation.blockinversion(); + } + BlockMut(size_t blockId, std::pair< BlockMutationType, bool > type, int secondaryBId = -1) { primaryBlockId = blockId; secondaryBlockId = secondaryBId; @@ -280,6 +314,8 @@ class Node { std::vector< NucMut > nucMutation; std::vector< BlockMut > blockMutation; std::vector< std::string > annotations; + bool isComMutHead = false; + int treeIndex = -1; Node(std::string id, float len); Node(std::string id, Node* par, float len); @@ -297,6 +333,9 @@ class Tree { void assignMutationsToNodes(Node* root, size_t& currentIndex, std::vector& storedNode); + void assignMutationsToNodes(Node* root, size_t& currentIndex, + std::vector< panmanOld::node >& nodes); + // Get the total number of mutations of given type int getTotalParsimonyParallel(NucMutationType nucMutType, BlockMutationType blockMutType = NONE); @@ -414,6 +453,7 @@ class Tree { std::unordered_map< std::string, Node* > allNodes; Tree(const panman::Tree::Reader& mainTree); + Tree(const panmanOld::tree& mainTree); Tree(std::istream& fin, FILE_TYPE ftype = FILE_TYPE::PANMAT); Tree(std::ifstream& fin, std::ifstream& secondFin, FILE_TYPE ftype = FILE_TYPE::GFA, std::string reference = ""); @@ -425,8 +465,10 @@ class Tree { std::unordered_map< std::string, int >& ri, std::unordered_map< std::string, bool >& si, const BlockGapList& bgl); + void protoMATToTree(const panman::Tree::Reader& mainTree); + void protoMATToTree(const panmanOld::tree& mainTree); // Fitch Algorithm on Nucleotide mutations int nucFitchForwardPass(Node* node, std::unordered_map< std::string, int >& states, int refState=-1); @@ -728,6 +770,107 @@ struct ComplexMutation { // return cm; } + ComplexMutation(panmanOld::complexMutation cm) { + mutationType = (cm.mutationtype()? 
'H': 'R'); + treeIndex1 = cm.treeindex1(); + treeIndex2 = cm.treeindex2(); + treeIndex3 = cm.treeindex3(); + sequenceId1 = cm.sequenceid1(); + sequenceId2 = cm.sequenceid2(); + sequenceId3 = cm.sequenceid3(); + + primaryBlockIdStart1 = (cm.blockidstart1() >> 32); + secondaryBlockIdStart1 = (cm.blockgapexiststart1()? + (cm.blockidstart1()&(0xFFFFFFFF)): -1); + nucPositionStart1 = cm.nucpositionstart1(); + nucGapPositionStart1 = (cm.nucgapexiststart1()? (cm.nucgappositionstart1()) : -1); + + primaryBlockIdStart2 = (cm.blockidstart2() >> 32); + secondaryBlockIdStart2 = (cm.blockgapexiststart2()? + (cm.blockidstart2()&(0xFFFFFFFF)): -1); + nucPositionStart2 = cm.nucpositionstart2(); + nucGapPositionStart2 = (cm.nucgapexiststart2()? (cm.nucgappositionstart2()) : -1); + + primaryBlockIdEnd1 = (cm.blockidend1() >> 32); + secondaryBlockIdEnd1 = (cm.blockgapexistend1()? (cm.blockidend1()&(0xFFFFFFFF)): -1); + nucPositionEnd1 = cm.nucpositionend1(); + nucGapPositionEnd1 = (cm.nucgapexistend1()? (cm.nucgappositionend1()) : -1); + + primaryBlockIdEnd2 = (cm.blockidend2() >> 32); + secondaryBlockIdEnd2 = (cm.blockgapexistend2()? (cm.blockidend2()&(0xFFFFFFFF)): -1); + nucPositionEnd2 = cm.nucpositionend2(); + nucGapPositionEnd2 = (cm.nucgapexistend2()? 
(cm.nucgappositionend2()) : -1); + } + + panmanOld::complexMutation toProtobuf() { + panmanOld::complexMutation cm; + cm.set_mutationtype(mutationType == 'H'); + cm.set_treeindex1(treeIndex1); + cm.set_treeindex2(treeIndex2); + cm.set_treeindex3(treeIndex3); + cm.set_sequenceid1(sequenceId1); + cm.set_sequenceid2(sequenceId2); + cm.set_sequenceid3(sequenceId3); + + if(secondaryBlockIdStart1 != -1) { + cm.set_blockgapexiststart1(true); + cm.set_blockidstart1(((int64_t)primaryBlockIdStart1 << 32)+secondaryBlockIdStart1); + } else { + cm.set_blockgapexiststart1(false); + cm.set_blockidstart1(((int64_t)primaryBlockIdStart1 << 32)); + } + cm.set_nucpositionstart1(nucPositionStart1); + + if(nucGapPositionStart1 != -1) { + cm.set_nucgapexiststart1(true); + cm.set_nucgappositionstart1(nucGapPositionStart1); + } + + if(secondaryBlockIdStart2 != -1) { + cm.set_blockgapexiststart2(true); + cm.set_blockidstart2(((int64_t)primaryBlockIdStart2 << 32)+secondaryBlockIdStart2); + } else { + cm.set_blockgapexiststart2(false); + cm.set_blockidstart2(((int64_t)primaryBlockIdStart2 << 32)); + } + cm.set_nucpositionstart2(nucPositionStart2); + + if(nucGapPositionStart2 != -1) { + cm.set_nucgapexiststart2(true); + cm.set_nucgappositionstart2(nucGapPositionStart2); + } + + if(secondaryBlockIdEnd1 != -1) { + cm.set_blockgapexistend1(true); + cm.set_blockidend1(((int64_t)primaryBlockIdEnd1 << 32)+secondaryBlockIdEnd1); + } else { + cm.set_blockgapexistend1(false); + cm.set_blockidend1(((int64_t)primaryBlockIdEnd1 << 32)); + } + cm.set_nucpositionend1(nucPositionEnd1); + + if(nucGapPositionEnd1 != -1) { + cm.set_nucgapexistend1(true); + cm.set_nucgappositionend1(nucGapPositionEnd1); + } + + if(secondaryBlockIdEnd2 != -1) { + cm.set_blockgapexistend2(true); + cm.set_blockidend2(((int64_t)primaryBlockIdEnd2 << 32)+secondaryBlockIdEnd2); + } else { + cm.set_blockgapexistend2(false); + cm.set_blockidend2(((int64_t)primaryBlockIdEnd2 << 32)); + } + cm.set_nucpositionend2(nucPositionEnd2); + + 
if(nucGapPositionEnd2 != -1) { + cm.set_nucgapexistend2(true); + cm.set_nucgappositionend2(nucGapPositionEnd2); + } + + return cm; + } + }; // Data structure to represent PanMAN @@ -738,7 +881,7 @@ class TreeGroup { // List of complex mutations linking PanMATs std::vector< ComplexMutation > complexMutations; - TreeGroup(std::istream& fin); + TreeGroup(std::istream& fin, bool isOld = false); // List of PanMAT files and a file with all the complex mutations relating these files TreeGroup(std::vector< std::ifstream >& treeFiles, std::ifstream& mutationFile); TreeGroup(std::vector< Tree* >& t); diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index d5ed8cd..6bafc69 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -157,6 +157,7 @@ void setupOptionDescriptions() { ("printRoot", "Print root sequence") ("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") ("toUsher", "Convert a PanMAT in PanMAN to Usher-MAT") + ("protobuf2capnp", "Converts a Google Protobuf PanMAN to Capn' Proto PanMAN") ("low-mem-mode", "Perform Fitch Algrorithm in batch to save memory consumption") ("reference,n", po::value< std::string >(), "Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required)") @@ -853,6 +854,22 @@ void aa(panmanUtils::TreeGroup *TG, po::variables_map &globalVm, std::ofstream & if(globalVm.count("output-file")) outputFile.close(); } +void protobuf2capnp(panmanUtils::TreeGroup *TG, po::variables_map &globalVm) { + std::string fileName = globalVm["input-panman"].as< std::string >(); + std::ifstream inputFile(fileName); + boost::iostreams::filtering_streambuf< boost::iostreams::input> inPMATBuffer; + inPMATBuffer.push(boost::iostreams::lzma_decompressor()); + inPMATBuffer.push(inputFile); + std::istream inputStream(&inPMATBuffer); + + std::cout << "starting reading panman" << std::endl; + TG = new panmanUtils::TreeGroup(inputStream, true); + inputFile.close(); + + 
writePanMAN(globalVm, TG); + +} + void createNet(po::variables_map &globalVm, std::ofstream &outputFile, std::streambuf * buf) { // Create PanMAN from list of PanMAT files and a complex mutation file listing the complex // mutations relating these PanMATs @@ -872,7 +889,6 @@ void createNet(po::variables_map &globalVm, std::ofstream &outputFile, std::stre std::vector< std::ifstream > files; for(auto u: fileNames) { - std::cout << u << std::endl; files.emplace_back(u); } @@ -891,12 +907,12 @@ void createNet(po::variables_map &globalVm, std::ofstream &outputFile, std::stre tg.push_back(&TG->trees[i]); } - panmanUtils::TreeGroup* TG_new = new panmanUtils::TreeGroup(tg, mutationFile); auto treeBuiltEnd = std::chrono::high_resolution_clock::now(); std::chrono::nanoseconds treeBuiltTime = treeBuiltEnd - treeBuiltStart; std::cout << "Data load time: " << treeBuiltTime.count() << " nanoseconds \n"; + panmanUtils::TreeGroup* TG_new = new panmanUtils::TreeGroup(tg, mutationFile); mutationFile.close(); for(auto& u: files) { u.close(); @@ -1139,6 +1155,8 @@ void parseAndExecute(int argc, char* argv[]) { if(globalVm.count("help")) { std::cout << globalDesc; return; + } else if (globalVm.count("protobuf2capnp")) { + protobuf2capnp(TG, globalVm); } else if(globalVm.count("input-panmat")) { // Load PanMAT file directly into memory @@ -1338,7 +1356,6 @@ void parseAndExecute(int argc, char* argv[]) { writePanMAN(globalVm, TG); } else if (globalVm.count("create-network")) { - std::cout << "Entering here" << std::endl; std::ofstream outputFile; std::streambuf * buf; createNet(globalVm, outputFile, buf); From 6ec428b39fa368559036e6bfaf1fa94821a25d52 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 2 Jan 2025 22:24:16 -0800 Subject: [PATCH 081/103] google proto to capn proto --- CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 941fdd5..e8b419f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ 
cmake_minimum_required (VERSION 3.8) project(panmanUtils) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -Wall -pipe -g -ggdb3 -Wno-unused-function -Wno-deprecated-declarations -lboost_filesystem -lboost_system -lboost_program_options -w") set(CMAKE_INCLUDE_CURRENT_DIR ON) @@ -19,7 +19,6 @@ include_directories(${Protobuf_INCLUDE_DIRS}) # add capn proto - find_program(CAPNP_EXECUTABLE capnp REQUIRED) find_program(CAPNPC_CXX_EXECUTABLE capnpc-c++ REQUIRED) find_package(CapnProto REQUIRED) @@ -59,7 +58,7 @@ if(DEFINED CapnProto_PATH) protouf_generate( LANGUAGE cpp TARGET panmanUtils - PROTOS usher.proto) + PROTOS usher.proto panman.proto) else() capnp_generate_cpp( @@ -68,7 +67,7 @@ else() protobuf_generate_cpp( PROTO_SRCS PROTO_HDRS - usher.proto) + usher.proto panman.proto) add_executable(panmanUtils ${PANMAT_SRCS} From 8c1c7de920cedb282f86679574650d0b6fa2ec1a Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 2 Jan 2025 22:25:16 -0800 Subject: [PATCH 082/103] google proto to capn proto --- panman.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/panman.proto b/panman.proto index 89f5597..03f588c 100644 --- a/panman.proto +++ b/panman.proto @@ -1,6 +1,6 @@ syntax = "proto3"; -package panman; +package panmanOld; message nucMut { From c8fd657d6b0f252030aab0cbdceda7f17042761a Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 9 Jan 2025 12:30:27 -0800 Subject: [PATCH 083/103] updated workflow --- workflows/Snakefile | 141 ++++++++++++++++++++++++++++++--- workflows/scripts/splitSeqs.py | 2 +- workflows/scripts/wfmash.sh | 63 +++++++++++++++ 3 files changed, 192 insertions(+), 14 deletions(-) create mode 100755 workflows/scripts/wfmash.sh diff --git a/workflows/Snakefile b/workflows/Snakefile index 82c5fe0..39fa797 100644 --- a/workflows/Snakefile +++ 
b/workflows/Snakefile @@ -1,5 +1,5 @@ ''' -This is a snakemake workflow for building PanMAN from PanGraph, GFA, or MSA. +This is a snakemake workflow for building PanMAN from alignments (PanGraph, GFA, or MSA) or fragmented assembly. Users can run workflow as: @@ -10,25 +10,82 @@ Building PanMAN from PGGB Alignment Building PanMAN from MAFFT Alignment snakemake --use-conda --cores [num threads] --config RUNTYPE="msa" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] -Note: This workflow uses MashTree to build tree input for panmanUtils when building panman from gfa or msa. +Building PanMAN from Fragmented Assembly + snakemake --use-conda --cores [num threads] --config ASSEM="frag" RUNTYPE="pangraph/gfa/msa" REF="[user_fa]" TARGET="[target.txt]" SEQ_COUNT=[haplotype_count] + +Note: This workflow uses MashTree to build tree input for panmanUtils when building panman from GFA or MSA. ''' +def assem_rule(config): + assembly_rule = config.get("ASSEM", None) + if assembly_rule == "frag": + return "output/input_concat.fa" + def config_select(config): target_rule = config.get("RUNTYPE", None) + assembly_rule = config.get("ASSEM", None) - if target_rule == "pangraph": - return "output/pangraph.aln" - elif target_rule == "gfa": - return "output/pggb.aln" - elif target_rule == "msa": - return "output/mafft.aln" + if assembly_rule == "frag": + if target_rule == "pangraph": + return "output/frag_pangraph.aln" + elif target_rule == "gfa": + return "output/frag_pggb.aln" + elif target_rule == "msa": + return "output/frag_mafft.aln" + else: + raise ValueError(f"Unknown target rule '{target_rule}'. Choose from 'pangraph', 'gfa', or 'msa'.") else: - raise ValueError(f"Unknown target rule '{target_rule}'. Choose from 'pangraph', 'gfa', or 'msa'.") + if target_rule == "pangraph": + return "output/pangraph.aln" + elif target_rule == "gfa": + return "output/pggb.aln" + elif target_rule == "msa": + return "output/mafft.aln" + else: + raise ValueError(f"Unknown target rule '{target_rule}'. 
Choose from 'pangraph', 'gfa', or 'msa'.") + rule all: input: + assem_rule(config), config_select(config) +rule wfmash: + input: + ref = config["REF"], + target = config["TARGET"] + output: + concat_file = "output/input_concat.fa" + threads: + threads = 32 + shell: + ''' + mkdir -p output/concat + scripts/wfmash.sh {input.ref} {input.target} output/concat + cat output/concat/* > {output.concat_file} + ''' + +rule pangraph_frag: + input: + "output/input_concat.fa" + output: + aln = "output/frag_pangraph.aln", + newick = "output/pangraph.nwk", + panman = "panman/out.panman" + threads: + threads = 32 + shell: + ''' + echo "Building Alignment with PanGraph..." + export JULIA_NUM_THREADS={threads} + mkdir -p output + pangraph build {input[0]} > {output.aln} 2> {output.newick} + awk '/tree/ {{split($0,a,"tree: "); print a[2]}}' {output.newick} > temp.newick && mv temp.newick {output.newick} + + echo "Building PanMAN from Pangraph alignment..." + ../build/panmanUtils -P {output.aln} -N {output.newick} -o out + ''' + rule pangraph: input: config["FASTA"] @@ -50,6 +107,23 @@ rule pangraph: ../build/panmanUtils -P {output.aln} -N {output.newick} -o out ''' +rule mashtree_frag: + input: + "output/input_concat.fa" + output: + newick = "output/mashtree.nwk" + threads: + threads = 32 + shell: + ''' + echo "Building Tree with MashTree..." 
+ mkdir -p output + python3 scripts/splitSeqs.py {input[0]} temp + mashtree --numcpus {threads} temp/* > {output.newick} + python3 scripts/updateNewick.py {output.newick} temp_dir {output.newick} + rm -r temp temp_dir + ''' + rule mashtree: input: config["FASTA"] @@ -63,13 +137,31 @@ rule mashtree: mkdir -p output python3 scripts/splitSeqs.py {input[0]} temp mashtree --numcpus {threads} temp/* > {output.newick} - python3 scripts/updateNewick.py {output.newick} /temp {output.newick} - rm -r temp /temp + python3 scripts/updateNewick.py {output.newick} temp_dir {output.newick} + rm -r temp temp_dir + ''' + +rule mafft_frag: + input: + fasta = "output/input_concat.fa", + newick = "output/mashtree.nwk" + output: + aln = "output/frag_mafft.aln", + panman = "panman/out.panman" + threads: + threads = 32 + shell: + ''' + echo "Building Alignment with MAFFT..." + mafft --auto {input.fasta} > {output.aln} + + echo "Building PanMAN from MAFFT alignment..." + ../build/panmanUtils -M {output.aln} -N {input.newick} -o out ''' rule mafft: input: - config["FASTA"], + fasta = config["FASTA"], newick = "output/mashtree.nwk" output: aln = "output/mafft.aln", @@ -79,12 +171,35 @@ rule mafft: shell: ''' echo "Building Alignment with MAFFT..." - mafft --auto {input.newick} > {output.aln} + mafft --auto {input.fasta} > {output.aln} echo "Building PanMAN from MAFFT alignment..." ../build/panmanUtils -M {output.aln} -N {input.newick} -o out ''' +rule pggb_frag: + input: + fasta = "output/input_concat.fa", + newick = "output/mashtree.nwk" + output: + aln = "output/frag_pggb.aln", + panman = "panman/out.panman" + threads: + threads = 32 + params: + sq = config["SEQ_COUNT"] + shell: + ''' + echo "Building Alignment with PGGB..." + samtools faidx {input.fasta} + pggb --threads {threads} -i {input.fasta} -o pggb_temp -n {params.sq} + mv pggb_temp/*final.gfa {output.aln} + rm -r pggb_temp + + echo "Building PanMAN from PGGB alignment..." 
+ ../build/panmanUtils -G {output.aln} -N {input.newick} -o out + ''' + rule pggb: input: fasta = config["FASTA"], diff --git a/workflows/scripts/splitSeqs.py b/workflows/scripts/splitSeqs.py index 18ad9be..bea8a64 100644 --- a/workflows/scripts/splitSeqs.py +++ b/workflows/scripts/splitSeqs.py @@ -5,7 +5,7 @@ def splitFasta(input_file, output_dir): seqNameMap="" - ff = open("/temp", "w") + ff = open("temp_dir", "w") if not os.path.exists(output_dir): os.makedirs(output_dir) diff --git a/workflows/scripts/wfmash.sh b/workflows/scripts/wfmash.sh new file mode 100755 index 0000000..c26355c --- /dev/null +++ b/workflows/scripts/wfmash.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +ref="$1" +query_files="$2" +output_dir="$3" + +count=0 + +while IFS= read -r filename; do + basename="${filename%.*}" + query=$filename + order_file="order.txt" + wfmash_out=".txt" + final_fasta=$output_dir/out_${count}.fa + + wfmash $ref $query -m -p 95 -N -t 16 > $wfmash_out + + sort -k9,9n $wfmash_out | awk '{print $1}' > "$order_file" + + echo "Segment names sorted and saved to $order_file" + + concat_fasta() { + local fasta_file="$1" + local order_file="$2" + local output_file="$3" + + declare -A sequences + + local current_seq="" + while IFS= read -r line || [[ -n $line ]]; do + # Remove '\n' from the line + line=$(echo "$line" | tr -d '\n') + + if [[ $line == ">"* ]]; then + # Start of a new sequence header + current_seq=${line#>} + echo "$current_seq" + sequences["$current_seq"]="" + else + # Append the line to the current sequence + sequences["$current_seq"]+="$line" + fi + done < "$fasta_file" + + > "$output_file" + echo ">$basename" >> "$output_file" + output_data="" + while IFS= read -r seq_name || [[ -n $seq_name ]]; do + if [[ -n ${sequences["$seq_name"]} ]]; then + output_data+="${sequences["$seq_name"]}"$'\n' + else + echo "Warning: Sequence '$seq_name' not found in FASTA file" >&2 + fi + done < "$order_file" + echo -n "$output_data" >> "$output_file" + + echo "Concatenated sequences 
written to $output_file" + } + + concat_fasta $query $order_file $final_fasta + count=$((count + 1)) + +done < "$query_files" From e0ae4493118714dd499c44db2d32abeed9b49ab8 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 9 Jan 2025 13:16:41 -0800 Subject: [PATCH 084/103] annotation delim changed to tab --- src/annotate.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/annotate.cpp b/src/annotate.cpp index ff219bf..a17c8ee 100644 --- a/src/annotate.cpp +++ b/src/annotate.cpp @@ -2,13 +2,14 @@ void panmanUtils::Tree::annotate(std::ifstream& fin) { std::string line; + char delim = '\t'; while(getline(fin, line)) { std::string word; std::string nodeId; // Extract node ID size_t i = 0; - for(; i < line.length() && line[i]!=','; i++) { + for(; i < line.length() && line[i]!=delim; i++) { word+=line[i]; } @@ -41,7 +42,7 @@ void panmanUtils::Tree::annotate(std::ifstream& fin) { // Extract annotations for(; i < line.length(); i++) { - if(line[i] != ',') { + if(line[i] != delim) { word += line[i]; } else { word = stripString(word); From 944094ddb3d36ea61550db6ea7dcce98c5f35bac Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 9 Jan 2025 13:19:25 -0800 Subject: [PATCH 085/103] updated description of panmanUtils --- src/panmanUtils.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 6bafc69..8deade6 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -129,7 +129,7 @@ void setupOptionDescriptions() { globalDesc.add_options() ("help,h", "Print help messages") ("input-panman,I", po::value< std::string >(), "Input PanMAN file path") - ("input-panmat,T", po::value< std::string >(), "Input PanMAT file path") + // ("input-panmat,T", po::value< std::string >(), "Input PanMAT file path") ("input-pangraph,P", po::value< std::string >(), "Input PanGraph JSON file to build a PanMAN") ("input-gfa,G", po::value< std::string >(), "Input GFA file to build a 
PanMAN") ("input-msa,M", po::value< std::string >(), "Input MSA file (FASTA format) to build a PanMAN") @@ -140,31 +140,31 @@ void setupOptionDescriptions() { ("summary,s", "Print PanMAN summary") ("newick,t", "Print newick string of all trees in a PanMAN") - ("fasta,f", "Print tip/internal sequences (FASTA format)") + ("fasta,f", "Print tip sequences (FASTA format)") // ("fasta-fast", "Print tip/internal sequences (FASTA format)") ("fasta-aligned,m", "Print MSA of sequences for each PanMAT in a PanMAN (FASTA format)") ("subnet,b", "Extract subnet of given PanMAN to a new PanMAN file based on the list of nodes provided in the input-file") ("vcf,v", "Print variations of all sequences from any PanMAT in a PanMAN (VCF format)") ("gfa,g", "Convert any PanMAT in a PanMAN to a GFA file") ("maf,w", "Print m-WGA for each PanMAT in a PanMAN (MAF format)") - ("annotate,a", "Annotate nodes of the input PanMAN based on the list provided in the input-file") + ("annotate,a", "Annotate nodes of the input PanMAN based on the list provided in the input-file (TSV)") ("reroot,r", "Reroot a PanMAT in a PanMAN based on the input sequence id (--reference)") - ("aa-translation,v", "Extract amino acid translations in tsv file") + ("aa-translation,v", "Extract amino acid translations in TSV file") ("extended-newick,e", "Print PanMAN's network in extended-newick format") ("printMutations,p", "Create PanMAN with network of trees from single or multiple PanMAN files") ("acr,q", "ACR method [fitch(default), mppa]") ("index",po::value< bool >(0), "Generating indexes and print sequence (passed as reference) between x:y") - ("printRoot", "Print root sequence") - ("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") + // ("printRoot", "Print root sequence") + // ("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") ("toUsher", "Convert a PanMAT in PanMAN to Usher-MAT") - ("protobuf2capnp", "Converts a Google Protobuf 
PanMAN to Capn' Proto PanMAN") + // ("protobuf2capnp", "Converts a Google Protobuf PanMAN to Capn' Proto PanMAN") ("low-mem-mode", "Perform Fitch Algrorithm in batch to save memory consumption") ("reference,n", po::value< std::string >(), "Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required)") ("start,s", po::value< int64_t >(), "Start coordinate of protein translation/Start coordinate for indexing") ("end,e", po::value< int64_t >(), "End coordinate of protein translation/End coordinate for indexing") ("treeID,d", po::value< std::string >(), "Tree ID, required for --vcf") - ("tree-group", po::value< std::vector< std::string > >()->multitoken(), "File paths of PMATs to generate tree group") + // ("tree-group", po::value< std::vector< std::string > >()->multitoken(), "File paths of PMATs to generate tree group") ("input-file,i", po::value< std::string >(), "Path to the input file, required for --subnet, --annotate, and --create-network") ("output-file,o", po::value< std::string >(), "Prefix of the output file name") ("threads", po::value< std::int32_t >(), "Number of threads") From 1521f3b87eb9a835f7ad4f7b2097c2735f9f01d2 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 9 Jan 2025 13:21:06 -0800 Subject: [PATCH 086/103] updated description of panmanUtils --- src/panmanUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 8deade6..9d194a7 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -161,8 +161,8 @@ void setupOptionDescriptions() { ("low-mem-mode", "Perform Fitch Algrorithm in batch to save memory consumption") ("reference,n", po::value< std::string >(), "Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required)") - ("start,s", po::value< int64_t >(), "Start coordinate of protein translation/Start coordinate for indexing") - ("end,e", po::value< int64_t >(), 
"End coordinate of protein translation/End coordinate for indexing") + ("start,x", po::value< int64_t >(), "Start coordinate of protein translation/Start coordinate for indexing") + ("end,y", po::value< int64_t >(), "End coordinate of protein translation/End coordinate for indexing") ("treeID,d", po::value< std::string >(), "Tree ID, required for --vcf") // ("tree-group", po::value< std::vector< std::string > >()->multitoken(), "File paths of PMATs to generate tree group") ("input-file,i", po::value< std::string >(), "Path to the input file, required for --subnet, --annotate, and --create-network") From d12f84420317641db68fb7f7c06fc91cb900b66f Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 9 Jan 2025 13:40:56 -0800 Subject: [PATCH 087/103] updated docs --- docs/construction.md | 61 ------- docs/index.md | 401 ++++++++++++++++++++++++++++--------------- docs/install.md | 74 -------- docs/utils.md | 210 ---------------------- mkdocs.yml | 6 +- 5 files changed, 268 insertions(+), 484 deletions(-) diff --git a/docs/construction.md b/docs/construction.md index 7ec48fb..b0dac65 100644 --- a/docs/construction.md +++ b/docs/construction.md @@ -1,63 +1,2 @@ -# PanMAN Construction -Here, we will learn to build PanMAN from various input formats. - -**Step 0:** The Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error -```bash -# enter into the panman directory (assuming $PANMAN directs to the panman repository directory) -cd $PANMAN_HOME -``` -```bash -cd $PANMAN_HOME/build -./panmanUtils --help -``` -### Building PanMAN from PanGraph - -**Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom PanGraph (JSON) and tree topology (Newick format) files to build a panman. 
- -**Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: - -```bash -cd $PANMAN_HOME/build -./panmanUtils -P $PANMAN_HOME/test/sars_20.json -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 -``` -The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. - -### Building PanMAN from GFA - -**Step 1:** Check if `sars_20.gfa` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom GFA and tree topology (Newick format) files to build a panman. - -**Step 2:** Run panmanUtils with the following command to build a panman from GFA: - -```bash -cd $PANMAN_HOME/build -./panmanUtils -G $PANMAN_HOME/test/sars_20.gfa -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 -``` -The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. - -### Building PanMAN from MSA (FASTA format) - -**Step 1:** Check if `sars_20.msa` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom MSA (FASTA format) and tree topology (Newick format) files to build a panman. - -**Step 2:** Run panmanUtils to build a panman from GFA using the following commands: - -```bash -cd $PANMAN_HOME/build -./panmanUtils -M $PANMAN_HOME/test/sars_20.msa -N $PANMAN_HOME/test/sars_20.nwk -O sars_20 -``` -The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. - -### Building PanMAN from raw genome sequences (Snakemake Workflow) -We provide a Snakemake workflow to construct PanMANs from raw sequences (FASTA format). - -!!!Note - The Snakemake workflow uses various tools such as PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively and it is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](install.md)). 
- -**Step 1:** Run the following command to construct a panman from raw sequences. - -```bash -cd $PANMAN_HOME/workflows -conda activate snakemake -snakemake --use-conda --cores [num threads] --config RUNTYPE="[pangraph/gfa/msa]" FASTA="[user_fasta]" SEQ_COUNT=[haplotype_count] -``` \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 6750fab..a5a8956 100644 --- a/docs/index.md +++ b/docs/index.md @@ -37,230 +37,359 @@ PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](ht ## Video Tutorial TBA -## Contributions -We welcome contributions from the community to enhance the capabilities of PanMAN and panmanUtils. If you encounter any issues or have suggestions for improvement, please open an issue on [PanMAN GitHub page](https://github.com/TurakhiaLab/panman). For general inquiries and support, reach out to our team. +# Installation Methods -## Citing PanMAN -If you use the PanMANs or panmanUtils in your research or publications, we kindly request that you cite the following paper:
-* Sumit Walia, Harsh Motwani, Kyle Smith, Russell Corbett-Detig, Yatish Turakhia, "Compressive Pangenomics Using Mutation-Annotated Networks", bioRxiv 2024.07.02.601807; doi: [10.1101/2024.07.02.601807](https://doi.org/10.1101/2024.07.02.601807) +## Using installation script (requires sudo access) - + +### Functionalities in panmanUtils +All panmanUtils functionality commands manipulate the input PanMAN file. +```bash +cd $PANMAN_HOME/build +./panmanUtils -I {opt} +``` +
Table 1: List of functionalities supported by panmanUtils
+ +| **Option** | **Description** | +|----------------------------------|-------------------------------------------------------------------------------------------------------------------| +|`-I`, `--input-panman` | Input PanMAN file path | +| `-s`, `--summary` | Print PanMAN summary | +| `-t`, `--newick` | Print Newick string of all trees in a PanMAN | +| `-f`, `--fasta` | Print tip/internal sequences (FASTA format) | +| `-m`, `--fasta-aligned` | Print MSA of sequences for each PanMAT in a PanMAN (FASTA format) | +| `-b`, `--subnet` | Extract subnet of given PanMAN to a new PanMAN file based on the list of nodes provided in the input file | +| `-v`, `--vcf` | Print variations of all sequences from any PanMAT in a PanMAN (VCF format) | +| `-g`, `--gfa` | Convert any PanMAT in a PanMAN to a GFA file | +| `-w`, `--maf` | Print m-WGA for each PanMAT in a PanMAN (MAF format) | +| `-a`, `--annotate` | Annotate nodes of the input PanMAN based on the list provided in the input file | +| `-r`, `--reroot` | Reroot a PanMAT in a PanMAN based on the input sequence id (`--reference`) | +| `-v`, `--aa-translation` | Extract amino acid translations in tsv file | +| `-e`, `--extended-newick` | Print PanMAN's network in extended-newick format | +| `-k`, `--create-network` | Create PanMAN with network of trees from single or multiple PanMAN files | +| `-p`, `--printMutations` | Create PanMAN with network of trees from single or multiple PanMAN files | +| `-q`, `--acr` | ACR method `[fitch(default), mppa]` | +| `-n`, `--reference` | Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required) | +| `-x`, `--start` | Start coordinate of protein translation | +| `-y`, `--end` | End coordinate of protein translation | +| `-d`, `--treeID` | Tree ID, required for `--vcf` | +| `-i`, `--input-file` | Path to the input file, required for `--subnet`, `--annotate`, and `--create-network` | +| `-o`, `--output-file` | Prefix of the output
file name | + + + +> **Important:** When output-file argument is optional and is not provided to panmanUtils, the output will be printed in the terminal. + +!!!Note + For all the examples below, `sars_20.panman` will be used as input panman. Alternatively, users can provide custom build panman using the instructions provided [here](construction.md). #### Summary extract The summary feature extracts node and tree level statistics of a PanMAN, that contains a summary of its geometric and parsimony information. -* Example syntax and Usage -``` -$ ./panmanUtils -I --summary --output-file= (optional) -``` +* Usage Syntax +```bash +./panmanUtils -I --summary --output-file= (optional) ``` -$ ./panmanUtils -I ecoli_10.panman --summary --output-file=ecoli_10 +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --summary --output-file=sars_20 ``` #### Newick extract Extract Newick string of all trees in a PanMAN. -* Example syntax and Usage +* Usage syntax +```bash +./panmanUtils -I --newick --output-file= (optional) ``` -$ ./panmanUtils -I --newick --output-file= (optional) -``` -``` -$ ./panmanUtils -I ecoli_10.panman --newick --output-file=ecoli_10 +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --newick --output-file=sars_20 ``` #### Extended Newick extract Extract network in Extended Newick format. -* Example syntax and Usage -``` -$ ./panmanUtils -I ----extended-newick --output-file= (optional) -``` +* Usage syntax +```bash +./panmanUtils -I --extended-newick --output-file= (optional) ``` -$ ./panmanUtils -I ecoli_10.panman ----extended-newick --output-file=ecoli_10 +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --extended-newick --output-file=sars_20 ``` #### Tip/internal node sequences extract Extract tip and internal node sequences from a PanMAN in a FASTA format. 
-* Example syntax and Usage +* Usage syntax +```bash +./panmanUtils -I --fasta --output-file= (optional) ``` -$ ./panmanUtils -I --fasta --output-file= (optional) -``` -``` -$ ./panmanUtils -I ecoli_10.panman --fasta --output-file=ecoli_10 +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --fasta --output-file=sars_20 ``` #### Multiple Sequence Alignment (MSA) extract Extract MSA of sequences for each PanMAT (with pseduo-root coordinates) in a PanMAN in a FASTA format. -* Example syntax and Usage -``` -$ ./panmanUtils -I --fasta-aligned --output-file= (optional) -``` +* Usage syntax +```bash +./panmanUtils -I --fasta-aligned --output-file= (optional) ``` -$ ./panmanUtils -I ecoli_10.panman --fasta-aligned --output-file=ecoli_10 +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --fasta-aligned --output-file=sars_20 ``` #### Multiple Whole Genome Alignment (m-WGA) extract Extract m-WGA for each PanMAT in a PanMAN in the form of a UCSC multiple alignment format (MAF). -* Example syntax and Usage +* Usage syntax +```bash +./panmanUtils -I --maf --output-file= (optional) ``` -$ ./panmanUtils -I --maf --output-file= (optional) -``` -``` -$ ./panmanUtils -I ecoli_10.panman --maf --output-file=ecoli_10 +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --maf --output-file=sars_20 ``` #### Variant Call Format (VCF) extract Extract variations of all sequences from any PanMAT in a PanMAN in the form of a VCF file with respect to any reference sequence (ref) in the PanMAT. 
-* Example syntax and Usage -``` -$ ./panmanUtils -I --vcf -reference=ref --output-file= (optional) -``` +* Usage syntax +```bash +./panmanUtils -I --vcf --reference=ref --output-file= (optional) ``` -$ ./panmanUtils -I ecoli_10.panman --vcf -reference=NC_000913.3 --output-file=ecoli_10 +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --vcf --reference="Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12" --output-file=sars_20 ``` #### Graphical fragment assembly (GFA) extract Convert any PanMAT in a PanMAN to a Graphical fragment assembly (GFA) file representing the pangenome. -* Example syntax and Usage +* Usage syntax +```bash +./panmanUtils -I --gfa --output-file= (optional) ``` -$ ./panmanUtils -I --gfa --output-file= (optional) -``` -``` -$ ./panmanUtils -I ecoli_10.panman --gfa --output-file=ecoli_10 +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --gfa --output-file=sars_20 ``` #### Subnetwork extract Extract a subnetwork from a given PanMAN and write it to a new PanMAN file based on the list of nodes provided in the input-file. -* Example syntax and Usage -``` -$ ./panmanUtils -I --subnet --input-file= --output-file= +* Usage syntax +```bash +./panmanUtils -I --subnet --input-file= --output-file= ``` -``` -$ ./panmanUtils -I ecoli_10.panman --subnet --input-file=nodes.txt --output-file=ecoli_10_subnet +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --subnet --input-file=nodes.txt --output-file=ecoli_10_subnet ``` #### Annotate Annotate nodes in a PanMAN with a custom string, later searched by these annotations, using an input TSV file containing a list of nodes and their corresponding custom annotations.
-* Example syntax and Usage -``` -$ ./panmanUtils -I --annotate --output-file=ecoli_10_annotate +* Usage syntax +```bash +./panmanUtils -I --annotate --input-file= --output-file=ecoli_10_annotate ``` -``` -$ ./panmanUtils -I ecoli_10.panman --annotate --input-file=annotations.tsv --output-file=ecoli_10_annotate +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --annotate --input-file=annotations.tsv --output-file=ecoli_10_annotate ``` > **NOTE:** If output-file is not provided to panmanUtils, the annotated PanMAN will be written to the same file. #### Amino Acid Translation Extract amino acid translations from a PanMAN in TSV file. -* Example syntax and Usage +* Usage syntax +```bash +./panmanUtils -I --aa-translation --output-file= (optional) ``` -$ ./panmanUtils -I --aa-translations --output-file= (optional) +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I panman/sars_20.panman --aa-translation --output-file=sars_20 ``` + +### panmanUtils Interactive mode +**Step 1:** Users can enter panmanUtils's interactive mode by passing input panman as input using the following command: + +```bash +./panmanUtils -I +## Example +./panmanUtils -I panman/sars_20.panman ``` -$ ./panmanUtils -I ecoli_10.panman --aa-translations --output_file=ecoli_10 -``` --> +!!! Note + The interactive mode should look like the image attached below + ![Interactive Mode](images/interactiveMode.png) + +**Step 2:** Use the commands listed in [Table 1](#table1) to perform desired operation + +## Contributions +We welcome contributions from the community to enhance the capabilities of PanMAN and panmanUtils. If you encounter any issues or have suggestions for improvement, please open an issue on [PanMAN GitHub page](https://github.com/TurakhiaLab/panman). For general inquiries and support, reach out to our team. + +## Citing PanMAN +If you use the PanMANs or panmanUtils in your research or publications, we kindly request that you cite the following paper:
+* Sumit Walia, Harsh Motwani, Kyle Smith, Russell Corbett-Detig, Yatish Turakhia, "Compressive Pangenomics Using Mutation-Annotated Networks", bioRxiv 2024.07.02.601807; doi: [10.1101/2024.07.02.601807](https://doi.org/10.1101/2024.07.02.601807) diff --git a/docs/install.md b/docs/install.md index b0cd119..e69de29 100644 --- a/docs/install.md +++ b/docs/install.md @@ -1,74 +0,0 @@ -# Installation Methods - -## Using installation script (requires sudo access) - -0. Dependencies - i. Git - -1. Clone the repository -```bash -git clone https://github.com/TurakhiaLab/panman.git -cd panman -``` -2. Run the installation script -```bash -chmod +x install/installationUbuntu.sh -./install/installationUbuntu.sh -``` -3. Run panmanUtils -```bash -cd build -./panmanUtils --help -``` -!!!Note - panmanUtils is built using CMake and depends upon libraries such as Boost, cap'n proto, etc, which are also installed in `installationUbuntu.sh`. If users face version issues, try using the docker methods detailed below. - -## Using Docker Image - -To use panmanUtils in a docker container, users can create a docker container from a docker image, by following these steps - -0. Dependencies - i. Docker -1. Pull the PanMAN docker image from DockerHub -```bash -docker pull swalia14/panman:latest -``` -2. Build and run the docker container -```bash -docker run -it swalia14/panman:latest -``` -3. Run panmanUtils -```bash -# Insider docker container -cd /home/panman/build -./panmanUtils --help -``` -!!!Note - The docker image comes with preinstalled panmanUtils and other tools such as PanGraph, PGGB, and RIVET. - -## Using DockerFile -Docker container with preinstalled panmanUtils can also be built from DockerFile by following these steps - -0. Dependencies - i. Docker - ii. Git -1. Clone the repository -```bash -git clone https://github.com/TurakhiaLab/panman.git -cd panman -``` -2. Build a docker image -```bash -cd docker -docker build -t panman . -``` -3. 
Build and run docker container -```bash -docker run -it panman -``` -4. Run panmanUtils -```bash -# Insider docker container -cd /home/panman/build -./panmanUtils --help -``` diff --git a/docs/utils.md b/docs/utils.md index 6e933b7..8b13789 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -1,211 +1 @@ -# Exploring utilities in panmanUtils -Here, we will learn to use exploit various functionalities provided in panmanUtils software for downstream applications in epidemiological, microbiological, metagenomic, ecological, and evolutionary studies. - -**Step 0:** The Steps below require panmanUtils and a PanMAN. We provide a pre-built panman (`sars_20.panman`), othewise, refer to [installation guide](install.md) to install panmanUtils and [construction](construction.md) instructions to build a PanMAN. - - -### Functionalities in panmanUtils -All panmanUtils functionality commands manipulate the input PanMAN file. -```bash -cd $PANMAN_HOME/build -./panmanUtils -I {opt} -``` -
Table 1: List of functionalities supported by panmanUtils
- -| **Option** | **Description** | -|----------------------------------|-------------------------------------------------------------------------------------------------------------------| -|`-I`, `--input-panman` | Input PanMAN file path | -| `-s`, `--summary` | Print PanMAN summary | -| `-t`, `--newick` | Print Newick string of all trees in a PanMAN | -| `-f`, `--fasta` | Print tip/internal sequences (FASTA format) | -| `-m`, `--fasta-aligned` | Print MSA of sequences for each PanMAT in a PanMAN (FASTA format) | -| `-b`, `--subnet` | Extract subnet of given PanMAN to a new PanMAN file based on the list of nodes provided in the input file | -| `-v`, `--vcf` | Print variations of all sequences from any PanMAT in a PanMAN (VCF format) | -| `-g`, `--gfa` | Convert any PanMAT in a PanMAN to a GFA file | -| `-w`, `--maf` | Print m-WGA for each PanMAT in a PanMAN (MAF format) | -| `-a`, `--annotate` | Annotate nodes of the input PanMAN based on the list provided in the input file | -| `-r`, `--reroot` | Reroot a PanMAT in a PanMAN based on the input sequence id (`--reference`) | -| `-v`, `--aa-translation` | Extract amino acid translations in tsv file | -| `-e`, `--extended-newick` | Print PanMAN's network in extended-newick format | -| `-k`, `--create-network` | Create PanMAN with network of trees from single or multiple PanMAN files | -| `-p`, `--printMutations` | Create PanMAN with network of trees from single or multiple PanMAN files | -| `-q`, `--acr` | ACR method `[fitch(default), mppa]` | -| `-n`, `--reference` | Identifier of reference sequence for PanMAN construction (optional), VCF extract (required), or reroot (required) | -| `-s`, `--start` | Start coordinate of protein translation | -| `-e`, `--end` | End coordinate of protein translation | -| `-d`, `--treeID` | Tree ID, required for `--vcf` | -| `-i`, `--input-file` | Path to the input file, required for `--subnet`, `--annotate`, and `--create-network` | -| `-o`, `--output-file` | Prefix of the output 
file name | - - - -> **Important:** When output-file argument is optional and is not provided to panmanUtils, the output will be printed in the terminal. - -!!!Note - For all the examples below, `sars_20.panman` will be used as input panman. Alternatively, users can provide custom build panman using the instructions provided [here](construction.md). - -#### Summary extract -The summary feature extracts node and tree level statistics of a PanMAN, that contains a summary of its geometric and parsimony information. - -* Usage Syntax -```bash -./panmanUtils -I --summary --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --summary --output-file=sars_20 -``` - -#### Newick extract -Extract Newick string of all trees in a PanMAN. - -* Usage syntax -```bash -./panmanUtils -I --newick --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --newick --output-file=sars_20 -``` - -#### Extended Newick extract -Extract network in Extended Newick format. - -* Usage syntax -```bash -./panmanUtils -I --extended-newick --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --extended-newick --output-file=sars_20 -``` - -#### Tip/internal node sequences extract -Extract tip and internal node sequences from a PanMAN in a FASTA format. - -* Usage syntax -```bash -./panmanUtils -I --fasta --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --fasta --output-file=sars_20 -``` - -#### Multiple Sequence Alignment (MSA) extract -Extract MSA of sequences for each PanMAT (with pseduo-root coordinates) in a PanMAN in a FASTA format. 
- -* Usage syntax -```bash -./panmanUtils -I --fasta-aligned --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --fasta-aligned --output-file=sars_20 -``` - -#### Multiple Whole Genome Alignment (m-WGA) extract -Extract m-WGA for each PanMAT in a PanMAN in the form of a UCSC multiple alignment format (MAF). - -* Usage syntax -```bash -./panmanUtils -I --maf --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --maf --output-file=sars_20 -``` - -#### Variant Call Format (VCF) extract -Extract variations of all sequences from any PanMAT in a PanMAN in the form of a VCF file with respect to any reference sequence (ref) in the PanMAT. - -* Usage syntax -```bash -./panmanUtils -I --vcf --reference=ref --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --vcf --reference="Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12" --output-file=sars_20 -``` - -#### Graphical fragment assembly (GFA) extract -Convert any PanMAT in a PanMAN to a Graphical fragment assembly (GFA) file representing the pangenome. - -* Usage syntax -```bash -./panmanUtils -I --gfa --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --gfa --output-file=sars_20 -``` - -#### Subnetwork extract -Extract a subnetwork from a given PanMAN and write it to a new PanMAN file based on the list of nodes provided in the input-file. - -* Usage syntax -```bash -./panmanUtils -I --subnet --input-file= --output-file= -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --subnet --input-file=nodes.txt --output-file=ecoli_10_subnet -``` - -#### Annotate -Annotate nodes in a PanMAN with custom strings, which can later be used to search for those nodes, using an input TSV file containing a list of nodes and their corresponding custom annotations. 
- -* Usage syntax -```bash -./panmanUtils -I --annotate --input-file= --output-file=ecoli_10_annotate -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --annotate --input-file=annotations.tsv --output-file=ecoli_10_annotate -``` -> **NOTE:** If output-file is not provided to panmanUtils, the annotated PanMAN will be written to the same file. - -#### Amino Acid Translation -Extract amino acid translations from a PanMAN as a TSV file. - -* Usage syntax -```bash -./panmanUtils -I --aa-translation --output-file= (optional) -``` -* Example -```bash -cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --aa-translation --output-file=sars_20 -``` - -### panmanUtils Interactive mode -**Step 1:** Users can enter panmanUtils's interactive mode by passing input panman as input using the following command: - -```bash -./panmanUtils -I -## Example -./panmanUtils -I panman/sars_20.panman -``` - -!!! Note - The interactive mode should look like the image attached below - - ![Interactive Mode](images/interactiveMode.png) - -**Step 2:** Use the commands listed in [Table 1](#table1) to perform the desired operation diff --git a/mkdocs.yml b/mkdocs.yml index 8adc438..0ac21d8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -90,9 +90,9 @@ markdown_extensions: # use_directory_urls: false nav: - Home: index.md - - Install: install.md - - Construction: construction.md - - Utility: utils.md + # - Install: install.md + # - Construction: construction.md + # - Utility: utils.md # extra_javascript: # - javascripts/mathjax.js From 473ac0974393923264cdacd930f6dc205170e0ad Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 9 Jan 2025 13:47:14 -0800 Subject: [PATCH 088/103] updated cmake.yml file --- .github/workflows/cmake.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 7517cfe..3a19ffc 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -3,7 
+3,8 @@ on: push: branches: - base - - main + - main + - capnp permissions: contents: write From a47e42192ca83f9a642b919c5b5e8d9032bf3540 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 9 Jan 2025 13:55:37 -0800 Subject: [PATCH 089/103] updated docs --- mkdocs.yml | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 0ac21d8..74b8044 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -5,27 +5,25 @@ repo_url: https://github.com/TurakhiaLab/panman theme: name: material features: - - announce.dismiss + # - announce.dismiss + - content.tabs.link - content.code.annotation - content.code.copy - - content.tabs.link + - content.action.edit + - content.action.view + - content.tooltips - toc.follow - toc.integrate - # - navigation.sections - - navigation.expand + - navigation.tabs + - navigation.sections - navigation.path - # - navigation.tabs - - navigation.top + - navigation.tabs.sticky + - navigation.instant.prefetch + - navigation.tracking - navigation.footer + - navigation.expand - search.highlight - search.suggest - # - content.action.edit - # - content.action.view - # - content.tooltips - # - navigation.tabs.sticky - # - navigation.instant.prefetch - # - navigation.tracking - # - search.share language: en @@ -51,16 +49,16 @@ plugins: - search # icon: - # admonition: - # note: octicons/tag-16 - # info: octicons/info-16 - # tip: octicons/squirrel-16 - # success: octicons/check-16 - # question: octicons/question-16 - # warning: octicons/alert-16 - # bug: octicons/bug-16 - # example: octicons/beaker-16 - # quote: octicons/quote-16 + admonition: + note: octicons/tag-16 + info: octicons/info-16 + tip: octicons/squirrel-16 + success: octicons/check-16 + question: octicons/question-16 + warning: octicons/alert-16 + bug: octicons/bug-16 + example: octicons/beaker-16 + quote: octicons/quote-16 extra: social: @@ -87,12 +85,9 @@ markdown_extensions: emoji_index: 
!!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:materialx.emoji.to_svg -# use_directory_urls: false +use_directory_urls: false nav: - Home: index.md - # - Install: install.md - # - Construction: construction.md - # - Utility: utils.md # extra_javascript: # - javascripts/mathjax.js From 24a0692a54446670cf42727418589170764b725b Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Thu, 9 Jan 2025 14:03:40 -0800 Subject: [PATCH 090/103] updated docs --- docs/index.md | 21 +++++++++++---------- mkdocs.yml | 27 ++++++++++----------------- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/docs/index.md b/docs/index.md index a5a8956..64e25ef 100644 --- a/docs/index.md +++ b/docs/index.md @@ -3,7 +3,8 @@
-## What are PanMANs? +## Introduction +### What are PanMANs? PanMAN or Pangenome Mutation-Annotated Network is a novel data representation for pangenomes that provides massive leaps in both representative power and storage efficiency. Specifically, PanMANs are composed of mutation-annotated trees, called PanMATs, which, in addition to substitutions, also annotate inferred indels (Fig. 2b), and even structural mutations (Fig. 2a) on the different branches. Multiple PanMATs are connected in the form of a network using edges to generate a PanMAN (Fig. 2c). PanMAN's representative power is compared against existing pangenomic formats in Fig. 1. PanMANs are the most compressible pangenomic format for the different microbial datasets (SARS-CoV-2, RSV, HIV, Mycobacterium tuberculosis, E. coli, and Klebsiella pneumoniae), providing 2.9 to 559-fold compression over standard pangenomic formats.
@@ -18,7 +19,7 @@ PanMAN or Pangenome Mutation-Annotated Network is a novel data representation fo
-## PanMAN's Protocol Buffer file format +### PanMAN's Protocol Buffer file format PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](https://protobuf.dev/)), a binary serialization file format, to compactly store PanMAN's data structure in a file. Fig. 3 provides the .proto file defining the PanMAN’s structure. At the top level, the file format of PanMANs encodes a list (declared as a repeated identifier in the .proto file) of PanMATs. Each PanMAT object stores the following data elements: (a) a unique identifier, (b) a phylogenetic tree stored as a string in Newick format, (c) a list of mutations on each branch ordered according to the pre-order traversal of the tree topology, (d) a block mapping object to record homologous segments identified as duplications and rearrangements, which are mapped against their common consensus sequence; the block-mapping object is also used to derive the pseudo-root, and (e) a gap list to store the position and length of gaps corresponding to each block's consensus sequence. Each mutation object encodes the node's block and nucleotide mutations that are inferred on the branches leading to that node. If a block mutation exists at a position described by the Block-ID field (int32), the block mutation field (bool) is set to 1, otherwise set to 0, and its type is stored as a substitution to and from a gap in Block mutation type field (bool), encoded as 0 or 1, respectively. In PanMAN, each nucleotide mutation within a block inferred on a branch has four pieces of information, i.e., position (middle coordinate), gap position (last coordinate), mutation type, and mutated characters. To reduce redundancy in the file, consecutive mutations of the same type are packed together and stored as a mutation info (int32) field, where mutation type, mutation length, and mutated characters use 3, 5, and 24 bits, respectively. 
PanMAN stores each character using one-hot encoding, hence, one "Nucleotide Mutations" object can store up to 6 consecutive mutations of the same type. PanMAN's file also stores the complex mutation object to encode the type of complex mutation and its metadata such as PanMATs' and nodes' identifiers, breakpoint coordinates, etc. The entire file is then compressed using XZ ([https://github.com/tukaani-project/xz](https://github.com/tukaani-project/xz)) to enhance storage efficiency.
@@ -26,7 +27,7 @@ PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](ht Figure 3: PanMAN's file format
-## panmanUtils +### panmanUtils panmanUtils includes multiple algorithms to construct PanMANs and to support various functionalities to modify and extract useful information from PanMANs (Fig. 4).
@@ -34,12 +35,12 @@ PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](ht Figure 4: Overview of panmanUtils' functionalities
-## Video Tutorial +### Video Tutorial TBA -# Installation Methods +## Installation Methods -## Using installation script (requires sudo access) +### Using installation script (requires sudo access) 0. Dependencies i. Git @@ -62,7 +63,7 @@ cd build !!!Note panmanUtils is built using CMake and depends upon libraries such as Boost, Cap'n Proto, etc., which are also installed in `installationUbuntu.sh`. If users face version issues, try using the docker methods detailed below. -## Using Docker Image +### Using Docker Image To use panmanUtils in a docker container, users can create a docker container from a docker image, by following these steps @@ -85,7 +86,7 @@ cd /home/panman/build !!!Note The docker image comes with preinstalled panmanUtils and other tools such as PanGraph, PGGB, and RIVET. -## Using DockerFile +### Using DockerFile Docker container with preinstalled panmanUtils can also be built from DockerFile by following these steps 0. Dependencies @@ -112,7 +113,7 @@ cd /home/panman/build ./panmanUtils --help ``` -# PanMAN Construction +## PanMAN Construction Here, we will learn to build PanMAN from various input formats. @@ -175,7 +176,7 @@ conda activate snakemake snakemake --use-conda --cores [num threads] --config RUNTYPE="[pangraph/gfa/msa]" FASTA="[user_fasta]" SEQ_COUNT=[haplotype_count] ``` -# Exploring utilities in panmanUtils +## Exploring utilities in panmanUtils Here, we will learn to exploit the various functionalities provided in panmanUtils software for downstream applications in epidemiological, microbiological, metagenomic, ecological, and evolutionary studies. 
diff --git a/mkdocs.yml b/mkdocs.yml index 74b8044..e8c207d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -34,13 +34,6 @@ theme: toggle: icon: material/brightness-7 name: Switch to dark mode - # - scheme: slate - # primary: white - # accent: white - # toggle: - # icon: material/brightness-4 - # name: Switch to light mode - favicon: images/icon.png logo: images/icon.png @@ -49,16 +42,16 @@ plugins: - search # icon: - admonition: - note: octicons/tag-16 - info: octicons/info-16 - tip: octicons/squirrel-16 - success: octicons/check-16 - question: octicons/question-16 - warning: octicons/alert-16 - bug: octicons/bug-16 - example: octicons/beaker-16 - quote: octicons/quote-16 + # admonition: + # note: octicons/tag-16 + # info: octicons/info-16 + # tip: octicons/squirrel-16 + # success: octicons/check-16 + # question: octicons/question-16 + # warning: octicons/alert-16 + # bug: octicons/bug-16 + # example: octicons/beaker-16 + # quote: octicons/quote-16 extra: social: From 48dd1941b0925901d74bacfad2f0ca5f032093a4 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Fri, 10 Jan 2025 16:43:07 -0800 Subject: [PATCH 091/103] updated snakemake workflow --- src/fasta.cpp | 92 ++++++-- src/gfa.cpp | 499 +++++++++++++++++++++++++++++++++++++++++++ src/panman.cpp | 2 +- src/panman.hpp | 5 +- src/panman2usher.cpp | 2 +- src/panmanUtils.hpp | 5 + src/summary.cpp | 2 +- src/vcf.cpp | 198 ++++++++++------- workflows/Snakefile | 12 +- 9 files changed, 711 insertions(+), 106 deletions(-) diff --git a/src/fasta.cpp b/src/fasta.cpp index a9431bf..db8c863 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -369,6 +369,73 @@ void panmanUtils::printSubsequenceLines(const sequence_t& sequence, // fout << currentLine << std::endl; } +std::pair, std::vector> panmanUtils::printSequenceLinesNewer(const std::vector>>>& sequence, + std::unordered_map& blockLengths, + const std::vector& blockExists, + const std::vector& blockStrand, size_t lineSize, bool aligned, int offset, bool debug) { + + // String 
that stores the sequence to be printed + std::vector lines; + std::vector blockLens; + + for(size_t i = 0; i < blockExists.size(); i++) { + // Non-gap block - the only type being used currently + if(blockExists[i]) { + std::string line=""; + // If forward strand + if(blockStrand[i]) { + // Iterate through main nucs + for(size_t j = 0; j < sequence[i].size(); j++) { + // Gap nucs + for(size_t k = 0; k < sequence[i][j].second.size(); k++) { + if(sequence[i][j].second[k] != '-') { + line += sequence[i][j].second[k]; + } else if(aligned) { + line += '-'; + } + } + // Main nuc + if(sequence[i][j].first != '-' && sequence[i][j].first != 'x') { + line += sequence[i][j].first; + } else if(aligned && sequence[i][j].first != 'x') { + line += '-'; + } + } + } else { + // If reverse strand, iterate backwards + for(size_t j = sequence[i].size()-1; j+1 > 0; j--) { + // Main nuc first since we are iterating in reverse direction + if(sequence[i][j].first != '-' && sequence[i][j].first != 'x') { + line += getComplementCharacter(sequence[i][j].first); + } else if(aligned && sequence[i][j].first != 'x') { + line += '-'; + } + + // Gap nucs + for(size_t k = sequence[i][j].second.size()-1; k+1 > 0; k--) { + if(sequence[i][j].second[k] != '-') { + line += getComplementCharacter(sequence[i][j].second[k]); + } else if(aligned) { + line += '-'; + } + } + } + } + + blockLens.push_back(line.size()); + lines.push_back(line); + + } else if(aligned) { + // If aligned sequence is required, print gaps instead if block does not exist + blockLens.push_back(blockLengths[i]); + lines.push_back("-"); + } + } + + return std::make_pair(lines, blockLens); + +} + // Depth first traversal FASTA writer void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& sequence, blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) 
{ @@ -430,7 +497,7 @@ void panmanUtils::Tree::printFASTAHelper(panmanUtils::Node* root, sequence_t& se blockStrand[primaryBlockId].first = !oldStrand; } if(oldMut != true) { - std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; + // std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; } blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); } else { @@ -846,7 +913,7 @@ void panmanUtils::Tree::printSingleNodeHelper(std::vector &n blockStrand[primaryBlockId].first = !oldStrand; } if(oldMut != true) { - std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; + // std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; } blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, mutation.secondaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); } else { @@ -1753,7 +1820,7 @@ std::string panmanUtils::Tree::printFASTAUltraFastHelper( oldMut = blockExists[primaryBlockId]; blockStrand[primaryBlockId] = !oldStrand; if(oldMut != true) { - std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; + // std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; } } else { // Actually a deletion @@ -1921,15 +1988,15 @@ void panmanUtils::Tree::printFASTAUltraFast(std::ostream& fout, bool aligned, bo } std::mutex printMutex; - + int counting=0; // for (auto &keyValue: allNodes) { tbb::parallel_for_each(allNodes.begin(), allNodes.end(), [&](const std::pair& keyValue) { panmanUtils::Node* node = keyValue.second; - // Create a stringstream for each thread to avoid race conditions on fout if (node->children.size() != 0) { return; } + // Get block sequnece of the Tip std::vector< bool > blockSequence(blocks.size() + 1, false, {}); std::vector nodesFromTipToRoot; @@ -2031,7 +2098,7 @@ void panmanUtils::Tree::printFASTAUltraFast(std::ostream& fout, bool aligned, bo // } } -std::string panmanUtils::Tree::extractSequenceHelper( +std::pair, std::vector> panmanUtils::Tree::extractSequenceHelper( const std::vector& blockSequence, std::unordered_map& blockLengths, const std::vector& nodesFromTipToRootIn, @@ -2070,7 +2137,7 @@ std::string panmanUtils::Tree::extractSequenceHelper( oldMut = blockExists[primaryBlockId]; blockStrand[primaryBlockId] = !oldStrand; if(oldMut != true) { - std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; + // std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; } } else { // Actually a deletion @@ -2187,7 +2254,6 @@ std::string panmanUtils::Tree::extractSequenceHelper( // Store sequence panmanUtils::Node* tipNode = nodesFromTipToRoot[nodesFromTipToRoot.size()-1]; - std::string line=""; int offset = 0; if(!aligned && circularSequences.find(tipNode->identifier) != circularSequences.end()) { @@ -2222,11 +2288,10 @@ std::string panmanUtils::Tree::extractSequenceHelper( reverse(blockStrandPrint.begin(), blockStrandPrint.end()); } - line += panmanUtils::printSequenceLinesNew(sequencePrint, blockLengths, blockExistsPrint, blockStrandPrint, 70, aligned, offset, false); - return line; + return panmanUtils::printSequenceLinesNewer(sequencePrint, blockLengths, blockExistsPrint, blockStrandPrint, 70, aligned, offset, false); } -std::string panmanUtils::Tree::extractSingleSequence(panmanUtils::Node* node, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { +std::pair, std::vector> panmanUtils::Tree::extractSingleSequence(panmanUtils::Node* node, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart, const std::tuple< int, int, int, int >& panMATEnd, bool allIndex) { // Create a stringstream for each thread to avoid race conditions on fout if (node->children.size() != 0) { @@ -2324,6 +2389,5 @@ std::string panmanUtils::Tree::extractSingleSequence(panmanUtils::Node* node, bo } } - std::string line = extractSequenceHelper(blockSequence, blockLengths, nodesFromTipToRoot, sequence, blockExists, blockStrand, aligned, rootSeq, panMATStart, panMATEnd, allIndex); - return line; -} \ No newline at end of file + return extractSequenceHelper(blockSequence, blockLengths, nodesFromTipToRoot, sequence, blockExists, blockStrand, aligned, rootSeq, panMATStart, panMATEnd, allIndex); +} diff --git a/src/gfa.cpp b/src/gfa.cpp index c1eae33..42d761f 100644 --- a/src/gfa.cpp +++ b/src/gfa.cpp @@ -499,3 +499,502 @@ void 
panmanUtils::Tree::convertToGFA(std::ostream& fout) { } } +void panmanUtils::Tree::convertToGFAEfficient(std::ostream& fout) { + // First we check if there are any nucleotide mutations. If there are no nuc mutations, we can + // simply construct a GFA of blocks + bool nucMutationFlag = false; + for(auto u: allNodes) { + if(u.second->nucMutation.size() != 0) { + nucMutationFlag = true; + } + } + + if(!nucMutationFlag) { + // get nodes + std::map, std::string> nodes; + for(auto block: blocks) { + int64_t primaryBlockId = block.primaryBlockId; + int64_t secondaryBlockId = block.secondaryBlockId; + std::string sequenceString; + for(auto u: block.consensusSeq) { + for(size_t k = 0; k < 8; k++) { + const int nucCode = (((u) >> (4*(7 - k))) & 15); + if(nucCode == 0) { + break; + } + const char nucleotide = panmanUtils::getNucleotideFromCode(nucCode); + sequenceString += nucleotide; + } + } + nodes[std::make_pair(primaryBlockId, secondaryBlockId)] = sequenceString; + } + + // block presense map + std::vector< std::pair< bool, std::vector< bool > > > blockExistsGlobal(blocks.size() + 1, {false, {}}); + // Assigning block gaps + for(size_t i = 0; i < blockGaps.blockPosition.size(); i++) { + blockExistsGlobal[blockGaps.blockPosition[i]].second.resize(blockGaps.blockGapLength[i], false); + } + + tbb::concurrent_unordered_set< std::pair< std::pair, std::pair > > edges; + tbb::concurrent_unordered_map< std::string, std::vector< std::pair > > paths; + + // get all paths + tbb::parallel_for_each(allNodes, [&](auto u) { + if(u.second->children.size()) { + return; + } + + auto blockExists = blockExistsGlobal; + + std::vector< panmanUtils::Node* > path; + + Node* it = u.second; + while(it != root) { + path.push_back(it); + it = it->parent; + } + path.push_back(root); + std::reverse(path.begin(), path.end()); + for(auto node: path) { + for(auto mutation: node->blockMutation) { + int primaryBlockId = mutation.primaryBlockId; + int secondaryBlockId = mutation.secondaryBlockId; + int 
type = (mutation.blockMutInfo); + + if(type == panmanUtils::BlockMutationType::BI) { + if(secondaryBlockId != -1) { + blockExists[primaryBlockId].second[secondaryBlockId] = true; + } else { + blockExists[primaryBlockId].first = true; + } + } else { + if(secondaryBlockId != -1) { + blockExists[primaryBlockId].second[secondaryBlockId] = false; + } else { + blockExists[primaryBlockId].first = false; + } + } + } + } + std::vector< std::pair< int32_t, int32_t > > currentPath; + for(size_t i = 0; i < blockExists.size(); i++) { + if(blockExists[i].first) { + currentPath.push_back(std::make_pair(i, -1)); + } + for(size_t j = 0; j < blockExists[i].second.size(); j++) { + if(blockExists[i].second[j]) { + currentPath.push_back(std::make_pair(i, j)); + } + } + } + paths[u.second->identifier] = currentPath; + for(size_t i = 1; i < currentPath.size(); i++) { + edges.insert(std::make_pair(currentPath[i-1], currentPath[i])); + } + }); + std::map< std::pair< int32_t, int32_t >, uint64_t > nodeIds; + uint64_t ctr = 0; + for(auto u: nodes) { + nodeIds[u.first] = ctr; + ctr++; + } + for(auto u: nodes) { + fout << "S\t" << nodeIds[u.first] << "\t" << u.second << "\n"; + } + for(auto u: edges) { + fout << "L\t" << nodeIds[u.first] << "\t+\t" << nodeIds[u.second] << "\t+\t0M\n"; + } + for(auto u: paths) { + fout << "P\t" << u.first << "\t"; + for(size_t i = 0; i < u.second.size(); i++) { + fout << nodeIds[u.second[i]] << "+"; + if(i != u.second.size() - 1) { + fout << ","; + } + } + fout << "\t*\n"; + } + } else { + size_t node_len = 32; + size_t autoIncrId = 0; + std::map< std::pair< std::tuple< int, size_t, size_t >, std::string >, + std::pair< size_t, bool > > allSequenceNodes; + std::mutex allSequenceNodeMutex; + tbb::concurrent_unordered_map< std::string, std::vector< size_t > > paths; + tbb::concurrent_unordered_map< std::string, std::vector< bool > > strandPaths; + + // for(const auto& u: allNodes) { + tbb::parallel_for_each(allNodes, [&](const auto& u) { + 
if(u.second->children.size() != 0) { + return; + // continue; + } + + sequence_t sequence; + blockExists_t blockExists; + blockStrand_t blockStrand; + getSequenceFromReference(sequence, blockExists, blockStrand, u.first); + + std::string currentSequence; + std::vector< size_t > sequenceNodeIds; + std::vector< bool > sequenceStrands; + + for(size_t i = 0; i < sequence.size(); i++) { + if(blockExists[i].first) { + if(blockStrand[i].first) { + std::tuple< size_t, size_t, size_t > currentStart; + for(size_t j = 0; j < sequence[i].first.size(); j++) { + for(size_t k = 0; k < sequence[i].first[j].second.size(); k++) { + if(currentSequence.length() == 0) { + currentStart = std::make_tuple(i,j,k); + } + currentSequence += sequence[i].first[j].second[k]; + if(currentSequence.length() == node_len) { + currentSequence = stripGaps(currentSequence); + if(currentSequence.length()) { + allSequenceNodeMutex.lock(); + if(allSequenceNodes.find(std::make_pair(currentStart, + currentSequence)) == allSequenceNodes.end()) { + allSequenceNodes[std::make_pair(currentStart, + currentSequence)] = std::make_pair(autoIncrId, true); + sequenceNodeIds.push_back(autoIncrId); + sequenceStrands.push_back(true); + autoIncrId++; + } else { + sequenceNodeIds.push_back( + allSequenceNodes[std::make_pair(currentStart, + currentSequence)].first); + sequenceStrands.push_back(true); + } + allSequenceNodeMutex.unlock(); + } + currentSequence = ""; + } + } + if(currentSequence.length() == 0) { + currentStart = std::make_tuple(i,j,-1); + } + currentSequence += sequence[i].first[j].first; + if(currentSequence.length() == node_len) { + currentSequence = stripGaps(currentSequence); + if(currentSequence.length()) { + allSequenceNodeMutex.lock(); + if(allSequenceNodes.find(std::make_pair(currentStart, + currentSequence)) == allSequenceNodes.end()) { + allSequenceNodes[std::make_pair(currentStart, + currentSequence)] = std::make_pair(autoIncrId, true); + sequenceNodeIds.push_back(autoIncrId); + 
sequenceStrands.push_back(true); + autoIncrId++; + } else { + sequenceNodeIds.push_back( + allSequenceNodes[std::make_pair(currentStart, + currentSequence)].first); + sequenceStrands.push_back(true); + } + allSequenceNodeMutex.unlock(); + } + currentSequence = ""; + } + } + if(currentSequence.length()) { + currentSequence = stripGaps(currentSequence); + if(currentSequence.length()) { + allSequenceNodeMutex.lock(); + if(allSequenceNodes.find(std::make_pair(currentStart, + currentSequence)) == allSequenceNodes.end()) { + allSequenceNodes[std::make_pair(currentStart, currentSequence)] + = std::make_pair(autoIncrId, true); + sequenceNodeIds.push_back(autoIncrId); + sequenceStrands.push_back(true); + autoIncrId++; + } else { + sequenceNodeIds.push_back( + allSequenceNodes[std::make_pair(currentStart, + currentSequence)].first); + sequenceStrands.push_back(true); + } + allSequenceNodeMutex.unlock(); + } + currentSequence = ""; + } + } else { + std::tuple< size_t, size_t, size_t > currentStart; + for(size_t j = sequence[i].first.size()-1; j + 1 > 0; j--) { + currentSequence += sequence[i].first[j].first; + currentStart = std::make_tuple(-1*(int)i-1,j,-1); + if(currentSequence.length() == node_len) { + currentSequence = stripGaps(currentSequence); + if(currentSequence.length()) { + // Since the GFA stores the strand parameter, the reverse + // complement will be computed anyway + std::reverse(currentSequence.begin(), currentSequence.end()); + allSequenceNodeMutex.lock(); + if(allSequenceNodes.find(std::make_pair(currentStart, + currentSequence)) == allSequenceNodes.end()) { + allSequenceNodes[std::make_pair(currentStart, currentSequence)] + = std::make_pair(autoIncrId, false); + sequenceNodeIds.push_back(autoIncrId); + sequenceStrands.push_back(false); + autoIncrId++; + } else { + sequenceNodeIds.push_back(allSequenceNodes[ + std::make_pair(currentStart, currentSequence)].first); + sequenceStrands.push_back(false); + } + allSequenceNodeMutex.unlock(); + currentSequence = 
""; + } + } + for(size_t k = sequence[i].first[j].second.size() - 1; k + 1 > 0; k--) { + currentSequence += sequence[i].first[j].second[k]; + currentStart = std::make_tuple(-1*(int)i,j,k); + if(currentSequence.length() == node_len) { + currentSequence = stripGaps(currentSequence); + if(currentSequence.length()) { + // Since the GFA stores the strand parameter, the reverse + // complement will be computed anyway + std::reverse(currentSequence.begin(), currentSequence.end()); + allSequenceNodeMutex.lock(); + if(allSequenceNodes.find(std::make_pair(currentStart, + currentSequence)) == allSequenceNodes.end()) { + allSequenceNodes[std::make_pair(currentStart, + currentSequence)] = std::make_pair(autoIncrId, false); + sequenceNodeIds.push_back(autoIncrId); + sequenceStrands.push_back(false); + autoIncrId++; + } else { + sequenceNodeIds.push_back(allSequenceNodes[ + std::make_pair(currentStart, currentSequence)].first); + sequenceStrands.push_back(false); + } + allSequenceNodeMutex.unlock(); + } + currentSequence = ""; + } + } + } + if(currentSequence.length()) { + currentSequence = stripGaps(currentSequence); + if(currentSequence.length()) { + // Since the GFA stores the strand parameter, the reverse + // complement will be computed anyway + std::reverse(currentSequence.begin(), currentSequence.end()); + allSequenceNodeMutex.lock(); + if(allSequenceNodes.find(std::make_pair(currentStart, currentSequence)) + == allSequenceNodes.end()) { + allSequenceNodes[std::make_pair(currentStart, currentSequence)] + = std::make_pair(autoIncrId, false); + sequenceNodeIds.push_back(autoIncrId); + sequenceStrands.push_back(false); + autoIncrId++; + } else { + sequenceNodeIds.push_back(allSequenceNodes[ + std::make_pair(currentStart, currentSequence)].first); + sequenceStrands.push_back(false); + } + allSequenceNodeMutex.unlock(); + } + currentSequence = ""; + } + } + } + } + + paths[u.first] = sequenceNodeIds; + strandPaths[u.first] = sequenceStrands; + }); + // } + + std::map< 
std::pair< size_t, bool >, std::string > finalNodes; + for(const auto& u: allSequenceNodes) { + finalNodes[u.second] = u.first.second; + } + + // Graph and its transpose + // Maps from < blockID, strand > pair to list of neighbours stored as + // < sequenceId, < blockId, strand > > + std::map< std::pair< size_t, bool >, std::vector< std::pair< std::string, + std::pair< size_t, bool > > > > G; + std::map< std::pair< size_t, bool >, std::vector< std::pair< std::string, + std::pair< size_t, bool > > > > GT; + + for(const auto& u: paths) { + for(size_t i = 1; i < u.second.size(); i++) { + G[std::make_pair(u.second[i-1], strandPaths[u.first][i-1])].push_back( + std::make_pair(u.first, std::make_pair(u.second[i], strandPaths[u.first][i]))); + GT[std::make_pair(u.second[i], strandPaths[u.first][i])].push_back( + std::make_pair(u.first, std::make_pair(u.second[i-1], + strandPaths[u.first][i-1]))); + } + } + + for(auto& u: G) { + // Sort so we can compare the sequence IDs of incoming and outgoing edges for equality + tbb::parallel_sort(u.second.begin(), u.second.end()); + } + for(auto& u: GT) { + // Sort so we can compare the sequence IDs of incoming and outgoing edges for equality + tbb::parallel_sort(u.second.begin(), u.second.end()); + } + + for(auto& u: G) { + if(finalNodes.find(u.first) == finalNodes.end()) { + continue; + } + + while(true) { + // check if a node's edges only go to one next next node and there is an + // outgoing edge for every incoming edge + if(u.second.size() != GT[u.first].size() || u.second.size() == 0) { + break; + } + bool check = true; + + for(size_t j = 0; j < u.second.size(); j++) { + if(u.second[j].first != GT[u.first][j].first) { + check = false; + break; + } else if(j>0 && u.second[j].second != u.second[j-1].second) { + check = false; + break; + } + } + + if(!check) { + break; + } + + std::pair< size_t, bool > dest = u.second[0].second; + if(u.second.size() != GT[dest].size()) { + break; + } + // Combine only if strands match + 
if(u.first.second != dest.second) { + break; + } + for(size_t j = 0; j < u.second.size(); j++) { + if(u.second[j].first != GT[dest][j].first && GT[dest][j].second != u.first) { + check = false; + break; + } + } + if(G[dest].size() != GT[dest].size()) { + break; + } + for(size_t j = 0; j < GT[dest].size(); j++) { + if(G[dest][j].first != GT[dest][j].first) { + check = false; + break; + } + } + if(!check) { + break; + } + + // combine src and dest + if(u.first.second) { + // forward strand + finalNodes[u.first] += finalNodes[dest]; + } else { + // reverse strand + finalNodes[u.first] = finalNodes[dest] + finalNodes[u.first]; + } + finalNodes.erase(dest); + u.second.clear(); + u.second = G[dest]; + } + } + + // Remove duplicate nodes + std::map< std::string, size_t > sequenceToId; + size_t ctr = 1; + for(const auto& u: finalNodes) { + sequenceToId[u.second] = ctr++; + } + + std::unordered_map< size_t, size_t > oldToNew; + for(const auto& u: finalNodes) { + if(sequenceToId.find(u.second) != sequenceToId.end()) { + oldToNew[u.first.first] = sequenceToId[u.second]; + } + // oldToNew[u.first.first] = ctr++; + } + + std::set< std::pair< pair< size_t, bool >, pair< size_t, bool > > > edges; + + for(const auto& u: G) { + if(finalNodes.find(u.first) == finalNodes.end()) { + continue; + } + for(auto edge: u.second) { + if(finalNodes.find(edge.second) == finalNodes.end()) { + continue; + } + edges.insert(std::make_pair(std::make_pair(oldToNew[u.first.first], + u.first.second), std::make_pair(oldToNew[edge.second.first], + edge.second.second))); + } + } + + for(auto& p: paths) { + std::vector< size_t > newPath; + std::vector< bool > newStrandPath; + for(size_t j = 0; j < p.second.size(); j++) { + if(finalNodes.find(std::make_pair(p.second[j], strandPaths[p.first][j])) + != finalNodes.end()) { + newPath.push_back(p.second[j]); + newStrandPath.push_back(strandPaths[p.first][j]); + } + } + p.second = newPath; + strandPaths[p.first] = newStrandPath; + } + + // for(auto& p: paths) { 
+ // fout << ">" << p.first << "\n"; + // for(int i = 0; i < p.second.size(); i++) { + // fout << finalNodes[std::make_pair(p.second[i], strandPaths[p.first][i])]; + // } + // fout << "\n"; + // } + + // convert node IDs to consecutive node IDs + size_t currentID = 1; + std::unordered_map< size_t, size_t > sequentialIds; + for(auto u: finalNodes) { + if(sequentialIds.find(oldToNew[u.first.first]) == sequentialIds.end()) { + sequentialIds[oldToNew[u.first.first]] = currentID; + currentID++; + } + } + + fout << "H\tVN:Z:1.1\n"; + std::unordered_map< size_t, bool > alreadyPrinted; + for(auto u: finalNodes) { + if(!alreadyPrinted[oldToNew[u.first.first]]) { + alreadyPrinted[oldToNew[u.first.first]] = true; + fout << "S\t" << sequentialIds[oldToNew[u.first.first]] << "\t" << u.second << "\n"; + } + } + + for(auto u: edges) { + fout << "L\t" << sequentialIds[u.first.first] << "\t" << (u.first.second? "+":"-") + << "\t" << sequentialIds[u.second.first] << "\t" << (u.second.second? "+":"-") + << "\t0M\n"; + } + + for(auto u: paths) { + fout << "P\t" << u.first << "\t"; + for(size_t i = 0; i < u.second.size(); i++) { + fout << sequentialIds[oldToNew[u.second[i]]] << (strandPaths[u.first][i]?"+":"-"); + if(i != u.second.size() - 1) { + fout << ","; + } + } + fout << "\t*\n"; + } + } +} + diff --git a/src/panman.cpp b/src/panman.cpp index 61e90a8..9f4b163 100644 --- a/src/panman.cpp +++ b/src/panman.cpp @@ -5692,7 +5692,7 @@ panmanUtils::Pangraph::Pangraph(Json::Value& pangraphData, panmanUtils::Node* ro for (auto &b: intSequenceConsensusNew) { intSequenceConsensus.push_back(b); } - std::cout << "Len of consensus: " << consensus.size() << std::endl; + // std::cout << "Len of consensus: " << consensus.size() << std::endl; } seqCount++; // std::cout << seqCount << " " << intSequenceConsensusNew.size() << endl; diff --git a/src/panman.hpp b/src/panman.hpp index 90e04ec..92f253b 100644 --- a/src/panman.hpp +++ b/src/panman.hpp @@ -367,7 +367,7 @@ class Tree { std::vector& 
blockStrand, bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); - std::string extractSequenceHelper( + std::pair, std::vector> extractSequenceHelper( const std::vector& blockSequence, std::unordered_map& blockLengths, const std::vector& nodesFromTipToRoot, @@ -376,7 +376,7 @@ class Tree { std::vector& blockStrand, bool aligned = false, bool rootSeq = false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); - std::string extractSingleSequence(panmanUtils::Node* node, bool aligned=false, bool rootSeq=false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); + std::pair, std::vector> extractSingleSequence(panmanUtils::Node* node, bool aligned=false, bool rootSeq=false, const std::tuple &start = {-1,-1,-1,-1}, const std::tuple& end={-1,-1,-1,-1}, bool allIndex = false); void printSingleNodeHelper(std::vector &nodeList, int nodeListIndex, sequence_t& sequence, blockExists_t& blockExists, blockStrand_t& blockStrand, std::ostream& fout, bool aligned, bool rootSeq, const std::tuple< int, int, int, int >& panMATStart={-1,-1,-1,-1}, const std::tuple< int, int, int, int >& panMATEnd={-1,-1,-1,-1}); @@ -593,6 +593,7 @@ class Tree { void annotate(std::ifstream& fin); std::vector< std::string > searchByAnnotation(std::string annotation); void convertToGFA(std::ostream& fout); + void convertToGFAEfficient(std::ostream& fout); void printFASTAFromGFA(std::ifstream& fin, std::ofstream& fout); void getNodesPreorder(panmanUtils::Node* root, capnp::List::Builder& nodesBuilder, size_t& nodeIndex); size_t getGlobalCoordinate(int primaryBlockId, int secondaryBlockId, int nucPosition, diff --git a/src/panman2usher.cpp b/src/panman2usher.cpp index 4f3bcf8..a60bf56 100644 --- a/src/panman2usher.cpp +++ b/src/panman2usher.cpp @@ -315,7 +315,7 @@ void getNodeDFS(Parsimony::data &data, 
panmanUtils::Node* node, blockStrand[primaryBlockId] = !oldStrand; if(oldMut != true) { - std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; + // std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; } blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); } else { diff --git a/src/panmanUtils.hpp b/src/panmanUtils.hpp index b20fe3e..0b1bd38 100644 --- a/src/panmanUtils.hpp +++ b/src/panmanUtils.hpp @@ -40,6 +40,11 @@ void printSequenceLines(const sequence_t& sequence, const blockExists_t& blockExists, blockStrand_t& blockStrand, size_t lineSize, bool aligned, std::ostream& fout, int offset = 0, bool debug = false); +std::pair, std::vector> printSequenceLinesNewer(const std::vector>>>& sequence, + std::unordered_map& blockLengths, + const std::vector& blockExists, + const std::vector& blockStrand, size_t lineSize, + bool aligned, int offset = 0, bool debug = false); std::string printSequenceLinesNew(const std::vector>>>& sequence, std::unordered_map& blockLengths, const std::vector& blockExists, diff --git a/src/summary.cpp b/src/summary.cpp index d057237..71ae0fd 100644 --- a/src/summary.cpp +++ b/src/summary.cpp @@ -153,7 +153,7 @@ std::tuple getOtherBlockMutationsParallelHelper( blockStrand[primaryBlockId] = !oldStrand; if(oldMut != true) { - std::cout << "There was a problem in PanMAT generation. Please Report." << std::endl; + // std::cout << "There was a problem in PanMAT generation. Please Report." 
<< std::endl; } blockMutationInfo.push_back( std::make_tuple(mutation.primaryBlockId, oldMut, oldStrand, oldMut, !oldStrand) ); } else { diff --git a/src/vcf.cpp b/src/vcf.cpp index 35d88ea..0e4ecac 100644 --- a/src/vcf.cpp +++ b/src/vcf.cpp @@ -177,10 +177,10 @@ void panmanUtils::Tree::printVCFParallel(std::string reference, std::ostream& fo void panmanUtils::Tree::printVCFParallel(panmanUtils::Node* refnode, std::ostream& fout) { std::string reference = refnode->identifier; - std::string referenceSequence = extractSingleSequence(refnode, true); + std::pair, std::vector> referenceSequence = extractSingleSequence(refnode, true); - if(referenceSequence == "Error: Reference sequence with matching name not found!") { - std::cerr << referenceSequence << std::endl; + if(reference == "") { + std::cerr << "Reference not set correctly" << std::endl; return; } @@ -189,91 +189,150 @@ void panmanUtils::Tree::printVCFParallel(panmanUtils::Node* refnode, std::ostrea std::mutex vcfMapMutex; std::map< int, std::map< std::string, std::map< std::string, std::vector< std::string > > > > vcfMap; + fout << "##fileformat=VCFv" << VCF_VERSION << '\n'; + fout << "##fileDate=" << panmanUtils::getDate() << '\n'; + fout << "##source=PanMATv" << PMAT_VERSION << '\n'; + fout << "##reference=" << reference << '\n'; + fout << "#CHROM\t" << "POS\t" << "ID\t" << "REF\t" << "ALT\t" << "QUAL\t" << "FILTER\t" << "INFO\t" << "FORMAT\t"; + + std::mutex sequenceIdsMutex; + std::map< std::string, size_t > sequenceIds; + tbb::parallel_for_each(allNodes, [&](auto& u) { + if(u.second->children.size() == 0 && u.first != reference) { + sequenceIdsMutex.lock(); + sequenceIds[u.first] = 0; + sequenceIdsMutex.unlock(); + } + }); + + // fout << std::left << std::setw(20) << "#CHROM " << std::setw(20) << "POS " << std::setw(20) << "ID " << std::setw(20) << "REF " << std::setw(20) << "ALT " << std::setw(20) << "QUAL " << std::setw(20) << "FILTER " << std::setw(20) << "INFO " << std::setw(20) << "FORMAT "; + 
for(auto u: sequenceIds) { + if(u.first != sequenceIds.rbegin()->first) { + fout << u.first + "\t"; + } else { + fout << u.first; + } + } + fout << '\n'; + tbb::parallel_for_each(allNodes, [&](auto& n) { if(n.second->children.size() == 0 && n.first != refnode->identifier) { - std::string altSequence = extractSingleSequence(n.second, true); - if(altSequence.length() != referenceSequence.length()) { - std::cerr << "Logic error. String lengths don't match: " << referenceSequence.length() << " " << altSequence.length() << std::endl; + std::pair, std::vector> altSequence = extractSingleSequence(n.second, true); + size_t altTotalLen = 0; + size_t refTotalLen = 0; + for(auto u: referenceSequence.second) { + refTotalLen += u; + } + for(auto u: altSequence.second) { + altTotalLen += u; + } + if (altTotalLen != refTotalLen) { + std::cerr << "Logic error. String lengths don't match: " << refTotalLen << " " << altTotalLen << std::endl; return; } + // if(altSequence.length() != referenceSequence.length()) { + // std::cerr << "Logic error. 
String lengths don't match: " << referenceSequence.length() << " " << altSequence.length() << std::endl; + // return; + // } std::string currentRefString, currentAltString; int currentCoordinate = 1; int diffStart = 1; - for(size_t i = 0; i < referenceSequence.length(); i++) { - - if(referenceSequence[i] == '-' && altSequence[i] == '-') { - continue; - } else if(referenceSequence[i] != '-' && altSequence[i] == '-') { - if(currentRefString == "" && currentAltString == "") { - diffStart = currentCoordinate; + std::vector referenceString = std::get<0>(referenceSequence); + std::vector referenceLen = std::get<1>(referenceSequence); + std::vector altString = std::get<0>(altSequence); + std::vector altLen = std::get<1>(altSequence); + + for (int b=0; b < referenceLen.size();b++) { + std::string referenceChar = ""; + std::string altChar = ""; + + if (referenceString[b] == "-") { + for (int c=0; c sequenceIds; - tbb::parallel_for_each(allNodes, [&](auto& u) { - if(u.second->children.size() == 0 && u.first != reference) { - sequenceIdsMutex.lock(); - sequenceIds[u.first] = 0; - sequenceIdsMutex.unlock(); - } - }); - - - fout << "##fileformat=VCFv" << VCF_VERSION << '\n'; - fout << "##fileDate=" << panmanUtils::getDate() << '\n'; - fout << "##source=PanMATv" << PMAT_VERSION << '\n'; - fout << "##reference=" << reference << '\n'; - fout << "#CHROM\t" << "POS\t" << "ID\t" << "REF\t" << "ALT\t" << "QUAL\t" << "FILTER\t" << "INFO\t" << "FORMAT\t"; - - // fout << std::left << std::setw(20) << "#CHROM " << std::setw(20) << "POS " << std::setw(20) << "ID " << std::setw(20) << "REF " << std::setw(20) << "ALT " << std::setw(20) << "QUAL " << std::setw(20) << "FILTER " << std::setw(20) << "INFO " << std::setw(20) << "FORMAT "; - for(auto u: sequenceIds) { - if(u.first != sequenceIds.rbegin()->first) { - fout << u.first + "\t"; - } else { - fout << u.first; - } - } - fout << '\n'; + for(auto u: vcfMap) { for(auto v: u.second) { diff --git a/workflows/Snakefile b/workflows/Snakefile 
index 39fa797..2b1c8d1 100644 --- a/workflows/Snakefile +++ b/workflows/Snakefile @@ -4,14 +4,14 @@ This is a snakemake workflow for building PanMAN from alignments (PanGraph, GFA, Users can run workflow as: Building PanMAN from PanGraph Alignment - snakemake --use-conda --cores [num threads] --config RUNTYPE="pangraph" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] + snakemake --use-conda --cores [num threads] --config RUNTYPE="pangraph" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] ASSEM="None" REF="None" TARGET="None" Building PanMAN from PGGB Alignment - snakemake --use-conda --cores [num threads] --config RUNTYPE="gfa" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] + snakemake --use-conda --cores [num threads] --config RUNTYPE="gfa" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] ASSEM="None" REF="None" TARGET="None" Building PanMAN from MAFFT Alignment - snakemake --use-conda --cores [num threads] --config RUNTYPE="msa" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] + snakemake --use-conda --cores [num threads] --config RUNTYPE="msa" FASTA="[user_fa]" SEQ_COUNT=[haplotype_count] ASSEM="None" REF="None" TARGET="None" Building PanMAN from Fragmented Assembly - snakemake --use-conda --cores [num threads] --config ASSEM="frag" RUNTYPE="pangraph/gfa/msa" REF="[user_fa]" TARGET="[target.txt]" SEQ_COUNT=[haplotype_count] + snakemake --use-conda --cores [num threads] --config RUNTYPE="pangraph/gfa/msa" FASTA="None" SEQ_COUNT=[haplotype_count] ASSEM="frag" REF="[user_fa]" TARGET="[target.txt]" Note: This workflow uses MashTree to build tree input for panmanUtils when building panman from GFA or MSA. 
''' @@ -20,6 +20,8 @@ def assem_rule(config): assembly_rule = config.get("ASSEM", None) if assembly_rule == "frag": return "output/input_concat.fa" + else: + return None def config_select(config): target_rule = config.get("RUNTYPE", None) @@ -47,7 +49,7 @@ def config_select(config): rule all: input: - assem_rule(config), + assem_rule(config) or [], config_select(config) rule wfmash: From b0b71e94c79c018a45916cbb1015f804435a24a2 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Fri, 10 Jan 2025 17:18:33 -0800 Subject: [PATCH 092/103] toUsher correctness --- src/panmanUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index 9d194a7..b54ec27 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -1422,7 +1422,7 @@ void parseAndExecute(int argc, char* argv[]) { } else if(globalVm.count("printRoot")) { printRoot(TG, globalVm, outputFile, buf); return; - } else if(globalVm.count("toUser")) { + } else if(globalVm.count("toUsher")) { toUsher(TG, globalVm); return; // } else if(globalVm.count("fasta-fast")){ From 32374e4057e83899cce77916b485810c787ad9f2 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Fri, 10 Jan 2025 18:13:49 -0800 Subject: [PATCH 093/103] updated wiki --- docs/index.md | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/docs/index.md b/docs/index.md index 64e25ef..10076c2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -38,7 +38,8 @@ PanMAN utilizes Google’s protocol buffer (protobuf, [https://protobuf.dev/](ht ### Video Tutorial TBA -## Installation Methods + +## panmanUtils Installation Methods ### Using installation script (requires sudo access) @@ -112,12 +113,12 @@ docker run -it panman cd /home/panman/build ./panmanUtils --help ``` - + ## PanMAN Construction Here, we will learn to build PanMAN from various input formats. 
-**Step 0:** The Steps below require panmanUtils, if not done so far, refer to [installation guide](install.md) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error +**Step 0:** The Steps below require panmanUtils, if not done so far, refer to [installation guide](#install) to install panmanUtils. To check if panmanUtils is properly installed or not, run the following command, and it should execute without error ```bash # enter into the panman directory (assuming $PANMAN directs to the panman repository directory) cd $PANMAN_HOME @@ -126,8 +127,8 @@ cd $PANMAN_HOME cd $PANMAN_HOME/build ./panmanUtils --help ``` -### Building PanMAN from PanGraph - +### Building PanMAN from Alignments (PanGraph/GFA/MSA) +#### Building PanMAN from PanGraph **Step 1:** Check if `sars_20.json` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom PanGraph (JSON) and tree topology (Newick format) files to build a panman. **Step 2:** Run panmanUtils with the following command to build a panman from PanGraph: @@ -138,7 +139,7 @@ cd $PANMAN_HOME/build ``` The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. -### Building PanMAN from GFA +#### Building PanMAN from GFA **Step 1:** Check if `sars_20.gfa` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom GFA and tree topology (Newick format) files to build a panman. @@ -150,7 +151,7 @@ cd $PANMAN_HOME/build ``` The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. -### Building PanMAN from MSA (FASTA format) +#### Building PanMAN from MSA (FASTA format) **Step 1:** Check if `sars_20.msa` and `sars_20.nwk` files exist in `test` directory. Alternatively, users can provide custom MSA (FASTA format) and tree topology (Newick format) files to build a panman. 
@@ -162,25 +163,36 @@ cd $PANMAN_HOME/build ``` The above command will run panmanUtils program and build `sars_20.panman` in `$PANMAN_HOME/build/panman` directory. -### Building PanMAN from raw genome sequences (Snakemake Workflow) -We provide a Snakemake workflow to construct PanMANs from raw sequences (FASTA format). +### Building PanMAN from raw genome sequences or fragment assemblies using Snakemake Workflow +We provide a Snakemake workflow to construct PanMANs from raw sequences (FASTA format) or from fragment assemblies. !!!Note - The Snakemake workflow uses various tools such as PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively and it is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](install.md)). + The Snakemake workflow uses various tools such as PanGraph tool, PGGB, MAFFT, and MashTree to build input PanGraph, GFA, MSA, and Tree topology files, respectively and it is particularly designed to be used in the docker container build from either the provided docker image or the DockerFile (instructions provided [here](#install)). +#### Building PanMAN from raw genome sequences **Step 1:** Run the following command to construct a panman from raw sequences. ```bash cd $PANMAN_HOME/workflows conda activate snakemake -snakemake --use-conda --cores [num threads] --config RUNTYPE="[pangraph/gfa/msa]" FASTA="[user_fasta]" SEQ_COUNT=[haplotype_count] +snakemake --use-conda --cores 8 --config RUNTYPE="pangraph/gfa/msa" FASTA="[user_input]" SEQ_COUNT="Number of sequences" ASSEM="NONE" REF="NONE" TARGET="NONE" +``` + +#### Building PanMAN from fragment assemblies +**Step 1:** Run the following command to construct a panman from fragment assemblies. 
+ +```bash +cd $PANMAN_HOME/workflows +conda activate snakemake +snakemake --use-conda --cores 8 --config RUNTYPE="pangraph/gfa/msa" FASTA="None" SEQ_COUNT="Number of sequences" ASSEM="frag" REF="reference_file" TARGET="target.txt" ``` +Here, target.txt contains list of files that contains the fragmented assemblies. ## Exploring utilities in panmanUtils Here, we will learn to use exploit various functionalities provided in panmanUtils software for downstream applications in epidemiological, microbiological, metagenomic, ecological, and evolutionary studies. -**Step 0:** The Steps below require panmanUtils and a PanMAN. We provide a pre-built panman (`sars_20.panman`), othewise, refer to [installation guide](install.md) to install panmanUtils and [construction](construction.md) instructions to build a PanMAN. +**Step 0:** The Steps below require panmanUtils and a PanMAN. We provide a pre-built panman (`sars_20.panman`), othewise, refer to [installation guide](#install) to install panmanUtils and [construction](#construction) instructions to build a PanMAN. + +
VG
GFA
GBZ
PanGraph
UShER-MAT
tskit
PanMAN (This work)
Lossless Sequence Encoding
Genomic Variation / m-WGA
Phylogenetic Relationship
Single-nucleotide Substitutions
Small Indels
Structural Mutations
Complex Mutations
Mutations
\ No newline at end of file From 762f34c4557059d67449dc21c4f738fa48f134f7 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sat, 11 Jan 2025 03:57:02 -0800 Subject: [PATCH 100/103] Range query --- docs/index.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/index.md b/docs/index.md index 6052385..1cb22fb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -354,7 +354,7 @@ Extract a subnetwork from a given PanMAN and write it to a new PanMAN file based * Example ```bash cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --subnet --input-file=nodes.txt --output-file=ecoli_10_subnet +./panmanUtils -I panman/sars_20.panman --subnet --input-file=nodes.txt --output-file=sars_20_subnet ``` #### Annotate @@ -362,12 +362,12 @@ Annotate nodes in a PanMAN with a custom string, later searched by these annotat * Usage syntax ```bash -./panmanUtils -I --annotate --input-file= --output-file=ecoli_10_annotate +./panmanUtils -I --annotate --input-file= --output-file=sars_20_annotate ``` * Example ```bash cd $PANMAN_HOME/build -./panmanUtils -I panman/sars_20.panman --annotate --input-file=annotations.tsv --output-file=ecoli_10_annotate +./panmanUtils -I panman/sars_20.panman --annotate --input-file=annotations.tsv --output-file=sars_20_annotate ``` > **NOTE:** If output-file is not provided to panmanUtils, the annotated PanMAN will be written to the same file. 
@@ -384,6 +384,19 @@ cd $PANMAN_HOME/build ./panmanUtils -I panman/sars_20.panman --aa-translations --output_file=sars_20 ``` +#### Range Query +panmanUtils allows extracting the alignment of all the sequences of a single PanMAT in a PanMAN (FASTA format) with respect to a user-defined reference sequence between positions [start:end]. + +* Usage syntax +```bash +./panmanUtils -I --index no -x start -y end --reference= +``` +* Example +```bash +cd $PANMAN_HOME/build +./panmanUtils -I --index no -x 10 -y 100 --reference="Switzerland/SO-ETHZ-500145/2020|OU000199.2|2020-11-12" +``` + ### panmanUtils Interactive mode **Step 1:** Users can enter panmanUtils's interactive mode by passing input panman as input using the following command: From 931cf4ba806c677b29ec4129b47cb43ea0513af7 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Sat, 11 Jan 2025 14:28:39 -0800 Subject: [PATCH 101/103] updated DOI --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6d7dcb7..7ff4be5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [license-badge]: https://img.shields.io/badge/License-MIT-yellow.svg [license-link]: [https://github.com/TurakhiaLab/panman/LICENSE](https://github.com/TurakhiaLab/panman/blob/main/LICENSE) [![License][license-badge]][license-link] -[![DOI](https://img.shields.io/badge/DOI-https://zenodo.org/records/12630607-beige)](https://zenodo.org/records/12630607) +[![DOI](https://img.shields.io/badge/DOI-https://zenodo.org/records/12630607-beige)](https://doi.org/10.5281/zenodo.14633185) [](https://hub.docker.com/r/swalia14/panman) [](https://doi.org/10.1101/2024.07.02.601807) [](https://cmake.org) From 38628f197c4d1ec53113902602c5ef7f47555cf8 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Wed, 15 Jan 2025 09:53:15 -0800 Subject: [PATCH 102/103] updated utils description --- src/panmanUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/panmanUtils.cpp b/src/panmanUtils.cpp index
b54ec27..d99aeac 100644 --- a/src/panmanUtils.cpp +++ b/src/panmanUtils.cpp @@ -151,11 +151,11 @@ void setupOptionDescriptions() { ("reroot,r", "Reroot a PanMAT in a PanMAN based on the input sequence id (--reference)") ("aa-translation,v", "Extract amino acid translations in TSV file") ("extended-newick,e", "Print PanMAN's network in extended-newick format") - ("printMutations,p", "Create PanMAN with network of trees from single or multiple PanMAN files") + ("printMutations,p", "Print mutations from root to each node") ("acr,q", "ACR method [fitch(default), mppa]") ("index",po::value< bool >(0), "Generating indexes and print sequence (passed as reference) between x:y") // ("printRoot", "Print root sequence") - // ("printNodePaths", "Create PanMAN with network of trees from single or multiple PanMAN files") + // ("printNodePaths", "Print mutations from root to each node") ("toUsher", "Convert a PanMAT in PanMAN to Usher-MAT") // ("protobuf2capnp", "Converts a Google Protobuf PanMAN to Capn' Proto PanMAN") From f5486ffa2b2c877969c079c199daf6b69ff90330 Mon Sep 17 00:00:00 2001 From: sumit-walia Date: Wed, 15 Jan 2025 09:53:53 -0800 Subject: [PATCH 103/103] updated utils description --- src/summary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/summary.cpp b/src/summary.cpp index 71ae0fd..dd00314 100644 --- a/src/summary.cpp +++ b/src/summary.cpp @@ -220,7 +220,7 @@ struct VectorHash { void panmanUtils::Tree::getBlockMutationsParallel() { //insertions, deletions, inversions std::tuple muts = getBlockMutationsParallelHelper(root); - std::cout << "Total Block Insertoins: " << std::get<0>(muts) << std::endl; + std::cout << "Total Block Insertions: " << std::get<0>(muts) << std::endl; std::cout << "Total Block Deletions: " << std::get<1>(muts) << std::endl; std::cout << "Total Block Inversion: " << std::get<2>(muts) << std::endl;