-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathfasta.hpp
98 lines (82 loc) · 3.03 KB
/
fasta.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/**
* Copyright 2018 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef VCF_VALIDATOR_FASTA_HPP
#define VCF_VALIDATOR_FASTA_HPP
#include <cstddef>
#include <fstream>
#include <memory>
#include <string>
#include <unordered_map>

#include <boost/iostreams/filtering_stream.hpp>

#include "bioio/bioio.hpp"
namespace ebi
{
namespace vcf
{
namespace fasta
{
/**
 * Abstract view of a FASTA reference: lets callers read stretches of a contig and
 * ask whether a contig is present and how long it is.
 */
class IFasta
{
public:
    /**
     * Read a stretch of base pairs out of a contig.
     * @param contig - name of the contig the sequence is taken from
     * @param pos - offset at which the extraction starts
     * @param length - how many base pairs to extract
     * @return the extracted sequence; an empty string when nothing can be extracted.
     */
    virtual std::string sequence(const std::string& contig, const size_t pos, const size_t length) = 0;

    /**
     * Tell whether the FASTA contains a given contig.
     * @param contig - name of the contig to look for
     * @return true when the contig is present in the FASTA, false when it is not found.
     */
    virtual bool sequence_exists(const std::string& contig) const = 0;

    /**
     * Report how long the sequence of a contig is.
     * @param contig - name of the contig
     * @return the sequence length of the contig, or 0 when the contig is not found.
     */
    virtual size_t sequence_length(const std::string& contig) const = 0;

    /** Virtual so that implementations can be destroyed through an IFasta pointer. */
    virtual ~IFasta() = default;
};
class FileBasedFasta : public IFasta
{
public:
FileBasedFasta(const std::string& fasta_path, const std::string& fasta_index_path);
virtual ~FileBasedFasta(){}
std::string sequence(const std::string& contig, const size_t pos, const size_t length);
bool sequence_exists(const std::string &contig) const;
size_t sequence_length(const std::string &contig) const;
private:
FileBasedFasta(){}
std::ifstream fasta_input;
bioio::FastaIndex fasta_index;
};
class ContigFromENA;
class RemoteContig : public IFasta
{
public:
RemoteContig(){}
virtual ~RemoteContig(){}
std::string sequence(const std::string& contig, const size_t pos, const size_t length);
bool sequence_exists(const std::string &contig) const;
size_t sequence_length(const std::string &contig) const;
private:
std::unordered_map<std::string, std::shared_ptr<ContigFromENA>> contigs;
};
}
}
}
#endif //VCF_VALIDATOR_FASTA_HPP