-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclass.h
105 lines (86 loc) · 3.28 KB
/
class.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/*
* =====================================================================================
*
* Filename: class.h
*
* Description: the definitions of some classes.
*
* Version: 1.0
* Created: 02/19/2014 10:21:42 PM
* Revision: none
* Compiler: g++
*
* Author: Xinyun Ma
* Organization: Tsinghua University
*
* =====================================================================================
*/
#ifndef CLASS_H_INCLUDED
#define CLASS_H_INCLUDED
#include <string>
#include <vector>
#include "const.h"
using namespace std;
struct isoform_anno{
public:
string gene_name;
string name;
string chrom;
string strand;
_chr_coor tx_start;
_chr_coor tx_end;
_chr_coor cds_start;
_chr_coor cds_end;
_chr_coor exon_cnt;
vector<_chr_coor> exon_starts;
vector<_chr_coor> exon_ends;
isoform_anno();
};
struct gene_info{
public:
int iso_num;
int exon_num;
//here, one elem in vector represent one isoform
string gene_name;
vector<string> iso_name; //maybe there are multiple isoforms from one gene
string chrom;
vector<string> iso_chrom; //which chromosomes are the isoforms from.
//maybe different isoform comes from different chromosomes, which is invalid here.
string strand;
vector<string> iso_strand; //which chromosomes are the isoforms from.
//maybe different isoform comes from different chromosomes, which is invalid here.
//it's nothing with strand, only represent the pos on chromosome.
// If necessary, it can be easily got from exon_start_g and exon_end_g
_chr_coor g_start;
_chr_coor g_end;
vector<vector<int> > exon_iso_idx;
//here, one elem in vector represent one exon
vector<_chr_coor> exon_start_g; // start position, coordinate on gene
vector<_chr_coor> exon_end_g; // end position, coordinate on gene
vector<_chr_coor> exon_len;
vector<_chr_coor> exon_g_start_l; //original name: exonGeneLocalStarts_fromLeft. Count from 0, record the total length from start before this exon starts!
vector<_chr_coor> exon_g_start_r; //original name: exonGeneLocalStarts_fromRight.Count from 0, record the total length from end before this exon starts!
//this boundary is for: binary search when dealing with read cnt.
vector<_chr_coor> exon_g_bound;
_chr_coor g_len;
int tot_rd_cnt; //total read counts
vector<int> rd_cnt;
vector<double> GBC;
vector<double> LBC;
vector<double> a;//0-1 matrix
vector<double> c;//float matrix. used in expression estimation
//it can be 0-1 matrix or GBC or LBC or the mixture of GBC or LBC
vector<_chr_coor> iso_len;
vector<double> theta;
int is_valid;
//following are some constructor functions.
gene_info();
gene_info(const gene_info& g);
gene_info(const vector<isoform_anno>& iso_vec);
//following are some other member function
int if_valid(); //judge whether the gene is valid. If one gene is invalid, may be it is because:
//1: there are 0 reads mapped to the gene
};
// Presumably reports whether a gene's annotation is valid (inferred from the
// name; the definition is not in this header). Takes the gene by const
// reference and returns a bool.
// TODO (original author): change the return type from bool to int, so that
// different return values can represent different error types.
bool if_gene_anno_valid(const gene_info& gene);
#endif // CLASS_H_INCLUDED