% bioInfoSupplementValidation.tex -- supplemental material: validation of the deconvolution method
\input{supplementReset.tex}
\begin{center}
\textbf{\large Supplemental Materials of the Deconvolution Method Validation}
\end{center}
\section{Method validation on lab controlled strains} \label{sup:sec:validate}
\subsection{Use inference method to reconstruct the reference strains}
\subsubsection{Infer haplotypes for Dd2 strain}
Since 3D7 is the reference strain, we can assume that strain Dd2 is the only source of `ALT' reads in samples \textmd{PG0389-C}, \textmd{PG0390-C}, \textmd{PG0391-C}, \textmd{PG0392-C}, \textmd{PG0393-C} and \textmd{PG0394-C}. Assuming that markers are independent of each other, let $y$ be the read count for the `ALT' allele and $x$ be the weighted coverage, where the weights are the proportions used during the mixing (see Table~2\todo{Reference from main paper, eyes on number changing}). We use the following regression model to infer the Dd2 variant calls:
\[ y = \beta_0 + \beta_{Dd2} x, \]
in which a significant coefficient $\beta_{Dd2}$ implies a Dd2 variant (Fig.~\ref{fig:dd2_gt1}).
\subsubsection{Infer haplotypes for HB3 and 7G8}
Similarly, for samples \textmd{PG0398-C} to \textmd{PG0415-C}, we let the variables $x_1$ and $x_2$ be the weighted coverages, where the weights are the mixing proportions of strains HB3 and 7G8 respectively. We use the regression model $y = \beta_0 + \beta_{HB3} x_1 + \beta_{7G8} x_2$ to investigate the relationship between the total allele count and the weighted coverages of HB3 and 7G8. An HB3 variant is inferred when the coefficient $\beta_{HB3}$ is significant (Fig.~\ref{fig:hb3:7g8:both} and~\ref{fig:hb3}), and likewise a 7G8 variant is inferred when $\beta_{7G8}$ is significant (Fig.~\ref{fig:hb3:7g8:both} and~\ref{fig:7g8}).
\begin{figure}[hp]
\subfloat[]{\label{fig:dd2_gt0}
\includegraphics[width=0.5\textwidth]{validation/dd2marker709668.png}
}
\subfloat[]{\label{fig:dd2_gt1}
\includegraphics[width=0.5\textwidth]{validation/dd2marker281734.png}
}
\caption{\textcolor{red}{XXXXXXXXXXXXXX}}
\end{figure}
\begin{figure}[hp]
\subfloat[]{\label{fig:hb3:7g8:both}
\includegraphics[width=0.33\textwidth]{validation/marker73802.png}
}
\subfloat[]{\label{fig:hb3}
\includegraphics[width=0.33\textwidth]{validation/marker518472.png}
}
\subfloat[]{\label{fig:7g8}
\includegraphics[width=0.33\textwidth]{validation/marker128657.png}
}
\caption{\textcolor{red}{XXXXXXXXXXXXXX}}
\end{figure}
\subsection{Validation performance}
\subsubsection{Assessing quality of the proportion inference}
\textcolor{red}{XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX}
\begin{figure}[htp]
\centering
\subfloat[][\textmd{PG0402-C} low coverage data deconvolution with panel V.]{
\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample20.lab.errorVsTotalCoverage}.png}
}
\subfloat[][\textmd{PG0402-C} high coverage data deconvolution with panel V.]{
\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample100.lab.errorVsTotalCoverage}.png}
}\\
\subfloat[][]{
\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample50.asiaAfirca.errorVsTotalCoverage}.png}
}
\subfloat[][]{
\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample100.asiaAfirca.errorVsTotalCoverage}.png}
}\\
\caption{\color{red} to be done}
\end{figure}
%\begin{figure}[htp]
%\centering
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample20.asiaAfirca.errorVsTotalCoverage}.png}
%}
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample50.asiaAfirca.errorVsTotalCoverage}.png}
%}\\
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample80.asiaAfirca.errorVsTotalCoverage}.png}
%}
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample100.asiaAfirca.errorVsTotalCoverage}.png}
%}\\
%\caption{PG0402-C asiaAfrica}
%\end{figure}
%\begin{figure}[htp]
%\centering
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample20.lab.errorVsTotalCoverage}.png}
%}
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample50.lab.errorVsTotalCoverage}.png}
%}\\
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample80.lab.errorVsTotalCoverage}.png}
%}
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0402-C.subSample100.lab.errorVsTotalCoverage}.png}
%}\\
%\caption{PG0402-C lab}
%\end{figure}
%\begin{figure}[htp]
%\centering
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample20.asiaAfirca.errorVsTotalCoverage}.png}
%}
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample50.asiaAfirca.errorVsTotalCoverage}.png}
%}\\
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample80.asiaAfirca.errorVsTotalCoverage}.png}
%}
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample100.asiaAfirca.errorVsTotalCoverage}.png}
%}\\
%\caption{PG0406-C asiaAfrica}
%\end{figure}
%\begin{figure}[htp]
%\centering
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample20.lab.errorVsTotalCoverage}.png}
%}
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample50.lab.errorVsTotalCoverage}.png}
%}\\
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample80.lab.errorVsTotalCoverage}.png}
%}
%\subfloat[][]{
%\includegraphics[width=.5\textwidth]{{subSamples/PG0406-C.subSample100.lab.errorVsTotalCoverage}.png}
%}\\
%\caption{PG0406-C lab}
%\end{figure}
\begin{thebibliography}{}
\bibitem[\protect\citeauthoryear{Miles, Iqbal, Vauterin, Pearson, Campino,
Theron, Gould, Mead, Drury, O{\textquoteright}Brien, Ruano~Rubio, MacInnis,
Mwangi, Samarakoon, Ranford-Cartwright, Ferdig, Hayton, Su, Wellems, Rayner,
McVean, and Kwiatkowski}{Miles et~al.}{2015}]{Miles2015:sup}
Miles, A., Z.~Iqbal, P.~Vauterin, R.~Pearson, S.~Campino, M.~Theron, K.~Gould,
D.~Mead, E.~Drury, J.~O{\textquoteright}Brien, V.~Ruano~Rubio, B.~MacInnis,
J.~Mwangi, U.~Samarakoon, L.~Ranford-Cartwright, M.~Ferdig, K.~Hayton, X.~Su,
T.~Wellems, J.~Rayner, G.~McVean, and D.~Kwiatkowski (2015).
\newblock Genome variation and meiotic recombination in Plasmodium falciparum:
insights from deep sequencing of genetic crosses.
\newblock {\em bioRxiv\/}.
\bibitem[\protect\citeauthoryear{Wendler}{Wendler}{2015}]{Wendler2015:sup}
Wendler, J. (2015).
\newblock {\em Accessing complex genomic variation in {P}lasmodium falciparum
natural infection}.
\newblock Ph.\ D. thesis, University of Oxford.
\end{thebibliography}