This repository has been archived by the owner on Jul 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBayes_factor_comparison.py
executable file
·249 lines (191 loc) · 7.82 KB
/
Bayes_factor_comparison.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#!/usr/bin/env python
import os, sys
from optparse import OptionParser
import subprocess
import tempfile
##########################################
# Function to Get command line arguments #
##########################################
def main():
usage = "usage: %prog [options]"
parser = OptionParser(usage=usage)
parser.add_option("-e", "--exclude", action="store", dest="exclude", help="File containing list of MLE files to exclude (e.g. if they have failed to converge)", default="")
parser.add_option("-s", "--suffix", action="store", dest="suffix", help="Suffix for MLE files (used to identify correct files. [default = %default]", default=".MLE.log")
parser.add_option("-S", "--separator", action="store", dest="separator", help="Separator used to split repeat identifier suffix from file name prefix. [default = %default]", default="_")
parser.add_option("-n", "--nosplit", action="store_false", dest="split", help="Do not split read on separator.", default=True)
parser.add_option("-d", "--directory", action="store", dest="directory", help="Directory containing MLE files. Default is current working directory", default="")
#Could add more options in here so people can specify similarities etc.
return parser.parse_args()
################
# Main program #
################
if __name__ == "__main__":
#Get command line arguments
(options, args) = main()
if options.exclude!="":
if not os.path.isfile(options.exclude):
print >> sys.stderr, "Cannot find file", options.exclude
sys.exit()
else:
print options.exclude
if options.directory=="":
options.directory=os.getcwd()
elif not os.path.isdir(options.directory):
print >> sys.stderr, "Cannot find directory", options.directory
sys.exit()
mlefiles={}
for file in os.listdir(options.directory):
filename=options.directory+"/"+file
if os.path.isfile(filename) and filename[-1*len(options.suffix):]==options.suffix:
tail = subprocess.check_output(["tail", "-n", "1", filename])
words=tail.strip().split('\t')
if len(words)!=6:
print >> sys.stderr, "Expecting six columns in MLE file data. Found", len(words)
print >> sys.stderr, "Skipping", file
continue
try:
values=map(float,words)
except ValueError:
print >> sys.stderr, "Expecting all values in MLE file to be numeric."
print >> sys.stderr, "Skipping", file
continue
if values[4]!=0:
print >> sys.stderr, "Theta has not reached zero in", file
print >> sys.stderr, "Skipping", file
#continue
prefix=filename.rstrip(options.suffix)
if options.split:
if len(prefix.split(options.separator))<2:
print >> sys.stderr, "File", filename, "cannot be split by separator:", options.separator
print >> sys.stderr, "Using", prefix, "..."
rootname=prefix
else:
rootname=options.separator.join(prefix.split(options.separator)[:-1])
else:
rootname=prefix
if not rootname in mlefiles:
mlefiles[rootname]=[]
mlefiles[rootname].append(filename)
PS=[]
SS=[]
mlefilesort=mlefiles.keys()
mlefilesort.sort()
for mlefile in mlefilesort:
mlefiles[mlefile].sort()
tf = tempfile.NamedTemporaryFile(delete=False)
tfname=tf.name
print >> tf, '<beast>'
print >> tf, '\t<pathSamplingAnalysis fileName="'+' '.join(mlefiles[mlefile])+'">'
print >> tf, '\t\t<likelihoodColumn name="pathLikelihood.delta"/>'
print >> tf, '\t\t<thetaColumn name="pathLikelihood.theta"/>'
print >> tf, '\t</pathSamplingAnalysis>'
print >> tf, '</beast>'
tf.close()
print '<beast>'
print '\t<pathSamplingAnalysis fileName="'+' '.join(mlefiles[mlefile])+'">'
print '\t\t<likelihoodColumn name="pathLikelihood.delta"/>'
print '\t\t<thetaColumn name="pathLikelihood.theta"/>'
print '\t</pathSamplingAnalysis>'
print '</beast>'
try:
psbeast = subprocess.check_output(["beast", tfname], stderr=subprocess.STDOUT)
except subprocess.CalledProcessError, e:
print "\n\n!!!BEAST failed to run!!!\n"
print "Returncode =", e.returncode
print e.output
sys.exit()
for line in psbeast.split("\n"):
words=line.strip().split("=")
if len(words)==2 and words[0]=="log marginal likelihood (using path sampling) from pathLikelihood.delta ":
try:
PS.append([float(words[1]), mlefile.split("/")[-1]])
except ValueError:
print >> sys.stderr, "Could not read beast path sampling results output for", mlefile
os.unlink(tfname)
tf = tempfile.NamedTemporaryFile(delete=False)
tfname=tf.name
print >> tf, '<beast>'
print >> tf, '\t<steppingStoneSamplingAnalysis fileName="'+' '.join(mlefiles[mlefile])+'">'
print >> tf, '\t\t<likelihoodColumn name="pathLikelihood.delta"/>'
print >> tf, '\t\t<thetaColumn name="pathLikelihood.theta"/>'
print >> tf, '\t</steppingStoneSamplingAnalysis>'
print >> tf, '</beast>'
tf.close()
try:
ssbeast = subprocess.check_output(["beast", tfname], stderr=subprocess.STDOUT)
except subprocess.CalledProcessError:
print "\n\n!!!BEAST failed to run!!!\n"
sys.exit()
for line in ssbeast.split("\n"):
words=line.strip().split("=")
if len(words)==2 and words[0]=="log marginal likelihood (using stepping stone sampling) from pathLikelihood.delta ":
try:
SS.append([float(words[1]), mlefile.split("/")[-1]])
except ValueError:
print >> sys.stderr, "Could not read beast stepping stones results output for", mlefile
os.unlink(tfname)
PS.sort()
PS.reverse()
SS.sort()
SS.reverse()
foundfirst=False
firstvalue=0
print "\n\n=====================\nPath Sampling Results\n=====================\n"
print '\t'.join(["Prefix", "log MLE", "log BF", "Strength of Evidence (Kass & Raftery, 1995)"])
for PSvalue in PS:
isfirst=False
if not foundfirst and PSvalue[0]<0:
foundfirst=True
firstvalue=PSvalue[0]
isfirst=True
BF=firstvalue-PSvalue[0]
comment=""
if BF>0 and BF<1:
comment="Not worth more than a bare mention"
elif BF<3:
comment="Positive"
elif BF<5:
comment="Strong"
elif BF>0:
comment="Very strong"
else:
print >> sys.stderr, "Error, log BF is negative!"
if isfirst:
print '\t'.join(map(str, [PSvalue[1], PSvalue[0], "-", "-"]))
elif foundfirst:
print '\t'.join(map(str, [PSvalue[1], PSvalue[0], firstvalue-PSvalue[0], comment]))
else:
print '\t'.join(map(str, [PSvalue[1], PSvalue[0], "-", "log MLE is positive, analysis may be using improper priors"]))
foundfirst=False
firstvalue=0
print "\n\n===============================\nStepping Stone Sampling Results\n===============================\n"
print '\t'.join(["Prefix", "log MLE", "log BF", "Strength of Evidence (Kass & Raftery, 1995)"])
for SSvalue in SS:
isfirst=False
if not foundfirst and SSvalue[0]<0:
foundfirst=True
firstvalue=SSvalue[0]
isfirst=True
BF=firstvalue-SSvalue[0]
comment=""
if BF>0 and BF<1:
comment="Not worth more than a bare mention"
elif BF<3:
comment="Positive"
elif BF<5:
comment="Strong"
elif BF>0:
comment="Very strong"
else:
print >> sys.stderr, "Error, log BF is negative!"
if isfirst:
print '\t'.join(map(str, [SSvalue[1], SSvalue[0], "-", "-"]))
elif foundfirst:
print '\t'.join(map(str, [SSvalue[1], SSvalue[0], firstvalue-SSvalue[0], comment]))
else:
print '\t'.join(map(str, [SSvalue[1], SSvalue[0], "-", "log MLE is positive, analysis may be using improper priors"]))
print "\n\n"
print """If you use the results of this analysis in a paper, please cite:
Baele G, Lemey P, Bedford T, Rambaut A, Suchard MA and Alekseyenko AV (2012) 'Improving the accuracy of demographic and molecular clock model comparison while accommodating phylogenetic uncertainty' Molecular Biology and Evolution 29(9):2157-2167
and
Baele G, Li WLS, Drummond AJ, Suchard MA and Lemey P (2013) 'Accurate model selection of relaxed molecular clocks in Bayesian phylogenetics' Molecular Biology and Evolution 30(2):239-243 """