# 07_MetaScore-HADDOCK_CombinedScores.py
# Example usage (on the Biostar server):
#   python 07_MetaScore-HADDOCK_CombinedScores.py /gpfs/home/yuj114/group/YJ_DOCK_PP/RFBased_ScoringFunction/Score_Comparison/HADDOCK-Val/108-fold/10_2_108-foldCV/
import glob
import sys
import os
import numpy as np
def calculate_rank(vector):
    """Return the dense, 1-based ascending rank of every element of *vector*.

    The smallest distinct value gets rank 1, the next distinct value rank 2,
    and so on; equal values share the same rank and no rank is skipped.

    Args:
        vector: sequence of mutually comparable, hashable values.

    Returns:
        A list of ints, one per element of *vector*, in the original order.
        An empty *vector* yields an empty list.
    """
    rank_of = {}
    for value in sorted(vector):
        # Only the first occurrence of a value consumes a new rank, so ties
        # share a rank and the next distinct value gets the following one.
        if value not in rank_of:
            rank_of[value] = len(rank_of) + 1
    return [rank_of[value] for value in vector]
# Complexes that are skipped outright; they still get a header-only output file.
NO_DECOY_COMPLEXES = ('1ZM4', '2OT3', '1GXD', '2G77')

# Column names written as the first line of every output file.
OUTPUT_HEADER = '\t'.join([
    "Decoy", "Class", "i-RMSD", "ML+HADDOCK_Combined", "HADDOCK-Score",
    "Rank_i-RMSD", "Rank-ML+HADDOCK", "Rank-HADDOCK",
])


def _min_max_scale(value, low, high):
    """Scale *value* into [0, 1] over [low, high]; 1.0 when the range is empty."""
    span = high - low
    if span == 0:
        # All values identical: nothing to discriminate on.  A float is used
        # so the later average is never subject to integer division.
        return 1.0
    return (value - low) / span


def combine_scores(rf_list, hd_list):
    """Average the min-max normalised values of two parallel score lists.

    Values of *hd_list* greater than ``mean + 2 * std`` (population standard
    deviation, numpy's default) are treated as outliers: they are left out
    when the min/max of *hd_list* is determined and are replaced by that
    maximum before normalisation.  *rf_list* is normalised as-is.

    Args:
        rf_list: list of floats (column 3 of the input file).
        hd_list: list of floats of the same length (column 4 of the input
            file, written to the output under "HADDOCK-Score").

    Returns:
        ``(combined, outlier_count)`` where ``combined[i]`` is the mean of the
        two normalised scores of item ``i`` and ``outlier_count`` is the
        number of clipped *hd_list* values.

    Raises:
        ValueError: if the lists are empty.
    """
    if not rf_list or not hd_list:
        raise ValueError("combine_scores() needs at least one score")

    hd_values = np.array(hd_list)
    cutoff = np.mean(hd_values, axis=0) + 2 * np.std(hd_values, axis=0)

    outlier_count = 0
    kept_hd = []
    for hd in hd_list:
        if hd > cutoff:
            outlier_count += 1
        else:
            kept_hd.append(hd)

    rf_min, rf_max = min(rf_list), max(rf_list)
    hd_min, hd_max = min(kept_hd), max(kept_hd)

    combined = []
    for rf, hd in zip(rf_list, hd_list):
        if hd > cutoff:
            hd = hd_max  # clip the outlier to the largest retained value
        combined.append(
            (_min_max_scale(rf, rf_min, rf_max)
             + _min_max_scale(hd, hd_min, hd_max)) / 2
        )
    return combined, outlier_count


def _read_decoy_rows(path):
    """Return the tab-split data rows of *path*, skipping the header line."""
    with open(path, 'r') as handle:
        text = handle.read()
    # Blank lines are filtered instead of assuming exactly one trailing
    # newline, so the last decoy is kept even when the file lacks one.
    return [line.split('\t') for line in text.split('\n')[1:] if line.strip()]


def main(argv):
    """Write combined-score tables for every ``*.comp`` file in ``argv[1]``.

    The directory ``ML+HADDOCK_CombinedScores`` inside ``argv[1]`` is deleted
    and recreated; one output file per input file, with the same base name,
    is written into it.  Each output row carries input columns 0, 1, 2, the
    combined score, input columns 4 and 5, the dense rank of the combined
    score, and input column 7.

    Args:
        argv: command-line argument list (``sys.argv``).
    """
    # Local import: the only new dependency of this block.
    import shutil

    if len(argv) < 2:
        sys.exit('Usage: python ' + argv[0] + ' <directory with *.comp files>')

    para = argv[1]
    output_dir = os.path.join(para, 'ML+HADDOCK_CombinedScores')
    # Filesystem calls instead of shell strings: the path comes from the
    # command line and may contain spaces or shell metacharacters.
    shutil.rmtree(output_dir, ignore_errors=True)
    os.makedirs(output_dir)

    for sc in glob.glob(os.path.join(para, '*.comp')):
        file_name = os.path.basename(sc)
        complex_id = file_name.split('.')[0]

        with open(os.path.join(output_dir, file_name), 'w') as fw:
            fw.write(OUTPUT_HEADER + '\n')

            if complex_id in NO_DECOY_COMPLEXES:
                print("No decoys. Pass!")
                continue

            rows = _read_decoy_rows(sc)
            if not rows:
                # Header-only input: nothing to normalise or rank.
                print("No decoys. Pass!")
                continue

            rf_list = [float(cols[3]) for cols in rows]
            hd_list = [float(cols[4]) for cols in rows]

            combined, outlier_count = combine_scores(rf_list, hd_list)
            if outlier_count != 0:
                print(complex_id + '\t' + str(outlier_count))

            ranks = calculate_rank(combined)
            for cols, score, rank in zip(rows, combined, ranks):
                fw.write('\t'.join([
                    cols[0], cols[1], cols[2], str(score),
                    cols[4], cols[5], str(rank), cols[7],
                ]) + '\n')


if __name__ == '__main__':
    main(sys.argv)