-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathsplitterToBreakpointExt.bak
executable file
·114 lines (106 loc) · 5.61 KB
/
splitterToBreakpointExt.bak
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/python
import sys
from optparse import OptionParser
import os
def splitterToBreakpoint(contigFile,splitReadSlop,localRange):
#Define whether event is inv, del, or dup (classifying translocations as if they were local intrachromosomal events)
if contigFile == "stdin":
contigs = sys.stdin
else:
contigs = open(str(contigFile))
for line in contigs:
split = line.split()
if (split[0] > split[3]) or ((split[0] == split[3]) and (split[1] > split[4])):
chrom1 = split[3]
start1 = int(split[4])
end1 = int(split[5])
chrom2 = split[0]
start2 = int(split[1])
end2 = int(split[2])
ID = split[6]
score = int(split[7])
strand1 = split[9]
strand2 = split[8]
queryStart1 = int(split[12])
queryEnd1 = int(split[13])
queryStart2 = int(split[10])
queryEnd2 = int(split[11])
minNonOverlap = int(split[14])
queryLength = int(split[15])
if len(split) > 16: qualScores = split[16]
else: qualScores = '*'
else:
chrom1 = split[0]
start1 = int(split[1])
end1 = int(split[2])
chrom2 = split[3]
start2 = int(split[4])
end2 = int(split[5])
ID = split[6]
score = int(split[7])
strand1 = split[8]
strand2 = split[9]
queryStart1 = int(split[10])
queryEnd1 = int(split[11])
queryStart2 = int(split[12])
queryEnd2 = int(split[13])
minNonOverlap = int(split[14])
queryLength = int(split[15])
if len(split) > 16: qualScores = split[16]
else: qualScores = '*'
if strand1 != strand2: #Classifying Inversions
if chrom1 == chrom2 and (end2 - start1) <= localRange:
variant = "local_inversion"
else:
variant = "distant_inversion"
elif ((strand1=="+" and strand2=="+" and queryStart1 < queryStart2) or (strand1=="-" and strand2=="-" and queryStart1 > queryStart2)): #Classifying Deletions
if chrom1 == chrom2 and (end2 - start1) <= localRange:
variant = "local_deletion"
else:
variant = "distant_deletion"
elif ((strand1=="+" and strand2=="+" and queryStart1 > queryStart2) or (strand1=="-" and strand2=="-" and queryStart1 < queryStart2)): #Classifying Tandem Duplications
if chrom1 == chrom2 and (end2 - start1) <= localRange:
variant = "local_duplication"
else:
variant = "distant_duplication"
else:
print "ERROR: variant " + ID + " is lost in variant classification step. Contact Mitchell"
if variant == "local_inversion" or variant == "distant_inversion":
if ((queryStart1 < queryStart2) and (strand1 == "-") and (strand2 == "+")) or ((queryStart1 > queryStart2) and (strand1 == "+") and (strand2 == "-")):
breakStart1 = str(start1-splitReadSlop)
breakEnd1 = str(start1+splitReadSlop)
breakStart2 = str(start2-splitReadSlop)
breakEnd2 = str(start2+splitReadSlop)
elif ((queryStart1 < queryStart2) and (strand1 == "+") and (strand2 == "-")) or ((queryStart1 > queryStart2) and (strand1 == "-") and (strand2 == "+")):
breakStart1 = str(end1-splitReadSlop)
breakEnd1 = str(end1+splitReadSlop)
breakStart2 = str(end2-splitReadSlop)
breakEnd2 = str(end2+splitReadSlop)
elif variant == "local_deletion" or variant == "distant_deletion":
breakStart1 = str(end1-splitReadSlop)
breakEnd1 = str(end1+splitReadSlop)
breakStart2 = str(start2-splitReadSlop)
breakEnd2 = str(start2+splitReadSlop)
elif variant == "local_duplication" or variant == "distant_duplication":
breakStart1 = str(start1-splitReadSlop)
breakEnd1 = str(start1+splitReadSlop)
breakStart2 = str(end2-splitReadSlop)
breakEnd2 = str(end2+splitReadSlop)
else:
print "ERROR: " + ID + " is lost in breakpoint locus prediction step. Contact Mitchell"
print '\t'.join(map(str, [chrom1, breakStart1, breakEnd1, chrom2, breakStart2, breakEnd2, ID, score, strand1, strand2, queryStart1, queryEnd1, queryStart2, queryEnd2, minNonOverlap, queryLength, qualScores, variant]))
def main():
usage = "%prog -b <bedpe> -c <contigFile.bedpe> [options]\nVersion: 0.1\nAuthor: Mitchell L. Leibowitz\nEdited: 25 Aug, 2010\n\n\
Note: Add slop at your own risk.\n\nPrints modified bedpe so bedpe entry 1 is always less than bedpe entry 2"
parser = OptionParser(usage)
parser.add_option("-i", dest="contigFile", metavar="FILE", help="BEDPE file containing the split contigs; requires 10 column bedpe plus 11-14 as query coordinates")
parser.add_option("-s", dest="splitReadSlop", metavar="INT", type="int", default=1, help="Bidirectional slop around the breakpoint [default = 1]")
parser.add_option("-r", dest="localRange", default=1000000, type="int", metavar="INT", help="the range of coordinates considered local (for breakpoint classification) [default = 1000000]; Calculated by subtracting field 6 from field 2.")
(opts, args) = parser.parse_args()
if opts.contigFile is None:
parser.print_help()
print
else:
splitterToBreakpoint(opts.contigFile, opts.splitReadSlop, opts.localRange)
if __name__ == "__main__":
sys.exit(main())