-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFirstCandidatesList.py
89 lines (76 loc) · 2.79 KB
/
FirstCandidatesList.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python
import os
import re
import argparse
"""
Louis Chauviere
03/11/2016
Inserm UMR-S 1155
Script that will filter an Annotated Annovar file that describe genotypes from a trio (Father, Mother, Child).
It will return variants that can be considered as Compound Heterozygous in Annotated Annovar file.
"""
def main():
inputfile, outputfile = commandLine()
fich = open(inputfile,'r')
out = open(outputfile, 'w')
for lines in fich:
matchHeader = re.search(r'Func.refGene', lines)
if matchHeader:
out.write(lines)
else:
line = lines.split("\t")
genoChild = line[65].split(":")[0]
genoFather = line[63].split(":")[0]
genoMother = line[64].split(":")[0]
matchBarre = re.search(r'\|', genoChild)
if matchBarre:
homoChild = homoOrNot("|", genoChild)
orNotGenoFather = noGeno("|", genoFather)
orNotGenoMother = noGeno("|", genoMother)
else:
homoChild = homoOrNot("/", genoChild)
orNotGenoFather = noGeno("/", genoFather)
orNotGenoMother = noGeno("/", genoMother)
#Interesting variant must not be homozygous in child
if not homoChild:
#Qphred > 30 and coverage > 20
if quality(65, line):
#The alternative variant must be present in Mother or Father but not in both parents
if not orNotGenoFather and orNotGenoMother and quality(63, line):
out.write(lines)
elif not orNotGenoMother and orNotGenoFather and quality(64, line):
out.write(lines)
out.close()
fich.close()
#Will return True if a variant is homozygous reference for an individual
def noGeno(symbol, geno):
if geno == "0" + symbol + "0" or geno == "." + symbol + ".":
return True
else:
return False
#Will return True if a variant is homozygous for an individual
def homoOrNot(symbol, geno):
print geno + " " + symbol
if geno.split(symbol)[0] == geno.split(symbol)[1]:
return True
else:
return False
#Qphred >= 30 and coverage >= 20
def quality(nb, line):
if int(line[nb].split(":")[2]) >= 30 and int(line[nb].split(":")[3]) >= 20:
return True
else:
return False
#Use argparse for the command line
def commandLine():
parser = argparse.ArgumentParser()
parser.add_argument('inputfile', action='store', help='Store the input file pathway : Annovar annotated file with in order Father, Mother, Child')
parser.add_argument('outputfile', action='store', help='Store path for the outputfile : same annotated file, but after filtering')
parser.add_argument("-d", "--description", help="Script description", action="store_true")
results = parser.parse_args()
if results.description:
print "Script that will filter an Annotated Annovar file that describe genotypes from a trio (Father, Mother, Child)."
print "It will return variants that can be considered as Compound Heterozygous"
return results.inputfile, results.outputfile
if __name__ == '__main__':
main()