Skip to content

Commit

Permalink
update to have proper binary representation
Browse files Browse the repository at this point in the history
  • Loading branch information
vasudeva8 committed Dec 6, 2024
1 parent 5177190 commit b833e31
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 10 deletions.
5 changes: 4 additions & 1 deletion htslib/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1617,12 +1617,15 @@ static inline int bcf_format_gt1(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isamp
need to specify explicitly */
e |= (ploidy > 1 && anyunphased) ?
(kputc('|', &tmp2) < 0) :
(ploidy <= 1 && !((val0 >> 1)) ? //|. needs explicit o/p
(kputc('|', &tmp2) < 0) :
0);
0;
} else {
/* 1st allele is unphased: if ploidy is 1, or the allele is '.', or
ploidy > 1 and no other unphased allele exists, the phasing needs to
be specified explicitly */
e |= ((ploidy <= 1) || (ploidy > 1 && !anyunphased)) ?
e |= ((ploidy <= 1 && val0 != 0) || (ploidy > 1 && !anyunphased)) ?
(kputc('/', &tmp2) < 0) :
0;
}
Expand Down
14 changes: 11 additions & 3 deletions test/vcf44_1.expected
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
##fileformat=VCFv4.4
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=1,length=1000>
##reference=file://test
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##failue="test file on explicit and implicit phasing markers in 4.4"
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
Expand All @@ -22,6 +23,13 @@
1 61494 rs56992752 T A 100 PASS . GT /0|0 1/0
1 61495 rs56992752 T A 100 PASS . GT 0|0 |1/0
1 61496 rs56992752 T A 100 PASS . GT . .
1 61497 rs56992752 T A 100 PASS . GT ./1 .|1
1 61498 rs56992752 T A 100 PASS . GT 1/. 1|.
1 61499 rs56992752 T A 100 PASS . GT ./. .|.
1 61497 rs56992752 T A 100 PASS . GT . |.
1 61498 rs56992752 T A 100 PASS . GT ./1 .|1
1 61499 rs56992752 T A 100 PASS . GT ./1 .|1
1 61500 rs56992752 T A 100 PASS . GT |./1 /.|1
1 61501 rs56992752 T A 100 PASS . GT 1/. 1|.
1 61502 rs56992752 T A 100 PASS . GT 1/. /1|.
1 61503 rs56992752 T A 100 PASS . GT |1/. 1|.
1 61504 rs56992752 T A 100 PASS . GT ./. .|.
1 61505 rs56992752 T A 100 PASS . GT ./. .|.
1 61506 rs56992752 T A 100 PASS . GT |./. /.|.
14 changes: 11 additions & 3 deletions test/vcf44_1.vcf
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
##fileformat=VCFv4.4
##contig=<ID=1,length=1000>
##reference=file://test
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##failue="test file on explicit and implicit phasing markers in 4.4"
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097
Expand All @@ -21,6 +22,13 @@
1 61494 rs56992752 T A 100 PASS . GT /0|0 1/0
1 61495 rs56992752 T A 100 PASS . GT 0|0 |1/0
1 61496 rs56992752 T A 100 PASS . GT . .
1 61497 rs56992752 T A 100 PASS . GT ./1 .|1
1 61498 rs56992752 T A 100 PASS . GT 1/. 1|.
1 61499 rs56992752 T A 100 PASS . GT ./. .|.
1 61497 rs56992752 T A 100 PASS . GT /. |.
1 61498 rs56992752 T A 100 PASS . GT ./1 .|1
1 61499 rs56992752 T A 100 PASS . GT /./1 |.|1
1 61500 rs56992752 T A 100 PASS . GT |./1 /.|1
1 61501 rs56992752 T A 100 PASS . GT 1/. 1|.
1 61502 rs56992752 T A 100 PASS . GT /1/. /1|.
1 61503 rs56992752 T A 100 PASS . GT |1/. |1|.
1 61504 rs56992752 T A 100 PASS . GT ./. .|.
1 61505 rs56992752 T A 100 PASS . GT /./. |.|.
1 61506 rs56992752 T A 100 PASS . GT |./. /.|.
11 changes: 8 additions & 3 deletions vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3111,7 +3111,7 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
uint32_t unreadable = 0;
uint32_t max = 0;
int overflow = 0, ploidy = 0, anyunphased = 0, \
phasingprfx = 0;
phasingprfx = 0, unknown1 = 0;

/* with prefixed phasing, it is explicitly given for 1st one
with non-prefixed, set based on ploidy and phasing of other
Expand All @@ -3126,6 +3126,9 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
ploidy++;
if (*t == '.') {
++t, x[l++] = is_phased;
if (l==1) { //for 1st allele only
unknown1 = 1;
}
} else {
const char *tt = t;
uint32_t val;
Expand All @@ -3151,9 +3154,11 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
/* no explicit phasing for 1st allele, set based on
other alleles and ploidy */
if (ploidy == 1) { //implicitly phased
x[0]|= 1;
if (!unknown1) {
x[0] |= 1;
}
} else { //set by other unphased alleles
x[0] |= anyunphased ? 0 : 1;
x[0] |= (anyunphased)? 0 : 1;
}
}
// Possibly check max against v->n_allele instead?
Expand Down

0 comments on commit b833e31

Please sign in to comment.