Skip to content

Commit

Permalink
Add initial support for VCFv4.4 / VCFv4.5 Number= fields
Browse files Browse the repository at this point in the history
Adds BCF_VL_* types for Number= P, LA, LG, LR and M

Upgrades bcf_hdr_register_hrec() so that it sets the appropriate
value when it finds one of the new types in a FORMAT header line.

Only sets the number type to BCF_VL_FIXED if the value can actually
be parsed as an integer.  Unknown types will now cause a warning
to be printed and will be treated as BCF_VL_VAR.

Adds doxygen style comments to describe the meaning of the various
BCF_VL_* values.
  • Loading branch information
daviesrob committed Jan 16, 2025
1 parent 329e794 commit b1709a0
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
20 changes: 15 additions & 5 deletions htslib/vcf.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,21 @@ extern "C" {
#define BCF_HT_STR 3
#define BCF_HT_LONG (BCF_HT_INT | 0x100) // BCF_HT_INT, but for int64_t values; VCF only!

#define BCF_VL_FIXED 0 // variable length
#define BCF_VL_VAR 1
#define BCF_VL_A 2
#define BCF_VL_G 3
#define BCF_VL_R 4
// Values for INFO / FORMAT "Number=" fields
#define BCF_VL_FIXED 0 ///< Integer defining a fixed number of items
#define BCF_VL_VAR 1 ///< Generic variable length ("Number=.")
#define BCF_VL_A 2 ///< One value per alternate allele
#define BCF_VL_G 3 ///< One value for each possible genotype
#define BCF_VL_R 4 ///< One value for each allele, including the reference

// The following was introduced in VCFv4.4 and is only valid for FORMAT header lines
#define BCF_VL_P 5 ///< One value for each allele value defined in GT

// The following were introduced in VCFv4.5 and are only valid for FORMAT header lines
#define BCF_VL_LA 6 ///< As BCF_VL_A, but only alt alleles listed in LAA are considered present
#define BCF_VL_LG 7 ///< As BCF_VL_G, but only alt alleles listed in LAA are considered present
#define BCF_VL_LR 8 ///< As BCF_VL_R, but only alt alleles listed in LAA are considered present
#define BCF_VL_M 9 ///< One value for each possible base modification for the corresponding ChEBI ID

/* === Dictionary ===
Expand Down
12 changes: 9 additions & 3 deletions vcf.c
Original file line number Diff line number Diff line change
Expand Up @@ -846,14 +846,20 @@ static int bcf_hdr_register_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec)
}
else if ( !strcmp(hrec->keys[i], "Number") )
{
int is_fmt = hrec->type == BCF_HL_FMT;
if ( !strcmp(hrec->vals[i],"A") ) var = BCF_VL_A;
else if ( !strcmp(hrec->vals[i],"R") ) var = BCF_VL_R;
else if ( !strcmp(hrec->vals[i],"G") ) var = BCF_VL_G;
else if ( !strcmp(hrec->vals[i],".") ) var = BCF_VL_VAR;
else if ( is_fmt && !strcmp(hrec->vals[i],"P") ) var = BCF_VL_P;
else if ( is_fmt && !strcmp(hrec->vals[i],"LA") ) var = BCF_VL_LA;
else if ( is_fmt && !strcmp(hrec->vals[i],"LR") ) var = BCF_VL_LR;
else if ( is_fmt && !strcmp(hrec->vals[i],"LG") ) var = BCF_VL_LG;
else if ( is_fmt && !strcmp(hrec->vals[i],"M") ) var = BCF_VL_M;
else
{
sscanf(hrec->vals[i],"%d",&num);
var = BCF_VL_FIXED;
if (sscanf(hrec->vals[i],"%d",&num) == 1)
var = BCF_VL_FIXED;
}
if (var != BCF_VL_FIXED) num = 0xfffff;
}
Expand All @@ -864,7 +870,7 @@ static int bcf_hdr_register_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec)
*hrec->key == 'I' ? "An" : "A", hrec->key);
type = BCF_HT_STR;
}
if (var == -1) {
if (var == UINT32_MAX) {
hts_log_warning("%s %s field has no Number defined. Assuming '.'",
*hrec->key == 'I' ? "An" : "A", hrec->key);
var = BCF_VL_VAR;
Expand Down

0 comments on commit b1709a0

Please sign in to comment.