From 1e2a9469e17298d1923ed8be7e933245ef20e490 Mon Sep 17 00:00:00 2001 From: Koeng101 Date: Wed, 11 Dec 2024 01:40:27 -0800 Subject: [PATCH] updates uniprot to read IDs (#104) --- README.md | 1 + lib/bio/uniprot/uniprot.go | 36 ++++++++++++++++----------------- lib/bio/uniprot/uniprot_test.go | 7 +++++++ lib/bio/uniprot/xml.go | 1 + 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 2c112c9f..ae2b5153 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Updates uniprot parser to read IDs [#104](https://github.com/Koeng101/dnadesign/pull/104) - Fixes RecursiveFragment to not add flanks to the initial input [#102](https://github.com/Koeng101/dnadesign/pull/102) - Fixes add flank bug, releases new version of python lib [#101](https://github.com/Koeng101/dnadesign/pull/101) - Adds feature for adding flanks to RecursiveFragment. [#100](https://github.com/Koeng101/dnadesign/pull/100) diff --git a/lib/bio/uniprot/uniprot.go b/lib/bio/uniprot/uniprot.go index 73c36e9d..029a63f1 100644 --- a/lib/bio/uniprot/uniprot.go +++ b/lib/bio/uniprot/uniprot.go @@ -65,35 +65,35 @@ type Parser struct { decoder Decoder } -// NewParser returns a Parser that uses r as the source -// from which to parse fasta formatted sequences. 
func NewParser(r io.Reader) *Parser { decoder := xml.NewDecoder(r) return &Parser{decoder: decoder} } func (p *Parser) Next() (Entry, error) { - decoderToken, err := p.decoder.Token() + for { + decoderToken, err := p.decoder.Token() - // Check decoding - if err != nil { - // If we are the end of the file, return io.EOF - if err.Error() == "EOF" { - return Entry{}, io.EOF - } - } - - // Actual parsing - startElement, ok := decoderToken.(xml.StartElement) - if ok && startElement.Name.Local == "entry" { - var e Entry - err = p.decoder.DecodeElement(&e, &startElement) + // Check decoding if err != nil { + // If we are at the end of the file, return io.EOF + if err.Error() == "EOF" { + return Entry{}, io.EOF + } return Entry{}, err } - return e, nil + + // Actual parsing + startElement, ok := decoderToken.(xml.StartElement) + if ok && startElement.Name.Local == "entry" { + var e Entry + err = p.decoder.DecodeElement(&e, &startElement) + if err != nil { + return Entry{}, err + } + return e, nil + } } - return p.Next() } // BaseURL encodes the base URL for the Uniprot REST API. 
diff --git a/lib/bio/uniprot/uniprot_test.go b/lib/bio/uniprot/uniprot_test.go index c323454c..f7d35732 100644 --- a/lib/bio/uniprot/uniprot_test.go +++ b/lib/bio/uniprot/uniprot_test.go @@ -110,4 +110,11 @@ func TestGet(t *testing.T) { if err == nil { t.Errorf("Expected an error for invalid URL, but got none") } + for _, reference := range entry.DbReference { + if reference.Type == "Pfam" { + if reference.Id != "PF01353" { + t.Errorf("Expected Pfam ID PF01353") + } + } + } } diff --git a/lib/bio/uniprot/xml.go b/lib/bio/uniprot/xml.go index 79dd41ee..6f66e74f 100644 --- a/lib/bio/uniprot/xml.go +++ b/lib/bio/uniprot/xml.go @@ -129,6 +129,7 @@ type DbReferenceType struct { Molecule string `xml:"http://uniprot.org/uniprot molecule,omitempty"` Property []PropertyType `xml:"http://uniprot.org/uniprot property,omitempty"` Type string `xml:"type,attr"` + Id string `xml:"id,attr"` Evidence IntListType `xml:"evidence,attr,omitempty"` }