From 1e2a9469e17298d1923ed8be7e933245ef20e490 Mon Sep 17 00:00:00 2001
From: Koeng101 <Koeng101@gmail.com>
Date: Wed, 11 Dec 2024 01:40:27 -0800
Subject: [PATCH] updates uniprot to read IDs (#104)

---
 README.md                       |  1 +
 lib/bio/uniprot/uniprot.go      | 36 ++++++++++++++++-----------------
 lib/bio/uniprot/uniprot_test.go |  7 +++++++
 lib/bio/uniprot/xml.go          |  1 +
 4 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 2c112c9f..ae2b5153 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+- Updates uniprot parser to read the `id` attribute of database references [#104](https://github.com/Koeng101/dnadesign/pull/104)
 - Fixes RecursiveFragment to not add flanks to the initial input [#102](https://github.com/Koeng101/dnadesign/pull/102)
 - Fixes add flank bug, releases new version of python lib [#101](https://github.com/Koeng101/dnadesign/pull/101)
 - Adds feature for adding flanks to RecursiveFragment. [#100](https://github.com/Koeng101/dnadesign/pull/100)
diff --git a/lib/bio/uniprot/uniprot.go b/lib/bio/uniprot/uniprot.go
index 73c36e9d..029a63f1 100644
--- a/lib/bio/uniprot/uniprot.go
+++ b/lib/bio/uniprot/uniprot.go
@@ -65,35 +65,35 @@ type Parser struct {
 	decoder Decoder
 }
 
-// NewParser returns a Parser that uses r as the source
-// from which to parse fasta formatted sequences.
+// NewParser returns a Parser that wraps r in an XML decoder; call Next to
+// read the entries it contains one at a time.
 func NewParser(r io.Reader) *Parser {
 	decoder := xml.NewDecoder(r)
 	return &Parser{decoder: decoder}
 }
 
+// Next skips tokens until it reaches an <entry> start element, then decodes
+// that element into an Entry. Decoder errors are returned unchanged, so the
+// end of input surfaces as the decoder's own io.EOF.
 func (p *Parser) Next() (Entry, error) {
-	decoderToken, err := p.decoder.Token()
+	for {
+		decoderToken, err := p.decoder.Token()
 
-	// Check decoding
-	if err != nil {
-		// If we are the end of the file, return io.EOF
-		if err.Error() == "EOF" {
-			return Entry{}, io.EOF
-		}
-	}
-
-	// Actual parsing
-	startElement, ok := decoderToken.(xml.StartElement)
-	if ok && startElement.Name.Local == "entry" {
-		var e Entry
-		err = p.decoder.DecodeElement(&e, &startElement)
+		// io.EOF from the decoder is handed back as-is so callers can compare it.
 		if err != nil {
 			return Entry{}, err
 		}
-		return e, nil
+
+		startElement, ok := decoderToken.(xml.StartElement)
+		if ok && startElement.Name.Local == "entry" {
+			var e Entry
+			err = p.decoder.DecodeElement(&e, &startElement)
+			if err != nil {
+				return Entry{}, err
+			}
+			return e, nil
+		}
 	}
-	return p.Next()
 }
 
 // BaseURL encodes the base URL for the Uniprot REST API.
diff --git a/lib/bio/uniprot/uniprot_test.go b/lib/bio/uniprot/uniprot_test.go
index c323454c..f7d35732 100644
--- a/lib/bio/uniprot/uniprot_test.go
+++ b/lib/bio/uniprot/uniprot_test.go
@@ -110,4 +110,11 @@ func TestGet(t *testing.T) {
 	if err == nil {
 		t.Errorf("Expected an error for invalid URL, but got none")
 	}
+	for _, reference := range entry.DbReference {
+		if reference.Type == "Pfam" {
+			if reference.Id != "PF01353" {
+				t.Errorf("Expected Pfam ID PF01353")
+			}
+		}
+	}
 }
diff --git a/lib/bio/uniprot/xml.go b/lib/bio/uniprot/xml.go
index 79dd41ee..6f66e74f 100644
--- a/lib/bio/uniprot/xml.go
+++ b/lib/bio/uniprot/xml.go
@@ -129,6 +129,7 @@ type DbReferenceType struct {
 	Molecule string         `xml:"http://uniprot.org/uniprot molecule,omitempty"`
 	Property []PropertyType `xml:"http://uniprot.org/uniprot property,omitempty"`
 	Type     string         `xml:"type,attr"`
+	Id       string         `xml:"id,attr"`
 	Evidence IntListType    `xml:"evidence,attr,omitempty"`
 }