Skip to content

Commit

Permalink
Add helper to get labels from arXiv terms
Browse files Browse the repository at this point in the history
  • Loading branch information
joecorall committed Mar 20, 2024
1 parent 57b38d6 commit b7e6472
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 0 deletions.
16 changes: 16 additions & 0 deletions internal/utils/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"io/ioutil"
"net/http"
"regexp"
"unicode/utf8"
)

func FetchEmails(url string) ([]string, error) {
Expand All @@ -30,3 +31,18 @@ func FetchEmails(url string) ([]string, error) {

return emails, nil
}

// TrimToMaxLen returns s cut down to at most maxLen Unicode code points
// (runes, not bytes), so multi-byte characters are never split.
//
// If s already has maxLen runes or fewer it is returned unchanged. A maxLen
// of zero or less yields the empty string rather than panicking on an
// out-of-range slice bound.
func TrimToMaxLen(s string, maxLen int) string {
	if maxLen <= 0 {
		return ""
	}

	// The byte length is an upper bound on the rune count, so a string that
	// is short enough in bytes needs no rune counting at all.
	if len(s) <= maxLen || utf8.RuneCountInString(s) <= maxLen {
		return s
	}

	// Truncate on rune boundaries and convert back to a string.
	return string([]rune(s)[:maxLen])
}
34 changes: 34 additions & 0 deletions pkg/arxiv/category.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,40 @@
package arxiv

import (
"fmt"
"io"
"net/http"
"regexp"
)

// Category holds the two XML attributes decoded from a category element.
// Both fields are populated by encoding/xml from attributes, not from
// element text.
type Category struct {
// Term is decoded from the element's "term" attribute
// (presumably the arXiv category identifier — confirm against the feed).
Term string `xml:"term,attr"`
// Scheme is decoded from the element's "scheme" attribute.
Scheme string `xml:"scheme,attr"`
}

// categoryHeadingRe matches one category heading on the arXiv taxonomy page,
// e.g. `<h4>cs.AI <span>(Artificial Intelligence)</span></h4>`.
//
// Capture 1 is the category term, capture 2 the human-readable label. The
// term is an archive name optionally followed by ".<subject class>"; the
// subject class may be upper case (cs.AI), lower case with hyphens
// (cond-mat.dis-nn) or absent entirely (gr-qc, hep-th).
//
// Compiled once at package scope so repeated calls do not recompile it.
var categoryHeadingRe = regexp.MustCompile(`<h4>([a-z\-]+(?:\.[A-Za-z\-]+)?) <span>\(([^)]+)\)</span></h4>`)

// GetCategoryLabels downloads the arXiv category taxonomy page and returns a
// map from category term (e.g. "cs.AI") to its label (e.g. "Artificial
// Intelligence").
//
// The function is best-effort: on a transport error, a non-200 response or a
// failure reading the body it prints a message and returns an empty, non-nil
// map, so callers can always index the result safely.
func GetCategoryLabels() map[string]string {
	categories := map[string]string{}

	url := "https://arxiv.org/category_taxonomy"
	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("Error fetching URL: ", err)
		return categories
	}
	defer resp.Body.Close()

	// http.Get only reports transport failures; an error page (rate limit,
	// outage) would otherwise be parsed and silently produce no labels.
	if resp.StatusCode != http.StatusOK {
		fmt.Println("Error fetching URL: unexpected status ", resp.Status)
		return categories
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("Error reading response body: ", err)
		return categories
	}

	// Match directly on the bytes to avoid copying the whole page into a string.
	for _, match := range categoryHeadingRe.FindAllSubmatch(body, -1) {
		categories[string(match[1])] = string(match[2])
	}

	return categories
}
1 change: 1 addition & 0 deletions pkg/arxiv/entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Entry struct {
JournalRef string `xml:"http://arxiv.org/schemas/atom journal_ref"`
PrimaryCategory Category `xml:"http://arxiv.org/schemas/atom primary_category"`
Categories []Category `xml:"category"`
License string `xml:"license"`
PDF string
}

Expand Down

0 comments on commit b7e6472

Please sign in to comment.