-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathfetch.go
113 lines (101 loc) · 2.78 KB
/
fetch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// Copyright ©2017 The bíogo Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// fetch is a command-line Entrez database query program.
package main
import (
"bytes"
"flag"
"io"
"log"
"os"
"github.com/biogo/ncbi"
"github.com/biogo/ncbi/entrez"
)
const (
// tool is the tool name handed to entrez.DoSearch and entrez.Fetch
// alongside the user's email address.
tool = "entrez.example"
)
// Command line flags. They are registered at package initialisation and
// read by main after flag.Parse has run.
var (
// clQuery is the search query; main prints the usage message and exits
// with status 1 when it is empty.
clQuery = flag.String("query", "", "query specifies the search query for record retrieval (required).")
// db is the database name passed to both the search and the fetch calls.
db = flag.String("db", "protein", "db specifies the database to search")
// rettype is forwarded as the RetType field of the fetch parameters.
rettype = flag.String("rettype", "fasta", "rettype specifies the format of the returned data.")
// retmax is forwarded as the RetMax field of the fetch parameters and is
// also the amount by which RetStart advances between fetch requests.
retmax = flag.Int("retmax", 500, "retmax specifies the number of records to be retrieved per request.")
// out is the output file path; an empty value makes main write to os.Stdout.
out = flag.String("out", "", "out specifies destination of the returned data (default to stdout).")
// email is passed to the search and fetch calls; main prints the usage
// message and exits with status 1 when it is empty.
email = flag.String("email", "", "email specifies the email address to be sent to the server (required).")
// retries bounds the number of fetch attempts made for each batch. Note
// that the flag itself is named "retry".
retries = flag.Int("retry", 5, "retry specifies the number of attempts to retrieve the data.")
// help makes main print the usage message and exit with status 0.
help = flag.Bool("help", false, "help prints this message.")
)
// main parses the command line flags, runs the query against the selected
// Entrez database and writes the matching records, fetched in batches of
// -retmax records, to the output destination (the -out file, or stdout).
//
// The process exits with status 0 after printing usage for -help, and with
// status 1 when a required flag is missing, when -retmax or -retry is not
// positive, when the search fails, when a batch cannot be retrieved within
// -retry attempts, or when the output cannot be written or closed.
func main() {
	// NOTE(review): presumably a zero timeout disables the ncbi package's
	// request timeout — confirm against the ncbi package documentation.
	ncbi.SetTimeout(0)

	flag.Parse()
	if *help {
		flag.Usage()
		os.Exit(0)
	}
	// -retmax is the stride of the batch loop below, so a non-positive value
	// would never advance past s.Count and the loop would spin forever.
	// -retry bounds the attempt loop, so a non-positive value would skip
	// every fetch and silently produce empty output.
	if *email == "" || *clQuery == "" || *retmax <= 0 || *retries <= 0 {
		flag.Usage()
		os.Exit(1)
	}

	of := os.Stdout
	if *out != "" {
		f, err := os.Create(*out)
		if err != nil {
			log.Fatalf("failed to create output file: %v\n", err)
		}
		// No deferred Close: every failure path below leaves through
		// log.Fatalf (os.Exit), which does not run deferred calls. The file
		// is closed explicitly, with its error checked, on the success path.
		of = f
	}

	h := entrez.History{}
	s, err := entrez.DoSearch(*db, *clQuery, nil, &h, tool, *email)
	if err != nil {
		log.Fatalf("error: %v\n", err)
	}
	log.Printf("will retrieve %d records.\n", s.Count)

	buf := &bytes.Buffer{}
	p := &entrez.Parameters{RetMax: *retmax, RetType: *rettype, RetMode: "text"}
	// bn counts bytes buffered by successful fetches, n counts bytes written
	// to the output; they are compared at the end as a sanity check.
	var bn, n int64
	for p.RetStart = 0; p.RetStart < s.Count; p.RetStart += p.RetMax {
		log.Printf("attempting to retrieve %d records starting from %d with %d retries.\n", p.RetMax, p.RetStart, *retries)
		var t int
		for t = 0; t < *retries; t++ {
			// Discard whatever a previous failed attempt left behind.
			buf.Reset()
			var r io.ReadCloser
			r, err = entrez.Fetch(*db, p, tool, *email, &h)
			if err != nil {
				if r != nil {
					r.Close()
				}
				log.Printf("failed to retrieve on attempt %d... error: %v ... retrying.\n", t, err)
				continue
			}
			var copied int64
			copied, err = io.Copy(buf, r)
			r.Close()
			if err == nil {
				// Only count bytes that stay in buf; bytes from a failed
				// attempt are thrown away by the Reset above and must not
				// skew the final writethrough check.
				bn += copied
				break
			}
			log.Printf("failed to buffer on attempt %d... error: %v ... retrying.\n", t, err)
		}
		if err != nil {
			log.Fatalf("giving up after %d attempts: %v\n", t, err)
		}

		log.Printf("retrieved records with %d retries... writing out.\n", t)
		written, err := io.Copy(of, buf)
		n += written
		if err != nil {
			log.Fatalf("Error: %v\n", err)
		}
	}
	if bn != n {
		log.Printf("writethrough mismatch: %d != %d\n", bn, n)
	}

	// Close the output file explicitly so that a failure to flush the final
	// data is reported instead of being silently dropped.
	if of != os.Stdout {
		if err := of.Close(); err != nil {
			log.Fatalf("failed to close output file: %v\n", err)
		}
	}
}