From cc1e48650276f8d0049df5fd02fed06f73398e59 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 15 Nov 2024 12:51:14 -0500 Subject: [PATCH 1/3] Ensure full title exists --- internal/handlers/check.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/handlers/check.go b/internal/handlers/check.go index e2e296e..67cbee5 100644 --- a/internal/handlers/check.go +++ b/internal/handlers/check.go @@ -57,6 +57,7 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { requiredFields := []string{ "Title", "Object Model", + "Full Title", } uploadIds := map[string]bool{} for rowIndex, row := range csvData[1:] { From e70ee068cb671cf63b10506686e79d038ca84cfb Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 15 Nov 2024 13:41:17 -0500 Subject: [PATCH 2/3] More checks and add some tests --- internal/handlers/check.go | 357 +++++++++++++++++++++++++++++++- internal/handlers/check_test.go | 134 ++++++++++++ 2 files changed, 487 insertions(+), 4 deletions(-) create mode 100644 internal/handlers/check_test.go diff --git a/internal/handlers/check.go b/internal/handlers/check.go index 67cbee5..ae1d00d 100644 --- a/internal/handlers/check.go +++ b/internal/handlers/check.go @@ -11,6 +11,7 @@ import ( "regexp" "strconv" "strings" + "time" "github.com/golang-jwt/jwt/v5" "github.com/lehigh-university-libraries/fabricator/internal/contributor" @@ -45,15 +46,19 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { return } + errors := map[string]string{} if len(csvData) < 2 { - http.Error(w, "No rows in CSV to process", http.StatusBadRequest) + errorColumn := numberToExcelColumn(0) + errors[errorColumn] = "No rows in CSV to process" + csvData = append(csvData, []string{}) } + relators := validRelators() + header := csvData[0] doiPattern := regexp.MustCompile(`^10\.\d{4,9}\/[-._;()/:A-Za-z0-9]+$`) datePattern := regexp.MustCompile(`^\d{4}(-\d{2}(-\d{2})?)?$`) hierarchyChecked := map[string]bool{} - errors := map[string]string{} requiredFields := []string{ "Title", "Object Model", @@ -69,6 +74,19 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { if strInSlice(column, requiredFields) { errors[i] = "Missing value" } + if col == "Parent Collection" { + model := ColumnValue("ObjectModel", header, row) + if model == "Paged Content" { + errors[i] = "Paged content must have a parent collection" + } + } + + if col == "Page/Item Parent ID" { + model := ColumnValue("ObjectModel", header, row) + if model == "Page" { + errors[i] = "Pages must have a parent id" + } + } continue } @@ -82,10 +100,16 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { switch column { // make sure these columns are integers case "Parent Collection", "PPI": - _, err := strconv.Atoi(cell) + id, err := strconv.Atoi(cell) if err != nil { errors[i] = "Must be an integer" } + if column == "Parent Collection" { + url := fmt.Sprintf("https://preserve.lehigh.edu/node/%d?_format=json", id) + if !checkURL(url) { + errors[i] = fmt.Sprintf("Could not identify parent collection %d", id) + } + } // make sure these columns are valid URLs case "Catalog or ArchivesSpace URL": parsedURL, err := url.ParseRequestURI(cell) @@ -114,6 +138,10 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { if _, ok := uploadIds[cell]; !ok { errors[i] = "Unknown parent ID" } + id := ColumnValue("Upload ID", header, row) + if cell == id { + errors[i] = "Upload ID and parent ID can not be equal" + } case "Contributor": var c contributor.Contributor err := json.Unmarshal([]byte(cell), &c) @@ -129,6 +157,14 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { errors[i] = "Additional fields can only be applied to people" } } + if !strInSlice(name[2], []string{"person", "corporate_body"}) { + errors[i] = fmt.Sprintf("Bad vocabulary ID for contributor: %s", name[2]) + } + relator := fmt.Sprintf("%s:%s", name[0], name[1]) + if !strInSlice(relator, relators) { + errors[i] = fmt.Sprintf("Invalid relator: %s", relator) + } + // make sure the file exists in the filesystem case "File Path": filename := strings.ReplaceAll(cell, `\`, `/`) @@ -154,7 +190,10 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { if err != nil { errors[i] = "Unable to get TGN" } - + case "Title": + if len(cell) > 255 { + errors[i] = "Title is longer than 255 characters" + } } } } @@ -276,3 +315,313 @@ func authRequest(w http.ResponseWriter, r *http.Request) bool { return true } + +func IndexOf(value string, slice []string) int { + for i, v := range slice { + if v == value { + return i + } + } + return -1 +} + +func ColumnValue(value string, header, row []string) string { + i := IndexOf(value, header) + if i == -1 { + return "" + } + + return row[i] +} + +func checkURL(url string) bool { + client := &http.Client{ + Timeout: 10 * time.Second, + } + + resp, err := client.Head(url) + if err != nil { + return false + } + defer resp.Body.Close() + + return resp.StatusCode == http.StatusOK +} + +func validRelators() []string { + // todo: fetch this from field_linked_agent config + return []string{ + "relators:att", + "relators:abr", + "relators:act", + "relators:adp", + "relators:rcp", + "relators:anl", + "relators:anm", + "relators:ann", + "relators:apl", + "relators:ape", + "relators:app", + "relators:arc", + "relators:arr", + "relators:acp", + "relators:adi", + "relators:art", + "relators:ard", + "relators:asn", + "relators:asg", + "relators:auc", + "relators:aut", + "relators:aqt", + "relators:aft", + "relators:aud", + "relators:aui", + "relators:ato", + "relators:ant", + "relators:bnd", + "relators:bdd", + "relators:blw", + "relators:bkd", + "relators:bkp", + "relators:bjd", + "relators:bpd", + "relators:bsl", + "relators:brl", + "relators:brd", + "relators:cll", + "relators:ctg", + "relators:cas", + "relators:cns", + "relators:chr", + "relators:clb", + "relators:cng", + "relators:cli", + "relators:cor", + "relators:col", + "relators:clt", + "relators:clr", + "relators:cmm", + "relators:cwt", + "relators:com", + "relators:cpl", + "relators:cpt", + "relators:cpe", + "relators:cmp", + "relators:cmt", + "relators:ccp", + "relators:cnd", + "relators:con", + "relators:csl", + "relators:csp", + "relators:cos", + "relators:cot", + "relators:coe", + "relators:cts", + "relators:ctt", + "relators:cte", + "relators:ctr", + "relators:ctb", + "relators:cpc", + "relators:cph", + "relators:crr", + "relators:crp", + "relators:cst", + "relators:cou", + "relators:crt", + "relators:cov", + "relators:cre", + "relators:cur", + "relators:dnc", + "relators:dtc", + "relators:dtm", + "relators:dte", + "relators:dto", + "relators:dfd", + "relators:dft", + "relators:dfe", + "relators:dgg", + "relators:dgs", + "relators:dln", + "relators:dpc", + "relators:dpt", + "relators:dsr", + "relators:drt", + "relators:dis", + "relators:dbp", + "relators:dst", + "relators:dnr", + "relators:drm", + "relators:dub", + "relators:edt", + "relators:edc", + "relators:edm", + "relators:edd", + "relators:elg", + "relators:elt", + "relators:enj", + "relators:eng", + "relators:egr", + "relators:etr", + "relators:evp", + "relators:exp", + "relators:fac", + "relators:fld", + "relators:fmd", + "relators:fds", + "relators:flm", + "relators:fmp", + "relators:fmk", + "relators:fpy", + "relators:frg", + "relators:fmo", + "relators:fnd", + "relators:gis", + "relators:grt", + "relators:hnr", + "relators:hst", + "relators:his", + "relators:ilu", + "relators:ill", + "relators:ins", + "relators:itr", + "relators:ive", + "relators:ivr", + "relators:inv", + "relators:isb", + "relators:jud", + "relators:jug", + "relators:lbr", + "relators:ldr", + "relators:lsa", + "relators:led", + "relators:len", + "relators:lil", + "relators:lit", + "relators:lie", + "relators:lel", + "relators:let", + "relators:lee", + "relators:lbt", + "relators:lse", + "relators:lso", + "relators:lgd", + "relators:ltg", + "relators:lyr", + "relators:mfp", + "relators:mfr", + "relators:mrb", + "relators:mrk", + "relators:med", + "relators:mdc", + "relators:mte", + "relators:mtk", + "relators:mod", + "relators:mon", + "relators:mcp", + "relators:msd", + "relators:mus", + "relators:nrt", + "relators:osp", + "relators:opn", + "relators:orm", + "relators:org", + "relators:oth", + "relators:own", + "relators:pan", + "relators:ppm", + "relators:pta", + "relators:pth", + "relators:pat", + "relators:prf", + "relators:pma", + "relators:pht", + "relators:ptf", + "relators:ptt", + "relators:pte", + "relators:plt", + "relators:pra", + "relators:pre", + "relators:prt", + "relators:pop", + "relators:prm", + "relators:prc", + "relators:pro", + "relators:prn", + "relators:prs", + "relators:pmn", + "relators:prd", + "relators:prp", + "relators:prg", + "relators:pdr", + "relators:pfr", + "relators:prv", + "relators:pup", + "relators:pbd", + "relators:ppt", + "relators:rdd", + "relators:rpc", + "relators:rce", + "relators:rcd", + "relators:red", + "relators:ren", + "relators:rpt", + "relators:rps", + "relators:rth", + "relators:rtm", + "relators:res", + "relators:rsp", + "relators:rst", + "relators:rse", + "relators:rpy", + "relators:rsg", + "relators:rsr", + "relators:rev", + "relators:rbr", + "relators:sce", + "relators:sad", + "relators:aus", + "relators:scr", + "relators:scl", + "relators:spy", + "relators:sec", + "relators:sll", + "relators:std", + "relators:stg", + "relators:sgn", + "relators:sng", + "relators:sds", + "relators:spk", + "relators:spn", + "relators:sgd", + "relators:stm", + "relators:stn", + "relators:str", + "relators:stl", + "relators:sht", + "relators:srv", + "relators:tch", + "relators:tcd", + "relators:tld", + "relators:tlp", + "relators:ths", + "relators:trc", + "relators:trl", + "relators:tyd", + "relators:tyg", + "relators:uvp", + "relators:vdg", + "relators:voc", + "relators:vac", + "relators:wit", + "relators:wde", + "relators:wdc", + "relators:wam", + "relators:wac", + "relators:wal", + "relators:wat", + "relators:win", + "relators:wpr", + "relators:wst", + "label:department", + } +} diff --git a/internal/handlers/check_test.go b/internal/handlers/check_test.go new file mode 100644 index 0000000..70420d0 --- /dev/null +++ b/internal/handlers/check_test.go @@ -0,0 +1,134 @@ +package handlers + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "testing" +) + +func TestCheckMyWork(t *testing.T) { + tests := []struct { + name string + method string + body interface{} + statusCode int + response string + }{ + { + name: "Method not allowed", + method: http.MethodGet, + body: nil, + statusCode: http.StatusMethodNotAllowed, + response: "Method not allowed\n", + }, + { + name: "Not authorized", + method: http.MethodPost, + body: nil, + statusCode: http.StatusUnauthorized, + response: "Authorization header missing\n", + }, + { + name: "Empty request body", + method: http.MethodPost, + body: nil, + statusCode: http.StatusBadRequest, + response: "Request body is empty\n", + }, + { + name: "Invalid JSON body", + method: http.MethodPost, + body: "invalid_json", + statusCode: http.StatusBadRequest, + response: "Error parsing CSV\n", + }, + { + name: "No rows in CSV to process", + method: http.MethodPost, + body: [][]string{{"Title", "Object Model"}}, + statusCode: http.StatusOK, + response: `{"A":"No rows in CSV to process"}`, + }, + { + name: "Valid CSV data with no errors", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title"}, + {"Test Title", "Model", "Full Test Title"}, + }, + statusCode: http.StatusOK, + response: "{}", // Empty errors map + }, + { + name: "Missing title", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title"}, + {"", "Model", "foo"}, + }, + statusCode: http.StatusOK, + response: `{"A2":"Missing value"}`, + }, + { + name: "Missing model", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title"}, + {"foo", "", "bar"}, + }, + statusCode: http.StatusOK, + response: `{"B2":"Missing value"}`, + }, + { + name: "Missing full title", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title"}, + {"Test Title", "Model", ""}, + }, + statusCode: http.StatusOK, + response: `{"C2":"Missing value"}`, + }, + } + + sharedSecret := "foo" + os.Setenv("SHARED_SECRET", sharedSecret) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var reqBody *bytes.Buffer + if tt.body != nil { + bodyBytes, err := json.Marshal(tt.body) + if err != nil { + t.Fatalf("Failed to marshal body: %v", err) + } + reqBody = bytes.NewBuffer(bodyBytes) + } else { + reqBody = bytes.NewBuffer(nil) + } + + req := httptest.NewRequest(tt.method, "/check-my-work", reqBody) + if tt.name == "Not authorized" { + req.Header.Set("X-Secret", "nope") + } else { + req.Header.Set("X-Secret", sharedSecret) + } + rec := httptest.NewRecorder() + + // Call the handler + CheckMyWork(rec, req) + + // Assert response status code + if rec.Code != tt.statusCode { + t.Errorf("Expected status code %d, got %d", tt.statusCode, rec.Code) + } + + // Assert response body + if rec.Body.String() != tt.response { + t.Errorf("Expected response body: %q, got %q", tt.response, rec.Body.String()) + } + }) + } +} From 4e4a02fe15d88a2fed29308aa8edaeaa68fc4616 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Fri, 15 Nov 2024 15:06:34 -0500 Subject: [PATCH 3/3] More tests --- internal/handlers/check.go | 11 ++- internal/handlers/check_test.go | 139 +++++++++++++++++++++++++++++++- 2 files changed, 143 insertions(+), 7 deletions(-) diff --git a/internal/handlers/check.go b/internal/handlers/check.go index ae1d00d..b61b87e 100644 --- a/internal/handlers/check.go +++ b/internal/handlers/check.go @@ -74,15 +74,15 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { if strInSlice(column, requiredFields) { errors[i] = "Missing value" } - if col == "Parent Collection" { - model := ColumnValue("ObjectModel", header, row) + if column == "Parent Collection" { + model := ColumnValue("Object Model", header, row) if model == "Paged Content" { errors[i] = "Paged content must have a parent collection" } } - if col == "Page/Item Parent ID" { - model := ColumnValue("ObjectModel", header, row) + if column == "Page/Item Parent ID" { + model := ColumnValue("Object Model", header, row) if model == "Page" { errors[i] = "Pages must have a parent id" } @@ -103,6 +103,7 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { id, err := strconv.Atoi(cell) if err != nil { errors[i] = "Must be an integer" + break } if column == "Parent Collection" { url := fmt.Sprintf("https://preserve.lehigh.edu/node/%d?_format=json", id) @@ -137,6 +138,7 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { case "Page/Item Parent ID": if _, ok := uploadIds[cell]; !ok { errors[i] = "Unknown parent ID" + break } id := ColumnValue("Upload ID", header, row) if cell == id { @@ -151,6 +153,7 @@ func CheckMyWork(w http.ResponseWriter, r *http.Request) { name := strings.Split(c.Name, ":") if len(name) < 4 { errors[i] = "Contributor name not in proper format" + break } if c.Status != "" || c.Email != "" || c.Institution != "" || c.Orcid != "" { if name[2] != "person" { diff --git a/internal/handlers/check_test.go b/internal/handlers/check_test.go index 70420d0..66bcd4f 100644 --- a/internal/handlers/check_test.go +++ b/internal/handlers/check_test.go @@ -57,7 +57,7 @@ func TestCheckMyWork(t *testing.T) { method: http.MethodPost, body: [][]string{ {"Title", "Object Model", "Full Title"}, - {"Test Title", "Model", "Full Test Title"}, + {"foo", "bar", "Full Test Title"}, }, statusCode: http.StatusOK, response: "{}", // Empty errors map @@ -67,7 +67,7 @@ func TestCheckMyWork(t *testing.T) { method: http.MethodPost, body: [][]string{ {"Title", "Object Model", "Full Title"}, - {"", "Model", "foo"}, + {"", "bar", "foo"}, }, statusCode: http.StatusOK, response: `{"A2":"Missing value"}`, @@ -87,11 +87,144 @@ func TestCheckMyWork(t *testing.T) { method: http.MethodPost, body: [][]string{ {"Title", "Object Model", "Full Title"}, - {"Test Title", "Model", ""}, + {"foo", "bar", ""}, }, statusCode: http.StatusOK, response: `{"C2":"Missing value"}`, }, + { + name: "Missing file", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "File Path"}, + {"foo", "bar", "foo", "/tmp/file/does/not/exist"}, + }, + statusCode: http.StatusOK, + response: `{"D2":"File does not exist in islandora_staging"}`, + }, + // Parent Collection and PPI + { + name: "Parent Collection not integer", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Parent Collection"}, + {"foo", "bar", "foo", "NotAnInteger"}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Must be an integer"}`, + }, + { + name: "Parent Collection not found", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Parent Collection"}, + {"foo", "bar", "foo", "0"}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Could not identify parent collection 0"}`, + }, + { + name: "Parent Collection exists", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Parent Collection"}, + {"foo", "bar", "foo", "2"}, + }, + statusCode: http.StatusOK, + response: `{}`, + }, + { + name: "Invalid URL", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Catalog or ArchivesSpace URL"}, + {"foo", "bar", "foo", "invalid-url"}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Invalid URL"}`, + }, + { + name: "Duplicate Upload ID", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Upload ID"}, + {"Test Title 1", "bar", "foo", "123"}, + {"Test Title 2", "bar", "foo", "123"}, + }, + statusCode: http.StatusOK, + response: `{"D3":"Duplicate upload ID"}`, + }, + { + name: "Invalid EDTF value", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Creation Date"}, + {"foo", "bar", "foo", "1/2/2022"}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Invalid EDTF value"}`, + }, + { + name: "Invalid DOI", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "DOI"}, + {"foo", "bar", "foo", "1.2.3.4"}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Invalid DOI"}`, + }, + { + name: "Unknown Parent ID", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Page/Item Parent ID", "Upload ID"}, + {"foo", "bar", "foo", "123", "456"}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Unknown parent ID"}`, + }, + { + name: "Upload ID equals Parent ID", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Upload ID", "Page/Item Parent ID"}, + {"foo", "bar", "foo", "123", "123"}, + }, + statusCode: http.StatusOK, + response: `{"E2":"Upload ID and parent ID can not be equal"}`, + }, + // Contributor + { + name: "Contributor not in proper format", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Contributor"}, + {"foo", "bar", "foo", `{"name":"bad-format"}`}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Contributor name not in proper format"}`, + }, + { + name: "Paged Content need collection", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Parent Collection"}, + {"foo", "Paged Content", "foo", ""}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Paged content must have a parent collection"}`, + }, + { + name: "Page needs Parent ID", + method: http.MethodPost, + body: [][]string{ + {"Title", "Object Model", "Full Title", "Page/Item Parent ID"}, + {"foo", "Page", "foo", ""}, + }, + statusCode: http.StatusOK, + response: `{"D2":"Pages must have a parent id"}`, + }, } sharedSecret := "foo"