From b846e29305b3271481f637fb6b397ea9a59ca947 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Thu, 11 Jul 2024 09:25:31 -0400 Subject: [PATCH 1/3] Add command to export a parent item and its children to a workbench CSV --- cmd/csv.go | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++ cmd/export.go | 19 ++++++ 2 files changed, 177 insertions(+) create mode 100644 cmd/csv.go create mode 100644 cmd/export.go diff --git a/cmd/csv.go b/cmd/csv.go new file mode 100644 index 0000000..641e8f3 --- /dev/null +++ b/cmd/csv.go @@ -0,0 +1,158 @@ +package cmd + +import ( + "encoding/csv" + "fmt" + "log" + "log/slog" + "net/http" + "os" + + "github.com/spf13/cobra" +) + +var nid, csvFile string +var parentModels = []string{ + "Collection", + "Compound Object", + "Paged Content", + "Publication Issue", + "Sub-Collection", +} + +// csvCmd represents the csv command +var csvCmd = &cobra.Command{ + Use: "csv", + Short: "Recursively export a workbench CSV for an Islandora node", + Long: `Recursively export a workbench CSV for an Islandora node.`, + Run: func(cmd *cobra.Command, args []string) { + + if baseUrl == "" || nid == "" { + slog.Error("--baseUrl and --nid flags are required") + os.Exit(1) + } + + baseURL := fmt.Sprintf("%s/node/%s?_format=workbench_csv", baseUrl, nid) + + var allHeaders []string + headerMap := make(map[string]bool) + rows := []map[string]string{} + nodeIDMap := make(map[string]bool) + + // Fetch the initial CSV + initialCSV, err := fetchCSV(baseURL) + if err != nil { + log.Fatal(err) + } + + // Process the initial CSV to find unique columns and rows to fetch + for _, record := range initialCSV[1:] { // Skip header row + row := make(map[string]string) + nodeID := "" + + for i, header := range initialCSV[0] { + if header == "node_id" { + nodeID = record[i] + } + if !headerMap[header] { + allHeaders = append(allHeaders, header) + headerMap[header] = true + } + row[header] = record[i] + } + + if !nodeIDMap[nodeID] { + rows = append(rows, row) + nodeIDMap[nodeID] = true + } + + if StrInSlice(row["field_model"], parentModels) { + subNodeID := row["node_id"] + subURL := fmt.Sprintf("%s/node/%s?_format=workbench_csv", baseUrl, subNodeID) + subCSV, err := fetchCSV(subURL) + if err != nil { + slog.Error("Failed to fetch sub-collection CSV for node ID", "nid", subNodeID, "err", err) + continue + } + + for _, subRecord := range subCSV[1:] { // Skip header row + subRow := make(map[string]string) + subNodeID := "" + + for i, subHeader := range subCSV[0] { + if subHeader == "node_id" { + subNodeID = subRecord[i] + } + if !headerMap[subHeader] { + allHeaders = append(allHeaders, subHeader) + headerMap[subHeader] = true + } + subRow[subHeader] = subRecord[i] + } + + if !nodeIDMap[subNodeID] { + rows = append(rows, subRow) + nodeIDMap[subNodeID] = true + } + } + } + } + + // Write to the output CSV + outFile, err := os.Create(csvFile) + if err != nil { + log.Fatal(err) + } + defer outFile.Close() + + csvWriter := csv.NewWriter(outFile) + defer csvWriter.Flush() + + csvWriter.Write(allHeaders) + + for _, row := range rows { + record := make([]string, len(allHeaders)) + for i, header := range allHeaders { + record[i] = row[header] + } + csvWriter.Write(record) + } + + fmt.Println("CSV files merged successfully into", csvFile) + }, +} + +func init() { + exportCmd.AddCommand(csvCmd) + csvCmd.Flags().StringVar(&nid, "nid", "", "The node ID to export a CSV") + csvCmd.Flags().StringVar(&csvFile, "output", "merged.csv", "The CSV file name to save the export to") +} + +func fetchCSV(url string) ([][]string, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to fetch CSV from %s: %s", url, resp.Status) + } + + reader := csv.NewReader(resp.Body) + records, err := reader.ReadAll() + if err != nil { + return nil, err + } + + return records, nil +} + +func StrInSlice(s string, sl []string) bool { + for _, a := range sl { + if a == s { + return true + } + } + return false +} diff --git a/cmd/export.go b/cmd/export.go new file mode 100644 index 0000000..a3704ef --- /dev/null +++ b/cmd/export.go @@ -0,0 +1,19 @@ +package cmd + +import ( + "github.com/spf13/cobra" +) + +var baseUrl string + +// exportCmd represents the export command +var exportCmd = &cobra.Command{ + Use: "export", + Short: "Export content", +} + +func init() { + rootCmd.AddCommand(exportCmd) + + exportCmd.PersistentFlags().StringVar(&baseUrl, "baseUrl", "", "The base URL to export from (e.g. https://google.com)") +} From 1ed64c6820f443c96e9961d4b2576dee271da038 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 13 Jul 2024 09:22:26 -0400 Subject: [PATCH 2/3] Sort CSV by field_member_of then field_weight --- cmd/csv.go | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/cmd/csv.go b/cmd/csv.go index 641e8f3..0681e42 100644 --- a/cmd/csv.go +++ b/cmd/csv.go @@ -7,6 +7,8 @@ import ( "log/slog" "net/http" "os" + "sort" + "strconv" "github.com/spf13/cobra" ) @@ -20,6 +22,29 @@ var parentModels = []string{ "Sub-Collection", } +// Custom type for sorting rows +type Row map[string]string + +// Custom sorting function +type ByFieldMemberOfAndWeight []Row + +func (a ByFieldMemberOfAndWeight) Len() int { + return len(a) +} + +func (a ByFieldMemberOfAndWeight) Swap(i, j int) { + a[i], a[j] = a[j], a[i] +} + +func (a ByFieldMemberOfAndWeight) Less(i, j int) bool { + if a[i]["field_member_of"] == a[j]["field_member_of"] { + weightI, _ := strconv.Atoi(a[i]["field_weight"]) + weightJ, _ := strconv.Atoi(a[j]["field_weight"]) + return weightI < weightJ + } + return a[i]["field_member_of"] < a[j]["field_member_of"] +} + // csvCmd represents the csv command var csvCmd = &cobra.Command{ Use: "csv", @@ -36,7 +61,7 @@ var csvCmd = &cobra.Command{ var allHeaders []string headerMap := make(map[string]bool) - rows := []map[string]string{} + rows := []Row{} nodeIDMap := make(map[string]bool) // Fetch the initial CSV @@ -47,7 +72,7 @@ var csvCmd = &cobra.Command{ // Process the initial CSV to find unique columns and rows to fetch for _, record := range initialCSV[1:] { // Skip header row - row := make(map[string]string) + row := make(Row) nodeID := "" for i, header := range initialCSV[0] { @@ -76,7 +101,7 @@ var csvCmd = &cobra.Command{ } for _, subRecord := range subCSV[1:] { // Skip header row - subRow := make(map[string]string) + subRow := make(Row) subNodeID := "" for i, subHeader := range subCSV[0] { @@ -98,6 +123,9 @@ var csvCmd = &cobra.Command{ } } + // Sort the rows by field_member_of and then by field_weight + sort.Sort(ByFieldMemberOfAndWeight(rows)) + // Write to the output CSV outFile, err := os.Create(csvFile) if err != nil { From 403cd5188d3c68ea93ee385eb9dcd8c6f2679985 Mon Sep 17 00:00:00 2001 From: Joe Corall Date: Sat, 13 Jul 2024 09:23:48 -0400 Subject: [PATCH 3/3] lint --- cmd/csv.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cmd/csv.go b/cmd/csv.go index 0681e42..3c7a322 100644 --- a/cmd/csv.go +++ b/cmd/csv.go @@ -60,6 +60,7 @@ var csvCmd = &cobra.Command{ baseURL := fmt.Sprintf("%s/node/%s?_format=workbench_csv", baseUrl, nid) var allHeaders []string + var err error headerMap := make(map[string]bool) rows := []Row{} nodeIDMap := make(map[string]bool) @@ -136,14 +137,22 @@ var csvCmd = &cobra.Command{ csvWriter := csv.NewWriter(outFile) defer csvWriter.Flush() - csvWriter.Write(allHeaders) + err = csvWriter.Write(allHeaders) + if err != nil { + slog.Error("Unable to write to CSV", "err", err) + os.Exit(1) + } for _, row := range rows { record := make([]string, len(allHeaders)) for i, header := range allHeaders { record[i] = row[header] } - csvWriter.Write(record) + err = csvWriter.Write(record) + if err != nil { + slog.Error("Unable to write to CSV", "err", err) + os.Exit(1) + } } fmt.Println("CSV files merged successfully into", csvFile)