Skip to content

Commit

Permalink
Add command to export a parent item and its children to a workbench CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
joecorall committed Jul 11, 2024
1 parent 50f8e60 commit b846e29
Show file tree
Hide file tree
Showing 2 changed files with 177 additions and 0 deletions.
158 changes: 158 additions & 0 deletions cmd/csv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package cmd

import (
"encoding/csv"
"fmt"
"log"
"log/slog"
"net/http"
"os"

"github.com/spf13/cobra"
)

var (
	// nid is the node ID to export; it is bound to the --nid flag.
	nid string

	// csvFile is the path the merged CSV is written to; it is bound to the
	// --output flag.
	csvFile string

	// parentModels lists the field_model values that mark a row as a parent
	// whose own CSV export is fetched as well.
	parentModels = []string{
		"Collection",
		"Compound Object",
		"Paged Content",
		"Publication Issue",
		"Sub-Collection",
	}
)

// csvCmd represents the csv command
var csvCmd = &cobra.Command{
Use: "csv",
Short: "Recursively export a workbench CSV for an Islandora node",
Long: `Recursively export a workbench CSV for an Islandora node.`,
Run: func(cmd *cobra.Command, args []string) {

if baseUrl == "" || nid == "" {
slog.Error("--baseUrl and --nid flags are required")
os.Exit(1)
}

baseURL := fmt.Sprintf("%s/node/%s?_format=workbench_csv", baseUrl, nid)

var allHeaders []string
headerMap := make(map[string]bool)
rows := []map[string]string{}
nodeIDMap := make(map[string]bool)

// Fetch the initial CSV
initialCSV, err := fetchCSV(baseURL)
if err != nil {
log.Fatal(err)
}

// Process the initial CSV to find unique columns and rows to fetch
for _, record := range initialCSV[1:] { // Skip header row
row := make(map[string]string)
nodeID := ""

for i, header := range initialCSV[0] {
if header == "node_id" {
nodeID = record[i]
}
if !headerMap[header] {
allHeaders = append(allHeaders, header)
headerMap[header] = true
}
row[header] = record[i]
}

if !nodeIDMap[nodeID] {
rows = append(rows, row)
nodeIDMap[nodeID] = true
}

if StrInSlice(row["field_model"], parentModels) {
subNodeID := row["node_id"]
subURL := fmt.Sprintf("%s/node/%s?_format=workbench_csv", baseUrl, subNodeID)
subCSV, err := fetchCSV(subURL)
if err != nil {
slog.Error("Failed to fetch sub-collection CSV for node ID", "nid", subNodeID, "err", err)
continue
}

for _, subRecord := range subCSV[1:] { // Skip header row
subRow := make(map[string]string)
subNodeID := ""

for i, subHeader := range subCSV[0] {
if subHeader == "node_id" {
subNodeID = subRecord[i]
}
if !headerMap[subHeader] {
allHeaders = append(allHeaders, subHeader)
headerMap[subHeader] = true
}
subRow[subHeader] = subRecord[i]
}

if !nodeIDMap[subNodeID] {
rows = append(rows, subRow)
nodeIDMap[subNodeID] = true
}
}
}
}

// Write to the output CSV
outFile, err := os.Create(csvFile)
if err != nil {
log.Fatal(err)
}
defer outFile.Close()

csvWriter := csv.NewWriter(outFile)
defer csvWriter.Flush()

csvWriter.Write(allHeaders)

Check failure on line 111 in cmd/csv.go

View workflow job for this annotation

GitHub Actions / lint-test

Error return value of `csvWriter.Write` is not checked (errcheck)

for _, row := range rows {
record := make([]string, len(allHeaders))
for i, header := range allHeaders {
record[i] = row[header]
}
csvWriter.Write(record)

Check failure on line 118 in cmd/csv.go

View workflow job for this annotation

GitHub Actions / lint-test

Error return value of `csvWriter.Write` is not checked (errcheck)
}

fmt.Println("CSV files merged successfully into", csvFile)
},
}

func init() {
exportCmd.AddCommand(csvCmd)
csvCmd.Flags().StringVar(&nid, "nid", "", "The node ID to export a CSV")
csvCmd.Flags().StringVar(&csvFile, "output", "merged.csv", "The CSV file name to save the export to")
}

// fetchCSV issues an HTTP GET for url and parses the response body as CSV.
//
// It returns every record, including the header row. An error is returned when
// the request fails, the status is not 200 OK, the body is not valid CSV, or
// the body contains no records at all, so callers may safely index the first
// record of a successful result.
//
// NOTE: http.Get uses http.DefaultClient, which sets no timeout.
func fetchCSV(url string) ([][]string, error) {
	resp, err := http.Get(url)
	if err != nil {
		// The error from http.Get already names the method and URL.
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to fetch CSV from %s: %s", url, resp.Status)
	}

	records, err := csv.NewReader(resp.Body).ReadAll()
	if err != nil {
		return nil, fmt.Errorf("parsing CSV from %s: %w", url, err)
	}
	// An empty body parses to zero records; report it rather than hand callers
	// a slice they cannot take a header row from.
	if len(records) == 0 {
		return nil, fmt.Errorf("no CSV data returned from %s", url)
	}

	return records, nil
}

// StrInSlice reports whether s is equal to at least one element of sl.
// A nil or empty sl never contains s.
func StrInSlice(s string, sl []string) bool {
	for i := 0; i < len(sl); i++ {
		if sl[i] != s {
			continue
		}
		return true
	}
	return false
}
19 changes: 19 additions & 0 deletions cmd/export.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package cmd

import (
"github.com/spf13/cobra"
)

// baseUrl holds the value of the persistent --baseUrl flag declared on
// exportCmd; it defaults to the empty string.
var baseUrl string

// exportCmd represents the export command. It defines no Run function of its
// own here; subcommands are attached to it with AddCommand.
var exportCmd = &cobra.Command{
Use: "export",
Short: "Export content",
}

// init attaches exportCmd to rootCmd and declares the persistent --baseUrl
// flag on it.
func init() {
	rootCmd.AddCommand(exportCmd)

	persistent := exportCmd.PersistentFlags()
	persistent.StringVar(&baseUrl, "baseUrl", "", "The base URL to export from (e.g. https://google.com)")
}

0 comments on commit b846e29

Please sign in to comment.