Skip to content

Commit

Permalink
Update main.yml
Browse files: browse the repository at this point in the history
  • Loading branch information
emgeier authored Oct 22, 2024
1 parent b6d5fc4 commit 1e1fcc6
Showing 1 changed file with 7 additions and 12 deletions.
19 changes: 7 additions & 12 deletions .github/workflows/main.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
# echo "Updated XML files: $UPDATED_FILES"
# echo "::set-output name=updated_files::$UPDATED_FILES"
# id: files
# Step 3: Identify the first 5 XML files for testing
# Step 4: Identify the first 5 XML files for testing
- name: Identify first 5 XML files
run: |
find ./data/persons/tei -name '*.xml' | head -n 5 > xml_files.txt
Expand Down Expand Up @@ -81,8 +81,6 @@ jobs:
java -jar saxon.jar -s:$file -xsl:json-stylesheet.xsl | tr -d '\n' >> bulk_data.json
echo "" >> bulk_data.json # Add a newline after the document entry



# Apply XSLT for HTML conversion and capture any error
# java -jar saxon.jar -s:$file -xsl:html-stylesheet.xsl -o:${filename}.html 2>&1 | tee saxon_error.log
# # Upload the HTML file to S3
Expand All @@ -96,7 +94,7 @@ jobs:



# Step 5: HTML files
# Step 6: Convert HTML files
- name: Create static HTML directory
run: |
mkdir -p data-html
Expand All @@ -114,12 +112,12 @@ jobs:
filename=$(basename ${file%.xml})
echo "html filename: $filename"
# Run the XSLT transformation located in the root of syriaca-data repository
java -jar saxon.jar -s:$file -xsl:html-stylesheet.xsl -o:data-html/${type}/$filename
java -jar saxon.jar -s:$file -xsl:html-stylesheet.xsl -o:data-html/${type}/${filename}.html
done < xml_files.txt
# Step 6: Upload files to S3
# Step 7: Upload files to S3
- name: Upload JSON file to S3
run: |
aws s3 cp bulk_data.json s3://srophe-syriaca-front-end/json-data/bulk_data_persons.json
Expand All @@ -134,15 +132,12 @@ jobs:
type=$(echo "$html_file" | grep -o -E 'work|subject|person|place|spear|bibl' | tail -n 1)
echo "html_file $html_file"
aws s3 cp $html_file s3://srophe-syriaca-front-end/${type}/$(basename ${html_file%.html}.html)
done
# Process all XML files and convert their extensions to .html
for xml_file in $(find ./data-html -name "*.xml"); do
type=$(echo "$xml_file" | grep -o -E 'work|subject|person|place|spear|bibl' | head -n 1)
filename=$(basename ${xml_file%.xml}.html) # Change XML extension to HTML
echo "Uploading improperly labeled xml_file as HTML $xml_file"
aws s3 cp $xml_file s3://srophe-syriaca-front-end/${type}/$filename
echo "error for xml_file $xml_file"
done
env:
Expand Down

0 comments on commit 1e1fcc6

Please sign in to comment.