# Update 2263.xml #22
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Process XML to JSON and HTML

# Trigger: run on every push to the data branch listed below.
on:
  push:
    branches:
      - 'development_backup_cbss-bibls'  # current data branch
# Scopes granted to the workflow's GITHUB_TOKEN.
permissions:
  # Allows the job to request an OIDC token; the AWS credentials step assumes
  # a role (session name "GitHub-OIDC-data") rather than using stored keys.
  id-token: write
  # Read-only access to repository contents (enough for checkout).
  contents: read
jobs:
  # Single job: transform every XML file in the checkout with Saxon and copy
  # the JSON written under ./json-data/ to S3.
  process_and_transform:
    runs-on: ubuntu-latest
    steps:
      # Step 1: Check out the repository
      - name: Checkout repository
        uses: actions/checkout@v4

      # Step 2: Install Java and Saxon and dependencies for XSLT
      - name: Set up JDK 11
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'

      # The jar is fetched from Maven Central and saved as ./saxon.jar.
      - name: Download Saxon-HE from Maven Central
        run: |
          wget https://repo1.maven.org/maven2/net/sf/saxon/Saxon-HE/10.6/Saxon-HE-10.6.jar -O saxon.jar

      # - name: Install Saxon and dependencies
      #   run: |
      #     wget https://www.saxonica.com/download/hej/SaxonHE10-6J.zip -O saxon.zip
      #     unzip saxon.zip

      # Step 3: Find updated XML files
      # NOTE(review): if this variant is re-enabled, replace `::set-output`
      # (deprecated, single-line only) with $GITHUB_OUTPUT or a list file as
      # done below, and make sure the checkout fetches HEAD~1.
      # - name: Identify updated XML files
      #   run: |
      #     UPDATED_FILES=$(git diff --name-only HEAD~1 HEAD | grep '.xml')
      #     echo "Updated XML files: $UPDATED_FILES"
      #     echo "::set-output name=updated_files::$UPDATED_FILES"
      #   id: files

      # Find all XML files (for the first run).
      # The paths are written one per line to a file in $RUNNER_TEMP and only
      # the file's location is published as a step output. The previous
      # `::set-output` command is deprecated and processes a single line, so a
      # newline-separated list was cut down to its first entry.
      - name: Identify all XML files
        id: files
        run: |
          LIST_FILE="$RUNNER_TEMP/updated_xml_files.txt"
          find . -name '*.xml' > "$LIST_FILE"
          echo "Updated XML files:"
          cat "$LIST_FILE"
          echo "list_file=$LIST_FILE" >> "$GITHUB_OUTPUT"

      # Step 4: Run XSLT transformations on every listed XML file.
      # The list location arrives through an environment variable instead of
      # being interpolated into the script text, so file names are never
      # evaluated as shell code.
      - name: Run XSLT Transformations
        env:
          LIST_FILE: ${{ steps.files.outputs.list_file }}
        run: |
          # The list is read on fd 3 so the java processes cannot consume it
          # through stdin.
          while IFS= read -r file <&3; do
            [ -n "$file" ] || continue
            echo "Processing $file"
            # Apply XSLT for JSON conversion; output is written next to the
            # source file. A failure here aborts the step.
            # NOTE(review): json-stylesheet.xsl is resolved relative to the
            # repository root - confirm it exists there.
            java -jar saxon.jar \
              "-s:$file" \
              -xsl:json-stylesheet.xsl \
              "-o:${file%.xml}.json"
            # Apply the siteGenerator JSON stylesheet; output goes under
            # ./json-data/ (the directory uploaded below). Failures are
            # logged and the loop continues with the next file.
            java -jar saxon.jar \
              "-s:$file" \
              -xsl:./siteGenerator/xsl/json.xsl \
              "-o:./json-data/${file%.xml}.json" || echo "Error processing $file"
          done 3< "$LIST_FILE"

      # Step 5: Configure AWS credentials
      - name: Configure AWS credentials from AWS account
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_SROPHE_ROLE }}
          aws-region: us-east-1
          role-session-name: GitHub-OIDC-data

      # Step 6: Upload JSON files to S3 for inspection
      # Only *.json files below ./json-data/ are copied.
      # NOTE(review): AWS_REGION set here takes precedence, for this step,
      # over the us-east-1 region configured in Step 5 - confirm the secret is
      # defined and intended. AWS_ACCOUNT_ID is not referenced by the command.
      - name: Upload JSON files to S3 for testing
        run: |
          aws s3 cp ./json-data/ s3://srophe-syriaca-front-end/json-data/ --recursive --exclude "*" --include "*.json"
        env:
          AWS_REGION: ${{ secrets.AWS_REGION }}
          AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}

      # Step 7: Upload HTML files to S3
      # - name: Sync HTML files to S3
      #   run: |
      #     for html_file in $(find . -name "*.html"); do
      #       aws s3 cp $html_file s3://srophe-syriaca-front-end/ --acl public-read
      #     done
      #   env:
      #     AWS_ACCOUNT_ID: ${{ secrets.AWS_ACCOUNT_ID }}
      #     AWS_REGION: us-east-1

      # Step 8: Index JSON files into OpenSearch
      # - name: Index JSON files to OpenSearch
      #   run: |
      #     for json_file in $(find . -name "*.json"); do
      #       curl -X POST "https://your-opensearch-endpoint/_bulk" \
      #         -H "Content-Type: application/json" \
      #         --data-binary "@$json_file"
      #     done
      #   env:
      #     OPENSEARCH_URL: ${{ secrets.OPENSEARCH_URL }}