# Automated Commit -> New Data #68
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# .github/workflows/deploy.yml
# Runs QC on recently changed CSV files, commits the QC output, then builds
# the Jekyll site and deploys it to GitHub Pages.
name: Task1 QC and Deploy

# Trigger: every push to the main branch.
on:
  push:
    branches: [main]

# Token scopes granted to every job in this workflow.
permissions:
  contents: write  # jobs below push commits back to main
  packages: write  # NOTE(review): no visible step uses this scope - confirm
  issues: write  # NOTE(review): no visible step uses this scope - confirm
  id-token: write  # requested for the GitHub Pages deployment step
  pages: write  # requested for the GitHub Pages deployment step

jobs:
# --- jobs.process_raw (nested under `jobs:`) ---
  # Finds the CSV files committed during the last 24 hours and publishes them
  # as a JSON array (`data`) plus a `has_data` flag for the downstream jobs.
  process_raw:
    name: Process Raw CSV Files
    runs-on: self-hosted
    outputs:
      data: ${{ steps.set_vars.outputs.json }}
      has_data: ${{ steps.set_vars.outputs.has_data }}
    steps:
      - name: Checkout Code
        # v4 matches the checkout version the deploy job already uses.
        uses: actions/checkout@v4
        with:
          # Full history, so `git log --since` is not cut off by a shallow clone.
          fetch-depth: 0

      - name: Get Changed CSV Files
        run: |
          # CSV paths touched by any commit in the last 24 hours, de-duplicated.
          # `--pretty=format:` drops the commit headers so only paths remain.
          data=$(git log --since="24 hours ago" --name-only --pretty=format: -- '*.csv' | sort -u)
          # Hand the newline-separated list to the next step through a
          # multi-line environment variable named `data`.
          {
            echo "data<<EOF"
            echo "$data"
            echo "EOF"
          } >> "$GITHUB_ENV"
          echo "Changed CSV files in the last 24 hours:"
          echo "$data"

      - name: Parse Raw CSV Files
        id: set_vars
        run: |
          # Escape backslashes first, then double quotes, so a value can be
          # embedded safely inside a JSON string literal.
          json_escape() {
            printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
          }

          json_array="["
          first_item=true
          # $data holds one path per line (exported by the previous step).
          # Reading line by line keeps paths that contain spaces intact.
          while IFS= read -r file; do
            # Skip the blank line left by the empty --pretty format.
            [ -n "$file" ] || continue
            dir=$(dirname "$file")
            filename=$(basename "$file")
            # The name of the parent directory is used as the run part.
            run_part=$(basename "$dir")
            # Split the file name on '_' into sub, task and version; anything
            # after the second underscore stays in `version`.
            IFS='_' read -r sub task version <<< "$filename"
            version="${version%.csv}"
            json_object=$(printf '{"sub":"%s","task":"%s","version":"%s","run_part":"%s"}' \
              "$(json_escape "$sub")" \
              "$(json_escape "$task")" \
              "$(json_escape "$version")" \
              "$(json_escape "$run_part")")
            if [ "$first_item" = true ]; then
              json_array="$json_array$json_object"
              first_item=false
            else
              json_array="$json_array,$json_object"
            fi
            echo "Run Part: $run_part"
            echo "Subject: $sub"
            echo "Task: $task"
            echo "Version: $version"
          done <<< "$data"
          json_array="$json_array]"
          # Publish the array once, then flag whether it is empty so the
          # dependent jobs can skip themselves.
          echo "json=$json_array" >> "$GITHUB_OUTPUT"
          if [ "$json_array" = "[]" ]; then
            echo "has_data=false" >> "$GITHUB_OUTPUT"
          else
            echo "has_data=true" >> "$GITHUB_OUTPUT"
          fi
# --- jobs.run_qc (nested under `jobs:`) ---
  # Runs the QC script once per changed CSV (one matrix entry per file) and
  # uploads the PNG and log files from the output directory as an artifact.
  run_qc:
    name: Run Quality Control
    runs-on: self-hosted
    needs: process_raw
    if: needs.process_raw.outputs.has_data == 'true'
    strategy:
      matrix:
        config: ${{ fromJson(needs.process_raw.outputs.data) }}
    # The matrix values are derived from file names in the repository. They
    # reach the scripts through environment variables instead of being expanded
    # into the script text, so unusual characters in a file name are treated
    # as data and are never executed by the shell.
    env:
      QC_SUB: ${{ matrix.config.sub }}
      QC_TASK: ${{ matrix.config.task }}
      QC_VERSION: ${{ matrix.config.version }}
      QC_RUN_PART: ${{ matrix.config.run_part }}
    steps:
      - name: Checkout Code
        # v4 matches the checkout version the deploy job already uses.
        uses: actions/checkout@v4

      - name: Debug Environment Variables
        run: |
          echo "Subject: $QC_SUB"
          echo "Task: $QC_TASK"
          echo "Version: $QC_VERSION"
          echo "Run Part: $QC_RUN_PART"

      - name: Install Python Dependencies
        run: |
          python -m pip install --upgrade pip

      - name: Run Quality Control Script
        env:
          TEASE: ${{ secrets.TEASE }}
        run: |
          sub=$QC_SUB
          task=$QC_TASK
          vers=$QC_VERSION
          run_part=$QC_RUN_PART
          echo "Processing subject: $sub"
          echo "Processing task: $task"
          echo "Processing version: $vers"
          # Input lives under processed/<run_part>; QC output goes to <run_part>.
          csv_file="./data/${sub}/processed/${run_part}/${sub}_${task}_${vers}.csv"
          out_dir="./data/${sub}/${run_part}"
          mkdir -p "$out_dir"
          log_file="${out_dir}/qc_${task}_${vers}.log"
          echo "CSV file: $csv_file"
          echo "Log file: $log_file"
          if [ -f "$csv_file" ]; then
            python ./code/SmqC.py -s "$csv_file" -o "$out_dir" -sub "$sub" | tee "$log_file"
            # `tee` would otherwise hide the script's exit code; read it from
            # PIPESTATUS right away and report it instead of claiming success.
            qc_status=${PIPESTATUS[0]}
            if [ "$qc_status" -eq 0 ]; then
              echo "QC for ${sub}_${task}_${vers} completed"
            else
              # Reported as a warning so the rest of the pipeline behaves as
              # before; change to `exit "$qc_status"` to make it fatal.
              echo "::warning::QC for ${sub}_${task}_${vers} exited with status $qc_status"
            fi
          else
            echo "CSV file $csv_file does not exist"
          fi

      - name: Upload QC Results
        # v3 of the artifact actions has been retired; v4 needs a unique
        # artifact name per job, which the matrix values provide.
        uses: actions/upload-artifact@v4
        with:
          name: qc-results-${{ matrix.config.sub }}-${{ matrix.config.task }}-${{ matrix.config.version }}-${{ matrix.config.run_part }}
          path: |
            ./data/${{ matrix.config.sub }}/${{ matrix.config.run_part }}/*.png
            ./data/${{ matrix.config.sub }}/${{ matrix.config.run_part }}/*.log
# --- jobs.commit_results (nested under `jobs:`) ---
  # Collects every QC artifact, moves its PNG and log files into
  # ./data/<sub>/<run_part>/ and pushes them to main in a single commit.
  commit_results:
    name: Commit and Push QC Results
    runs-on: self-hosted
    needs: [process_raw, run_qc]
    if: needs.process_raw.outputs.has_data == 'true'
    steps:
      - name: Checkout Code
        # v4 matches the checkout version the deploy job already uses.
        uses: actions/checkout@v4

      - name: Download all artifacts
        # No `name` given: every artifact of this run is downloaded into its
        # own directory (named after the artifact) below `path`.
        uses: actions/download-artifact@v4
        with:
          path: ./

      - name: Move QC Results
        run: |
          echo "Moving QC results to data directory..."
          for dir in qc-results-*; do
            # With no match the glob stays literal; skip it instead of
            # creating a directory that is literally named '*'.
            [ -d "$dir" ] || continue
            echo "Processing artifact directory: $dir"
            # Artifact names look like qc-results-<sub>-<task>-<version>-<run_part>.
            # The first two fields ("qc", "results") are discarded and the
            # remainder, including any further '-', lands in run_part.
            # NOTE(review): assumes sub, task and version contain no '-' - confirm.
            IFS='-' read -r _ _ sub task version run_part <<< "$dir"
            echo "Subject: $sub, Task: $task, Version: $version, Run Part: $run_part"
            run_part_dir="./data/${sub}/${run_part}"
            mkdir -p "$run_part_dir"
            # Both file types go to the run-part directory; a missing file
            # type is not an error.
            mv "$dir"/*.png "$run_part_dir/" 2>/dev/null || :
            mv "$dir"/*.log "$run_part_dir/" 2>/dev/null || :
          done

      - name: Commit and Push QC Results
        run: |
          git config --global user.name "miloswrath"
          git config --global user.email "[email protected]"
          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
          # Expand the globs into an array first: `git add` aborts when a
          # pathspec matches nothing, which happens if QC produced no files.
          shopt -s nullglob
          qc_files=(./data/*/*/*.png ./data/*/*/*.log)
          if [ "${#qc_files[@]}" -gt 0 ]; then
            git add -- "${qc_files[@]}"
          fi
          # Commit only when something is staged; `git commit` exits non-zero
          # on an empty index and would fail the job.
          if git diff --cached --quiet; then
            echo "No QC results to commit."
          else
            git commit -m "Add QC results for subjects $(date +%Y-%m-%d)"
            git push origin main
          fi

      - name: List Directory After QC
        run: |
          echo "Listing directory after running QC:"
          find ./data -type d
          find ./data -type f
# --- jobs.add (nested under `jobs:`) ---
  # Creates one Jekyll post per subject from the PNG files added by the latest
  # commit, pushes the posts, builds the site and deploys it to GitHub Pages.
  add:
    name: Generate Jekyll Posts and Deploy
    runs-on: ubuntu-latest
    needs: [process_raw, run_qc, commit_results]
    # Runs even when the QC jobs were skipped or failed.
    if: ${{ always() }}
    # Deployment target expected by actions/deploy-pages.
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          persist-credentials: false  # Recommended for security
          fetch-depth: 0
          ref: main

      - name: Pull Latest Changes
        run: git pull origin main

      - name: Setup Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: '3.1'  # Specify your Ruby version
          bundler-cache: true  # Caches installed gems automatically
          cache-version: 1  # Increment if you need to reset the cache

      - name: Install Dependencies
        run: bundle install

      - name: Generate Jekyll Posts
        run: |
          set -e  # Exit immediately if a command exits with a non-zero status
          set -x  # Print commands and their arguments as they are executed
          POSTS_DIR="_posts"
          mkdir -p "$POSTS_DIR"
          # Maps a subject id to a space-separated list of its image paths.
          declare -A subjects
          # PNG files touched by the latest commit; `|| true` keeps `set -e`
          # from aborting when grep finds nothing.
          images=$(git diff --name-only HEAD~1 HEAD | grep '\.png$' || true)
          echo "Images: $images"
          if [ -z "$images" ]; then
            echo "No new images found. Skipping post generation."
            exit 0
          fi
          for file in $images; do
            # The diff also lists deleted files; only keep ones that exist.
            if [ ! -f "$file" ]; then
              continue
            fi
            filename=$(basename "$file")
            # Subject id = the part of the file name before the first '_'.
            subject="${filename%%_*}"
            subjects["$subject"]+="$file "
          done
          today=$(date +%Y-%m-%d)
          for subject in "${!subjects[@]}"; do
            # The time suffix keeps posts written on the same day for the
            # same subject from overwriting each other.
            timestamp=$(date +%H%M%S)
            post_filename="$POSTS_DIR/$today-subject-$subject-$timestamp.md"
            {
              echo "---"
              echo "layout: post"
              echo "title: Subject $subject"
              echo "date: $today"
              echo "categories: subjects"
              echo "---"
              echo ""
              # One Markdown image per file, using the repository-relative path.
              for image in ${subjects["$subject"]}; do
                echo "![]($image)"
              done
            } > "$post_filename"
            echo "Created post: $post_filename"
          done

      - name: List _posts Directory
        run: |
          echo "Listing _posts directory:"
          ls -la _posts

      - name: Commit and Push Posts
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          git config --global user.name "miloswrath"
          git config --global user.email "[email protected]"
          # Checkout ran with persist-credentials: false, so the push needs the
          # token in the remote URL; it is read from this step's environment.
          git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}"
          # `git add` aborts on a glob that matches nothing, so stage only
          # when at least one post exists.
          if compgen -G "_posts/*.md" > /dev/null; then
            git add _posts/*.md
          fi
          if ! git diff --cached --exit-code > /dev/null; then
            git commit -m "Add new posts for subjects $(date +%Y-%m-%d)"
            git push origin main  # Replace 'main' with your default branch if different
          else
            echo "No changes to commit."
          fi

      - name: Build with Jekyll
        # NOTE(review): the workflow only declares a push trigger, so
        # github.event.inputs.base_path is presumably always empty and the
        # base URL falls back to '' - confirm this is intended.
        run: bundle exec jekyll build --verbose --baseurl "${{ github.event.inputs.base_path || '' }}"
        env:
          JEKYLL_ENV: production

      - name: Configure GitHub Pages
        uses: actions/configure-pages@v5

      - name: Upload Pages Artifact
        # v3 uploads with the current artifact backend, which is what
        # deploy-pages@v4 consumes.
        uses: actions/upload-pages-artifact@v3
        with:
          path: ./_site  # Ensure this matches your Jekyll build output

      - name: List _site Directory
        run: |
          echo "Listing _site directory:"
          ls -la _site

      - name: Deploy to GitHub Pages
        id: deployment  # referenced by environment.url above
        uses: actions/deploy-pages@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}