# Automatic Instance Shelving #16629
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Automatic Instance Shelving

on:
  schedule:
    # Run every 5 minutes. This is the shortest interval supported by GitHub
    # Actions scheduled workflows, so a finer-grained expression gains nothing.
    - cron: "*/5 * * * *"
  # Also allow the workflow to be started manually.
  workflow_dispatch:

# One concurrency group per workflow and target cloud: a newly triggered run
# cancels the run of the same group that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ vars.MORPHOCLOUD_OS_CLOUD }}
  cancel-in-progress: true

# Permissions granted to the default GITHUB_TOKEN for this workflow.
permissions:
  issues: write
  contents: read
jobs:
  # Scans the OpenStack instances named "<prefix>instance-<issue number>" and
  # requests shelving of those that have been up for too long.
  auto-shelve:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v4

      # Token used by the `gh` CLI below (exported as GH_TOKEN).
      - uses: actions/create-github-app-token@31c86eb3b33c9b601a1f60f98dcbfd1d70f379b4  # v1.10.3
        id: app-token
        with:
          app-id: ${{ vars.MORPHOCLOUD_WORKFLOW_APP_ID }}
          private-key: ${{ secrets.MORPHOCLOUD_WORKFLOW_APP_PRIVATE_KEY }}

      - name: Auto Shelve
        run: |
          source ~/venv/bin/activate

          # Uptime thresholds, in hours:
          #   notify_after_hours < uptime <= shelve_after_hours -> action "notify"
          #   uptime > shelve_after_hours                       -> action "shelve"
          notify_after_hours=3.5
          shelve_after_hours=4.0

          # Classifies an uptime. Values are passed as arguments (never
          # interpolated into the Python source) because the uptime comes from
          # the remote instance. Exits non-zero if a value is not a number.
          classify_py='import sys; h, n, s = map(float, sys.argv[1:4]); print("shelve" if h > s else "notify" if h > n else "")'

          # SSH options shared by every remote call.
          # BatchMode and ConnectTimeout keep an unreachable or misconfigured
          # instance from blocking the whole run.
          ssh_opts=(
            -o StrictHostKeyChecking=no
            -o UserKnownHostsFile=/dev/null
            -o LogLevel=ERROR
            -o BatchMode=yes
            -o ConnectTimeout=10
          )

          instance_prefix=${PREFIX:+${PREFIX}_}
          instance_basename="${instance_prefix}instance"

          # NOTE(review): confirm that "OS-EXT-STS:task_state" is present in the
          # JSON emitted by `openstack server list`; if the field is missing, jq
          # yields an empty value and the task-state check below never skips.
          openstack server list --name "^${instance_basename}-\d+" -f json | \
            jq -r '.[] | [.Name, .Status, ."OS-EXT-STS:task_state"] | @tsv' | \
            while IFS=$'\t' read -r instance_name status task_state; do
              echo "instance_name [$instance_name] status [$status] task_state [$task_state]"

              # Skip the instance if it is not active
              if [[ "$status" != "ACTIVE" ]]; then
                continue
              fi

              # Skip the instance if it is currently undergoing a status update
              if [[ "$task_state" != "" ]]; then
                continue
              fi

              # Extract the issue number from the instance name
              issue_number=${instance_name##*-}
              echo "issue_number [$issue_number]"

              # Retrieve the IP address of the instance
              instance_ip=$(
                openstack server show "$instance_name" -c addresses -f json | \
                  jq -r '.addresses.auto_allocated_network[1]'
              )
              echo "instance_ip [$instance_ip]"

              # Skip the instance if the IP address could not be retrieved
              if [[ -z "$instance_ip" || "$instance_ip" == "null" ]]; then
                echo "::warning ::Failed to retrieve $instance_name IP"
                continue
              fi

              # Notes on SSH usage:
              # * Redirecting SSH standard input to /dev/null ('< /dev/null') is required to work around
              #   an issue where SSH breaks out of the while loop in Bash.
              #   Reference: https://stackoverflow.com/questions/9393038/ssh-breaks-out-of-while-loop-in-bash
              # * Each SSH call is the condition of an `if` so that a failure is handled here.
              #   The step runs under `bash -e`, where a failing plain `var=$(ssh ...)` would
              #   abort the loop before a later `$?` test could run.
              if ! has_check_instance_shelve_script=$(ssh "${ssh_opts[@]}" \
                "exouser@${instance_ip}" \
                'bash -c "[[ -f /opt/instance-config-support/dist/check-instance-shelve.sh ]] && echo yes || echo no"' < /dev/null); then
                echo "::warning ::Failed to check if 'check-instance-shelve.sh' was found on $instance_name using IP $instance_ip"
                continue
              fi

              if [[ "$has_check_instance_shelve_script" == "yes" ]]; then
                # The instance ships its own script; its output is used as the uptime in hours.
                if ! uptime_hours=$(ssh "${ssh_opts[@]}" \
                  "exouser@${instance_ip}" \
                  '/opt/instance-config-support/dist/check-instance-shelve.sh -d' < /dev/null); then
                  echo "::warning ::Failed to retrieve uptime for $instance_name using IP $instance_ip"
                  continue
                fi
              else
                # Retrieve the instance uptime using SSH
                if ! uptime_seconds=$(ssh "${ssh_opts[@]}" \
                  "exouser@${instance_ip}" \
                  'cat /proc/uptime | awk "{print \$1}"' < /dev/null); then
                  echo "::warning ::Failed to retrieve uptime for $instance_name using IP $instance_ip"
                  continue
                fi
                # Convert uptime from seconds to hours
                uptime_hours=$(echo "scale=2; $uptime_seconds / 3600" | bc)
              fi

              # Check uptime and define action
              if ! action=$(python3 -c "$classify_py" \
                "$uptime_hours" "$notify_after_hours" "$shelve_after_hours" < /dev/null); then
                echo "::warning ::Invalid uptime [$uptime_hours] reported for $instance_name using IP $instance_ip"
                continue
              fi
              echo "instance_name [$instance_name], has_check_instance_shelve_script [$has_check_instance_shelve_script], uptime_hours [$uptime_hours] -> action[$action]"

              # Perform action
              # NOTE(review): the "notify" action is computed and logged but nothing
              # acts on it here; only "shelve" triggers a workflow.
              if [[ "$action" == "shelve" ]]; then
                gh workflow run control-instance-from-workflow.yml \
                  -f issue_number="$issue_number" \
                  -f command_name=shelve
              fi
            done
        env:
          OS_CLOUD: ${{ vars.MORPHOCLOUD_OS_CLOUD }}
          GH_TOKEN: ${{ steps.app-token.outputs.token }}
          GH_REPO: ${{ github.repository }}
          PREFIX: ${{ vars.INSTANCE_NAME_PREFIX }}