-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.sh
108 lines (88 loc) · 3.6 KB
/
main.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/bash
# INPUTS:
# Directory holding the properties files that get staged in. Deliberately not
# readonly: f_read_cmd_args exports every "--name value" argument as a
# variable, so "--pf_dir <dir>" on the command line overrides this default.
pf_dir="./properties_files"
# Host part of the coaster service URL
coaster_host=localhost
# WORKFLOW:
# The job number is the name of the parent of the current working directory.
# The expansions are quoted so that a path containing whitespace or glob
# characters is handed to dirname/basename as a single word.
job_number=$(basename "$(dirname "${PWD}")")
# Directory used for this job on the remote side (stage-in target, job
# working directory and location of the scheduler logs)
remote_dir="/tmp/pworks/job-${job_number}"
# Read arguments in the format "--pname pval". For every word that starts
# with "--", export pname=pval in the current shell and append the matching
# "export pname=pval" line to env.sh, located next to this script.
# Arguments: the words to parse, normally "$@"
# Globals:   one exported variable per "--pname" word
# Outputs:   lines appended to $(dirname "$0")/env.sh
# Notes:     the value is always the word that follows the name (empty when
#            the name is the last word), even if that word starts with "--".
f_read_cmd_args() {
  local -a argv=("$@")
  # Values made only of these characters are written to env.sh verbatim, so
  # the file content is unchanged for ordinary values.
  local -r safe_re='^[A-Za-z0-9_./:,@%+=-]*$'
  local env_file i arg pname pval
  env_file="$(dirname "$0")/env.sh"
  for ((i = 0; i < ${#argv[@]}; i++)); do
    arg=${argv[i]}
    # Words that do not start with "--" are values or positionals: skip them
    [[ ${arg} == --* ]] || continue
    # Every "--" is removed from the name
    pname=${arg//--/}
    # Read the value by position instead of re-splitting the command line,
    # so values containing whitespace stay intact
    pval=${argv[i+1]:-}
    if [[ ${pval} =~ ${safe_re} ]]; then
      printf 'export %s=%s\n' "${pname}" "${pval}" >> "${env_file}"
    else
      # Shell-quote anything else so that env.sh can still be sourced
      printf 'export %s=%q\n' "${pname}" "${pval}" >> "${env_file}"
    fi
    export "${pname}=${pval}"
  done
}
# Map every regular file below the directory dname to the remote directory in
# the cjs stage-in format: "<PWD>/<file> -> <remote_dir>/<file>", with the
# entries joined by " : ".
# Globals:   PWD, remote_dir (read)
# Arguments: $1 - directory to scan, relative to the current directory
# Outputs:   the mapping on stdout (an empty line when no file is found)
get_dir_stagein() {
  local dname=$1
  # Local accumulator: a stagein value set by the caller must neither leak
  # into the result nor be modified when the function is called directly.
  local stagein=""
  local f
  # NUL-delimited read keeps file names containing whitespace in one piece.
  # An empty dname scans the current directory, as "find" without a path does.
  while IFS= read -r -d '' f; do
    if [[ -z "${stagein}" ]]; then
      stagein="${PWD}/${f} -> ${remote_dir}/${f}"
    else
      stagein="${stagein} : ${PWD}/${f} -> ${remote_dir}/${f}"
    fi
  done < <(find "${dname:-.}" -type f -print0)
  printf '%s\n' "${stagein}"
}
# Set the variable serviceport to the service port number of a pool, given
# the name of the pool. The resources API is polled until a port greater than
# zero is reported; the script exits with status 1 when every attempt failed.
# Globals:   PARSL_CLIENT_HOST, PW_API_KEY (read); serviceport (written)
# Arguments: $1 - pool name, lower case and without underscores (the API
#                 output is normalized the same way before matching)
#            $2 - maximum number of attempts (optional, default 20)
#            $3 - seconds to wait between attempts (optional, default 30)
get_pool_serviceport() {
  local scheduler_pool=$1
  local max_retries=${2:-20}
  local retry_delay=${3:-30}
  local k
  for ((k = 1; k <= max_retries; k++)); do
    echo "Searching for service port"
    # Keep the name/serviceport lines, strip quotes, commas and blanks, join
    # each pair of lines into "name:<pool>=serviceport:<port>", normalize the
    # case and underscores, select the requested pool and keep what follows
    # the last colon. The URL is quoted because "?" is a glob character.
    serviceport=$(
      curl -s "https://${PARSL_CLIENT_HOST}/api/resources?key=${PW_API_KEY}" \
        | grep -E 'name|serviceport' \
        | tr -d '", ' \
        | sed 'N;s/\n/=/' \
        | tr '[:upper:]' '[:lower:]' \
        | sed "s/_//g" \
        | grep -F -- "name:${scheduler_pool}=" \
        | rev | cut -d':' -f1 | rev
    )
    # Accept plain positive integers only: empty, non-numeric or multi-line
    # output counts as "not found" instead of raising an arithmetic error.
    if [[ ${serviceport} =~ ^[0-9]+$ ]] && ((10#${serviceport} > 0)); then
      return 0
    fi
    echo "No service port found. Make sure pool is turned on!"
    # No point in waiting after the last attempt
    if ((k < max_retries)); then
      echo "Trying again ..."
      sleep "${retry_delay}"
    fi
  done
  echo "Max retries have been reached. Giving up."
  exit 1
}
# Export every "--name value" command line argument as a variable. "$@" is
# quoted so the arguments are passed on without glob expansion or splitting.
f_read_cmd_args "$@"
# Stage-in mappings for the scripts and the properties files
scripts=$(get_dir_stagein scripts)
properties_files=$(get_dir_stagein "${pf_dir}")
# Normalize the pool name (lower case, underscores removed): the pool lookups
# apply the same normalization to the API output before comparing names.
scheduler_pool=$(printf '%s\n' "${scheduler_pool}" | tr '[:upper:]' '[:lower:]' | sed "s/_//g")
# Full stage-in list. The backslash-newlines inside the quotes are line
# continuations, so the value is one line preceded by a single newline.
stagein="
${scripts} : \
${properties_files} : \
${PWD}/authorized_keys -> ${remote_dir}/authorized_keys : \
${PWD}/stream.sh -> ${remote_dir}/stream.sh"
# Sets serviceport, or exits the script when the port cannot be found
get_pool_serviceport "${scheduler_pool}"
COASTERURL="http://${coaster_host}:${serviceport}"
echo "Coaster URL: $COASTERURL"
echo "For more logs, open scheduler.out and scheduler.err log files once they appear in the job directory."
# Positional arguments for scripts/scheduler.sh.
# NOTE(review): this string is interpolated unquoted into the remote command
# line, so an empty or unset variable shifts every following argument by one
# position - confirm that all of them are always provided.
cjs_args="${executor_pools} ${version} ${sum_serv} ${ds_cycle} ${od_pct} ${PW_API_KEY} ${pf_dir} ${cloud} ${PARSL_CLIENT_SSH_PORT} ${PWD} ${PARSL_CLIENT_HOST} ${allow_ps}"
# Submit the scheduler job to the persistent coaster service. The job runs in
# ${remote_dir}, with stdout/stderr redirected to scheduler.out/scheduler.err
# in that directory. The command is traced for debugging.
# NOTE(review): the trace contains cjs_args and therefore PW_API_KEY - confirm
# that the job's stderr log is not readable by other users.
set -x
cog-job-submit -provider "coaster-persistent" \
  -service-contact "$COASTERURL" \
  -attributes "maxWallTime=99999:00:00" \
  -redirected \
  -stdout "${remote_dir}/scheduler.out" \
  -stderr "${remote_dir}/scheduler.err" \
  -directory "${remote_dir}" \
  -stagein "${stagein}" \
  bash -c "mkdir -p ${remote_dir}; cd ${remote_dir}; bash ./scripts/scheduler.sh ${cjs_args}"
# Stop tracing here: the commands that follow put the API key in a URL and
# would otherwise copy it into the log once more. The redirection hides the
# trace line of "set +x" itself.
{ set +x; } 2>/dev/null
# Send alert if job failed!
# The alert is sent when the pool is still reported as "on" at this point.
# NOTE(review): presumably the scheduler job is expected to keep running for
# as long as the pool is on, so getting here with the pool on means it
# failed - confirm.
# The status is looked up like the service port: name/status line pairs are
# joined into "name:<pool>=status:<value>" and the text after the last colon
# is kept. Anchoring the match on "name:" stops a pool whose name merely ends
# with ${scheduler_pool} from being picked up as well.
pool_status=$(
  curl -s "https://${PARSL_CLIENT_HOST}/api/resources?key=${PW_API_KEY}" \
    | grep -E 'name|status' \
    | tr -d '", ' \
    | sed 'N;s/\n/=/' \
    | tr '[:upper:]' '[:lower:]' \
    | sed "s/_//g" \
    | grep -F -- "name:${scheduler_pool}=" \
    | rev | cut -d':' -f1 | rev
)
if [[ ${pool_status} == "on" ]]; then
  msg="Failed START_SCHEDULER job ${job_number} in account ${PW_USER} - @avidalto"
  # Escape the characters that are special in a sed replacement (backslash,
  # "&" and the "|" delimiter) so the message is inserted literally
  msg_escaped=$(printf '%s' "${msg}" | sed 's/[\\&|]/\\&/g')
  # Fill the message into the alert template and run the result
  sed "s|__MSG__|${msg_escaped}|g" alert_slack.sh > alert_slack_.sh
  bash alert_slack_.sh
fi