-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathsubmit_mpi.sh
37 lines (30 loc) · 1.38 KB
/
submit_mpi.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
#
# Entry point executed by every node of an AWS Batch multi-node MPI job.
#
# The main node copies, compiles and runs mpi_hello_world.c, then publishes
# the resulting exit status in a file inside a per-job directory on the shared
# filesystem. Every other (compute) node just waits for that file to appear and
# exits with the status it contains.
#
# Environment read by this script:
#   AWS_BATCH_JOB_ID, AWS_BATCH_JOB_ATTEMPT   - used to build the job directory
#   AWS_BATCH_JOB_NODE_INDEX,
#   AWS_BATCH_JOB_MAIN_NODE_INDEX             - decide whether this is the main node
#   HOSTNAME                                  - only used in log messages

echo "ip container: $(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1)"
echo "ip host: $(curl -s "http://169.254.169.254/latest/meta-data/local-ipv4")"

# get shared dir
# The list is comma-separated; IFS is scoped to this single `read` so the
# comma separator does not leak into the rest of the script.
IFS=',' read -r -a _shared_dirs <<< "/shared"
_shared_dir=${_shared_dirs[0]}
_job_dir="${_shared_dir}/${AWS_BATCH_JOB_ID%#*}-${AWS_BATCH_JOB_ATTEMPT}"
_exit_code_file="${_job_dir}/batch-exit-code"

# Stage, compile and run the MPI program.
# Returns the status of the first step that fails, or of mpirun otherwise.
_run_mpi_job() {
  mkdir -p "${_job_dir}" || return
  cp mpi_hello_world.c "${_shared_dir}" || return
  echo "Compiling..."
  mpicc -o "${_job_dir}/mpi_hello_world" "${_shared_dir}/mpi_hello_world.c" || return
  echo "Running..."
  mpirun "${_job_dir}/mpi_hello_world"
}

if [[ "${AWS_BATCH_JOB_NODE_INDEX}" -eq "${AWS_BATCH_JOB_MAIN_NODE_INDEX}" ]]; then
  echo "Hello I'm the main node $HOSTNAME! I run the mpi job!"
  _run_mpi_job
  _status=$?

  # Write exit status code.
  # Publish the real status (not a hard-coded 0) so compute nodes report the
  # same outcome as the main node. Write to a temporary name and rename it so
  # a waiting node never observes a created-but-still-empty file.
  if echo "${_status}" > "${_exit_code_file}.tmp"; then
    mv -f "${_exit_code_file}.tmp" "${_exit_code_file}"
  else
    echo "Unable to write ${_exit_code_file}" >&2
  fi

  # Waiting for compute nodes to terminate
  sleep 30
  exit "${_status}"
else
  echo "Hello I'm the compute node $HOSTNAME! I let the main node orchestrate the mpi processing!"
  # Since mpi orchestration happens on the main node, we need to make sure the containers representing the compute
  # nodes are not terminated. A simple trick is to wait for a file containing the status code to be created.
  # All compute nodes are terminated by AWS Batch if the main node exits abruptly.
  # -s (exists and is non-empty) guards against reading the file before the status has been written.
  while [[ ! -s "${_exit_code_file}" ]]; do
    sleep 2
  done
  exit "$(cat "${_exit_code_file}")"
fi