Measure and report executor starvation time #4799
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Celerity CI

on:
  # Pushes are only built on master: on any other branch a "push" run would
  # merely duplicate the run started by the "pull_request" trigger.
  push:
    branches: [master]
  pull_request:
  # Manual runs can opt into testing (and optionally tagging) "HEAD" revisions.
  workflow_dispatch:
    inputs:
      test-head:
        description: "Test against 'HEAD' revisions"
        type: boolean
        required: true
        default: false
      tag-latest:
        description: "Tag 'HEAD' revisions as 'latest' if successful"
        type: boolean
        required: true
        default: false
  # Nightly builds determine whether CI passes for the "HEAD" revisions of
  # DPC++ and AdaptiveCpp. Revisions that pass are tagged as "latest" and are
  # then used for all subsequent CI runs.
  schedule:
    # Every night at 05:00 UTC
    - cron: '0 5 * * *'
jobs: | |
# Run Clang-Tidy checks on the lines changed by a pull request and post the
# findings as review comments.
#
# Note: This action currently only supports pull_request triggers (as it creates review comments)
#
# TODO: This should be combined with the report (or "lint") step, really
clang-tidy:
  if: github.event.pull_request
  runs-on: [ self-hosted, slurm-intel ]
  env:
    # Overwritten by the first step with the in-container workspace path.
    container-workspace: <placeholder>
    build-dir: /root/build
  container:
    image: ghcr.io/celerity/celerity-lint
    volumes:
      - ccache:/ccache
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  steps:
    # Here and in jobs below: We need to manually set the container workspace
    # path as an environment variable, as (curiously) the `github.workspace` context
    # variable contains the path on the container host (but $GITHUB_WORKSPACE is correct).
    # The file behind $GITHUB_ENV is appended to (>>), as documented for workflow
    # commands, so that nothing already written to it is discarded.
    - name: Set container workspace environment variable
      run: echo "container-workspace=$GITHUB_WORKSPACE" >> $GITHUB_ENV
    - uses: actions/checkout@v4
      with:
        submodules: true
    # The base branch is needed locally so that the diff below can be computed.
    - name: Fetch base branch
      run: git fetch --no-tags --no-recurse-submodules origin "${{ github.event.pull_request.base.ref }}"
    # We only want to configure CMake, so we build the "help" target,
    # which doesn't actually do anything (other than print all targets).
    - name: Configure CMake
      run: bash -o pipefail -c "bash /root/build-celerity.sh ${{ env.container-workspace }} --build-type Debug --target help"
    - name: Run clang-tidy
      continue-on-error: true # clang-tidy-diff.py returns 1 if there are any errors; continue in that case (if something actually went wrong, there won't be a fixes file)
      # -j48 for gpuc5
      run: git diff -U0 origin/${{ github.event.pull_request.base.ref }} | clang-tidy-diff.py -j48 -p1 -path ${{ env.build-dir }} -export-fixes clang-tidy-fixes.yml
      shell: bash # enables -o pipefail
    - name: Create clang-tidy report comments
      uses: platisd/clang-tidy-pr-comments@v1
      with:
        python_path: python3 # Use our own Python installation
        github_token: ${{ secrets.GITHUB_TOKEN }}
        clang_tidy_fixes: "${{ env.container-workspace }}/clang-tidy-fixes.yml"
        repo_path_prefix: "/__w"
# The build matrix depends on what triggered the workflow: normal CI runs build
# and test against everything except the "HEAD" revisions, whereas nightly
# builds (and manual runs with "test-head" enabled) use *only* those.
#
# As a workaround, the matrices are stored in a JSON file; this job selects one
# of them and exposes it as an output, which the next job deserializes.
read-build-matrix:
  runs-on: self-hosted
  outputs:
    matrix: ${{ steps.read-json-matrix.outputs.matrix }}
  steps:
    - uses: actions/checkout@v4
    - name: Read build matrix from file
      id: read-json-matrix
      shell: python
      run: |
        import json
        import os

        with open("${{ github.workspace }}/.github/workflows/build_matrix.json") as matrix_file:
            all_matrices = json.load(matrix_file)

        # The expression is expanded by GitHub before Python runs and yields 'true' or 'false'.
        use_default = '${{ github.event_name != 'schedule' && inputs.test-head == false }}' == 'true'
        selected = all_matrices['default' if use_default else 'nightly']

        with open(os.environ['GITHUB_OUTPUT'], 'a') as output_file:
            print('matrix={ "include":%s }' % json.dumps(selected), file=output_file)
# Build, install and test Celerity for every entry of the matrix selected by
# the `read-build-matrix` job.
build-and-test:
  needs: read-build-matrix
  runs-on: [ self-hosted, "slurm-${{ matrix.platform }}" ]
  strategy:
    fail-fast: false
    matrix: ${{ fromJSON(needs.read-build-matrix.outputs.matrix) }}
  # These outputs are required by the image tagging step, only set during nightly builds.
  outputs:
    dpcpp-HEAD-Debug-works: ${{ steps.set-head-results.outputs.dpcpp-HEAD-Debug-works }}
    dpcpp-HEAD-Release-works: ${{ steps.set-head-results.outputs.dpcpp-HEAD-Release-works }}
    dpcpp-HEAD-ubuntu-version: ${{ steps.set-head-results.outputs.dpcpp-HEAD-ubuntu-version }}
    acpp-HEAD-Debug-works: ${{ steps.set-head-results.outputs.acpp-HEAD-Debug-works }}
    acpp-HEAD-Release-works: ${{ steps.set-head-results.outputs.acpp-HEAD-Release-works }}
    acpp-HEAD-ubuntu-version: ${{ steps.set-head-results.outputs.acpp-HEAD-ubuntu-version }}
  env:
    build-name: ${{ matrix.platform }}-ubuntu${{ matrix.ubuntu-version }}-${{ matrix.sycl }}-${{ matrix.sycl-version }}-${{ matrix.build-type }}
    # Overwritten by the first step with the in-container workspace path.
    container-workspace: <placeholder>
    build-dir: /root/build
    examples-build-dir: /root/build-examples
  container:
    image: ghcr.io/celerity/celerity-build/${{ matrix.sycl }}:ubuntu${{ matrix.ubuntu-version }}-${{ matrix.sycl-version }}
    volumes:
      - ccache:/ccache
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  steps:
    # We limit DPC++ to Level-Zero devices (instead of e.g. also showing OpenCL or CUDA devices).
    # Most importantly, despite its naming, this currently also avoids the "Unified Runtime over Level-Zero", which
    # doesn't seem to be quite ready for production use yet (= it crashes).
    # All three variables are appended (>>) to the file behind $GITHUB_ENV, as documented for workflow commands.
    - name: Set environment variables
      run: |
        echo "container-workspace=$GITHUB_WORKSPACE" >> $GITHUB_ENV
        echo "ONEAPI_DEVICE_SELECTOR=level_zero:*" >> $GITHUB_ENV
        echo "SIMSYCL_SYSTEM=$GITHUB_WORKSPACE/ci/simsycl-system.json" >> $GITHUB_ENV
    - name: Print exact SYCL revision used for this CI run
      run: cat /VERSION
    - uses: actions/checkout@v4
      with:
        submodules: true
    # In SimSYCL images, /root/celerity-options.sh automatically enables coverage for Debug builds, and Tracy support for Release builds
    - name: Build and install Celerity
      run: bash -o pipefail -c "bash /root/build-celerity.sh ${{ env.container-workspace }} --build-type ${{ matrix.build-type }} --mpi ${{ matrix.mpi }} --target install 2>&1 | tee ${{ env.build-name }}.log"
    # Upload build log for report step
    - name: Upload build logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.build-name }}
        path: ${{ env.build-name }}.log
    - name: Build examples against installed Celerity
      run: bash /root/build-examples.sh ${{ env.container-workspace }}/examples --build-type ${{ matrix.build-type }}
    - name: Reset coverage counters
      if: matrix.build-type == 'Debug' && matrix.sycl == 'simsycl' # celerity-build/simsycl image enables --coverage for Debug builds
      run: fastcov -z -d "${{ env.build-dir }}"
    # For a run with ASan, we need to fake dlclose() so all shared libraries are still mapped when leaks are evaluated
    # - otherwise, we will see "<unknown module>" instead of a proper trace if it originated in an unloaded library
    # (see https://github.com/google/sanitizers/issues/89#issuecomment-406316683). LD_PRELOAD will be set to CI_LD_PRELOAD
    # by /root/capture-backtrace.sh (otherwise we would also preload for other processes like bash, which we don't want to do).
    # Also, hwloc (used by OpenMPI) will attempt to dlopen(RTLD_DEEPBIND) for GPU backend discovery which is incompatible
    # with sanitizers but also irrelevant for a SimSYCL build, so we set HWLOC_COMPONENTS accordingly.
    - name: Prepare Sanitizer run
      if: matrix.sycl == 'simsycl' && matrix.build-type == 'Debug'
      run: |
        echo "ASAN_OPTIONS=detect_leaks=1" >> $GITHUB_ENV
        echo "LSAN_OPTIONS=suppressions=$GITHUB_WORKSPACE/ci/lsan.suppressions" >> $GITHUB_ENV
        echo "int dlclose(void *p) { return 0; }" | cc -x c -shared -o libfake_dlclose.so -
        echo "CI_LD_PRELOAD=$(echo /usr/lib/x86_64-linux-gnu/libasan.so.?):$(pwd)/libfake_dlclose.so" >> $GITHUB_ENV
        echo "HWLOC_COMPONENTS=-cuda,-opencl,-rsmi" >> $GITHUB_ENV
    - name: Run unit tests
      timeout-minutes: 5
      working-directory: ${{ env.build-dir }}
      run: ${{ env.container-workspace }}/ci/run-unit-tests.sh
    - name: Run examples (single-node)
      timeout-minutes: 10
      # We build examples twice, but only run the installed version (which probably has more failure modes)
      working-directory: ${{ env.examples-build-dir }}
      run: ${{ env.container-workspace }}/ci/run-examples.sh /data/Lenna.png 1
    - name: Run examples (multi-node)
      if: ${{ matrix.mpi }}
      timeout-minutes: 10
      working-directory: ${{ env.examples-build-dir }}
      run: ${{ env.container-workspace }}/ci/run-examples.sh /data/Lenna.png 2 4
    - name: Run debugging tests
      if: matrix.build-type == 'Debug' && matrix.sycl != 'dpcpp' # newer DPC++ generates DWARF5 which is incompatible with Ubuntu 20.04's GDB
      run: ${{ env.container-workspace }}/test/debug/pretty-print-test.py ${{ env.build-dir }}/test/debug/pretty_printables
    - name: Run system tests
      if: ${{ matrix.mpi }}
      working-directory: ${{ env.build-dir }}
      run: ${{ env.container-workspace }}/ci/run-system-tests.sh 2 4
    - name: Run integration tests
      working-directory: ${{ env.build-dir }}
      run: ${{ env.container-workspace }}/test/integration/run-integration-tests.py . ${{ matrix.platform }}
    # upload-artifact@v4 cannot upload twice to the same artifact name, and the
    # build log above already occupies `build-name`. Traces therefore get their own
    # artifact; reusing the name would make this step fail whenever traces exist.
    # NOTE(review): confirm that consumers enumerating artifacts tolerate the extra "-traces" artifact.
    - name: Upload stack traces (if any)
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: ${{ env.build-name }}-traces
        path: |
          ${{ env.build-dir }}/*.trace
          ${{ env.examples-build-dir }}/*.trace
        if-no-files-found: ignore
    # Only reached if all previous steps succeeded, so "works=1" marks a fully passing HEAD build.
    - id: set-head-results
      name: Set outputs for HEAD builds
      if: matrix.sycl-version == 'HEAD'
      run: |
        echo "${{ matrix.sycl }}-HEAD-${{ matrix.build-type }}-works=1" >> "$GITHUB_OUTPUT"
        echo "${{ matrix.sycl }}-HEAD-ubuntu-version=${{ matrix.ubuntu-version }}" >> "$GITHUB_OUTPUT"
    - name: Collect & report coverage
      if: matrix.build-type == 'Debug' && matrix.sycl == 'simsycl' # celerity-build/simsycl image enables --coverage for Debug builds
      env:
        COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
      run: |
        "${{ env.container-workspace }}/ci/capture-coverage.sh" "${{ env.container-workspace }}" "${{ env.build-dir }}" -o lcov.info
        coveralls report lcov.info
# Tag "HEAD" images that built and tested successfully as "latest".
# This is only done for nightly builds (or when specifying the "tag-latest" option on manually triggered runs).
tag-latest-containers:
  needs: build-and-test
  # Run this step regardless of result of `build-and-test` (hence the `always()`),
  # since we always want to tag images that were successful, even if others weren't.
  if: always() && (github.event_name == 'schedule' || (inputs.test-head && inputs.tag-latest))
  # Same runner labels as `build-and-test`, so both jobs select from the same set of runners.
  runs-on: [ self-hosted, "slurm-${{ matrix.platform }}" ]
  strategy:
    fail-fast: false
    matrix:
      include:
        - sycl: "dpcpp"
          platform: "intel"
        - sycl: "acpp"
          platform: "nvidia"
  env:
    image-basename-dpcpp: ghcr.io/celerity/celerity-build/dpcpp:ubuntu${{ needs.build-and-test.outputs.dpcpp-HEAD-ubuntu-version }}
    image-basename-acpp: ghcr.io/celerity/celerity-build/acpp:ubuntu${{ needs.build-and-test.outputs.acpp-HEAD-ubuntu-version }}
  permissions:
    packages: write
  steps:
    - name: Log into Container registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    # An image is only tagged if both its Debug and its Release HEAD build reported success.
    # An output that was never set expands to an empty string, which `-eq` evaluates as 0.
    - if: matrix.sycl == 'dpcpp'
      run: |
        if [[ "${{ needs.build-and-test.outputs.dpcpp-HEAD-Debug-works }}" -eq 1 ]] && [[ "${{ needs.build-and-test.outputs.dpcpp-HEAD-Release-works }}" -eq 1 ]]; then
          docker tag ${{ env.image-basename-dpcpp }}-HEAD ${{ env.image-basename-dpcpp }}-latest
          docker push ${{ env.image-basename-dpcpp }}-latest
        else
          echo "dpcpp HEAD did not pass both the Debug and the Release build; not tagging it as latest" >&2
          exit 1
        fi
    - if: matrix.sycl == 'acpp'
      run: |
        if [[ "${{ needs.build-and-test.outputs.acpp-HEAD-Debug-works }}" -eq 1 ]] && [[ "${{ needs.build-and-test.outputs.acpp-HEAD-Release-works }}" -eq 1 ]]; then
          docker tag ${{ env.image-basename-acpp }}-HEAD ${{ env.image-basename-acpp }}-latest
          docker push ${{ env.image-basename-acpp }}-latest
        else
          echo "acpp HEAD did not pass both the Debug and the Release build; not tagging it as latest" >&2
          exit 1
        fi
# Check code formatting and hand the result to the CI report action.
# Runs after `build-and-test`, which uploads the build logs "for report step".
report:
  needs: [build-and-test]
  runs-on: [ self-hosted, slurm ]
  env:
    # Overwritten by the first step with the in-container workspace path.
    container-workspace: <placeholder>
  container:
    image: ghcr.io/celerity/celerity-lint
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  steps:
    # $GITHUB_WORKSPACE holds the in-container path; it is appended (>>) to the
    # file behind $GITHUB_ENV, as documented for workflow commands.
    - name: Set container workspace environment variable
      run: echo "container-workspace=$GITHUB_WORKSPACE" >> $GITHUB_ENV
    - uses: actions/checkout@v4
    - name: Check code formatting
      id: formatting
      working-directory: ${{ env.container-workspace }}
      shell: bash
      # The script output may span several lines, so it is written to
      # $GITHUB_OUTPUT using the multi-line "name<<DELIMITER" syntax.
      run: |
        unformatted=$("./ci/find-unformatted-files.sh")
        {
          echo 'unformatted-files<<EOF'
          echo "$unformatted"
          echo EOF
        } >> "$GITHUB_OUTPUT"
    - uses: "celerity/ci-report-action@v8"
      with:
        gh-token: ${{ secrets.GITHUB_TOKEN }}
        unformatted-files: ${{ steps.formatting.outputs.unformatted-files }}