Skip to content

add max_per_rank_io_concurrency into knob #127

add max_per_rank_io_concurrency into knob

add max_per_rank_io_concurrency into knob #127

Workflow file for this run

# Events that start this workflow.
on:
  # Pull requests.
  pull_request:
  # Pushes, limited to the main branch.
  push:
    branches:
      - main
  # Allows other workflows to call this one as a reusable workflow.
  workflow_call:

jobs:
# CPU test job: installs the CPU-only PyTorch nightly and runs every test
# that is not marked gpu_only, once per Python version in the matrix.
cpu_unit_tests:
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # Quoted so YAML keeps each version as a string; an unquoted value
      # such as 3.10 would be read as the float 3.1.
      python-version: ["3.7", "3.8", "3.9"]
  steps:
    - name: Check out repo
      uses: actions/checkout@v2
    - name: Setup conda env
      uses: conda-incubator/setup-miniconda@v2
      with:
        miniconda-version: "latest"
        activate-environment: test
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      # Login shell (-l); NOTE(review): setup-miniconda's documentation asks
      # for this so that `conda activate` works inside the step - confirm.
      shell: bash -l {0}
      run: |
        set -eux
        conda activate test
        conda install pytorch cpuonly -c pytorch-nightly
        pip install -r requirements.txt
        pip install -r dev-requirements.txt
        pip install --no-build-isolation -e ".[dev]"
    - name: Run unit tests with coverage
      shell: bash -l {0}
      # Selects tests not marked gpu_only and writes an XML coverage report.
      run: |
        set -eux
        conda activate test
        pytest --timeout=300 --cov=. --cov-report xml -vv -rA -m "not gpu_only" tests
    - name: Upload coverage to codecov
      uses: codecov/codecov-action@v2
# GPU test job: runs on the GPU runner label from the matrix, installs the
# CUDA 11.8 PyTorch nightly, and runs tests marked gpu_only or cpu_and_gpu.
gpu_unit_tests:
  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os: [linux.8xlarge.nvidia.gpu]
      # Quoted so YAML keeps the version as a string rather than a float.
      python-version: ["3.8"]
      # NOTE(review): cuda-tag is not referenced by any step in this job;
      # confirm whether it is still needed.
      cuda-tag: ["cu11"]
  steps:
    - name: Check out repo
      uses: actions/checkout@v2
    - name: Setup conda env
      uses: conda-incubator/setup-miniconda@v2
      with:
        miniconda-version: "latest"
        activate-environment: test
        python-version: ${{ matrix.python-version }}
    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      uses: pytorch/test-infra/.github/actions/setup-nvidia@main
    - name: Display EC2 information
      shell: bash
      # Prints the AMI id, instance id, and instance type of the runner by
      # querying the EC2 instance metadata endpoint.
      run: |
        set -euo pipefail
        function get_ec2_metadata() {
          # Pulled from instance metadata endpoint for EC2
          # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
          category=$1
          curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
        echo "instance-id: $(get_ec2_metadata instance-id)"
        echo "instance-type: $(get_ec2_metadata instance-type)"
    - name: Install dependencies
      # Login shell (-l); NOTE(review): setup-miniconda's documentation asks
      # for this so that `conda activate` works inside the step - confirm.
      shell: bash -l {0}
      run: |
        set -eux
        conda activate test
        conda install pytorch pytorch-cuda=11.8 -c pytorch-nightly -c nvidia
        pip install torchrec-nightly
        # Use stable fbgemm-gpu
        # pip uninstall -y fbgemm-gpu-nightly
        # pip install fbgemm-gpu-nightly
        pip install -r requirements.txt
        pip install -r dev-requirements.txt
        pip install --no-build-isolation -e ".[dev]"
    - name: Run unit tests with coverage
      shell: bash -l {0}
      # Selects tests marked gpu_only or cpu_and_gpu; per-test timeout is 60s.
      run: |
        set -eux
        conda activate test
        pytest --timeout=60 --cov=. --cov-report xml -vv -rA -m "gpu_only or cpu_and_gpu" tests
    - name: Upload coverage to codecov
      uses: codecov/codecov-action@v2
# S3 integration job: assumes an AWS role, enables the AWS tests through
# TORCHSNAPSHOT_ENABLE_AWS_TEST, and runs tests marked s3_integration_test.
s3_integration_test:
  # Skipped for pull requests opened from forks.
  # NOTE(review): on events without a pull_request payload the left side is
  # null; per GitHub's expression coercion rules null == false should be
  # true, so the job still runs on push - confirm.
  if: github.event.pull_request.head.repo.fork == false
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # Quoted so YAML keeps the version as a string rather than a float.
      python-version: ["3.8"]
  # These permissions are needed to interact with GitHub's OIDC Token endpoint
  permissions:
    id-token: write
    contents: read
  steps:
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v1
      with:
        role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
        aws-region: us-east-1
    - name: Set environment variables
      # Appends to the GITHUB_ENV file so later steps see the variable; the
      # path is quoted so the redirect target is never word-split.
      run: echo "TORCHSNAPSHOT_ENABLE_AWS_TEST=1" >> "$GITHUB_ENV"
    - name: Check out repo
      uses: actions/checkout@v2
    - name: Setup conda env
      uses: conda-incubator/setup-miniconda@v2
      with:
        miniconda-version: "latest"
        activate-environment: test
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      # Login shell (-l); NOTE(review): setup-miniconda's documentation asks
      # for this so that `conda activate` works inside the step - confirm.
      shell: bash -l {0}
      run: |
        set -eux
        conda activate test
        conda install pytorch cpuonly -c pytorch-nightly
        pip install -r requirements.txt
        pip install -r dev-requirements.txt
        pip install --no-build-isolation -e ".[dev]"
    # Step name reflects what is executed: only tests carrying the
    # s3_integration_test marker are selected.
    - name: Run S3 integration tests with coverage
      shell: bash -l {0}
      run: |
        set -eux
        conda activate test
        pytest --timeout=300 --cov=. --cov-report xml -vv -rA -m s3_integration_test tests
    - name: Upload coverage to codecov
      uses: codecov/codecov-action@v2
# GCS integration job: exposes the Google credentials secret to the job,
# enables the GCP tests through TORCHSNAPSHOT_ENABLE_GCP_TEST, and runs
# tests marked gcs_integration_test.
gcs_integration_tests:
  # Skipped for pull requests opened from forks.
  # NOTE(review): on events without a pull_request payload the left side is
  # null; per GitHub's expression coercion rules null == false should be
  # true, so the job still runs on push - confirm.
  if: github.event.pull_request.head.repo.fork == false
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # Quoted so YAML keeps the version as a string rather than a float.
      python-version: ["3.8"]
  # These permissions are needed to interact with GitHub's OIDC Token endpoint
  # NOTE(review): no step visible in this job requests an OIDC token (the
  # credentials come from a secret below); id-token: write may be
  # unnecessary here - confirm before removing.
  permissions:
    id-token: write
    contents: read
  env:
    GOOGLE_APPLICATION_CREDENTIALS_JSON: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS_JSON }}
  steps:
    - name: Set environment variables
      # Appends to the GITHUB_ENV file so later steps see the variable; the
      # path is quoted so the redirect target is never word-split.
      run: echo "TORCHSNAPSHOT_ENABLE_GCP_TEST=1" >> "$GITHUB_ENV"
    - name: Check out repo
      uses: actions/checkout@v2
    - name: Setup conda env
      uses: conda-incubator/setup-miniconda@v2
      with:
        miniconda-version: "latest"
        activate-environment: test
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      # Login shell (-l); NOTE(review): setup-miniconda's documentation asks
      # for this so that `conda activate` works inside the step - confirm.
      shell: bash -l {0}
      run: |
        set -eux
        conda activate test
        conda install pytorch cpuonly -c pytorch-nightly
        pip install -r requirements.txt
        pip install -r dev-requirements.txt
        pip install --no-build-isolation -e ".[dev]"
    # Step name reflects what is executed: only tests carrying the
    # gcs_integration_test marker are selected.
    - name: Run GCS integration tests with coverage
      shell: bash -l {0}
      run: |
        set -eux
        conda activate test
        pytest --timeout=300 --cov=. --cov-report xml -vv -rA -m gcs_integration_test tests
    - name: Upload coverage to codecov
      uses: codecov/codecov-action@v2