Skip to content

Commit

Permalink
Override nvidia env var for 11.8 (mosaicml#2722)
Browse files Browse the repository at this point in the history
  • Loading branch information
dakinggg authored Nov 16, 2023
1 parent 3cf73cc commit bfbb89a
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pr-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ jobs:
TORCHVISION_VERSION=${{ matrix.TORCHVISION_VERSION }}
PYTORCH_NIGHTLY_URL=${{ matrix.PYTORCH_NIGHTLY_URL }}
PYTORCH_NIGHTLY_VERSION=${{ matrix.PYTORCH_NIGHTLY_VERSION }}
NVIDIA_REQUIRE_CUDA_OVERRIDE=${{ matrix.NVIDIA_REQUIRE_CUDA_OVERRIDE }}
context: ./docker
image-name: ${{ matrix.IMAGE_NAME }}
image-uuid: ${{ matrix.UUID }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/release-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ jobs:
TORCHVISION_VERSION=${{ matrix.TORCHVISION_VERSION }}
PYTORCH_NIGHTLY_URL=${{ matrix.PYTORCH_NIGHTLY_URL }}
PYTORCH_NIGHTLY_VERSION=${{ matrix.PYTORCH_NIGHTLY_VERSION }}
NVIDIA_REQUIRE_CUDA_OVERRIDE=${{ matrix.NVIDIA_REQUIRE_CUDA_OVERRIDE }}
context: ./docker
image-name: ${{ matrix.IMAGE_NAME }}
image-uuid: ${{ matrix.UUID }}
Expand Down
5 changes: 5 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,11 @@ RUN pip install --no-cache-dir --upgrade \
ipython${IPYTHON_VERSION} \
urllib3${URLLIB3_VERSION}

##################################################
# Override NVIDIA's mistaken NVIDIA_REQUIRE_CUDA env var for CUDA 11.8 images
##################################################
ARG NVIDIA_REQUIRE_CUDA_OVERRIDE
ENV NVIDIA_REQUIRE_CUDA=${NVIDIA_REQUIRE_CUDA_OVERRIDE:-$NVIDIA_REQUIRE_CUDA}

################
# Composer Image
Expand Down
19 changes: 19 additions & 0 deletions docker/build_matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
CUDA_VERSION: 12.1.0
IMAGE_NAME: torch-2-1-0-cu121
MOFED_VERSION: 5.5-1.0.3.2
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand All @@ -18,6 +19,7 @@
CUDA_VERSION: 12.1.0
IMAGE_NAME: torch-2-1-0-cu121-aws
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand All @@ -32,6 +34,7 @@
CUDA_VERSION: ''
IMAGE_NAME: torch-2-1-0-cpu
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand All @@ -46,6 +49,12 @@
CUDA_VERSION: 11.8.0
IMAGE_NAME: torch-2-0-1-cu118
MOFED_VERSION: 5.5-1.0.3.2
NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=11.8 brand=tesla,driver>=470,driver<471 brand=tesla,driver>=515,driver<516
brand=unknown,driver>=470,driver<471 brand=unknown,driver>=515,driver<516 brand=nvidia,driver>=470,driver<471
brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=470,driver<471 brand=nvidiartx,driver>=515,driver<516
brand=geforce,driver>=470,driver<471 brand=geforce,driver>=515,driver<516 brand=quadro,driver>=470,driver<471
brand=quadro,driver>=515,driver<516 brand=titan,driver>=470,driver<471 brand=titan,driver>=515,driver<516
brand=titanrtx,driver>=470,driver<471 brand=titanrtx,driver>=515,driver<516
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand All @@ -59,6 +68,12 @@
CUDA_VERSION: 11.8.0
IMAGE_NAME: torch-2-0-1-cu118-aws
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=11.8 brand=tesla,driver>=470,driver<471 brand=tesla,driver>=515,driver<516
brand=unknown,driver>=470,driver<471 brand=unknown,driver>=515,driver<516 brand=nvidia,driver>=470,driver<471
brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=470,driver<471 brand=nvidiartx,driver>=515,driver<516
brand=geforce,driver>=470,driver<471 brand=geforce,driver>=515,driver<516 brand=quadro,driver>=470,driver<471
brand=quadro,driver>=515,driver<516 brand=titan,driver>=470,driver<471 brand=titan,driver>=515,driver<516
brand=titanrtx,driver>=470,driver<471 brand=titanrtx,driver>=515,driver<516
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand All @@ -72,6 +87,7 @@
CUDA_VERSION: ''
IMAGE_NAME: torch-2-0-1-cpu
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand All @@ -85,6 +101,7 @@
CUDA_VERSION: 11.7.1
IMAGE_NAME: torch-1-13-1-cu117
MOFED_VERSION: 5.5-1.0.3.2
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand All @@ -98,6 +115,7 @@
CUDA_VERSION: 11.7.1
IMAGE_NAME: torch-1-13-1-cu117-aws
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand All @@ -111,6 +129,7 @@
CUDA_VERSION: ''
IMAGE_NAME: torch-1-13-1-cpu
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
Expand Down
13 changes: 13 additions & 0 deletions docker/generate_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,17 @@ def _main():

cuda_version = _get_cuda_version(pytorch_version=pytorch_version, use_cuda=use_cuda)

override_string = ('cuda>=11.8 brand=tesla,driver>=470,driver<471 '
'brand=tesla,driver>=515,driver<516 brand=unknown,driver>=470,driver<471 '
'brand=unknown,driver>=515,driver<516 brand=nvidia,driver>=470,driver<471 '
'brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=470,driver<471 '
'brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=470,driver<471 '
'brand=geforce,driver>=515,driver<516 brand=quadro,driver>=470,driver<471 '
'brand=quadro,driver>=515,driver<516 brand=titan,driver>=470,driver<471 '
'brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=470,driver<471 '
'brand=titanrtx,driver>=515,driver<516')
nvidia_require_cuda_override = '' if cuda_version != '11.8.0' else override_string

entry = {
'IMAGE_NAME':
_get_image_name(pytorch_version, cuda_version, stage, interconnect),
Expand Down Expand Up @@ -163,6 +174,8 @@ def _main():
'',
'PYTORCH_NIGHTLY_VERSION':
'',
'NVIDIA_REQUIRE_CUDA_OVERRIDE':
nvidia_require_cuda_override,
}

# Only build EFA image on latest python with cuda on pytorch_stage
Expand Down

0 comments on commit bfbb89a

Please sign in to comment.