forked from NVIDIA/modulus
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDockerfile
172 lines (147 loc) · 8.38 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG BASE_CONTAINER=nvcr.io/nvidia/pytorch:24.03-py3
FROM ${BASE_CONTAINER} as builder
ARG TARGETPLATFORM
# Update pip and setuptools
RUN pip install "pip==23.2.1" "setuptools==68.2.2"
# Setup git lfs, graphviz gl1(vtk dep)
RUN apt-get update && \
apt-get install -y git-lfs graphviz libgl1 && \
git lfs install
ENV _CUDA_COMPAT_TIMEOUT=90
# Install other dependencies
RUN pip install --no-cache-dir "h5py>=3.7.0" "netcdf4>=1.6.3" "ruamel.yaml>=0.17.22" "scikit-learn>=1.0.2" "cftime>=1.6.2" "einops>=0.7.0" "pyspng>=0.1.0"
RUN pip install --no-cache-dir "hydra-core>=1.2.0" "termcolor>=2.1.1" "wandb>=0.13.7" "mlflow>=2.1.1" "pydantic>=1.10.2" "imageio>=2.28.1" "moviepy>=1.0.3" "tqdm>=4.60.0" "gcsfs==2024.2.0"
# copy modulus source
COPY . /modulus/
# Install Numcodecs (This needs a separate install because Numcodecs ARM pip install has issues)
# A fix is being added here: https://github.com/zarr-developers/numcodecs/pull/315 but the public release is not ready yet.
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
echo "Pip install for numcodecs for $TARGETPLATFORM exists, installing!" && \
pip install --no-cache-dir numcodecs; \
elif [ "$TARGETPLATFORM" = "linux/arm64" ] && [ -e "/modulus/deps/numcodecs-0.11.0-cp310-cp310-linux_aarch64.whl" ]; then \
echo "Numcodecs wheel for $TARGETPLATFORM exists, installing!" && \
pip install --force-reinstall --no-cache-dir /modulus/deps/numcodecs-0.11.0-cp310-cp310-linux_aarch64.whl; \
else \
echo "Numcodecs wheel for $TARGETPLATFORM is not present, attempting to build from pip, but might fail" && \
pip install --no-cache-dir numcodecs; \
fi
# install vtk and pyvista
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] && [ -e "/modulus/deps/vtk-9.2.6.dev0-cp310-cp310-linux_aarch64.whl" ]; then \
echo "VTK wheel for $TARGETPLATFORM exists, installing!" && \
pip install --no-cache-dir /modulus/deps/vtk-9.2.6.dev0-cp310-cp310-linux_aarch64.whl; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
echo "Installing vtk for: $TARGETPLATFORM" && \
pip install --no-cache-dir "vtk>=9.2.6"; \
else \
echo "Installing vtk for: $TARGETPLATFORM from source" && \
apt-get update && apt-get install -y libgl1-mesa-dev && \
git clone https://gitlab.kitware.com/vtk/vtk.git && cd vtk && git checkout tags/v9.2.6 && git submodule update --init --recursive && \
mkdir build && cd build && cmake -GNinja -DVTK_WHEEL_BUILD=ON -DVTK_WRAP_PYTHON=ON /workspace/vtk/ && ninja && \
python setup.py bdist_wheel && \
pip install --no-cache-dir dist/vtk-9.2.6.dev0-cp310-cp310-linux_aarch64.whl && \
cd ../../ && rm -r vtk; \
fi
RUN pip install --no-cache-dir "pyvista>=0.40.1"
# Install DGL, below instructions only work for containers with CUDA >= 12.1
# (https://www.dgl.ai/pages/start.html)
ARG DGL_BACKEND=pytorch
ENV DGL_BACKEND=$DGL_BACKEND
ENV DGLBACKEND=$DGL_BACKEND
# TODO: this is a workaround as dgl is not yet shipping arm compatible wheels for CUDA 12.x: https://github.com/NVIDIA/modulus/issues/432
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] && [ -e "/modulus/deps/dgl-2.0.0-cp310-cp310-linux_aarch64.whl" ]; then \
echo "DGL wheel for $TARGETPLATFORM exists, installing!" && \
pip install --no-cache-dir --no-deps /modulus/deps/dgl-2.0.0-cp310-cp310-linux_aarch64.whl; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
echo "Installing DGL for: $TARGETPLATFORM" && \
pip install --no-cache-dir --no-deps dgl==2.0.0 -f https://data.dgl.ai/wheels/cu121/repo.html; \
else \
echo "Installing DGL for: $TARGETPLATFORM from source" && \
git clone https://github.com/dmlc/dgl.git && cd dgl/ && git checkout tags/v2.0.0 && git submodule update --init --recursive && \
DGL_HOME="/workspace/dgl" bash script/build_dgl.sh -g && \
cd python && \
python setup.py install && \
python setup.py build_ext --inplace && \
cd ../../ && rm -r /workspace/dgl; \
fi
# Install custom onnx
# TODO: Find a fix to eliminate the custom build
# Forcing numpy update to over ride numba 0.56.4 max numpy constraint
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] && [ -e "/modulus/deps/onnxruntime_gpu-1.18.0-cp310-cp310-linux_x86_64.whl" ]; then \
echo "Custom onnx wheel for $TARGETPLATFORM exists, installing!" && \
pip install --force-reinstall --no-cache-dir /modulus/deps/onnxruntime_gpu-1.18.0-cp310-cp310-linux_x86_64.whl; \
elif [ "$TARGETPLATFORM" = "linux/arm64" ] && [ -e "/modulus/deps/onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl" ]; then \
echo "Custom onnx wheel for $TARGETPLATFORM exists, installing!" && \
pip install --force-reinstall --no-cache-dir /modulus/deps/onnxruntime_gpu-1.18.0-cp310-cp310-linux_aarch64.whl; \
else \
echo "No custom wheel present, skipping" && \
pip install --no-cache-dir "numpy==1.22.4"; \
fi
# cleanup of stage
RUN rm -rf /modulus/
# CI image
FROM builder as ci
ARG TARGETPLATFORM
COPY . /modulus/
RUN cd /modulus/ && pip install -e .[makani] && pip uninstall nvidia-modulus -y
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
echo "Installing tensorflow and warp-lang for: $TARGETPLATFORM" && \
pip install --no-cache-dir "tensorflow==2.9.0" "warp-lang>=0.6.0"; \
elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
echo "Installing tensorflow and warp-lang for: $TARGETPLATFORM is not supported presently"; \
fi
RUN pip install --no-cache-dir "black==22.10.0" "interrogate==1.5.0" "coverage==6.5.0" "protobuf==3.20.3"
# TODO(akamenev): install Makani via direct URL, see comments in pyproject.toml.
# RUN pip install --no-cache-dir -e git+https://github.com/NVIDIA/[email protected]#egg=makani
# Install torch-scatter, torch-cluster, and pyg
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] && [ -e "/modulus/deps/torch_scatter-2.1.2-cp310-cp310-linux_x86_64.whl" ]; then \
echo "Installing torch_scatter and for: $TARGETPLATFORM" && \
pip install --force-reinstall --no-cache-dir /modulus/deps/torch_scatter-2.1.2-cp310-cp310-linux_x86_64.whl; \
else \
echo "No custom wheel present, skipping"; \
fi
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] && [ -e "/modulus/deps/torch_cluster-1.6.3-cp310-cp310-linux_x86_64.whl" ]; then \
echo "Installing torch_cluster and for: $TARGETPLATFORM" && \
pip install --force-reinstall --no-cache-dir /modulus/deps/torch_cluster-1.6.3-cp310-cp310-linux_x86_64.whl; \
else \
echo "No custom wheel present, skipping"; \
fi
RUN pip install --no-cache-dir "torch_geometric==2.5.3"
# Deployment image
FROM builder as deploy
COPY . /modulus/
RUN cd /modulus/ && pip install .
RUN pip install --no-cache-dir "protobuf==3.20.3"
# Set Git Hash as a environment variable
ARG MODULUS_GIT_HASH
ENV MODULUS_GIT_HASH=${MODULUS_GIT_HASH:-unknown}
# Clean up
RUN rm -rf /modulus/
# Docs image
FROM deploy as docs
ARG TARGETPLATFORM
# Install CI packages
RUN pip install --no-cache-dir "protobuf==3.20.3"
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
echo "Installing tensorflow and warp-lang for: $TARGETPLATFORM" && \
pip install --no-cache-dir "tensorflow==2.9.0" "warp-lang>=0.6.0"; \
elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
echo "Installing tensorflow and warp-lang for: $TARGETPLATFORM is not supported presently"; \
fi
# Install packages for Sphinx build
RUN pip install --no-cache-dir "recommonmark==0.7.1" "sphinx==5.1.1" "sphinx-rtd-theme==1.0.0" "pydocstyle==6.1.1" "nbsphinx==0.8.9" "nbconvert==6.4.3" "jinja2==3.0.3"
RUN wget https://github.com/jgm/pandoc/releases/download/3.1.6.2/pandoc-3.1.6.2-1-amd64.deb && dpkg -i pandoc-3.1.6.2-1-amd64.deb