-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathDockerfile
272 lines (236 loc) · 9.55 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# 0 parquet-to-arrow, arrow-tools: executables we use in Workbench
# These stages only give names to two published images; later stages pull
# individual binaries out of them with `COPY --from=...`.
FROM workbenchdata/parquet-to-arrow:v2.2.0 AS parquet-to-arrow
FROM workbenchdata/arrow-tools:v1.1.0 AS arrow-tools
# 1 pybase: Python and tools we use in dev and production
FROM python:3.8.8-slim-buster AS pybase0

# Create the man-page directories up front.
# NOTE(review): presumably a workaround for packages installed later whose
# install scripts expect these directories on -slim images -- confirm.
#
# No apt-get here: this step installs no packages, so an `apt-get update` /
# `apt-get install` pair with an empty package list would only add a network
# round-trip to the build.
RUN mkdir -p /usr/share/man/man1 /usr/share/man/man7
# System packages for every Python image.
#
# Wanted:
#   iproute2: setup-sandboxes.sh uses it to find our IP for NAT
#   iptables: setup-sandboxes.sh uses it to set up NAT and firewall
#   libcap2: pyspawner loads it (via ctypes) to drop capabilities
#   libicu63: PyICU links against it
#   postgresql-client: provides pg_isready for bin/wait-for-database (used in
#                      production)
#
# Probably unwanted, long-term:
#   curl: handy for testing and for the NLTK download; not worth uninstalling
#         each time
#   unzip: [adamhooper, 2019-02-21] I'm afraid to uninstall it, in case one
#          of our Python deps shells to it
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        iproute2 \
        iptables \
        libcap2 \
        libicu63 \
        postgresql-client \
        unzip \
    && rm -rf /var/lib/apt/lists/*
# Download NLTK stuff
#
# NLTK expects its data to stay zipped, so the archives are saved as-is.
#
# curl flags:
#   --fail: exit non-zero on an HTTP error, so the build stops instead of
#           saving the server's error page under a ".zip" name
#   --silent --show-error: no progress meter, but errors are still printed
#   --location: follow redirects
#   --output: write straight to the destination file
RUN mkdir -p /usr/share/nltk_data/corpora /usr/share/nltk_data/sentiment \
    && curl --fail --silent --show-error --location \
        --output /usr/share/nltk_data/corpora/stopwords.zip \
        https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip \
    && curl --fail --silent --show-error --location \
        --output /usr/share/nltk_data/sentiment/vader_lexicon.zip \
        https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/sentiment/vader_lexicon.zip
# Executables taken from the helper images, grouped by source stage. Each file
# lands in /usr/bin under its own name.
COPY --from=arrow-tools \
    /usr/bin/arrow-validate \
    /usr/bin/csv-to-arrow \
    /usr/bin/json-to-arrow \
    /usr/bin/xls-to-arrow \
    /usr/bin/xlsx-to-arrow \
    /usr/bin/
COPY --from=parquet-to-arrow \
    /usr/bin/parquet-diff \
    /usr/bin/parquet-to-arrow \
    /usr/bin/parquet-to-text-stream \
    /usr/bin/

# Everything that follows works out of /app
RUN mkdir /app
WORKDIR /app
# pybase-venv: build stage in which the virtualenvs under /opt/venv are created
FROM python:3.8.8-slim-buster AS pybase-venv
RUN mkdir -p /opt/venv
WORKDIR /app

# Compiler toolchain and headers (build-essential and everything below it),
# needed to build:
# * psycopg2 (psycopg2-binary is evil because it links SSL -- as does Python)
# * PyICU
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libicu-dev \
        libpq-dev \
        pkg-config
# Django venv: installed from the frozen requirements file (--no-deps, so only
# the listed packages are installed).
COPY venv/django-requirements-frozen.txt /app/venv/
# Clean up after pip, to save disk space. We nix the pycache from venv/django/,
# which is only invoked once per container.
#
# `-depth` comes before `-name`: it is a global option, and GNU find prints a
# warning when a global option follows a test.
RUN python -m venv --copies /opt/venv/django \
    && /opt/venv/django/bin/python -m pip install --no-deps --no-cache-dir \
        -r /app/venv/django-requirements-frozen.txt \
    && find /opt/venv/django -depth -name __pycache__ -exec rm -r {} +

# cjwkernel venv: same recipe, but its __pycache__ directories are kept.
COPY venv/cjwkernel-requirements-frozen.txt /app/venv/
RUN python -m venv --copies /opt/venv/cjwkernel \
    && /opt/venv/cjwkernel/bin/python -m pip install --no-deps --no-cache-dir \
        -r /app/venv/cjwkernel-requirements-frozen.txt
FROM pybase0 AS pybase

# Build the chroot base layer as early as possible, so it lands in a
# rarely-changing (and therefore cached) Docker layer.
#
# (Bind-mounting into the chroot layer is not an option: overlayfs would
# show only the mountpoints, not the files mounted within them. Instead,
# hard-link every file under the sun.)
#
# cp flags:
#   -d: copy symlinks as-is
#   -r: recurse (copying directory tree)
#   -l: hard-link instead of copying data (saves space)
ARG CHROOT=/var/lib/cjwkernel/chroot-layers/base
RUN for src in \
        /bin \
        /lib \
        /lib64 \
        /usr/share/nltk_data \
        /usr/bin \
        /usr/lib \
        /usr/local \
        /etc/ld.so.cache \
        /etc/ssl \
        /usr/share/ca-certificates \
    ; do \
        echo "chrooting $src..."; \
        mkdir -p "${CHROOT}$(dirname "${src}")"; \
        cp -drl "${src}" "${CHROOT}${src}"; \
    done
# Files from cjwkernel/chroot-fs/etc/ go into the chroot's /etc
COPY cjwkernel/chroot-fs/etc/* $CHROOT/etc/

# Empty, sticky, world-writable tempdirs. If callers or modules write files,
# these directories will be mirrored in the upper layer.
RUN for tmpdir in "${CHROOT}/tmp" "${CHROOT}/var/tmp"; do \
        mkdir -p "${tmpdir}"; \
        chmod 1777 "${tmpdir}"; \
    done
# Bring in the virtualenvs built in the pybase-venv stage
COPY --from=pybase-venv /opt/venv /opt/venv

# The chroot gets the cjwkernel venv as well -- hard-linked, like the rest of
# the chroot layer.
RUN mkdir -p "${CHROOT}/opt/venv" \
    && cp -drl /opt/venv/cjwkernel "${CHROOT}/opt/venv/"

# cjwkernel's own code, inside the chroot
RUN mkdir -p "${CHROOT}/app"
COPY cjwkernel/ $CHROOT/app/cjwkernel/

# Chroots overlay the root FS, so they must live on a different FS.
# see cjwkernel/setup-sandboxes.sh
VOLUME /var/lib/cjwkernel/chroot
# 1.1 pydev: just for the development environment
FROM pybase AS pydev

# Dev-only libraries go into the Django venv, so unit tests can run.
#
# None of these libraries require build-essential
COPY venv/django-dev-requirements.txt /app/venv/
RUN /opt/venv/django/bin/python -m pip install \
        --no-cache-dir \
        -r /app/venv/django-dev-requirements.txt
COPY bin/unittest-entrypoint.sh /app/bin/unittest-entrypoint.sh

# Chroots overlay the root FS, so they must live on a different FS.
# see cjwkernel/setup-sandboxes.sh
VOLUME /var/lib/cjwkernel/chroot
# 2. Node deps -- completely independent
# 2.1 jsbase: what we use in dev-in-docker
# (`AS` is uppercase to match the FROM keyword, like every other stage here;
# BuildKit's FromAsCasing check flags mixed casing.)
FROM node:14-buster-slim AS jsbase
RUN mkdir /app
WORKDIR /app
# 2.2 jsbuild: where we build JavaScript assets
FROM jsbase AS jsbuild

# Dependency manifests are copied first, so the install layer is reused until
# they change.
COPY package.json package-lock.json babel.config.json /app/
# `npm ci` installs exactly what package-lock.json pins, and fails if the
# lockfile and package.json disagree, instead of resolving new versions.
# Dev dependencies are kept: the test, lint and webpack steps below need them.
RUN npm ci

COPY webpack.config.js setupJest.js lingui.config.js /app/
COPY __mocks__/ /app/__mocks__/
COPY assets/ /app/assets/

# Inject unit tests into our continuous integration
# This catches mistakes that would otherwise foil us in bin/integration-test;
# and currently we rely on this line in our CI scripts (cloudbuild.yaml).
RUN npm test
RUN npm run lint

# Produce the production bundles
RUN node_modules/.bin/webpack --mode=production
# 3. Prod images will all be based on the same stuff:
# pybase plus the code below, which ends up in every image built FROM base.
FROM pybase AS base
# Configure Black
# (The destination is relative, so it resolves against the working directory
# inherited from pybase.)
COPY pyproject.toml pyproject.toml
# Python packages, scripts and manage.py
COPY cjwkernel/ /app/cjwkernel/
COPY cjwstate/ /app/cjwstate/
COPY cjworkbench/ /app/cjworkbench/
COPY bin/ /app/bin/
COPY manage.py /app/
# Inject code-style tests into our continuous integration.
# This catches style errors that accidentally got past somebody's
# pre-commit hook.
#
# This stage uses its own Python image with a pinned Black, and checks the
# /app tree copied out of the base stage. The build of this stage fails when
# `black --check` exits non-zero.
FROM python:3.9.6-slim-buster AS pylint
RUN python -m pip install --no-cache-dir black==21.6b0
COPY --from=base /app /app
RUN black --check /app
# unittest: like pydev, plus code
FROM base AS unittest

# Dev/test libraries for the Django venv
COPY venv/django-dev-requirements.txt /app/venv/
RUN /opt/venv/django/bin/python -m pip install \
        --no-cache-dir \
        -r /app/venv/django-dev-requirements.txt
COPY bin/unittest-entrypoint.sh /app/bin/unittest-entrypoint.sh

# Translations
RUN mkdir /app/assets
COPY assets/locale/ /app/assets/locale/

# Code of every service
COPY daphne/ /app/daphne/
COPY fetcher/ /app/fetcher/
COPY renderer/ /app/renderer/
COPY server/ /app/server/
COPY templates/ /app/templates/
COPY cron/ /app/cron/
COPY tusdhooks/ /app/tusdhooks/

# Webpack manifest produced by the jsbuild stage
COPY --from=jsbuild /app/assets/bundles/webpack-manifest.json /app/assets/bundles/webpack-manifest.json
# 3.1. assets: uploads assets to S3 (frontend will point end users there)
#
# compile-assets gathers every static file into /app/static; the upload-assets
# image is built from that directory.
FROM base AS compile-assets
COPY staticfilesdev/ /app/staticfilesdev/
COPY assets/ /app/assets/
RUN mkdir /app/server
COPY server/lessons/ /app/server/lessons/
COPY server/courses/ /app/server/courses/
COPY --from=jsbuild /app/assets/bundles/ /app/assets/bundles/

# Collect static files using the staticfilesdev settings module
RUN DJANGO_SETTINGS_MODULE=staticfilesdev.settings \
    /opt/venv/django/bin/python ./manage.py collectstatic

# Print size (bytes) and relative path of every collected file to the build log
RUN find /app/static -type f -printf "%s\t%P\n"
# upload-assets: copies the files collected by compile-assets into a bucket
# when the container runs.
FROM amazon/aws-cli:2.2.18 AS upload-assets
COPY --from=compile-assets /app/static/ /app/static/
# Clear the entrypoint inherited from the base image, so CMD runs as written.
ENTRYPOINT []
# Select the "crt" transfer client for `aws s3` commands
RUN aws configure set default.s3.preferred_transfer_client crt
ENV AWS_DEFAULT_REGION=us-east-1
# We use /bin/sh to substitute environment variables
#   AWS_S3_ENDPOINT: optional; defaults to https://s3.us-east-1.amazonaws.com
#   BUCKET_NAME: required; the shell aborts with an error message if it is
#                unset or empty
# `exec` replaces the shell with the aws process.
CMD [ "/bin/sh", "-c", "exec aws s3 cp --recursive --no-progress \"--endpoint-url=${AWS_S3_ENDPOINT:-https://s3.us-east-1.amazonaws.com}\" /app/static/ \"s3://${BUCKET_NAME:?must set BUCKET_NAME environment variable}/\"" ]
# 3.2. migrate: modifies database schema
# Built on the Flyway image, with this repository's flyway/ directory copied
# to /flyway/.
FROM flyway/flyway:7.7.0-alpine AS migrate
COPY flyway/ /flyway/
# NOTE(review): "migrate" is presumably passed as an argument to the base
# image's entrypoint -- confirm against the flyway image.
CMD [ "migrate" ]
# 3.3. fetcher: runs fetch
FROM base AS fetcher
COPY fetcher/ /app/fetcher/
# Stop the container with SIGKILL instead of Docker's default SIGTERM.
# NOTE(review): presumably because the process needs no graceful shutdown --
# confirm.
STOPSIGNAL SIGKILL
CMD [ "bin/fetcher-prod" ]
# 3.4. renderer: runs bin/renderer-prod
FROM base AS renderer
COPY renderer/ /app/renderer/
# i18n for notifications emails
COPY assets/locale/ /app/assets/locale/
# Stop the container with SIGKILL instead of Docker's default SIGTERM.
STOPSIGNAL SIGKILL
CMD [ "bin/renderer-prod" ]
# 3.5. cron: schedules fetches and runs cleanup SQL
FROM base AS cron
COPY cron/ /app/cron/
# Stop the container with SIGKILL instead of Docker's default SIGTERM.
STOPSIGNAL SIGKILL
CMD [ "bin/cron-prod" ]
# 3.6. frontend: serves website
FROM base AS frontend
COPY assets/icons/ /app/assets/icons/
COPY server/ /app/server/
# templates are used in renderer for notifications emails and in frontend for
# views.
COPY templates/ /app/templates/
COPY assets/locale/ /app/assets/locale/
# Only the webpack manifest is copied from jsbuild, not the bundles themselves.
COPY --from=jsbuild /app/assets/bundles/webpack-manifest.json /app/assets/bundles/webpack-manifest.json
# 8080 is Kubernetes' conventional web-server port
EXPOSE 8080
# Beware: uvicorn does not serve static files! Use upload-assets to push them
# to object storage and publish them there.
# NOTE(review): this comment used to say "GCS", but the upload-assets stage
# runs `aws s3 cp` against a configurable endpoint -- confirm which it is.
CMD [ "bin/frontend-prod" ]
# 3.7. tusd-hooks: finishes file uploads
# It requires the whole module-loader system, merely to set a parameter.
# TODO make migrate-params far more lightweight, so cjwkernel won't be needed.
FROM base AS tusd-hooks
COPY tusdhooks/ /app/tusdhooks/
# 8080 is Kubernetes' conventional web-server port
# (EXPOSE only documents the port; it does not publish it.)
EXPOSE 8080
CMD [ "bin/tusd-hooks-prod" ]