Skip to content

ci: add caching

ci: add caching #7

Workflow file for this run

name: Build and push docker image to registry
on:
workflow_dispatch:
push:
branches:
- 'main'
tags:
- 'v*'
pull_request:
paths:
- ".github/workflows/build.yaml"
# - "integration-tests/**"
- "backends/**"
- "core/**"
- "router/**"
- "Cargo.lock"
- "rust-toolchain.toml"
- "Dockerfile"
branches:
- 'main'
jobs:
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
EC2_AMI_ID: ami-03cfed9ea28f4b002
EC2_INSTANCE_TYPE: g5.4xlarge
EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
EC2_SECURITY_GROUP: sg-030175c435ac141d6
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Start EC2 runner
id: start-ec2-runner
uses: philschmid/philschmid-ec2-github-runner@main
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ${{ env.EC2_AMI_ID }}
ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
subnet-id: ${{ env.EC2_SUBNET_ID }}
security-group-id: ${{ env.EC2_SECURITY_GROUP }}
aws-resource-tags: > # optional, requires additional permissions
[
{"Key": "Name", "Value": "ec2-tgi-github-runner"},
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
]
build-and-push-image:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs: start-runner # required to start the main job when the runner is ready
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
permissions:
contents: write
packages: write
# This is used to complete the identity challenge
# with sigstore/fulcio when running outside of PRs.
id-token: write
security-events: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Initialize Docker Buildx
uses: docker/[email protected]
with:
install: true
- name: Inject slug/short variables
uses: rlespinasse/[email protected]
- name: Tailscale
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
- name: Login to GitHub Container Registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to internal Container Registry
uses: docker/[email protected]
with:
username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
registry: registry.internal.huggingface.tech
- name: Extract metadata (tags, labels) for Docker
id: meta-75
uses: docker/[email protected]
with:
images: |
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
ghcr.io/huggingface/text-embeddings-inference
tags: |
type=semver,pattern=turing-{{version}}
type=semver,pattern=turing-{{major}}.{{minor}}
type=raw,value=turing-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=turing-sha-${{ env.GITHUB_SHA_SHORT }}
- name: Extract metadata (tags, labels) for Docker
id: meta-80
uses: docker/[email protected]
with:
images: |
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
ghcr.io/huggingface/text-embeddings-inference
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
- name: Extract metadata (tags, labels) for Docker
id: meta-86
uses: docker/[email protected]
with:
images: |
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
ghcr.io/huggingface/text-embeddings-inference
tags: |
type=semver,pattern=86-{{version}}
type=semver,pattern=86-{{major}}.{{minor}}
type=raw,value=86-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=86-sha-${{ env.GITHUB_SHA_SHORT }}
- name: Extract metadata (tags, labels) for Docker
id: meta-90
uses: docker/[email protected]
with:
images: |
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
ghcr.io/huggingface/text-embeddings-inference
tags: |
type=semver,pattern=hopper-{{version}}
type=semver,pattern=hopper-{{major}}.{{minor}}
type=raw,value=hopper-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=hopper-sha-${{ env.GITHUB_SHA_SHORT }}
- name: Extract metadata (tags, labels) for Docker
id: meta-cpu
uses: docker/[email protected]
with:
images: |
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
ghcr.io/huggingface/text-embeddings-inference
tags: |
type=semver,pattern=cpu-{{version}}
type=semver,pattern=cpu-{{major}}.{{minor}}
type=raw,value=cpu-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=cpu-sha-${{ env.GITHUB_SHA_SHORT }}
- name: Build and push Docker image
id: build-and-push-75
uses: docker/build-push-action@v4
with:
context: .
file: Dockerfile-cuda
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
build-args: |
CUDA_COMPUTE_CAP=75
GIT_SHA=${{ env.GITHUB_SHA }}
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
tags: ${{ steps.meta-75.outputs.tags }}
labels: ${{ steps.meta-75.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-75,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-75,mode=min
- name: Build and push Docker image
id: build-and-push-80
uses: docker/build-push-action@v4
with:
context: .
file: Dockerfile-cuda
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
build-args: |
CUDA_COMPUTE_CAP=80
GIT_SHA=${{ env.GITHUB_SHA }}
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
tags: ${{ steps.meta-80.outputs.tags }}
labels: ${{ steps.meta-80.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-80,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-80,mode=min
- name: Build and push Docker image
id: build-and-push-86
uses: docker/build-push-action@v4
with:
context: .
file: Dockerfile-cuda
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
build-args: |
CUDA_COMPUTE_CAP=86
GIT_SHA=${{ env.GITHUB_SHA }}
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
tags: ${{ steps.meta-86.outputs.tags }}
labels: ${{ steps.meta-86.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-86,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-86,mode=min
- name: Build and push Docker image
id: build-and-push-90
uses: docker/build-push-action@v4
with:
context: .
file: Dockerfile-cuda
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
build-args: |
CUDA_COMPUTE_CAP=90
GIT_SHA=${{ env.GITHUB_SHA }}
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
tags: ${{ steps.meta-90.outputs.tags }}
labels: ${{ steps.meta-90.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-90,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-90,mode=min
- name: Build and push Docker image
id: build-and-push-cpu
uses: docker/build-push-action@v4
with:
context: .
file: Dockerfile
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
build-args: |
GIT_SHA=${{ env.GITHUB_SHA }}
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
tags: ${{ steps.meta-cpu.outputs.tags }}
labels: ${{ steps.meta-cpu.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-cpu,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-cpu,mode=min
stop-runner:
name: Stop self-hosted EC2 runner
needs:
- start-runner
- build-and-push-image
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Stop EC2 runner
uses: philschmid/philschmid-ec2-github-runner@main
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}