Skip to content

feat(router): add truncation direction parameter #276

feat(router): add truncation direction parameter

feat(router): add truncation direction parameter #276

Workflow file for this run

name: Build and push Cuda Ampere docker image to registry
on:
workflow_dispatch:
push:
branches:
- 'main'
tags:
- 'v*'
pull_request:
paths:
- ".github/workflows/build.yaml"
# - "integration-tests/**"
- "backends/**"
- "core/**"
- "router/**"
- "Cargo.lock"
- "rust-toolchain.toml"
- "Dockerfile"
branches:
- 'main'
jobs:
build-and-push-image:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-80-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci]
permissions:
contents: write
packages: write
# This is used to complete the identity challenge
# with sigstore/fulcio when running outside of PRs.
id-token: write
security-events: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Tailscale
uses: huggingface/tailscale-action@main
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
- name: Initialize Docker Buildx
uses: docker/[email protected]
with:
install: true
config-inline: |
[registry."docker.io"]
mirrors = ["registry.github-runners.huggingface.tech"]
- name: Configure sccache
uses: actions/github-script@v6
with:
script: |
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
- name: Inject slug/short variables
uses: rlespinasse/[email protected]
- name: Login to GitHub Container Registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to internal Container Registry
uses: docker/[email protected]
with:
username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
registry: registry.internal.huggingface.tech
- name: Extract metadata (tags, labels) for Docker
id: meta-80
uses: docker/[email protected]
with:
images: |
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
ghcr.io/huggingface/text-embeddings-inference
flavor: |
latest=false
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
- name: Build and push Docker image
id: build-and-push-80
uses: docker/build-push-action@v4
with:
context: .
file: Dockerfile-cuda
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
build-args: |
SCCACHE_GHA_ENABLED=on
ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
CUDA_COMPUTE_CAP=80
GIT_SHA=${{ env.GITHUB_SHA }}
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
tags: ${{ steps.meta-80.outputs.tags }}
labels: ${{ steps.meta-80.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-80,mode=max
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-80,mode=max
- name: Extract metadata (tags, labels) for Docker
id: meta-80-grpc
uses: docker/[email protected]
with:
images: |
registry.internal.huggingface.tech/api-inference/text-embeddings-inference
ghcr.io/huggingface/text-embeddings-inference
flavor: |
latest=false
tags: |
type=semver,pattern={{version}}-grpc
type=semver,pattern={{major}}.{{minor}}-grpc
type=raw,value=latest-grpc
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-grpc
- name: Build and push Docker image
id: build-and-push-80-grpc
uses: docker/build-push-action@v4
with:
context: .
target: grpc
file: Dockerfile-cuda
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
build-args: |
SCCACHE_GHA_ENABLED=on
ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
CUDA_COMPUTE_CAP=80
GIT_SHA=${{ env.GITHUB_SHA }}
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
tags: ${{ steps.meta-80-grpc.outputs.tags }}
labels: ${{ steps.meta-80-grpc.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/text-embeddings-inference:cache-80,mode=max