Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
Signed-off-by: sallyom <[email protected]>
  • Loading branch information
sallyom committed Apr 12, 2024
1 parent bd7b4d5 commit 932f4df
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 50 deletions.
41 changes: 32 additions & 9 deletions .github/workflows/model_servers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,33 @@ jobs:
matrix:
include:
- image_name: llamacpp_python
cuda_image_name: llamacpp_python_cuda
base_image_name: llamacpp_python
model: mistral
flavor: base
- image_name: llamacpp_python_vulkan
model: mistral
flavor: vulkan
directory: llamacpp_python
platforms: linux/amd64,linux/arm64
#- image_name: llamacpp_python_vulkan
# cuda_image_name: llamacpp_python_cuda
# base_image_name: llamacpp_python
# model: mistral
# flavor: vulkan
# directory: llamacpp_python
# platforms: linux/arm64
- image_name: llamacpp_python_cuda
cuda_image_name: llamacpp_python_cuda
base_image_name: llamacpp_python
model: mistral
flavor: cuda
directory: llamacpp_python
platforms: linux/amd64
- image_name: whispercpp
cuda_image_name: llamacpp_python_cuda
base_image_name: llamacpp_python
model: whisper-small
flavor: base
directory: whispercpp
platforms: linux/amd64,linux/arm64
runs-on: ubuntu-latest
permissions:
contents: read
Expand All @@ -46,6 +62,11 @@ jobs:
ports:
- 5000:5000
steps:
# Deletes /usr/share/dotnet and the directory named by $AGENT_TOOLSDIRECTORY
# from the runner before checkout (presumably to free disk space for the
# image build; confirm).
- name: Remove unnecessary files
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- uses: actions/[email protected]

- name: Install qemu dependency
Expand All @@ -58,13 +79,13 @@ jobs:
uses: redhat-actions/[email protected]
with:
image: ${{ env.REGISTRY }}/${{ github.repository_owner}}/${{ matrix.image_name }}
platforms: linux/amd64, linux/arm64
platforms: ${{ matrix.platforms }}
tags: latest
containerfiles: ./model_servers/${{ matrix.image_name }}/${{ matrix.flavor }}/Containerfile
context: model_servers/${{ matrix.image_name }}/
containerfiles: ./model_servers/${{ matrix.directory }}/${{ matrix.flavor }}/Containerfile
context: model_servers/${{ matrix.directory }}/

- name: Download model
working-directory: ./model_servers/${{ matrix.image_name }}/
working-directory: ./model_servers/${{ matrix.directory }}/
run: make ${{ matrix.model }}

- name: Set up Python
Expand All @@ -73,14 +94,16 @@ jobs:
python-version: '3.11'

- name: Install python dependencies
working-directory: ./model_servers/${{ matrix.image_name }}/
working-directory: ./model_servers/${{ matrix.directory }}/
run: make install

- name: Run tests
working-directory: ./model_servers/${{ matrix.image_name }}/
working-directory: ./model_servers/${{ matrix.directory }}/
run: make test
env:
IMAGE_NAME: ${{ matrix.image_name }}
BASE_IMAGE_NAME: ${{ matrix.base_image_name }}
CUDA_IMAGE_NAME: ${{ matrix.cuda_image_name }}

- name: Login to Container Registry
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
Expand Down
17 changes: 11 additions & 6 deletions .github/workflows/rag.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ on:
pull_request:
branches:
- main
# paths:
# - ./recipes/natural_language_processing/rag/**
# - .github/workflows/rag.yaml
# Path filters are matched against repository-root-relative paths, so the
# patterns must not start with "./" (a "./"-prefixed pattern never matches).
paths:
  - 'recipes/natural_language_processing/rag/**'
  - '.github/workflows/rag.yaml'
push:
branches:
- main
# paths:
# - ./recipes/natural_language_processing/rag/**
# - .github/workflows/rag.yaml
# Path filters are matched against repository-root-relative paths, so the
# patterns must not start with "./" (a "./"-prefixed pattern never matches).
paths:
  - 'recipes/natural_language_processing/rag/**'
  - '.github/workflows/rag.yaml'

workflow_dispatch:

Expand All @@ -32,6 +32,11 @@ jobs:
ports:
- 5000:5000
steps:
# Deletes /usr/share/dotnet and the directory named by $AGENT_TOOLSDIRECTORY
# from the runner before checkout (presumably to free disk space for the
# image build; confirm).
- name: Remove unnecessary files
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- uses: actions/[email protected]

- name: Install qemu dependency
Expand Down
36 changes: 8 additions & 28 deletions .github/workflows/testing-framework.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Testing Framework

on:
schedule: # schedule the job to run every hour
- cron: '0 */6 * * *'
- cron: '0 * * * *'

workflow_dispatch:

Expand Down Expand Up @@ -42,11 +42,6 @@ jobs:
- arch: amd64 # gpu enabled
aws_image_type: g4dn.xlarge
aws_ami_architecture: x86_64
- app_path: natural_language_processing/chatbot
- app_path: natural_language_processing/summarizer
- app_path: natural_language_processing/codegen
- app_path: natural_language_processing/rag
- app_path: audio/audio_to_text
steps:
- name: Checkout
uses: actions/[email protected]
Expand Down Expand Up @@ -89,11 +84,11 @@ jobs:

- name: Ansible Collections
run: ansible-galaxy install -r ./provision/requirements.yml
working-directory: ./main/recipes/${{ matrix.app_path }}
working-directory: ./main/recipes/natural_language_processing/chatbot

- name: Provision
run: |
ansible-playbook ./main/recipes/${{ matrix.app_path }}/provision/playbook.yml \
ansible-playbook ./main/recipes/natural_language_processing/chatbot/provision/playbook.yml \
-i terraform-test-environment-module/hosts.ini \
--private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }}
env:
Expand All @@ -105,11 +100,11 @@ jobs:
python-version: '3.11'

- name: Install Dependencies
working-directory: ./main/recipes/${{ matrix.app_path }}
working-directory: ./main/recipes/natural_language_processing/chatbot
run: make install

- name: Run Integration Tests
working-directory: ./main/recipes/${{ matrix.app_path }}
working-directory: ./main/recipes/natural_language_processing/chatbot
run: make integration-tests
env:
URL: ${{ steps.terraform-output.outputs.url }}
Expand Down Expand Up @@ -144,14 +139,8 @@ jobs:
matrix:
include:
- image: llamacpp_python
- image: llamacpp_python_vulkan
- image: llamacpp_python_cuda
- image: whispercpp
- image: chatbot
- image: summarizer
- image: codegen
- image: rag
- image: transcribe
steps:
- name: Login to registry
uses: redhat-actions/[email protected]
Expand All @@ -178,23 +167,14 @@ jobs:
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

test-make-bootc:
test-make-targets:
if: github.repository == 'containers-mirror/ai-lab-recipes'
runs-on: ubuntu-22.04-2core
strategy:
fail-fast: false
matrix:
include:
- app_path: natural_language_processing/chatbot
- app_path: natural_language_processing/summarizer
- app_path: natural_language_processing/codegen
- app_path: natural_language_processing/rag
- app_path: audio/audio_to_text
steps:
- uses: actions/[email protected]

- name:
working-directory: ./recipes/${{ matrix.app_path }}
- name: chatbot
working-directory: ./recipes/natural_language_processing/chatbot
run: make bootc

- name: Publish Job Results to Slack
Expand Down
14 changes: 10 additions & 4 deletions model_servers/llamacpp_python/Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
APP := llamacpp_python
IMAGE_BASE := llamacpp-python
PORT := 8001

IMAGE := quay.io/ai-lab/$(APP):latest
CUDA_IMAGE := quay.io/ai-lab/$(APP)_cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/$(APP)_vulkan:latest
IMAGE := quay.io/ai-lab/$(IMAGE_BASE):latest
CUDA_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-cuda:latest
VULKAN_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-vulkan:latest

# ----- MODEL OPTIONS -----

Expand Down Expand Up @@ -43,7 +44,7 @@ build-cuda:

.PHONY: build-vulkan
build-vulkan:
podman build --squash-all -t $(VULKAN_IMAGE) . -f cuda/Containerfile
podman build --squash-all -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

.PHONY: download-model-tiny-llama
download-model-tiny-llama:
Expand All @@ -67,6 +68,11 @@ run:
cd ../../models && \
podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)

# Start the CUDA model-server image detached, bind-mounting the selected model
# from ../../models and requesting the nvidia.com/gpu=all device.
# Uses $(CUDA_IMAGE), not the base $(IMAGE), so the container that is handed
# the GPU device is the CUDA build.
.PHONY: run-cuda
run-cuda:
	cd ../../models && \
	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(CUDA_IMAGE)

.PHONY: test
test:
curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
Expand Down
25 changes: 24 additions & 1 deletion model_servers/llamacpp_python/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
import pytest_container
import os

# Container definition for the CUDA flavor of the model server.
# The image URL is built from the REGISTRY and CUDA_IMAGE_NAME environment
# variables; both are read with os.environ[...] at import time, so a missing
# variable raises KeyError when this module is loaded.
CUDA_MS = pytest_container.Container(
    url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['CUDA_IMAGE_NAME']}",
    volume_mounts=[
        # Mount the model file from the working directory read-only at the
        # path the server is told to load via MODEL_PATH below.
        pytest_container.container.BindMount(
            container_path="/locallm/models/model.gguf",
            # Plain literal: nothing is interpolated, so no f-prefix is needed.
            host_path="./model.gguf",
            flags=["ro"],
        )
    ],
    extra_environment_variables={
        "MODEL_PATH": "/locallm/models/model.gguf",
        "HOST": "0.0.0.0",
        "PORT": "8001",
    },
    forwarded_ports=[
        # Container port 8001 (same value as PORT above) is published on
        # host port 8001.
        pytest_container.PortForwarding(
            container_port=8001,
            host_port=8001,
        )
    ],
    # Request the nvidia.com/gpu=all device when launching the container.
    extra_launch_args=["--device", "nvidia.com/gpu=all"],
)

MS = pytest_container.Container(
url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['BASE_IMAGE_NAME']}",
volume_mounts=[
pytest_container.container.BindMount(
container_path="/locallm/models/model.gguf",
Expand Down
4 changes: 2 additions & 2 deletions model_servers/llamacpp_python/tests/test_alive.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import pytest_container
from .conftest import MS
from .conftest import CUDA_MS
import tenacity

CONTAINER_IMAGES = [MS]

CONTAINER_IMAGES = [MS, CUDA_MS]

def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
assert auto_container.connection.file("/etc/os-release").exists
Expand Down

0 comments on commit 932f4df

Please sign in to comment.