From 932f4dfc2537fbcd79605673972f1b1216989e2c Mon Sep 17 00:00:00 2001
From: sallyom
Date: Thu, 11 Apr 2024 14:20:11 -0400
Subject: [PATCH] fix

Signed-off-by: sallyom
---
 .github/workflows/model_servers.yaml     | 41 +++++++++++++++----
 .github/workflows/rag.yaml               | 17 +++++---
 .github/workflows/testing-framework.yaml | 36 ++++------------
 model_servers/llamacpp_python/Makefile   | 14 +++++--
 .../llamacpp_python/tests/conftest.py    | 25 ++++++++++-
 .../llamacpp_python/tests/test_alive.py  |  4 +-
 6 files changed, 87 insertions(+), 50 deletions(-)

diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index 83d44a235..b861e70ff 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -25,17 +25,33 @@ jobs:
       matrix:
         include:
           - image_name: llamacpp_python
+            cuda_image_name: llamacpp_python_cuda
+            base_image_name: llamacpp_python
             model: mistral
             flavor: base
-          - image_name: llamacpp_python_vulkan
-            model: mistral
-            flavor: vulkan
+            directory: llamacpp_python
+            platforms: linux/amd64,linux/arm64
+          #- image_name: llamacpp_python_vulkan
+          #  cuda_image_name: llamacpp_python_cuda
+          #  base_image_name: llamacpp_python
+          #  model: mistral
+          #  flavor: vulkan
+          #  directory: llamacpp_python
+          #  platforms: linux/arm64
           - image_name: llamacpp_python_cuda
+            cuda_image_name: llamacpp_python_cuda
+            base_image_name: llamacpp_python
             model: mistral
             flavor: cuda
+            directory: llamacpp_python
+            platforms: linux/amd64
           - image_name: whispercpp
+            cuda_image_name: llamacpp_python_cuda
+            base_image_name: llamacpp_python
             model: whisper-small
             flavor: base
+            directory: whispercpp
+            platforms: linux/amd64,linux/arm64
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -46,6 +62,11 @@ jobs:
       ports:
         - 5000:5000
     steps:
+      - name: Remove unnecessary files
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
       - uses: actions/checkout@v4.1.1

       - name: Install qemu dependency
@@ -58,13 +79,13 @@ jobs:
         uses: redhat-actions/buildah-build@v2.13
         with:
           image: ${{ env.REGISTRY }}/${{ github.repository_owner}}/${{ matrix.image_name }}
-          platforms: linux/amd64, linux/arm64
+          platforms: ${{ matrix.platforms }}
           tags: latest
-          containerfiles: ./model_servers/${{ matrix.image_name }}/${{ matrix.flavor }}/Containerfile
-          context: model_servers/${{ matrix.image_name }}/
+          containerfiles: ./model_servers/${{ matrix.directory }}/${{ matrix.flavor }}/Containerfile
+          context: model_servers/${{ matrix.directory }}/

       - name: Download model
-        working-directory: ./model_servers/${{ matrix.image_name }}/
+        working-directory: ./model_servers/${{ matrix.directory }}/
         run: make ${{ matrix.model }}

       - name: Set up Python
@@ -73,14 +94,16 @@ jobs:
           python-version: '3.11'

       - name: Install python dependencies
-        working-directory: ./model_servers/${{ matrix.image_name }}/
+        working-directory: ./model_servers/${{ matrix.directory }}/
         run: make install

       - name: Run tests
-        working-directory: ./model_servers/${{ matrix.image_name }}/
+        working-directory: ./model_servers/${{ matrix.directory }}/
         run: make test
         env:
           IMAGE_NAME: ${{ matrix.image_name }}
+          BASE_IMAGE_NAME: ${{ matrix.base_image_name }}
+          CUDA_IMAGE_NAME: ${{ matrix.cuda_image_name }}

       - name: Login to Container Registry
         if: github.event_name == 'push' && github.ref == 'refs/heads/main'
diff --git a/.github/workflows/rag.yaml b/.github/workflows/rag.yaml
index b953a8946..3fbe96ef2 100644
--- a/.github/workflows/rag.yaml
+++ b/.github/workflows/rag.yaml
@@ -4,15 +4,15 @@ on:
   pull_request:
     branches:
       - main
-    # paths:
-    #   - ./recipes/natural_language_processing/rag/**
-    #   - .github/workflows/rag.yaml
+    paths:
+      - ./recipes/natural_language_processing/rag/**
+      - .github/workflows/rag.yaml
   push:
     branches:
       - main
-    # paths:
-    #   - ./recipes/natural_language_processing/rag/**
-    #   - .github/workflows/rag.yaml
+    paths:
+      - ./recipes/natural_language_processing/rag/**
+      - .github/workflows/rag.yaml

   workflow_dispatch:
@@ -32,6 +32,11 @@ jobs:
       ports:
         - 5000:5000
     steps:
+      - name: Remove unnecessary files
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
       - uses: actions/checkout@v4.1.1

       - name: Install qemu dependency
diff --git a/.github/workflows/testing-framework.yaml b/.github/workflows/testing-framework.yaml
index 400d2c9d0..ca9be4179 100644
--- a/.github/workflows/testing-framework.yaml
+++ b/.github/workflows/testing-framework.yaml
@@ -2,7 +2,7 @@ name: Testing Framework

 on:
   schedule: # schedule the job to run every hour
-    - cron: '0 */6 * * *'
+    - cron: '0 * * * *'

   workflow_dispatch:

@@ -42,11 +42,6 @@ jobs:
           - arch: amd64 # gpu enabled
             aws_image_type: g4dn.xlarge
             aws_ami_architecture: x86_64
-          - app_path: natural_language_processing/chatbot
-          - app_path: natural_language_processing/summarizer
-          - app_path: natural_language_processing/codegen
-          - app_path: natural_language_processing/rag
-          - app_path: audio/audio_to_text
     steps:
       - name: Checkout
         uses: actions/checkout@v4.1.1
@@ -89,11 +84,11 @@ jobs:

       - name: Ansible Collections
         run: ansible-galaxy install -r ./provision/requirements.yml
-        working-directory: ./main/recipes/${{ matrix.app_path }}
+        working-directory: ./main/recipes/natural_language_processing/chatbot

       - name: Provision
         run: |
-          ansible-playbook ./main/recipes/${{ matrix.app_path }}/provision/playbook.yml \
+          ansible-playbook ./main/recipes/natural_language_processing/chatbot/provision/playbook.yml \
           -i terraform-test-environment-module/hosts.ini \
           --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }}
         env:
@@ -105,11 +100,11 @@ jobs:
           python-version: '3.11'

       - name: Install Dependencies
-        working-directory: ./main/recipes/${{ matrix.app_path }}
+        working-directory: ./main/recipes/natural_language_processing/chatbot
         run: make install

       - name: Run Integration Tests
-        working-directory: ./main/recipes/${{ matrix.app_path }}
+        working-directory: ./main/recipes/natural_language_processing/chatbot
         run: make integration-tests
         env:
           URL: ${{ steps.terraform-output.outputs.url }}
@@ -144,14 +139,8 @@ jobs:
       matrix:
         include:
           - image: llamacpp_python
-          - image: llamacpp_python_vulkan
-          - image: llamacpp_python_cuda
           - image: whispercpp
           - image: chatbot
-          - image: summarizer
-          - image: codegen
-          - image: rag
-          - image: transcribe
     steps:
       - name: Login to registry
         uses: redhat-actions/podman-login@v1.7
@@ -178,23 +167,14 @@ jobs:
         env:
           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

-  test-make-bootc:
+  test-make-targets:
     if: github.repository == 'containers-mirror/ai-lab-recipes'
     runs-on: ubuntu-22.04-2core
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - app_path: natural_language_processing/chatbot
-          - app_path: natural_language_processing/summarizer
-          - app_path: natural_language_processing/codegen
-          - app_path: natural_language_processing/rag
-          - app_path: audio/audio_to_text
     steps:
       - uses: actions/checkout@v4.1.1

-      - name:
-        working-directory: ./recipes/${{ matrix.app_path }}
+      - name: chatbot
+        working-directory: ./recipes/natural_language_processing/chatbot
         run: make bootc

       - name: Publish Job Results to Slack
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 62e3bc91d..5fd86cc04 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -1,9 +1,10 @@
 APP := llamacpp_python
+IMAGE_BASE := llamacpp-python
 PORT := 8001

-IMAGE := quay.io/ai-lab/$(APP):latest
-CUDA_IMAGE := quay.io/ai-lab/$(APP)_cuda:latest
-VULKAN_IMAGE := quay.io/ai-lab/$(APP)_vulkan:latest
+IMAGE := quay.io/ai-lab/$(IMAGE_BASE):latest
+CUDA_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-cuda:latest
+VULKAN_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-vulkan:latest

 # ----- MODEL OPTIONS -----

@@ -43,7 +44,7 @@ build-cuda:

 .PHONY: build-vulkan
 build-vulkan:
-	podman build --squash-all -t $(VULKAN_IMAGE) . -f cuda/Containerfile
+	podman build --squash-all -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

 .PHONY: download-model-tiny-llama
 download-model-tiny-llama:
@@ -67,6 +68,11 @@ run:
 	cd ../../models && \
 	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)

+.PHONY: run-cuda
+run-cuda:
+	cd ../../models && \
+	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(CUDA_IMAGE)
+
 .PHONY: test
 test:
 	curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py
index 380262b1f..f3c088aa0 100644
--- a/model_servers/llamacpp_python/tests/conftest.py
+++ b/model_servers/llamacpp_python/tests/conftest.py
@@ -1,8 +1,31 @@
 import pytest_container
 import os

+CUDA_MS = pytest_container.Container(
+    url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['CUDA_IMAGE_NAME']}",
+    volume_mounts=[
+        pytest_container.container.BindMount(
+            container_path="/locallm/models/model.gguf",
+            host_path=f"./model.gguf",
+            flags=["ro"]
+        )
+    ],
+    extra_environment_variables={
+        "MODEL_PATH": "/locallm/models/model.gguf",
+        "HOST": "0.0.0.0",
+        "PORT": "8001"
+    },
+    forwarded_ports=[
+        pytest_container.PortForwarding(
+            container_port=8001,
+            host_port=8001
+        )
+    ],
+    extra_launch_args=["--device", "nvidia.com/gpu=all"],
+)
+
 MS = pytest_container.Container(
-    url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
+    url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['BASE_IMAGE_NAME']}",
     volume_mounts=[
         pytest_container.container.BindMount(
             container_path="/locallm/models/model.gguf",
diff --git a/model_servers/llamacpp_python/tests/test_alive.py b/model_servers/llamacpp_python/tests/test_alive.py
index fcad510a0..1f621887a 100644
--- a/model_servers/llamacpp_python/tests/test_alive.py
+++ b/model_servers/llamacpp_python/tests/test_alive.py
@@ -1,9 +1,9 @@
 import pytest_container
 from .conftest import MS
+from .conftest import CUDA_MS
 import tenacity

-CONTAINER_IMAGES = [MS]
-
+CONTAINER_IMAGES = [MS, CUDA_MS]

 def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
     assert auto_container.connection.file("/etc/os-release").exists
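
Note on the build step: it now takes its Containerfile path, build context,
and target platforms from the new per-entry matrix keys (directory, flavor,
platforms). A rough local equivalent for the whispercpp entry is sketched
below; the tag is illustrative, and the --manifest flag is an assumption
(multi-arch buildah builds assemble a manifest list, a detail the GitHub
action normally handles internally).

    # Approximate local equivalent of the buildah-build step for whispercpp;
    # tag and manifest name are illustrative, not taken from the patch.
    buildah build \
      --platform linux/amd64,linux/arm64 \
      --manifest whispercpp:latest \
      -f ./model_servers/whispercpp/base/Containerfile \
      model_servers/whispercpp/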
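
The new run-cuda Makefile target mirrors run but adds GPU passthrough via
--device nvidia.com/gpu=all and launches the CUDA image. A minimal sketch of
exercising it locally, assuming podman with NVIDIA CDI devices configured;
"make mistral" matches the workflow's "make ${{ matrix.model }}" download
step:

    cd model_servers/llamacpp_python
    make build-cuda   # builds $(CUDA_IMAGE) from cuda/Containerfile
    make mistral      # fetches the model the server will mount
    make run-cuda     # starts the server on $(PORT) with the GPU attached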
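
Because tests/conftest.py constructs both MS and CUDA_MS at import time,
REGISTRY, BASE_IMAGE_NAME, and CUDA_IMAGE_NAME must all be set for every
pytest run, not only on CUDA-capable runners. A sketch of reproducing the
workflow's "Run tests" step outside CI follows; the registry prefix and image
names are illustrative assumptions, and should match however the images were
tagged locally:

    # mirrors the workflow's "Run tests" step (env values are assumptions)
    cd model_servers/llamacpp_python
    REGISTRY=ghcr.io \
    IMAGE_NAME=llamacpp_python \
    BASE_IMAGE_NAME=llamacpp_python \
    CUDA_IMAGE_NAME=llamacpp_python_cuda \
    make test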