From 932f4dfc2537fbcd79605673972f1b1216989e2c Mon Sep 17 00:00:00 2001
From: sallyom
Date: Thu, 11 Apr 2024 14:20:11 -0400
Subject: [PATCH] fix

Signed-off-by: sallyom
---
 .github/workflows/model_servers.yaml     | 41 +++++++++++++++----
 .github/workflows/rag.yaml               | 17 +++++---
 .github/workflows/testing-framework.yaml | 36 ++++------------
 model_servers/llamacpp_python/Makefile   | 14 +++++--
 .../llamacpp_python/tests/conftest.py    | 25 ++++++++++-
 .../llamacpp_python/tests/test_alive.py  |  4 +-
 6 files changed, 87 insertions(+), 50 deletions(-)

diff --git a/.github/workflows/model_servers.yaml b/.github/workflows/model_servers.yaml
index 83d44a235..b861e70ff 100644
--- a/.github/workflows/model_servers.yaml
+++ b/.github/workflows/model_servers.yaml
@@ -25,17 +25,33 @@ jobs:
       matrix:
         include:
           - image_name: llamacpp_python
+            cuda_image_name: llamacpp_python_cuda
+            base_image_name: llamacpp_python
             model: mistral
             flavor: base
-          - image_name: llamacpp_python_vulkan
-            model: mistral
-            flavor: vulkan
+            directory: llamacpp_python
+            platforms: linux/amd64,linux/arm64
+          #- image_name: llamacpp_python_vulkan
+          #  cuda_image_name: llamacpp_python_cuda
+          #  base_image_name: llamacpp_python
+          #  model: mistral
+          #  flavor: vulkan
+          #  directory: llamacpp_python
+          #  platforms: linux/arm64
           - image_name: llamacpp_python_cuda
+            cuda_image_name: llamacpp_python_cuda
+            base_image_name: llamacpp_python
             model: mistral
             flavor: cuda
+            directory: llamacpp_python
+            platforms: linux/amd64
           - image_name: whispercpp
+            cuda_image_name: llamacpp_python_cuda
+            base_image_name: llamacpp_python
             model: whisper-small
             flavor: base
+            directory: whispercpp
+            platforms: linux/amd64,linux/arm64
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -46,6 +62,11 @@ jobs:
       ports:
         - 5000:5000
     steps:
+      - name: Remove unnecessary files
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
       - uses: actions/checkout@v4.1.1

       - name: Install qemu dependency
@@ -58,13 +79,13 @@ jobs:
         uses: redhat-actions/buildah-build@v2.13
         with:
           image: ${{ env.REGISTRY }}/${{ github.repository_owner}}/${{ matrix.image_name }}
-          platforms: linux/amd64, linux/arm64
+          platforms: ${{ matrix.platforms }}
           tags: latest
-          containerfiles: ./model_servers/${{ matrix.image_name }}/${{ matrix.flavor }}/Containerfile
-          context: model_servers/${{ matrix.image_name }}/
+          containerfiles: ./model_servers/${{ matrix.directory }}/${{ matrix.flavor }}/Containerfile
+          context: model_servers/${{ matrix.directory }}/

       - name: Download model
-        working-directory: ./model_servers/${{ matrix.image_name }}/
+        working-directory: ./model_servers/${{ matrix.directory }}/
         run: make ${{ matrix.model }}

       - name: Set up Python
@@ -73,14 +94,16 @@ jobs:
           python-version: '3.11'

       - name: Install python dependencies
-        working-directory: ./model_servers/${{ matrix.image_name }}/
+        working-directory: ./model_servers/${{ matrix.directory }}/
         run: make install

       - name: Run tests
-        working-directory: ./model_servers/${{ matrix.image_name }}/
+        working-directory: ./model_servers/${{ matrix.directory }}/
         run: make test
         env:
           IMAGE_NAME: ${{ matrix.image_name }}
+          BASE_IMAGE_NAME: ${{ matrix.base_image_name }}
+          CUDA_IMAGE_NAME: ${{ matrix.cuda_image_name }}

       - name: Login to Container Registry
         if: github.event_name == 'push' && github.ref == 'refs/heads/main'
diff --git a/.github/workflows/rag.yaml b/.github/workflows/rag.yaml
index b953a8946..3fbe96ef2 100644
--- a/.github/workflows/rag.yaml
+++ b/.github/workflows/rag.yaml
@@ -4,15 +4,15 @@ on:
   pull_request:
     branches:
       - main
-    # paths:
-    #   - ./recipes/natural_language_processing/rag/**
-    #   - .github/workflows/rag.yaml
+    paths:
+      - ./recipes/natural_language_processing/rag/**
+      - .github/workflows/rag.yaml
   push:
     branches:
       - main
-    # paths:
-    #   - ./recipes/natural_language_processing/rag/**
-    #   - .github/workflows/rag.yaml
+    paths:
+      - ./recipes/natural_language_processing/rag/**
+      - .github/workflows/rag.yaml

   workflow_dispatch:
@@ -32,6 +32,11 @@ jobs:
       ports:
         - 5000:5000
     steps:
+      - name: Remove unnecessary files
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
       - uses: actions/checkout@v4.1.1

       - name: Install qemu dependency
diff --git a/.github/workflows/testing-framework.yaml b/.github/workflows/testing-framework.yaml
index 400d2c9d0..ca9be4179 100644
--- a/.github/workflows/testing-framework.yaml
+++ b/.github/workflows/testing-framework.yaml
@@ -2,7 +2,7 @@ name: Testing Framework

 on:
   schedule: # schedule the job to run every hour
-    - cron: '0 */6 * * *'
+    - cron: '0 * * * *'

   workflow_dispatch:

@@ -42,11 +42,6 @@ jobs:
           - arch: amd64 # gpu enabled
             aws_image_type: g4dn.xlarge
             aws_ami_architecture: x86_64
-          - app_path: natural_language_processing/chatbot
-          - app_path: natural_language_processing/summarizer
-          - app_path: natural_language_processing/codegen
-          - app_path: natural_language_processing/rag
-          - app_path: audio/audio_to_text
     steps:
       - name: Checkout
         uses: actions/checkout@v4.1.1
@@ -89,11 +84,11 @@ jobs:

       - name: Ansible Collections
         run: ansible-galaxy install -r ./provision/requirements.yml
-        working-directory: ./main/recipes/${{ matrix.app_path }}
+        working-directory: ./main/recipes/natural_language_processing/chatbot

       - name: Provision
         run: |
-          ansible-playbook ./main/recipes/${{ matrix.app_path }}/provision/playbook.yml \
+          ansible-playbook ./main/recipes/natural_language_processing/chatbot/provision/playbook.yml \
           -i terraform-test-environment-module/hosts.ini \
           --private-key=terraform-test-environment-module/${{ steps.terraform-output.outputs.pem_filename }}
         env:
@@ -105,11 +100,11 @@ jobs:
           python-version: '3.11'

       - name: Install Dependencies
-        working-directory: ./main/recipes/${{ matrix.app_path }}
+        working-directory: ./main/recipes/natural_language_processing/chatbot
         run: make install

       - name: Run Integration Tests
-        working-directory: ./main/recipes/${{ matrix.app_path }}
+        working-directory: ./main/recipes/natural_language_processing/chatbot
         run: make integration-tests
         env:
           URL: ${{ steps.terraform-output.outputs.url }}
@@ -144,14 +139,8 @@ jobs:
       matrix:
         include:
           - image: llamacpp_python
-          - image: llamacpp_python_vulkan
-          - image: llamacpp_python_cuda
           - image: whispercpp
           - image: chatbot
-          - image: summarizer
-          - image: codegen
-          - image: rag
-          - image: transcribe
     steps:
       - name: Login to registry
         uses: redhat-actions/podman-login@v1.7
@@ -178,23 +167,14 @@ jobs:
         env:
           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

-  test-make-bootc:
+  test-make-targets:
     if: github.repository == 'containers-mirror/ai-lab-recipes'
     runs-on: ubuntu-22.04-2core
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - app_path: natural_language_processing/chatbot
-          - app_path: natural_language_processing/summarizer
-          - app_path: natural_language_processing/codegen
-          - app_path: natural_language_processing/rag
-          - app_path: audio/audio_to_text
     steps:
       - uses: actions/checkout@v4.1.1

-      - name:
-        working-directory: ./recipes/${{ matrix.app_path }}
+      - name: chatbot
+        working-directory: ./recipes/natural_language_processing/chatbot
         run: make bootc

       - name: Publish Job Results to Slack
diff --git a/model_servers/llamacpp_python/Makefile b/model_servers/llamacpp_python/Makefile
index 62e3bc91d..5fd86cc04 100644
--- a/model_servers/llamacpp_python/Makefile
+++ b/model_servers/llamacpp_python/Makefile
@@ -1,9 +1,10 @@
 APP := llamacpp_python
+IMAGE_BASE := llamacpp-python
 PORT := 8001

-IMAGE := quay.io/ai-lab/$(APP):latest
-CUDA_IMAGE := quay.io/ai-lab/$(APP)_cuda:latest
-VULKAN_IMAGE := quay.io/ai-lab/$(APP)_vulkan:latest
+IMAGE := quay.io/ai-lab/$(IMAGE_BASE):latest
+CUDA_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-cuda:latest
+VULKAN_IMAGE := quay.io/ai-lab/$(IMAGE_BASE)-vulkan:latest

 # ----- MODEL OPTIONS -----

@@ -43,7 +44,7 @@ build-cuda:

 .PHONY: build-vulkan
 build-vulkan:
-	podman build --squash-all -t $(VULKAN_IMAGE) . -f cuda/Containerfile
+	podman build --squash-all -t $(VULKAN_IMAGE) . -f vulkan/Containerfile

 .PHONY: download-model-tiny-llama
 download-model-tiny-llama:
@@ -67,6 +68,11 @@ run:
 	cd ../../models && \
 	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host $(IMAGE)

+.PHONY: run-cuda
+run-cuda:
+	cd ../../models && \
+	podman run -it -d -p $(PORT):$(PORT) -v ./$(SELECTED_MODEL_NAME):$(MODELS_PATH)/model.gguf:$(BIND_MOUNT_OPTIONS) -e MODEL_PATH=$(MODELS_PATH)/model.gguf -e HOST=0.0.0.0 -e PORT=$(PORT) --net=host --device nvidia.com/gpu=all $(CUDA_IMAGE)
+
 .PHONY: test
 test:
 	curl -H "Cache-Control: no-cache" -s -S -L -f $(SELECTED_MODEL_URL) -z ./model.gguf -o ./model.gguf.tmp && mv -f ./model.gguf.tmp ./model.gguf 2>/dev/null || rm -f ./model.gguf.tmp ./model.gguf
diff --git a/model_servers/llamacpp_python/tests/conftest.py b/model_servers/llamacpp_python/tests/conftest.py
index 380262b1f..f3c088aa0 100644
--- a/model_servers/llamacpp_python/tests/conftest.py
+++ b/model_servers/llamacpp_python/tests/conftest.py
@@ -1,8 +1,31 @@
 import pytest_container
 import os

+CUDA_MS = pytest_container.Container(
+    url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['CUDA_IMAGE_NAME']}",
+    volume_mounts=[
+        pytest_container.container.BindMount(
+            container_path="/locallm/models/model.gguf",
+            host_path=f"./model.gguf",
+            flags=["ro"]
+        )
+    ],
+    extra_environment_variables={
+        "MODEL_PATH": "/locallm/models/model.gguf",
+        "HOST": "0.0.0.0",
+        "PORT": "8001"
+    },
+    forwarded_ports=[
+        pytest_container.PortForwarding(
+            container_port=8001,
+            host_port=8001
+        )
+    ],
+    extra_launch_args=["--device", "nvidia.com/gpu=all"],
+)
+
 MS = pytest_container.Container(
-    url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['IMAGE_NAME']}",
+    url=f"containers-storage:{os.environ['REGISTRY']}/containers/{os.environ['BASE_IMAGE_NAME']}",
     volume_mounts=[
         pytest_container.container.BindMount(
             container_path="/locallm/models/model.gguf",
diff --git a/model_servers/llamacpp_python/tests/test_alive.py b/model_servers/llamacpp_python/tests/test_alive.py
index fcad510a0..1f621887a 100644
--- a/model_servers/llamacpp_python/tests/test_alive.py
+++ b/model_servers/llamacpp_python/tests/test_alive.py
@@ -1,9 +1,9 @@
 import pytest_container
 from .conftest import MS
+from .conftest import CUDA_MS
 import tenacity

-CONTAINER_IMAGES = [MS]
-
+CONTAINER_IMAGES = [MS, CUDA_MS]

 def test_etc_os_release_present(auto_container: pytest_container.container.ContainerData):
     assert auto_container.connection.file("/etc/os-release").exists
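
Note on the build step: it now takes its Containerfile path, build context,
and target platforms from the new per-entry matrix keys (directory, flavor,
platforms). A rough local equivalent for the whispercpp entry is sketched
below; the tag is illustrative, and the --manifest flag is an assumption
(multi-arch buildah builds assemble a manifest list, a detail the GitHub
action normally handles internally).

    # Approximate local equivalent of the buildah-build step for whispercpp;
    # tag and manifest name are illustrative, not taken from the patch.
    buildah build \
      --platform linux/amd64,linux/arm64 \
      --manifest whispercpp:latest \
      -f ./model_servers/whispercpp/base/Containerfile \
      model_servers/whispercpp/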
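
The new run-cuda Makefile target mirrors run but adds GPU passthrough via
--device nvidia.com/gpu=all and launches the CUDA image. A minimal sketch of
exercising it locally, assuming podman with NVIDIA CDI devices configured;
"make mistral" matches the workflow's "make ${{ matrix.model }}" download
step:

    cd model_servers/llamacpp_python
    make build-cuda   # builds $(CUDA_IMAGE) from cuda/Containerfile
    make mistral      # fetches the model the server will mount
    make run-cuda     # starts the server on $(PORT) with the GPU attached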
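
Because tests/conftest.py constructs both MS and CUDA_MS at import time,
REGISTRY, BASE_IMAGE_NAME, and CUDA_IMAGE_NAME must all be set for every
pytest run, not only on CUDA-capable runners. A sketch of reproducing the
workflow's "Run tests" step outside CI follows; the registry prefix and image
names are illustrative assumptions, and should match however the images were
tagged locally:

    # mirrors the workflow's "Run tests" step (env values are assumptions)
    cd model_servers/llamacpp_python
    REGISTRY=ghcr.io \
    IMAGE_NAME=llamacpp_python \
    BASE_IMAGE_NAME=llamacpp_python \
    CUDA_IMAGE_NAME=llamacpp_python_cuda \
    make test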