From 6fed29963d7238eefba2686ca4eff1dacaaa355e Mon Sep 17 00:00:00 2001 From: sallyom Date: Mon, 19 Feb 2024 13:15:30 -0500 Subject: [PATCH 1/2] update model images to quay.io/redhat-et/models:* Signed-off-by: sallyom --- chatbot-langchain/quadlet/chatbot.image | 2 +- chatbot-langchain/quadlet/chatbot.yaml | 2 +- code-generation/quadlet/codegen.image | 2 +- code-generation/quadlet/codegen.yaml | 6 +++--- models/Containerfile | 1 + summarizer-langchain/quadlet/summarizer.image | 2 +- summarizer-langchain/quadlet/summarizer.yaml | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) diff --git a/chatbot-langchain/quadlet/chatbot.image b/chatbot-langchain/quadlet/chatbot.image index d4967ceb..4ca5eaa3 100644 --- a/chatbot-langchain/quadlet/chatbot.image +++ b/chatbot-langchain/quadlet/chatbot.image @@ -2,6 +2,6 @@ WantedBy=chatbot.service [Image] -Image=quay.io/sallyom/models:mistral-7b-gguf +Image=quay.io/redhat-et/locallm-models:mistral-7b-instruct-v0.1.Q4_K_S.gguf Image=quay.io/redhat-et/locallm-model-service:latest Image=quay.io/redhat-et/locallm-chatbot:latest diff --git a/chatbot-langchain/quadlet/chatbot.yaml b/chatbot-langchain/quadlet/chatbot.yaml index 1a06e1c5..9ec6dbb8 100644 --- a/chatbot-langchain/quadlet/chatbot.yaml +++ b/chatbot-langchain/quadlet/chatbot.yaml @@ -7,7 +7,7 @@ metadata: spec: initContainers: - name: model-file - image: quay.io/sallyom/models:mistral-7b-gguf + image: quay.io/redhat-et/locallm-models:mistral-7b-instruct-v0.1.Q4_K_S.gguf command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_S.gguf", "/shared/"] volumeMounts: - name: model-file diff --git a/code-generation/quadlet/codegen.image b/code-generation/quadlet/codegen.image index 9fc113ae..1917c1d2 100644 --- a/code-generation/quadlet/codegen.image +++ b/code-generation/quadlet/codegen.image @@ -2,6 +2,6 @@ WantedBy=codegen.service [Image] -Image=quay.io/sallyom/models:mistral-7b-gguf +Image=quay.io/redhat-et/locallm-models:codellama-7b-instruct.Q4_K_M.gguf 
Image=quay.io/redhat-et/locallm-model-service:latest Image=quay.io/redhat-et/locallm-codegen:latest diff --git a/code-generation/quadlet/codegen.yaml b/code-generation/quadlet/codegen.yaml index 873c5442..214a32ad 100644 --- a/code-generation/quadlet/codegen.yaml +++ b/code-generation/quadlet/codegen.yaml @@ -7,8 +7,8 @@ metadata: spec: initContainers: - name: model-file - image: quay.io/sallyom/models:mistral-7b-gguf - command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_S.gguf", "/shared/"] + image: quay.io/redhat-et/locallm-models:codellama-7b-instruct.Q4_K_M.gguf + command: ['/usr/bin/install', "/model/codellama-7b-instruct.Q4_K_M.gguf", "/shared/"] volumeMounts: - name: model-file mountPath: /shared @@ -29,7 +29,7 @@ spec: - name: PORT value: 8001 - name: MODEL_PATH - value: /model/mistral-7b-instruct-v0.1.Q4_K_S.gguf + value: /model/codellama-7b-instruct.Q4_K_M.gguf image: quay.io/redhat-et/locallm-model-service:latest name: codegen-model-service ports: diff --git a/models/Containerfile b/models/Containerfile index 0c1254de..981c85f4 100644 --- a/models/Containerfile +++ b/models/Containerfile @@ -1,5 +1,6 @@ #https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf #https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf +#https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf # podman build --build-arg MODEL_URL=https://... -t quay.io/yourimage . 
FROM registry.access.redhat.com/ubi9/ubi-micro:9.3-13 ARG MODEL_URL diff --git a/summarizer-langchain/quadlet/summarizer.image b/summarizer-langchain/quadlet/summarizer.image index 5b0ff5e5..862cb152 100644 --- a/summarizer-langchain/quadlet/summarizer.image +++ b/summarizer-langchain/quadlet/summarizer.image @@ -2,6 +2,6 @@ WantedBy=summarizer.service [Image] -Image=quay.io/sallyom/models:mistral-7b-gguf +Image=quay.io/redhat-et/locallm-models:mistral-7b-instruct-v0.1.Q4_K_S.gguf Image=quay.io/redhat-et/locallm-model-service:latest Image=quay.io/redhat-et/locallm-text-summarizer:latest diff --git a/summarizer-langchain/quadlet/summarizer.yaml b/summarizer-langchain/quadlet/summarizer.yaml index 9c5e16d1..96d35b33 100644 --- a/summarizer-langchain/quadlet/summarizer.yaml +++ b/summarizer-langchain/quadlet/summarizer.yaml @@ -7,7 +7,7 @@ metadata: spec: initContainers: - name: model-file - image: quay.io/sallyom/models:mistral-7b-gguf + image: quay.io/redhat-et/locallm-models:mistral-7b-instruct-v0.1.Q4_K_S.gguf command: ['/usr/bin/install', "/model/mistral-7b-instruct-v0.1.Q4_K_S.gguf", "/shared/"] volumeMounts: - name: model-file From 3b645c7b0307a6d04d868429366d67997855cf15 Mon Sep 17 00:00:00 2001 From: sallyom Date: Mon, 19 Feb 2024 14:04:00 -0500 Subject: [PATCH 2/2] update README and add images list Signed-off-by: sallyom --- README.md | 35 +++++++++++++++++++++++++++-------- locallm-images.md | 17 +++++++++++++++++ 2 files changed, 44 insertions(+), 8 deletions(-) create mode 100644 locallm-images.md diff --git a/README.md b/README.md index 476be3ec..e063c17e 100644 --- a/README.md +++ b/README.md @@ -7,18 +7,34 @@ to production quicker. 
## Current Locallm Services: +* [Model Service](#model-service) * [Chatbot](#chatbot) * [Text Summarization](#text-summarization) +* [Code Generation](#code-generation) +* [RAG](#rag) (Retrieval Augmented Generation) * [Fine-tuning](#fine-tuning) +### Model service + +A model service that can be used for various applications with various models is included in this repository. +Learn how to build and run the model service here: [Playground model service](/playground/). + ### Chatbot -A simple chatbot using the gradio UI. Learn how to build and run this model service here: [Chatbot](/chatbot/). +A simple chatbot using the [Streamlit UI](https://docs.streamlit.io/). Learn how to build and run this application here: [Chatbot](/chatbot-langchain/). ### Text Summarization -An LLM app that can summarize arbitrarily long text inputs. Learn how to build and run this model service here: -[Text Summarization](/summarizer/). +An LLM app that can summarize arbitrarily long text inputs with the [streamlit UI](https://docs.streamlit.io/). Learn how to build and run this application here: +[Text Summarization](/summarizer-langchain/). + +### Code generation + +A simple chatbot using the [Streamlit UI](https://docs.streamlit.io/). Learn how to build and run this application here: [Code Generation](/code-generation/). + +### RAG + +A chatbot using the [Streamlit UI](https://docs.streamlit.io/) and Retrieval Augmented Generation. Learn how to build and run this application here: [RAG](/rag-langchain/). ### Fine Tuning @@ -26,14 +42,17 @@ This application allows a user to select a model and a data set they'd like to f Once the application finishes, it outputs a new fine-tuned model for the user to apply to other LLM services. Learn how to build and run this model training job here: [Fine-tuning](/finetune/). 
+## Current Locallm Images built from this repository + +Images for all sample applications and models are tracked in [locallm-images.md](./locallm-images.md) + ## Architecture ![](/assets/arch.jpg) -The diagram above indicates the general architecture for each of the individual model services contained in this repo. -The core code available here is the "LLM Task Service" and the "API Server", bundled together under `builds/model_service`. -With an appropriately chosen model,`model_service/builds` contains the Containerfiles required to build a model-service. +The diagram above indicates the general architecture for each of the individual applications contained in this repo. +The core code available here is the "LLM Task Service" and the "API Server", bundled together under `./playground`. +With an appropriately chosen model, [./playground/Containerfile](./playground/Containerfile) can build an image to run the model-service. Model services are intended to be light-weight and run with smaller hardware footprints (hence the `locallm` name), but they can be run on any hardware that supports containers and can be scaled up if needed. -Within the chatbot and summarizer folder, there is an `ai_applications` folder for each model service. -These examples show how a developer might interact with the model service based on their requirements. +Within each sample application folder, there is an inference implementation in the `./builds` folder with a Containerfile for building the image. These examples show how a developer might interact with the model service based on their requirements. 
diff --git a/locallm-images.md b/locallm-images.md new file mode 100644 index 00000000..f76bc1c6 --- /dev/null +++ b/locallm-images.md @@ -0,0 +1,17 @@ +## Images currently built from GH Actions in this repository + +- quay.io/redhat-et/locallm-model-service:latest +- quay.io/redhat-et/locallm-text-summarizer:latest +- quay.io/redhat-et/locallm-chatbot:latest +- quay.io/redhat-et/locallm-rag:latest +- quay.io/redhat-et/locallm-codegen:latest +- quay.io/redhat-et/locallm-chromadb:latest + +## Model Images currently in `quay.io/redhat-et/locallm-models` + +- quay.io/redhat-et/locallm-models:llama-2-7b-chat.Q5_K_S.gguf + - [model download link](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf) +- quay.io/redhat-et/locallm-models:mistral-7b-instruct-v0.1.Q4_K_S.gguf + - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_S.gguf) +- quay.io/redhat-et/locallm-models:codellama-7b-instruct.Q4_K_M.gguf + - [model download link](https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf)