From 92198f78d58ab15e5b998cf8b700cfe58a750f14 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 02:52:51 +0530 Subject: [PATCH 01/34] Split templates --- .../templates/ca-certs-configmap.yaml | 12 + helm/h2ogpt-chart/templates/config-map.yaml | 69 -- helm/h2ogpt-chart/templates/deployment.yaml | 884 ------------------ .../templates/h2ogpt-configmap.yaml | 13 + .../templates/h2ogpt-deployment.yaml | 373 ++++++++ .../templates/h2ogpt-service.yaml | 49 + .../templates/lmdeploy-configmap.yaml | 13 + .../templates/lmdeploy-deployment.yaml | 163 ++++ .../templates/lmdeploy-service.yaml | 15 + helm/h2ogpt-chart/templates/service.yaml | 97 -- .../templates/tgi-configmap.yamal | 13 + .../templates/tgi-deployment.yaml | 175 ++++ helm/h2ogpt-chart/templates/tgi-service.yaml | 15 + .../templates/vllm-configmap.yaml | 13 + .../templates/vllm-deployment.yaml | 167 ++++ helm/h2ogpt-chart/templates/vllm-service.yaml | 15 + 16 files changed, 1036 insertions(+), 1050 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/ca-certs-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/config-map.yaml delete mode 100644 helm/h2ogpt-chart/templates/deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-service.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-service.yaml delete mode 100644 helm/h2ogpt-chart/templates/service.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-configmap.yamal create mode 100644 helm/h2ogpt-chart/templates/tgi-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-service.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-deployment.yaml 
create mode 100644 helm/h2ogpt-chart/templates/vllm-service.yaml diff --git a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml new file mode 100644 index 000000000..a2580b771 --- /dev/null +++ b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.caCertificates}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: + root-ca-bundle.crt: | + {{ .Values.caCertificates | nindent 4 | trim }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/config-map.yaml b/helm/h2ogpt-chart/templates/config-map.yaml deleted file mode 100644 index 64aca5503..000000000 --- a/helm/h2ogpt-chart/templates/config-map.yaml +++ /dev/null @@ -1,69 +0,0 @@ - -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.h2ogpt.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.vllm.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . 
| nindent 4 }} -data: -{{- range $key, $value := .Values.vllm.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.lmdeploy.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.caCertificates}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: - root-ca-bundle.crt: | - {{ .Values.caCertificates | nindent 4 | trim }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/deployment.yaml b/helm/h2ogpt-chart/templates/deployment.yaml deleted file mode 100644 index d89d8a3cb..000000000 --- a/helm/h2ogpt-chart/templates/deployment.yaml +++ /dev/null @@ -1,884 +0,0 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. 
Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }} -spec: - {{- if not .Values.h2ogpt.autoscaling.enabled }} - replicas: {{ .Values.h2ogpt.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }} - {{- if .Values.h2ogpt.updateStrategy }} - strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.h2ogpt.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }} - {{- with .Values.h2ogpt.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.h2ogpt.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.h2ogpt.podAffinity }} - podAntiAffinity: - {{- if .Values.h2ogpt.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.h2ogpt.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.h2ogpt.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - - name: {{ include "h2ogpt.fullname" . 
}} - securityContext: - {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} - image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} - command: ["/bin/bash", "-c"] - {{- if .Values.h2ogpt.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.h2ogpt.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} - args: - - > - python3 /workspace/generate.py - {{- end }} - {{- end }} - ports: - - name: http - containerPort: 7860 - protocol: TCP - - name: gpt - containerPort: 8888 - protocol: TCP - - name: openai - containerPort: 5000 - protocol: TCP - - name: function - containerPort: 5002 - protocol: TCP - - name: agent - containerPort: 5004 - protocol: TCP - {{- if .Values.h2ogpt.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.h2ogpt.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.h2ogpt.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-config - env: - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" - {{- end }} - {{- range $key, $value := .Values.h2ogpt.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} - {{- if .Values.h2ogpt.externalLLM.enabled }} - - name: H2OGPT_MODEL_LOCK - value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} - - name: H2OGPT_SCORE_MODEL - value: None - {{- end }} - {{- if .Values.h2ogpt.visionModels.enabled }} - - name: H2OGPT_VISIBLE_VISION_MODELS - value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} - - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE - value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . 
}}-volume - mountPath: /workspace/save - subPath: save - {{- if .Values.caCertificates }} - - name: ca-certificates - mountPath: /etc/ssl/certs/root-ca-bundle.crt - subPath: root-ca-bundle.crt - {{- end }} - {{ with .Values.h2ogpt.extraVolumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - volumes: - - name: {{ include "h2ogpt.fullname" . }}-volume - {{- if not .Values.h2ogpt.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - {{- end }} - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - {{- if .Values.caCertificates }} - - name: ca-certificates - configMap: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - {{- end }} - {{- with .Values.h2ogpt.extraVolumes }} - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} - ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - {{- if not .Values.tgi.autoscaling.enabled }} - replicas: {{ .Values.tgi.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference -spec: - {{- if not .Values.vllm.autoscaling.enabled }} - replicas: {{ .Values.vllm.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- if .Values.vllm.updateStrategy }} - strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.vllm.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- with .Values.vllm.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.vllm.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.vllm.podAffinity }} - podAntiAffinity: - {{- if .Values.vllm.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.vllm.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.vllm.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.vllm.storage.class | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference -spec: - {{- if not .Values.lmdeploy.autoscaling.enabled }} - replicas: {{ .Values.lmdeploy.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- if .Values.lmdeploy.updateStrategy }} - strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.lmdeploy.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- with .Values.lmdeploy.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.lmdeploy.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.lmdeploy.podAffinity }} - podAntiAffinity: - {{- if .Values.lmdeploy.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.lmdeploy.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.lmdeploy.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - securityContext: - {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} - image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} - command: ["lmdeploy"] - args: - - "serve" - - "api_server" -{{- range $arg := .Values.lmdeploy.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 23333 - protocol: TCP - {{- if .Values.lmdeploy.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.lmdeploy.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.lmdeploy.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - - name: HF_HOME - value: "/workspace/.cache" - {{- range $key, $value := .Values.lmdeploy.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - {{- if not .Values.lmdeploy.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml new file mode 100644 index 000000000..03cb57751 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.h2ogpt.overrideConfig }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml new file mode 100644 index 000000000..d240a7b6e --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -0,0 +1,373 @@ +{{- if and .Values.vllm.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and vLLM cannot be enabled at the same time. 
Enable only one and try again" }} +{{- end }} +{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} + {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if .Values.h2ogpt.stack.enabled }} + {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} + {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} + {{- end }} +{{- end }} + +{{- if .Values.h2ogpt.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }} +spec: + {{- if not .Values.h2ogpt.autoscaling.enabled }} + replicas: {{ .Values.h2ogpt.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }} + {{- if .Values.h2ogpt.updateStrategy }} + strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.h2ogpt.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }} + {{- with .Values.h2ogpt.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.h2ogpt.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.h2ogpt.podAffinity }} + podAntiAffinity: + {{- if .Values.h2ogpt.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . 
}} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.h2ogpt.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.h2ogpt.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /dev/shm + subPath: shm + {{- end }} + - name: {{ include "h2ogpt.fullname" . }} + securityContext: + {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} + image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} + command: ["/bin/bash", "-c"] + {{- if .Values.h2ogpt.stack.enabled }} + args: + - > + while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' + http://localhost:5000/v1/models)" != "200" ]]; do + echo "Waiting for inference service to become ready... (2sec)" + sleep 2 + done + + python3 /workspace/generate.py + {{- end }} + {{- if not .Values.h2ogpt.stack.enabled }} + {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} + args: + - > + python3 /workspace/generate.py + {{- end }} + {{- end }} + ports: + - name: http + containerPort: 7860 + protocol: TCP + - name: gpt + containerPort: 8888 + protocol: TCP + - name: openai + containerPort: 5000 + protocol: TCP + - name: function + containerPort: 5002 + protocol: TCP + - name: agent + containerPort: 5004 + protocol: TCP + {{- if .Values.h2ogpt.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.h2ogpt.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.h2ogpt.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-config + env: + {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" + {{- end }} + {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" + {{- end }} + {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} + - name: h2ogpt_inference_server + value: "vllm:localhost:5000" + {{- end }} + {{- range $key, $value := .Values.h2ogpt.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: OPENAI_AZURE_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_AZURE_KEY + - name: OPENAI_AZURE_API_BASE + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_AZURE_API_BASE + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_API_KEY + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: REPLICATE_API_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: REPLICATE_API_TOKEN + {{- end }} + {{- if .Values.h2ogpt.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.h2ogpt.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . 
}}-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.h2ogpt.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-volume + {{- if not .Values.h2ogpt.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} + storageClassName: {{ .Values.h2ogpt.storage.class }} + {{- end }} + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.h2ogpt.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-volume + namespace: {{ include "h2ogpt.namespace" . 
| quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.h2ogpt.storage.class }} + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml new file mode 100644 index 000000000..16417b7ff --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -0,0 +1,49 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-web + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.h2ogpt.service.webServiceAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }} + ports: + - name: http + protocol: TCP + port: {{ .Values.h2ogpt.service.webPort }} + targetPort: 7860 + - name: openai + protocol: TCP + port: {{ .Values.h2ogpt.service.openaiPort }} + targetPort: 5000 + - name: function + protocol: TCP + port: {{ .Values.h2ogpt.service.functionPort }} + targetPort: 5002 + - name: agent + protocol: TCP + port: {{ .Values.h2ogpt.service.agentsPort }} + targetPort: 5004 + type: {{ .Values.h2ogpt.service.type }} +{{- end }} +--- +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}} + ports: + - protocol: TCP + port: {{ .Values.h2ogpt.service.gptPort }} + targetPort: 8888 + type: {{ .Values.h2ogpt.service.type }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml new file mode 100644 index 000000000..7d041e79f --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.lmdeploy.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.lmdeploy.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml new file mode 100644 index 000000000..bb3240924 --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -0,0 +1,163 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference +spec: + {{- if not .Values.lmdeploy.autoscaling.enabled }} + replicas: {{ .Values.lmdeploy.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + {{- if .Values.lmdeploy.updateStrategy }} + strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.lmdeploy.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference + {{- with .Values.lmdeploy.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.lmdeploy.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lmdeploy.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.lmdeploy.podAffinity }} + podAntiAffinity: + {{- if .Values.lmdeploy.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.lmdeploy.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.lmdeploy.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lmdeploy.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference + securityContext: + {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} + image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} + command: ["lmdeploy"] + args: + - "serve" + - "api_server" +{{- range $arg := .Values.lmdeploy.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 23333 + protocol: TCP + {{- if .Values.lmdeploy.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.lmdeploy.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.lmdeploy.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + - name: HF_HOME + value: "/workspace/.cache" + {{- range $key, $value := .Values.lmdeploy.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + {{- if not .Values.lmdeploy.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} +--- +{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml new file mode 100644 index 000000000..e1dfdc4d3 --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + ports: + - protocol: TCP + port: {{ .Values.lmdeploy.service.port }} + targetPort: 23333 + type: {{ .Values.lmdeploy.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/service.yaml b/helm/h2ogpt-chart/templates/service.yaml deleted file mode 100644 index 8d3ddb73d..000000000 --- a/helm/h2ogpt-chart/templates/service.yaml +++ /dev/null @@ -1,97 +0,0 @@ -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . 
}}-web - namespace: {{ include "h2ogpt.namespace" . | quote }} - - {{- with .Values.h2ogpt.service.webServiceAnnotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - name: http - protocol: TCP - port: {{ .Values.h2ogpt.service.webPort }} - targetPort: 7860 - - name: openai - protocol: TCP - port: {{ .Values.h2ogpt.service.openaiPort }} - targetPort: 5000 - - name: function - protocol: TCP - port: {{ .Values.h2ogpt.service.functionPort }} - targetPort: 5002 - - name: agent - protocol: TCP - port: {{ .Values.h2ogpt.service.agentsPort }} - targetPort: 5004 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - protocol: TCP - port: {{ .Values.h2ogpt.service.gptPort }} - targetPort: 8888 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - ports: - - protocol: TCP - port: {{ .Values.vllm.service.port }} - targetPort: 5000 - type: {{ .Values.vllm.service.type }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - ports: - - protocol: TCP - port: {{ .Values.lmdeploy.service.port }} - targetPort: 23333 - type: {{ .Values.lmdeploy.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yamal b/helm/h2ogpt-chart/templates/tgi-configmap.yamal new file mode 100644 index 000000000..3857b92c8 --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-configmap.yamal @@ -0,0 +1,13 @@ +{{- if .Values.tgi.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.tgi.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml new file mode 100644 index 000000000..400ac6eb6 --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml @@ -0,0 +1,175 @@ +{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . 
}}-tgi-inference +spec: + {{- if not .Values.tgi.autoscaling.enabled }} + replicas: {{ .Values.tgi.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-tgi-inference + {{- if .Values.tgi.updateStrategy }} + strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.tgi.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-tgi-inference + {{- with .Values.tgi.podLabels }} + {{ toYaml . | nindent 6 }} + {{- end }} + spec: + {{- with .Values.tgi.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tgi.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.tgi.podAffinity }} + podAntiAffinity: + {{- if .Values.tgi.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.tgi.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.tgi.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tgi.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference + securityContext: + {{- toYaml .Values.tgi.securityContext | nindent 12 }} + image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" + imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} + command: [] + args: +{{- range $arg := .Values.tgi.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 80 + protocol: TCP + {{- if .Values.tgi.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.tgi.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.tgi.resources | nindent 12 }} + env: + {{- range $key, $value := .Values.tgi.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config + - secretRef: + name: {{ .Values.tgi.hfSecret }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /app/cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /data + subPath: data + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /dev/shm + subPath: shm + volumes: + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference-volume + {{- if not .Values.tgi.storage.useEphemeral}} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} + storageClassName: {{ .Values.tgi.storage.class }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.tgi.storage.class }} + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml new file mode 100644 index 000000000..63b04b36d --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-tgi-inference + ports: + - protocol: TCP + port: {{ .Values.tgi.service.port }} + targetPort: 80 + type: {{ .Values.tgi.service.type }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/vllm-configmap.yaml b/helm/h2ogpt-chart/templates/vllm-configmap.yaml new file mode 100644 index 000000000..66c187b3c --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + namespace: {{ include "h2ogpt.namespace" . 
| quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.vllm.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml new file mode 100644 index 000000000..e0228500b --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -0,0 +1,167 @@ +{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference +spec: + {{- if not .Values.vllm.autoscaling.enabled }} + replicas: {{ .Values.vllm.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- if .Values.vllm.updateStrategy }} + strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.vllm.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- with .Values.vllm.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.vllm.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.vllm.podAffinity }} + podAntiAffinity: + {{- if .Values.vllm.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . 
}} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.vllm.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.vllm.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} +--- +{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.vllm.storage.class | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml new file mode 100644 index 000000000..34678c2c5 --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + ports: + - protocol: TCP + port: {{ .Values.vllm.service.port }} + targetPort: 5000 + type: {{ .Values.vllm.service.type }} +{{- end }} \ No newline at end of file From 722b891c5e3e755670e9f0535bd93e43273042ed Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:01:09 +0530 Subject: [PATCH 02/34] Fix file name --- .../templates/{tgi-configmap.yamal => tgi-configmap.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename helm/h2ogpt-chart/templates/{tgi-configmap.yamal => tgi-configmap.yaml} (100%) diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yamal b/helm/h2ogpt-chart/templates/tgi-configmap.yaml similarity index 100% rename from helm/h2ogpt-chart/templates/tgi-configmap.yamal rename to helm/h2ogpt-chart/templates/tgi-configmap.yaml From b1d4b3c00e4f8bbcdc531c0fff0b891bbb13d93c Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:08:35 +0530 Subject: [PATCH 03/34] Move validations to validations.yaml --- .../h2ogpt-chart/templates/h2ogpt-deployment.yaml | 15 --------------- helm/h2ogpt-chart/templates/validations.yaml | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 15 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/validations.yaml diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index d240a7b6e..1ba47e84a 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -1,18 +1,3 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. 
Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} - {{- if .Values.h2ogpt.enabled }} apiVersion: apps/v1 kind: Deployment diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml new file mode 100644 index 000000000..6e9936d83 --- /dev/null +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.vllm.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} + {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if .Values.h2ogpt.stack.enabled }} + {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} + {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} + {{- end }} +{{- end }} \ No newline at end of file From 5d97a47b70f94d663fdedfe14fea5727c02f663f Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:12:33 +0530 Subject: [PATCH 04/34] Add NOTES.txt --- helm/h2ogpt-chart/templates/NOTES.txt | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 helm/h2ogpt-chart/templates/NOTES.txt diff --git a/helm/h2ogpt-chart/templates/NOTES.txt b/helm/h2ogpt-chart/templates/NOTES.txt new file mode 100644 index 000000000..c32a7790f --- /dev/null +++ b/helm/h2ogpt-chart/templates/NOTES.txt @@ -0,0 +1,8 @@ +Thank you for installing {{ .Chart.Name }}. + +Your release is named {{ .Release.Name }}. 
+ +To learn more about the release, try: + + $ helm status {{ .Release.Name }} + $ helm get all {{ .Release.Name }} \ No newline at end of file From 7d6c0077b058c95965b41015ab5ca6f92f6c6c51 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:42:48 +0530 Subject: [PATCH 05/34] Update documentation in values.yaml ( for `helm-docs` ) --- helm/h2ogpt-chart/values.yaml | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index b0e599bf4..226038aa2 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -3,6 +3,7 @@ fullnameOverride: "" namespaceOverride: "" h2ogpt: + # -- Enable h2oGPT enabled: true stack: # -- Run h2oGPT and vLLM on same pod. @@ -18,12 +19,12 @@ h2ogpt: tag: pullPolicy: - # extra volumes, for more certs, mount under /etc/ssl/more-certs + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs extraVolumes: [] + # -- Extra volume mounts extraVolumeMounts: [] - - podAffinity: # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + podAffinity: # hostname: # zone: @@ -48,9 +49,9 @@ h2ogpt: enabled: false visionModels: + # -- Enable vision models enabled: false - # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model - # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] + # -- Visible vision models, the vision model itself needs to be set via modelLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] visibleModels: [] rotateAlignResizeImage: false @@ -136,6 +137,7 @@ h2ogpt: autoscaling: {} tgi: + # -- Enable tgi enabled: false replicaCount: 1 @@ -143,9 +145,8 @@ tgi: repository: ghcr.io/huggingface/text-generation-inference tag: 0.9.3 pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. # hostname: # zone: @@ -179,6 +180,7 @@ tgi: autoscaling: {} vllm: + # -- Enable vllm enabled: false replicaCount: 1 @@ -186,9 +188,9 @@ vllm: repository: vllm/vllm-openai tag: latest pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + # hostname: # zone: @@ -248,6 +250,7 @@ vllm: autoscaling: {} lmdeploy: + # -- Enable lmdeploy enabled: false replicaCount: 1 @@ -255,9 +258,8 @@ lmdeploy: repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy tag: pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. # hostname: # zone: From 41d1f2bb672c8da30983ff66b1da778ad93e3ff3 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 02:52:51 +0530 Subject: [PATCH 06/34] Split templates --- .../templates/ca-certs-configmap.yaml | 12 + helm/h2ogpt-chart/templates/config-map.yaml | 69 -- helm/h2ogpt-chart/templates/deployment.yaml | 884 ------------------ .../templates/h2ogpt-configmap.yaml | 13 + .../templates/h2ogpt-deployment.yaml | 373 ++++++++ .../templates/h2ogpt-service.yaml | 49 + .../templates/lmdeploy-configmap.yaml | 13 + .../templates/lmdeploy-deployment.yaml | 163 ++++ .../templates/lmdeploy-service.yaml | 15 + helm/h2ogpt-chart/templates/service.yaml | 97 -- .../templates/tgi-configmap.yamal | 13 + .../templates/tgi-deployment.yaml | 175 ++++ helm/h2ogpt-chart/templates/tgi-service.yaml | 15 + .../templates/vllm-configmap.yaml | 13 + .../templates/vllm-deployment.yaml | 167 ++++ helm/h2ogpt-chart/templates/vllm-service.yaml | 15 + 16 files changed, 1036 insertions(+), 1050 deletions(-) create mode 100644 
helm/h2ogpt-chart/templates/ca-certs-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/config-map.yaml delete mode 100644 helm/h2ogpt-chart/templates/deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-service.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-service.yaml delete mode 100644 helm/h2ogpt-chart/templates/service.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-configmap.yamal create mode 100644 helm/h2ogpt-chart/templates/tgi-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-service.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-service.yaml diff --git a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml new file mode 100644 index 000000000..a2580b771 --- /dev/null +++ b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.caCertificates}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . 
| nindent 4 }} +data: + root-ca-bundle.crt: | + {{ .Values.caCertificates | nindent 4 | trim }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/config-map.yaml b/helm/h2ogpt-chart/templates/config-map.yaml deleted file mode 100644 index 64aca5503..000000000 --- a/helm/h2ogpt-chart/templates/config-map.yaml +++ /dev/null @@ -1,69 +0,0 @@ - -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.h2ogpt.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.vllm.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.vllm.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . 
| nindent 4 }} -data: -{{- range $key, $value := .Values.lmdeploy.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.caCertificates}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: - root-ca-bundle.crt: | - {{ .Values.caCertificates | nindent 4 | trim }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/deployment.yaml b/helm/h2ogpt-chart/templates/deployment.yaml deleted file mode 100644 index d89d8a3cb..000000000 --- a/helm/h2ogpt-chart/templates/deployment.yaml +++ /dev/null @@ -1,884 +0,0 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }} -spec: - {{- if not .Values.h2ogpt.autoscaling.enabled }} - replicas: {{ .Values.h2ogpt.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . 
}} - {{- if .Values.h2ogpt.updateStrategy }} - strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.h2ogpt.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }} - {{- with .Values.h2ogpt.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.h2ogpt.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.h2ogpt.podAffinity }} - podAntiAffinity: - {{- if .Values.h2ogpt.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.h2ogpt.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.h2ogpt.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - - name: {{ include "h2ogpt.fullname" . 
}} - securityContext: - {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} - image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} - command: ["/bin/bash", "-c"] - {{- if .Values.h2ogpt.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.h2ogpt.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} - args: - - > - python3 /workspace/generate.py - {{- end }} - {{- end }} - ports: - - name: http - containerPort: 7860 - protocol: TCP - - name: gpt - containerPort: 8888 - protocol: TCP - - name: openai - containerPort: 5000 - protocol: TCP - - name: function - containerPort: 5002 - protocol: TCP - - name: agent - containerPort: 5004 - protocol: TCP - {{- if .Values.h2ogpt.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.h2ogpt.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.h2ogpt.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-config - env: - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" - {{- end }} - {{- range $key, $value := .Values.h2ogpt.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} - {{- if .Values.h2ogpt.externalLLM.enabled }} - - name: H2OGPT_MODEL_LOCK - value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} - - name: H2OGPT_SCORE_MODEL - value: None - {{- end }} - {{- if .Values.h2ogpt.visionModels.enabled }} - - name: H2OGPT_VISIBLE_VISION_MODELS - value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} - - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE - value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . 
}}-volume - mountPath: /workspace/save - subPath: save - {{- if .Values.caCertificates }} - - name: ca-certificates - mountPath: /etc/ssl/certs/root-ca-bundle.crt - subPath: root-ca-bundle.crt - {{- end }} - {{ with .Values.h2ogpt.extraVolumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - volumes: - - name: {{ include "h2ogpt.fullname" . }}-volume - {{- if not .Values.h2ogpt.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - {{- end }} - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - {{- if .Values.caCertificates }} - - name: ca-certificates - configMap: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - {{- end }} - {{- with .Values.h2ogpt.extraVolumes }} - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} - ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - {{- if not .Values.tgi.autoscaling.enabled }} - replicas: {{ .Values.tgi.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference -spec: - {{- if not .Values.vllm.autoscaling.enabled }} - replicas: {{ .Values.vllm.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- if .Values.vllm.updateStrategy }} - strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.vllm.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- with .Values.vllm.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.vllm.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.vllm.podAffinity }} - podAntiAffinity: - {{- if .Values.vllm.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.vllm.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.vllm.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.vllm.storage.class | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference -spec: - {{- if not .Values.lmdeploy.autoscaling.enabled }} - replicas: {{ .Values.lmdeploy.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- if .Values.lmdeploy.updateStrategy }} - strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.lmdeploy.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- with .Values.lmdeploy.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.lmdeploy.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.lmdeploy.podAffinity }} - podAntiAffinity: - {{- if .Values.lmdeploy.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.lmdeploy.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.lmdeploy.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - securityContext: - {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} - image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} - command: ["lmdeploy"] - args: - - "serve" - - "api_server" -{{- range $arg := .Values.lmdeploy.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 23333 - protocol: TCP - {{- if .Values.lmdeploy.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.lmdeploy.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.lmdeploy.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - - name: HF_HOME - value: "/workspace/.cache" - {{- range $key, $value := .Values.lmdeploy.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - {{- if not .Values.lmdeploy.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml new file mode 100644 index 000000000..03cb57751 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.h2ogpt.overrideConfig }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml new file mode 100644 index 000000000..d240a7b6e --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -0,0 +1,373 @@ +{{- if and .Values.vllm.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and vLLM cannot be enabled at the same time. 
Enable only one and try again" }} +{{- end }} +{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} + {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if .Values.h2ogpt.stack.enabled }} + {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} + {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} + {{- end }} +{{- end }} + +{{- if .Values.h2ogpt.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }} +spec: + {{- if not .Values.h2ogpt.autoscaling.enabled }} + replicas: {{ .Values.h2ogpt.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }} + {{- if .Values.h2ogpt.updateStrategy }} + strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.h2ogpt.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }} + {{- with .Values.h2ogpt.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.h2ogpt.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.h2ogpt.podAffinity }} + podAntiAffinity: + {{- if .Values.h2ogpt.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . 
}} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.h2ogpt.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.h2ogpt.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /dev/shm + subPath: shm + {{- end }} + - name: {{ include "h2ogpt.fullname" . }} + securityContext: + {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} + image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} + command: ["/bin/bash", "-c"] + {{- if .Values.h2ogpt.stack.enabled }} + args: + - > + while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' + http://localhost:5000/v1/models)" != "200" ]]; do + echo "Waiting for inference service to become ready... (2sec)" + sleep 2 + done + + python3 /workspace/generate.py + {{- end }} + {{- if not .Values.h2ogpt.stack.enabled }} + {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} + args: + - > + python3 /workspace/generate.py + {{- end }} + {{- end }} + ports: + - name: http + containerPort: 7860 + protocol: TCP + - name: gpt + containerPort: 8888 + protocol: TCP + - name: openai + containerPort: 5000 + protocol: TCP + - name: function + containerPort: 5002 + protocol: TCP + - name: agent + containerPort: 5004 + protocol: TCP + {{- if .Values.h2ogpt.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.h2ogpt.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.h2ogpt.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-config + env: + {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" + {{- end }} + {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" + {{- end }} + {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} + - name: h2ogpt_inference_server + value: "vllm:localhost:5000" + {{- end }} + {{- range $key, $value := .Values.h2ogpt.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: OPENAI_AZURE_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_AZURE_KEY + - name: OPENAI_AZURE_API_BASE + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_AZURE_API_BASE + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_API_KEY + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: REPLICATE_API_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: REPLICATE_API_TOKEN + {{- end }} + {{- if .Values.h2ogpt.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.h2ogpt.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . 
}}-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.h2ogpt.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-volume + {{- if not .Values.h2ogpt.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} + storageClassName: {{ .Values.h2ogpt.storage.class }} + {{- end }} + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.h2ogpt.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-volume + namespace: {{ include "h2ogpt.namespace" . 
| quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.h2ogpt.storage.class }} + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml new file mode 100644 index 000000000..16417b7ff --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -0,0 +1,49 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-web + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.h2ogpt.service.webServiceAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }} + ports: + - name: http + protocol: TCP + port: {{ .Values.h2ogpt.service.webPort }} + targetPort: 7860 + - name: openai + protocol: TCP + port: {{ .Values.h2ogpt.service.openaiPort }} + targetPort: 5000 + - name: function + protocol: TCP + port: {{ .Values.h2ogpt.service.functionPort }} + targetPort: 5002 + - name: agent + protocol: TCP + port: {{ .Values.h2ogpt.service.agentsPort }} + targetPort: 5004 + type: {{ .Values.h2ogpt.service.type }} +{{- end }} +--- +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}} + ports: + - protocol: TCP + port: {{ .Values.h2ogpt.service.gptPort }} + targetPort: 8888 + type: {{ .Values.h2ogpt.service.type }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml new file mode 100644 index 000000000..7d041e79f --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.lmdeploy.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.lmdeploy.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml new file mode 100644 index 000000000..bb3240924 --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -0,0 +1,163 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference +spec: + {{- if not .Values.lmdeploy.autoscaling.enabled }} + replicas: {{ .Values.lmdeploy.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + {{- if .Values.lmdeploy.updateStrategy }} + strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.lmdeploy.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference + {{- with .Values.lmdeploy.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.lmdeploy.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lmdeploy.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.lmdeploy.podAffinity }} + podAntiAffinity: + {{- if .Values.lmdeploy.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.lmdeploy.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.lmdeploy.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lmdeploy.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference + securityContext: + {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} + image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} + command: ["lmdeploy"] + args: + - "serve" + - "api_server" +{{- range $arg := .Values.lmdeploy.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 23333 + protocol: TCP + {{- if .Values.lmdeploy.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.lmdeploy.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.lmdeploy.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + - name: HF_HOME + value: "/workspace/.cache" + {{- range $key, $value := .Values.lmdeploy.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + {{- if not .Values.lmdeploy.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} +--- +{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml new file mode 100644 index 000000000..e1dfdc4d3 --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + ports: + - protocol: TCP + port: {{ .Values.lmdeploy.service.port }} + targetPort: 23333 + type: {{ .Values.lmdeploy.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/service.yaml b/helm/h2ogpt-chart/templates/service.yaml deleted file mode 100644 index 8d3ddb73d..000000000 --- a/helm/h2ogpt-chart/templates/service.yaml +++ /dev/null @@ -1,97 +0,0 @@ -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . 
}}-web - namespace: {{ include "h2ogpt.namespace" . | quote }} - - {{- with .Values.h2ogpt.service.webServiceAnnotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - name: http - protocol: TCP - port: {{ .Values.h2ogpt.service.webPort }} - targetPort: 7860 - - name: openai - protocol: TCP - port: {{ .Values.h2ogpt.service.openaiPort }} - targetPort: 5000 - - name: function - protocol: TCP - port: {{ .Values.h2ogpt.service.functionPort }} - targetPort: 5002 - - name: agent - protocol: TCP - port: {{ .Values.h2ogpt.service.agentsPort }} - targetPort: 5004 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - protocol: TCP - port: {{ .Values.h2ogpt.service.gptPort }} - targetPort: 8888 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference
-  ports:
-    - protocol: TCP
-      port: {{ .Values.vllm.service.port }}
-      targetPort: 5000
-  type: {{ .Values.vllm.service.type }}
-{{- end }}
----
-{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }}
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference
-  namespace: {{ include "h2ogpt.namespace" . | quote }}
-spec:
-  selector:
-    app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference
-  ports:
-    - protocol: TCP
-      port: {{ .Values.lmdeploy.service.port }}
-      targetPort: 23333
-  type: {{ .Values.lmdeploy.service.type }}
-{{- end }}
diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yaml b/helm/h2ogpt-chart/templates/tgi-configmap.yaml
new file mode 100644
index 000000000..3857b92c8
--- /dev/null
+++ b/helm/h2ogpt-chart/templates/tgi-configmap.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.tgi.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config
+  namespace: {{ include "h2ogpt.namespace" . | quote }}
+  labels:
+    {{- include "h2ogpt.labels" . | nindent 4 }}
+data:
+{{- range $key, $value := .Values.tgi.overrideConfig }}
+  {{ printf "%s" $key | upper }}: {{ $value | quote }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml
new file mode 100644
index 000000000..400ac6eb6
--- /dev/null
+++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml
@@ -0,0 +1,175 @@
+{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "h2ogpt.fullname" . }}-tgi-inference
+  namespace: {{ include "h2ogpt.namespace" . | quote }}
+  labels:
+    app: {{ include "h2ogpt.fullname" . 
}}-tgi-inference
+spec:
+  {{- if not .Values.tgi.autoscaling.enabled }}
+  replicas: {{ .Values.tgi.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      app: {{ include "h2ogpt.fullname" . }}-tgi-inference
+  {{- if .Values.tgi.updateStrategy }}
+  strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }}
+  {{- end }}
+  template:
+    metadata:
+      {{- with .Values.tgi.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        app: {{ include "h2ogpt.fullname" . }}-tgi-inference
+      {{- with .Values.tgi.podLabels }}
+        {{ toYaml . | nindent 8 }}
+      {{- end }}
+    spec:
+      {{- with .Values.tgi.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tgi.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }}
+      affinity:
+        {{- if .Values.tgi.podAffinity }}
+        podAntiAffinity:
+          {{- if .Values.tgi.podAffinity.hostname }}
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: app
+                    operator: In
+                    values:
+                      - {{ include "h2ogpt.fullname" . }}
+              topologyKey: kubernetes.io/hostname
+          {{- end }}
+          {{- if .Values.tgi.podAffinity.zone }}
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              podAffinityTerm:
+                labelSelector:
+                  matchExpressions:
+                    - key: app
+                      operator: In
+                      values:
+                        - {{ include "h2ogpt.fullname" . }}
+                topologyKey: failure-domain.beta.kubernetes.io/zone
+          {{- end }}
+        {{- end }}
+        {{- with .Values.tgi.extraAffinity }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      {{- with .Values.tgi.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference + securityContext: + {{- toYaml .Values.tgi.securityContext | nindent 12 }} + image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" + imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} + command: [] + args: +{{- range $arg := .Values.tgi.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 80 + protocol: TCP + {{- if .Values.tgi.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.tgi.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.tgi.resources | nindent 12 }} + env: + {{- range $key, $value := .Values.tgi.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config + - secretRef: + name: {{ .Values.tgi.hfSecret }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /app/cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /data + subPath: data + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /dev/shm + subPath: shm + volumes: + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference-volume + {{- if not .Values.tgi.storage.useEphemeral}} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} + storageClassName: {{ .Values.tgi.storage.class }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.tgi.storage.class }} + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml new file mode 100644 index 000000000..63b04b36d --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-tgi-inference + ports: + - protocol: TCP + port: {{ .Values.tgi.service.port }} + targetPort: 80 + type: {{ .Values.tgi.service.type }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/vllm-configmap.yaml b/helm/h2ogpt-chart/templates/vllm-configmap.yaml new file mode 100644 index 000000000..66c187b3c --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + namespace: {{ include "h2ogpt.namespace" . 
| quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.vllm.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml new file mode 100644 index 000000000..e0228500b --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -0,0 +1,167 @@ +{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference +spec: + {{- if not .Values.vllm.autoscaling.enabled }} + replicas: {{ .Values.vllm.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- if .Values.vllm.updateStrategy }} + strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.vllm.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- with .Values.vllm.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.vllm.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.vllm.podAffinity }} + podAntiAffinity: + {{- if .Values.vllm.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . 
}} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.vllm.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.vllm.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} +--- +{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.vllm.storage.class | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml new file mode 100644 index 000000000..34678c2c5 --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + ports: + - protocol: TCP + port: {{ .Values.vllm.service.port }} + targetPort: 5000 + type: {{ .Values.vllm.service.type }} +{{- end }} \ No newline at end of file From d4d2ae620d6f5c6f32279e8e14799b64a467ef51 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:01:09 +0530 Subject: [PATCH 07/34] Fix file name --- .../templates/{tgi-configmap.yamal => tgi-configmap.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename helm/h2ogpt-chart/templates/{tgi-configmap.yamal => tgi-configmap.yaml} (100%) diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yamal b/helm/h2ogpt-chart/templates/tgi-configmap.yaml similarity index 100% rename from helm/h2ogpt-chart/templates/tgi-configmap.yamal rename to helm/h2ogpt-chart/templates/tgi-configmap.yaml From 80fc6a0aee703afcdb66fa35e16db6daf9795556 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:08:35 +0530 Subject: [PATCH 08/34] Move validations to validations.yaml --- .../h2ogpt-chart/templates/h2ogpt-deployment.yaml | 15 --------------- helm/h2ogpt-chart/templates/validations.yaml | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 15 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/validations.yaml diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index d240a7b6e..1ba47e84a 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -1,18 +1,3 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. 
Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} - {{- if .Values.h2ogpt.enabled }} apiVersion: apps/v1 kind: Deployment diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml new file mode 100644 index 000000000..6e9936d83 --- /dev/null +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.vllm.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} + {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if .Values.h2ogpt.stack.enabled }} + {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} + {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} + {{- end }} +{{- end }} \ No newline at end of file From d90ddbf22855e60bf10ee31e4a419d1b2babeaed Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:12:33 +0530 Subject: [PATCH 09/34] Add NOTES.txt --- helm/h2ogpt-chart/templates/NOTES.txt | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 helm/h2ogpt-chart/templates/NOTES.txt diff --git a/helm/h2ogpt-chart/templates/NOTES.txt b/helm/h2ogpt-chart/templates/NOTES.txt new file mode 100644 index 000000000..c32a7790f --- /dev/null +++ b/helm/h2ogpt-chart/templates/NOTES.txt @@ -0,0 +1,8 @@ +Thank you for installing {{ .Chart.Name }}. + +Your release is named {{ .Release.Name }}. 
+ +To learn more about the release, try: + + $ helm status {{ .Release.Name }} + $ helm get all {{ .Release.Name }} \ No newline at end of file From dc34c1d26560afc9c6e81ac3954d9f51e3aad0a1 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:42:48 +0530 Subject: [PATCH 10/34] Update documentation in values.yaml ( for `helm-docs` ) --- helm/h2ogpt-chart/values.yaml | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index b0e599bf4..226038aa2 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -3,6 +3,7 @@ fullnameOverride: "" namespaceOverride: "" h2ogpt: + # -- Enable h2oGPT enabled: true stack: # -- Run h2oGPT and vLLM on same pod. @@ -18,12 +19,12 @@ h2ogpt: tag: pullPolicy: - # extra volumes, for more certs, mount under /etc/ssl/more-certs + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs extraVolumes: [] + # -- Extra volume mounts extraVolumeMounts: [] - - podAffinity: # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + podAffinity: # hostname: # zone: @@ -48,9 +49,9 @@ h2ogpt: enabled: false visionModels: + # -- Enable vision models enabled: false - # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model - # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] + # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] visibleModels: [] rotateAlignResizeImage: false @@ -136,6 +137,7 @@ h2ogpt: autoscaling: {} tgi: + # -- Enable tgi enabled: false replicaCount: 1 @@ -143,9 +145,8 @@ tgi: repository: ghcr.io/huggingface/text-generation-inference tag: 0.9.3 pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. # hostname: # zone: @@ -179,6 +180,7 @@ tgi: autoscaling: {} vllm: + # -- Enable vllm enabled: false replicaCount: 1 @@ -186,9 +188,9 @@ vllm: repository: vllm/vllm-openai tag: latest pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + # hostname: # zone: @@ -248,6 +250,7 @@ vllm: autoscaling: {} lmdeploy: + # -- Enable lmdeploy enabled: false replicaCount: 1 @@ -255,9 +258,8 @@ lmdeploy: repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy tag: pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. # hostname: # zone: From 801e907479e0d30f1ba4f313117ac86d8f99e669 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 13:35:54 +0530 Subject: [PATCH 11/34] Add agents related templates and values --- helm/h2ogpt-chart/templates/_helpers.tpl | 25 ++ .../templates/agents-configmap.yaml | 16 + .../templates/agents-deployment.yaml | 346 ++++++++++++++++++ .../templates/agents-service.yaml | 21 ++ .../templates/h2ogpt-configmap.yaml | 3 + .../templates/h2ogpt-deployment.yaml | 38 +- helm/h2ogpt-chart/templates/validations.yaml | 3 + helm/h2ogpt-chart/values.yaml | 163 +++++++-- 8 files changed, 565 insertions(+), 50 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/agents-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/agents-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/agents-service.yaml diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index a8352a4ad..9eb75f3c9 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ 
b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -68,3 +68,28 @@ Create the name of the service account to use {{- default "default" .Values.serviceAccount.name }} {{- end }} {{- end }} + +{{/* +Configs for agents server +*/}} + +{{- define "agents.overrideConfig" -}} +agent_server: True +agent_port: "5004" +{{- end }} + +{{/* +Configs for agents with h2ogpt +*/}} + +{{- define "h2ogpt.overrideConfig" -}} +{{- if .Values.h2ogpt.agents.enabled }} +agent_server: True +agent_port: "5004" +multiple_workers_gunicorn: True +agent_workers: {{ .Values.h2ogpt.agents.agent_workers}} +{{- else }} +agents_server: False +{{- end }} + +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/agents-configmap.yaml b/helm/h2ogpt-chart/templates/agents-configmap.yaml new file mode 100644 index 000000000..e242dee69 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-configmap.yaml @@ -0,0 +1,16 @@ +{{- if .Values.agents.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := ( include "agents.overrideConfig" . | fromYaml ) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- range $key, $value := .Values.agents.overrideConfig }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml new file mode 100644 index 000000000..607f92573 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -0,0 +1,346 @@ +{{- if .Values.agents.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . 
}}-agents +spec: + {{- if not .Values.agents.autoscaling.enabled }} + replicas: {{ .Values.agents.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-agents + {{- if .Values.agents.updateStrategy }} + strategy: {{- toYaml .Values.agents.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.agents.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-agents + {{- with .Values.agents.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.agents.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.agents.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.agents.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.agents.podAffinity }} + podAntiAffinity: + {{- if .Values.agents.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.agents.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.agents.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.agents.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + {{- if .Values.agents.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /dev/shm + subPath: shm + {{- end }} + - name: {{ include "h2ogpt.fullname" . 
}}-agents + securityContext: + {{- toYaml .Values.agents.securityContext | nindent 12 }} + image: "{{ .Values.agents.image.repository }}:{{ .Values.agents.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.agents.image.pullPolicy }} + command: ["/bin/bash", "-c"] + {{- if .Values.agents.stack.enabled }} + args: + - > + while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' + http://localhost:5000/v1/models)" != "200" ]]; do + echo "Waiting for inference service to become ready... (2sec)" + sleep 2 + done + + python3 /workspace/generate.py + {{- end }} + {{- if not .Values.agents.stack.enabled }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.agents.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} + args: + - > + python3 /workspace/generate.py + {{- end }} + {{- end }} + ports: + - name: agent + containerPort: 5004 + protocol: TCP + {{- if .Values.agents.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.agents.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.agents.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.agents.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.agents.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-agents-config + env: + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" + {{- end }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" + {{- end }} + {{- if and .Values.agents.stack.enabled (not .Values.global.externalLLM.enabled) }} + - name: h2ogpt_inference_server + value: "vllm:localhost:5000" + {{- end }} + {{- range $key, $value := .Values.agents.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if and .Values.global.externalLLM.openAIAzure.enabled .Values.global.externalLLM.enabled }} + - name: OPENAI_AZURE_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.global.externalLLM.secret }} + key: OPENAI_AZURE_KEY + - name: OPENAI_AZURE_API_BASE + valueFrom: + secretKeyRef: + name: {{ .Values.global.externalLLM.secret }} + key: OPENAI_AZURE_API_BASE + {{- end }} + {{- if and .Values.global.externalLLM.openAI.enabled .Values.global.externalLLM.enabled }} + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.global.externalLLM.secret }} + key: OPENAI_API_KEY + {{- end }} + {{- if and .Values.global.externalLLM.replicate.enabled .Values.global.externalLLM.enabled }} + - name: REPLICATE_API_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.global.externalLLM.secret }} + key: REPLICATE_API_TOKEN + {{- end }} + {{- if .Values.global.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.global.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.global.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.global.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-agents-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . 
}}-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.agents.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-agents-volume + {{- if not .Values.agents.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.agents.storage.size | quote }} + storageClassName: {{ .Values.agents.storage.class }} + {{- end }} + {{- if .Values.agents.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.agents.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.agents.enabled) (not .Values.agents.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents-volume + namespace: {{ include "h2ogpt.namespace" . 
| quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.agents.storage.class }} + resources: + requests: + storage: {{ .Values.agents.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-service.yaml b/helm/h2ogpt-chart/templates/agents-service.yaml new file mode 100644 index 000000000..d39cad58e --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.agents.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.agents.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-agents + ports: + - name: agent + protocol: TCP + port: {{ .Values.agents.service.agentsPort }} + targetPort: 5004 + type: {{ .Values.agents.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml index 03cb57751..902705552 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -7,6 +7,9 @@ metadata: labels: {{- include "h2ogpt.labels" . | nindent 4 }} data: +{{- range $key, $value := ( include "h2ogpt.overrideConfig" . 
| fromYaml ) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} {{- range $key, $value := .Values.h2ogpt.overrideConfig }} {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 1ba47e84a..c320d5488 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -149,7 +149,7 @@ spec: python3 /workspace/generate.py {{- end }} {{- if not .Values.h2ogpt.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} args: - > until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; @@ -160,7 +160,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} args: - > until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; @@ -171,7 +171,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.modelLock) }} args: - > until wget -O- http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; @@ -226,19 +226,19 @@ spec: - configMapRef: name: {{ include "h2ogpt.fullname" . 
}}-config env: - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" {{- end }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} + {{- if and .Values.h2ogpt.stack.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "vllm:localhost:5000" {{- end }} @@ -246,43 +246,43 @@ spec: - name: "{{ $key }}" value: "{{ $value }}" {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} + {{- if and .Values.global.externalLLM.openAIAzure.enabled .Values.global.externalLLM.enabled }} - name: OPENAI_AZURE_KEY valueFrom: secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} + name: {{ .Values.global.externalLLM.secret }} key: OPENAI_AZURE_KEY - name: OPENAI_AZURE_API_BASE valueFrom: secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} + name: {{ .Values.global.externalLLM.secret }} key: OPENAI_AZURE_API_BASE {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} + {{- if and .Values.global.externalLLM.openAI.enabled .Values.global.externalLLM.enabled }} - name: OPENAI_API_KEY valueFrom: secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} + name: {{ .Values.global.externalLLM.secret }} key: OPENAI_API_KEY {{- end }} - {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} + {{- if and .Values.global.externalLLM.replicate.enabled .Values.global.externalLLM.enabled }} - name: REPLICATE_API_TOKEN valueFrom: secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} + name: {{ .Values.global.externalLLM.secret }} key: REPLICATE_API_TOKEN {{- end }} - {{- if .Values.h2ogpt.externalLLM.enabled }} + {{- if .Values.global.externalLLM.enabled }} - name: H2OGPT_MODEL_LOCK - value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} + value: {{ toJson .Values.global.externalLLM.modelLock | quote }} - name: H2OGPT_SCORE_MODEL value: None {{- end }} - {{- if 
.Values.h2ogpt.visionModels.enabled }} + {{- if .Values.global.visionModels.enabled }} - name: H2OGPT_VISIBLE_VISION_MODELS - value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} + value: {{ .Values.global.visionModels.visibleModels | quote }} - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE - value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} + value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} {{- end }} volumeMounts: - name: {{ include "h2ogpt.fullname" . }}-volume diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml index 6e9936d83..c0c45e0a5 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -11,4 +11,7 @@ {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} {{- end }} +{{- end }} +{{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} + {{- fail "Agents in both h2oGPT and agents cannot be enabled." }} {{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 226038aa2..8e06f6240 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -2,23 +2,50 @@ nameOverride: "" fullnameOverride: "" namespaceOverride: "" +global: + externalLLM: + enabled: false + secret: + + modelLock: + + openAIAzure: + enabled: false + + openAI: + enabled: False + + replicate: + enabled: false + + visionModels: + # -- Enable vision models + enabled: false + # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] + visibleModels: [ ] + rotateAlignResizeImage: false + h2ogpt: # -- Enable h2oGPT enabled: true stack: # -- Run h2oGPT and vLLM on same pod. 
- enabled: false + enabled: false + # -- Enable agents + agents: + # -- Run agents with h2oGPT container + enabled: true + agent_workers: 5 replicaCount: 1 - imagePullSecrets: + imagePullSecrets: image: repository: gcr.io/vorvan/h2oai/h2ogpt-runtime - tag: + tag: pullPolicy: IfNotPresent initImage: repository: tag: pullPolicy: - # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs extraVolumes: [] # -- Extra volume mounts @@ -27,33 +54,10 @@ h2ogpt: podAffinity: # hostname: # zone: - storage: size: 128Gi - class: + class: useEphemeral: true - - externalLLM: - enabled: false - secret: - - modelLock: - - openAIAzure: - enabled: false - - openAI: - enabled: False - - replicate: - enabled: false - - visionModels: - # -- Enable vision models - enabled: false - # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] - visibleModels: [] - rotateAlignResizeImage: false # -- Example configs to use when not using Model Lock and External LLM # overrideConfig: @@ -86,6 +90,7 @@ h2ogpt: embedding_gpu_id: "cpu" hf_embedding_model: "fake" openai_server: True + openai_workers: 5 share: False enforce_h2ogpt_api_key: True enforce_h2ogpt_ui_key: False @@ -113,9 +118,105 @@ h2ogpt: podSecurityContext: runAsNonRoot: true - runAsUser: - runAsGroup: - fsGroup: + runAsUser: + runAsGroup: + fsGroup: + + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + + resources: + nodeSelector: + tolerations: + + env: {} + + podAnnotations: {} + podLabels: {} + autoscaling: {} + +agents: + # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` + enabled: false + stack: + # -- Run agents and vLLM on same pod. 
+ enabled: false + replicaCount: 1 + imagePullSecrets: + image: + repository: gcr.io/vorvan/h2oai/h2ogpt-runtime + tag: + pullPolicy: IfNotPresent + initImage: + repository: + tag: + pullPolicy: + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs + extraVolumes: [] + # -- Extra volume mounts + extraVolumeMounts: [] + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + podAffinity: + # hostname: + # zone: + + storage: + size: 128Gi + class: + useEphemeral: true + + overrideConfig: + agent_workers: 5 + visible_login_tab: False + visible_system_tab: False + visible_models_tab: False + visible_hosts_tab: False + # change below to valid vision model or remove this entry + #visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']" + rotate_align_resize_image: False + concurrency_count: 100 + top_k_docs_max_show: 100 + num_async: 10 + # change below to valid directory or remove this entry + #save_dir: "/docker_logs" + score_model: "None" + enable_tts: False + enable_stt: False + enable_transcriptions: False + embedding_gpu_id: "cpu" + hf_embedding_model: "fake" + share: False + enforce_h2ogpt_api_key: True + enforce_h2ogpt_ui_key: False + # change to something secure for ui access to backend + #h2ogpt_api_keys: "['api_key_change_me']" + metadata_in_context: "" + # change or remove if using model hub + #use_auth_token: "hf_xxxxx" + # change below to first visible model or remove this entry + #visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']" + # change so ui or api cannot access without this password + #admin_pass: "admin_password_change_me" + + service: + type: NodePort + agentsPort: 5004 + annotations: {} + + updateStrategy: + type: RollingUpdate + + podSecurityContext: + runAsNonRoot: true + runAsUser: + runAsGroup: + fsGroup: securityContext: runAsNonRoot: true From 0c434c3773126869c518d01742b3cd47481e5500 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:12:22 +0530 Subject: [PATCH 12/34] Add 
missing EOF --- helm/h2ogpt-chart/templates/_helpers.tpl | 2 +- .../templates/agents-deployment.yaml | 15 --------------- .../templates/ca-certs-configmap.yaml | 2 +- helm/h2ogpt-chart/templates/h2ogpt-service.yaml | 16 ---------------- .../templates/lmdeploy-configmap.yaml | 2 +- .../templates/lmdeploy-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/tgi-configmap.yaml | 2 +- helm/h2ogpt-chart/templates/tgi-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/tgi-service.yaml | 2 +- helm/h2ogpt-chart/templates/validations.yaml | 2 +- helm/h2ogpt-chart/templates/vllm-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/vllm-service.yaml | 2 +- 12 files changed, 10 insertions(+), 41 deletions(-) diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 9eb75f3c9..9688e8e4f 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -92,4 +92,4 @@ agent_workers: {{ .Values.h2ogpt.agents.agent_workers}} agents_server: False {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index 607f92573..365df9ddd 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -329,18 +329,3 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} ---- -{{- if and (.Values.agents.enabled) (not .Values.agents.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-agents-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.agents.storage.class }} - resources: - requests: - storage: {{ .Values.agents.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml index a2580b771..84d2f4199 100644 --- a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml +++ b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml @@ -9,4 +9,4 @@ metadata: data: root-ca-bundle.crt: | {{ .Values.caCertificates | nindent 4 | trim }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index 16417b7ff..0d9bbcc1d 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -31,19 +31,3 @@ spec: targetPort: 5004 type: {{ .Values.h2ogpt.service.type }} {{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}} - ports: - - protocol: TCP - port: {{ .Values.h2ogpt.service.gptPort }} - targetPort: 8888 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml index 7d041e79f..c1dd07713 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml @@ -10,4 +10,4 @@ data: {{- range $key, $value := .Values.lmdeploy.overrideConfig }} {{ printf "%s" $key | upper }}: {{ $value | quote }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml index bb3240924..eed4624b4 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -160,4 +160,4 @@ spec: resources: requests: storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yaml b/helm/h2ogpt-chart/templates/tgi-configmap.yaml index 3857b92c8..ec5c17866 100644 --- a/helm/h2ogpt-chart/templates/tgi-configmap.yaml +++ b/helm/h2ogpt-chart/templates/tgi-configmap.yaml @@ -10,4 +10,4 @@ data: {{- range $key, $value := .Values.tgi.overrideConfig }} {{ printf "%s" $key | upper }}: {{ $value | quote }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml index 400ac6eb6..75f486b70 100644 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml @@ -172,4 +172,4 @@ spec: resources: requests: storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml 
b/helm/h2ogpt-chart/templates/tgi-service.yaml index 63b04b36d..bbaa23439 100644 --- a/helm/h2ogpt-chart/templates/tgi-service.yaml +++ b/helm/h2ogpt-chart/templates/tgi-service.yaml @@ -12,4 +12,4 @@ spec: port: {{ .Values.tgi.service.port }} targetPort: 80 type: {{ .Values.tgi.service.type }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml index c0c45e0a5..11adaa3f8 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -14,4 +14,4 @@ {{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} {{- fail "Agents in both h2oGPT and agents cannot be enabled." }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml index e0228500b..9cf866d40 100644 --- a/helm/h2ogpt-chart/templates/vllm-deployment.yaml +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -164,4 +164,4 @@ spec: resources: requests: storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml index 34678c2c5..d30c729e4 100644 --- a/helm/h2ogpt-chart/templates/vllm-service.yaml +++ b/helm/h2ogpt-chart/templates/vllm-service.yaml @@ -12,4 +12,4 @@ spec: port: {{ .Values.vllm.service.port }} targetPort: 5000 type: {{ .Values.vllm.service.type }} -{{- end }} \ No newline at end of file +{{- end }} From 8c552e5e2d1f1d386f84659b368d88a450d7bc15 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:12:42 +0530 Subject: [PATCH 13/34] Add agents-pvc.yaml --- helm/h2ogpt-chart/templates/agents-pvc.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 helm/h2ogpt-chart/templates/agents-pvc.yaml diff --git 
a/helm/h2ogpt-chart/templates/agents-pvc.yaml b/helm/h2ogpt-chart/templates/agents-pvc.yaml new file mode 100644 index 000000000..2165fab9d --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.agents.enabled) (not .Values.agents.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.agents.storage.class }} + resources: + requests: + storage: {{ .Values.agents.storage.size | quote }} +{{- end }} From cc15e61718d5c22329c01c66f4deb6d34739f37d Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:14:56 +0530 Subject: [PATCH 14/34] Add h2ogpt-pvc.yaml --- .../h2ogpt-chart/templates/h2ogpt-deployment.yaml | 15 --------------- helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 15 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index c320d5488..92f25aae4 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -341,18 +341,3 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} ---- -{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.h2ogpt.storage.class }} - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml new file mode 100644 index 000000000..0f5ac0956 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.h2ogpt.storage.class }} + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} +{{- end }} \ No newline at end of file From cd6c1738951f6a63af03ad803792ab95c52b1143 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:25:26 +0530 Subject: [PATCH 15/34] Separate PVC into files and remove autoscaling --- .../templates/agents-deployment.yaml | 2 -- .../templates/h2ogpt-deployment.yaml | 2 -- helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml | 2 +- .../templates/lmdeploy-deployment.yaml | 18 ------------------ helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml | 15 +++++++++++++++ .../h2ogpt-chart/templates/tgi-deployment.yaml | 17 ----------------- helm/h2ogpt-chart/templates/tgi-pvc.yaml | 14 ++++++++++++++ .../templates/vllm-deployment.yaml | 18 ------------------ helm/h2ogpt-chart/templates/vllm-pvc.yaml | 16 ++++++++++++++++ helm/h2ogpt-chart/values.yaml | 6 +----- 10 files changed, 47 insertions(+), 63 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-pvc.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-pvc.yaml diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml 
index 365df9ddd..0eb0e6b57 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }}-agents spec: - {{- if not .Values.agents.autoscaling.enabled }} replicas: {{ .Values.agents.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . }}-agents diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 92f25aae4..2669c53a9 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }} spec: - {{- if not .Values.h2ogpt.autoscaling.enabled }} replicas: {{ .Values.h2ogpt.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml index 0f5ac0956..bd6e7141f 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml @@ -11,4 +11,4 @@ spec: resources: requests: storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml index eed4624b4..0cf8b03b3 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference spec: - {{- if not .Values.lmdeploy.autoscaling.enabled }} replicas: {{ .Values.lmdeploy.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference @@ -145,19 +143,3 @@ spec: sizeLimit: 10.24Gi name: shm {{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml b/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml new file mode 100644 index 000000000..164ec6f1d --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml index 75f486b70..c85b5f4c8 100644 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }}-tgi-inference spec: - {{- if not .Values.tgi.autoscaling.enabled }} replicas: {{ .Values.tgi.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . 
}}-tgi-inference @@ -158,18 +156,3 @@ spec: storageClassName: {{ .Values.tgi.storage.class }} {{- end }} {{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-pvc.yaml b/helm/h2ogpt-chart/templates/tgi-pvc.yaml new file mode 100644 index 000000000..0a34be2fd --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.tgi.storage.class }} + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml index 9cf866d40..89390876d 100644 --- a/helm/h2ogpt-chart/templates/vllm-deployment.yaml +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }}-vllm-inference spec: - {{- if not .Values.vllm.autoscaling.enabled }} replicas: {{ .Values.vllm.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . }}-vllm-inference @@ -149,19 +147,3 @@ spec: sizeLimit: 10.24Gi name: shm {{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.vllm.storage.class | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-pvc.yaml b/helm/h2ogpt-chart/templates/vllm-pvc.yaml new file mode 100644 index 000000000..fe26f08ea --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-pvc.yaml @@ -0,0 +1,16 @@ +--- +{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.vllm.storage.class | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 8e06f6240..f88324a55 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -139,7 +139,6 @@ h2ogpt: podAnnotations: {} podLabels: {} - autoscaling: {} agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` @@ -235,7 +234,6 @@ agents: podAnnotations: {} podLabels: {} - autoscaling: {} tgi: # -- Enable tgi @@ -278,7 +276,6 @@ tgi: podAnnotations: {} podLabels: {} - autoscaling: {} vllm: # -- Enable vllm @@ -348,7 +345,7 @@ vllm: podAnnotations: {} podLabels: {} - autoscaling: {} + lmdeploy: # -- Enable lmdeploy @@ -392,7 +389,6 @@ lmdeploy: podAnnotations: {} podLabels: {} - autoscaling: {} # -- CA certs caCertificates: "" From 1661737788dca3b12b6e3cf9f5d16d25914b2a7a Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:56:27 +0530 Subject: [PATCH 16/34] Remove stack, vllm, tgi, and 
lmdeploy related checks for agents --- .../templates/agents-deployment.yaml | 140 ------------------ helm/h2ogpt-chart/values.yaml | 3 - 2 files changed, 143 deletions(-) diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index 0eb0e6b57..3015395ce 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -70,122 +70,15 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} containers: - {{- if .Values.agents.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - name: {{ include "h2ogpt.fullname" . }}-agents securityContext: {{- toYaml .Values.agents.securityContext | nindent 12 }} image: "{{ .Values.agents.image.repository }}:{{ .Values.agents.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.agents.image.pullPolicy }} command: ["/bin/bash", "-c"] - {{- if .Values.agents.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.agents.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} args: - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.agents.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} - args: - - > - python3 /workspace/generate.py - {{- end }} - {{- end }} ports: - name: agent containerPort: 5004 @@ -212,22 +105,6 @@ spec: - configMapRef: name: {{ include "h2ogpt.fullname" . }}-agents-config env: - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} - {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} - {{- if and .Values.agents.stack.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" - {{- end }} {{- range $key, $value := .Values.agents.env }} - name: "{{ $key }}" value: "{{ $value }}" @@ -301,23 +178,6 @@ spec: storage: {{ .Values.agents.storage.size | quote }} storageClassName: {{ .Values.agents.storage.class }} {{- end }} - {{- if .Values.agents.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} {{- if .Values.caCertificates }} - name: ca-certificates configMap: diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index f88324a55..70b9089d5 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -143,9 +143,6 @@ h2ogpt: agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false - stack: - # -- Run agents and vLLM on same pod. - enabled: false replicaCount: 1 imagePullSecrets: image: From 66909426de63821cc0e2abd9261c28432b5a7198 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 19:57:40 +0530 Subject: [PATCH 17/34] Update secret configs --- .../templates/agents-deployment.yaml | 30 +++---------------- .../global-external-llm-secrets.yaml | 13 ++++++++ .../templates/h2ogpt-deployment.yaml | 30 +++---------------- helm/h2ogpt-chart/values.yaml | 5 ++++ 4 files changed, 26 insertions(+), 52 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index 3015395ce..c9a0eea68 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -104,37 +104,15 @@ spec: envFrom: - configMapRef: name: {{ include "h2ogpt.fullname" . }}-agents-config + {{- if .Values.global.externalLLM.enabled }} + - secretRef: + name: {{ include "h2ogpt.fullname" . 
}}-external-llm-secret + {{- end }} env: {{- range $key, $value := .Values.agents.env }} - name: "{{ $key }}" value: "{{ $value }}" {{- end }} - {{- if and .Values.global.externalLLM.openAIAzure.enabled .Values.global.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.global.externalLLM.openAI.enabled .Values.global.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.global.externalLLM.replicate.enabled .Values.global.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} {{- if .Values.global.externalLLM.enabled }} - name: H2OGPT_MODEL_LOCK value: {{ toJson .Values.global.externalLLM.modelLock | quote }} diff --git a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml new file mode 100644 index 000000000..5ff95d9cd --- /dev/null +++ b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.global.externalLLM.enabled (or .Values.agents.enabled .Values.h2ogpt.enabled) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . 
| nindent 4 }} +data: +{{- range $key, $value := .Values.global.externalLLM.secret }} + {{ $key }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 2669c53a9..42f393a73 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -223,6 +223,10 @@ spec: envFrom: - configMapRef: name: {{ include "h2ogpt.fullname" . }}-config + {{- if .Values.global.externalLLM.enabled }} + - secretRef: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + {{- end }} env: {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - name: h2ogpt_inference_server @@ -244,32 +248,6 @@ spec: - name: "{{ $key }}" value: "{{ $value }}" {{- end }} - {{- if and .Values.global.externalLLM.openAIAzure.enabled .Values.global.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.global.externalLLM.openAI.enabled .Values.global.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.global.externalLLM.replicate.enabled .Values.global.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} {{- if .Values.global.externalLLM.enabled }} - name: H2OGPT_MODEL_LOCK value: {{ toJson .Values.global.externalLLM.modelLock | quote }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 70b9089d5..6f3cd3d4f 100644 --- 
a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -5,7 +5,12 @@ namespaceOverride: "" global: externalLLM: enabled: false + # -- list of secrets for h2ogpt and agents env secret: +# OPENAI_AZURE_KEY: "value" +# OPENAI_AZURE_API_BASE: "value" +# OPENAI_API_KEY: "value" +# REPLICATE_API_TOKEN: "value" modelLock: From ba0434b49e1279fe1cbe5dcf9e2e80609476de0d Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 20:10:06 +0530 Subject: [PATCH 18/34] Add conditions for agent and openAi ports --- helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml | 7 ++++--- helm/h2ogpt-chart/templates/h2ogpt-service.yaml | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 42f393a73..b81753e15 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -190,18 +190,19 @@ spec: - name: http containerPort: 7860 protocol: TCP - - name: gpt - containerPort: 8888 - protocol: TCP + {{- if .Values.global.externalLLM.openAI.enabled }} - name: openai containerPort: 5000 protocol: TCP + {{- end }} - name: function containerPort: 5002 protocol: TCP + {{- if .Values.h2ogpt.agents.enabled }} - name: agent containerPort: 5004 protocol: TCP + {{- end }} {{- if .Values.h2ogpt.livenessProbe }} livenessProbe: httpGet: diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index 0d9bbcc1d..043feb527 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -17,17 +17,21 @@ spec: protocol: TCP port: {{ .Values.h2ogpt.service.webPort }} targetPort: 7860 + {{- if .Values.global.externalLLM.openAI.enabled }} - name: openai protocol: TCP port: {{ .Values.h2ogpt.service.openaiPort }} targetPort: 5000 + {{- end }} - name: function protocol: TCP port: {{ 
.Values.h2ogpt.service.functionPort }} targetPort: 5002 + {{- if .Values.h2ogpt.agents.enabled }} - name: agent protocol: TCP port: {{ .Values.h2ogpt.service.agentsPort }} targetPort: 5004 + {{- end }} type: {{ .Values.h2ogpt.service.type }} {{- end }} From 86f060b297321982701839d3898cfee8f319d8f3 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Sat, 26 Oct 2024 01:07:46 +0530 Subject: [PATCH 19/34] Remove stack from h2ogpt --- .../templates/h2ogpt-deployment.yaml | 98 +------------------ helm/h2ogpt-chart/values.yaml | 5 +- 2 files changed, 4 insertions(+), 99 deletions(-) diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index b81753e15..e24805db4 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -70,83 +70,12 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} containers: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 
12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - name: {{ include "h2ogpt.fullname" . }} securityContext: {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} command: ["/bin/bash", "-c"] - {{- if .Values.h2ogpt.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.h2ogpt.stack.enabled }} {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} args: - > @@ -185,7 +114,6 @@ spec: - > python3 /workspace/generate.py {{- end }} - {{- end }} ports: - name: http containerPort: 7860 @@ -229,21 +157,18 @@ spec: name: {{ include "h2ogpt.fullname" . }}-external-llm-secret {{- end }} env: - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . 
}}-tgi-inference:{{ .Values.tgi.service.port }}" {{- end }} - {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" {{- end }} {{- range $key, $value := .Values.h2ogpt.env }} - name: "{{ $key }}" @@ -292,23 +217,6 @@ spec: storage: {{ .Values.h2ogpt.storage.size | quote }} storageClassName: {{ .Values.h2ogpt.storage.class }} {{- end }} - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} {{- if .Values.caCertificates }} - name: ca-certificates configMap: diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 6f3cd3d4f..2f84b5adf 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -18,7 +18,7 @@ global: enabled: false openAI: - enabled: False + enabled: false replicate: enabled: false @@ -33,9 +33,6 @@ global: h2ogpt: # -- Enable h2oGPT enabled: true - stack: - # -- Run h2oGPT and vLLM on same pod. - enabled: false # -- Enable agents agents: # -- Run agents with h2oGPT container From a14ed1cf4869c091d6604652a7bbebc76e05c58e Mon Sep 17 00:00:00 2001 From: Lakindu Date: Sat, 26 Oct 2024 02:08:33 +0530 Subject: [PATCH 20/34] Add HPA for agents --- helm/h2ogpt-chart/templates/agents-hpa.yaml | 33 +++++++++++++++++++++ helm/h2ogpt-chart/values.yaml | 9 +++++- 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 helm/h2ogpt-chart/templates/agents-hpa.yaml diff --git a/helm/h2ogpt-chart/templates/agents-hpa.yaml b/helm/h2ogpt-chart/templates/agents-hpa.yaml new file mode 100644 index 000000000..9872e8d43 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.agents.autoscaling.enabled | default false }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ .Release.Name }}-agents + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "h2ogpt.fullname" . 
}}-agents + minReplicas: {{ .Values.agents.autoscaling.minReplicas }} + maxReplicas: {{ .Values.agents.autoscaling.maxReplicas }} + metrics: + {{- if .Values.agents.autoscaling.targetCPU }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.agents.autoscaling.targetCPU }} + {{- end }} + {{- if .Values.agents.autoscaling.targetMemory }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.agents.autoscaling.targetMemory }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 2f84b5adf..21d5bfe1c 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -36,7 +36,7 @@ h2ogpt: # -- Enable agents agents: # -- Run agents with h2oGPT container - enabled: true + enabled: false agent_workers: 5 replicaCount: 1 imagePullSecrets: @@ -145,6 +145,13 @@ h2ogpt: agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false + autoscaling: + # Enable autoscaling for agents + enabled: false + minReplicas: 1 + maxReplicas: 2 + targetMemory: 32Gi + targetCPU: 80 replicaCount: 1 imagePullSecrets: image: From db1cfcecffcf2b2b7f615c77ba0c10068d3e6b48 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Sat, 26 Oct 2024 02:08:54 +0530 Subject: [PATCH 21/34] Fix stack removal --- .../templates/h2ogpt-deployment.yaml | 1 - .../templates/lmdeploy-deployment.yaml | 2 +- .../templates/lmdeploy-service.yaml | 2 +- .../templates/tgi-deployment.yaml | 19 +------------------ helm/h2ogpt-chart/templates/tgi-service.yaml | 2 +- helm/h2ogpt-chart/templates/validations.yaml | 7 +------ .../templates/vllm-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/vllm-service.yaml | 2 +- 8 files changed, 7 insertions(+), 30 deletions(-) diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 
e24805db4..549da1d5d 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -169,7 +169,6 @@ spec: - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" {{- end }} - {{- end }} {{- range $key, $value := .Values.h2ogpt.env }} - name: "{{ $key }}" value: "{{ $value }}" diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml index 0cf8b03b3..95a49320f 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} +{{- if and .Values.lmdeploy.enabled }} apiVersion: apps/v1 kind: Deployment metadata: diff --git a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml index e1dfdc4d3..831189944 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} +{{- if .Values.lmdeploy.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml index c85b5f4c8..721b2ed01 100644 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +{{- if .Values.tgi.enabled }} apiVersion: apps/v1 kind: Deployment metadata: @@ -123,23 +123,6 @@ spec: mountPath: /dev/shm subPath: shm volumes: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume {{- if not .Values.tgi.storage.useEphemeral}} persistentVolumeClaim: diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml index bbaa23439..de42ad89a 100644 --- a/helm/h2ogpt-chart/templates/tgi-service.yaml +++ b/helm/h2ogpt-chart/templates/tgi-service.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +{{- if .Values.tgi.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml index 11adaa3f8..cd08023e8 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -7,11 +7,6 @@ {{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} {{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} - {{- fail "Agents in both h2oGPT and agents cannot be enabled." }} + {{- fail " Both agents in both h2ogpt.agents cannot be enabled. 
Enable only one and try again" }}
 {{- end }}
diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml
index 89390876d..755a87aac 100644
--- a/helm/h2ogpt-chart/templates/vllm-deployment.yaml
+++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml
@@ -1,4 +1,4 @@
-{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}}
+{{- if .Values.vllm.enabled }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml
index d30c729e4..980d998cd 100644
--- a/helm/h2ogpt-chart/templates/vllm-service.yaml
+++ b/helm/h2ogpt-chart/templates/vllm-service.yaml
@@ -1,4 +1,4 @@
-{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }}
+{{- if .Values.vllm.enabled }}
 apiVersion: v1
 kind: Service
 metadata:

From 95573cdb4efcd63fdb5c86b555986d49a068ca41 Mon Sep 17 00:00:00 2001
From: Lakindu
Date: Sat, 26 Oct 2024 02:15:33 +0530
Subject: [PATCH 22/34] Add default resource limits

---
 helm/h2ogpt-chart/values.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml
index 21d5bfe1c..7e3fe7307 100644
--- a/helm/h2ogpt-chart/values.yaml
+++ b/helm/h2ogpt-chart/values.yaml
@@ -134,6 +134,10 @@ h2ogpt:
       type: RuntimeDefault

   resources:
+    requests:
+      memory: 32Gi
+    limits:
+      memory: 64Gi

   nodeSelector:
   tolerations:
@@ -233,6 +237,10 @@ agents:
       type: RuntimeDefault

   resources:
+    requests:
+      memory: 32Gi
+    limits:
+      memory: 64Gi

   nodeSelector:
   tolerations:

From 661cf3c69266ee5214ed729efccb3feeaf1215aa Mon Sep 17 00:00:00 2001
From: Lakindu
Date: Sat, 26 Oct 2024 02:21:10 +0530
Subject: [PATCH 23/34] Add README.md generated from helm-docs

---
 helm/h2ogpt-chart/Chart.yaml |   2 +-
 helm/h2ogpt-chart/README.md  | 226 +++++++++++++++++++++++++++++++++++
 2 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100644 helm/h2ogpt-chart/README.md

diff --git
a/helm/h2ogpt-chart/Chart.yaml b/helm/h2ogpt-chart/Chart.yaml index d90a7d69e..eeaf87fef 100644 --- a/helm/h2ogpt-chart/Chart.yaml +++ b/helm/h2ogpt-chart/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: h2ogpt -description: A Helm chart for h2ogpt +description: A Helm chart for h2oGPT # A chart can be either an 'application' or a 'library' chart. # diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md new file mode 100644 index 000000000..a410aa342 --- /dev/null +++ b/helm/h2ogpt-chart/README.md @@ -0,0 +1,226 @@ +# h2ogpt + +![Version: 0.1.0-288](https://img.shields.io/badge/Version-0.1.0--288-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.0-288](https://img.shields.io/badge/AppVersion-0.1.0--288-informational?style=flat-square) + +A Helm chart for h2oGPT + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| agents.autoscaling.enabled | bool | `false` | | +| agents.autoscaling.maxReplicas | int | `2` | | +| agents.autoscaling.minReplicas | int | `1` | | +| agents.autoscaling.targetCPU | int | `80` | | +| agents.autoscaling.targetMemory | string | `"32Gi"` | | +| agents.enabled | bool | `false` | Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` | +| agents.env | object | `{}` | | +| agents.extraVolumeMounts | list | `[]` | Extra volume mounts | +| agents.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| agents.image.pullPolicy | string | `"IfNotPresent"` | | +| agents.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| agents.image.tag | string | `nil` | | +| agents.imagePullSecrets | string | `nil` | | +| agents.initImage.pullPolicy | string | `nil` | | +| agents.initImage.repository | string | `nil` | | +| agents.initImage.tag | string | `nil` | | +| agents.nodeSelector | string | `nil` | | +| 
agents.overrideConfig.agent_workers | int | `5` | | +| agents.overrideConfig.concurrency_count | int | `100` | | +| agents.overrideConfig.embedding_gpu_id | string | `"cpu"` | | +| agents.overrideConfig.enable_stt | bool | `false` | | +| agents.overrideConfig.enable_transcriptions | bool | `false` | | +| agents.overrideConfig.enable_tts | bool | `false` | | +| agents.overrideConfig.enforce_h2ogpt_api_key | bool | `true` | | +| agents.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` | | +| agents.overrideConfig.hf_embedding_model | string | `"fake"` | | +| agents.overrideConfig.metadata_in_context | string | `""` | | +| agents.overrideConfig.num_async | int | `10` | | +| agents.overrideConfig.rotate_align_resize_image | bool | `false` | | +| agents.overrideConfig.score_model | string | `"None"` | | +| agents.overrideConfig.share | bool | `false` | | +| agents.overrideConfig.top_k_docs_max_show | int | `100` | | +| agents.overrideConfig.visible_hosts_tab | bool | `false` | | +| agents.overrideConfig.visible_login_tab | bool | `false` | | +| agents.overrideConfig.visible_models_tab | bool | `false` | | +| agents.overrideConfig.visible_system_tab | bool | `false` | | +| agents.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
|
+| agents.podAnnotations | object | `{}` | |
+| agents.podLabels | object | `{}` | |
+| agents.podSecurityContext.fsGroup | string | `nil` | |
+| agents.podSecurityContext.runAsGroup | string | `nil` | |
+| agents.podSecurityContext.runAsNonRoot | bool | `true` | |
+| agents.podSecurityContext.runAsUser | string | `nil` | |
+| agents.replicaCount | int | `1` | |
+| agents.resources.limits.memory | string | `"64Gi"` | |
+| agents.resources.requests.memory | string | `"32Gi"` | |
+| agents.securityContext.allowPrivilegeEscalation | bool | `false` | |
+| agents.securityContext.capabilities.drop[0] | string | `"ALL"` | |
+| agents.securityContext.runAsNonRoot | bool | `true` | |
+| agents.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | |
+| agents.service.agentsPort | int | `5004` | |
+| agents.service.annotations | object | `{}` | |
+| agents.service.type | string | `"NodePort"` | |
+| agents.storage.class | string | `nil` | |
+| agents.storage.size | string | `"128Gi"` | |
+| agents.storage.useEphemeral | bool | `true` | |
+| agents.tolerations | string | `nil` | |
+| agents.updateStrategy.type | string | `"RollingUpdate"` | |
+| caCertificates | string | `""` | CA certs |
+| fullnameOverride | string | `""` | |
+| global.externalLLM.enabled | bool | `false` | |
+| global.externalLLM.modelLock | string | `nil` | |
+| global.externalLLM.openAI.enabled | bool | `false` | |
+| global.externalLLM.openAIAzure.enabled | bool | `false` | |
+| global.externalLLM.replicate.enabled | bool | `false` | |
+| global.externalLLM.secret | string | `nil` | list of secrets for h2ogpt and agents env |
+| global.visionModels.enabled | bool | `false` | Enable vision models |
+| global.visionModels.rotateAlignResizeImage | bool | `false` | |
+| global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itself needs to be set via modelLock or base_model.
Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | +| h2ogpt.agents | object | `{"agent_workers":5,"enabled":false}` | Enable agents | +| h2ogpt.agents.enabled | bool | `false` | Run agents with h2oGPT container | +| h2ogpt.enabled | bool | `true` | Enable h2oGPT | +| h2ogpt.env | object | `{}` | | +| h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts | +| h2ogpt.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| h2ogpt.image.pullPolicy | string | `"IfNotPresent"` | | +| h2ogpt.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| h2ogpt.image.tag | string | `nil` | | +| h2ogpt.imagePullSecrets | string | `nil` | | +| h2ogpt.initImage.pullPolicy | string | `nil` | | +| h2ogpt.initImage.repository | string | `nil` | | +| h2ogpt.initImage.tag | string | `nil` | | +| h2ogpt.nodeSelector | string | `nil` | | +| h2ogpt.overrideConfig.concurrency_count | int | `100` | | +| h2ogpt.overrideConfig.embedding_gpu_id | string | `"cpu"` | | +| h2ogpt.overrideConfig.enable_stt | bool | `false` | | +| h2ogpt.overrideConfig.enable_transcriptions | bool | `false` | | +| h2ogpt.overrideConfig.enable_tts | bool | `false` | | +| h2ogpt.overrideConfig.enforce_h2ogpt_api_key | bool | `true` | | +| h2ogpt.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` | | +| h2ogpt.overrideConfig.hf_embedding_model | string | `"fake"` | | +| h2ogpt.overrideConfig.metadata_in_context | string | `""` | | +| h2ogpt.overrideConfig.num_async | int | `10` | | +| h2ogpt.overrideConfig.openai_server | bool | `true` | | +| h2ogpt.overrideConfig.openai_workers | int | `5` | | +| h2ogpt.overrideConfig.rotate_align_resize_image | bool | `false` | | +| h2ogpt.overrideConfig.score_model | string | `"None"` | | +| h2ogpt.overrideConfig.share | bool | `false` | | +| h2ogpt.overrideConfig.top_k_docs_max_show | int | `100` | | +| h2ogpt.overrideConfig.visible_hosts_tab | bool | `false` | | +| h2ogpt.overrideConfig.visible_login_tab | 
bool | `false` | | +| h2ogpt.overrideConfig.visible_models_tab | bool | `false` | | +| h2ogpt.overrideConfig.visible_system_tab | bool | `false` | | +| h2ogpt.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | +| h2ogpt.podAnnotations | object | `{}` | | +| h2ogpt.podLabels | object | `{}` | | +| h2ogpt.podSecurityContext.fsGroup | string | `nil` | | +| h2ogpt.podSecurityContext.runAsGroup | string | `nil` | | +| h2ogpt.podSecurityContext.runAsNonRoot | bool | `true` | | +| h2ogpt.podSecurityContext.runAsUser | string | `nil` | | +| h2ogpt.replicaCount | int | `1` | | +| h2ogpt.resources.limits.memory | string | `"64Gi"` | | +| h2ogpt.resources.requests.memory | string | `"32Gi"` | | +| h2ogpt.securityContext.allowPrivilegeEscalation | bool | `false` | | +| h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| h2ogpt.securityContext.runAsNonRoot | bool | `true` | | +| h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| h2ogpt.service.agentsPort | int | `5004` | | +| h2ogpt.service.functionPort | int | `5002` | | +| h2ogpt.service.gptPort | int | `8888` | | +| h2ogpt.service.openaiPort | int | `5000` | | +| h2ogpt.service.type | string | `"NodePort"` | | +| h2ogpt.service.webPort | int | `80` | | +| h2ogpt.service.webServiceAnnotations | object | `{}` | | +| h2ogpt.storage.class | string | `nil` | | +| h2ogpt.storage.size | string | `"128Gi"` | | +| h2ogpt.storage.useEphemeral | bool | `true` | | +| h2ogpt.tolerations | string | `nil` | | +| h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | +| lmdeploy.containerArgs[0] | string | `"OpenGVLab/InternVL-Chat-V1-5"` | | +| lmdeploy.enabled | bool | `false` | Enable lmdeploy | +| lmdeploy.env | object | `{}` | | +| lmdeploy.hfSecret | string | `nil` | | +| lmdeploy.image.pullPolicy | string | `"IfNotPresent"` | | +| lmdeploy.image.repository | string | `"gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy"` | | +| 
lmdeploy.image.tag | string | `nil` | | +| lmdeploy.nodeSelector | string | `nil` | | +| lmdeploy.overrideConfig | string | `nil` | | +| lmdeploy.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | +| lmdeploy.podAnnotations | object | `{}` | | +| lmdeploy.podLabels | object | `{}` | | +| lmdeploy.podSecurityContext | string | `nil` | | +| lmdeploy.replicaCount | int | `1` | | +| lmdeploy.resources | string | `nil` | | +| lmdeploy.securityContext | string | `nil` | | +| lmdeploy.service.port | int | `23333` | | +| lmdeploy.service.type | string | `"ClusterIP"` | | +| lmdeploy.storage.class | string | `nil` | | +| lmdeploy.storage.size | string | `"512Gi"` | | +| lmdeploy.storage.useEphemeral | bool | `true` | | +| lmdeploy.tolerations | string | `nil` | | +| lmdeploy.updateStrategy.type | string | `"RollingUpdate"` | | +| nameOverride | string | `""` | | +| namespaceOverride | string | `""` | | +| tgi.containerArgs | string | `nil` | | +| tgi.enabled | bool | `false` | Enable tgi | +| tgi.env | object | `{}` | | +| tgi.hfSecret | string | `nil` | | +| tgi.image.pullPolicy | string | `"IfNotPresent"` | | +| tgi.image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | | +| tgi.image.tag | string | `"0.9.3"` | | +| tgi.nodeSelector | string | `nil` | | +| tgi.overrideConfig | string | `nil` | | +| tgi.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| tgi.podAnnotations | object | `{}` | | +| tgi.podLabels | object | `{}` | | +| tgi.podSecurityContext | string | `nil` | | +| tgi.replicaCount | int | `1` | | +| tgi.resources | string | `nil` | | +| tgi.securityContext | string | `nil` | | +| tgi.service.port | int | `8080` | | +| tgi.service.type | string | `"ClusterIP"` | | +| tgi.storage.class | string | `nil` | | +| tgi.storage.size | string | `"512Gi"` | | +| tgi.storage.useEphemeral | bool | `true` | | +| tgi.tolerations | string | `nil` | | +| tgi.updateStrategy.type | string | `"RollingUpdate"` | | +| vllm.containerArgs[0] | string | `"--model"` | | +| vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` | | +| vllm.containerArgs[2] | string | `"--tokenizer"` | | +| vllm.containerArgs[3] | string | `"hf-internal-testing/llama-tokenizer"` | | +| vllm.containerArgs[4] | string | `"--tensor-parallel-size"` | | +| vllm.containerArgs[5] | int | `2` | | +| vllm.containerArgs[6] | string | `"--seed"` | | +| vllm.containerArgs[7] | int | `1234` | | +| vllm.containerArgs[8] | string | `"--trust-remote-code"` | | +| vllm.enabled | bool | `false` | Enable vllm | +| vllm.env.DO_NOT_TRACK | string | `"1"` | | +| vllm.env.VLLM_NO_USAGE_STATS | string | `"1"` | | +| vllm.image.pullPolicy | string | `"IfNotPresent"` | | +| vllm.image.repository | string | `"vllm/vllm-openai"` | | +| vllm.image.tag | string | `"latest"` | | +| vllm.imagePullSecrets | string | `nil` | | +| vllm.nodeSelector | string | `nil` | | +| vllm.overrideConfig | string | `nil` | | +| vllm.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| vllm.podAnnotations | object | `{}` | | +| vllm.podLabels | object | `{}` | | +| vllm.podSecurityContext.fsGroup | string | `nil` | | +| vllm.podSecurityContext.runAsGroup | string | `nil` | | +| vllm.podSecurityContext.runAsNonRoot | bool | `true` | | +| vllm.podSecurityContext.runAsUser | string | `nil` | | +| vllm.replicaCount | int | `1` | | +| vllm.resources | string | `nil` | | +| vllm.securityContext.allowPrivilegeEscalation | bool | `false` | | +| vllm.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| vllm.securityContext.runAsNonRoot | bool | `true` | | +| vllm.securityContext.seccompProfile | string | `nil` | | +| vllm.service.port | int | `5000` | | +| vllm.service.type | string | `"ClusterIP"` | | +| vllm.storage.class | string | `nil` | | +| vllm.storage.size | string | `"512Gi"` | | +| vllm.storage.useEphemeral | bool | `true` | | +| vllm.tolerations | string | `nil` | | +| vllm.updateStrategy.type | string | `"RollingUpdate"` | | + From 1b75c9d512da62c65078fc1334f3389a9c74daae Mon Sep 17 00:00:00 2001 From: Lakindu Date: Sat, 26 Oct 2024 02:36:46 +0530 Subject: [PATCH 24/34] Update Chart version --- helm/h2ogpt-chart/Chart.yaml | 4 ++-- helm/h2ogpt-chart/templates/agents-hpa.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/helm/h2ogpt-chart/Chart.yaml b/helm/h2ogpt-chart/Chart.yaml index eeaf87fef..5a597ed84 100644 --- a/helm/h2ogpt-chart/Chart.yaml +++ b/helm/h2ogpt-chart/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0-288 +version: 0.2.1-1254 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. 
They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: 0.1.0-288 +appVersion: 0.2.1-1254 diff --git a/helm/h2ogpt-chart/templates/agents-hpa.yaml b/helm/h2ogpt-chart/templates/agents-hpa.yaml index 9872e8d43..f0c796484 100644 --- a/helm/h2ogpt-chart/templates/agents-hpa.yaml +++ b/helm/h2ogpt-chart/templates/agents-hpa.yaml @@ -30,4 +30,4 @@ spec: type: Utilization averageUtilization: {{ .Values.agents.autoscaling.targetMemory }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} From 0f52c7b62e57415b476da1433d7c2c834f61633f Mon Sep 17 00:00:00 2001 From: Lakindu Date: Mon, 28 Oct 2024 13:27:15 +0530 Subject: [PATCH 25/34] Update secrets --- helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml | 3 ++- helm/h2ogpt-chart/values.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml index 5ff95d9cd..6c6f5b74e 100644 --- a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml +++ b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml @@ -6,7 +6,8 @@ metadata: namespace: {{ include "h2ogpt.namespace" . | quote }} labels: {{- include "h2ogpt.labels" . 
| nindent 4 }} -data: +type: Opaque +stringData: {{- range $key, $value := .Values.global.externalLLM.secret }} {{ $key }}: {{ $value | quote }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 7e3fe7307..dbbd6019f 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -6,7 +6,7 @@ global: externalLLM: enabled: false # -- list of secrets for h2ogpt and agents env - secret: + secret: {} # OPENAI_AZURE_KEY: "value" # OPENAI_AZURE_API_BASE: "value" # OPENAI_API_KEY: "value" From 9af9f2dc7c0c1a94f9f709153ebe25141ee2e96f Mon Sep 17 00:00:00 2001 From: Lakindu Date: Mon, 28 Oct 2024 22:13:44 +0530 Subject: [PATCH 26/34] Remove global OpenAI, replicate and openAIAzure --- helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/h2ogpt-service.yaml | 2 +- helm/h2ogpt-chart/values.yaml | 10 ---------- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 549da1d5d..05b61255c 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -118,7 +118,7 @@ spec: - name: http containerPort: 7860 protocol: TCP - {{- if .Values.global.externalLLM.openAI.enabled }} + {{- if .Values.h2ogpt.overrideConfig.openai_server }} - name: openai containerPort: 5000 protocol: TCP diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index 043feb527..a41364864 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -17,7 +17,7 @@ spec: protocol: TCP port: {{ .Values.h2ogpt.service.webPort }} targetPort: 7860 - {{- if .Values.global.externalLLM.openAI.enabled }} + {{- if .Values.h2ogpt.overrideConfig.openai_server }} - name: openai protocol: TCP port: {{ .Values.h2ogpt.service.openaiPort }} diff --git 
a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index dbbd6019f..9bd04d6a3 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -14,15 +14,6 @@ global: modelLock: - openAIAzure: - enabled: false - - openAI: - enabled: false - - replicate: - enabled: false - visionModels: # -- Enable vision models enabled: false @@ -112,7 +103,6 @@ h2ogpt: openaiPort: 5000 functionPort: 5002 agentsPort: 5004 - gptPort: 8888 webServiceAnnotations: {} updateStrategy: From 78b2e720cbf27193ad9271f39b625e7e6e0d4c5e Mon Sep 17 00:00:00 2001 From: Lakindu Date: Mon, 28 Oct 2024 22:14:03 +0530 Subject: [PATCH 27/34] Update helm-doc --- helm/h2ogpt-chart/README.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index a410aa342..8e5fc3cec 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -1,6 +1,6 @@ # h2ogpt -![Version: 0.1.0-288](https://img.shields.io/badge/Version-0.1.0--288-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.0-288](https://img.shields.io/badge/AppVersion-0.1.0--288-informational?style=flat-square) +![Version: 0.2.1-1254](https://img.shields.io/badge/Version-0.2.1--1254-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.2.1-1254](https://img.shields.io/badge/AppVersion-0.2.1--1254-informational?style=flat-square) A Helm chart for h2oGPT @@ -70,10 +70,7 @@ A Helm chart for h2oGPT | fullnameOverride | string | `""` | | | global.externalLLM.enabled | bool | `false` | | | global.externalLLM.modelLock | string | `nil` | | -| global.externalLLM.openAI.enabled | bool | `false` | | -| global.externalLLM.openAIAzure.enabled | bool | `false` | | -| global.externalLLM.replicate.enabled | bool | `false` | | -| global.externalLLM.secret 
| string | `nil` | list of secrets for h2ogpt and agents env | +| global.externalLLM.secret | object | `{}` | list of secrets for h2ogpt and agents env | | global.visionModels.enabled | bool | `false` | Enable vision models | | global.visionModels.rotateAlignResizeImage | bool | `false` | | | global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | @@ -127,7 +124,6 @@ A Helm chart for h2oGPT | h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | | h2ogpt.service.agentsPort | int | `5004` | | | h2ogpt.service.functionPort | int | `5002` | | -| h2ogpt.service.gptPort | int | `8888` | | | h2ogpt.service.openaiPort | int | `5000` | | | h2ogpt.service.type | string | `"NodePort"` | | | h2ogpt.service.webPort | int | `80` | | From f5b79dadfd18bf064afb034de819acd6fc44ac89 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Tue, 29 Oct 2024 00:36:26 +0530 Subject: [PATCH 28/34] Fix agents volumes --- helm/h2ogpt-chart/templates/agents-deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index c9a0eea68..dcdda700a 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -129,7 +129,7 @@ spec: - name: {{ include "h2ogpt.fullname" . }}-agents-volume mountPath: /workspace/.cache subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-volume + - name: {{ include "h2ogpt.fullname" . }}-agents-volume mountPath: /workspace/save subPath: save {{- if .Values.caCertificates }} @@ -144,7 +144,7 @@ spec: - name: {{ include "h2ogpt.fullname" . }}-agents-volume {{- if not .Values.agents.storage.useEphemeral }} persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-volume + claimName: {{ include "h2ogpt.fullname" . 
}}-agents-volume {{- else}} ephemeral: volumeClaimTemplate: From 008636072c4c5661f5838073016ce4defd56f8ec Mon Sep 17 00:00:00 2001 From: Lakindu Date: Thu, 31 Oct 2024 01:38:41 +0530 Subject: [PATCH 29/34] Remove lmdeploy --- helm/h2ogpt-chart/README.md | 23 --- .../templates/h2ogpt-deployment.yaml | 17 +- .../templates/lmdeploy-configmap.yaml | 13 -- .../templates/lmdeploy-deployment.yaml | 145 ------------------ helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml | 15 -- .../templates/lmdeploy-service.yaml | 15 -- helm/h2ogpt-chart/templates/validations.yaml | 6 - helm/h2ogpt-chart/values.yaml | 43 ------ 8 files changed, 1 insertion(+), 276 deletions(-) delete mode 100644 helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml delete mode 100644 helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml delete mode 100644 helm/h2ogpt-chart/templates/lmdeploy-service.yaml diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index 8e5fc3cec..8d37e1f1b 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -133,29 +133,6 @@ A Helm chart for h2oGPT | h2ogpt.storage.useEphemeral | bool | `true` | | | h2ogpt.tolerations | string | `nil` | | | h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | -| lmdeploy.containerArgs[0] | string | `"OpenGVLab/InternVL-Chat-V1-5"` | | -| lmdeploy.enabled | bool | `false` | Enable lmdeploy | -| lmdeploy.env | object | `{}` | | -| lmdeploy.hfSecret | string | `nil` | | -| lmdeploy.image.pullPolicy | string | `"IfNotPresent"` | | -| lmdeploy.image.repository | string | `"gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy"` | | -| lmdeploy.image.tag | string | `nil` | | -| lmdeploy.nodeSelector | string | `nil` | | -| lmdeploy.overrideConfig | string | `nil` | | -| lmdeploy.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| -| lmdeploy.podAnnotations | object | `{}` | | -| lmdeploy.podLabels | object | `{}` | | -| lmdeploy.podSecurityContext | string | `nil` | | -| lmdeploy.replicaCount | int | `1` | | -| lmdeploy.resources | string | `nil` | | -| lmdeploy.securityContext | string | `nil` | | -| lmdeploy.service.port | int | `23333` | | -| lmdeploy.service.type | string | `"ClusterIP"` | | -| lmdeploy.storage.class | string | `nil` | | -| lmdeploy.storage.size | string | `"512Gi"` | | -| lmdeploy.storage.useEphemeral | bool | `true` | | -| lmdeploy.tolerations | string | `nil` | | -| lmdeploy.updateStrategy.type | string | `"RollingUpdate"` | | | nameOverride | string | `""` | | | namespaceOverride | string | `""` | | | tgi.containerArgs | string | `nil` | | diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 05b61255c..7556ca758 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -98,18 +98,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} + {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled )) }} args: - > python3 /workspace/generate.py @@ -165,10 +154,6 @@ spec: - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . 
}}-vllm-inference:{{ .Values.vllm.service.port }}" {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} {{- range $key, $value := .Values.h2ogpt.env }} - name: "{{ $key }}" value: "{{ $value }}" diff --git a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml deleted file mode 100644 index c1dd07713..000000000 --- a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.lmdeploy.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml deleted file mode 100644 index 95a49320f..000000000 --- a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml +++ /dev/null @@ -1,145 +0,0 @@ -{{- if and .Values.lmdeploy.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference -spec: - replicas: {{ .Values.lmdeploy.replicaCount }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- if .Values.lmdeploy.updateStrategy }} - strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.lmdeploy.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- with .Values.lmdeploy.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.lmdeploy.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.lmdeploy.podAffinity }} - podAntiAffinity: - {{- if .Values.lmdeploy.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.lmdeploy.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.lmdeploy.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - securityContext: - {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} - image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} - command: ["lmdeploy"] - args: - - "serve" - - "api_server" -{{- range $arg := .Values.lmdeploy.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 23333 - protocol: TCP - {{- if .Values.lmdeploy.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.lmdeploy.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.lmdeploy.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - - name: HF_HOME - value: "/workspace/.cache" - {{- range $key, $value := .Values.lmdeploy.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - {{- if not .Values.lmdeploy.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml b/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml deleted file mode 100644 index 164ec6f1d..000000000 --- a/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml deleted file mode 100644 index 831189944..000000000 --- a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - ports: - - protocol: TCP - port: {{ .Values.lmdeploy.service.port }} - targetPort: 23333 - type: {{ .Values.lmdeploy.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml index cd08023e8..ce4e264fd 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -1,12 +1,6 @@ {{- if and .Values.vllm.enabled .Values.tgi.enabled }} {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} {{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} {{- fail " Both agents in both h2ogpt.agents cannot be enabled. Enably only one and try again" }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 9bd04d6a3..6bcfb292b 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -351,48 +351,5 @@ vllm: podLabels: {} -lmdeploy: - # -- Enable lmdeploy - enabled: false - replicaCount: 1 - - image: - repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy - tag: - pullPolicy: IfNotPresent - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
- podAffinity: - # hostname: - # zone: - - storage: - size: 512Gi - class: - useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: - - "OpenGVLab/InternVL-Chat-V1-5" - - service: - type: ClusterIP - port: 23333 - - updateStrategy: - type: RollingUpdate - - podSecurityContext: - securityContext: - - resources: - nodeSelector: - tolerations: - - env: {} - - podAnnotations: {} - podLabels: {} - # -- CA certs caCertificates: "" From eccb0c2de85c6d16da09c8629f380628ddeff68a Mon Sep 17 00:00:00 2001 From: Lakindu Date: Thu, 31 Oct 2024 02:14:27 +0530 Subject: [PATCH 30/34] Remove tgi --- helm/h2ogpt-chart/README.md | 23 --- .../templates/h2ogpt-deployment.yaml | 17 +-- .../h2ogpt-chart/templates/tgi-configmap.yaml | 13 -- .../templates/tgi-deployment.yaml | 141 ------------------ helm/h2ogpt-chart/templates/tgi-pvc.yaml | 14 -- helm/h2ogpt-chart/templates/tgi-service.yaml | 15 -- .../{validations.yaml => validators.yaml} | 3 - helm/h2ogpt-chart/values.yaml | 44 +----- 8 files changed, 2 insertions(+), 268 deletions(-) delete mode 100644 helm/h2ogpt-chart/templates/tgi-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-deployment.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-pvc.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-service.yaml rename helm/h2ogpt-chart/templates/{validations.yaml => validators.yaml} (55%) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index 8d37e1f1b..2f41f4291 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -135,29 +135,6 @@ A Helm chart for h2oGPT | h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | | nameOverride | string | `""` | | | namespaceOverride | string | `""` | | -| tgi.containerArgs | string | `nil` | | -| tgi.enabled | bool | `false` | Enable tgi | -| tgi.env | object | `{}` | | -| tgi.hfSecret | string | `nil` | | -| tgi.image.pullPolicy | string | `"IfNotPresent"` | | -| tgi.image.repository | string | 
`"ghcr.io/huggingface/text-generation-inference"` | | -| tgi.image.tag | string | `"0.9.3"` | | -| tgi.nodeSelector | string | `nil` | | -| tgi.overrideConfig | string | `nil` | | -| tgi.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | -| tgi.podAnnotations | object | `{}` | | -| tgi.podLabels | object | `{}` | | -| tgi.podSecurityContext | string | `nil` | | -| tgi.replicaCount | int | `1` | | -| tgi.resources | string | `nil` | | -| tgi.securityContext | string | `nil` | | -| tgi.service.port | int | `8080` | | -| tgi.service.type | string | `"ClusterIP"` | | -| tgi.storage.class | string | `nil` | | -| tgi.storage.size | string | `"512Gi"` | | -| tgi.storage.useEphemeral | bool | `true` | | -| tgi.tolerations | string | `nil` | | -| tgi.updateStrategy.type | string | `"RollingUpdate"` | | | vllm.containerArgs[0] | string | `"--model"` | | | vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` | | | vllm.containerArgs[2] | string | `"--tokenizer"` | | diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 7556ca758..741390cd7 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -87,18 +87,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled )) }} + {{- if and .Values.h2ogpt.enabled (not .Values.vllm.enabled ) }} args: - > python3 /workspace/generate.py @@ -146,10 +135,6 @@ spec: name: {{ include "h2ogpt.fullname" . 
}}-external-llm-secret {{- end }} env: - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yaml b/helm/h2ogpt-chart/templates/tgi-configmap.yaml deleted file mode 100644 index ec5c17866..000000000 --- a/helm/h2ogpt-chart/templates/tgi-configmap.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml deleted file mode 100644 index 721b2ed01..000000000 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ /dev/null @@ -1,141 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - replicas: {{ .Values.tgi.replicaCount }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-pvc.yaml b/helm/h2ogpt-chart/templates/tgi-pvc.yaml deleted file mode 100644 index 0a34be2fd..000000000 --- a/helm/h2ogpt-chart/templates/tgi-pvc.yaml +++ /dev/null @@ -1,14 +0,0 @@ -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml deleted file mode 100644 index de42ad89a..000000000 --- a/helm/h2ogpt-chart/templates/tgi-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validators.yaml similarity index 55% rename from helm/h2ogpt-chart/templates/validations.yaml rename to helm/h2ogpt-chart/templates/validators.yaml index ce4e264fd..b97d33e5c 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validators.yaml @@ -1,6 +1,3 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} {{- fail " Both agents in both h2ogpt.agents cannot be enabled. Enably only one and try again" }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 6bcfb292b..78b79d159 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -140,7 +140,7 @@ agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false autoscaling: - # Enable autoscaling for agents + # Enable autoscaling (HPA) for agents enabled: false minReplicas: 1 maxReplicas: 2 @@ -239,48 +239,6 @@ agents: podAnnotations: {} podLabels: {} -tgi: - # -- Enable tgi - enabled: false - replicaCount: 1 - - image: - repository: ghcr.io/huggingface/text-generation-inference - tag: 0.9.3 - pullPolicy: IfNotPresent - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
- podAffinity: - # hostname: - # zone: - - storage: - size: 512Gi - class: - useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: - - service: - type: ClusterIP - port: 8080 - - updateStrategy: - type: RollingUpdate - - podSecurityContext: - securityContext: - - resources: - nodeSelector: - tolerations: - - env: {} - - podAnnotations: {} - podLabels: {} - vllm: # -- Enable vllm enabled: false From 86fae3c50fd18db3d5376c264edf4d54dfe32970 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Tue, 5 Nov 2024 21:09:57 +0530 Subject: [PATCH 31/34] Change overrideConfig passing method --- helm/h2ogpt-chart/README.md | 62 ++----- helm/h2ogpt-chart/templates/_helpers.tpl | 129 ++++++++++++-- .../templates/agents-configmap.yaml | 18 +- .../templates/h2ogpt-configmap.yaml | 18 +- .../templates/h2ogpt-deployment.yaml | 2 +- .../templates/h2ogpt-service.yaml | 2 +- helm/h2ogpt-chart/values.yaml | 168 +++++++++--------- 7 files changed, 250 insertions(+), 149 deletions(-) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index 2f41f4291..bd4dedfb8 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -8,6 +8,8 @@ A Helm chart for h2oGPT | Key | Type | Default | Description | |-----|------|---------|-------------| +| agents.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. 
| +| agents.agent_workers | int | `5` | | | agents.autoscaling.enabled | bool | `false` | | | agents.autoscaling.maxReplicas | int | `2` | | | agents.autoscaling.minReplicas | int | `1` | | @@ -24,26 +26,8 @@ A Helm chart for h2oGPT | agents.initImage.pullPolicy | string | `nil` | | | agents.initImage.repository | string | `nil` | | | agents.initImage.tag | string | `nil` | | -| agents.nodeSelector | string | `nil` | | -| agents.overrideConfig.agent_workers | int | `5` | | -| agents.overrideConfig.concurrency_count | int | `100` | | -| agents.overrideConfig.embedding_gpu_id | string | `"cpu"` | | -| agents.overrideConfig.enable_stt | bool | `false` | | -| agents.overrideConfig.enable_transcriptions | bool | `false` | | -| agents.overrideConfig.enable_tts | bool | `false` | | -| agents.overrideConfig.enforce_h2ogpt_api_key | bool | `true` | | -| agents.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` | | -| agents.overrideConfig.hf_embedding_model | string | `"fake"` | | -| agents.overrideConfig.metadata_in_context | string | `""` | | -| agents.overrideConfig.num_async | int | `10` | | -| agents.overrideConfig.rotate_align_resize_image | bool | `false` | | -| agents.overrideConfig.score_model | string | `"None"` | | -| agents.overrideConfig.share | bool | `false` | | -| agents.overrideConfig.top_k_docs_max_show | int | `100` | | -| agents.overrideConfig.visible_hosts_tab | bool | `false` | | -| agents.overrideConfig.visible_login_tab | bool | `false` | | -| agents.overrideConfig.visible_models_tab | bool | `false` | | -| agents.overrideConfig.visible_system_tab | bool | `false` | | +| agents.nodeSelector | object | `{}` | Node selector for the agents pods. | +| agents.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | | agents.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| | agents.podAnnotations | object | `{}` | | | agents.podLabels | object | `{}` | | @@ -52,7 +36,9 @@ A Helm chart for h2oGPT | agents.podSecurityContext.runAsNonRoot | bool | `true` | | | agents.podSecurityContext.runAsUser | string | `nil` | | | agents.replicaCount | int | `1` | | +| agents.resources.limits."nvidia.com/gpu" | int | `1` | | | agents.resources.limits.memory | string | `"64Gi"` | | +| agents.resources.requests."nvidia.com/gpu" | int | `1` | | | agents.resources.requests.memory | string | `"32Gi"` | | | agents.securityContext.allowPrivilegeEscalation | bool | `false` | | | agents.securityContext.capabilities.drop[0] | string | `"ALL"` | | @@ -64,7 +50,7 @@ A Helm chart for h2oGPT | agents.storage.class | string | `nil` | | | agents.storage.size | string | `"128Gi"` | | | agents.storage.useEphemeral | bool | `true` | | -| agents.tolerations | string | `nil` | | +| agents.tolerations | list | `[]` | Node taints to tolerate by the agents pods. | | agents.updateStrategy.type | string | `"RollingUpdate"` | | | caCertificates | string | `""` | CA certs | | fullnameOverride | string | `""` | | @@ -74,8 +60,9 @@ A Helm chart for h2oGPT | global.visionModels.enabled | bool | `false` | Enable vision models | | global.visionModels.rotateAlignResizeImage | bool | `false` | | | global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | -| h2ogpt.agents | object | `{"agent_workers":5,"enabled":false}` | Enable agents | -| h2ogpt.agents.enabled | bool | `false` | Run agents with h2oGPT container | +| h2ogpt.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. 
| +| h2ogpt.agents | object | `{"agent_workers":5,"enabled":true}` | Enable agents | +| h2ogpt.agents.enabled | bool | `true` | Run agents with h2oGPT container | | h2ogpt.enabled | bool | `true` | Enable h2oGPT | | h2ogpt.env | object | `{}` | | | h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts | @@ -87,27 +74,10 @@ A Helm chart for h2oGPT | h2ogpt.initImage.pullPolicy | string | `nil` | | | h2ogpt.initImage.repository | string | `nil` | | | h2ogpt.initImage.tag | string | `nil` | | -| h2ogpt.nodeSelector | string | `nil` | | -| h2ogpt.overrideConfig.concurrency_count | int | `100` | | -| h2ogpt.overrideConfig.embedding_gpu_id | string | `"cpu"` | | -| h2ogpt.overrideConfig.enable_stt | bool | `false` | | -| h2ogpt.overrideConfig.enable_transcriptions | bool | `false` | | -| h2ogpt.overrideConfig.enable_tts | bool | `false` | | -| h2ogpt.overrideConfig.enforce_h2ogpt_api_key | bool | `true` | | -| h2ogpt.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` | | -| h2ogpt.overrideConfig.hf_embedding_model | string | `"fake"` | | -| h2ogpt.overrideConfig.metadata_in_context | string | `""` | | -| h2ogpt.overrideConfig.num_async | int | `10` | | -| h2ogpt.overrideConfig.openai_server | bool | `true` | | -| h2ogpt.overrideConfig.openai_workers | int | `5` | | -| h2ogpt.overrideConfig.rotate_align_resize_image | bool | `false` | | -| h2ogpt.overrideConfig.score_model | string | `"None"` | | -| h2ogpt.overrideConfig.share | bool | `false` | | -| h2ogpt.overrideConfig.top_k_docs_max_show | int | `100` | | -| h2ogpt.overrideConfig.visible_hosts_tab | bool | `false` | | -| h2ogpt.overrideConfig.visible_login_tab | bool | `false` | | -| h2ogpt.overrideConfig.visible_models_tab | bool | `false` | | -| h2ogpt.overrideConfig.visible_system_tab | bool | `false` | | +| h2ogpt.nodeSelector | object | `{}` | Node selector for the h2ogpt pods. 
| +| h2ogpt.openai.enabled | bool | `true` | | +| h2ogpt.openai.openai_workers | int | `5` | | +| h2ogpt.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | | h2ogpt.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | | h2ogpt.podAnnotations | object | `{}` | | | h2ogpt.podLabels | object | `{}` | | @@ -116,7 +86,9 @@ A Helm chart for h2oGPT | h2ogpt.podSecurityContext.runAsNonRoot | bool | `true` | | | h2ogpt.podSecurityContext.runAsUser | string | `nil` | | | h2ogpt.replicaCount | int | `1` | | +| h2ogpt.resources.limits."nvidia.com/gpu" | int | `0` | | | h2ogpt.resources.limits.memory | string | `"64Gi"` | | +| h2ogpt.resources.requests."nvidia.com/gpu" | int | `0` | | | h2ogpt.resources.requests.memory | string | `"32Gi"` | | | h2ogpt.securityContext.allowPrivilegeEscalation | bool | `false` | | | h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` | | @@ -131,7 +103,7 @@ A Helm chart for h2oGPT | h2ogpt.storage.class | string | `nil` | | | h2ogpt.storage.size | string | `"128Gi"` | | | h2ogpt.storage.useEphemeral | bool | `true` | | -| h2ogpt.tolerations | string | `nil` | | +| h2ogpt.tolerations | list | `[]` | Node taints to tolerate by the h2ogpt pods. 
| | h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | | nameOverride | string | `""` | | | namespaceOverride | string | `""` | | diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 9688e8e4f..77163b4a6 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -70,26 +70,129 @@ Create the name of the service account to use {{- end }} {{/* -Configs for agents server +Config for h2oGPT */}} -{{- define "agents.overrideConfig" -}} -agent_server: True -agent_port: "5004" +{{- define "h2ogpt.config" -}} +{{- with .Values.h2ogpt }} +verbose: {{ default "True" .overrideConfig.verbose }} +{{- if .overrideConfig.heap_app_id }} +heap_app_id: {{ .overrideConfig.heap_app_id }} +{{- end }} +num_async: {{ default 10 .overrideConfig.num_async }} +save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} +score_model: {{ default "None" .overrideConfig.score_model }} +share: {{ default "False" .overrideConfig.share }} +enforce_h2ogpt_api_key: {{ default "False" .overrideConfig.enforce_h2ogpt_api_key }} +enforce_h2ogpt_ui_key: {{ default "False" .overrideConfig.enforce_h2ogpt_ui_key }} +{{- if .overrideConfig.h2ogpt_api_keys }} +h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} +{{- end }} +{{- if .overrideConfig.use_auth_token }} +use_auth_token: {{ .overrideConfig.use_auth_token }} +{{- end }} +visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} +visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }} +top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} +{{- if .overrideConfig.admin_pass }} +admin_pass: {{ .overrideConfig.admin_pass }} +{{- end }} +{{- if .openai.enabled }} +openai_server: "True" +openai_port: 5000 +openai_workers: {{ default 5 .openai.openai_workers }} +{{- end }} +{{- if .agents.enabled }} +agent_server: "True" 
+agent_port: 5004 +agent_workers: {{ .agents.agent_workers }} +{{- end }} +function_server: {{ default "True" .overrideConfig.function_server }} +function_port: 5002 +function_server_workers: {{ default 1 .overrideConfig.function_server_workers }} +multiple_workers_gunicorn: {{ default "True" .overrideConfig.multiple_workers_gunicorn }} +llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} +enable_llava: {{ default "True" .overrideConfig.enable_llava }} +{{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} +enable_tts: {{ default "False" .overrideConfig.enable_tts }} +enable_stt: {{ default "True" .overrideConfig.enable_stt }} +enable_transcriptions: {{ default "True" .overrideConfig.enable_transcriptions }} +asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} +pre_load_embedding_model: {{ default "True" .overrideConfig.pre_load_embedding_model }} +pre_load_image_audio_models: {{ default "True" .overrideConfig.pre_load_image_audio_models }} +cut_distance: {{ default 10000 .overrideConfig.cut_distance }} +hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} +enable_captions: {{ default "False" .overrideConfig.enable_captions }} +enable_doctr: {{ default "True" .overrideConfig.enable_doctr }} +{{- else }} +enable_tts: {{ default "False" .overrideConfig.enable_tts }} +enable_stt: {{ default "False" .overrideConfig.enable_stt }} +enable_transcriptions: {{ default "False" .overrideConfig.enable_transcriptions }} +embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} +hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} +pre_load_embedding_model: {{ default "False" .overrideConfig.pre_load_embedding_model }} +pre_load_image_audio_models: {{ default "False" .overrideConfig.pre_load_image_audio_models }} +enable_captions: {{ default "False" .overrideConfig.enable_captions }} +enable_doctr: {{ default 
"False" .overrideConfig.enable_doctr }} +{{- end }} +{{- end }} {{- end }} {{/* -Configs for agents with h2ogpt +Config for agents */}} -{{- define "h2ogpt.overrideConfig" -}} -{{- if .Values.h2ogpt.agents.enabled }} -agent_server: True -agent_port: "5004" -multiple_workers_gunicorn: True -agent_workers: {{ .Values.h2ogpt.agents.agent_workers}} +{{- define "agents.config" -}} +{{- with .Values.agents }} +verbose: {{ default "True" .overrideConfig.verbose }} +{{- if .overrideConfig.heap_app_id }} +heap_app_id: {{ .overrideConfig.heap_app_id }} +{{- end }} +num_async: {{ default 10 .overrideConfig.num_async }} +save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} +score_model: {{ default "None" .overrideConfig.score_model }} +share: {{ default "False" .overrideConfig.share }} +enforce_h2ogpt_api_key: {{ default "False" .overrideConfig.enforce_h2ogpt_api_key }} +enforce_h2ogpt_ui_key: {{ default "False" .overrideConfig.enforce_h2ogpt_ui_key }} +{{- if .overrideConfig.h2ogpt_api_keys }} +h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} +{{- end }} +{{- if .overrideConfig.use_auth_token }} +use_auth_token: {{ .overrideConfig.use_auth_token }} +{{- end }} +visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} +visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }} +top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} +{{- if .overrideConfig.admin_pass }} +admin_pass: {{ .overrideConfig.admin_pass }} +{{- end }} +agent_server: "True" +agent_port: 5004 +agent_workers: {{ default 5 .agent_workers }} +multiple_workers_gunicorn: {{ default "True" .overrideConfig.multiple_workers_gunicorn }} +llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} +enable_llava: {{ default "True" .overrideConfig.enable_llava }} +{{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} +enable_tts: {{ 
default "False" .overrideConfig.enable_tts }} +enable_stt: {{ default "True" .overrideConfig.enable_stt }} +enable_transcriptions: {{ default "True" .overrideConfig.enable_transcriptions }} +asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} +pre_load_embedding_model: {{ default "True" .overrideConfig.pre_load_embedding_model }} +pre_load_image_audio_models: {{ default "True" .overrideConfig.pre_load_image_audio_models }} +cut_distance: {{ default 10000 .overrideConfig.cut_distance }} +hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} +enable_captions: {{ default "False" .overrideConfig.enable_captions }} +enable_doctr: {{ default "True" .overrideConfig.enable_doctr }} {{- else }} -agents_server: False +enable_tts: {{ default "False" .overrideConfig.enable_tts }} +enable_stt: {{ default "False" .overrideConfig.enable_stt }} +enable_transcriptions: {{ default "False" .overrideConfig.enable_transcriptions }} +embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} +hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} +pre_load_embedding_model: {{ default "False" .overrideConfig.pre_load_embedding_model }} +pre_load_image_audio_models: {{ default "False" .overrideConfig.pre_load_image_audio_models }} +enable_captions: {{ default "False" .overrideConfig.enable_captions }} +enable_doctr: {{ default "False" .overrideConfig.enable_doctr }} {{- end }} - {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/agents-configmap.yaml b/helm/h2ogpt-chart/templates/agents-configmap.yaml index e242dee69..2f293cd2f 100644 --- a/helm/h2ogpt-chart/templates/agents-configmap.yaml +++ b/helm/h2ogpt-chart/templates/agents-configmap.yaml @@ -7,10 +7,20 @@ metadata: labels: {{- include "h2ogpt.labels" . | nindent 4 }} data: -{{- range $key, $value := ( include "agents.overrideConfig" . 
| fromYaml ) }} -    {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- range $key, $value := ( include "agents.config" . | fromYaml ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} {{- end }} -{{- range $key, $value := .Values.agents.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- range $key, $value := ( .Values.agents.additionalConfig ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} {{- end }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml index 902705552..ceb8a18d9 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -7,10 +7,20 @@ metadata: labels: {{- include "h2ogpt.labels" . | nindent 4 }} data: -{{- range $key, $value := ( include "h2ogpt.overrideConfig" . | fromYaml ) }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- range $key, $value := ( include "h2ogpt.config" . 
| fromYaml ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ($value | toString)) ( eq "false" ($value | toString)) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} {{- end }} -{{- range $key, $value := .Values.h2ogpt.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- range $key, $value := ( .Values.h2ogpt.additionalConfig ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ($value | toString)) ( eq "false" ($value | toString)) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} {{- end }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 741390cd7..bac71f22d 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -96,7 +96,7 @@ spec: - name: http containerPort: 7860 protocol: TCP - {{- if .Values.h2ogpt.overrideConfig.openai_server }} + {{- if .Values.h2ogpt.openai.enabled }} - name: openai containerPort: 5000 protocol: TCP diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index a41364864..747aed223 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -17,7 +17,7 @@ spec: protocol: TCP port: {{ .Values.h2ogpt.service.webPort }} targetPort: 7860 - {{- if .Values.h2ogpt.overrideConfig.openai_server }} + {{- if .Values.h2ogpt.openai.enabled }} - name: openai protocol: TCP port: {{ .Values.h2ogpt.service.openaiPort }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 78b79d159..95b1d67f1 100644 --- a/helm/h2ogpt-chart/values.yaml +++ 
b/helm/h2ogpt-chart/values.yaml @@ -27,8 +27,11 @@ h2ogpt: # -- Enable agents agents: # -- Run agents with h2oGPT container - enabled: false + enabled: true agent_workers: 5 + openai: + enabled: true + openai_workers: 5 replicaCount: 1 imagePullSecrets: image: @@ -52,50 +55,42 @@ h2ogpt: class: useEphemeral: true -# -- Example configs to use when not using Model Lock and External LLM - # overrideConfig: - # base_model: h2oai/h2ogpt-4096-llama2-7b-chat - # use_safetensors: True - # prompt_type: llama2 - # save_dir: /workspace/save/ - # use_gpu_id: False - # score_model: None - # max_max_new_tokens: 2048 - # max_new_tokens: 1024 - - overrideConfig: - visible_login_tab: False - visible_system_tab: False - visible_models_tab: False - visible_hosts_tab: False - # change below to valid vision model or remove this entry - #visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']" - rotate_align_resize_image: False - concurrency_count: 100 - top_k_docs_max_show: 100 - num_async: 10 - # change below to valid directory or remove this entry - #save_dir: "/docker_logs" - score_model: "None" - enable_tts: False - enable_stt: False - enable_transcriptions: False - embedding_gpu_id: "cpu" - hf_embedding_model: "fake" - openai_server: True - openai_workers: 5 - share: False - enforce_h2ogpt_api_key: True - enforce_h2ogpt_ui_key: False - # change to something secure for ui access to backend - #h2ogpt_api_keys: "['api_key_change_me']" - metadata_in_context: "" - # change or remove if using model hub - #use_auth_token: "hf_xxxxx" - # change below to first visible model or remove this entry - #visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']" - # change so ui or api cannot access without this password - #admin_pass: "admin_password_change_me" + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. Make sure to quote boolean values ex: "True","true","false","False". + # -- Supported configs are commented. 
If you don't pass any value, keep {} + overrideConfig: {} +# verbose: +# heap_app_id: +# num_async: +# save_dir: +# score_model: +# share: +# enforce_h2ogpt_api_key: +# enforce_h2ogpt_ui_key: +# h2ogpt_api_keys: +# use_auth_token: +# visible_models: +# visible_vision_models: +# top_k_docs_max_show: +# admin_pass: +# function_server: +# function_server_workers: +# multiple_workers_gunicorn: +# llava_model: +# enable_llava: +# enable_tts: +# enable_stt: +# enable_transcriptions: +# asr_model: +# pre_load_embedding_model: +# pre_load_image_audio_models: +# cut_distance: +# hf_embedding_model: +# enable_captions: +# enable_doctr: +# embedding_gpu_id: + + # -- You can pass additional config here if overrideConfig does not have it. + additionalConfig: {} service: type: NodePort @@ -126,10 +121,14 @@ h2ogpt: resources: requests: memory: 32Gi + nvidia.com/gpu: 0 limits: memory: 64Gi - nodeSelector: - tolerations: + nvidia.com/gpu: 0 + # -- Node taints to tolerate by the h2ogpt pods. + tolerations: [] + # -- Node selector for the h2ogpt pods. 
+ nodeSelector: {} env: {} @@ -139,6 +138,7 @@ h2ogpt: agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false + agent_workers: 5 autoscaling: # Enable autoscaling (HPA) for agents enabled: false @@ -170,38 +170,40 @@ agents: class: useEphemeral: true - overrideConfig: - agent_workers: 5 - visible_login_tab: False - visible_system_tab: False - visible_models_tab: False - visible_hosts_tab: False - # change below to valid vision model or remove this entry - #visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']" - rotate_align_resize_image: False - concurrency_count: 100 - top_k_docs_max_show: 100 - num_async: 10 - # change below to valid directory or remove this entry - #save_dir: "/docker_logs" - score_model: "None" - enable_tts: False - enable_stt: False - enable_transcriptions: False - embedding_gpu_id: "cpu" - hf_embedding_model: "fake" - share: False - enforce_h2ogpt_api_key: True - enforce_h2ogpt_ui_key: False - # change to something secure for ui access to backend - #h2ogpt_api_keys: "['api_key_change_me']" - metadata_in_context: "" - # change or remove if using model hub - #use_auth_token: "hf_xxxxx" - # change below to first visible model or remove this entry - #visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']" - # change so ui or api cannot access without this password - #admin_pass: "admin_password_change_me" + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. Make sure to quote boolean values ex: "True","true","false","False". + # -- Supported configs are commented. 
If you don't pass any value, keep {} + overrideConfig: { } +# verbose: +# heap_app_id: +# num_async: +# save_dir: +# score_model: +# share: +# enforce_h2ogpt_api_key: +# enforce_h2ogpt_ui_key: +# h2ogpt_api_keys: +# use_auth_token: +# visible_models: +# visible_vision_models: +# top_k_docs_max_show: +# admin_pass: +# multiple_workers_gunicorn: +# llava_model: +# enable_llava: +# enable_tts: +# enable_stt: +# enable_transcriptions: +# asr_model: +# pre_load_embedding_model: +# pre_load_image_audio_models: +# cut_distance: +# hf_embedding_model: +# enable_captions: +# enable_doctr: +# embedding_gpu_id: + + # -- You can pass additional config here if overrideConfig does not have it. + additionalConfig: { } service: type: NodePort @@ -229,10 +231,14 @@ agents: resources: requests: memory: 32Gi + nvidia.com/gpu: 1 limits: memory: 64Gi - nodeSelector: - tolerations: + nvidia.com/gpu: 1 + # -- Node taints to tolerate by the agents pods. + tolerations: [] + # -- Node selector for the agents pods. 
+ nodeSelector: {} env: {} From 9818a0481663a6199586a05339fa4363e5d3e5be Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 6 Nov 2024 00:35:41 +0530 Subject: [PATCH 32/34] Fix boolean values quote issue --- helm/h2ogpt-chart/templates/_helpers.tpl | 86 ++++++++++++------------ helm/h2ogpt-chart/values.yaml | 10 ++- 2 files changed, 47 insertions(+), 49 deletions(-) diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 77163b4a6..69ae9ae93 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -75,16 +75,16 @@ Config for h2oGPT {{- define "h2ogpt.config" -}} {{- with .Values.h2ogpt }} -verbose: {{ default "True" .overrideConfig.verbose }} +verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} {{- if .overrideConfig.heap_app_id }} heap_app_id: {{ .overrideConfig.heap_app_id }} {{- end }} num_async: {{ default 10 .overrideConfig.num_async }} save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} score_model: {{ default "None" .overrideConfig.score_model }} -share: {{ default "False" .overrideConfig.share }} -enforce_h2ogpt_api_key: {{ default "False" .overrideConfig.enforce_h2ogpt_api_key }} -enforce_h2ogpt_ui_key: {{ default "False" .overrideConfig.enforce_h2ogpt_ui_key }} +share: {{ default "False" (.overrideConfig.share | quote ) }} +enforce_h2ogpt_api_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_api_key | quote ) }} +enforce_h2ogpt_ui_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_ui_key | quote ) }} {{- if .overrideConfig.h2ogpt_api_keys }} h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} {{- end }} @@ -92,7 +92,7 @@ h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} use_auth_token: {{ .overrideConfig.use_auth_token }} {{- end }} visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} -visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" 
.overrideConfig.visible_vision_models }} +{{/*visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }}*/}} top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} {{- if .overrideConfig.admin_pass }} admin_pass: {{ .overrideConfig.admin_pass }} @@ -107,33 +107,33 @@ agent_server: "True" agent_port: 5004 agent_workers: {{ .agents.agent_workers }} {{- end }} -function_server: {{ default "True" .overrideConfig.function_server }} +function_server: {{ default "True" ( .overrideConfig.function_server | quote ) }} function_port: 5002 function_server_workers: {{ default 1 .overrideConfig.function_server_workers }} -multiple_workers_gunicorn: {{ default "True" .overrideConfig.multiple_workers_gunicorn }} +multiple_workers_gunicorn: {{ default "True" ( .overrideConfig.multiple_workers_gunicorn | quote ) }} llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} -enable_llava: {{ default "True" .overrideConfig.enable_llava }} +enable_llava: {{ default "True" ( .overrideConfig.enable_llava | quote ) }} {{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} -enable_tts: {{ default "False" .overrideConfig.enable_tts }} -enable_stt: {{ default "True" .overrideConfig.enable_stt }} -enable_transcriptions: {{ default "True" .overrideConfig.enable_transcriptions }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "True" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "True" ( .overrideConfig.enable_transcriptions | quote ) }} asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} -pre_load_embedding_model: {{ default "True" .overrideConfig.pre_load_embedding_model }} -pre_load_image_audio_models: {{ default "True" .overrideConfig.pre_load_image_audio_models }} +pre_load_embedding_model: {{ default "True" (.overrideConfig.pre_load_embedding_model | quote ) }} 
+pre_load_image_audio_models: {{ default "True" ( .overrideConfig.pre_load_image_audio_models | quote ) }} cut_distance: {{ default 10000 .overrideConfig.cut_distance }} hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} -enable_captions: {{ default "False" .overrideConfig.enable_captions }} -enable_doctr: {{ default "True" .overrideConfig.enable_doctr }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "True" ( .overrideConfig.enable_doctr | quote ) }} {{- else }} -enable_tts: {{ default "False" .overrideConfig.enable_tts }} -enable_stt: {{ default "False" .overrideConfig.enable_stt }} -enable_transcriptions: {{ default "False" .overrideConfig.enable_transcriptions }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "False" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "False" ( .overrideConfig.enable_transcriptions | quote ) }} embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} -pre_load_embedding_model: {{ default "False" .overrideConfig.pre_load_embedding_model }} -pre_load_image_audio_models: {{ default "False" .overrideConfig.pre_load_image_audio_models }} -enable_captions: {{ default "False" .overrideConfig.enable_captions }} -enable_doctr: {{ default "False" .overrideConfig.enable_doctr }} +pre_load_embedding_model: {{ default "False" ( .overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "False" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} {{- end }} {{- end }} {{- end }} @@ -144,16 +144,16 @@ Config for agents {{- define "agents.config" -}} {{- with .Values.agents }} 
-verbose: {{ default "True" .overrideConfig.verbose }} +verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} {{- if .overrideConfig.heap_app_id }} heap_app_id: {{ .overrideConfig.heap_app_id }} {{- end }} num_async: {{ default 10 .overrideConfig.num_async }} save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} score_model: {{ default "None" .overrideConfig.score_model }} -share: {{ default "False" .overrideConfig.share }} -enforce_h2ogpt_api_key: {{ default "False" .overrideConfig.enforce_h2ogpt_api_key }} -enforce_h2ogpt_ui_key: {{ default "False" .overrideConfig.enforce_h2ogpt_ui_key }} +share: {{ default "False" (.overrideConfig.share | quote ) }} +enforce_h2ogpt_api_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_api_key | quote ) }} +enforce_h2ogpt_ui_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_ui_key | quote ) }} {{- if .overrideConfig.h2ogpt_api_keys }} h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} {{- end }} @@ -161,7 +161,7 @@ h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} use_auth_token: {{ .overrideConfig.use_auth_token }} {{- end }} visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} -visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }} +{{/*visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }}*/}} top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} {{- if .overrideConfig.admin_pass }} admin_pass: {{ .overrideConfig.admin_pass }} @@ -169,30 +169,30 @@ admin_pass: {{ .overrideConfig.admin_pass }} agent_server: "True" agent_port: 5004 agent_workers: {{ default 5 .agent_workers }} -multiple_workers_gunicorn: {{ default "True" .overrideConfig.multiple_workers_gunicorn }} +multiple_workers_gunicorn: {{ default "True" ( .overrideConfig.multiple_workers_gunicorn | quote ) }} llava_model: {{ default 
"openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} -enable_llava: {{ default "True" .overrideConfig.enable_llava }} +enable_llava: {{ default "True" ( .overrideConfig.enable_llava | quote ) }} {{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} -enable_tts: {{ default "False" .overrideConfig.enable_tts }} -enable_stt: {{ default "True" .overrideConfig.enable_stt }} -enable_transcriptions: {{ default "True" .overrideConfig.enable_transcriptions }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "True" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "True" ( .overrideConfig.enable_transcriptions | quote ) }} asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} -pre_load_embedding_model: {{ default "True" .overrideConfig.pre_load_embedding_model }} -pre_load_image_audio_models: {{ default "True" .overrideConfig.pre_load_image_audio_models }} +pre_load_embedding_model: {{ default "True" (.overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "True" ( .overrideConfig.pre_load_image_audio_models | quote ) }} cut_distance: {{ default 10000 .overrideConfig.cut_distance }} hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} -enable_captions: {{ default "False" .overrideConfig.enable_captions }} -enable_doctr: {{ default "True" .overrideConfig.enable_doctr }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "True" ( .overrideConfig.enable_doctr | quote ) }} {{- else }} -enable_tts: {{ default "False" .overrideConfig.enable_tts }} -enable_stt: {{ default "False" .overrideConfig.enable_stt }} -enable_transcriptions: {{ default "False" .overrideConfig.enable_transcriptions }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "False" ( 
.overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "False" ( .overrideConfig.enable_transcriptions | quote ) }} embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} -pre_load_embedding_model: {{ default "False" .overrideConfig.pre_load_embedding_model }} -pre_load_image_audio_models: {{ default "False" .overrideConfig.pre_load_image_audio_models }} -enable_captions: {{ default "False" .overrideConfig.enable_captions }} -enable_doctr: {{ default "False" .overrideConfig.enable_doctr }} +pre_load_embedding_model: {{ default "False" ( .overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "False" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} {{- end }} {{- end }} {{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 95b1d67f1..6d8183e36 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -55,7 +55,7 @@ h2ogpt: class: useEphemeral: true - # -- Defaults configs are set internally with recommended values. Set values if you really need to change. Make sure to quote boolean values ex: "True","true","false","False". + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. # -- Supported configs are commented. If you don't pass any value, keep {} overrideConfig: {} # verbose: @@ -69,7 +69,6 @@ h2ogpt: # h2ogpt_api_keys: # use_auth_token: # visible_models: -# visible_vision_models: # top_k_docs_max_show: # admin_pass: # function_server: @@ -170,9 +169,9 @@ agents: class: useEphemeral: true - # -- Defaults configs are set internally with recommended values. Set values if you really need to change. 
Make sure to quote boolean values ex: "True","true","false","False". + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. # -- Supported configs are commented. If you don't pass any value, keep {} - overrideConfig: { } + overrideConfig: {} # verbose: # heap_app_id: # num_async: @@ -184,7 +183,6 @@ agents: # h2ogpt_api_keys: # use_auth_token: # visible_models: -# visible_vision_models: # top_k_docs_max_show: # admin_pass: # multiple_workers_gunicorn: @@ -203,7 +201,7 @@ agents: # embedding_gpu_id: # -- You can pass additional config here if overrideConfig does not have it. - additionalConfig: { } + additionalConfig: {} service: type: NodePort From 72c5859cad6aa7022bf2164bbf185a1ce6209916 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 6 Nov 2024 15:19:19 +0530 Subject: [PATCH 33/34] Add new line --- helm/h2ogpt-chart/templates/_helpers.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 69ae9ae93..26661a337 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -195,4 +195,4 @@ enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} {{- end }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} From 36ca38ef005282e59587baa4c3425eb75db1b702 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 6 Nov 2024 15:30:33 +0530 Subject: [PATCH 34/34] Rename agents to agent --- helm/h2ogpt-chart/README.md | 96 +++++++++---------- helm/h2ogpt-chart/templates/_helpers.tpl | 10 +- .../templates/agents-configmap.yaml | 8 +- .../templates/agents-deployment.yaml | 76 +++++++-------- helm/h2ogpt-chart/templates/agents-hpa.yaml | 18 ++-- helm/h2ogpt-chart/templates/agents-pvc.yaml | 8 +- .../templates/agents-service.yaml | 12 +-- .../global-external-llm-secrets.yaml | 
2 +- .../templates/h2ogpt-deployment.yaml | 2 +- .../templates/h2ogpt-service.yaml | 4 +- helm/h2ogpt-chart/templates/validators.yaml | 4 +- helm/h2ogpt-chart/values.yaml | 26 ++--- 12 files changed, 133 insertions(+), 133 deletions(-) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index bd4dedfb8..b4b6bc94b 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -8,61 +8,61 @@ A Helm chart for h2oGPT | Key | Type | Default | Description | |-----|------|---------|-------------| -| agents.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. | -| agents.agent_workers | int | `5` | | -| agents.autoscaling.enabled | bool | `false` | | -| agents.autoscaling.maxReplicas | int | `2` | | -| agents.autoscaling.minReplicas | int | `1` | | -| agents.autoscaling.targetCPU | int | `80` | | -| agents.autoscaling.targetMemory | string | `"32Gi"` | | -| agents.enabled | bool | `false` | Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` | -| agents.env | object | `{}` | | -| agents.extraVolumeMounts | list | `[]` | Extra volume mounts | -| agents.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | -| agents.image.pullPolicy | string | `"IfNotPresent"` | | -| agents.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | -| agents.image.tag | string | `nil` | | -| agents.imagePullSecrets | string | `nil` | | -| agents.initImage.pullPolicy | string | `nil` | | -| agents.initImage.repository | string | `nil` | | -| agents.initImage.tag | string | `nil` | | -| agents.nodeSelector | object | `{}` | Node selector for the agents pods. | -| agents.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | -| agents.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| -| agents.podAnnotations | object | `{}` | | -| agents.podLabels | object | `{}` | | -| agents.podSecurityContext.fsGroup | string | `nil` | | -| agents.podSecurityContext.runAsGroup | string | `nil` | | -| agents.podSecurityContext.runAsNonRoot | bool | `true` | | -| agents.podSecurityContext.runAsUser | string | `nil` | | -| agents.replicaCount | int | `1` | | -| agents.resources.limits."nvidia.com/gpu" | int | `1` | | -| agents.resources.limits.memory | string | `"64Gi"` | | -| agents.resources.requests."nvidia.com/gpu" | int | `1` | | -| agents.resources.requests.memory | string | `"32Gi"` | | -| agents.securityContext.allowPrivilegeEscalation | bool | `false` | | -| agents.securityContext.capabilities.drop[0] | string | `"ALL"` | | -| agents.securityContext.runAsNonRoot | bool | `true` | | -| agents.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | -| agents.service.agentsPort | int | `5004` | | -| agents.service.annotations | object | `{}` | | -| agents.service.type | string | `"NodePort"` | | -| agents.storage.class | string | `nil` | | -| agents.storage.size | string | `"128Gi"` | | -| agents.storage.useEphemeral | bool | `true` | | -| agents.tolerations | list | `[]` | Node taints to tolerate by the agents pods. | -| agents.updateStrategy.type | string | `"RollingUpdate"` | | +| agent.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. 
| +| agent.agent_workers | int | `5` | | +| agent.autoscaling.enabled | bool | `false` | | +| agent.autoscaling.maxReplicas | int | `2` | | +| agent.autoscaling.minReplicas | int | `1` | | +| agent.autoscaling.targetCPU | int | `80` | | +| agent.autoscaling.targetMemory | string | `"32Gi"` | | +| agent.enabled | bool | `true` | Enable agent, this must be `false` if `h2ogpt.agent.enabled` is `true` | +| agent.env | object | `{}` | | +| agent.extraVolumeMounts | list | `[]` | Extra volume mounts | +| agent.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| agent.image.pullPolicy | string | `"IfNotPresent"` | | +| agent.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| agent.image.tag | string | `nil` | | +| agent.imagePullSecrets | string | `nil` | | +| agent.initImage.pullPolicy | string | `nil` | | +| agent.initImage.repository | string | `nil` | | +| agent.initImage.tag | string | `nil` | | +| agent.nodeSelector | object | `{}` | Node selector for the agent pods. | +| agent.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | +| agent.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| agent.podAnnotations | object | `{}` | | +| agent.podLabels | object | `{}` | | +| agent.podSecurityContext.fsGroup | string | `nil` | | +| agent.podSecurityContext.runAsGroup | string | `nil` | | +| agent.podSecurityContext.runAsNonRoot | bool | `true` | | +| agent.podSecurityContext.runAsUser | string | `nil` | | +| agent.replicaCount | int | `1` | | +| agent.resources.limits."nvidia.com/gpu" | int | `1` | | +| agent.resources.limits.memory | string | `"64Gi"` | | +| agent.resources.requests."nvidia.com/gpu" | int | `1` | | +| agent.resources.requests.memory | string | `"32Gi"` | | +| agent.securityContext.allowPrivilegeEscalation | bool | `false` | | +| agent.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| agent.securityContext.runAsNonRoot | bool | `true` | | +| agent.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| agent.service.agentPort | int | `5004` | | +| agent.service.annotations | object | `{}` | | +| agent.service.type | string | `"NodePort"` | | +| agent.storage.class | string | `nil` | | +| agent.storage.size | string | `"128Gi"` | | +| agent.storage.useEphemeral | bool | `true` | | +| agent.tolerations | list | `[]` | Node taints to tolerate by the agent pods. | +| agent.updateStrategy.type | string | `"RollingUpdate"` | | | caCertificates | string | `""` | CA certs | | fullnameOverride | string | `""` | | | global.externalLLM.enabled | bool | `false` | | | global.externalLLM.modelLock | string | `nil` | | -| global.externalLLM.secret | object | `{}` | list of secrets for h2ogpt and agents env | +| global.externalLLM.secret | object | `{}` | list of secrets for h2ogpt and agent env | | global.visionModels.enabled | bool | `false` | Enable vision models | | global.visionModels.rotateAlignResizeImage | bool | `false` | | | global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itslef needs to be set via modeLock or base_model. 
Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | | h2ogpt.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. | -| h2ogpt.agents | object | `{"agent_workers":5,"enabled":true}` | Enable agents | -| h2ogpt.agents.enabled | bool | `true` | Run agents with h2oGPT container | +| h2ogpt.agent | object | `{"agent_workers":5,"enabled":false}` | Enable agent | +| h2ogpt.agent.enabled | bool | `false` | Run agent with h2oGPT container | | h2ogpt.enabled | bool | `true` | Enable h2oGPT | | h2ogpt.env | object | `{}` | | | h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts | @@ -94,7 +94,7 @@ A Helm chart for h2oGPT | h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` | | | h2ogpt.securityContext.runAsNonRoot | bool | `true` | | | h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | -| h2ogpt.service.agentsPort | int | `5004` | | +| h2ogpt.service.agentPort | int | `5004` | | | h2ogpt.service.functionPort | int | `5002` | | | h2ogpt.service.openaiPort | int | `5000` | | | h2ogpt.service.type | string | `"NodePort"` | | diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 26661a337..61e2168dd 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -102,10 +102,10 @@ openai_server: "True" openai_port: 5000 openai_workers: {{ default 5 .openai.openai_workers }} {{- end }} -{{- if .agents.enabled }} +{{- if .agent.enabled }} agent_server: "True" agent_port: 5004 -agent_workers: {{ .agents.agent_workers }} +agent_workers: {{ .agent.agent_workers }} {{- end }} function_server: {{ default "True" ( .overrideConfig.function_server | quote ) }} function_port: 5002 @@ -139,11 +139,11 @@ enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} {{- end }} {{/* -Config for agents +Config for agent */}} -{{- define "agents.config" -}} -{{- with .Values.agents }} +{{- 
define "agent.config" -}} +{{- with .Values.agent }} verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} {{- if .overrideConfig.heap_app_id }} heap_app_id: {{ .overrideConfig.heap_app_id }} diff --git a/helm/h2ogpt-chart/templates/agents-configmap.yaml b/helm/h2ogpt-chart/templates/agents-configmap.yaml index 2f293cd2f..b6fa6e51e 100644 --- a/helm/h2ogpt-chart/templates/agents-configmap.yaml +++ b/helm/h2ogpt-chart/templates/agents-configmap.yaml @@ -1,13 +1,13 @@ -{{- if .Values.agents.enabled }} +{{- if .Values.agent.enabled }} apiVersion: v1 kind: ConfigMap metadata: - name: {{ include "h2ogpt.fullname" . }}-agents-config + name: {{ include "h2ogpt.fullname" . }}-agent-config namespace: {{ include "h2ogpt.namespace" . | quote }} labels: {{- include "h2ogpt.labels" . | nindent 4 }} data: -{{- range $key, $value := ( include "agents.config" . | fromYaml ) }} +{{- range $key, $value := ( include "agent.config" . | fromYaml ) }} {{- /* convert boolean value to cli compatiblity */}} {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} @@ -15,7 +15,7 @@ data: {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} {{- end }} {{- end }} -{{- range $key, $value := ( .Values.agents.additionalConfig ) }} +{{- range $key, $value := ( .Values.agent.additionalConfig ) }} {{- /* convert boolean value to cli compatiblity */}} {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index dcdda700a..ac737a792 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -1,45 +1,45 @@ -{{- if .Values.agents.enabled }} +{{- if .Values.agent.enabled }} apiVersion: apps/v1 kind: Deployment 
metadata: - name: {{ include "h2ogpt.fullname" . }}-agents + name: {{ include "h2ogpt.fullname" . }}-agent namespace: {{ include "h2ogpt.namespace" . | quote }} labels: - app: {{ include "h2ogpt.fullname" . }}-agents + app: {{ include "h2ogpt.fullname" . }}-agent spec: - replicas: {{ .Values.agents.replicaCount }} + replicas: {{ .Values.agent.replicaCount }} selector: matchLabels: - app: {{ include "h2ogpt.fullname" . }}-agents - {{- if .Values.agents.updateStrategy }} - strategy: {{- toYaml .Values.agents.updateStrategy | nindent 4 }} + app: {{ include "h2ogpt.fullname" . }}-agent + {{- if .Values.agent.updateStrategy }} + strategy: {{- toYaml .Values.agent.updateStrategy | nindent 4 }} {{- end }} template: metadata: - {{- with .Values.agents.podAnnotations }} + {{- with .Values.agent.podAnnotations }} annotations: {{- toYaml . | nindent 8 }} {{- end }} labels: - app: {{ include "h2ogpt.fullname" . }}-agents - {{- with .Values.agents.podLabels }} + app: {{ include "h2ogpt.fullname" . }}-agent + {{- with .Values.agent.podLabels }} {{ toYaml . | nindent 8 }} {{- end }} spec: - {{- with .Values.agents.nodeSelector }} + {{- with .Values.agent.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.agents.tolerations }} + {{- with .Values.agent.tolerations }} tolerations: {{- toYaml . | nindent 8 }} {{- end }} securityContext: - {{- toYaml .Values.agents.podSecurityContext | nindent 8 }} + {{- toYaml .Values.agent.podSecurityContext | nindent 8 }} affinity: - {{- if .Values.agents.podAffinity }} + {{- if .Values.agent.podAffinity }} podAntiAffinity: - {{- if .Values.agents.podAffinity.hostname }} + {{- if .Values.agent.podAffinity.hostname }} requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: matchExpressions: @@ -49,7 +49,7 @@ spec: - {{ include "h2ogpt.fullname" . 
}} topologyKey: kubernetes.io/hostname {{- end }} - {{- if .Values.agents.podAffinity.zone }} + {{- if .Values.agent.podAffinity.zone }} preferredDuringSchedulingIgnoredDuringExecution: - weight: 100 podAffinityTerm: @@ -62,19 +62,19 @@ spec: topologyKey: failure-domain.beta.kubernetes.io/zone {{- end }} {{- end }} - {{- with .Values.agents.extraAffinity }} + {{- with .Values.agent.extraAffinity }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.agents.imagePullSecrets }} + {{- with .Values.agent.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} containers: - - name: {{ include "h2ogpt.fullname" . }}-agents + - name: {{ include "h2ogpt.fullname" . }}-agent securityContext: - {{- toYaml .Values.agents.securityContext | nindent 12 }} - image: "{{ .Values.agents.image.repository }}:{{ .Values.agents.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.agents.image.pullPolicy }} + {{- toYaml .Values.agent.securityContext | nindent 12 }} + image: "{{ .Values.agent.image.repository }}:{{ .Values.agent.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.agent.image.pullPolicy }} command: ["/bin/bash", "-c"] args: - > @@ -83,33 +83,33 @@ spec: - name: agent containerPort: 5004 protocol: TCP - {{- if .Values.agents.livenessProbe }} + {{- if .Values.agent.livenessProbe }} livenessProbe: httpGet: path: / scheme: HTTP port: http - {{- toYaml .Values.agents.livenessProbe | nindent 12 }} + {{- toYaml .Values.agent.livenessProbe | nindent 12 }} {{- end }} - {{- if .Values.agents.readinessProbe }} + {{- if .Values.agent.readinessProbe }} readinessProbe: httpGet: path: / scheme: HTTP port: http - {{- toYaml .Values.agents.readinessProbe | nindent 12 }} + {{- toYaml .Values.agent.readinessProbe | nindent 12 }} {{- end }} resources: - {{- toYaml .Values.agents.resources | nindent 12 }} + {{- toYaml .Values.agent.resources | nindent 12 }} envFrom: - configMapRef: - name: {{ include "h2ogpt.fullname" . 
}}-agents-config + name: {{ include "h2ogpt.fullname" . }}-agent-config {{- if .Values.global.externalLLM.enabled }} - secretRef: name: {{ include "h2ogpt.fullname" . }}-external-llm-secret {{- end }} env: - {{- range $key, $value := .Values.agents.env }} + {{- range $key, $value := .Values.agent.env }} - name: "{{ $key }}" value: "{{ $value }}" {{- end }} @@ -126,10 +126,10 @@ spec: value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} {{- end }} volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-agents-volume + - name: {{ include "h2ogpt.fullname" . }}-agent-volume mountPath: /workspace/.cache subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-agents-volume + - name: {{ include "h2ogpt.fullname" . }}-agent-volume mountPath: /workspace/save subPath: save {{- if .Values.caCertificates }} @@ -137,14 +137,14 @@ spec: mountPath: /etc/ssl/certs/root-ca-bundle.crt subPath: root-ca-bundle.crt {{- end }} - {{ with .Values.agents.extraVolumeMounts }} + {{ with .Values.agent.extraVolumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} volumes: - - name: {{ include "h2ogpt.fullname" . }}-agents-volume - {{- if not .Values.agents.storage.useEphemeral }} + - name: {{ include "h2ogpt.fullname" . }}-agent-volume + {{- if not .Values.agent.storage.useEphemeral }} persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-agents-volume + claimName: {{ include "h2ogpt.fullname" . }}-agent-volume {{- else}} ephemeral: volumeClaimTemplate: @@ -153,15 +153,15 @@ spec: - ReadWriteOnce resources: requests: - storage: {{ .Values.agents.storage.size | quote }} - storageClassName: {{ .Values.agents.storage.class }} + storage: {{ .Values.agent.storage.size | quote }} + storageClassName: {{ .Values.agent.storage.class }} {{- end }} {{- if .Values.caCertificates }} - name: ca-certificates configMap: name: {{ include "h2ogpt.fullname" . 
}}-ca-certificates {{- end }} - {{- with .Values.agents.extraVolumes }} + {{- with .Values.agent.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-hpa.yaml b/helm/h2ogpt-chart/templates/agents-hpa.yaml index f0c796484..5cf083bbb 100644 --- a/helm/h2ogpt-chart/templates/agents-hpa.yaml +++ b/helm/h2ogpt-chart/templates/agents-hpa.yaml @@ -1,8 +1,8 @@ -{{- if .Values.agents.autoscaling.enabled | default false }} +{{- if .Values.agent.autoscaling.enabled | default false }} apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler metadata: - name: {{ .Release.Name }}-agents + name: {{ .Release.Name }}-agent namespace: {{ include "h2ogpt.namespace" . | quote }} labels: {{- include "h2ogpt.labels" . | nindent 4 }} @@ -10,24 +10,24 @@ spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment - name: {{ include "h2ogpt.fullname" . }}-agents - minReplicas: {{ .Values.agents.autoscaling.minReplicas }} - maxReplicas: {{ .Values.agents.autoscaling.maxReplicas }} + name: {{ include "h2ogpt.fullname" . 
}}-agent + minReplicas: {{ .Values.agent.autoscaling.minReplicas }} + maxReplicas: {{ .Values.agent.autoscaling.maxReplicas }} metrics: - {{- if .Values.agents.autoscaling.targetCPU }} + {{- if .Values.agent.autoscaling.targetCPU }} - type: Resource resource: name: cpu target: type: Utilization - averageUtilization: {{ .Values.agents.autoscaling.targetCPU }} + averageUtilization: {{ .Values.agent.autoscaling.targetCPU }} {{- end }} - {{- if .Values.agents.autoscaling.targetMemory }} + {{- if .Values.agent.autoscaling.targetMemory }} - type: Resource resource: name: memory target: type: Utilization - averageUtilization: {{ .Values.agents.autoscaling.targetMemory }} + averageUtilization: {{ .Values.agent.autoscaling.targetMemory }} {{- end }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-pvc.yaml b/helm/h2ogpt-chart/templates/agents-pvc.yaml index 2165fab9d..2ac48c921 100644 --- a/helm/h2ogpt-chart/templates/agents-pvc.yaml +++ b/helm/h2ogpt-chart/templates/agents-pvc.yaml @@ -1,14 +1,14 @@ -{{- if and (.Values.agents.enabled) (not .Values.agents.storage.useEphemeral) }} +{{- if and (.Values.agent.enabled) (not .Values.agent.storage.useEphemeral) }} apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: {{ include "h2ogpt.fullname" . }}-agents-volume + name: {{ include "h2ogpt.fullname" . }}-agent-volume namespace: {{ include "h2ogpt.namespace" . 
| quote }} spec: accessModes: - ReadWriteOnce - storageClassName: {{ .Values.agents.storage.class }} + storageClassName: {{ .Values.agent.storage.class }} resources: requests: - storage: {{ .Values.agents.storage.size | quote }} + storage: {{ .Values.agent.storage.size | quote }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-service.yaml b/helm/h2ogpt-chart/templates/agents-service.yaml index d39cad58e..6b0653555 100644 --- a/helm/h2ogpt-chart/templates/agents-service.yaml +++ b/helm/h2ogpt-chart/templates/agents-service.yaml @@ -1,21 +1,21 @@ -{{- if .Values.agents.enabled }} +{{- if .Values.agent.enabled }} apiVersion: v1 kind: Service metadata: - name: {{ include "h2ogpt.fullname" . }}-agents + name: {{ include "h2ogpt.fullname" . }}-agent namespace: {{ include "h2ogpt.namespace" . | quote }} - {{- with .Values.agents.service.annotations }} + {{- with .Values.agent.service.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} spec: selector: - app: {{ include "h2ogpt.fullname" . }}-agents + app: {{ include "h2ogpt.fullname" . 
}}-agent ports: - name: agent protocol: TCP - port: {{ .Values.agents.service.agentsPort }} + port: {{ .Values.agent.service.agentPort }} targetPort: 5004 - type: {{ .Values.agents.service.type }} + type: {{ .Values.agent.service.type }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml index 6c6f5b74e..044d9eeae 100644 --- a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml +++ b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.global.externalLLM.enabled (or .Values.agents.enabled .Values.h2ogpt.enabled) }} +{{- if and .Values.global.externalLLM.enabled (or .Values.agent.enabled .Values.h2ogpt.enabled) }} apiVersion: v1 kind: Secret metadata: diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index bac71f22d..4d1f74a70 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -104,7 +104,7 @@ spec: - name: function containerPort: 5002 protocol: TCP - {{- if .Values.h2ogpt.agents.enabled }} + {{- if .Values.h2ogpt.agent.enabled }} - name: agent containerPort: 5004 protocol: TCP diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index 747aed223..7e9f13bb9 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -27,10 +27,10 @@ spec: protocol: TCP port: {{ .Values.h2ogpt.service.functionPort }} targetPort: 5002 - {{- if .Values.h2ogpt.agents.enabled }} + {{- if .Values.h2ogpt.agent.enabled }} - name: agent protocol: TCP - port: {{ .Values.h2ogpt.service.agentsPort }} + port: {{ .Values.h2ogpt.service.agentPort }} targetPort: 5004 {{- end }} type: {{ .Values.h2ogpt.service.type }} diff --git a/helm/h2ogpt-chart/templates/validators.yaml 
b/helm/h2ogpt-chart/templates/validators.yaml index b97d33e5c..49fb1532b 100644 --- a/helm/h2ogpt-chart/templates/validators.yaml +++ b/helm/h2ogpt-chart/templates/validators.yaml @@ -1,3 +1,3 @@ -{{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} - {{- fail " Both agents in both h2ogpt.agents cannot be enabled. Enably only one and try again" }} +{{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agent.enabled) .Values.agent.enabled }} + {{- fail " Both agent and h2ogpt.agent cannot be enabled. Enably only one and try again" }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 6d8183e36..7b7644dca 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -5,7 +5,7 @@ namespaceOverride: "" global: externalLLM: enabled: false - # -- list of secrets for h2ogpt and agents env + # -- list of secrets for h2ogpt and agent env secret: {} # OPENAI_AZURE_KEY: "value" # OPENAI_AZURE_API_BASE: "value" @@ -24,10 +24,10 @@ global: h2ogpt: # -- Enable h2oGPT enabled: true - # -- Enable agents - agents: - # -- Run agents with h2oGPT container - enabled: true + # -- Enable agent + agent: + # -- Run agent with h2oGPT container + enabled: false agent_workers: 5 openai: enabled: true @@ -96,7 +96,7 @@ h2ogpt: webPort: 80 openaiPort: 5000 functionPort: 5002 - agentsPort: 5004 + agentPort: 5004 webServiceAnnotations: {} updateStrategy: @@ -134,12 +134,12 @@ h2ogpt: podAnnotations: {} podLabels: {} -agents: - # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` - enabled: false +agent: + # -- Enable agent, this must be `false` if `h2ogpt.agent.enabled` is `true` + enabled: true agent_workers: 5 autoscaling: - # Enable autoscaling (HPA) for agents + # Enable autoscaling (HPA) for agent enabled: false minReplicas: 1 maxReplicas: 2 @@ -205,7 +205,7 @@ agents: service: type: NodePort - agentsPort: 5004 + agentPort: 5004 annotations: {} updateStrategy: 
@@ -233,9 +233,9 @@ agents: limits: memory: 64Gi nvidia.com/gpu: 1 - # -- Node taints to tolerate by the agents pods. + # -- Node taints to tolerate by the agent pods. tolerations: [] - # -- Node selector for the agents pods. + # -- Node selector for the agent pods. nodeSelector: {} env: {}