From 92198f78d58ab15e5b998cf8b700cfe58a750f14 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 02:52:51 +0530 Subject: [PATCH 01/34] Split templates --- .../templates/ca-certs-configmap.yaml | 12 + helm/h2ogpt-chart/templates/config-map.yaml | 69 -- helm/h2ogpt-chart/templates/deployment.yaml | 884 ------------------ .../templates/h2ogpt-configmap.yaml | 13 + .../templates/h2ogpt-deployment.yaml | 373 ++++++++ .../templates/h2ogpt-service.yaml | 49 + .../templates/lmdeploy-configmap.yaml | 13 + .../templates/lmdeploy-deployment.yaml | 163 ++++ .../templates/lmdeploy-service.yaml | 15 + helm/h2ogpt-chart/templates/service.yaml | 97 -- .../templates/tgi-configmap.yamal | 13 + .../templates/tgi-deployment.yaml | 175 ++++ helm/h2ogpt-chart/templates/tgi-service.yaml | 15 + .../templates/vllm-configmap.yaml | 13 + .../templates/vllm-deployment.yaml | 167 ++++ helm/h2ogpt-chart/templates/vllm-service.yaml | 15 + 16 files changed, 1036 insertions(+), 1050 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/ca-certs-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/config-map.yaml delete mode 100644 helm/h2ogpt-chart/templates/deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-service.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-service.yaml delete mode 100644 helm/h2ogpt-chart/templates/service.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-configmap.yamal create mode 100644 helm/h2ogpt-chart/templates/tgi-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-service.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-deployment.yaml 
create mode 100644 helm/h2ogpt-chart/templates/vllm-service.yaml diff --git a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml new file mode 100644 index 000000000..a2580b771 --- /dev/null +++ b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.caCertificates}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: + root-ca-bundle.crt: | + {{ .Values.caCertificates | nindent 4 | trim }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/config-map.yaml b/helm/h2ogpt-chart/templates/config-map.yaml deleted file mode 100644 index 64aca5503..000000000 --- a/helm/h2ogpt-chart/templates/config-map.yaml +++ /dev/null @@ -1,69 +0,0 @@ - -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.h2ogpt.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.vllm.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . 
| nindent 4 }} -data: -{{- range $key, $value := .Values.vllm.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.lmdeploy.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.caCertificates}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: - root-ca-bundle.crt: | - {{ .Values.caCertificates | nindent 4 | trim }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/deployment.yaml b/helm/h2ogpt-chart/templates/deployment.yaml deleted file mode 100644 index d89d8a3cb..000000000 --- a/helm/h2ogpt-chart/templates/deployment.yaml +++ /dev/null @@ -1,884 +0,0 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. 
Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }} -spec: - {{- if not .Values.h2ogpt.autoscaling.enabled }} - replicas: {{ .Values.h2ogpt.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }} - {{- if .Values.h2ogpt.updateStrategy }} - strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.h2ogpt.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }} - {{- with .Values.h2ogpt.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.h2ogpt.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.h2ogpt.podAffinity }} - podAntiAffinity: - {{- if .Values.h2ogpt.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.h2ogpt.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.h2ogpt.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - - name: {{ include "h2ogpt.fullname" . 
}} - securityContext: - {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} - image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} - command: ["/bin/bash", "-c"] - {{- if .Values.h2ogpt.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.h2ogpt.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} - args: - - > - python3 /workspace/generate.py - {{- end }} - {{- end }} - ports: - - name: http - containerPort: 7860 - protocol: TCP - - name: gpt - containerPort: 8888 - protocol: TCP - - name: openai - containerPort: 5000 - protocol: TCP - - name: function - containerPort: 5002 - protocol: TCP - - name: agent - containerPort: 5004 - protocol: TCP - {{- if .Values.h2ogpt.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.h2ogpt.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.h2ogpt.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-config - env: - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" - {{- end }} - {{- range $key, $value := .Values.h2ogpt.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} - {{- if .Values.h2ogpt.externalLLM.enabled }} - - name: H2OGPT_MODEL_LOCK - value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} - - name: H2OGPT_SCORE_MODEL - value: None - {{- end }} - {{- if .Values.h2ogpt.visionModels.enabled }} - - name: H2OGPT_VISIBLE_VISION_MODELS - value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} - - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE - value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . 
}}-volume - mountPath: /workspace/save - subPath: save - {{- if .Values.caCertificates }} - - name: ca-certificates - mountPath: /etc/ssl/certs/root-ca-bundle.crt - subPath: root-ca-bundle.crt - {{- end }} - {{ with .Values.h2ogpt.extraVolumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - volumes: - - name: {{ include "h2ogpt.fullname" . }}-volume - {{- if not .Values.h2ogpt.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - {{- end }} - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - {{- if .Values.caCertificates }} - - name: ca-certificates - configMap: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - {{- end }} - {{- with .Values.h2ogpt.extraVolumes }} - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} - ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - {{- if not .Values.tgi.autoscaling.enabled }} - replicas: {{ .Values.tgi.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference -spec: - {{- if not .Values.vllm.autoscaling.enabled }} - replicas: {{ .Values.vllm.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- if .Values.vllm.updateStrategy }} - strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.vllm.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- with .Values.vllm.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.vllm.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.vllm.podAffinity }} - podAntiAffinity: - {{- if .Values.vllm.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.vllm.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.vllm.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.vllm.storage.class | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference -spec: - {{- if not .Values.lmdeploy.autoscaling.enabled }} - replicas: {{ .Values.lmdeploy.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- if .Values.lmdeploy.updateStrategy }} - strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.lmdeploy.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- with .Values.lmdeploy.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.lmdeploy.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.lmdeploy.podAffinity }} - podAntiAffinity: - {{- if .Values.lmdeploy.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.lmdeploy.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.lmdeploy.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - securityContext: - {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} - image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} - command: ["lmdeploy"] - args: - - "serve" - - "api_server" -{{- range $arg := .Values.lmdeploy.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 23333 - protocol: TCP - {{- if .Values.lmdeploy.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.lmdeploy.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.lmdeploy.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - - name: HF_HOME - value: "/workspace/.cache" - {{- range $key, $value := .Values.lmdeploy.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - {{- if not .Values.lmdeploy.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml new file mode 100644 index 000000000..03cb57751 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.h2ogpt.overrideConfig }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml new file mode 100644 index 000000000..d240a7b6e --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -0,0 +1,373 @@ +{{- if and .Values.vllm.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and vLLM cannot be enabled at the same time. 
Enable only one and try again" }} +{{- end }} +{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} + {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if .Values.h2ogpt.stack.enabled }} + {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} + {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} + {{- end }} +{{- end }} + +{{- if .Values.h2ogpt.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }} +spec: + {{- if not .Values.h2ogpt.autoscaling.enabled }} + replicas: {{ .Values.h2ogpt.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }} + {{- if .Values.h2ogpt.updateStrategy }} + strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.h2ogpt.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }} + {{- with .Values.h2ogpt.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.h2ogpt.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.h2ogpt.podAffinity }} + podAntiAffinity: + {{- if .Values.h2ogpt.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . 
}} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.h2ogpt.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.h2ogpt.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /dev/shm + subPath: shm + {{- end }} + - name: {{ include "h2ogpt.fullname" . }} + securityContext: + {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} + image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} + command: ["/bin/bash", "-c"] + {{- if .Values.h2ogpt.stack.enabled }} + args: + - > + while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' + http://localhost:5000/v1/models)" != "200" ]]; do + echo "Waiting for inference service to become ready... (2sec)" + sleep 2 + done + + python3 /workspace/generate.py + {{- end }} + {{- if not .Values.h2ogpt.stack.enabled }} + {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} + args: + - > + python3 /workspace/generate.py + {{- end }} + {{- end }} + ports: + - name: http + containerPort: 7860 + protocol: TCP + - name: gpt + containerPort: 8888 + protocol: TCP + - name: openai + containerPort: 5000 + protocol: TCP + - name: function + containerPort: 5002 + protocol: TCP + - name: agent + containerPort: 5004 + protocol: TCP + {{- if .Values.h2ogpt.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.h2ogpt.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.h2ogpt.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-config + env: + {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" + {{- end }} + {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" + {{- end }} + {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} + - name: h2ogpt_inference_server + value: "vllm:localhost:5000" + {{- end }} + {{- range $key, $value := .Values.h2ogpt.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: OPENAI_AZURE_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_AZURE_KEY + - name: OPENAI_AZURE_API_BASE + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_AZURE_API_BASE + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_API_KEY + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: REPLICATE_API_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: REPLICATE_API_TOKEN + {{- end }} + {{- if .Values.h2ogpt.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.h2ogpt.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . 
}}-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.h2ogpt.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-volume + {{- if not .Values.h2ogpt.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} + storageClassName: {{ .Values.h2ogpt.storage.class }} + {{- end }} + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.h2ogpt.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-volume + namespace: {{ include "h2ogpt.namespace" . 
| quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.h2ogpt.storage.class }} + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml new file mode 100644 index 000000000..16417b7ff --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -0,0 +1,49 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-web + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.h2ogpt.service.webServiceAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }} + ports: + - name: http + protocol: TCP + port: {{ .Values.h2ogpt.service.webPort }} + targetPort: 7860 + - name: openai + protocol: TCP + port: {{ .Values.h2ogpt.service.openaiPort }} + targetPort: 5000 + - name: function + protocol: TCP + port: {{ .Values.h2ogpt.service.functionPort }} + targetPort: 5002 + - name: agent + protocol: TCP + port: {{ .Values.h2ogpt.service.agentsPort }} + targetPort: 5004 + type: {{ .Values.h2ogpt.service.type }} +{{- end }} +--- +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}} + ports: + - protocol: TCP + port: {{ .Values.h2ogpt.service.gptPort }} + targetPort: 8888 + type: {{ .Values.h2ogpt.service.type }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml new file mode 100644 index 000000000..7d041e79f --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.lmdeploy.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.lmdeploy.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml new file mode 100644 index 000000000..bb3240924 --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -0,0 +1,163 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference +spec: + {{- if not .Values.lmdeploy.autoscaling.enabled }} + replicas: {{ .Values.lmdeploy.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + {{- if .Values.lmdeploy.updateStrategy }} + strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.lmdeploy.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference + {{- with .Values.lmdeploy.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.lmdeploy.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lmdeploy.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.lmdeploy.podAffinity }} + podAntiAffinity: + {{- if .Values.lmdeploy.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.lmdeploy.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.lmdeploy.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lmdeploy.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference + securityContext: + {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} + image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} + command: ["lmdeploy"] + args: + - "serve" + - "api_server" +{{- range $arg := .Values.lmdeploy.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 23333 + protocol: TCP + {{- if .Values.lmdeploy.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.lmdeploy.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.lmdeploy.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + - name: HF_HOME + value: "/workspace/.cache" + {{- range $key, $value := .Values.lmdeploy.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + {{- if not .Values.lmdeploy.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} +--- +{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml new file mode 100644 index 000000000..e1dfdc4d3 --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + ports: + - protocol: TCP + port: {{ .Values.lmdeploy.service.port }} + targetPort: 23333 + type: {{ .Values.lmdeploy.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/service.yaml b/helm/h2ogpt-chart/templates/service.yaml deleted file mode 100644 index 8d3ddb73d..000000000 --- a/helm/h2ogpt-chart/templates/service.yaml +++ /dev/null @@ -1,97 +0,0 @@ -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . 
}}-web - namespace: {{ include "h2ogpt.namespace" . | quote }} - - {{- with .Values.h2ogpt.service.webServiceAnnotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - name: http - protocol: TCP - port: {{ .Values.h2ogpt.service.webPort }} - targetPort: 7860 - - name: openai - protocol: TCP - port: {{ .Values.h2ogpt.service.openaiPort }} - targetPort: 5000 - - name: function - protocol: TCP - port: {{ .Values.h2ogpt.service.functionPort }} - targetPort: 5002 - - name: agent - protocol: TCP - port: {{ .Values.h2ogpt.service.agentsPort }} - targetPort: 5004 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - protocol: TCP - port: {{ .Values.h2ogpt.service.gptPort }} - targetPort: 8888 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - ports: - - protocol: TCP - port: {{ .Values.vllm.service.port }} - targetPort: 5000 - type: {{ .Values.vllm.service.type }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - ports: - - protocol: TCP - port: {{ .Values.lmdeploy.service.port }} - targetPort: 23333 - type: {{ .Values.lmdeploy.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yamal b/helm/h2ogpt-chart/templates/tgi-configmap.yamal new file mode 100644 index 000000000..3857b92c8 --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-configmap.yamal @@ -0,0 +1,13 @@ +{{- if .Values.tgi.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.tgi.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml new file mode 100644 index 000000000..400ac6eb6 --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml @@ -0,0 +1,175 @@ +{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . 
}}-tgi-inference +spec: + {{- if not .Values.tgi.autoscaling.enabled }} + replicas: {{ .Values.tgi.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-tgi-inference + {{- if .Values.tgi.updateStrategy }} + strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.tgi.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-tgi-inference + {{- with .Values.tgi.podLabels }} + {{ toYaml . | nindent 6 }} + {{- end }} + spec: + {{- with .Values.tgi.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tgi.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.tgi.podAffinity }} + podAntiAffinity: + {{- if .Values.tgi.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.tgi.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.tgi.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tgi.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference + securityContext: + {{- toYaml .Values.tgi.securityContext | nindent 12 }} + image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" + imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} + command: [] + args: +{{- range $arg := .Values.tgi.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 80 + protocol: TCP + {{- if .Values.tgi.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.tgi.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.tgi.resources | nindent 12 }} + env: + {{- range $key, $value := .Values.tgi.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config + - secretRef: + name: {{ .Values.tgi.hfSecret }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /app/cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /data + subPath: data + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /dev/shm + subPath: shm + volumes: + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference-volume + {{- if not .Values.tgi.storage.useEphemeral}} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} + storageClassName: {{ .Values.tgi.storage.class }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.tgi.storage.class }} + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml new file mode 100644 index 000000000..63b04b36d --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-tgi-inference + ports: + - protocol: TCP + port: {{ .Values.tgi.service.port }} + targetPort: 80 + type: {{ .Values.tgi.service.type }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/vllm-configmap.yaml b/helm/h2ogpt-chart/templates/vllm-configmap.yaml new file mode 100644 index 000000000..66c187b3c --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + namespace: {{ include "h2ogpt.namespace" . 
| quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.vllm.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml new file mode 100644 index 000000000..e0228500b --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -0,0 +1,167 @@ +{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference +spec: + {{- if not .Values.vllm.autoscaling.enabled }} + replicas: {{ .Values.vllm.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- if .Values.vllm.updateStrategy }} + strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.vllm.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- with .Values.vllm.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.vllm.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.vllm.podAffinity }} + podAntiAffinity: + {{- if .Values.vllm.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . 
}} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.vllm.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.vllm.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} +--- +{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.vllm.storage.class | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml new file mode 100644 index 000000000..34678c2c5 --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + ports: + - protocol: TCP + port: {{ .Values.vllm.service.port }} + targetPort: 5000 + type: {{ .Values.vllm.service.type }} +{{- end }} \ No newline at end of file From 722b891c5e3e755670e9f0535bd93e43273042ed Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:01:09 +0530 Subject: [PATCH 02/34] Fix file name --- .../templates/{tgi-configmap.yamal => tgi-configmap.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename helm/h2ogpt-chart/templates/{tgi-configmap.yamal => tgi-configmap.yaml} (100%) diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yamal b/helm/h2ogpt-chart/templates/tgi-configmap.yaml similarity index 100% rename from helm/h2ogpt-chart/templates/tgi-configmap.yamal rename to helm/h2ogpt-chart/templates/tgi-configmap.yaml From b1d4b3c00e4f8bbcdc531c0fff0b891bbb13d93c Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:08:35 +0530 Subject: [PATCH 03/34] Move validations to validations.yaml --- .../h2ogpt-chart/templates/h2ogpt-deployment.yaml | 15 --------------- helm/h2ogpt-chart/templates/validations.yaml | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 15 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/validations.yaml diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index d240a7b6e..1ba47e84a 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -1,18 +1,3 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. 
Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} - {{- if .Values.h2ogpt.enabled }} apiVersion: apps/v1 kind: Deployment diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml new file mode 100644 index 000000000..6e9936d83 --- /dev/null +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.vllm.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} + {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if .Values.h2ogpt.stack.enabled }} + {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} + {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} + {{- end }} +{{- end }} \ No newline at end of file From 5d97a47b70f94d663fdedfe14fea5727c02f663f Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:12:33 +0530 Subject: [PATCH 04/34] Add NOTES.txt --- helm/h2ogpt-chart/templates/NOTES.txt | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 helm/h2ogpt-chart/templates/NOTES.txt diff --git a/helm/h2ogpt-chart/templates/NOTES.txt b/helm/h2ogpt-chart/templates/NOTES.txt new file mode 100644 index 000000000..c32a7790f --- /dev/null +++ b/helm/h2ogpt-chart/templates/NOTES.txt @@ -0,0 +1,8 @@ +Thank you for installing {{ .Chart.Name }}. + +Your release is named {{ .Release.Name }}. 
+ +To learn more about the release, try: + + $ helm status {{ .Release.Name }} + $ helm get all {{ .Release.Name }} \ No newline at end of file From 7d6c0077b058c95965b41015ab5ca6f92f6c6c51 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:42:48 +0530 Subject: [PATCH 05/34] Update documentation in values.yaml ( for `helm-docs` ) --- helm/h2ogpt-chart/values.yaml | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index b0e599bf4..226038aa2 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -3,6 +3,7 @@ fullnameOverride: "" namespaceOverride: "" h2ogpt: + # -- Enable h2oGPT enabled: true stack: # -- Run h2oGPT and vLLM on same pod. @@ -18,12 +19,12 @@ h2ogpt: tag: pullPolicy: - # extra volumes, for more certs, mount under /etc/ssl/more-certs + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs extraVolumes: [] + # -- Extra volume mounts extraVolumeMounts: [] - - podAffinity: # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + podAffinity: # hostname: # zone: @@ -48,9 +49,9 @@ h2ogpt: enabled: false visionModels: + # -- Enable vision models enabled: false - # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model - # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] + # -- Visible vision models, the vision model itself needs to be set via modelLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] visibleModels: [] rotateAlignResizeImage: false @@ -136,6 +137,7 @@ h2ogpt: autoscaling: {} tgi: + # -- Enable tgi enabled: false replicaCount: 1 @@ -143,9 +145,8 @@ tgi: repository: ghcr.io/huggingface/text-generation-inference tag: 0.9.3 pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. # hostname: # zone: @@ -179,6 +180,7 @@ tgi: autoscaling: {} vllm: + # -- Enable vllm enabled: false replicaCount: 1 @@ -186,9 +188,9 @@ vllm: repository: vllm/vllm-openai tag: latest pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + # hostname: # zone: @@ -248,6 +250,7 @@ vllm: autoscaling: {} lmdeploy: + # -- Enable lmdeploy enabled: false replicaCount: 1 @@ -255,9 +258,8 @@ lmdeploy: repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy tag: pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. # hostname: # zone: From 41d1f2bb672c8da30983ff66b1da778ad93e3ff3 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 02:52:51 +0530 Subject: [PATCH 06/34] Split templates --- .../templates/ca-certs-configmap.yaml | 12 + helm/h2ogpt-chart/templates/config-map.yaml | 69 -- helm/h2ogpt-chart/templates/deployment.yaml | 884 ------------------ .../templates/h2ogpt-configmap.yaml | 13 + .../templates/h2ogpt-deployment.yaml | 373 ++++++++ .../templates/h2ogpt-service.yaml | 49 + .../templates/lmdeploy-configmap.yaml | 13 + .../templates/lmdeploy-deployment.yaml | 163 ++++ .../templates/lmdeploy-service.yaml | 15 + helm/h2ogpt-chart/templates/service.yaml | 97 -- .../templates/tgi-configmap.yamal | 13 + .../templates/tgi-deployment.yaml | 175 ++++ helm/h2ogpt-chart/templates/tgi-service.yaml | 15 + .../templates/vllm-configmap.yaml | 13 + .../templates/vllm-deployment.yaml | 167 ++++ helm/h2ogpt-chart/templates/vllm-service.yaml | 15 + 16 files changed, 1036 insertions(+), 1050 deletions(-) create mode 100644 
helm/h2ogpt-chart/templates/ca-certs-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/config-map.yaml delete mode 100644 helm/h2ogpt-chart/templates/deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-service.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-service.yaml delete mode 100644 helm/h2ogpt-chart/templates/service.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-configmap.yamal create mode 100644 helm/h2ogpt-chart/templates/tgi-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-service.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-service.yaml diff --git a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml new file mode 100644 index 000000000..a2580b771 --- /dev/null +++ b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml @@ -0,0 +1,12 @@ +{{- if .Values.caCertificates}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . 
| nindent 4 }} +data: + root-ca-bundle.crt: | + {{ .Values.caCertificates | nindent 4 | trim }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/config-map.yaml b/helm/h2ogpt-chart/templates/config-map.yaml deleted file mode 100644 index 64aca5503..000000000 --- a/helm/h2ogpt-chart/templates/config-map.yaml +++ /dev/null @@ -1,69 +0,0 @@ - -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.h2ogpt.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.vllm.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.vllm.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . 
| nindent 4 }} -data: -{{- range $key, $value := .Values.lmdeploy.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} ---- -{{- if .Values.caCertificates}} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: - root-ca-bundle.crt: | - {{ .Values.caCertificates | nindent 4 | trim }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/deployment.yaml b/helm/h2ogpt-chart/templates/deployment.yaml deleted file mode 100644 index d89d8a3cb..000000000 --- a/helm/h2ogpt-chart/templates/deployment.yaml +++ /dev/null @@ -1,884 +0,0 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }} -spec: - {{- if not .Values.h2ogpt.autoscaling.enabled }} - replicas: {{ .Values.h2ogpt.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . 
}} - {{- if .Values.h2ogpt.updateStrategy }} - strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.h2ogpt.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }} - {{- with .Values.h2ogpt.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.h2ogpt.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.h2ogpt.podAffinity }} - podAntiAffinity: - {{- if .Values.h2ogpt.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.h2ogpt.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.h2ogpt.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.h2ogpt.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - - name: {{ include "h2ogpt.fullname" . 
}} - securityContext: - {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} - image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} - command: ["/bin/bash", "-c"] - {{- if .Values.h2ogpt.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.h2ogpt.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} - args: - - > - python3 /workspace/generate.py - {{- end }} - {{- end }} - ports: - - name: http - containerPort: 7860 - protocol: TCP - - name: gpt - containerPort: 8888 - protocol: TCP - - name: openai - containerPort: 5000 - protocol: TCP - - name: function - containerPort: 5002 - protocol: TCP - - name: agent - containerPort: 5004 - protocol: TCP - {{- if .Values.h2ogpt.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.h2ogpt.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.h2ogpt.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-config - env: - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" - {{- end }} - {{- range $key, $value := .Values.h2ogpt.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} - {{- if .Values.h2ogpt.externalLLM.enabled }} - - name: H2OGPT_MODEL_LOCK - value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} - - name: H2OGPT_SCORE_MODEL - value: None - {{- end }} - {{- if .Values.h2ogpt.visionModels.enabled }} - - name: H2OGPT_VISIBLE_VISION_MODELS - value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} - - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE - value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . 
}}-volume - mountPath: /workspace/save - subPath: save - {{- if .Values.caCertificates }} - - name: ca-certificates - mountPath: /etc/ssl/certs/root-ca-bundle.crt - subPath: root-ca-bundle.crt - {{- end }} - {{ with .Values.h2ogpt.extraVolumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - volumes: - - name: {{ include "h2ogpt.fullname" . }}-volume - {{- if not .Values.h2ogpt.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - {{- end }} - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - {{- if .Values.caCertificates }} - - name: ca-certificates - configMap: - name: {{ include "h2ogpt.fullname" . }}-ca-certificates - {{- end }} - {{- with .Values.h2ogpt.extraVolumes }} - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.h2ogpt.storage.class }} - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} - ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - {{- if not .Values.tgi.autoscaling.enabled }} - replicas: {{ .Values.tgi.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . 
}} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.h2ogpt.storage.class | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference -spec: - {{- if not .Values.vllm.autoscaling.enabled }} - replicas: {{ .Values.vllm.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- if .Values.vllm.updateStrategy }} - strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.vllm.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-vllm-inference - {{- with .Values.vllm.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.vllm.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.vllm.podAffinity }} - podAntiAffinity: - {{- if .Values.vllm.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.vllm.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.vllm.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.vllm.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.vllm.storage.class | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference -spec: - {{- if not .Values.lmdeploy.autoscaling.enabled }} - replicas: {{ .Values.lmdeploy.replicaCount }} - {{- end }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- if .Values.lmdeploy.updateStrategy }} - strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.lmdeploy.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- with .Values.lmdeploy.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.lmdeploy.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.lmdeploy.podAffinity }} - podAntiAffinity: - {{- if .Values.lmdeploy.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.lmdeploy.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.lmdeploy.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - securityContext: - {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} - image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} - command: ["lmdeploy"] - args: - - "serve" - - "api_server" -{{- range $arg := .Values.lmdeploy.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 23333 - protocol: TCP - {{- if .Values.lmdeploy.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.lmdeploy.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.lmdeploy.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - - name: HF_HOME - value: "/workspace/.cache" - {{- range $key, $value := .Values.lmdeploy.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - {{- if not .Values.lmdeploy.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml new file mode 100644 index 000000000..03cb57751 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.h2ogpt.overrideConfig }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml new file mode 100644 index 000000000..d240a7b6e --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -0,0 +1,373 @@ +{{- if and .Values.vllm.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and vLLM cannot be enabled at the same time. 
Enable only one and try again" }} +{{- end }} +{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} + {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if .Values.h2ogpt.stack.enabled }} + {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} + {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} + {{- end }} +{{- end }} + +{{- if .Values.h2ogpt.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }} +spec: + {{- if not .Values.h2ogpt.autoscaling.enabled }} + replicas: {{ .Values.h2ogpt.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }} + {{- if .Values.h2ogpt.updateStrategy }} + strategy: {{- toYaml .Values.h2ogpt.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.h2ogpt.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }} + {{- with .Values.h2ogpt.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.h2ogpt.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.h2ogpt.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.h2ogpt.podAffinity }} + podAntiAffinity: + {{- if .Values.h2ogpt.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . 
}} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.h2ogpt.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.h2ogpt.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.h2ogpt.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /dev/shm + subPath: shm + {{- end }} + - name: {{ include "h2ogpt.fullname" . }} + securityContext: + {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} + image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} + command: ["/bin/bash", "-c"] + {{- if .Values.h2ogpt.stack.enabled }} + args: + - > + while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' + http://localhost:5000/v1/models)" != "200" ]]; do + echo "Waiting for inference service to become ready... (2sec)" + sleep 2 + done + + python3 /workspace/generate.py + {{- end }} + {{- if not .Values.h2ogpt.stack.enabled }} + {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} + args: + - > + python3 /workspace/generate.py + {{- end }} + {{- end }} + ports: + - name: http + containerPort: 7860 + protocol: TCP + - name: gpt + containerPort: 8888 + protocol: TCP + - name: openai + containerPort: 5000 + protocol: TCP + - name: function + containerPort: 5002 + protocol: TCP + - name: agent + containerPort: 5004 + protocol: TCP + {{- if .Values.h2ogpt.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.h2ogpt.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.h2ogpt.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.h2ogpt.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-config + env: + {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" + {{- end }} + {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" + {{- end }} + {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} + - name: h2ogpt_inference_server + value: "vllm:localhost:5000" + {{- end }} + {{- range $key, $value := .Values.h2ogpt.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: OPENAI_AZURE_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_AZURE_KEY + - name: OPENAI_AZURE_API_BASE + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_AZURE_API_BASE + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: OPENAI_API_KEY + {{- end }} + {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} + - name: REPLICATE_API_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.h2ogpt.externalLLM.secret }} + key: REPLICATE_API_TOKEN + {{- end }} + {{- if .Values.h2ogpt.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.h2ogpt.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . 
}}-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.h2ogpt.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-volume + {{- if not .Values.h2ogpt.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} + storageClassName: {{ .Values.h2ogpt.storage.class }} + {{- end }} + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.h2ogpt.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-volume + namespace: {{ include "h2ogpt.namespace" . 
| quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.h2ogpt.storage.class }} + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml new file mode 100644 index 000000000..16417b7ff --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -0,0 +1,49 @@ +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-web + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.h2ogpt.service.webServiceAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }} + ports: + - name: http + protocol: TCP + port: {{ .Values.h2ogpt.service.webPort }} + targetPort: 7860 + - name: openai + protocol: TCP + port: {{ .Values.h2ogpt.service.openaiPort }} + targetPort: 5000 + - name: function + protocol: TCP + port: {{ .Values.h2ogpt.service.functionPort }} + targetPort: 5002 + - name: agent + protocol: TCP + port: {{ .Values.h2ogpt.service.agentsPort }} + targetPort: 5004 + type: {{ .Values.h2ogpt.service.type }} +{{- end }} +--- +{{- if .Values.h2ogpt.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }} + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}} + ports: + - protocol: TCP + port: {{ .Values.h2ogpt.service.gptPort }} + targetPort: 8888 + type: {{ .Values.h2ogpt.service.type }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml new file mode 100644 index 000000000..7d041e79f --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.lmdeploy.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.lmdeploy.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml new file mode 100644 index 000000000..bb3240924 --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -0,0 +1,163 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference +spec: + {{- if not .Values.lmdeploy.autoscaling.enabled }} + replicas: {{ .Values.lmdeploy.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + {{- if .Values.lmdeploy.updateStrategy }} + strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.lmdeploy.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference + {{- with .Values.lmdeploy.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.lmdeploy.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lmdeploy.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.lmdeploy.podAffinity }} + podAntiAffinity: + {{- if .Values.lmdeploy.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.lmdeploy.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.lmdeploy.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.lmdeploy.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference + securityContext: + {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} + image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} + command: ["lmdeploy"] + args: + - "serve" + - "api_server" +{{- range $arg := .Values.lmdeploy.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 23333 + protocol: TCP + {{- if .Values.lmdeploy.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.lmdeploy.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.lmdeploy.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + - name: HF_HOME + value: "/workspace/.cache" + {{- range $key, $value := .Values.lmdeploy.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + {{- if not .Values.lmdeploy.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} +--- +{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml new file mode 100644 index 000000000..e1dfdc4d3 --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference + ports: + - protocol: TCP + port: {{ .Values.lmdeploy.service.port }} + targetPort: 23333 + type: {{ .Values.lmdeploy.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/service.yaml b/helm/h2ogpt-chart/templates/service.yaml deleted file mode 100644 index 8d3ddb73d..000000000 --- a/helm/h2ogpt-chart/templates/service.yaml +++ /dev/null @@ -1,97 +0,0 @@ -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . 
}}-web - namespace: {{ include "h2ogpt.namespace" . | quote }} - - {{- with .Values.h2ogpt.service.webServiceAnnotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - name: http - protocol: TCP - port: {{ .Values.h2ogpt.service.webPort }} - targetPort: 7860 - - name: openai - protocol: TCP - port: {{ .Values.h2ogpt.service.openaiPort }} - targetPort: 5000 - - name: function - protocol: TCP - port: {{ .Values.h2ogpt.service.functionPort }} - targetPort: 5002 - - name: agent - protocol: TCP - port: {{ .Values.h2ogpt.service.agentsPort }} - targetPort: 5004 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }} - ports: - - protocol: TCP - port: {{ .Values.h2ogpt.service.gptPort }} - targetPort: 8888 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference
-  ports:
-    - protocol: TCP
-      port: {{ .Values.vllm.service.port }}
-      targetPort: 5000
-  type: {{ .Values.vllm.service.type }}
-{{- end }}
----
-{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }}
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference
-  namespace: {{ include "h2ogpt.namespace" . | quote }}
-spec:
-  selector:
-    app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference
-  ports:
-    - protocol: TCP
-      port: {{ .Values.lmdeploy.service.port }}
-      targetPort: 23333
-  type: {{ .Values.lmdeploy.service.type }}
-{{- end }}
diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yaml b/helm/h2ogpt-chart/templates/tgi-configmap.yaml
new file mode 100644
index 000000000..3857b92c8
--- /dev/null
+++ b/helm/h2ogpt-chart/templates/tgi-configmap.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.tgi.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config
+  namespace: {{ include "h2ogpt.namespace" . | quote }}
+  labels:
+    {{- include "h2ogpt.labels" . | nindent 4 }}
+data:
+{{- range $key, $value := .Values.tgi.overrideConfig }}
+  {{ printf "%s" $key | upper }}: {{ $value | quote }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml
new file mode 100644
index 000000000..400ac6eb6
--- /dev/null
+++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml
@@ -0,0 +1,175 @@
+{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "h2ogpt.fullname" . }}-tgi-inference
+  namespace: {{ include "h2ogpt.namespace" . | quote }}
+  labels:
+    app: {{ include "h2ogpt.fullname" . 
}}-tgi-inference
+spec:
+  {{- if not .Values.tgi.autoscaling.enabled }}
+  replicas: {{ .Values.tgi.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      app: {{ include "h2ogpt.fullname" . }}-tgi-inference
+  {{- if .Values.tgi.updateStrategy }}
+  strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }}
+  {{- end }}
+  template:
+    metadata:
+      {{- with .Values.tgi.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        app: {{ include "h2ogpt.fullname" . }}-tgi-inference
+      {{- with .Values.tgi.podLabels }}
+        {{ toYaml . | nindent 8 }}
+      {{- end }}
+    spec:
+      {{- with .Values.tgi.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tgi.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }}
+      affinity:
+        {{- if .Values.tgi.podAffinity }}
+        podAntiAffinity:
+          {{- if .Values.tgi.podAffinity.hostname }}
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: app
+                    operator: In
+                    values:
+                      - {{ include "h2ogpt.fullname" . }}
+              topologyKey: kubernetes.io/hostname
+          {{- end }}
+          {{- if .Values.tgi.podAffinity.zone }}
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              podAffinityTerm:
+                labelSelector:
+                  matchExpressions:
+                    - key: app
+                      operator: In
+                      values:
+                        - {{ include "h2ogpt.fullname" . }}
+                topologyKey: failure-domain.beta.kubernetes.io/zone
+          {{- end }}
+        {{- end }}
+        {{- with .Values.tgi.extraAffinity }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      {{- with .Values.tgi.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference + securityContext: + {{- toYaml .Values.tgi.securityContext | nindent 12 }} + image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" + imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} + command: [] + args: +{{- range $arg := .Values.tgi.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 80 + protocol: TCP + {{- if .Values.tgi.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.tgi.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.tgi.resources | nindent 12 }} + env: + {{- range $key, $value := .Values.tgi.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config + - secretRef: + name: {{ .Values.tgi.hfSecret }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /app/cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /data + subPath: data + - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + mountPath: /dev/shm + subPath: shm + volumes: + {{- if .Values.h2ogpt.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference-volume + {{- if not .Values.tgi.storage.useEphemeral}} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} + storageClassName: {{ .Values.tgi.storage.class }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.tgi.storage.class }} + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml new file mode 100644 index 000000000..63b04b36d --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-tgi-inference + ports: + - protocol: TCP + port: {{ .Values.tgi.service.port }} + targetPort: 80 + type: {{ .Values.tgi.service.type }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/vllm-configmap.yaml b/helm/h2ogpt-chart/templates/vllm-configmap.yaml new file mode 100644 index 000000000..66c187b3c --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-configmap.yaml @@ -0,0 +1,13 @@ +{{- if .Values.vllm.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + namespace: {{ include "h2ogpt.namespace" . 
| quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := .Values.vllm.overrideConfig }} + {{ printf "%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml new file mode 100644 index 000000000..e0228500b --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -0,0 +1,167 @@ +{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference +spec: + {{- if not .Values.vllm.autoscaling.enabled }} + replicas: {{ .Values.vllm.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- if .Values.vllm.updateStrategy }} + strategy: {{- toYaml .Values.vllm.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.vllm.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-vllm-inference + {{- with .Values.vllm.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.vllm.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.vllm.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.vllm.podAffinity }} + podAntiAffinity: + {{- if .Values.vllm.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . 
}} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.vllm.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.vllm.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.vllm.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: shm + mountPath: /dev/shm + volumes: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + - emptyDir: + medium: Memory + sizeLimit: 10.24Gi + name: shm +{{- end }} +--- +{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.vllm.storage.class | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml new file mode 100644 index 000000000..34678c2c5 --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-service.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + ports: + - protocol: TCP + port: {{ .Values.vllm.service.port }} + targetPort: 5000 + type: {{ .Values.vllm.service.type }} +{{- end }} \ No newline at end of file From d4d2ae620d6f5c6f32279e8e14799b64a467ef51 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:01:09 +0530 Subject: [PATCH 07/34] Fix file name --- .../templates/{tgi-configmap.yamal => tgi-configmap.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename helm/h2ogpt-chart/templates/{tgi-configmap.yamal => tgi-configmap.yaml} (100%) diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yamal b/helm/h2ogpt-chart/templates/tgi-configmap.yaml similarity index 100% rename from helm/h2ogpt-chart/templates/tgi-configmap.yamal rename to helm/h2ogpt-chart/templates/tgi-configmap.yaml From 80fc6a0aee703afcdb66fa35e16db6daf9795556 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:08:35 +0530 Subject: [PATCH 08/34] Move validations to validations.yaml --- .../h2ogpt-chart/templates/h2ogpt-deployment.yaml | 15 --------------- helm/h2ogpt-chart/templates/validations.yaml | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 15 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/validations.yaml diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index d240a7b6e..1ba47e84a 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -1,18 +1,3 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. 
Enable only one and try again" }} -{{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} - {{- if .Values.h2ogpt.enabled }} apiVersion: apps/v1 kind: Deployment diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml new file mode 100644 index 000000000..6e9936d83 --- /dev/null +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.vllm.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} + {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} + {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} +{{- end }} +{{- if .Values.h2ogpt.stack.enabled }} + {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} + {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} + {{- end }} +{{- end }} \ No newline at end of file From d90ddbf22855e60bf10ee31e4a419d1b2babeaed Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:12:33 +0530 Subject: [PATCH 09/34] Add NOTES.txt --- helm/h2ogpt-chart/templates/NOTES.txt | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 helm/h2ogpt-chart/templates/NOTES.txt diff --git a/helm/h2ogpt-chart/templates/NOTES.txt b/helm/h2ogpt-chart/templates/NOTES.txt new file mode 100644 index 000000000..c32a7790f --- /dev/null +++ b/helm/h2ogpt-chart/templates/NOTES.txt @@ -0,0 +1,8 @@ +Thank you for installing {{ .Chart.Name }}. + +Your release is named {{ .Release.Name }}. 
+ +To learn more about the release, try: + + $ helm status {{ .Release.Name }} + $ helm get all {{ .Release.Name }} \ No newline at end of file From dc34c1d26560afc9c6e81ac3954d9f51e3aad0a1 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 9 Oct 2024 03:42:48 +0530 Subject: [PATCH 10/34] Update documentation in values.yaml ( for `helm-docs` ) --- helm/h2ogpt-chart/values.yaml | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index b0e599bf4..226038aa2 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -3,6 +3,7 @@ fullnameOverride: "" namespaceOverride: "" h2ogpt: + # -- Enable h2oGPT enabled: true stack: # -- Run h2oGPT and vLLM on same pod. @@ -18,12 +19,12 @@ h2ogpt: tag: pullPolicy: - # extra volumes, for more certs, mount under /etc/ssl/more-certs + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs extraVolumes: [] + # -- Extra volume mounts extraVolumeMounts: [] - - podAffinity: # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + podAffinity: # hostname: # zone: @@ -48,9 +49,9 @@ h2ogpt: enabled: false visionModels: + # -- Enable vision models enabled: false - # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model - # -- Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] + # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] visibleModels: [] rotateAlignResizeImage: false @@ -136,6 +137,7 @@ h2ogpt: autoscaling: {} tgi: + # -- Enable tgi enabled: false replicaCount: 1 @@ -143,9 +145,8 @@ tgi: repository: ghcr.io/huggingface/text-generation-inference tag: 0.9.3 pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. # hostname: # zone: @@ -179,6 +180,7 @@ tgi: autoscaling: {} vllm: + # -- Enable vllm enabled: false replicaCount: 1 @@ -186,9 +188,9 @@ vllm: repository: vllm/vllm-openai tag: latest pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + # hostname: # zone: @@ -248,6 +250,7 @@ vllm: autoscaling: {} lmdeploy: + # -- Enable lmdeploy enabled: false replicaCount: 1 @@ -255,9 +258,8 @@ lmdeploy: repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy tag: pullPolicy: IfNotPresent - + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. podAffinity: - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. # hostname: # zone: From 801e907479e0d30f1ba4f313117ac86d8f99e669 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 13:35:54 +0530 Subject: [PATCH 11/34] Add agents related templates and values --- helm/h2ogpt-chart/templates/_helpers.tpl | 25 ++ .../templates/agents-configmap.yaml | 16 + .../templates/agents-deployment.yaml | 346 ++++++++++++++++++ .../templates/agents-service.yaml | 21 ++ .../templates/h2ogpt-configmap.yaml | 3 + .../templates/h2ogpt-deployment.yaml | 38 +- helm/h2ogpt-chart/templates/validations.yaml | 3 + helm/h2ogpt-chart/values.yaml | 163 +++++++-- 8 files changed, 565 insertions(+), 50 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/agents-configmap.yaml create mode 100644 helm/h2ogpt-chart/templates/agents-deployment.yaml create mode 100644 helm/h2ogpt-chart/templates/agents-service.yaml diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index a8352a4ad..9eb75f3c9 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ 
b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -68,3 +68,28 @@ Create the name of the service account to use {{- default "default" .Values.serviceAccount.name }} {{- end }} {{- end }} + +{{/* +Configs for agents server +*/}} + +{{- define "agents.overrideConfig" -}} +agent_server: True +agent_port: "5004" +{{- end }} + +{{/* +Configs for agents with h2ogpt +*/}} + +{{- define "h2ogpt.overrideConfig" -}} +{{- if .Values.h2ogpt.agents.enabled }} +agent_server: True +agent_port: "5004" +multiple_workers_gunicorn: True +agent_workers: {{ .Values.h2ogpt.agents.agent_workers}} +{{- else }} +agents_server: False +{{- end }} + +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/agents-configmap.yaml b/helm/h2ogpt-chart/templates/agents-configmap.yaml new file mode 100644 index 000000000..e242dee69 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-configmap.yaml @@ -0,0 +1,16 @@ +{{- if .Values.agents.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents-config + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +data: +{{- range $key, $value := ( include "agents.overrideConfig" . | fromYaml ) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- range $key, $value := .Values.agents.overrideConfig }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml new file mode 100644 index 000000000..607f92573 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -0,0 +1,346 @@ +{{- if .Values.agents.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + app: {{ include "h2ogpt.fullname" . 
}}-agents +spec: + {{- if not .Values.agents.autoscaling.enabled }} + replicas: {{ .Values.agents.replicaCount }} + {{- end }} + selector: + matchLabels: + app: {{ include "h2ogpt.fullname" . }}-agents + {{- if .Values.agents.updateStrategy }} + strategy: {{- toYaml .Values.agents.updateStrategy | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.agents.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + app: {{ include "h2ogpt.fullname" . }}-agents + {{- with .Values.agents.podLabels }} + {{ toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.agents.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.agents.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.agents.podSecurityContext | nindent 8 }} + affinity: + {{- if .Values.agents.podAffinity }} + podAntiAffinity: + {{- if .Values.agents.podAffinity.hostname }} + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: kubernetes.io/hostname + {{- end }} + {{- if .Values.agents.podAffinity.zone }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ include "h2ogpt.fullname" . }} + topologyKey: failure-domain.beta.kubernetes.io/zone + {{- end }} + {{- end }} + {{- with .Values.agents.extraAffinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.agents.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + {{- if .Values.agents.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference + securityContext: + {{- toYaml .Values.vllm.securityContext | nindent 12 }} + image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} + command: ["python3"] + args: + - "-m" + - "vllm.entrypoints.openai.api_server" + - "--port" + - "5000" + - "--host" + - "0.0.0.0" + - "--download-dir" + - "/workspace/.cache/huggingface/hub" +{{- range $arg := .Values.vllm.containerArgs }} + - "{{ $arg }}" +{{- end }} + ports: + - name: http + containerPort: 5000 + protocol: TCP + {{- if .Values.vllm.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.vllm.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.vllm.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config + env: + - name: NCCL_IGNORE_DISABLED_P2P + value: "1" + {{- range $key, $value := .Values.vllm.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + mountPath: /dev/shm + subPath: shm + {{- end }} + - name: {{ include "h2ogpt.fullname" . 
}}-agents + securityContext: + {{- toYaml .Values.agents.securityContext | nindent 12 }} + image: "{{ .Values.agents.image.repository }}:{{ .Values.agents.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.agents.image.pullPolicy }} + command: ["/bin/bash", "-c"] + {{- if .Values.agents.stack.enabled }} + args: + - > + while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' + http://localhost:5000/v1/models)" != "200" ]]; do + echo "Waiting for inference service to become ready... (2sec)" + sleep 2 + done + + python3 /workspace/generate.py + {{- end }} + {{- if not .Values.agents.stack.enabled }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.modelLock) }} + args: + - > + until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; + do + echo "Waiting for inference service to become ready..."; + sleep 5; + done + + python3 /workspace/generate.py + {{- end }} + {{- if and .Values.agents.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} + args: + - > + python3 /workspace/generate.py + {{- end }} + {{- end }} + ports: + - name: agent + containerPort: 5004 + protocol: TCP + {{- if .Values.agents.livenessProbe }} + livenessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.agents.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.agents.readinessProbe }} + readinessProbe: + httpGet: + path: / + scheme: HTTP + port: http + {{- toYaml .Values.agents.readinessProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.agents.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "h2ogpt.fullname" . }}-agents-config + env: + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" + {{- end }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" + {{- end }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} + - name: h2ogpt_inference_server + value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" + {{- end }} + {{- if and .Values.agents.stack.enabled (not .Values.global.externalLLM.enabled) }} + - name: h2ogpt_inference_server + value: "vllm:localhost:5000" + {{- end }} + {{- range $key, $value := .Values.agents.env }} + - name: "{{ $key }}" + value: "{{ $value }}" + {{- end }} + {{- if and .Values.global.externalLLM.openAIAzure.enabled .Values.global.externalLLM.enabled }} + - name: OPENAI_AZURE_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.global.externalLLM.secret }} + key: OPENAI_AZURE_KEY + - name: OPENAI_AZURE_API_BASE + valueFrom: + secretKeyRef: + name: {{ .Values.global.externalLLM.secret }} + key: OPENAI_AZURE_API_BASE + {{- end }} + {{- if and .Values.global.externalLLM.openAI.enabled .Values.global.externalLLM.enabled }} + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: {{ .Values.global.externalLLM.secret }} + key: OPENAI_API_KEY + {{- end }} + {{- if and .Values.global.externalLLM.replicate.enabled .Values.global.externalLLM.enabled }} + - name: REPLICATE_API_TOKEN + valueFrom: + secretKeyRef: + name: {{ .Values.global.externalLLM.secret }} + key: REPLICATE_API_TOKEN + {{- end }} + {{- if .Values.global.externalLLM.enabled }} + - name: H2OGPT_MODEL_LOCK + value: {{ toJson .Values.global.externalLLM.modelLock | quote }} + - name: H2OGPT_SCORE_MODEL + value: None + {{- end }} + {{- if .Values.global.visionModels.enabled }} + - name: H2OGPT_VISIBLE_VISION_MODELS + value: {{ .Values.global.visionModels.visibleModels | quote }} + - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE + value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} + {{- end }} + volumeMounts: + - name: {{ include "h2ogpt.fullname" . }}-agents-volume + mountPath: /workspace/.cache + subPath: cache + - name: {{ include "h2ogpt.fullname" . 
}}-volume + mountPath: /workspace/save + subPath: save + {{- if .Values.caCertificates }} + - name: ca-certificates + mountPath: /etc/ssl/certs/root-ca-bundle.crt + subPath: root-ca-bundle.crt + {{- end }} + {{ with .Values.agents.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: {{ include "h2ogpt.fullname" . }}-agents-volume + {{- if not .Values.agents.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-volume + {{- else}} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.agents.storage.size | quote }} + storageClassName: {{ .Values.agents.storage.class }} + {{- end }} + {{- if .Values.agents.stack.enabled }} + - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- if not .Values.vllm.storage.useEphemeral }} + persistentVolumeClaim: + claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + {{- else }} + ephemeral: + volumeClaimTemplate: + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + {{- end }} + {{- end }} + {{- if .Values.caCertificates }} + - name: ca-certificates + configMap: + name: {{ include "h2ogpt.fullname" . }}-ca-certificates + {{- end }} + {{- with .Values.agents.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +--- +{{- if and (.Values.agents.enabled) (not .Values.agents.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents-volume + namespace: {{ include "h2ogpt.namespace" . 
| quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.agents.storage.class }} + resources: + requests: + storage: {{ .Values.agents.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-service.yaml b/helm/h2ogpt-chart/templates/agents-service.yaml new file mode 100644 index 000000000..d39cad58e --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-service.yaml @@ -0,0 +1,21 @@ +{{- if .Values.agents.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents + namespace: {{ include "h2ogpt.namespace" . | quote }} + + {{- with .Values.agents.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + app: {{ include "h2ogpt.fullname" . }}-agents + ports: + - name: agent + protocol: TCP + port: {{ .Values.agents.service.agentsPort }} + targetPort: 5004 + type: {{ .Values.agents.service.type }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml index 03cb57751..902705552 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -7,6 +7,9 @@ metadata: labels: {{- include "h2ogpt.labels" . | nindent 4 }} data: +{{- range $key, $value := ( include "h2ogpt.overrideConfig" . 
| fromYaml ) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- end }} {{- range $key, $value := .Values.h2ogpt.overrideConfig }} {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 1ba47e84a..c320d5488 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -149,7 +149,7 @@ spec: python3 /workspace/generate.py {{- end }} {{- if not .Values.h2ogpt.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} args: - > until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; @@ -160,7 +160,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} args: - > until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; @@ -171,7 +171,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.modelLock) }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.modelLock) }} args: - > until wget -O- http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; @@ -226,19 +226,19 @@ spec: - configMapRef: name: {{ include "h2ogpt.fullname" . 
}}-config env: - {{- if and .Values.tgi.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" {{- end }} - {{- if and .Values.vllm.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.h2ogpt.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.h2ogpt.externalLLM.enabled) }} + {{- if and .Values.h2ogpt.stack.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "vllm:localhost:5000" {{- end }} @@ -246,43 +246,43 @@ spec: - name: "{{ $key }}" value: "{{ $value }}" {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAIAzure.enabled .Values.h2ogpt.externalLLM.enabled }} + {{- if and .Values.global.externalLLM.openAIAzure.enabled .Values.global.externalLLM.enabled }} - name: OPENAI_AZURE_KEY valueFrom: secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} + name: {{ .Values.global.externalLLM.secret }} key: OPENAI_AZURE_KEY - name: OPENAI_AZURE_API_BASE valueFrom: secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} + name: {{ .Values.global.externalLLM.secret }} key: OPENAI_AZURE_API_BASE {{- end }} - {{- if and .Values.h2ogpt.externalLLM.openAI.enabled .Values.h2ogpt.externalLLM.enabled }} + {{- if and .Values.global.externalLLM.openAI.enabled .Values.global.externalLLM.enabled }} - name: OPENAI_API_KEY valueFrom: secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} + name: {{ .Values.global.externalLLM.secret }} key: OPENAI_API_KEY {{- end }} - {{- if and .Values.h2ogpt.externalLLM.replicate.enabled .Values.h2ogpt.externalLLM.enabled }} + {{- if and .Values.global.externalLLM.replicate.enabled .Values.global.externalLLM.enabled }} - name: REPLICATE_API_TOKEN valueFrom: secretKeyRef: - name: {{ .Values.h2ogpt.externalLLM.secret }} + name: {{ .Values.global.externalLLM.secret }} key: REPLICATE_API_TOKEN {{- end }} - {{- if .Values.h2ogpt.externalLLM.enabled }} + {{- if .Values.global.externalLLM.enabled }} - name: H2OGPT_MODEL_LOCK - value: {{ toJson .Values.h2ogpt.externalLLM.modelLock | quote }} + value: {{ toJson .Values.global.externalLLM.modelLock | quote }} - name: H2OGPT_SCORE_MODEL value: None {{- end }} - {{- if 
.Values.h2ogpt.visionModels.enabled }} + {{- if .Values.global.visionModels.enabled }} - name: H2OGPT_VISIBLE_VISION_MODELS - value: {{ .Values.h2ogpt.visionModels.visibleModels | quote }} + value: {{ .Values.global.visionModels.visibleModels | quote }} - name: H2OGPT_ROTATE_ALIGN_RESIZE_IMAGE - value: {{ .Values.h2ogpt.visionModels.rotateAlignResizeImage | quote }} + value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} {{- end }} volumeMounts: - name: {{ include "h2ogpt.fullname" . }}-volume diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml index 6e9936d83..c0c45e0a5 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -11,4 +11,7 @@ {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} {{- end }} +{{- end }} +{{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} + {{- fail "Agents in both h2oGPT and agents cannot be enabled." }} {{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 226038aa2..8e06f6240 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -2,23 +2,50 @@ nameOverride: "" fullnameOverride: "" namespaceOverride: "" +global: + externalLLM: + enabled: false + secret: + + modelLock: + + openAIAzure: + enabled: false + + openAI: + enabled: False + + replicate: + enabled: false + + visionModels: + # -- Enable vision models + enabled: false + # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] + visibleModels: [ ] + rotateAlignResizeImage: false + h2ogpt: # -- Enable h2oGPT enabled: true stack: # -- Run h2oGPT and vLLM on same pod. 
- enabled: false + enabled: false + # -- Enable agents + agents: + # -- Run agents with h2oGPT container + enabled: true + agent_workers: 5 replicaCount: 1 - imagePullSecrets: + imagePullSecrets: image: repository: gcr.io/vorvan/h2oai/h2ogpt-runtime - tag: + tag: pullPolicy: IfNotPresent initImage: repository: tag: pullPolicy: - # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs extraVolumes: [] # -- Extra volume mounts @@ -27,33 +54,10 @@ h2ogpt: podAffinity: # hostname: # zone: - storage: size: 128Gi - class: + class: useEphemeral: true - - externalLLM: - enabled: false - secret: - - modelLock: - - openAIAzure: - enabled: false - - openAI: - enabled: False - - replicate: - enabled: false - - visionModels: - # -- Enable vision models - enabled: false - # -- Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] - visibleModels: [] - rotateAlignResizeImage: false # -- Example configs to use when not using Model Lock and External LLM # overrideConfig: @@ -86,6 +90,7 @@ h2ogpt: embedding_gpu_id: "cpu" hf_embedding_model: "fake" openai_server: True + openai_workers: 5 share: False enforce_h2ogpt_api_key: True enforce_h2ogpt_ui_key: False @@ -113,9 +118,105 @@ h2ogpt: podSecurityContext: runAsNonRoot: true - runAsUser: - runAsGroup: - fsGroup: + runAsUser: + runAsGroup: + fsGroup: + + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + + resources: + nodeSelector: + tolerations: + + env: {} + + podAnnotations: {} + podLabels: {} + autoscaling: {} + +agents: + # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` + enabled: false + stack: + # -- Run agents and vLLM on same pod. 
+ enabled: false + replicaCount: 1 + imagePullSecrets: + image: + repository: gcr.io/vorvan/h2oai/h2ogpt-runtime + tag: + pullPolicy: IfNotPresent + initImage: + repository: + tag: + pullPolicy: + # -- Extra volumes, for more certs, mount under /etc/ssl/more-certs + extraVolumes: [] + # -- Extra volume mounts + extraVolumeMounts: [] + # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. + podAffinity: + # hostname: + # zone: + + storage: + size: 128Gi + class: + useEphemeral: true + + overrideConfig: + agent_workers: 5 + visible_login_tab: False + visible_system_tab: False + visible_models_tab: False + visible_hosts_tab: False + # change below to valid vision model or remove this entry + #visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']" + rotate_align_resize_image: False + concurrency_count: 100 + top_k_docs_max_show: 100 + num_async: 10 + # change below to valid directory or remove this entry + #save_dir: "/docker_logs" + score_model: "None" + enable_tts: False + enable_stt: False + enable_transcriptions: False + embedding_gpu_id: "cpu" + hf_embedding_model: "fake" + share: False + enforce_h2ogpt_api_key: True + enforce_h2ogpt_ui_key: False + # change to something secure for ui access to backend + #h2ogpt_api_keys: "['api_key_change_me']" + metadata_in_context: "" + # change or remove if using model hub + #use_auth_token: "hf_xxxxx" + # change below to first visible model or remove this entry + #visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']" + # change so ui or api cannot access without this password + #admin_pass: "admin_password_change_me" + + service: + type: NodePort + agentsPort: 5004 + annotations: {} + + updateStrategy: + type: RollingUpdate + + podSecurityContext: + runAsNonRoot: true + runAsUser: + runAsGroup: + fsGroup: securityContext: runAsNonRoot: true From 0c434c3773126869c518d01742b3cd47481e5500 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:12:22 +0530 Subject: [PATCH 12/34] Add 
missing EOF --- helm/h2ogpt-chart/templates/_helpers.tpl | 2 +- .../templates/agents-deployment.yaml | 15 --------------- .../templates/ca-certs-configmap.yaml | 2 +- helm/h2ogpt-chart/templates/h2ogpt-service.yaml | 16 ---------------- .../templates/lmdeploy-configmap.yaml | 2 +- .../templates/lmdeploy-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/tgi-configmap.yaml | 2 +- helm/h2ogpt-chart/templates/tgi-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/tgi-service.yaml | 2 +- helm/h2ogpt-chart/templates/validations.yaml | 2 +- helm/h2ogpt-chart/templates/vllm-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/vllm-service.yaml | 2 +- 12 files changed, 10 insertions(+), 41 deletions(-) diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 9eb75f3c9..9688e8e4f 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -92,4 +92,4 @@ agent_workers: {{ .Values.h2ogpt.agents.agent_workers}} agents_server: False {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index 607f92573..365df9ddd 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -329,18 +329,3 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} ---- -{{- if and (.Values.agents.enabled) (not .Values.agents.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-agents-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.agents.storage.class }} - resources: - requests: - storage: {{ .Values.agents.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml index a2580b771..84d2f4199 100644 --- a/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml +++ b/helm/h2ogpt-chart/templates/ca-certs-configmap.yaml @@ -9,4 +9,4 @@ metadata: data: root-ca-bundle.crt: | {{ .Values.caCertificates | nindent 4 | trim }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index 16417b7ff..0d9bbcc1d 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -31,19 +31,3 @@ spec: targetPort: 5004 type: {{ .Values.h2ogpt.service.type }} {{- end }} ---- -{{- if .Values.h2ogpt.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }} - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}} - ports: - - protocol: TCP - port: {{ .Values.h2ogpt.service.gptPort }} - targetPort: 8888 - type: {{ .Values.h2ogpt.service.type }} -{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml index 7d041e79f..c1dd07713 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml @@ -10,4 +10,4 @@ data: {{- range $key, $value := .Values.lmdeploy.overrideConfig }} {{ printf "%s" $key | upper }}: {{ $value | quote }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml index bb3240924..eed4624b4 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -160,4 +160,4 @@ spec: resources: requests: storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yaml b/helm/h2ogpt-chart/templates/tgi-configmap.yaml index 3857b92c8..ec5c17866 100644 --- a/helm/h2ogpt-chart/templates/tgi-configmap.yaml +++ b/helm/h2ogpt-chart/templates/tgi-configmap.yaml @@ -10,4 +10,4 @@ data: {{- range $key, $value := .Values.tgi.overrideConfig }} {{ printf "%s" $key | upper }}: {{ $value | quote }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml index 400ac6eb6..75f486b70 100644 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml @@ -172,4 +172,4 @@ spec: resources: requests: storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml 
b/helm/h2ogpt-chart/templates/tgi-service.yaml index 63b04b36d..bbaa23439 100644 --- a/helm/h2ogpt-chart/templates/tgi-service.yaml +++ b/helm/h2ogpt-chart/templates/tgi-service.yaml @@ -12,4 +12,4 @@ spec: port: {{ .Values.tgi.service.port }} targetPort: 80 type: {{ .Values.tgi.service.type }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml index c0c45e0a5..11adaa3f8 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -14,4 +14,4 @@ {{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} {{- fail "Agents in both h2oGPT and agents cannot be enabled." }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml index e0228500b..9cf866d40 100644 --- a/helm/h2ogpt-chart/templates/vllm-deployment.yaml +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -164,4 +164,4 @@ spec: resources: requests: storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml index 34678c2c5..d30c729e4 100644 --- a/helm/h2ogpt-chart/templates/vllm-service.yaml +++ b/helm/h2ogpt-chart/templates/vllm-service.yaml @@ -12,4 +12,4 @@ spec: port: {{ .Values.vllm.service.port }} targetPort: 5000 type: {{ .Values.vllm.service.type }} -{{- end }} \ No newline at end of file +{{- end }} From 8c552e5e2d1f1d386f84659b368d88a450d7bc15 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:12:42 +0530 Subject: [PATCH 13/34] Add agents-pvc.yaml --- helm/h2ogpt-chart/templates/agents-pvc.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 helm/h2ogpt-chart/templates/agents-pvc.yaml diff --git 
a/helm/h2ogpt-chart/templates/agents-pvc.yaml b/helm/h2ogpt-chart/templates/agents-pvc.yaml new file mode 100644 index 000000000..2165fab9d --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.agents.enabled) (not .Values.agents.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-agents-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.agents.storage.class }} + resources: + requests: + storage: {{ .Values.agents.storage.size | quote }} +{{- end }} From cc15e61718d5c22329c01c66f4deb6d34739f37d Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:14:56 +0530 Subject: [PATCH 14/34] Add h2ogpt-pvc.yaml --- .../h2ogpt-chart/templates/h2ogpt-deployment.yaml | 15 --------------- helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 15 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index c320d5488..92f25aae4 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -341,18 +341,3 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} ---- -{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-volume - namespace: {{ include "h2ogpt.namespace" . 
| quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.h2ogpt.storage.class }} - resources: - requests: - storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml new file mode 100644 index 000000000..0f5ac0956 --- /dev/null +++ b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.h2ogpt.enabled) (not .Values.h2ogpt.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.h2ogpt.storage.class }} + resources: + requests: + storage: {{ .Values.h2ogpt.storage.size | quote }} +{{- end }} \ No newline at end of file From cd6c1738951f6a63af03ad803792ab95c52b1143 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:25:26 +0530 Subject: [PATCH 15/34] Separate PVC into files and remove autoscaling --- .../templates/agents-deployment.yaml | 2 -- .../templates/h2ogpt-deployment.yaml | 2 -- helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml | 2 +- .../templates/lmdeploy-deployment.yaml | 18 ------------------ helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml | 15 +++++++++++++++ .../h2ogpt-chart/templates/tgi-deployment.yaml | 17 ----------------- helm/h2ogpt-chart/templates/tgi-pvc.yaml | 14 ++++++++++++++ .../templates/vllm-deployment.yaml | 18 ------------------ helm/h2ogpt-chart/templates/vllm-pvc.yaml | 16 ++++++++++++++++ helm/h2ogpt-chart/values.yaml | 6 +----- 10 files changed, 47 insertions(+), 63 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml create mode 100644 helm/h2ogpt-chart/templates/tgi-pvc.yaml create mode 100644 helm/h2ogpt-chart/templates/vllm-pvc.yaml diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml 
index 365df9ddd..0eb0e6b57 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }}-agents spec: - {{- if not .Values.agents.autoscaling.enabled }} replicas: {{ .Values.agents.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . }}-agents diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 92f25aae4..2669c53a9 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }} spec: - {{- if not .Values.h2ogpt.autoscaling.enabled }} replicas: {{ .Values.h2ogpt.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml index 0f5ac0956..bd6e7141f 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-pvc.yaml @@ -11,4 +11,4 @@ spec: resources: requests: storage: {{ .Values.h2ogpt.storage.size | quote }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml index eed4624b4..0cf8b03b3 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference spec: - {{- if not .Values.lmdeploy.autoscaling.enabled }} replicas: {{ .Values.lmdeploy.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference @@ -145,19 +143,3 @@ spec: sizeLimit: 10.24Gi name: shm {{- end }} ---- -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml b/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml new file mode 100644 index 000000000..164ec6f1d --- /dev/null +++ b/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml @@ -0,0 +1,15 @@ +{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} + storageClassName: {{ .Values.lmdeploy.storage.class }} + resources: + requests: + storage: {{ .Values.lmdeploy.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml index 75f486b70..c85b5f4c8 100644 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }}-tgi-inference spec: - {{- if not .Values.tgi.autoscaling.enabled }} replicas: {{ .Values.tgi.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . 
}}-tgi-inference @@ -158,18 +156,3 @@ spec: storageClassName: {{ .Values.tgi.storage.class }} {{- end }} {{- end }} ---- -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-pvc.yaml b/helm/h2ogpt-chart/templates/tgi-pvc.yaml new file mode 100644 index 000000000..0a34be2fd --- /dev/null +++ b/helm/h2ogpt-chart/templates/tgi-pvc.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + storageClassName: {{ .Values.tgi.storage.class }} + resources: + requests: + storage: {{ .Values.tgi.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml index 9cf866d40..89390876d 100644 --- a/helm/h2ogpt-chart/templates/vllm-deployment.yaml +++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml @@ -7,9 +7,7 @@ metadata: labels: app: {{ include "h2ogpt.fullname" . }}-vllm-inference spec: - {{- if not .Values.vllm.autoscaling.enabled }} replicas: {{ .Values.vllm.replicaCount }} - {{- end }} selector: matchLabels: app: {{ include "h2ogpt.fullname" . }}-vllm-inference @@ -149,19 +147,3 @@ spec: sizeLimit: 10.24Gi name: shm {{- end }} ---- -{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.vllm.storage.class | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/vllm-pvc.yaml b/helm/h2ogpt-chart/templates/vllm-pvc.yaml new file mode 100644 index 000000000..fe26f08ea --- /dev/null +++ b/helm/h2ogpt-chart/templates/vllm-pvc.yaml @@ -0,0 +1,16 @@ +--- +{{- if and (.Values.vllm.enabled) (not .Values.vllm.storage.useEphemeral) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume + namespace: {{ include "h2ogpt.namespace" . | quote }} +spec: + accessModes: + - ReadWriteOnce + # storageClassName: {{ .Values.vllm.storage.class | quote }} + storageClassName: {{ .Values.vllm.storage.class }} + resources: + requests: + storage: {{ .Values.vllm.storage.size | quote }} +{{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 8e06f6240..f88324a55 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -139,7 +139,6 @@ h2ogpt: podAnnotations: {} podLabels: {} - autoscaling: {} agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` @@ -235,7 +234,6 @@ agents: podAnnotations: {} podLabels: {} - autoscaling: {} tgi: # -- Enable tgi @@ -278,7 +276,6 @@ tgi: podAnnotations: {} podLabels: {} - autoscaling: {} vllm: # -- Enable vllm @@ -348,7 +345,7 @@ vllm: podAnnotations: {} podLabels: {} - autoscaling: {} + lmdeploy: # -- Enable lmdeploy @@ -392,7 +389,6 @@ lmdeploy: podAnnotations: {} podLabels: {} - autoscaling: {} # -- CA certs caCertificates: "" From 1661737788dca3b12b6e3cf9f5d16d25914b2a7a Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 18:56:27 +0530 Subject: [PATCH 16/34] Remove stack, vllm, tgi, and 
lmdeploy related checks for agents --- .../templates/agents-deployment.yaml | 140 ------------------ helm/h2ogpt-chart/values.yaml | 3 - 2 files changed, 143 deletions(-) diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index 0eb0e6b57..3015395ce 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -70,122 +70,15 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} containers: - {{- if .Values.agents.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - name: {{ include "h2ogpt.fullname" . }}-agents securityContext: {{- toYaml .Values.agents.securityContext | nindent 12 }} image: "{{ .Values.agents.image.repository }}:{{ .Values.agents.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.agents.image.pullPolicy }} command: ["/bin/bash", "-c"] - {{- if .Values.agents.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.agents.stack.enabled }} - {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}/v1/models >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} args: - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.agents.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} - args: - - > - python3 /workspace/generate.py - {{- end }} - {{- end }} ports: - name: agent containerPort: 5004 @@ -212,22 +105,6 @@ spec: - configMapRef: name: {{ include "h2ogpt.fullname" . }}-agents-config env: - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} - {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" - {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) (not .Values.agents.stack.enabled ) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} - {{- if and .Values.agents.stack.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" - {{- end }} {{- range $key, $value := .Values.agents.env }} - name: "{{ $key }}" value: "{{ $value }}" @@ -301,23 +178,6 @@ spec: storage: {{ .Values.agents.storage.size | quote }} storageClassName: {{ .Values.agents.storage.class }} {{- end }} - {{- if .Values.agents.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} {{- if .Values.caCertificates }} - name: ca-certificates configMap: diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index f88324a55..70b9089d5 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -143,9 +143,6 @@ h2ogpt: agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false - stack: - # -- Run agents and vLLM on same pod. - enabled: false replicaCount: 1 imagePullSecrets: image: From 66909426de63821cc0e2abd9261c28432b5a7198 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 19:57:40 +0530 Subject: [PATCH 17/34] Update secret configs --- .../templates/agents-deployment.yaml | 30 +++---------------- .../global-external-llm-secrets.yaml | 13 ++++++++ .../templates/h2ogpt-deployment.yaml | 30 +++---------------- helm/h2ogpt-chart/values.yaml | 5 ++++ 4 files changed, 26 insertions(+), 52 deletions(-) create mode 100644 helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index 3015395ce..c9a0eea68 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -104,37 +104,15 @@ spec: envFrom: - configMapRef: name: {{ include "h2ogpt.fullname" . }}-agents-config + {{- if .Values.global.externalLLM.enabled }} + - secretRef: + name: {{ include "h2ogpt.fullname" . 
}}-external-llm-secret + {{- end }} env: {{- range $key, $value := .Values.agents.env }} - name: "{{ $key }}" value: "{{ $value }}" {{- end }} - {{- if and .Values.global.externalLLM.openAIAzure.enabled .Values.global.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.global.externalLLM.openAI.enabled .Values.global.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.global.externalLLM.replicate.enabled .Values.global.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} {{- if .Values.global.externalLLM.enabled }} - name: H2OGPT_MODEL_LOCK value: {{ toJson .Values.global.externalLLM.modelLock | quote }} diff --git a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml new file mode 100644 index 000000000..5ff95d9cd --- /dev/null +++ b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.global.externalLLM.enabled (or .Values.agents.enabled .Values.h2ogpt.enabled) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . 
| nindent 4 }} +data: +{{- range $key, $value := .Values.global.externalLLM.secret }} + {{ $key }}: {{ $value | quote }} +{{- end }} +{{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 2669c53a9..42f393a73 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -223,6 +223,10 @@ spec: envFrom: - configMapRef: name: {{ include "h2ogpt.fullname" . }}-config + {{- if .Values.global.externalLLM.enabled }} + - secretRef: + name: {{ include "h2ogpt.fullname" . }}-external-llm-secret + {{- end }} env: {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} - name: h2ogpt_inference_server @@ -244,32 +248,6 @@ spec: - name: "{{ $key }}" value: "{{ $value }}" {{- end }} - {{- if and .Values.global.externalLLM.openAIAzure.enabled .Values.global.externalLLM.enabled }} - - name: OPENAI_AZURE_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_AZURE_KEY - - name: OPENAI_AZURE_API_BASE - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_AZURE_API_BASE - {{- end }} - {{- if and .Values.global.externalLLM.openAI.enabled .Values.global.externalLLM.enabled }} - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: OPENAI_API_KEY - {{- end }} - {{- if and .Values.global.externalLLM.replicate.enabled .Values.global.externalLLM.enabled }} - - name: REPLICATE_API_TOKEN - valueFrom: - secretKeyRef: - name: {{ .Values.global.externalLLM.secret }} - key: REPLICATE_API_TOKEN - {{- end }} {{- if .Values.global.externalLLM.enabled }} - name: H2OGPT_MODEL_LOCK value: {{ toJson .Values.global.externalLLM.modelLock | quote }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 70b9089d5..6f3cd3d4f 100644 --- 
a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -5,7 +5,12 @@ namespaceOverride: "" global: externalLLM: enabled: false + # -- list of secrets for h2ogpt and agents env secret: +# OPENAI_AZURE_KEY: "value" +# OPENAI_AZURE_API_BASE: "value" +# OPENAI_API_KEY: "value" +# REPLICATE_API_TOKEN: "value" modelLock: From ba0434b49e1279fe1cbe5dcf9e2e80609476de0d Mon Sep 17 00:00:00 2001 From: Lakindu Date: Fri, 25 Oct 2024 20:10:06 +0530 Subject: [PATCH 18/34] Add conditions for agent and openAi ports --- helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml | 7 ++++--- helm/h2ogpt-chart/templates/h2ogpt-service.yaml | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 42f393a73..b81753e15 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -190,18 +190,19 @@ spec: - name: http containerPort: 7860 protocol: TCP - - name: gpt - containerPort: 8888 - protocol: TCP + {{- if .Values.global.externalLLM.openAI.enabled }} - name: openai containerPort: 5000 protocol: TCP + {{- end }} - name: function containerPort: 5002 protocol: TCP + {{- if .Values.h2ogpt.agents.enabled }} - name: agent containerPort: 5004 protocol: TCP + {{- end }} {{- if .Values.h2ogpt.livenessProbe }} livenessProbe: httpGet: diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index 0d9bbcc1d..043feb527 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -17,17 +17,21 @@ spec: protocol: TCP port: {{ .Values.h2ogpt.service.webPort }} targetPort: 7860 + {{- if .Values.global.externalLLM.openAI.enabled }} - name: openai protocol: TCP port: {{ .Values.h2ogpt.service.openaiPort }} targetPort: 5000 + {{- end }} - name: function protocol: TCP port: {{ 
.Values.h2ogpt.service.functionPort }} targetPort: 5002 + {{- if .Values.h2ogpt.agents.enabled }} - name: agent protocol: TCP port: {{ .Values.h2ogpt.service.agentsPort }} targetPort: 5004 + {{- end }} type: {{ .Values.h2ogpt.service.type }} {{- end }} From 86f060b297321982701839d3898cfee8f319d8f3 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Sat, 26 Oct 2024 01:07:46 +0530 Subject: [PATCH 19/34] Remove stack from h2ogpt --- .../templates/h2ogpt-deployment.yaml | 98 +------------------ helm/h2ogpt-chart/values.yaml | 5 +- 2 files changed, 4 insertions(+), 99 deletions(-) diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index b81753e15..e24805db4 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -70,83 +70,12 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} containers: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference - securityContext: - {{- toYaml .Values.vllm.securityContext | nindent 12 }} - image: "{{ .Values.vllm.image.repository }}:{{ .Values.vllm.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.vllm.image.pullPolicy }} - command: ["python3"] - args: - - "-m" - - "vllm.entrypoints.openai.api_server" - - "--port" - - "5000" - - "--host" - - "0.0.0.0" - - "--download-dir" - - "/workspace/.cache/huggingface/hub" -{{- range $arg := .Values.vllm.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 5000 - protocol: TCP - {{- if .Values.vllm.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.vllm.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.vllm.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.vllm.resources | nindent 
12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - {{- range $key, $value := .Values.vllm.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - mountPath: /dev/shm - subPath: shm - {{- end }} - name: {{ include "h2ogpt.fullname" . }} securityContext: {{- toYaml .Values.h2ogpt.securityContext | nindent 12 }} image: "{{ .Values.h2ogpt.image.repository }}:{{ .Values.h2ogpt.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.h2ogpt.image.pullPolicy }} command: ["/bin/bash", "-c"] - {{- if .Values.h2ogpt.stack.enabled }} - args: - - > - while [[ "$(curl --insecure -s -o /dev/null -w ''%{http_code}'' - http://localhost:5000/v1/models)" != "200" ]]; do - echo "Waiting for inference service to become ready... (2sec)" - sleep 2 - done - - python3 /workspace/generate.py - {{- end }} - {{- if not .Values.h2ogpt.stack.enabled }} {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.modelLock) }} args: - > @@ -185,7 +114,6 @@ spec: - > python3 /workspace/generate.py {{- end }} - {{- end }} ports: - name: http containerPort: 7860 @@ -229,21 +157,18 @@ spec: name: {{ include "h2ogpt.fullname" . }}-external-llm-secret {{- end }} env: - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . 
}}-tgi-inference:{{ .Values.tgi.service.port }}" {{- end }} - {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) (not .Values.h2ogpt.stack.enabled ) }} + {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" {{- end }} - {{- if and .Values.h2ogpt.stack.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "vllm:localhost:5000" {{- end }} {{- range $key, $value := .Values.h2ogpt.env }} - name: "{{ $key }}" @@ -292,23 +217,6 @@ spec: storage: {{ .Values.h2ogpt.storage.size | quote }} storageClassName: {{ .Values.h2ogpt.storage.class }} {{- end }} - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} {{- if .Values.caCertificates }} - name: ca-certificates configMap: diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 6f3cd3d4f..2f84b5adf 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -18,7 +18,7 @@ global: enabled: false openAI: - enabled: False + enabled: false replicate: enabled: false @@ -33,9 +33,6 @@ global: h2ogpt: # -- Enable h2oGPT enabled: true - stack: - # -- Run h2oGPT and vLLM on same pod. - enabled: false # -- Enable agents agents: # -- Run agents with h2oGPT container From a14ed1cf4869c091d6604652a7bbebc76e05c58e Mon Sep 17 00:00:00 2001 From: Lakindu Date: Sat, 26 Oct 2024 02:08:33 +0530 Subject: [PATCH 20/34] Add HPA for agents --- helm/h2ogpt-chart/templates/agents-hpa.yaml | 33 +++++++++++++++++++++ helm/h2ogpt-chart/values.yaml | 9 +++++- 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 helm/h2ogpt-chart/templates/agents-hpa.yaml diff --git a/helm/h2ogpt-chart/templates/agents-hpa.yaml b/helm/h2ogpt-chart/templates/agents-hpa.yaml new file mode 100644 index 000000000..9872e8d43 --- /dev/null +++ b/helm/h2ogpt-chart/templates/agents-hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.agents.autoscaling.enabled | default false }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ .Release.Name }}-agents + namespace: {{ include "h2ogpt.namespace" . | quote }} + labels: + {{- include "h2ogpt.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "h2ogpt.fullname" . 
}}-agents + minReplicas: {{ .Values.agents.autoscaling.minReplicas }} + maxReplicas: {{ .Values.agents.autoscaling.maxReplicas }} + metrics: + {{- if .Values.agents.autoscaling.targetCPU }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.agents.autoscaling.targetCPU }} + {{- end }} + {{- if .Values.agents.autoscaling.targetMemory }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.agents.autoscaling.targetMemory }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 2f84b5adf..21d5bfe1c 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -36,7 +36,7 @@ h2ogpt: # -- Enable agents agents: # -- Run agents with h2oGPT container - enabled: true + enabled: false agent_workers: 5 replicaCount: 1 imagePullSecrets: @@ -145,6 +145,13 @@ h2ogpt: agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false + autoscaling: + # Enable autoscaling for agents + enabled: false + minReplicas: 1 + maxReplicas: 2 + targetMemory: 32Gi + targetCPU: 80 replicaCount: 1 imagePullSecrets: image: From db1cfcecffcf2b2b7f615c77ba0c10068d3e6b48 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Sat, 26 Oct 2024 02:08:54 +0530 Subject: [PATCH 21/34] Fix stack removal --- .../templates/h2ogpt-deployment.yaml | 1 - .../templates/lmdeploy-deployment.yaml | 2 +- .../templates/lmdeploy-service.yaml | 2 +- .../templates/tgi-deployment.yaml | 19 +------------------ helm/h2ogpt-chart/templates/tgi-service.yaml | 2 +- helm/h2ogpt-chart/templates/validations.yaml | 7 +------ .../templates/vllm-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/vllm-service.yaml | 2 +- 8 files changed, 7 insertions(+), 30 deletions(-) diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 
e24805db4..549da1d5d 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -169,7 +169,6 @@ spec: - name: h2ogpt_inference_server value: "http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" {{- end }} - {{- end }} {{- range $key, $value := .Values.h2ogpt.env }} - name: "{{ $key }}" value: "{{ $value }}" diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml index 0cf8b03b3..95a49320f 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled )}} +{{- if and .Values.lmdeploy.enabled }} apiVersion: apps/v1 kind: Deployment metadata: diff --git a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml index e1dfdc4d3..831189944 100644 --- a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml +++ b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.lmdeploy.enabled) (not .Values.h2ogpt.stack.enabled ) }} +{{- if .Values.lmdeploy.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml index c85b5f4c8..721b2ed01 100644 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ b/helm/h2ogpt-chart/templates/tgi-deployment.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +{{- if .Values.tgi.enabled }} apiVersion: apps/v1 kind: Deployment metadata: @@ -123,23 +123,6 @@ spec: mountPath: /dev/shm subPath: shm volumes: - {{- if .Values.h2ogpt.stack.enabled }} - - name: {{ include "h2ogpt.fullname" . 
}}-vllm-inference-volume - {{- if not .Values.vllm.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-vllm-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.vllm.storage.size | quote }} - storageClassName: {{ .Values.vllm.storage.class }} - {{- end }} - {{- end }} - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume {{- if not .Values.tgi.storage.useEphemeral}} persistentVolumeClaim: diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml index bbaa23439..de42ad89a 100644 --- a/helm/h2ogpt-chart/templates/tgi-service.yaml +++ b/helm/h2ogpt-chart/templates/tgi-service.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.tgi.enabled) (not .Values.h2ogpt.stack.enabled ) }} +{{- if .Values.tgi.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml index 11adaa3f8..cd08023e8 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -7,11 +7,6 @@ {{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} {{- end }} -{{- if .Values.h2ogpt.stack.enabled }} - {{- if not (and .Values.vllm.enabled .Values.h2ogpt.enabled) }} - {{- fail "If h2oGPT stack is enabled, both vLLM and h2oGPT should be enabled" }} - {{- end }} -{{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} - {{- fail "Agents in both h2oGPT and agents cannot be enabled." }} + {{- fail " Both agents in both h2ogpt.agents cannot be enabled. 
Enable only one and try again" }}
 {{- end }}
diff --git a/helm/h2ogpt-chart/templates/vllm-deployment.yaml b/helm/h2ogpt-chart/templates/vllm-deployment.yaml
index 89390876d..755a87aac 100644
--- a/helm/h2ogpt-chart/templates/vllm-deployment.yaml
+++ b/helm/h2ogpt-chart/templates/vllm-deployment.yaml
@@ -1,4 +1,4 @@
-{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled )}}
+{{- if .Values.vllm.enabled }}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/helm/h2ogpt-chart/templates/vllm-service.yaml b/helm/h2ogpt-chart/templates/vllm-service.yaml
index d30c729e4..980d998cd 100644
--- a/helm/h2ogpt-chart/templates/vllm-service.yaml
+++ b/helm/h2ogpt-chart/templates/vllm-service.yaml
@@ -1,4 +1,4 @@
-{{- if and (.Values.vllm.enabled) (not .Values.h2ogpt.stack.enabled ) }}
+{{- if .Values.vllm.enabled }}
 apiVersion: v1
 kind: Service
 metadata:

From 95573cdb4efcd63fdb5c86b555986d49a068ca41 Mon Sep 17 00:00:00 2001
From: Lakindu
Date: Sat, 26 Oct 2024 02:15:33 +0530
Subject: [PATCH 22/34] Add default resource limits

---
 helm/h2ogpt-chart/values.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml
index 21d5bfe1c..7e3fe7307 100644
--- a/helm/h2ogpt-chart/values.yaml
+++ b/helm/h2ogpt-chart/values.yaml
@@ -134,6 +134,10 @@ h2ogpt:
       type: RuntimeDefault

   resources:
+    requests:
+      memory: 32Gi
+    limits:
+      memory: 64Gi

   nodeSelector:
   tolerations:
@@ -233,6 +237,10 @@ agents:
       type: RuntimeDefault

   resources:
+    requests:
+      memory: 32Gi
+    limits:
+      memory: 64Gi

   nodeSelector:
   tolerations:

From 661cf3c69266ee5214ed729efccb3feeaf1215aa Mon Sep 17 00:00:00 2001
From: Lakindu
Date: Sat, 26 Oct 2024 02:21:10 +0530
Subject: [PATCH 23/34] Add README.md generated from helm-docs

---
 helm/h2ogpt-chart/Chart.yaml |   2 +-
 helm/h2ogpt-chart/README.md  | 226 +++++++++++++++++++++++++++++++++++
 2 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100644 helm/h2ogpt-chart/README.md

diff --git
a/helm/h2ogpt-chart/Chart.yaml b/helm/h2ogpt-chart/Chart.yaml index d90a7d69e..eeaf87fef 100644 --- a/helm/h2ogpt-chart/Chart.yaml +++ b/helm/h2ogpt-chart/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: h2ogpt -description: A Helm chart for h2ogpt +description: A Helm chart for h2oGPT # A chart can be either an 'application' or a 'library' chart. # diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md new file mode 100644 index 000000000..a410aa342 --- /dev/null +++ b/helm/h2ogpt-chart/README.md @@ -0,0 +1,226 @@ +# h2ogpt + +![Version: 0.1.0-288](https://img.shields.io/badge/Version-0.1.0--288-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.0-288](https://img.shields.io/badge/AppVersion-0.1.0--288-informational?style=flat-square) + +A Helm chart for h2oGPT + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| agents.autoscaling.enabled | bool | `false` | | +| agents.autoscaling.maxReplicas | int | `2` | | +| agents.autoscaling.minReplicas | int | `1` | | +| agents.autoscaling.targetCPU | int | `80` | | +| agents.autoscaling.targetMemory | string | `"32Gi"` | | +| agents.enabled | bool | `false` | Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` | +| agents.env | object | `{}` | | +| agents.extraVolumeMounts | list | `[]` | Extra volume mounts | +| agents.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| agents.image.pullPolicy | string | `"IfNotPresent"` | | +| agents.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| agents.image.tag | string | `nil` | | +| agents.imagePullSecrets | string | `nil` | | +| agents.initImage.pullPolicy | string | `nil` | | +| agents.initImage.repository | string | `nil` | | +| agents.initImage.tag | string | `nil` | | +| agents.nodeSelector | string | `nil` | | +| 
agents.overrideConfig.agent_workers | int | `5` | | +| agents.overrideConfig.concurrency_count | int | `100` | | +| agents.overrideConfig.embedding_gpu_id | string | `"cpu"` | | +| agents.overrideConfig.enable_stt | bool | `false` | | +| agents.overrideConfig.enable_transcriptions | bool | `false` | | +| agents.overrideConfig.enable_tts | bool | `false` | | +| agents.overrideConfig.enforce_h2ogpt_api_key | bool | `true` | | +| agents.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` | | +| agents.overrideConfig.hf_embedding_model | string | `"fake"` | | +| agents.overrideConfig.metadata_in_context | string | `""` | | +| agents.overrideConfig.num_async | int | `10` | | +| agents.overrideConfig.rotate_align_resize_image | bool | `false` | | +| agents.overrideConfig.score_model | string | `"None"` | | +| agents.overrideConfig.share | bool | `false` | | +| agents.overrideConfig.top_k_docs_max_show | int | `100` | | +| agents.overrideConfig.visible_hosts_tab | bool | `false` | | +| agents.overrideConfig.visible_login_tab | bool | `false` | | +| agents.overrideConfig.visible_models_tab | bool | `false` | | +| agents.overrideConfig.visible_system_tab | bool | `false` | | +| agents.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
|
+| agents.podAnnotations | object | `{}` | |
+| agents.podLabels | object | `{}` | |
+| agents.podSecurityContext.fsGroup | string | `nil` | |
+| agents.podSecurityContext.runAsGroup | string | `nil` | |
+| agents.podSecurityContext.runAsNonRoot | bool | `true` | |
+| agents.podSecurityContext.runAsUser | string | `nil` | |
+| agents.replicaCount | int | `1` | |
+| agents.resources.limits.memory | string | `"64Gi"` | |
+| agents.resources.requests.memory | string | `"32Gi"` | |
+| agents.securityContext.allowPrivilegeEscalation | bool | `false` | |
+| agents.securityContext.capabilities.drop[0] | string | `"ALL"` | |
+| agents.securityContext.runAsNonRoot | bool | `true` | |
+| agents.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | |
+| agents.service.agentsPort | int | `5004` | |
+| agents.service.annotations | object | `{}` | |
+| agents.service.type | string | `"NodePort"` | |
+| agents.storage.class | string | `nil` | |
+| agents.storage.size | string | `"128Gi"` | |
+| agents.storage.useEphemeral | bool | `true` | |
+| agents.tolerations | string | `nil` | |
+| agents.updateStrategy.type | string | `"RollingUpdate"` | |
+| caCertificates | string | `""` | CA certs |
+| fullnameOverride | string | `""` | |
+| global.externalLLM.enabled | bool | `false` | |
+| global.externalLLM.modelLock | string | `nil` | |
+| global.externalLLM.openAI.enabled | bool | `false` | |
+| global.externalLLM.openAIAzure.enabled | bool | `false` | |
+| global.externalLLM.replicate.enabled | bool | `false` | |
+| global.externalLLM.secret | string | `nil` | list of secrets for h2ogpt and agents env |
+| global.visionModels.enabled | bool | `false` | Enable vision models |
+| global.visionModels.rotateAlignResizeImage | bool | `false` | |
+| global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itself needs to be set via modelLock or base_model.
Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | +| h2ogpt.agents | object | `{"agent_workers":5,"enabled":false}` | Enable agents | +| h2ogpt.agents.enabled | bool | `false` | Run agents with h2oGPT container | +| h2ogpt.enabled | bool | `true` | Enable h2oGPT | +| h2ogpt.env | object | `{}` | | +| h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts | +| h2ogpt.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| h2ogpt.image.pullPolicy | string | `"IfNotPresent"` | | +| h2ogpt.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| h2ogpt.image.tag | string | `nil` | | +| h2ogpt.imagePullSecrets | string | `nil` | | +| h2ogpt.initImage.pullPolicy | string | `nil` | | +| h2ogpt.initImage.repository | string | `nil` | | +| h2ogpt.initImage.tag | string | `nil` | | +| h2ogpt.nodeSelector | string | `nil` | | +| h2ogpt.overrideConfig.concurrency_count | int | `100` | | +| h2ogpt.overrideConfig.embedding_gpu_id | string | `"cpu"` | | +| h2ogpt.overrideConfig.enable_stt | bool | `false` | | +| h2ogpt.overrideConfig.enable_transcriptions | bool | `false` | | +| h2ogpt.overrideConfig.enable_tts | bool | `false` | | +| h2ogpt.overrideConfig.enforce_h2ogpt_api_key | bool | `true` | | +| h2ogpt.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` | | +| h2ogpt.overrideConfig.hf_embedding_model | string | `"fake"` | | +| h2ogpt.overrideConfig.metadata_in_context | string | `""` | | +| h2ogpt.overrideConfig.num_async | int | `10` | | +| h2ogpt.overrideConfig.openai_server | bool | `true` | | +| h2ogpt.overrideConfig.openai_workers | int | `5` | | +| h2ogpt.overrideConfig.rotate_align_resize_image | bool | `false` | | +| h2ogpt.overrideConfig.score_model | string | `"None"` | | +| h2ogpt.overrideConfig.share | bool | `false` | | +| h2ogpt.overrideConfig.top_k_docs_max_show | int | `100` | | +| h2ogpt.overrideConfig.visible_hosts_tab | bool | `false` | | +| h2ogpt.overrideConfig.visible_login_tab | 
bool | `false` | | +| h2ogpt.overrideConfig.visible_models_tab | bool | `false` | | +| h2ogpt.overrideConfig.visible_system_tab | bool | `false` | | +| h2ogpt.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | +| h2ogpt.podAnnotations | object | `{}` | | +| h2ogpt.podLabels | object | `{}` | | +| h2ogpt.podSecurityContext.fsGroup | string | `nil` | | +| h2ogpt.podSecurityContext.runAsGroup | string | `nil` | | +| h2ogpt.podSecurityContext.runAsNonRoot | bool | `true` | | +| h2ogpt.podSecurityContext.runAsUser | string | `nil` | | +| h2ogpt.replicaCount | int | `1` | | +| h2ogpt.resources.limits.memory | string | `"64Gi"` | | +| h2ogpt.resources.requests.memory | string | `"32Gi"` | | +| h2ogpt.securityContext.allowPrivilegeEscalation | bool | `false` | | +| h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| h2ogpt.securityContext.runAsNonRoot | bool | `true` | | +| h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| h2ogpt.service.agentsPort | int | `5004` | | +| h2ogpt.service.functionPort | int | `5002` | | +| h2ogpt.service.gptPort | int | `8888` | | +| h2ogpt.service.openaiPort | int | `5000` | | +| h2ogpt.service.type | string | `"NodePort"` | | +| h2ogpt.service.webPort | int | `80` | | +| h2ogpt.service.webServiceAnnotations | object | `{}` | | +| h2ogpt.storage.class | string | `nil` | | +| h2ogpt.storage.size | string | `"128Gi"` | | +| h2ogpt.storage.useEphemeral | bool | `true` | | +| h2ogpt.tolerations | string | `nil` | | +| h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | +| lmdeploy.containerArgs[0] | string | `"OpenGVLab/InternVL-Chat-V1-5"` | | +| lmdeploy.enabled | bool | `false` | Enable lmdeploy | +| lmdeploy.env | object | `{}` | | +| lmdeploy.hfSecret | string | `nil` | | +| lmdeploy.image.pullPolicy | string | `"IfNotPresent"` | | +| lmdeploy.image.repository | string | `"gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy"` | | +| 
lmdeploy.image.tag | string | `nil` | | +| lmdeploy.nodeSelector | string | `nil` | | +| lmdeploy.overrideConfig | string | `nil` | | +| lmdeploy.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | +| lmdeploy.podAnnotations | object | `{}` | | +| lmdeploy.podLabels | object | `{}` | | +| lmdeploy.podSecurityContext | string | `nil` | | +| lmdeploy.replicaCount | int | `1` | | +| lmdeploy.resources | string | `nil` | | +| lmdeploy.securityContext | string | `nil` | | +| lmdeploy.service.port | int | `23333` | | +| lmdeploy.service.type | string | `"ClusterIP"` | | +| lmdeploy.storage.class | string | `nil` | | +| lmdeploy.storage.size | string | `"512Gi"` | | +| lmdeploy.storage.useEphemeral | bool | `true` | | +| lmdeploy.tolerations | string | `nil` | | +| lmdeploy.updateStrategy.type | string | `"RollingUpdate"` | | +| nameOverride | string | `""` | | +| namespaceOverride | string | `""` | | +| tgi.containerArgs | string | `nil` | | +| tgi.enabled | bool | `false` | Enable tgi | +| tgi.env | object | `{}` | | +| tgi.hfSecret | string | `nil` | | +| tgi.image.pullPolicy | string | `"IfNotPresent"` | | +| tgi.image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | | +| tgi.image.tag | string | `"0.9.3"` | | +| tgi.nodeSelector | string | `nil` | | +| tgi.overrideConfig | string | `nil` | | +| tgi.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| tgi.podAnnotations | object | `{}` | | +| tgi.podLabels | object | `{}` | | +| tgi.podSecurityContext | string | `nil` | | +| tgi.replicaCount | int | `1` | | +| tgi.resources | string | `nil` | | +| tgi.securityContext | string | `nil` | | +| tgi.service.port | int | `8080` | | +| tgi.service.type | string | `"ClusterIP"` | | +| tgi.storage.class | string | `nil` | | +| tgi.storage.size | string | `"512Gi"` | | +| tgi.storage.useEphemeral | bool | `true` | | +| tgi.tolerations | string | `nil` | | +| tgi.updateStrategy.type | string | `"RollingUpdate"` | | +| vllm.containerArgs[0] | string | `"--model"` | | +| vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` | | +| vllm.containerArgs[2] | string | `"--tokenizer"` | | +| vllm.containerArgs[3] | string | `"hf-internal-testing/llama-tokenizer"` | | +| vllm.containerArgs[4] | string | `"--tensor-parallel-size"` | | +| vllm.containerArgs[5] | int | `2` | | +| vllm.containerArgs[6] | string | `"--seed"` | | +| vllm.containerArgs[7] | int | `1234` | | +| vllm.containerArgs[8] | string | `"--trust-remote-code"` | | +| vllm.enabled | bool | `false` | Enable vllm | +| vllm.env.DO_NOT_TRACK | string | `"1"` | | +| vllm.env.VLLM_NO_USAGE_STATS | string | `"1"` | | +| vllm.image.pullPolicy | string | `"IfNotPresent"` | | +| vllm.image.repository | string | `"vllm/vllm-openai"` | | +| vllm.image.tag | string | `"latest"` | | +| vllm.imagePullSecrets | string | `nil` | | +| vllm.nodeSelector | string | `nil` | | +| vllm.overrideConfig | string | `nil` | | +| vllm.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| vllm.podAnnotations | object | `{}` | | +| vllm.podLabels | object | `{}` | | +| vllm.podSecurityContext.fsGroup | string | `nil` | | +| vllm.podSecurityContext.runAsGroup | string | `nil` | | +| vllm.podSecurityContext.runAsNonRoot | bool | `true` | | +| vllm.podSecurityContext.runAsUser | string | `nil` | | +| vllm.replicaCount | int | `1` | | +| vllm.resources | string | `nil` | | +| vllm.securityContext.allowPrivilegeEscalation | bool | `false` | | +| vllm.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| vllm.securityContext.runAsNonRoot | bool | `true` | | +| vllm.securityContext.seccompProfile | string | `nil` | | +| vllm.service.port | int | `5000` | | +| vllm.service.type | string | `"ClusterIP"` | | +| vllm.storage.class | string | `nil` | | +| vllm.storage.size | string | `"512Gi"` | | +| vllm.storage.useEphemeral | bool | `true` | | +| vllm.tolerations | string | `nil` | | +| vllm.updateStrategy.type | string | `"RollingUpdate"` | | + From 1b75c9d512da62c65078fc1334f3389a9c74daae Mon Sep 17 00:00:00 2001 From: Lakindu Date: Sat, 26 Oct 2024 02:36:46 +0530 Subject: [PATCH 24/34] Update Chart version --- helm/h2ogpt-chart/Chart.yaml | 4 ++-- helm/h2ogpt-chart/templates/agents-hpa.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/helm/h2ogpt-chart/Chart.yaml b/helm/h2ogpt-chart/Chart.yaml index eeaf87fef..5a597ed84 100644 --- a/helm/h2ogpt-chart/Chart.yaml +++ b/helm/h2ogpt-chart/Chart.yaml @@ -15,10 +15,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0-288 +version: 0.2.1-1254 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. 
They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: 0.1.0-288 +appVersion: 0.2.1-1254 diff --git a/helm/h2ogpt-chart/templates/agents-hpa.yaml b/helm/h2ogpt-chart/templates/agents-hpa.yaml index 9872e8d43..f0c796484 100644 --- a/helm/h2ogpt-chart/templates/agents-hpa.yaml +++ b/helm/h2ogpt-chart/templates/agents-hpa.yaml @@ -30,4 +30,4 @@ spec: type: Utilization averageUtilization: {{ .Values.agents.autoscaling.targetMemory }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} From 0f52c7b62e57415b476da1433d7c2c834f61633f Mon Sep 17 00:00:00 2001 From: Lakindu Date: Mon, 28 Oct 2024 13:27:15 +0530 Subject: [PATCH 25/34] Update secrets --- helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml | 3 ++- helm/h2ogpt-chart/values.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml index 5ff95d9cd..6c6f5b74e 100644 --- a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml +++ b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml @@ -6,7 +6,8 @@ metadata: namespace: {{ include "h2ogpt.namespace" . | quote }} labels: {{- include "h2ogpt.labels" . 
| nindent 4 }} -data: +type: Opaque +stringData: {{- range $key, $value := .Values.global.externalLLM.secret }} {{ $key }}: {{ $value | quote }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 7e3fe7307..dbbd6019f 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -6,7 +6,7 @@ global: externalLLM: enabled: false # -- list of secrets for h2ogpt and agents env - secret: + secret: {} # OPENAI_AZURE_KEY: "value" # OPENAI_AZURE_API_BASE: "value" # OPENAI_API_KEY: "value" From 9af9f2dc7c0c1a94f9f709153ebe25141ee2e96f Mon Sep 17 00:00:00 2001 From: Lakindu Date: Mon, 28 Oct 2024 22:13:44 +0530 Subject: [PATCH 26/34] Remove global OpenAI, replicate and openAIAzure --- helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml | 2 +- helm/h2ogpt-chart/templates/h2ogpt-service.yaml | 2 +- helm/h2ogpt-chart/values.yaml | 10 ---------- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 549da1d5d..05b61255c 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -118,7 +118,7 @@ spec: - name: http containerPort: 7860 protocol: TCP - {{- if .Values.global.externalLLM.openAI.enabled }} + {{- if .Values.h2ogpt.overrideConfig.openai_server }} - name: openai containerPort: 5000 protocol: TCP diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index 043feb527..a41364864 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -17,7 +17,7 @@ spec: protocol: TCP port: {{ .Values.h2ogpt.service.webPort }} targetPort: 7860 - {{- if .Values.global.externalLLM.openAI.enabled }} + {{- if .Values.h2ogpt.overrideConfig.openai_server }} - name: openai protocol: TCP port: {{ .Values.h2ogpt.service.openaiPort }} diff --git 
a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index dbbd6019f..9bd04d6a3 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -14,15 +14,6 @@ global: modelLock: - openAIAzure: - enabled: false - - openAI: - enabled: false - - replicate: - enabled: false - visionModels: # -- Enable vision models enabled: false @@ -112,7 +103,6 @@ h2ogpt: openaiPort: 5000 functionPort: 5002 agentsPort: 5004 - gptPort: 8888 webServiceAnnotations: {} updateStrategy: From 78b2e720cbf27193ad9271f39b625e7e6e0d4c5e Mon Sep 17 00:00:00 2001 From: Lakindu Date: Mon, 28 Oct 2024 22:14:03 +0530 Subject: [PATCH 27/34] Update helm-doc --- helm/h2ogpt-chart/README.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index a410aa342..8e5fc3cec 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -1,6 +1,6 @@ # h2ogpt -![Version: 0.1.0-288](https://img.shields.io/badge/Version-0.1.0--288-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.1.0-288](https://img.shields.io/badge/AppVersion-0.1.0--288-informational?style=flat-square) +![Version: 0.2.1-1254](https://img.shields.io/badge/Version-0.2.1--1254-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.2.1-1254](https://img.shields.io/badge/AppVersion-0.2.1--1254-informational?style=flat-square) A Helm chart for h2oGPT @@ -70,10 +70,7 @@ A Helm chart for h2oGPT | fullnameOverride | string | `""` | | | global.externalLLM.enabled | bool | `false` | | | global.externalLLM.modelLock | string | `nil` | | -| global.externalLLM.openAI.enabled | bool | `false` | | -| global.externalLLM.openAIAzure.enabled | bool | `false` | | -| global.externalLLM.replicate.enabled | bool | `false` | | -| global.externalLLM.secret 
| string | `nil` | list of secrets for h2ogpt and agents env | +| global.externalLLM.secret | object | `{}` | list of secrets for h2ogpt and agents env | | global.visionModels.enabled | bool | `false` | Enable vision models | | global.visionModels.rotateAlignResizeImage | bool | `false` | | | global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | @@ -127,7 +124,6 @@ A Helm chart for h2oGPT | h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | | h2ogpt.service.agentsPort | int | `5004` | | | h2ogpt.service.functionPort | int | `5002` | | -| h2ogpt.service.gptPort | int | `8888` | | | h2ogpt.service.openaiPort | int | `5000` | | | h2ogpt.service.type | string | `"NodePort"` | | | h2ogpt.service.webPort | int | `80` | | From f5b79dadfd18bf064afb034de819acd6fc44ac89 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Tue, 29 Oct 2024 00:36:26 +0530 Subject: [PATCH 28/34] Fix agents volumes --- helm/h2ogpt-chart/templates/agents-deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index c9a0eea68..dcdda700a 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -129,7 +129,7 @@ spec: - name: {{ include "h2ogpt.fullname" . }}-agents-volume mountPath: /workspace/.cache subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-volume + - name: {{ include "h2ogpt.fullname" . }}-agents-volume mountPath: /workspace/save subPath: save {{- if .Values.caCertificates }} @@ -144,7 +144,7 @@ spec: - name: {{ include "h2ogpt.fullname" . }}-agents-volume {{- if not .Values.agents.storage.useEphemeral }} persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-volume + claimName: {{ include "h2ogpt.fullname" . 
}}-agents-volume {{- else}} ephemeral: volumeClaimTemplate: From 008636072c4c5661f5838073016ce4defd56f8ec Mon Sep 17 00:00:00 2001 From: Lakindu Date: Thu, 31 Oct 2024 01:38:41 +0530 Subject: [PATCH 29/34] Remove lmdeploy --- helm/h2ogpt-chart/README.md | 23 --- .../templates/h2ogpt-deployment.yaml | 17 +- .../templates/lmdeploy-configmap.yaml | 13 -- .../templates/lmdeploy-deployment.yaml | 145 ------------------ helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml | 15 -- .../templates/lmdeploy-service.yaml | 15 -- helm/h2ogpt-chart/templates/validations.yaml | 6 - helm/h2ogpt-chart/values.yaml | 43 ------ 8 files changed, 1 insertion(+), 276 deletions(-) delete mode 100644 helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml delete mode 100644 helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml delete mode 100644 helm/h2ogpt-chart/templates/lmdeploy-service.yaml diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index 8e5fc3cec..8d37e1f1b 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -133,29 +133,6 @@ A Helm chart for h2oGPT | h2ogpt.storage.useEphemeral | bool | `true` | | | h2ogpt.tolerations | string | `nil` | | | h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | -| lmdeploy.containerArgs[0] | string | `"OpenGVLab/InternVL-Chat-V1-5"` | | -| lmdeploy.enabled | bool | `false` | Enable lmdeploy | -| lmdeploy.env | object | `{}` | | -| lmdeploy.hfSecret | string | `nil` | | -| lmdeploy.image.pullPolicy | string | `"IfNotPresent"` | | -| lmdeploy.image.repository | string | `"gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy"` | | -| lmdeploy.image.tag | string | `nil` | | -| lmdeploy.nodeSelector | string | `nil` | | -| lmdeploy.overrideConfig | string | `nil` | | -| lmdeploy.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| -| lmdeploy.podAnnotations | object | `{}` | | -| lmdeploy.podLabels | object | `{}` | | -| lmdeploy.podSecurityContext | string | `nil` | | -| lmdeploy.replicaCount | int | `1` | | -| lmdeploy.resources | string | `nil` | | -| lmdeploy.securityContext | string | `nil` | | -| lmdeploy.service.port | int | `23333` | | -| lmdeploy.service.type | string | `"ClusterIP"` | | -| lmdeploy.storage.class | string | `nil` | | -| lmdeploy.storage.size | string | `"512Gi"` | | -| lmdeploy.storage.useEphemeral | bool | `true` | | -| lmdeploy.tolerations | string | `nil` | | -| lmdeploy.updateStrategy.type | string | `"RollingUpdate"` | | | nameOverride | string | `""` | | | namespaceOverride | string | `""` | | | tgi.containerArgs | string | `nil` | | diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 05b61255c..7556ca758 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -98,18 +98,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled .Values.lmdeploy.enabled)) }} + {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled )) }} args: - > python3 /workspace/generate.py @@ -165,10 +154,6 @@ spec: - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . 
}}-vllm-inference:{{ .Values.vllm.service.port }}" {{- end }} - {{- if and .Values.lmdeploy.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-lmdeploy-inference:{{ .Values.lmdeploy.service.port }}" - {{- end }} {{- range $key, $value := .Values.h2ogpt.env }} - name: "{{ $key }}" value: "{{ $value }}" diff --git a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml b/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml deleted file mode 100644 index c1dd07713..000000000 --- a/helm/h2ogpt-chart/templates/lmdeploy-configmap.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.lmdeploy.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml b/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml deleted file mode 100644 index 95a49320f..000000000 --- a/helm/h2ogpt-chart/templates/lmdeploy-deployment.yaml +++ /dev/null @@ -1,145 +0,0 @@ -{{- if and .Values.lmdeploy.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference -spec: - replicas: {{ .Values.lmdeploy.replicaCount }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- if .Values.lmdeploy.updateStrategy }} - strategy: {{- toYaml .Values.lmdeploy.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.lmdeploy.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - {{- with .Values.lmdeploy.podLabels }} - {{ toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.lmdeploy.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.lmdeploy.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.lmdeploy.podAffinity }} - podAntiAffinity: - {{- if .Values.lmdeploy.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.lmdeploy.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.lmdeploy.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.lmdeploy.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - securityContext: - {{- toYaml .Values.lmdeploy.securityContext | nindent 12 }} - image: "{{ .Values.lmdeploy.image.repository }}:{{ .Values.lmdeploy.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.lmdeploy.image.pullPolicy }} - command: ["lmdeploy"] - args: - - "serve" - - "api_server" -{{- range $arg := .Values.lmdeploy.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 23333 - protocol: TCP - {{- if .Values.lmdeploy.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.lmdeploy.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.lmdeploy.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.lmdeploy.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-config - env: - - name: NCCL_IGNORE_DISABLED_P2P - value: "1" - - name: HF_HOME - value: "/workspace/.cache" - {{- range $key, $value := .Values.lmdeploy.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - mountPath: /workspace/.cache - subPath: cache - - name: shm - mountPath: /dev/shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - {{- if not .Values.lmdeploy.storage.useEphemeral }} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference-volume - {{- else }} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - {{- end }} - - emptyDir: - medium: Memory - sizeLimit: 10.24Gi - name: shm -{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml b/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml deleted file mode 100644 index 164ec6f1d..000000000 --- a/helm/h2ogpt-chart/templates/lmdeploy-pvc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if and (.Values.lmdeploy.enabled) (not .Values.lmdeploy.storage.useEphemeral) }} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - # storageClassName: {{ .Values.lmdeploy.storage.class | quote }} - storageClassName: {{ .Values.lmdeploy.storage.class }} - resources: - requests: - storage: {{ .Values.lmdeploy.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml b/helm/h2ogpt-chart/templates/lmdeploy-service.yaml deleted file mode 100644 index 831189944..000000000 --- a/helm/h2ogpt-chart/templates/lmdeploy-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if .Values.lmdeploy.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-lmdeploy-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}}-lmdeploy-inference - ports: - - protocol: TCP - port: {{ .Values.lmdeploy.service.port }} - targetPort: 23333 - type: {{ .Values.lmdeploy.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validations.yaml index cd08023e8..ce4e264fd 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validations.yaml @@ -1,12 +1,6 @@ {{- if and .Values.vllm.enabled .Values.tgi.enabled }} {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} {{- end }} -{{- if and .Values.vllm.enabled .Values.lmdeploy.enabled }} - {{- fail "Both lmdeploy and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} -{{- if and .Values.lmdeploy.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and lmdeploy cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} {{- fail " Both agents in both h2ogpt.agents cannot be enabled. Enably only one and try again" }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 9bd04d6a3..6bcfb292b 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -351,48 +351,5 @@ vllm: podLabels: {} -lmdeploy: - # -- Enable lmdeploy - enabled: false - replicaCount: 1 - - image: - repository: gcr.io/vorvan/h2oai/h2oai-h2ogpt-lmdeploy - tag: - pullPolicy: IfNotPresent - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
- podAffinity: - # hostname: - # zone: - - storage: - size: 512Gi - class: - useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: - - "OpenGVLab/InternVL-Chat-V1-5" - - service: - type: ClusterIP - port: 23333 - - updateStrategy: - type: RollingUpdate - - podSecurityContext: - securityContext: - - resources: - nodeSelector: - tolerations: - - env: {} - - podAnnotations: {} - podLabels: {} - # -- CA certs caCertificates: "" From eccb0c2de85c6d16da09c8629f380628ddeff68a Mon Sep 17 00:00:00 2001 From: Lakindu Date: Thu, 31 Oct 2024 02:14:27 +0530 Subject: [PATCH 30/34] Remove tgi --- helm/h2ogpt-chart/README.md | 23 --- .../templates/h2ogpt-deployment.yaml | 17 +-- .../h2ogpt-chart/templates/tgi-configmap.yaml | 13 -- .../templates/tgi-deployment.yaml | 141 ------------------ helm/h2ogpt-chart/templates/tgi-pvc.yaml | 14 -- helm/h2ogpt-chart/templates/tgi-service.yaml | 15 -- .../{validations.yaml => validators.yaml} | 3 - helm/h2ogpt-chart/values.yaml | 44 +----- 8 files changed, 2 insertions(+), 268 deletions(-) delete mode 100644 helm/h2ogpt-chart/templates/tgi-configmap.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-deployment.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-pvc.yaml delete mode 100644 helm/h2ogpt-chart/templates/tgi-service.yaml rename helm/h2ogpt-chart/templates/{validations.yaml => validators.yaml} (55%) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index 8d37e1f1b..2f41f4291 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -135,29 +135,6 @@ A Helm chart for h2oGPT | h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | | nameOverride | string | `""` | | | namespaceOverride | string | `""` | | -| tgi.containerArgs | string | `nil` | | -| tgi.enabled | bool | `false` | Enable tgi | -| tgi.env | object | `{}` | | -| tgi.hfSecret | string | `nil` | | -| tgi.image.pullPolicy | string | `"IfNotPresent"` | | -| tgi.image.repository | string | 
`"ghcr.io/huggingface/text-generation-inference"` | | -| tgi.image.tag | string | `"0.9.3"` | | -| tgi.nodeSelector | string | `nil` | | -| tgi.overrideConfig | string | `nil` | | -| tgi.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | -| tgi.podAnnotations | object | `{}` | | -| tgi.podLabels | object | `{}` | | -| tgi.podSecurityContext | string | `nil` | | -| tgi.replicaCount | int | `1` | | -| tgi.resources | string | `nil` | | -| tgi.securityContext | string | `nil` | | -| tgi.service.port | int | `8080` | | -| tgi.service.type | string | `"ClusterIP"` | | -| tgi.storage.class | string | `nil` | | -| tgi.storage.size | string | `"512Gi"` | | -| tgi.storage.useEphemeral | bool | `true` | | -| tgi.tolerations | string | `nil` | | -| tgi.updateStrategy.type | string | `"RollingUpdate"` | | | vllm.containerArgs[0] | string | `"--model"` | | | vllm.containerArgs[1] | string | `"h2oai/h2ogpt-4096-llama2-7b-chat"` | | | vllm.containerArgs[2] | string | `"--tokenizer"` | | diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 7556ca758..741390cd7 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -87,18 +87,7 @@ spec: python3 /workspace/generate.py {{- end }} - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.modelLock) }} - args: - - > - until wget -O- http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}/ >/dev/null 2>&1; - do - echo "Waiting for inference service to become ready..."; - sleep 5; - done - - python3 /workspace/generate.py - {{- end }} - {{- if and .Values.h2ogpt.enabled (not (or .Values.vllm.enabled .Values.tgi.enabled )) }} + {{- if and .Values.h2ogpt.enabled (not .Values.vllm.enabled ) }} args: - > python3 /workspace/generate.py @@ -146,10 +135,6 @@ spec: name: {{ include "h2ogpt.fullname" . 
}}-external-llm-secret {{- end }} env: - {{- if and .Values.tgi.enabled (not .Values.global.externalLLM.enabled) }} - - name: h2ogpt_inference_server - value: "http://{{ include "h2ogpt.fullname" . }}-tgi-inference:{{ .Values.tgi.service.port }}" - {{- end }} {{- if and .Values.vllm.enabled (not .Values.global.externalLLM.enabled) }} - name: h2ogpt_inference_server value: "vllm:{{ include "h2ogpt.fullname" . }}-vllm-inference:{{ .Values.vllm.service.port }}" diff --git a/helm/h2ogpt-chart/templates/tgi-configmap.yaml b/helm/h2ogpt-chart/templates/tgi-configmap.yaml deleted file mode 100644 index ec5c17866..000000000 --- a/helm/h2ogpt-chart/templates/tgi-configmap.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - {{- include "h2ogpt.labels" . | nindent 4 }} -data: -{{- range $key, $value := .Values.tgi.overrideConfig }} - {{ printf "%s" $key | upper }}: {{ $value | quote }} -{{- end }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-deployment.yaml b/helm/h2ogpt-chart/templates/tgi-deployment.yaml deleted file mode 100644 index 721b2ed01..000000000 --- a/helm/h2ogpt-chart/templates/tgi-deployment.yaml +++ /dev/null @@ -1,141 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference -spec: - replicas: {{ .Values.tgi.replicaCount }} - selector: - matchLabels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- if .Values.tgi.updateStrategy }} - strategy: {{- toYaml .Values.tgi.updateStrategy | nindent 4 }} - {{- end }} - template: - metadata: - {{- with .Values.tgi.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "h2ogpt.fullname" . }}-tgi-inference - {{- with .Values.tgi.podLabels }} - {{ toYaml . | nindent 6 }} - {{- end }} - spec: - {{- with .Values.tgi.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - {{- toYaml .Values.tgi.podSecurityContext | nindent 8 }} - affinity: - {{- if .Values.tgi.podAffinity }} - podAntiAffinity: - {{- if .Values.tgi.podAffinity.hostname }} - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: kubernetes.io/hostname - {{- end }} - {{- if .Values.tgi.podAffinity.zone }} - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ include "h2ogpt.fullname" . }} - topologyKey: failure-domain.beta.kubernetes.io/zone - {{- end }} - {{- end }} - {{- with .Values.tgi.extraAffinity }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tgi.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ include "h2ogpt.fullname" . 
}}-tgi-inference - securityContext: - {{- toYaml .Values.tgi.securityContext | nindent 12 }} - image: "{{ .Values.tgi.image.repository }}:{{ .Values.tgi.image.tag }}" - imagePullPolicy: {{ .Values.tgi.image.pullPolicy }} - command: [] - args: -{{- range $arg := .Values.tgi.containerArgs }} - - "{{ $arg }}" -{{- end }} - ports: - - name: http - containerPort: 80 - protocol: TCP - {{- if .Values.tgi.livenessProbe }} - livenessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.tgi.readinessProbe }} - readinessProbe: - httpGet: - path: / - scheme: HTTP - port: http - {{- toYaml .Values.tgi.readinessProbe | nindent 12 }} - {{- end }} - resources: - {{- toYaml .Values.tgi.resources | nindent 12 }} - env: - {{- range $key, $value := .Values.tgi.env }} - - name: "{{ $key }}" - value: "{{ $value }}" - {{- end }} - envFrom: - - configMapRef: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-config - - secretRef: - name: {{ .Values.tgi.hfSecret }} - volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /app/cache - subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /data - subPath: data - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - mountPath: /dev/shm - subPath: shm - volumes: - - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - {{- if not .Values.tgi.storage.useEphemeral}} - persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . 
}}-tgi-inference-volume - {{- else}} - ephemeral: - volumeClaimTemplate: - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} - storageClassName: {{ .Values.tgi.storage.class }} - {{- end }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-pvc.yaml b/helm/h2ogpt-chart/templates/tgi-pvc.yaml deleted file mode 100644 index 0a34be2fd..000000000 --- a/helm/h2ogpt-chart/templates/tgi-pvc.yaml +++ /dev/null @@ -1,14 +0,0 @@ -{{- if and (.Values.tgi.enabled) (not .Values.tgi.storage.useEphemeral)}} -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference-volume - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - accessModes: - - ReadWriteOnce - storageClassName: {{ .Values.tgi.storage.class }} - resources: - requests: - storage: {{ .Values.tgi.storage.size | quote }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/tgi-service.yaml b/helm/h2ogpt-chart/templates/tgi-service.yaml deleted file mode 100644 index de42ad89a..000000000 --- a/helm/h2ogpt-chart/templates/tgi-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if .Values.tgi.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: {{ include "h2ogpt.fullname" . }}-tgi-inference - namespace: {{ include "h2ogpt.namespace" . | quote }} -spec: - selector: - app: {{ include "h2ogpt.fullname" . 
}}-tgi-inference - ports: - - protocol: TCP - port: {{ .Values.tgi.service.port }} - targetPort: 80 - type: {{ .Values.tgi.service.type }} -{{- end }} diff --git a/helm/h2ogpt-chart/templates/validations.yaml b/helm/h2ogpt-chart/templates/validators.yaml similarity index 55% rename from helm/h2ogpt-chart/templates/validations.yaml rename to helm/h2ogpt-chart/templates/validators.yaml index ce4e264fd..b97d33e5c 100644 --- a/helm/h2ogpt-chart/templates/validations.yaml +++ b/helm/h2ogpt-chart/templates/validators.yaml @@ -1,6 +1,3 @@ -{{- if and .Values.vllm.enabled .Values.tgi.enabled }} - {{- fail "Both TGI and vLLM cannot be enabled at the same time. Enable only one and try again" }} -{{- end }} {{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} {{- fail " Both agents in both h2ogpt.agents cannot be enabled. Enably only one and try again" }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 6bcfb292b..78b79d159 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -140,7 +140,7 @@ agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false autoscaling: - # Enable autoscaling for agents + # Enable autoscaling (HPA) for agents enabled: false minReplicas: 1 maxReplicas: 2 @@ -239,48 +239,6 @@ agents: podAnnotations: {} podLabels: {} -tgi: - # -- Enable tgi - enabled: false - replicaCount: 1 - - image: - repository: ghcr.io/huggingface/text-generation-inference - tag: 0.9.3 - pullPolicy: IfNotPresent - # -- Set hostname and zone to true for pod affinity rules based on hostname and zone. 
- podAffinity: - # hostname: - # zone: - - storage: - size: 512Gi - class: - useEphemeral: true - - overrideConfig: - hfSecret: - containerArgs: - - service: - type: ClusterIP - port: 8080 - - updateStrategy: - type: RollingUpdate - - podSecurityContext: - securityContext: - - resources: - nodeSelector: - tolerations: - - env: {} - - podAnnotations: {} - podLabels: {} - vllm: # -- Enable vllm enabled: false From 86fae3c50fd18db3d5376c264edf4d54dfe32970 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Tue, 5 Nov 2024 21:09:57 +0530 Subject: [PATCH 31/34] Change overrideConfig passing method --- helm/h2ogpt-chart/README.md | 62 ++----- helm/h2ogpt-chart/templates/_helpers.tpl | 129 ++++++++++++-- .../templates/agents-configmap.yaml | 18 +- .../templates/h2ogpt-configmap.yaml | 18 +- .../templates/h2ogpt-deployment.yaml | 2 +- .../templates/h2ogpt-service.yaml | 2 +- helm/h2ogpt-chart/values.yaml | 168 +++++++++--------- 7 files changed, 250 insertions(+), 149 deletions(-) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index 2f41f4291..bd4dedfb8 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -8,6 +8,8 @@ A Helm chart for h2oGPT | Key | Type | Default | Description | |-----|------|---------|-------------| +| agents.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. 
| +| agents.agent_workers | int | `5` | | | agents.autoscaling.enabled | bool | `false` | | | agents.autoscaling.maxReplicas | int | `2` | | | agents.autoscaling.minReplicas | int | `1` | | @@ -24,26 +26,8 @@ A Helm chart for h2oGPT | agents.initImage.pullPolicy | string | `nil` | | | agents.initImage.repository | string | `nil` | | | agents.initImage.tag | string | `nil` | | -| agents.nodeSelector | string | `nil` | | -| agents.overrideConfig.agent_workers | int | `5` | | -| agents.overrideConfig.concurrency_count | int | `100` | | -| agents.overrideConfig.embedding_gpu_id | string | `"cpu"` | | -| agents.overrideConfig.enable_stt | bool | `false` | | -| agents.overrideConfig.enable_transcriptions | bool | `false` | | -| agents.overrideConfig.enable_tts | bool | `false` | | -| agents.overrideConfig.enforce_h2ogpt_api_key | bool | `true` | | -| agents.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` | | -| agents.overrideConfig.hf_embedding_model | string | `"fake"` | | -| agents.overrideConfig.metadata_in_context | string | `""` | | -| agents.overrideConfig.num_async | int | `10` | | -| agents.overrideConfig.rotate_align_resize_image | bool | `false` | | -| agents.overrideConfig.score_model | string | `"None"` | | -| agents.overrideConfig.share | bool | `false` | | -| agents.overrideConfig.top_k_docs_max_show | int | `100` | | -| agents.overrideConfig.visible_hosts_tab | bool | `false` | | -| agents.overrideConfig.visible_login_tab | bool | `false` | | -| agents.overrideConfig.visible_models_tab | bool | `false` | | -| agents.overrideConfig.visible_system_tab | bool | `false` | | +| agents.nodeSelector | object | `{}` | Node selector for the agents pods. | +| agents.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | | agents.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| | agents.podAnnotations | object | `{}` | | | agents.podLabels | object | `{}` | | @@ -52,7 +36,9 @@ A Helm chart for h2oGPT | agents.podSecurityContext.runAsNonRoot | bool | `true` | | | agents.podSecurityContext.runAsUser | string | `nil` | | | agents.replicaCount | int | `1` | | +| agents.resources.limits."nvidia.com/gpu" | int | `1` | | | agents.resources.limits.memory | string | `"64Gi"` | | +| agents.resources.requests."nvidia.com/gpu" | int | `1` | | | agents.resources.requests.memory | string | `"32Gi"` | | | agents.securityContext.allowPrivilegeEscalation | bool | `false` | | | agents.securityContext.capabilities.drop[0] | string | `"ALL"` | | @@ -64,7 +50,7 @@ A Helm chart for h2oGPT | agents.storage.class | string | `nil` | | | agents.storage.size | string | `"128Gi"` | | | agents.storage.useEphemeral | bool | `true` | | -| agents.tolerations | string | `nil` | | +| agents.tolerations | list | `[]` | Node taints to tolerate by the agents pods. | | agents.updateStrategy.type | string | `"RollingUpdate"` | | | caCertificates | string | `""` | CA certs | | fullnameOverride | string | `""` | | @@ -74,8 +60,9 @@ A Helm chart for h2oGPT | global.visionModels.enabled | bool | `false` | Enable vision models | | global.visionModels.rotateAlignResizeImage | bool | `false` | | | global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itslef needs to be set via modeLock or base_model. Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | -| h2ogpt.agents | object | `{"agent_workers":5,"enabled":false}` | Enable agents | -| h2ogpt.agents.enabled | bool | `false` | Run agents with h2oGPT container | +| h2ogpt.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. 
| +| h2ogpt.agents | object | `{"agent_workers":5,"enabled":true}` | Enable agents | +| h2ogpt.agents.enabled | bool | `true` | Run agents with h2oGPT container | | h2ogpt.enabled | bool | `true` | Enable h2oGPT | | h2ogpt.env | object | `{}` | | | h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts | @@ -87,27 +74,10 @@ A Helm chart for h2oGPT | h2ogpt.initImage.pullPolicy | string | `nil` | | | h2ogpt.initImage.repository | string | `nil` | | | h2ogpt.initImage.tag | string | `nil` | | -| h2ogpt.nodeSelector | string | `nil` | | -| h2ogpt.overrideConfig.concurrency_count | int | `100` | | -| h2ogpt.overrideConfig.embedding_gpu_id | string | `"cpu"` | | -| h2ogpt.overrideConfig.enable_stt | bool | `false` | | -| h2ogpt.overrideConfig.enable_transcriptions | bool | `false` | | -| h2ogpt.overrideConfig.enable_tts | bool | `false` | | -| h2ogpt.overrideConfig.enforce_h2ogpt_api_key | bool | `true` | | -| h2ogpt.overrideConfig.enforce_h2ogpt_ui_key | bool | `false` | | -| h2ogpt.overrideConfig.hf_embedding_model | string | `"fake"` | | -| h2ogpt.overrideConfig.metadata_in_context | string | `""` | | -| h2ogpt.overrideConfig.num_async | int | `10` | | -| h2ogpt.overrideConfig.openai_server | bool | `true` | | -| h2ogpt.overrideConfig.openai_workers | int | `5` | | -| h2ogpt.overrideConfig.rotate_align_resize_image | bool | `false` | | -| h2ogpt.overrideConfig.score_model | string | `"None"` | | -| h2ogpt.overrideConfig.share | bool | `false` | | -| h2ogpt.overrideConfig.top_k_docs_max_show | int | `100` | | -| h2ogpt.overrideConfig.visible_hosts_tab | bool | `false` | | -| h2ogpt.overrideConfig.visible_login_tab | bool | `false` | | -| h2ogpt.overrideConfig.visible_models_tab | bool | `false` | | -| h2ogpt.overrideConfig.visible_system_tab | bool | `false` | | +| h2ogpt.nodeSelector | object | `{}` | Node selector for the h2ogpt pods. 
| +| h2ogpt.openai.enabled | bool | `true` | | +| h2ogpt.openai.openai_workers | int | `5` | | +| h2ogpt.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | | h2ogpt.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. | | h2ogpt.podAnnotations | object | `{}` | | | h2ogpt.podLabels | object | `{}` | | @@ -116,7 +86,9 @@ A Helm chart for h2oGPT | h2ogpt.podSecurityContext.runAsNonRoot | bool | `true` | | | h2ogpt.podSecurityContext.runAsUser | string | `nil` | | | h2ogpt.replicaCount | int | `1` | | +| h2ogpt.resources.limits."nvidia.com/gpu" | int | `0` | | | h2ogpt.resources.limits.memory | string | `"64Gi"` | | +| h2ogpt.resources.requests."nvidia.com/gpu" | int | `0` | | | h2ogpt.resources.requests.memory | string | `"32Gi"` | | | h2ogpt.securityContext.allowPrivilegeEscalation | bool | `false` | | | h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` | | @@ -131,7 +103,7 @@ A Helm chart for h2oGPT | h2ogpt.storage.class | string | `nil` | | | h2ogpt.storage.size | string | `"128Gi"` | | | h2ogpt.storage.useEphemeral | bool | `true` | | -| h2ogpt.tolerations | string | `nil` | | +| h2ogpt.tolerations | list | `[]` | Node taints to tolerate by the h2ogpt pods. 
| | h2ogpt.updateStrategy.type | string | `"RollingUpdate"` | | | nameOverride | string | `""` | | | namespaceOverride | string | `""` | | diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 9688e8e4f..77163b4a6 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -70,26 +70,129 @@ Create the name of the service account to use {{- end }} {{/* -Configs for agents server +Config for h2oGPT */}} -{{- define "agents.overrideConfig" -}} -agent_server: True -agent_port: "5004" +{{- define "h2ogpt.config" -}} +{{- with .Values.h2ogpt }} +verbose: {{ default "True" .overrideConfig.verbose }} +{{- if .overrideConfig.heap_app_id }} +heap_app_id: {{ .overrideConfig.heap_app_id }} +{{- end }} +num_async: {{ default 10 .overrideConfig.num_async }} +save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} +score_model: {{ default "None" .overrideConfig.score_model }} +share: {{ default "False" .overrideConfig.share }} +enforce_h2ogpt_api_key: {{ default "False" .overrideConfig.enforce_h2ogpt_api_key }} +enforce_h2ogpt_ui_key: {{ default "False" .overrideConfig.enforce_h2ogpt_ui_key }} +{{- if .overrideConfig.h2ogpt_api_keys }} +h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} +{{- end }} +{{- if .overrideConfig.use_auth_token }} +use_auth_token: {{ .overrideConfig.use_auth_token }} +{{- end }} +visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} +visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }} +top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} +{{- if .overrideConfig.admin_pass }} +admin_pass: {{ .overrideConfig.admin_pass }} +{{- end }} +{{- if .openai.enabled }} +openai_server: "True" +openai_port: 5000 +openai_workers: {{ default 5 .openai.openai_workers }} +{{- end }} +{{- if .agents.enabled }} +agent_server: "True" 
+agent_port: 5004 +agent_workers: {{ .agents.agent_workers }} +{{- end }} +function_server: {{ default "True" .overrideConfig.function_server }} +function_port: 5002 +function_server_workers: {{ default 1 .overrideConfig.function_server_workers }} +multiple_workers_gunicorn: {{ default "True" .overrideConfig.multiple_workers_gunicorn }} +llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} +enable_llava: {{ default "True" .overrideConfig.enable_llava }} +{{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} +enable_tts: {{ default "False" .overrideConfig.enable_tts }} +enable_stt: {{ default "True" .overrideConfig.enable_stt }} +enable_transcriptions: {{ default "True" .overrideConfig.enable_transcriptions }} +asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} +pre_load_embedding_model: {{ default "True" .overrideConfig.pre_load_embedding_model }} +pre_load_image_audio_models: {{ default "True" .overrideConfig.pre_load_image_audio_models }} +cut_distance: {{ default 10000 .overrideConfig.cut_distance }} +hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} +enable_captions: {{ default "False" .overrideConfig.enable_captions }} +enable_doctr: {{ default "True" .overrideConfig.enable_doctr }} +{{- else }} +enable_tts: {{ default "False" .overrideConfig.enable_tts }} +enable_stt: {{ default "False" .overrideConfig.enable_stt }} +enable_transcriptions: {{ default "False" .overrideConfig.enable_transcriptions }} +embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} +hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} +pre_load_embedding_model: {{ default "False" .overrideConfig.pre_load_embedding_model }} +pre_load_image_audio_models: {{ default "False" .overrideConfig.pre_load_image_audio_models }} +enable_captions: {{ default "False" .overrideConfig.enable_captions }} +enable_doctr: {{ default 
"False" .overrideConfig.enable_doctr }} +{{- end }} +{{- end }} {{- end }} {{/* -Configs for agents with h2ogpt +Config for agents */}} -{{- define "h2ogpt.overrideConfig" -}} -{{- if .Values.h2ogpt.agents.enabled }} -agent_server: True -agent_port: "5004" -multiple_workers_gunicorn: True -agent_workers: {{ .Values.h2ogpt.agents.agent_workers}} +{{- define "agents.config" -}} +{{- with .Values.agents }} +verbose: {{ default "True" .overrideConfig.verbose }} +{{- if .overrideConfig.heap_app_id }} +heap_app_id: {{ .overrideConfig.heap_app_id }} +{{- end }} +num_async: {{ default 10 .overrideConfig.num_async }} +save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} +score_model: {{ default "None" .overrideConfig.score_model }} +share: {{ default "False" .overrideConfig.share }} +enforce_h2ogpt_api_key: {{ default "False" .overrideConfig.enforce_h2ogpt_api_key }} +enforce_h2ogpt_ui_key: {{ default "False" .overrideConfig.enforce_h2ogpt_ui_key }} +{{- if .overrideConfig.h2ogpt_api_keys }} +h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} +{{- end }} +{{- if .overrideConfig.use_auth_token }} +use_auth_token: {{ .overrideConfig.use_auth_token }} +{{- end }} +visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} +visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }} +top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} +{{- if .overrideConfig.admin_pass }} +admin_pass: {{ .overrideConfig.admin_pass }} +{{- end }} +agent_server: "True" +agent_port: 5004 +agent_workers: {{ default 5 .agent_workers }} +multiple_workers_gunicorn: {{ default "True" .overrideConfig.multiple_workers_gunicorn }} +llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} +enable_llava: {{ default "True" .overrideConfig.enable_llava }} +{{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} +enable_tts: {{ 
default "False" .overrideConfig.enable_tts }} +enable_stt: {{ default "True" .overrideConfig.enable_stt }} +enable_transcriptions: {{ default "True" .overrideConfig.enable_transcriptions }} +asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} +pre_load_embedding_model: {{ default "True" .overrideConfig.pre_load_embedding_model }} +pre_load_image_audio_models: {{ default "True" .overrideConfig.pre_load_image_audio_models }} +cut_distance: {{ default 10000 .overrideConfig.cut_distance }} +hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} +enable_captions: {{ default "False" .overrideConfig.enable_captions }} +enable_doctr: {{ default "True" .overrideConfig.enable_doctr }} {{- else }} -agents_server: False +enable_tts: {{ default "False" .overrideConfig.enable_tts }} +enable_stt: {{ default "False" .overrideConfig.enable_stt }} +enable_transcriptions: {{ default "False" .overrideConfig.enable_transcriptions }} +embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} +hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} +pre_load_embedding_model: {{ default "False" .overrideConfig.pre_load_embedding_model }} +pre_load_image_audio_models: {{ default "False" .overrideConfig.pre_load_image_audio_models }} +enable_captions: {{ default "False" .overrideConfig.enable_captions }} +enable_doctr: {{ default "False" .overrideConfig.enable_doctr }} {{- end }} - {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/templates/agents-configmap.yaml b/helm/h2ogpt-chart/templates/agents-configmap.yaml index e242dee69..2f293cd2f 100644 --- a/helm/h2ogpt-chart/templates/agents-configmap.yaml +++ b/helm/h2ogpt-chart/templates/agents-configmap.yaml @@ -7,10 +7,20 @@ metadata: labels: {{- include "h2ogpt.labels" . | nindent 4 }} data: -{{- range $key, $value := ( include "agents.overrideConfig" . 
| fromYaml ) }} -    {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- range $key, $value := ( include "agents.config" . | fromYaml ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} {{- end }} -{{- range $key, $value := .Values.agents.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- range $key, $value := ( .Values.agents.additionalConfig ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} {{- end }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml index 902705552..ceb8a18d9 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-configmap.yaml @@ -7,10 +7,20 @@ metadata: labels: {{- include "h2ogpt.labels" . | nindent 4 }} data: -{{- range $key, $value := ( include "h2ogpt.overrideConfig" . | fromYaml ) }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- range $key, $value := ( include "h2ogpt.config" . 
| fromYaml ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ($value | toString)) ( eq "false" ($value | toString)) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} {{- end }} -{{- range $key, $value := .Values.h2ogpt.overrideConfig }} - {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} +{{- range $key, $value := ( .Values.h2ogpt.additionalConfig ) }} +{{- /* convert boolean value to cli compatibility */}} + {{- if or ( eq "true" ($value | toString)) ( eq "false" ($value | toString)) }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} + {{- else }} + {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} + {{- end }} {{- end }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index 741390cd7..bac71f22d 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -96,7 +96,7 @@ spec: - name: http containerPort: 7860 protocol: TCP - {{- if .Values.h2ogpt.overrideConfig.openai_server }} + {{- if .Values.h2ogpt.openai.enabled }} - name: openai containerPort: 5000 protocol: TCP diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index a41364864..747aed223 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -17,7 +17,7 @@ spec: protocol: TCP port: {{ .Values.h2ogpt.service.webPort }} targetPort: 7860 - {{- if .Values.h2ogpt.overrideConfig.openai_server }} + {{- if .Values.h2ogpt.openai.enabled }} - name: openai protocol: TCP port: {{ .Values.h2ogpt.service.openaiPort }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 78b79d159..95b1d67f1 100644 --- a/helm/h2ogpt-chart/values.yaml +++ 
b/helm/h2ogpt-chart/values.yaml @@ -27,8 +27,11 @@ h2ogpt: # -- Enable agents agents: # -- Run agents with h2oGPT container - enabled: false + enabled: true agent_workers: 5 + openai: + enabled: true + openai_workers: 5 replicaCount: 1 imagePullSecrets: image: @@ -52,50 +55,42 @@ h2ogpt: class: useEphemeral: true -# -- Example configs to use when not using Model Lock and External LLM - # overrideConfig: - # base_model: h2oai/h2ogpt-4096-llama2-7b-chat - # use_safetensors: True - # prompt_type: llama2 - # save_dir: /workspace/save/ - # use_gpu_id: False - # score_model: None - # max_max_new_tokens: 2048 - # max_new_tokens: 1024 - - overrideConfig: - visible_login_tab: False - visible_system_tab: False - visible_models_tab: False - visible_hosts_tab: False - # change below to valid vision model or remove this entry - #visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']" - rotate_align_resize_image: False - concurrency_count: 100 - top_k_docs_max_show: 100 - num_async: 10 - # change below to valid directory or remove this entry - #save_dir: "/docker_logs" - score_model: "None" - enable_tts: False - enable_stt: False - enable_transcriptions: False - embedding_gpu_id: "cpu" - hf_embedding_model: "fake" - openai_server: True - openai_workers: 5 - share: False - enforce_h2ogpt_api_key: True - enforce_h2ogpt_ui_key: False - # change to something secure for ui access to backend - #h2ogpt_api_keys: "['api_key_change_me']" - metadata_in_context: "" - # change or remove if using model hub - #use_auth_token: "hf_xxxxx" - # change below to first visible model or remove this entry - #visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']" - # change so ui or api cannot access without this password - #admin_pass: "admin_password_change_me" + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. Make sure to quote boolean values ex: "True","true","false","False". + # -- Supported configs are commented. 
If you don't pass any value, keep {} + overrideConfig: {} +# verbose: +# heap_app_id: +# num_async: +# save_dir: +# score_model: +# share: +# enforce_h2ogpt_api_key: +# enforce_h2ogpt_ui_key: +# h2ogpt_api_keys: +# use_auth_token: +# visible_models: +# visible_vision_models: +# top_k_docs_max_show: +# admin_pass: +# function_server: +# function_server_workers: +# multiple_workers_gunicorn: +# llava_model: +# enable_llava: +# enable_tts: +# enable_stt: +# enable_transcriptions: +# asr_model: +# pre_load_embedding_model: +# pre_load_image_audio_models: +# cut_distance: +# hf_embedding_model: +# enable_captions: +# enable_doctr: +# embedding_gpu_id: + + # -- You can pass additional config here if overrideConfig does not have it. + additionalConfig: {} service: type: NodePort @@ -126,10 +121,14 @@ h2ogpt: resources: requests: memory: 32Gi + nvidia.com/gpu: 0 limits: memory: 64Gi - nodeSelector: - tolerations: + nvidia.com/gpu: 0 + # -- Node taints to tolerate by the h2ogpt pods. + tolerations: [] + # -- Node selector for the h2ogpt pods. 
+ nodeSelector: {} env: {} @@ -139,6 +138,7 @@ h2ogpt: agents: # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` enabled: false + agent_workers: 5 autoscaling: # Enable autoscaling (HPA) for agents enabled: false @@ -170,38 +170,40 @@ agents: class: useEphemeral: true - overrideConfig: - agent_workers: 5 - visible_login_tab: False - visible_system_tab: False - visible_models_tab: False - visible_hosts_tab: False - # change below to valid vision model or remove this entry - #visible_vision_models: "['OpenGVLab/InternVL-Chat-V1-5']" - rotate_align_resize_image: False - concurrency_count: 100 - top_k_docs_max_show: 100 - num_async: 10 - # change below to valid directory or remove this entry - #save_dir: "/docker_logs" - score_model: "None" - enable_tts: False - enable_stt: False - enable_transcriptions: False - embedding_gpu_id: "cpu" - hf_embedding_model: "fake" - share: False - enforce_h2ogpt_api_key: True - enforce_h2ogpt_ui_key: False - # change to something secure for ui access to backend - #h2ogpt_api_keys: "['api_key_change_me']" - metadata_in_context: "" - # change or remove if using model hub - #use_auth_token: "hf_xxxxx" - # change below to first visible model or remove this entry - #visible_models: "['mistralai/Mistral-7B-Instruct-v0.3']" - # change so ui or api cannot access without this password - #admin_pass: "admin_password_change_me" + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. Make sure to quote boolean values ex: "True","true","false","False". + # -- Supported configs are commented. 
If you don't pass any value, keep {} + overrideConfig: { } +# verbose: +# heap_app_id: +# num_async: +# save_dir: +# score_model: +# share: +# enforce_h2ogpt_api_key: +# enforce_h2ogpt_ui_key: +# h2ogpt_api_keys: +# use_auth_token: +# visible_models: +# visible_vision_models: +# top_k_docs_max_show: +# admin_pass: +# multiple_workers_gunicorn: +# llava_model: +# enable_llava: +# enable_tts: +# enable_stt: +# enable_transcriptions: +# asr_model: +# pre_load_embedding_model: +# pre_load_image_audio_models: +# cut_distance: +# hf_embedding_model: +# enable_captions: +# enable_doctr: +# embedding_gpu_id: + + # -- You can pass additional config here if overrideConfig does not have it. + additionalConfig: { } service: type: NodePort @@ -229,10 +231,14 @@ agents: resources: requests: memory: 32Gi + nvidia.com/gpu: 1 limits: memory: 64Gi - nodeSelector: - tolerations: + nvidia.com/gpu: 1 + # -- Node taints to tolerate by the agents pods. + tolerations: [] + # -- Node selector for the agents pods. 
+ nodeSelector: {} env: {} From 9818a0481663a6199586a05339fa4363e5d3e5be Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 6 Nov 2024 00:35:41 +0530 Subject: [PATCH 32/34] Fix boolean values quote issue --- helm/h2ogpt-chart/templates/_helpers.tpl | 86 ++++++++++++------------ helm/h2ogpt-chart/values.yaml | 10 ++- 2 files changed, 47 insertions(+), 49 deletions(-) diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 77163b4a6..69ae9ae93 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -75,16 +75,16 @@ Config for h2oGPT {{- define "h2ogpt.config" -}} {{- with .Values.h2ogpt }} -verbose: {{ default "True" .overrideConfig.verbose }} +verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} {{- if .overrideConfig.heap_app_id }} heap_app_id: {{ .overrideConfig.heap_app_id }} {{- end }} num_async: {{ default 10 .overrideConfig.num_async }} save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} score_model: {{ default "None" .overrideConfig.score_model }} -share: {{ default "False" .overrideConfig.share }} -enforce_h2ogpt_api_key: {{ default "False" .overrideConfig.enforce_h2ogpt_api_key }} -enforce_h2ogpt_ui_key: {{ default "False" .overrideConfig.enforce_h2ogpt_ui_key }} +share: {{ default "False" (.overrideConfig.share | quote ) }} +enforce_h2ogpt_api_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_api_key | quote ) }} +enforce_h2ogpt_ui_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_ui_key | quote ) }} {{- if .overrideConfig.h2ogpt_api_keys }} h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} {{- end }} @@ -92,7 +92,7 @@ h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} use_auth_token: {{ .overrideConfig.use_auth_token }} {{- end }} visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} -visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" 
.overrideConfig.visible_vision_models }} +{{/*visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }}*/}} top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} {{- if .overrideConfig.admin_pass }} admin_pass: {{ .overrideConfig.admin_pass }} @@ -107,33 +107,33 @@ agent_server: "True" agent_port: 5004 agent_workers: {{ .agents.agent_workers }} {{- end }} -function_server: {{ default "True" .overrideConfig.function_server }} +function_server: {{ default "True" ( .overrideConfig.function_server | quote ) }} function_port: 5002 function_server_workers: {{ default 1 .overrideConfig.function_server_workers }} -multiple_workers_gunicorn: {{ default "True" .overrideConfig.multiple_workers_gunicorn }} +multiple_workers_gunicorn: {{ default "True" ( .overrideConfig.multiple_workers_gunicorn | quote ) }} llava_model: {{ default "openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} -enable_llava: {{ default "True" .overrideConfig.enable_llava }} +enable_llava: {{ default "True" ( .overrideConfig.enable_llava | quote ) }} {{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} -enable_tts: {{ default "False" .overrideConfig.enable_tts }} -enable_stt: {{ default "True" .overrideConfig.enable_stt }} -enable_transcriptions: {{ default "True" .overrideConfig.enable_transcriptions }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "True" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "True" ( .overrideConfig.enable_transcriptions | quote ) }} asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} -pre_load_embedding_model: {{ default "True" .overrideConfig.pre_load_embedding_model }} -pre_load_image_audio_models: {{ default "True" .overrideConfig.pre_load_image_audio_models }} +pre_load_embedding_model: {{ default "True" (.overrideConfig.pre_load_embedding_model | quote ) }} 
+pre_load_image_audio_models: {{ default "True" ( .overrideConfig.pre_load_image_audio_models | quote ) }} cut_distance: {{ default 10000 .overrideConfig.cut_distance }} hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} -enable_captions: {{ default "False" .overrideConfig.enable_captions }} -enable_doctr: {{ default "True" .overrideConfig.enable_doctr }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "True" ( .overrideConfig.enable_doctr | quote ) }} {{- else }} -enable_tts: {{ default "False" .overrideConfig.enable_tts }} -enable_stt: {{ default "False" .overrideConfig.enable_stt }} -enable_transcriptions: {{ default "False" .overrideConfig.enable_transcriptions }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "False" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "False" ( .overrideConfig.enable_transcriptions | quote ) }} embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} -pre_load_embedding_model: {{ default "False" .overrideConfig.pre_load_embedding_model }} -pre_load_image_audio_models: {{ default "False" .overrideConfig.pre_load_image_audio_models }} -enable_captions: {{ default "False" .overrideConfig.enable_captions }} -enable_doctr: {{ default "False" .overrideConfig.enable_doctr }} +pre_load_embedding_model: {{ default "False" ( .overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "False" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} {{- end }} {{- end }} {{- end }} @@ -144,16 +144,16 @@ Config for agents {{- define "agents.config" -}} {{- with .Values.agents }} 
-verbose: {{ default "True" .overrideConfig.verbose }} +verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} {{- if .overrideConfig.heap_app_id }} heap_app_id: {{ .overrideConfig.heap_app_id }} {{- end }} num_async: {{ default 10 .overrideConfig.num_async }} save_dir: {{ default "/docker_logs" .overrideConfig.save_dir }} score_model: {{ default "None" .overrideConfig.score_model }} -share: {{ default "False" .overrideConfig.share }} -enforce_h2ogpt_api_key: {{ default "False" .overrideConfig.enforce_h2ogpt_api_key }} -enforce_h2ogpt_ui_key: {{ default "False" .overrideConfig.enforce_h2ogpt_ui_key }} +share: {{ default "False" (.overrideConfig.share | quote ) }} +enforce_h2ogpt_api_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_api_key | quote ) }} +enforce_h2ogpt_ui_key: {{ default "False" ( .overrideConfig.enforce_h2ogpt_ui_key | quote ) }} {{- if .overrideConfig.h2ogpt_api_keys }} h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} {{- end }} @@ -161,7 +161,7 @@ h2ogpt_api_keys: {{ .overrideConfig.h2ogpt_api_keys }} use_auth_token: {{ .overrideConfig.use_auth_token }} {{- end }} visible_models: {{ default "['meta-llama/Meta-Llama-3.1-8B-Instruct']" .overrideConfig.visible_models }} -visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }} +{{/*visible_vision_models: {{ default "['mistralai/Pixtral-12B-2409']" .overrideConfig.visible_vision_models }}*/}} top_k_docs_max_show: {{ default 100 .overrideConfig.top_k_docs_max_show }} {{- if .overrideConfig.admin_pass }} admin_pass: {{ .overrideConfig.admin_pass }} @@ -169,30 +169,30 @@ admin_pass: {{ .overrideConfig.admin_pass }} agent_server: "True" agent_port: 5004 agent_workers: {{ default 5 .agent_workers }} -multiple_workers_gunicorn: {{ default "True" .overrideConfig.multiple_workers_gunicorn }} +multiple_workers_gunicorn: {{ default "True" ( .overrideConfig.multiple_workers_gunicorn | quote ) }} llava_model: {{ default 
"openai:mistralai/Pixtral-12B-2409" .overrideConfig.llava_model }} -enable_llava: {{ default "True" .overrideConfig.enable_llava }} +enable_llava: {{ default "True" ( .overrideConfig.enable_llava | quote ) }} {{- if ge (int (index .resources.requests "nvidia.com/gpu") ) (int 1) }} -enable_tts: {{ default "False" .overrideConfig.enable_tts }} -enable_stt: {{ default "True" .overrideConfig.enable_stt }} -enable_transcriptions: {{ default "True" .overrideConfig.enable_transcriptions }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "True" ( .overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "True" ( .overrideConfig.enable_transcriptions | quote ) }} asr_model: {{ default "distil-whisper/distil-large-v3" .overrideConfig.asr_model }} -pre_load_embedding_model: {{ default "True" .overrideConfig.pre_load_embedding_model }} -pre_load_image_audio_models: {{ default "True" .overrideConfig.pre_load_image_audio_models }} +pre_load_embedding_model: {{ default "True" (.overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "True" ( .overrideConfig.pre_load_image_audio_models | quote ) }} cut_distance: {{ default 10000 .overrideConfig.cut_distance }} hf_embedding_model: {{ default "BAAI/bge-large-en-v1.5" .overrideConfig.hf_embedding_model }} -enable_captions: {{ default "False" .overrideConfig.enable_captions }} -enable_doctr: {{ default "True" .overrideConfig.enable_doctr }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "True" ( .overrideConfig.enable_doctr | quote ) }} {{- else }} -enable_tts: {{ default "False" .overrideConfig.enable_tts }} -enable_stt: {{ default "False" .overrideConfig.enable_stt }} -enable_transcriptions: {{ default "False" .overrideConfig.enable_transcriptions }} +enable_tts: {{ default "False" ( .overrideConfig.enable_tts | quote ) }} +enable_stt: {{ default "False" ( 
.overrideConfig.enable_stt | quote ) }} +enable_transcriptions: {{ default "False" ( .overrideConfig.enable_transcriptions | quote ) }} embedding_gpu_id: {{ default "cpu" .overrideConfig.embedding_gpu_id }} hf_embedding_model: {{ default "fake" .overrideConfig.hf_embedding_model }} -pre_load_embedding_model: {{ default "False" .overrideConfig.pre_load_embedding_model }} -pre_load_image_audio_models: {{ default "False" .overrideConfig.pre_load_image_audio_models }} -enable_captions: {{ default "False" .overrideConfig.enable_captions }} -enable_doctr: {{ default "False" .overrideConfig.enable_doctr }} +pre_load_embedding_model: {{ default "False" ( .overrideConfig.pre_load_embedding_model | quote ) }} +pre_load_image_audio_models: {{ default "False" ( .overrideConfig.pre_load_image_audio_models | quote ) }} +enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) }} +enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} {{- end }} {{- end }} {{- end }} \ No newline at end of file diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 95b1d67f1..6d8183e36 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -55,7 +55,7 @@ h2ogpt: class: useEphemeral: true - # -- Defaults configs are set internally with recommended values. Set values if you really need to change. Make sure to quote boolean values ex: "True","true","false","False". + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. # -- Supported configs are commented. If you don't pass any value, keep {} overrideConfig: {} # verbose: @@ -69,7 +69,6 @@ h2ogpt: # h2ogpt_api_keys: # use_auth_token: # visible_models: -# visible_vision_models: # top_k_docs_max_show: # admin_pass: # function_server: @@ -170,9 +169,9 @@ agents: class: useEphemeral: true - # -- Defaults configs are set internally with recommended values. Set values if you really need to change. 
Make sure to quote boolean values ex: "True","true","false","False". + # -- Defaults configs are set internally with recommended values. Set values if you really need to change. # -- Supported configs are commented. If you don't pass any value, keep {} - overrideConfig: { } + overrideConfig: {} # verbose: # heap_app_id: # num_async: @@ -184,7 +183,6 @@ agents: # h2ogpt_api_keys: # use_auth_token: # visible_models: -# visible_vision_models: # top_k_docs_max_show: # admin_pass: # multiple_workers_gunicorn: @@ -203,7 +201,7 @@ agents: # embedding_gpu_id: # -- You can pass additional config here if overrideConfig does not have it. - additionalConfig: { } + additionalConfig: {} service: type: NodePort From 72c5859cad6aa7022bf2164bbf185a1ce6209916 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 6 Nov 2024 15:19:19 +0530 Subject: [PATCH 33/34] Add new line --- helm/h2ogpt-chart/templates/_helpers.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 69ae9ae93..26661a337 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -195,4 +195,4 @@ enable_captions: {{ default "False" ( .overrideConfig.enable_captions | quote ) enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} {{- end }} {{- end }} -{{- end }} \ No newline at end of file +{{- end }} From 36ca38ef005282e59587baa4c3425eb75db1b702 Mon Sep 17 00:00:00 2001 From: Lakindu Date: Wed, 6 Nov 2024 15:30:33 +0530 Subject: [PATCH 34/34] Rename agents to agent --- helm/h2ogpt-chart/README.md | 96 +++++++++---------- helm/h2ogpt-chart/templates/_helpers.tpl | 10 +- .../templates/agents-configmap.yaml | 8 +- .../templates/agents-deployment.yaml | 76 +++++++-------- helm/h2ogpt-chart/templates/agents-hpa.yaml | 18 ++-- helm/h2ogpt-chart/templates/agents-pvc.yaml | 8 +- .../templates/agents-service.yaml | 12 +-- .../global-external-llm-secrets.yaml | 
2 +- .../templates/h2ogpt-deployment.yaml | 2 +- .../templates/h2ogpt-service.yaml | 4 +- helm/h2ogpt-chart/templates/validators.yaml | 4 +- helm/h2ogpt-chart/values.yaml | 26 ++--- 12 files changed, 133 insertions(+), 133 deletions(-) diff --git a/helm/h2ogpt-chart/README.md b/helm/h2ogpt-chart/README.md index bd4dedfb8..b4b6bc94b 100644 --- a/helm/h2ogpt-chart/README.md +++ b/helm/h2ogpt-chart/README.md @@ -8,61 +8,61 @@ A Helm chart for h2oGPT | Key | Type | Default | Description | |-----|------|---------|-------------| -| agents.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. | -| agents.agent_workers | int | `5` | | -| agents.autoscaling.enabled | bool | `false` | | -| agents.autoscaling.maxReplicas | int | `2` | | -| agents.autoscaling.minReplicas | int | `1` | | -| agents.autoscaling.targetCPU | int | `80` | | -| agents.autoscaling.targetMemory | string | `"32Gi"` | | -| agents.enabled | bool | `false` | Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` | -| agents.env | object | `{}` | | -| agents.extraVolumeMounts | list | `[]` | Extra volume mounts | -| agents.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | -| agents.image.pullPolicy | string | `"IfNotPresent"` | | -| agents.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | -| agents.image.tag | string | `nil` | | -| agents.imagePullSecrets | string | `nil` | | -| agents.initImage.pullPolicy | string | `nil` | | -| agents.initImage.repository | string | `nil` | | -| agents.initImage.tag | string | `nil` | | -| agents.nodeSelector | object | `{}` | Node selector for the agents pods. | -| agents.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | -| agents.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| -| agents.podAnnotations | object | `{}` | | -| agents.podLabels | object | `{}` | | -| agents.podSecurityContext.fsGroup | string | `nil` | | -| agents.podSecurityContext.runAsGroup | string | `nil` | | -| agents.podSecurityContext.runAsNonRoot | bool | `true` | | -| agents.podSecurityContext.runAsUser | string | `nil` | | -| agents.replicaCount | int | `1` | | -| agents.resources.limits."nvidia.com/gpu" | int | `1` | | -| agents.resources.limits.memory | string | `"64Gi"` | | -| agents.resources.requests."nvidia.com/gpu" | int | `1` | | -| agents.resources.requests.memory | string | `"32Gi"` | | -| agents.securityContext.allowPrivilegeEscalation | bool | `false` | | -| agents.securityContext.capabilities.drop[0] | string | `"ALL"` | | -| agents.securityContext.runAsNonRoot | bool | `true` | | -| agents.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | -| agents.service.agentsPort | int | `5004` | | -| agents.service.annotations | object | `{}` | | -| agents.service.type | string | `"NodePort"` | | -| agents.storage.class | string | `nil` | | -| agents.storage.size | string | `"128Gi"` | | -| agents.storage.useEphemeral | bool | `true` | | -| agents.tolerations | list | `[]` | Node taints to tolerate by the agents pods. | -| agents.updateStrategy.type | string | `"RollingUpdate"` | | +| agent.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. 
| +| agent.agent_workers | int | `5` | | +| agent.autoscaling.enabled | bool | `false` | | +| agent.autoscaling.maxReplicas | int | `2` | | +| agent.autoscaling.minReplicas | int | `1` | | +| agent.autoscaling.targetCPU | int | `80` | | +| agent.autoscaling.targetMemory | string | `"32Gi"` | | +| agent.enabled | bool | `true` | Enable agent, this must be `false` if `h2ogpt.agent.enabled` is `true` | +| agent.env | object | `{}` | | +| agent.extraVolumeMounts | list | `[]` | Extra volume mounts | +| agent.extraVolumes | list | `[]` | Extra volumes, for more certs, mount under /etc/ssl/more-certs | +| agent.image.pullPolicy | string | `"IfNotPresent"` | | +| agent.image.repository | string | `"gcr.io/vorvan/h2oai/h2ogpt-runtime"` | | +| agent.image.tag | string | `nil` | | +| agent.imagePullSecrets | string | `nil` | | +| agent.initImage.pullPolicy | string | `nil` | | +| agent.initImage.repository | string | `nil` | | +| agent.initImage.tag | string | `nil` | | +| agent.nodeSelector | object | `{}` | Node selector for the agent pods. | +| agent.overrideConfig | object | `{}` | Supported configs are commented. If you don't pass any value, keep {} | +| agent.podAffinity | string | `nil` | Set hostname and zone to true for pod affinity rules based on hostname and zone. 
| +| agent.podAnnotations | object | `{}` | | +| agent.podLabels | object | `{}` | | +| agent.podSecurityContext.fsGroup | string | `nil` | | +| agent.podSecurityContext.runAsGroup | string | `nil` | | +| agent.podSecurityContext.runAsNonRoot | bool | `true` | | +| agent.podSecurityContext.runAsUser | string | `nil` | | +| agent.replicaCount | int | `1` | | +| agent.resources.limits."nvidia.com/gpu" | int | `1` | | +| agent.resources.limits.memory | string | `"64Gi"` | | +| agent.resources.requests."nvidia.com/gpu" | int | `1` | | +| agent.resources.requests.memory | string | `"32Gi"` | | +| agent.securityContext.allowPrivilegeEscalation | bool | `false` | | +| agent.securityContext.capabilities.drop[0] | string | `"ALL"` | | +| agent.securityContext.runAsNonRoot | bool | `true` | | +| agent.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | +| agent.service.agentPort | int | `5004` | | +| agent.service.annotations | object | `{}` | | +| agent.service.type | string | `"NodePort"` | | +| agent.storage.class | string | `nil` | | +| agent.storage.size | string | `"128Gi"` | | +| agent.storage.useEphemeral | bool | `true` | | +| agent.tolerations | list | `[]` | Node taints to tolerate by the agent pods. | +| agent.updateStrategy.type | string | `"RollingUpdate"` | | | caCertificates | string | `""` | CA certs | | fullnameOverride | string | `""` | | | global.externalLLM.enabled | bool | `false` | | | global.externalLLM.modelLock | string | `nil` | | -| global.externalLLM.secret | object | `{}` | list of secrets for h2ogpt and agents env | +| global.externalLLM.secret | object | `{}` | list of secrets for h2ogpt and agent env | | global.visionModels.enabled | bool | `false` | Enable vision models | | global.visionModels.rotateAlignResizeImage | bool | `false` | | | global.visionModels.visibleModels | list | `[]` | Visible vision models, the vision model itslef needs to be set via modeLock or base_model. 
Ex: visibleModels: ['OpenGVLab/InternVL-Chat-V1-5'] | | h2ogpt.additionalConfig | object | `{}` | You can pass additional config here if overrideConfig does not have it. | -| h2ogpt.agents | object | `{"agent_workers":5,"enabled":true}` | Enable agents | -| h2ogpt.agents.enabled | bool | `true` | Run agents with h2oGPT container | +| h2ogpt.agent | object | `{"agent_workers":5,"enabled":false}` | Enable agent | +| h2ogpt.agent.enabled | bool | `false` | Run agent with h2oGPT container | | h2ogpt.enabled | bool | `true` | Enable h2oGPT | | h2ogpt.env | object | `{}` | | | h2ogpt.extraVolumeMounts | list | `[]` | Extra volume mounts | @@ -94,7 +94,7 @@ A Helm chart for h2oGPT | h2ogpt.securityContext.capabilities.drop[0] | string | `"ALL"` | | | h2ogpt.securityContext.runAsNonRoot | bool | `true` | | | h2ogpt.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | | -| h2ogpt.service.agentsPort | int | `5004` | | +| h2ogpt.service.agentPort | int | `5004` | | | h2ogpt.service.functionPort | int | `5002` | | | h2ogpt.service.openaiPort | int | `5000` | | | h2ogpt.service.type | string | `"NodePort"` | | diff --git a/helm/h2ogpt-chart/templates/_helpers.tpl b/helm/h2ogpt-chart/templates/_helpers.tpl index 26661a337..61e2168dd 100644 --- a/helm/h2ogpt-chart/templates/_helpers.tpl +++ b/helm/h2ogpt-chart/templates/_helpers.tpl @@ -102,10 +102,10 @@ openai_server: "True" openai_port: 5000 openai_workers: {{ default 5 .openai.openai_workers }} {{- end }} -{{- if .agents.enabled }} +{{- if .agent.enabled }} agent_server: "True" agent_port: 5004 -agent_workers: {{ .agents.agent_workers }} +agent_workers: {{ .agent.agent_workers }} {{- end }} function_server: {{ default "True" ( .overrideConfig.function_server | quote ) }} function_port: 5002 @@ -139,11 +139,11 @@ enable_doctr: {{ default "False" ( .overrideConfig.enable_doctr | quote ) }} {{- end }} {{/* -Config for agents +Config for agent */}} -{{- define "agents.config" -}} -{{- with .Values.agents }} +{{- 
define "agent.config" -}} +{{- with .Values.agent }} verbose: {{ default "True" ( .overrideConfig.verbose | quote ) }} {{- if .overrideConfig.heap_app_id }} heap_app_id: {{ .overrideConfig.heap_app_id }} diff --git a/helm/h2ogpt-chart/templates/agents-configmap.yaml b/helm/h2ogpt-chart/templates/agents-configmap.yaml index 2f293cd2f..b6fa6e51e 100644 --- a/helm/h2ogpt-chart/templates/agents-configmap.yaml +++ b/helm/h2ogpt-chart/templates/agents-configmap.yaml @@ -1,13 +1,13 @@ -{{- if .Values.agents.enabled }} +{{- if .Values.agent.enabled }} apiVersion: v1 kind: ConfigMap metadata: - name: {{ include "h2ogpt.fullname" . }}-agents-config + name: {{ include "h2ogpt.fullname" . }}-agent-config namespace: {{ include "h2ogpt.namespace" . | quote }} labels: {{- include "h2ogpt.labels" . | nindent 4 }} data: -{{- range $key, $value := ( include "agents.config" . | fromYaml ) }} +{{- range $key, $value := ( include "agent.config" . | fromYaml ) }} {{- /* convert boolean value to cli compatiblity */}} {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} @@ -15,7 +15,7 @@ data: {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote }} {{- end }} {{- end }} -{{- range $key, $value := ( .Values.agents.additionalConfig ) }} +{{- range $key, $value := ( .Values.agent.additionalConfig ) }} {{- /* convert boolean value to cli compatiblity */}} {{- if or ( eq "true" ( $value | toString )) ( eq "false" ( $value | toString )) }} {{ printf "H2OGPT_%s" $key | upper }}: {{ $value | quote | title }} diff --git a/helm/h2ogpt-chart/templates/agents-deployment.yaml b/helm/h2ogpt-chart/templates/agents-deployment.yaml index dcdda700a..ac737a792 100644 --- a/helm/h2ogpt-chart/templates/agents-deployment.yaml +++ b/helm/h2ogpt-chart/templates/agents-deployment.yaml @@ -1,45 +1,45 @@ -{{- if .Values.agents.enabled }} +{{- if .Values.agent.enabled }} apiVersion: apps/v1 kind: Deployment 
metadata: - name: {{ include "h2ogpt.fullname" . }}-agents + name: {{ include "h2ogpt.fullname" . }}-agent namespace: {{ include "h2ogpt.namespace" . | quote }} labels: - app: {{ include "h2ogpt.fullname" . }}-agents + app: {{ include "h2ogpt.fullname" . }}-agent spec: - replicas: {{ .Values.agents.replicaCount }} + replicas: {{ .Values.agent.replicaCount }} selector: matchLabels: - app: {{ include "h2ogpt.fullname" . }}-agents - {{- if .Values.agents.updateStrategy }} - strategy: {{- toYaml .Values.agents.updateStrategy | nindent 4 }} + app: {{ include "h2ogpt.fullname" . }}-agent + {{- if .Values.agent.updateStrategy }} + strategy: {{- toYaml .Values.agent.updateStrategy | nindent 4 }} {{- end }} template: metadata: - {{- with .Values.agents.podAnnotations }} + {{- with .Values.agent.podAnnotations }} annotations: {{- toYaml . | nindent 8 }} {{- end }} labels: - app: {{ include "h2ogpt.fullname" . }}-agents - {{- with .Values.agents.podLabels }} + app: {{ include "h2ogpt.fullname" . }}-agent + {{- with .Values.agent.podLabels }} {{ toYaml . | nindent 8 }} {{- end }} spec: - {{- with .Values.agents.nodeSelector }} + {{- with .Values.agent.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.agents.tolerations }} + {{- with .Values.agent.tolerations }} tolerations: {{- toYaml . | nindent 8 }} {{- end }} securityContext: - {{- toYaml .Values.agents.podSecurityContext | nindent 8 }} + {{- toYaml .Values.agent.podSecurityContext | nindent 8 }} affinity: - {{- if .Values.agents.podAffinity }} + {{- if .Values.agent.podAffinity }} podAntiAffinity: - {{- if .Values.agents.podAffinity.hostname }} + {{- if .Values.agent.podAffinity.hostname }} requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: matchExpressions: @@ -49,7 +49,7 @@ spec: - {{ include "h2ogpt.fullname" . 
}} topologyKey: kubernetes.io/hostname {{- end }} - {{- if .Values.agents.podAffinity.zone }} + {{- if .Values.agent.podAffinity.zone }} preferredDuringSchedulingIgnoredDuringExecution: - weight: 100 podAffinityTerm: @@ -62,19 +62,19 @@ spec: topologyKey: failure-domain.beta.kubernetes.io/zone {{- end }} {{- end }} - {{- with .Values.agents.extraAffinity }} + {{- with .Values.agent.extraAffinity }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.agents.imagePullSecrets }} + {{- with .Values.agent.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} containers: - - name: {{ include "h2ogpt.fullname" . }}-agents + - name: {{ include "h2ogpt.fullname" . }}-agent securityContext: - {{- toYaml .Values.agents.securityContext | nindent 12 }} - image: "{{ .Values.agents.image.repository }}:{{ .Values.agents.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.agents.image.pullPolicy }} + {{- toYaml .Values.agent.securityContext | nindent 12 }} + image: "{{ .Values.agent.image.repository }}:{{ .Values.agent.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.agent.image.pullPolicy }} command: ["/bin/bash", "-c"] args: - > @@ -83,33 +83,33 @@ spec: - name: agent containerPort: 5004 protocol: TCP - {{- if .Values.agents.livenessProbe }} + {{- if .Values.agent.livenessProbe }} livenessProbe: httpGet: path: / scheme: HTTP port: http - {{- toYaml .Values.agents.livenessProbe | nindent 12 }} + {{- toYaml .Values.agent.livenessProbe | nindent 12 }} {{- end }} - {{- if .Values.agents.readinessProbe }} + {{- if .Values.agent.readinessProbe }} readinessProbe: httpGet: path: / scheme: HTTP port: http - {{- toYaml .Values.agents.readinessProbe | nindent 12 }} + {{- toYaml .Values.agent.readinessProbe | nindent 12 }} {{- end }} resources: - {{- toYaml .Values.agents.resources | nindent 12 }} + {{- toYaml .Values.agent.resources | nindent 12 }} envFrom: - configMapRef: - name: {{ include "h2ogpt.fullname" . 
}}-agents-config + name: {{ include "h2ogpt.fullname" . }}-agent-config {{- if .Values.global.externalLLM.enabled }} - secretRef: name: {{ include "h2ogpt.fullname" . }}-external-llm-secret {{- end }} env: - {{- range $key, $value := .Values.agents.env }} + {{- range $key, $value := .Values.agent.env }} - name: "{{ $key }}" value: "{{ $value }}" {{- end }} @@ -126,10 +126,10 @@ spec: value: {{ .Values.global.visionModels.rotateAlignResizeImage | quote }} {{- end }} volumeMounts: - - name: {{ include "h2ogpt.fullname" . }}-agents-volume + - name: {{ include "h2ogpt.fullname" . }}-agent-volume mountPath: /workspace/.cache subPath: cache - - name: {{ include "h2ogpt.fullname" . }}-agents-volume + - name: {{ include "h2ogpt.fullname" . }}-agent-volume mountPath: /workspace/save subPath: save {{- if .Values.caCertificates }} @@ -137,14 +137,14 @@ spec: mountPath: /etc/ssl/certs/root-ca-bundle.crt subPath: root-ca-bundle.crt {{- end }} - {{ with .Values.agents.extraVolumeMounts }} + {{ with .Values.agent.extraVolumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} volumes: - - name: {{ include "h2ogpt.fullname" . }}-agents-volume - {{- if not .Values.agents.storage.useEphemeral }} + - name: {{ include "h2ogpt.fullname" . }}-agent-volume + {{- if not .Values.agent.storage.useEphemeral }} persistentVolumeClaim: - claimName: {{ include "h2ogpt.fullname" . }}-agents-volume + claimName: {{ include "h2ogpt.fullname" . }}-agent-volume {{- else}} ephemeral: volumeClaimTemplate: @@ -153,15 +153,15 @@ spec: - ReadWriteOnce resources: requests: - storage: {{ .Values.agents.storage.size | quote }} - storageClassName: {{ .Values.agents.storage.class }} + storage: {{ .Values.agent.storage.size | quote }} + storageClassName: {{ .Values.agent.storage.class }} {{- end }} {{- if .Values.caCertificates }} - name: ca-certificates configMap: name: {{ include "h2ogpt.fullname" . 
}}-ca-certificates {{- end }} - {{- with .Values.agents.extraVolumes }} + {{- with .Values.agent.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-hpa.yaml b/helm/h2ogpt-chart/templates/agents-hpa.yaml index f0c796484..5cf083bbb 100644 --- a/helm/h2ogpt-chart/templates/agents-hpa.yaml +++ b/helm/h2ogpt-chart/templates/agents-hpa.yaml @@ -1,8 +1,8 @@ -{{- if .Values.agents.autoscaling.enabled | default false }} +{{- if .Values.agent.autoscaling.enabled | default false }} apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler metadata: - name: {{ .Release.Name }}-agents + name: {{ .Release.Name }}-agent namespace: {{ include "h2ogpt.namespace" . | quote }} labels: {{- include "h2ogpt.labels" . | nindent 4 }} @@ -10,24 +10,24 @@ spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment - name: {{ include "h2ogpt.fullname" . }}-agents - minReplicas: {{ .Values.agents.autoscaling.minReplicas }} - maxReplicas: {{ .Values.agents.autoscaling.maxReplicas }} + name: {{ include "h2ogpt.fullname" . 
}}-agent + minReplicas: {{ .Values.agent.autoscaling.minReplicas }} + maxReplicas: {{ .Values.agent.autoscaling.maxReplicas }} metrics: - {{- if .Values.agents.autoscaling.targetCPU }} + {{- if .Values.agent.autoscaling.targetCPU }} - type: Resource resource: name: cpu target: type: Utilization - averageUtilization: {{ .Values.agents.autoscaling.targetCPU }} + averageUtilization: {{ .Values.agent.autoscaling.targetCPU }} {{- end }} - {{- if .Values.agents.autoscaling.targetMemory }} + {{- if .Values.agent.autoscaling.targetMemory }} - type: Resource resource: name: memory target: type: Utilization - averageUtilization: {{ .Values.agents.autoscaling.targetMemory }} + averageUtilization: {{ .Values.agent.autoscaling.targetMemory }} {{- end }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-pvc.yaml b/helm/h2ogpt-chart/templates/agents-pvc.yaml index 2165fab9d..2ac48c921 100644 --- a/helm/h2ogpt-chart/templates/agents-pvc.yaml +++ b/helm/h2ogpt-chart/templates/agents-pvc.yaml @@ -1,14 +1,14 @@ -{{- if and (.Values.agents.enabled) (not .Values.agents.storage.useEphemeral) }} +{{- if and (.Values.agent.enabled) (not .Values.agent.storage.useEphemeral) }} apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: {{ include "h2ogpt.fullname" . }}-agents-volume + name: {{ include "h2ogpt.fullname" . }}-agent-volume namespace: {{ include "h2ogpt.namespace" . 
| quote }} spec: accessModes: - ReadWriteOnce - storageClassName: {{ .Values.agents.storage.class }} + storageClassName: {{ .Values.agent.storage.class }} resources: requests: - storage: {{ .Values.agents.storage.size | quote }} + storage: {{ .Values.agent.storage.size | quote }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/agents-service.yaml b/helm/h2ogpt-chart/templates/agents-service.yaml index d39cad58e..6b0653555 100644 --- a/helm/h2ogpt-chart/templates/agents-service.yaml +++ b/helm/h2ogpt-chart/templates/agents-service.yaml @@ -1,21 +1,21 @@ -{{- if .Values.agents.enabled }} +{{- if .Values.agent.enabled }} apiVersion: v1 kind: Service metadata: - name: {{ include "h2ogpt.fullname" . }}-agents + name: {{ include "h2ogpt.fullname" . }}-agent namespace: {{ include "h2ogpt.namespace" . | quote }} - {{- with .Values.agents.service.annotations }} + {{- with .Values.agent.service.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} spec: selector: - app: {{ include "h2ogpt.fullname" . }}-agents + app: {{ include "h2ogpt.fullname" . 
}}-agent ports: - name: agent protocol: TCP - port: {{ .Values.agents.service.agentsPort }} + port: {{ .Values.agent.service.agentPort }} targetPort: 5004 - type: {{ .Values.agents.service.type }} + type: {{ .Values.agent.service.type }} {{- end }} diff --git a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml index 6c6f5b74e..044d9eeae 100644 --- a/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml +++ b/helm/h2ogpt-chart/templates/global-external-llm-secrets.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.global.externalLLM.enabled (or .Values.agents.enabled .Values.h2ogpt.enabled) }} +{{- if and .Values.global.externalLLM.enabled (or .Values.agent.enabled .Values.h2ogpt.enabled) }} apiVersion: v1 kind: Secret metadata: diff --git a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml index bac71f22d..4d1f74a70 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-deployment.yaml @@ -104,7 +104,7 @@ spec: - name: function containerPort: 5002 protocol: TCP - {{- if .Values.h2ogpt.agents.enabled }} + {{- if .Values.h2ogpt.agent.enabled }} - name: agent containerPort: 5004 protocol: TCP diff --git a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml index 747aed223..7e9f13bb9 100644 --- a/helm/h2ogpt-chart/templates/h2ogpt-service.yaml +++ b/helm/h2ogpt-chart/templates/h2ogpt-service.yaml @@ -27,10 +27,10 @@ spec: protocol: TCP port: {{ .Values.h2ogpt.service.functionPort }} targetPort: 5002 - {{- if .Values.h2ogpt.agents.enabled }} + {{- if .Values.h2ogpt.agent.enabled }} - name: agent protocol: TCP - port: {{ .Values.h2ogpt.service.agentsPort }} + port: {{ .Values.h2ogpt.service.agentPort }} targetPort: 5004 {{- end }} type: {{ .Values.h2ogpt.service.type }} diff --git a/helm/h2ogpt-chart/templates/validators.yaml 
b/helm/h2ogpt-chart/templates/validators.yaml index b97d33e5c..49fb1532b 100644 --- a/helm/h2ogpt-chart/templates/validators.yaml +++ b/helm/h2ogpt-chart/templates/validators.yaml @@ -1,3 +1,3 @@ -{{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agents.enabled) .Values.agents.enabled }} - {{- fail " Both agents in both h2ogpt.agents cannot be enabled. Enably only one and try again" }} +{{- if and ( and .Values.h2ogpt.enabled .Values.h2ogpt.agent.enabled) .Values.agent.enabled }} + {{- fail " Both agent and h2ogpt.agent cannot be enabled. Enably only one and try again" }} {{- end }} diff --git a/helm/h2ogpt-chart/values.yaml b/helm/h2ogpt-chart/values.yaml index 6d8183e36..7b7644dca 100644 --- a/helm/h2ogpt-chart/values.yaml +++ b/helm/h2ogpt-chart/values.yaml @@ -5,7 +5,7 @@ namespaceOverride: "" global: externalLLM: enabled: false - # -- list of secrets for h2ogpt and agents env + # -- list of secrets for h2ogpt and agent env secret: {} # OPENAI_AZURE_KEY: "value" # OPENAI_AZURE_API_BASE: "value" @@ -24,10 +24,10 @@ global: h2ogpt: # -- Enable h2oGPT enabled: true - # -- Enable agents - agents: - # -- Run agents with h2oGPT container - enabled: true + # -- Enable agent + agent: + # -- Run agent with h2oGPT container + enabled: false agent_workers: 5 openai: enabled: true @@ -96,7 +96,7 @@ h2ogpt: webPort: 80 openaiPort: 5000 functionPort: 5002 - agentsPort: 5004 + agentPort: 5004 webServiceAnnotations: {} updateStrategy: @@ -134,12 +134,12 @@ h2ogpt: podAnnotations: {} podLabels: {} -agents: - # -- Enable agents, this must be `false` if `h2ogpt.agents.enabled` is `true` - enabled: false +agent: + # -- Enable agent, this must be `false` if `h2ogpt.agent.enabled` is `true` + enabled: true agent_workers: 5 autoscaling: - # Enable autoscaling (HPA) for agents + # Enable autoscaling (HPA) for agent enabled: false minReplicas: 1 maxReplicas: 2 @@ -205,7 +205,7 @@ agents: service: type: NodePort - agentsPort: 5004 + agentPort: 5004 annotations: {} updateStrategy: 
@@ -233,9 +233,9 @@ agents: limits: memory: 64Gi nvidia.com/gpu: 1 - # -- Node taints to tolerate by the agents pods. + # -- Node taints to tolerate by the agent pods. tolerations: [] - # -- Node selector for the agents pods. + # -- Node selector for the agent pods. nodeSelector: {} env: {}