Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Kepler to gather energy usage data #530

Merged
merged 2 commits into from
Jul 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/configs/ct.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ chart-dirs:
- charts
chart-repos:
- grafana=https://grafana.github.io/helm-charts
- kepler=https://sustainable-computing-io.github.io/kepler-helm-chart
- opencost=https://opencost.github.io/opencost-helm-chart
- prometheus-community=https://prometheus-community.github.io/helm-charts
validate-chart-schema: true
Expand Down
33 changes: 33 additions & 0 deletions .github/configs/updatecli.d/kepler.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
# Updatecli manifest that keeps the "kepler" subchart of the k8s-monitoring
# Helm chart up to date.
#   sources:    look up the newest "kepler" chart version in its Helm repository.
#   conditions: only proceed when the expected Chart.yaml dependency slot is "kepler".
#   targets:    write the discovered version into that dependency slot.
name: Bump dependency "kepler" for Helm chart "k8s-monitoring"
sources:
  kepler:
    name: Get latest "kepler" Helm chart version
    kind: helmchart
    spec:
      name: kepler
      url: https://sustainable-computing-io.github.io/kepler-helm-chart
      versionfilter:
        kind: semver
        pattern: '*'

conditions:
  kepler:
    name: Ensure Helm chart dependency "kepler" is specified
    kind: yaml
    spec:
      file: charts/k8s-monitoring/Chart.yaml
      # Positional lookup: this index must match the "kepler" entry in
      # Chart.yaml and the index used by the target below.
      key: $.dependencies[9].name
      value: kepler
    disablesourceinput: true

targets:
  kepler:
    name: Bump Helm chart dependency "kepler" for Helm chart "k8s-monitoring"
    kind: helmchart
    spec:
      file: Chart.yaml
      key: $.dependencies[9].version
      name: charts/k8s-monitoring
      versionincrement: none
    sourceid: kepler
58 changes: 52 additions & 6 deletions .github/workflows/check-for-dependency-updates.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
- name: Run Updatecli
id: update-alloy
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/alloy.yaml
updatecli apply --config "${UPDATECLI_CONFIG_DIR}/alloy.yaml"
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
Expand Down Expand Up @@ -62,6 +62,52 @@ jobs:
branch: chore/update-grafana-alloy
delete-branch: true

# Runs the Kepler updatecli manifest and, when it changes the working tree,
# regenerates the chart docs and example outputs and opens a pull request.
updateKepler:
  name: Update Kepler
  runs-on: "ubuntu-latest"
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Install Updatecli
      uses: updatecli/updatecli-action@v2

    - name: Run Updatecli
      id: update-kepler
      # UPDATECLI_CONFIG_DIR is not set in this job; presumably it comes from
      # workflow-level env -- confirm.
      run: |
        updatecli apply --config "${UPDATECLI_CONFIG_DIR}/kepler.yaml"
        if ! git diff --exit-code > /dev/null; then
          echo "changed=true" >> "${GITHUB_OUTPUT}"
        fi

    # Every following step is skipped unless updatecli modified the checkout.
    - name: Regenerate docs
      if: steps.update-kepler.outputs.changed == 'true'
      run: docker run --rm -v "$(pwd)/charts/k8s-monitoring:/helm-docs" -u "$(id -u)" jnorwood/helm-docs

    - name: Install Helm
      if: steps.update-kepler.outputs.changed == 'true'
      uses: azure/setup-helm@v4
      with:
        token: ${{ secrets.GITHUB_TOKEN }}

    - name: Regenerate examples
      if: steps.update-kepler.outputs.changed == 'true'
      run: make regenerate-example-outputs

    - name: Create pull request
      if: steps.update-kepler.outputs.changed == 'true'
      uses: peter-evans/create-pull-request@v6
      with:
        title: "[dependency] Update Kepler"
        body: "Updates the Kepler subchart"
        base: main
        author: "${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>"
        committer: "GitHub <noreply@github.com>"
        commit-message: Update Kepler
        labels: dependencies
        branch: chore/update-kepler
        delete-branch: true

updateKubeStateMetrics:
name: Update Kube State Metrics
runs-on: "ubuntu-latest"
Expand All @@ -75,7 +121,7 @@ jobs:
- name: Run Updatecli
id: update-kube-state-metrics
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/kube-state-metrics.yaml
updatecli apply --config "${UPDATECLI_CONFIG_DIR}/kube-state-metrics.yaml"
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
Expand Down Expand Up @@ -121,7 +167,7 @@ jobs:
- name: Run Updatecli
id: update-node-exporter
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/node-exporter.yaml
updatecli apply --config "${UPDATECLI_CONFIG_DIR}/node-exporter.yaml"
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
Expand Down Expand Up @@ -167,7 +213,7 @@ jobs:
- name: Run Updatecli
id: update-opencost
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/opencost.yaml
updatecli apply --config "${UPDATECLI_CONFIG_DIR}/opencost.yaml"
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
Expand Down Expand Up @@ -213,7 +259,7 @@ jobs:
- name: Run Updatecli
id: update-prometheus-operator-crds
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/prometheus-operator-crds.yaml
updatecli apply --config "${UPDATECLI_CONFIG_DIR}/prometheus-operator-crds.yaml"
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
Expand Down Expand Up @@ -259,7 +305,7 @@ jobs:
- name: Run Updatecli
id: update-windows-exporter
run: |
updatecli apply --config ${UPDATECLI_CONFIG_DIR}/windows-exporter.yaml
updatecli apply --config "${UPDATECLI_CONFIG_DIR}/windows-exporter.yaml"
if ! git diff --exit-code > /dev/null; then
echo "changed=true" >> "${GITHUB_OUTPUT}"
fi
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/helm-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ jobs:
run: |
cd source
helm repo add grafana https://grafana.github.io/helm-charts
helm repo add kepler https://sustainable-computing-io.github.io/kepler-helm-chart
helm repo add opencost https://opencost.github.io/opencost-helm-chart
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts

Expand Down
7 changes: 5 additions & 2 deletions charts/k8s-monitoring/Chart.lock
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,8 @@ dependencies:
- name: opencost
repository: https://opencost.github.io/opencost-helm-chart
version: 1.40.0
digest: sha256:18e00cfbd62c68c369271387dbc07ebe4213c7f43423e24b7f31356d8a8de562
generated: "2024-07-03T00:18:13.232705926Z"
- name: kepler
repository: https://sustainable-computing-io.github.io/kepler-helm-chart
version: 0.5.6
digest: sha256:cfc769362f7e209ec89d5581290af16f1a055215c88f0f8d9fd8f123f8ce9760
generated: "2024-07-03T12:32:42.263474-05:00"
4 changes: 4 additions & 0 deletions charts/k8s-monitoring/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ dependencies:
version: 1.40.0
repository: https://opencost.github.io/opencost-helm-chart
condition: opencost.enabled
- name: kepler
version: 0.5.6
repository: https://sustainable-computing-io.github.io/kepler-helm-chart
condition: kepler.enabled
1 change: 1 addition & 0 deletions charts/k8s-monitoring/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ values.schema.json: values.yaml schema-mods/enums-and-types.json schema-mods/req
| del(.properties["alloy-profiles"].properties.alloy) \
| del(.properties["alloy-profiles"].properties.controller) \
| del(.properties["alloy-profiles"].properties.crds) \
| del(.properties["kepler"].properties.canMount) \
| del(.properties["kube-state-metrics"].properties.autosharding) \
| del(.properties["kube-state-metrics"].properties.nodeSelector) \
| del(.properties["kube-state-metrics"].properties.prometheusScrape) \
Expand Down
30 changes: 28 additions & 2 deletions charts/k8s-monitoring/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@
| https://prometheus-community.github.io/helm-charts | prometheus-node-exporter | 4.37.0 |
| https://prometheus-community.github.io/helm-charts | prometheus-operator-crds | 13.0.1 |
| https://prometheus-community.github.io/helm-charts | prometheus-windows-exporter | 0.3.1 |
| https://sustainable-computing-io.github.io/kepler-helm-chart | kepler | 0.5.6 |
<!-- markdownlint-enable no-bare-urls -->

## Values
Expand Down Expand Up @@ -305,13 +306,18 @@
|-----|------|---------|-------------|
| extraConfig | string | `""` | Extra configuration that will be added to the Grafana Alloy configuration file. This value is templated so that you can refer to other values from this file. This cannot be used to modify the generated configuration values, only append new components. See [Adding custom Flow configuration](#adding-custom-flow-configuration) for an example. |

### Chart
### Image Registry

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| extraObjects | list | `[]` | Deploy additional manifest objects |
| global.image.pullSecrets | list | `[]` | Optional set of global image pull secrets. |
| global.image.registry | string | `""` | Global image registry to use if it needs to be overridden for some specific use cases (e.g local registries, custom images, ...) |

### Chart

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| kepler.enabled | bool | `false` | Should this Helm chart deploy Kepler to the cluster. Set this to false if your cluster already has Kepler, or if you do not want to scrape metrics from Kepler. |
| kube-state-metrics.enabled | bool | `true` | Should this helm chart deploy Kube State Metrics to the cluster. Set this to false if your cluster already has Kube State Metrics, or if you do not want to scrape metrics from Kube State Metrics. |
| opencost.enabled | bool | `true` | Should this Helm chart deploy OpenCost to the cluster. Set this to false if your cluster already has OpenCost, or if you do not want to scrape metrics from OpenCost. |
| opencost.opencost.prometheus.existingSecretName | string | `"prometheus-k8s-monitoring"` | The name of the secret containing the username and password for the metrics service. This must be in the same namespace as the OpenCost deployment. |
Expand Down Expand Up @@ -483,6 +489,20 @@
| metrics.maxCacheSize | int | `100000` | Sets the max_cache_size for every prometheus.relabel component. ([docs](https://grafana.com/docs/alloy/latest/reference/components/prometheus.relabel/#arguments)) This should be at least 2x-5x your largest scrape target or samples appended rate. |
| metrics.scrapeInterval | string | `"60s"` | How frequently to scrape metrics |

### Metrics Job: Kepler

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| metrics.kepler.enabled | bool | `false` | Scrape energy metrics from Kepler |
| metrics.kepler.extraMetricRelabelingRules | string | `""` | Rule blocks to be added to the prometheus.relabel component for Kepler. ([docs](https://grafana.com/docs/alloy/latest/reference/components/prometheus.relabel/#rule-block)) These relabeling rules are applied post-scrape against the metrics returned from the scraped target, no __meta* labels are present. |
| metrics.kepler.extraRelabelingRules | string | `""` | Rule blocks to be added to the discovery.relabel component for Kepler. These relabeling rules are applied pre-scrape against the targets from service discovery. Before the scrape, any remaining target labels that start with `__` (i.e. `__meta_kubernetes*`) are dropped. ([docs](https://grafana.com/docs/alloy/latest/reference/components/discovery.relabel/#rule-block)) |

Check failure on line 498 in charts/k8s-monitoring/README.md

View workflow job for this annotation

GitHub Actions / runner / markdownlint

[markdownlint] reported by reviewdog 🐶 MD037/no-space-in-emphasis Spaces inside emphasis markers [Context: "__ ("] Raw Output: charts/k8s-monitoring/README.md:498:281 MD037/no-space-in-emphasis Spaces inside emphasis markers [Context: "__ ("]

Check failure on line 498 in charts/k8s-monitoring/README.md

View workflow job for this annotation

GitHub Actions / runner / markdownlint

[markdownlint] reported by reviewdog 🐶 MD037/no-space-in-emphasis Spaces inside emphasis markers [Context: ". __"] Raw Output: charts/k8s-monitoring/README.md:498:288 MD037/no-space-in-emphasis Spaces inside emphasis markers [Context: ". __"]
| metrics.kepler.labelMatchers | object | `{"app.kubernetes.io/name":"kepler"}` | Label matchers used to select the Kepler pods |
| metrics.kepler.maxCacheSize | string | 100000 | Sets the max_cache_size for the prometheus.relabel component for Kepler. This should be at least 2x-5x your largest scrape target or samples appended rate. ([docs](https://grafana.com/docs/alloy/latest/reference/components/prometheus.relabel/#arguments)) Overrides metrics.maxCacheSize |
| metrics.kepler.metricsTuning.excludeMetrics | list | `[]` | Metrics to drop. Can use regular expressions. |

Check failure on line 501 in charts/k8s-monitoring/README.md

View workflow job for this annotation

GitHub Actions / runner / textlint

[textlint] reported by reviewdog 🐶 Incorrect usage of the term: “regex”, use “regular expression” instead (terminology) Raw Output: /home/runner/work/k8s-monitoring-helm/k8s-monitoring-helm/charts/k8s-monitoring/README.md:501:88: error: Incorrect usage of the term: “regex”, use “regular expression” instead (terminology) (eslint.rules.terminology)
| metrics.kepler.metricsTuning.includeMetrics | list | `[]` | Metrics to keep. Can use regular expressions. |

Check failure on line 502 in charts/k8s-monitoring/README.md

View workflow job for this annotation

GitHub Actions / runner / textlint

[textlint] reported by reviewdog 🐶 Incorrect usage of the term: “regex”, use “regular expression” instead (terminology) Raw Output: /home/runner/work/k8s-monitoring-helm/k8s-monitoring-helm/charts/k8s-monitoring/README.md:502:88: error: Incorrect usage of the term: “regex”, use “regular expression” instead (terminology) (eslint.rules.terminology)
| metrics.kepler.metricsTuning.useDefaultAllowList | bool | `true` | Filter the list of metrics from Kepler to the minimal set required for Kubernetes Monitoring. See [Metrics Tuning and Allow Lists](#metrics-tuning-and-allow-lists) |
| metrics.kepler.scrapeInterval | string | 60s | How frequently to scrape metrics from Kepler. Overrides metrics.scrapeInterval |

### Metrics Job: Kube State Metrics

| Key | Type | Default | Description |
Expand Down Expand Up @@ -758,6 +778,12 @@
| traces.receiver.filters | object | `{"span":[],"spanevent":[]}` | Apply a filter to traces received via the OTLP or OTLP HTTP receivers. ([docs](https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.filter/)) |
| traces.receiver.transforms | object | `{"resource":[],"span":[],"spanevent":[]}` | Apply a transformation to traces received via the OTLP or OTLP HTTP receivers. ([docs](https://grafana.com/docs/alloy/latest/reference/components/otelcol.processor.transform/)) |

### Other Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| extraObjects | list | `[]` | Deploy additional manifest objects |

## Customizing the configuration

There are several options for customizing the configuration generated by this chart. This can be used to add extra
Expand Down
Binary file added charts/k8s-monitoring/charts/kepler-0.5.6.tgz
Binary file not shown.
9 changes: 9 additions & 0 deletions charts/k8s-monitoring/ci/ci-integrations-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ metrics:
}
cost:
enabled: false
kepler:
enabled: true

extraConfig: |
remote.kubernetes.configmap "mysql_config" {
Expand Down Expand Up @@ -89,6 +91,10 @@ logs:
test:
attempts: 20
extraQueries:
# Check for Kepler metrics
- query: "kepler_container_joules_total{cluster=\"ci-integrations-cluster\"}"
type: promql

# Check for CertManager metrics, discovered by service annotations
- query: "certmanager_clock_time_seconds{cluster=\"ci-integrations-cluster\", job=\"integrations/cert-manager\"}"
type: promql
Expand Down Expand Up @@ -127,6 +133,9 @@ test:
opencost:
enabled: false

kepler:
enabled: true

alloy:
controller:
replicas: 2
Expand Down
3 changes: 3 additions & 0 deletions charts/k8s-monitoring/default_allow_lists/kepler.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
# The minimal set of metrics from Kepler required for Kubernetes Monitoring.
# The single pattern below matches every metric whose name starts with "kepler_".
- kepler_.*
4 changes: 4 additions & 0 deletions charts/k8s-monitoring/templates/_configs.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@
{{- include "alloy.config.opencost" . }}
{{- end }}

{{- if .Values.metrics.kepler.enabled }}
{{- include "alloy.config.kepler" . }}
{{- end }}

{{- if .Values.metrics.podMonitors.enabled }}
{{- include "alloy.config.pod_monitors" . }}
{{- end }}
Expand Down
4 changes: 3 additions & 1 deletion charts/k8s-monitoring/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
{{- if .Values.metrics.cadvisor.enabled -}}{{- $metrics = append $metrics "cadvisor" -}}{{- end -}}
{{- if .Values.metrics.apiserver.enabled -}}{{- $metrics = append $metrics "apiserver" }}{{ end -}}
{{- if .Values.metrics.cost.enabled -}}{{- $metrics = append $metrics "cost" }}{{ end -}}
{{- if .Values.metrics.kepler.enabled -}}{{- $metrics = append $metrics "kepler" }}{{ end -}}
{{- if .Values.extraConfig -}}{{- $metrics = append $metrics "extraConfig" }}{{ end -}}
{{- else -}}
{{- $metrics = append $metrics "disabled" -}}
Expand Down Expand Up @@ -56,7 +57,8 @@
{{- if index (index .Values "prometheus-node-exporter").enabled -}}{{- $deployments = append $deployments "prometheus-node-exporter" -}}{{- end -}}
{{- if index (index .Values "prometheus-windows-exporter").enabled -}}{{- $deployments = append $deployments "prometheus-windows-exporter" -}}{{- end -}}
{{- if index (index .Values "prometheus-operator-crds").enabled -}}{{- $deployments = append $deployments "prometheus-operator-crds" -}}{{- end -}}
{{- if index (index .Values "opencost").enabled -}}{{- $deployments = append $deployments "opencost" -}}{{- end -}}
{{- if index .Values.opencost.enabled -}}{{- $deployments = append $deployments "opencost" -}}{{- end -}}
{{- if index .Values.kepler.enabled -}}{{- $deployments = append $deployments "kepler" -}}{{- end -}}
{{- join "," $deployments -}}
{{- end }}

Expand Down
71 changes: 71 additions & 0 deletions charts/k8s-monitoring/templates/alloy_config/_kepler.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
{{/*
alloy.config.kepler.allowList
Emits the metric allow list for Kepler as YAML list text:
  - the contents of default_allow_lists/kepler.yaml when
    metrics.kepler.metricsTuning.useDefaultAllowList is set, followed by
  - metrics.kepler.metricsTuning.includeMetrics (rendered with toYaml) when it is non-empty.
Takes the root context (".") so that .Values and .Files are available.
*/}}
{{ define "alloy.config.kepler.allowList" }}
{{ if .Values.metrics.kepler.metricsTuning.useDefaultAllowList }}
{{ "default_allow_lists/kepler.yaml" | .Files.Get }}
{{ end }}
{{ if .Values.metrics.kepler.metricsTuning.includeMetrics }}
{{ .Values.metrics.kepler.metricsTuning.includeMetrics | toYaml }}
{{ end }}
{{ end }}

{{/*
alloy.config.kepler
Renders the Grafana Alloy configuration for scraping Kepler. Takes the root context (".").
Three components are emitted:
  - discovery.relabel "kepler": starts from discovery.kubernetes.pods.targets, adds one
    "keep" rule per entry in metrics.kepler.labelMatchers (matched against the pod label
    meta label), copies the pod's node name into the "instance" label, then appends
    metrics.extraRelabelingRules and metrics.kepler.extraRelabelingRules when set.
  - prometheus.scrape "kepler": scrapes those targets as job "integrations/kepler" using
    metrics.kepler.scrapeInterval (falling back to metrics.scrapeInterval), with a
    clustering block when alloy.alloy.clustering.enabled is set.
  - prometheus.relabel "kepler": when the default allow list or includeMetrics is in use,
    keeps only "up" plus the names from "alloy.config.kepler.allowList"; drops anything
    matching metricsTuning.excludeMetrics; appends metrics.extraMetricRelabelingRules and
    metrics.kepler.extraMetricRelabelingRules; forwards to
    prometheus.relabel.metrics_service.receiver.
*/}}
{{ define "alloy.config.kepler" }}
// Kepler
discovery.relabel "kepler" {
targets = discovery.kubernetes.pods.targets
{{- range $k, $v := .Values.metrics.kepler.labelMatchers }}
rule {
source_labels = ["__meta_kubernetes_pod_label_{{ include "escape_label" $k }}"]
regex = "{{ $v }}"
action = "keep"
}
{{- end }}
rule {
source_labels = ["__meta_kubernetes_pod_node_name"]
action = "replace"
target_label = "instance"
}
{{- if .Values.metrics.extraRelabelingRules }}
{{ .Values.metrics.extraRelabelingRules | indent 2 }}
{{- end }}
{{- if .Values.metrics.kepler.extraRelabelingRules }}
{{ .Values.metrics.kepler.extraRelabelingRules | indent 2 }}
{{- end }}
}

prometheus.scrape "kepler" {
targets = discovery.relabel.kepler.output
job_name = "integrations/kepler"
honor_labels = true
scrape_interval = {{ .Values.metrics.kepler.scrapeInterval | default .Values.metrics.scrapeInterval | quote }}
{{- if .Values.alloy.alloy.clustering.enabled }}
clustering {
enabled = true
}
{{- end }}
forward_to = [prometheus.relabel.kepler.receiver]
}

prometheus.relabel "kepler" {
max_cache_size = {{ .Values.metrics.kepler.maxCacheSize | default .Values.metrics.maxCacheSize | int }}
{{- if or .Values.metrics.kepler.metricsTuning.useDefaultAllowList .Values.metrics.kepler.metricsTuning.includeMetrics }}
rule {
source_labels = ["__name__"]
regex = "up|{{ join "|" (include "alloy.config.kepler.allowList" . | fromYamlArray) }}"
action = "keep"
}
{{- end }}
{{- if .Values.metrics.kepler.metricsTuning.excludeMetrics }}
rule {
source_labels = ["__name__"]
regex = {{ join "|" .Values.metrics.kepler.metricsTuning.excludeMetrics | quote }}
action = "drop"
}
{{- end }}
{{- if .Values.metrics.extraMetricRelabelingRules }}
{{ .Values.metrics.extraMetricRelabelingRules | indent 2 }}
{{- end }}
{{- if .Values.metrics.kepler.extraMetricRelabelingRules }}
{{ .Values.metrics.kepler.extraMetricRelabelingRules | indent 2 }}
{{- end }}
forward_to = [prometheus.relabel.metrics_service.receiver]
}
{{ end }}
Loading
Loading