From 19723cb1991685027882c24aec1f3597fd1bc856 Mon Sep 17 00:00:00 2001 From: Pete Wall Date: Wed, 8 Jan 2025 18:53:49 -0600 Subject: [PATCH] Add example and integration test to show sharded kube-state-metrics (#1076) * Add example and integration test to show sharded kube-state-metrics with multiple replicas Signed-off-by: Pete Wall * Update alloy modules Signed-off-by: Pete Wall * Update examples and test outputs Signed-off-by: Pete Wall * Silence stderr and increase to five minutes Signed-off-by: Pete Wall --------- Signed-off-by: Pete Wall --- .../kube-state-metrics/metrics.alloy | 8 +- .../docs/examples/auth/oauth2/output.yaml | 8 +- .../docs/examples/auth/sigv4/output.yaml | 8 +- .../examples/collector-storage/output.yaml | 8 +- .../destinations/otlp-gateway/output.yaml | 8 +- .../docs/examples/extra-rules/output.yaml | 8 +- .../control-plane-monitoring/output.yaml | 8 +- .../cluster-metrics/default/output.yaml | 8 +- .../docs/examples/meta-monitoring/output.yaml | 8 +- .../docs/examples/metrics-tuning/output.yaml | 8 +- .../examples/platforms/azure-aks/output.yaml | 8 +- .../platforms/eks-fargate/output.yaml | 8 +- .../platforms/gke-autopilot/output.yaml | 8 +- .../examples/platforms/openshift/output.yaml | 8 +- .../private-image-registries/output.yaml | 8 +- .../docs/examples/proxies/output.yaml | 8 +- .../{ => scalability}/autoscaling/README.md | 0 .../autoscaling/alloy-metrics.alloy | 0 .../autoscaling/description.txt | 0 .../{ => scalability}/autoscaling/output.yaml | 8 +- .../{ => scalability}/autoscaling/values.yaml | 0 .../sharded-kube-state-metrics/README.md | 27 + .../alloy-metrics.alloy | 310 ++ .../sharded-kube-state-metrics/output.yaml | 2821 +++++++++++++++ .../sharded-kube-state-metrics/values.yaml | 17 + .../cluster-monitoring/.rendered/output.yaml | 8 +- .../.rendered/output.yaml | 8 +- .../integration-grafana/.rendered/output.yaml | 8 +- .../integration-loki/.rendered/output.yaml | 8 +- .../.rendered/output.yaml | 8 +- 
.../.rendered/output.yaml | 3036 +++++++++++++++++ .../deployments/grafana.yaml | 39 + .../deployments/prometheus.yaml | 61 + .../deployments/query-test.yaml | 56 + .../kind-cluster-config.yaml | 7 + .../sharded-kube-state-metrics/values.yaml | 25 + .../eks-with-windows/.rendered/output.yaml | 8 +- .../gke-autopilot/.rendered/output.yaml | 8 +- .../k8s-monitoring/.rendered/output.yaml | 8 +- charts/k8s-monitoring/vendir.lock.yml | 6 +- scripts/run-cluster-test.sh | 8 +- 41 files changed, 6506 insertions(+), 107 deletions(-) rename charts/k8s-monitoring/docs/examples/{ => scalability}/autoscaling/README.md (100%) rename charts/k8s-monitoring/docs/examples/{ => scalability}/autoscaling/alloy-metrics.alloy (100%) rename charts/k8s-monitoring/docs/examples/{ => scalability}/autoscaling/description.txt (100%) rename charts/k8s-monitoring/docs/examples/{ => scalability}/autoscaling/output.yaml (99%) rename charts/k8s-monitoring/docs/examples/{ => scalability}/autoscaling/values.yaml (100%) create mode 100644 charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/README.md create mode 100644 charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/alloy-metrics.alloy create mode 100644 charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/output.yaml create mode 100644 charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/values.yaml create mode 100644 charts/k8s-monitoring/tests/integration/sharded-kube-state-metrics/.rendered/output.yaml create mode 100644 charts/k8s-monitoring/tests/integration/sharded-kube-state-metrics/deployments/grafana.yaml create mode 100644 charts/k8s-monitoring/tests/integration/sharded-kube-state-metrics/deployments/prometheus.yaml create mode 100644 charts/k8s-monitoring/tests/integration/sharded-kube-state-metrics/deployments/query-test.yaml create mode 100644 charts/k8s-monitoring/tests/integration/sharded-kube-state-metrics/kind-cluster-config.yaml create mode 
100644 charts/k8s-monitoring/tests/integration/sharded-kube-state-metrics/values.yaml diff --git a/charts/k8s-monitoring/alloyModules/modules/kubernetes/kube-state-metrics/metrics.alloy b/charts/k8s-monitoring/alloyModules/modules/kubernetes/kube-state-metrics/metrics.alloy index f41ac1ed6..487e2969e 100644 --- a/charts/k8s-monitoring/alloyModules/modules/kubernetes/kube-state-metrics/metrics.alloy +++ b/charts/k8s-monitoring/alloyModules/modules/kubernetes/kube-state-metrics/metrics.alloy @@ -33,12 +33,12 @@ declare "kubernetes" { optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -54,7 +54,7 @@ declare "kubernetes" { // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/auth/oauth2/output.yaml b/charts/k8s-monitoring/docs/examples/auth/oauth2/output.yaml index af19e5762..bce31f886 100644 --- a/charts/k8s-monitoring/docs/examples/auth/oauth2/output.yaml +++ b/charts/k8s-monitoring/docs/examples/auth/oauth2/output.yaml @@ -2369,12 +2369,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = 
string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -2390,7 +2390,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/auth/sigv4/output.yaml b/charts/k8s-monitoring/docs/examples/auth/sigv4/output.yaml index 370eb5477..f8166236e 100644 --- a/charts/k8s-monitoring/docs/examples/auth/sigv4/output.yaml +++ b/charts/k8s-monitoring/docs/examples/auth/sigv4/output.yaml @@ -1520,12 +1520,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1541,7 +1541,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/collector-storage/output.yaml b/charts/k8s-monitoring/docs/examples/collector-storage/output.yaml index 503503890..dd7f9812a 100644 --- a/charts/k8s-monitoring/docs/examples/collector-storage/output.yaml +++ b/charts/k8s-monitoring/docs/examples/collector-storage/output.yaml @@ -1685,12 +1685,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" 
selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1706,7 +1706,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/destinations/otlp-gateway/output.yaml b/charts/k8s-monitoring/docs/examples/destinations/otlp-gateway/output.yaml index 6880ad4e7..26a31d96f 100644 --- a/charts/k8s-monitoring/docs/examples/destinations/otlp-gateway/output.yaml +++ b/charts/k8s-monitoring/docs/examples/destinations/otlp-gateway/output.yaml @@ -1771,12 +1771,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1792,7 +1792,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/extra-rules/output.yaml b/charts/k8s-monitoring/docs/examples/extra-rules/output.yaml index 0fe978e89..7b1ec2878 100644 --- a/charts/k8s-monitoring/docs/examples/extra-rules/output.yaml +++ b/charts/k8s-monitoring/docs/examples/extra-rules/output.yaml @@ -1913,12 +1913,12 @@ data: optional = true } - // kube state metrics service 
discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1934,7 +1934,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml index 2af5a62f7..70c69cb52 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml @@ -2256,12 +2256,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -2277,7 +2277,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/output.yaml 
b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/output.yaml index a4a07c9b1..55d607247 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/default/output.yaml @@ -1609,12 +1609,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1630,7 +1630,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml b/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml index edf9def1b..b52d984c9 100644 --- a/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml +++ b/charts/k8s-monitoring/docs/examples/meta-monitoring/output.yaml @@ -2438,12 +2438,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -2459,7 +2459,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + 
source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/metrics-tuning/output.yaml b/charts/k8s-monitoring/docs/examples/metrics-tuning/output.yaml index 48e910b4c..1d0f04945 100644 --- a/charts/k8s-monitoring/docs/examples/metrics-tuning/output.yaml +++ b/charts/k8s-monitoring/docs/examples/metrics-tuning/output.yaml @@ -1718,12 +1718,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1739,7 +1739,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/platforms/azure-aks/output.yaml b/charts/k8s-monitoring/docs/examples/platforms/azure-aks/output.yaml index 3a63ee3f2..773ad359f 100644 --- a/charts/k8s-monitoring/docs/examples/platforms/azure-aks/output.yaml +++ b/charts/k8s-monitoring/docs/examples/platforms/azure-aks/output.yaml @@ -1854,12 +1854,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, 
["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1875,7 +1875,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/platforms/eks-fargate/output.yaml b/charts/k8s-monitoring/docs/examples/platforms/eks-fargate/output.yaml index 73abf7e77..c4b07b707 100644 --- a/charts/k8s-monitoring/docs/examples/platforms/eks-fargate/output.yaml +++ b/charts/k8s-monitoring/docs/examples/platforms/eks-fargate/output.yaml @@ -1782,12 +1782,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1803,7 +1803,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/platforms/gke-autopilot/output.yaml b/charts/k8s-monitoring/docs/examples/platforms/gke-autopilot/output.yaml index 11ec31fd1..77b94a24d 100644 --- a/charts/k8s-monitoring/docs/examples/platforms/gke-autopilot/output.yaml +++ b/charts/k8s-monitoring/docs/examples/platforms/gke-autopilot/output.yaml @@ -1802,12 +1802,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = 
"endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1823,7 +1823,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/platforms/openshift/output.yaml b/charts/k8s-monitoring/docs/examples/platforms/openshift/output.yaml index ff6e72b09..3bb65d309 100644 --- a/charts/k8s-monitoring/docs/examples/platforms/openshift/output.yaml +++ b/charts/k8s-monitoring/docs/examples/platforms/openshift/output.yaml @@ -1908,12 +1908,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1929,7 +1929,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/private-image-registries/output.yaml b/charts/k8s-monitoring/docs/examples/private-image-registries/output.yaml index 2f58dcdf2..f09dd7f9d 100644 --- a/charts/k8s-monitoring/docs/examples/private-image-registries/output.yaml +++ b/charts/k8s-monitoring/docs/examples/private-image-registries/output.yaml @@ -1691,12 +1691,12 @@ data: optional = 
true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1712,7 +1712,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/proxies/output.yaml b/charts/k8s-monitoring/docs/examples/proxies/output.yaml index ca4de0441..39266e9ff 100644 --- a/charts/k8s-monitoring/docs/examples/proxies/output.yaml +++ b/charts/k8s-monitoring/docs/examples/proxies/output.yaml @@ -2978,12 +2978,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -2999,7 +2999,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/autoscaling/README.md b/charts/k8s-monitoring/docs/examples/scalability/autoscaling/README.md similarity index 100% rename from charts/k8s-monitoring/docs/examples/autoscaling/README.md rename to 
charts/k8s-monitoring/docs/examples/scalability/autoscaling/README.md diff --git a/charts/k8s-monitoring/docs/examples/autoscaling/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/scalability/autoscaling/alloy-metrics.alloy similarity index 100% rename from charts/k8s-monitoring/docs/examples/autoscaling/alloy-metrics.alloy rename to charts/k8s-monitoring/docs/examples/scalability/autoscaling/alloy-metrics.alloy diff --git a/charts/k8s-monitoring/docs/examples/autoscaling/description.txt b/charts/k8s-monitoring/docs/examples/scalability/autoscaling/description.txt similarity index 100% rename from charts/k8s-monitoring/docs/examples/autoscaling/description.txt rename to charts/k8s-monitoring/docs/examples/scalability/autoscaling/description.txt diff --git a/charts/k8s-monitoring/docs/examples/autoscaling/output.yaml b/charts/k8s-monitoring/docs/examples/scalability/autoscaling/output.yaml similarity index 99% rename from charts/k8s-monitoring/docs/examples/autoscaling/output.yaml rename to charts/k8s-monitoring/docs/examples/scalability/autoscaling/output.yaml index ba0763724..5a7c258d1 100644 --- a/charts/k8s-monitoring/docs/examples/autoscaling/output.yaml +++ b/charts/k8s-monitoring/docs/examples/scalability/autoscaling/output.yaml @@ -1499,12 +1499,12 @@ data: optional = true } - // kube state metrics service discovery for all of the pods + // kube state metrics service discovery for all of the endpoints discovery.kubernetes "ksm" { - role = "service" + role = "endpoints" selectors { - role = "service" + role = "endpoints" field = string.join(coalesce(argument.field_selectors.value, []), ",") label = string.join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=kube-state-metrics"]), ",") } @@ -1520,7 +1520,7 @@ data: // only keep targets with a matching port name rule { - source_labels = ["__meta_kubernetes_service_port_name"] + source_labels = ["__meta_kubernetes_endpoint_port_name"] regex = coalesce(argument.port_name.value, "http") 
action = "keep" } diff --git a/charts/k8s-monitoring/docs/examples/autoscaling/values.yaml b/charts/k8s-monitoring/docs/examples/scalability/autoscaling/values.yaml similarity index 100% rename from charts/k8s-monitoring/docs/examples/autoscaling/values.yaml rename to charts/k8s-monitoring/docs/examples/scalability/autoscaling/values.yaml diff --git a/charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/README.md b/charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/README.md new file mode 100644 index 000000000..e314b5573 --- /dev/null +++ b/charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/README.md @@ -0,0 +1,27 @@ + +# Example: scalability/sharded-kube-state-metrics/values.yaml + +## Values + +```yaml +cluster: + name: sharded-kube-state-metrics + +destinations: + - name: prometheus + type: prometheus + url: http://prometheus.prometheus.svc:9090/api/v1/write + +clusterMetrics: + enabled: true + kube-state-metrics: + autosharding: + enabled: true + replicas: 5 + +alloy-metrics: + enabled: true +``` diff --git a/charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/alloy-metrics.alloy new file mode 100644 index 000000000..cf5b347d6 --- /dev/null +++ b/charts/k8s-monitoring/docs/examples/scalability/sharded-kube-state-metrics/alloy-metrics.alloy @@ -0,0 +1,310 @@ +// Destination: prometheus (prometheus) +otelcol.exporter.prometheus "prometheus" { + add_metric_suffixes = true + forward_to = [prometheus.remote_write.prometheus.receiver] +} + +prometheus.remote_write "prometheus" { + endpoint { + url = "http://prometheus.prometheus.svc:9090/api/v1/write" + headers = { + } + tls_config { + insecure_skip_verify = false + } + send_native_histograms = false + + queue_config { + capacity = 10000 + min_shards = 1 + max_shards = 50 + max_samples_per_send = 2000 + batch_send_deadline = "5s" 
+ min_backoff = "30ms" + max_backoff = "5s" + retry_on_http_429 = true + sample_age_limit = "0s" + } + + write_relabel_config { + source_labels = ["cluster"] + regex = "" + replacement = "sharded-kube-state-metrics" + target_label = "cluster" + } + write_relabel_config { + source_labels = ["k8s.cluster.name"] + regex = "" + replacement = "sharded-kube-state-metrics" + target_label = "cluster" + } + } + + wal { + truncate_frequency = "2h" + min_keepalive_time = "5m" + max_keepalive_time = "8h" + } +} + +// Feature: Cluster Metrics +declare "cluster_metrics" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + remote.kubernetes.configmap "kubernetes" { + name = "k8smon-alloy-module-kubernetes" + namespace = "default" + } + + import.string "kubernetes" { + content = remote.kubernetes.configmap.kubernetes.data["core_metrics.alloy"] + } + + kubernetes.kubelet "scrape" { + clustering = true + keep_metrics = 
"up|go_goroutines|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_used_bytes|kubernetes_build_info|namespace_workload_pod|process_cpu_seconds_total|process_resident_memory_bytes|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + + kubernetes.resources "scrape" { + clustering = true + job_label = "integrations/kubernetes/resources" + keep_metrics = "up|node_cpu_usage_seconds_total|node_memory_working_set_bytes" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + + kubernetes.cadvisor "scrape" { + clustering = true + keep_metrics = 
"up|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = [prometheus.relabel.cadvisor.receiver] + } + + prometheus.relabel "cadvisor" { + max_cache_size = 100000 + // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","container"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@" + action = "drop" + } + // Drop empty image labels, addressing https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + // Normalizing unimportant labels (not deleting to continue satisfying