Skip to content

Commit

Permalink
Make job labels in clusterMetrics configurable
Browse files: browse the repository at this point in the history
Signed-off-by: Pete Wall <[email protected]>
  • Loading branch information
petewall committed Jan 16, 2025
1 parent 242434d commit 5bf28d0
Show file tree
Hide file tree
Showing 70 changed files with 281 additions and 8 deletions.
13 changes: 13 additions & 0 deletions charts/k8s-monitoring/charts/feature-cluster-metrics/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

kubernetes.apiserver "scrape" {
clustering = true
job_label = {{ .Values.apiServer.jobLabel | quote }}
{{- if $metricAllowList }}
keep_metrics = "up|{{ $metricAllowList | join "|" }}"
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

kubernetes.cadvisor "scrape" {
clustering = true
job_label = {{ .Values.cadvisor.jobLabel | quote }}
{{- if $metricAllowList }}
keep_metrics = {{ $metricAllowList | join "|" | quote }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ discovery.relabel "kepler" {

prometheus.scrape "kepler" {
targets = discovery.relabel.kepler.output
job_name = "integrations/kepler"
job_name = {{ .Values.kepler.jobLabel | quote }}
honor_labels = true
scrape_interval = {{ .Values.kepler.scrapeInterval | default .Values.global.scrapeInterval | quote }}
clustering {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ discovery.relabel "kube_controller_manager" {

prometheus.scrape "kube_controller_manager" {
targets = discovery.relabel.kube_controller_manager.output
job_name = "kube-controller-manager"
job_name = {{ .Values.kubeControllerManager.jobLabel | quote }}
scheme = "https"
scrape_interval = {{ .Values.kubeControllerManager.scrapeInterval | default .Values.global.scrapeInterval | quote }}
bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

kubernetes.kube_dns "scrape" {
clustering = true
job_label = {{ .Values.kubeDNS.jobLabel | quote }}
{{- if $metricAllowList }}
keep_metrics = "up|{{ $metricAllowList | join "|" }}"
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ discovery.relabel "kube_proxy" {

prometheus.scrape "kube_proxy" {
targets = discovery.relabel.kube_proxy.output
job_name = "integrations/kubernetes/kube-proxy"
job_name = {{ .Values.kubeProxy.jobLabel | quote }}
scheme = "http"
scrape_interval = {{ .Values.kubeProxy.scrapeInterval | default .Values.global.scrapeInterval | quote }}
clustering {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ discovery.relabel "kube_scheduler" {

prometheus.scrape "kube_scheduler" {
targets = discovery.relabel.kube_scheduler.output
job_name = "kube-scheduler"
job_name = {{ .Values.kubeScheduler.jobLabel | quote }}
scheme = "https"
scrape_interval = {{ .Values.kubeScheduler.scrapeInterval | default .Values.global.scrapeInterval | quote }}
bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ discovery.relabel "kube_state_metrics" {
kube_state_metrics.scrape "metrics" {
targets = {{ $scrapeTargets }}
clustering = true
job_label = {{ (index .Values "kube-state-metrics").jobLabel | quote }}
{{- if $metricAllowList }}
keep_metrics = {{ $metricAllowList | join "|" | quote }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

kubernetes.kubelet "scrape" {
clustering = true
job_label = {{ .Values.kubelet.jobLabel | quote }}
{{- if $metricAllowList }}
keep_metrics = {{ $metricAllowList | join "|" | quote }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

kubernetes.resources "scrape" {
clustering = true
job_label = "integrations/kubernetes/resources"
job_label = {{ .Values.kubeletResource.jobLabel | quote }}
{{- if $metricAllowList }}
keep_metrics = {{ $metricAllowList | join "|" | quote }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ discovery.relabel "node_exporter" {

node_exporter.scrape "metrics" {
targets = discovery.relabel.node_exporter.output
job_label = "integrations/node_exporter"
job_label = {{ (index .Values "node-exporter").jobLabel | quote }}
clustering = true
{{- if $metricAllowList }}
keep_metrics = {{ $metricAllowList | join "|" | quote }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ discovery.relabel "opencost" {

prometheus.scrape "opencost" {
targets = discovery.relabel.opencost.output
job_name = "integrations/opencost"
job_name = {{ .Values.opencost.jobLabel | quote }}
honor_labels = true
scrape_interval = {{ .Values.opencost.scrapeInterval | default .Values.global.scrapeInterval | quote }}
clustering {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ discovery.relabel "windows_exporter" {
}

prometheus.scrape "windows_exporter" {
job_name = "integrations/windows-exporter"
job_name = {{ (index .Values "windows-exporter").jobLabel | quote }}
targets = discovery.relabel.windows_exporter.output
scrape_interval = {{ (index .Values "windows-exporter").scrapeInterval | default .Values.global.scrapeInterval | quote }}
clustering {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ tests:
kubernetes.kubelet "scrape" {
clustering = true
job_label = "integrations/kubernetes/kubelet"
keep_metrics = "up|scrape_samples_scraped|go_goroutines|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_used_bytes|kubernetes_build_info|namespace_workload_pod|process_cpu_seconds_total|process_resident_memory_bytes|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes"
scrape_interval = "60s"
max_cache_size = 100000
Expand All @@ -45,6 +46,7 @@ tests:
kubernetes.cadvisor "scrape" {
clustering = true
job_label = "integrations/kubernetes/cadvisor"
keep_metrics = "up|scrape_samples_scraped|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes"
scrape_interval = "60s"
max_cache_size = 100000
Expand Down Expand Up @@ -126,6 +128,7 @@ tests:
kubernetes.apiserver "scrape" {
clustering = true
job_label = "integrations/kubernetes/kube-apiserver"
scrape_interval = "60s"
max_cache_size = 100000
forward_to = argument.metrics_destinations.value
Expand Down Expand Up @@ -168,6 +171,7 @@ tests:
kubernetes.kube_dns "scrape" {
clustering = true
job_label = "integrations/kubernetes/kube-dns"
scrape_interval = "60s"
max_cache_size = 100000
forward_to = argument.metrics_destinations.value
Expand Down Expand Up @@ -258,6 +262,7 @@ tests:
kube_state_metrics.scrape "metrics" {
targets = kube_state_metrics.kubernetes.targets.output
clustering = true
job_label = "integrations/kubernetes/kube-state-metrics"
keep_metrics = "up|scrape_samples_scraped|kube_configmap_info|kube_configmap_metadata_resource_version|kube_daemonset.*|kube_deployment_metadata_generation|kube_deployment_spec_replicas|kube_deployment_status_condition|kube_deployment_status_observed_generation|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_horizontalpodautoscaler_spec_max_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job.*|kube_namespace_status_phase|kube_node.*|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_labels|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_persistentvolumeclaim_status_phase|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_restarts_total|kube_pod_container_status_waiting_reason|kube_pod_info|kube_pod_owner|kube_pod_spec_volumes_persistentvolumeclaims_info|kube_pod_start_time|kube_pod_status_phase|kube_pod_status_reason|kube_replicaset.*|kube_resourcequota|kube_secret_metadata_resource_version|kube_statefulset.*"
scheme = "http"
scrape_interval = "60s"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ tests:
kubernetes.kubelet "scrape" {
clustering = true
job_label = "integrations/kubernetes/kubelet"
keep_metrics = "up|scrape_samples_scraped|go_goroutines|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_used_bytes|kubernetes_build_info|namespace_workload_pod|process_cpu_seconds_total|process_resident_memory_bytes|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes"
scrape_interval = "60s"
max_cache_size = 100000
Expand All @@ -61,6 +62,7 @@ tests:
kubernetes.cadvisor "scrape" {
clustering = true
job_label = "integrations/kubernetes/cadvisor"
keep_metrics = "up|scrape_samples_scraped|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes"
scrape_interval = "60s"
max_cache_size = 100000
Expand Down Expand Up @@ -168,6 +170,7 @@ tests:
kube_state_metrics.scrape "metrics" {
targets = discovery.relabel.kube_state_metrics.output
clustering = true
job_label = "integrations/kubernetes/kube-state-metrics"
keep_metrics = "up|scrape_samples_scraped|kube_configmap_info|kube_configmap_metadata_resource_version|kube_daemonset.*|kube_deployment_metadata_generation|kube_deployment_spec_replicas|kube_deployment_status_condition|kube_deployment_status_observed_generation|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_horizontalpodautoscaler_spec_max_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job.*|kube_namespace_status_phase|kube_node.*|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_labels|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_persistentvolumeclaim_status_phase|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_restarts_total|kube_pod_container_status_waiting_reason|kube_pod_info|kube_pod_owner|kube_pod_spec_volumes_persistentvolumeclaims_info|kube_pod_start_time|kube_pod_status_phase|kube_pod_status_reason|kube_replicaset.*|kube_resourcequota|kube_secret_metadata_resource_version|kube_statefulset.*"
scheme = "http"
scrape_interval = "60s"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ tests:
kubernetes.kubelet "scrape" {
clustering = true
job_label = "integrations/kubernetes/kubelet"
keep_metrics = "up|scrape_samples_scraped|go_goroutines|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_certificate_manager_client_ttl_seconds|kubelet_certificate_manager_server_ttl_seconds|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_cgroup_manager_duration_seconds_count|kubelet_node_config_error|kubelet_node_name|kubelet_pleg_relist_duration_seconds_bucket|kubelet_pleg_relist_duration_seconds_count|kubelet_pleg_relist_interval_seconds_bucket|kubelet_pod_start_duration_seconds_bucket|kubelet_pod_start_duration_seconds_count|kubelet_pod_worker_duration_seconds_bucket|kubelet_pod_worker_duration_seconds_count|kubelet_running_container_count|kubelet_running_containers|kubelet_running_pod_count|kubelet_running_pods|kubelet_runtime_operations_errors_total|kubelet_runtime_operations_total|kubelet_server_expiration_renew_errors|kubelet_volume_stats_available_bytes|kubelet_volume_stats_capacity_bytes|kubelet_volume_stats_inodes|kubelet_volume_stats_inodes_free|kubelet_volume_stats_inodes_used|kubelet_volume_stats_used_bytes|kubernetes_build_info|namespace_workload_pod|process_cpu_seconds_total|process_resident_memory_bytes|rest_client_requests_total|storage_operation_duration_seconds_count|storage_operation_errors_total|volume_manager_total_volumes"
scrape_interval = "60s"
max_cache_size = 100000
Expand All @@ -43,6 +44,7 @@ tests:
kubernetes.cadvisor "scrape" {
clustering = true
job_label = "integrations/kubernetes/cadvisor"
keep_metrics = "up|scrape_samples_scraped|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|container_cpu_usage_seconds_total|container_fs_reads_bytes_total|container_fs_reads_total|container_fs_writes_bytes_total|container_fs_writes_total|container_memory_cache|container_memory_rss|container_memory_swap|container_memory_working_set_bytes|container_network_receive_bytes_total|container_network_receive_packets_dropped_total|container_network_receive_packets_total|container_network_transmit_bytes_total|container_network_transmit_packets_dropped_total|container_network_transmit_packets_total|machine_memory_bytes"
scrape_interval = "60s"
max_cache_size = 100000
Expand Down Expand Up @@ -141,6 +143,7 @@ tests:
kube_state_metrics.scrape "metrics" {
targets = kube_state_metrics.kubernetes.targets.output
clustering = true
job_label = "integrations/kubernetes/kube-state-metrics"
keep_metrics = "up|scrape_samples_scraped|kube_configmap_info|kube_configmap_metadata_resource_version|kube_daemonset.*|kube_deployment_metadata_generation|kube_deployment_spec_replicas|kube_deployment_status_condition|kube_deployment_status_observed_generation|kube_deployment_status_replicas_available|kube_deployment_status_replicas_updated|kube_horizontalpodautoscaler_spec_max_replicas|kube_horizontalpodautoscaler_spec_min_replicas|kube_horizontalpodautoscaler_status_current_replicas|kube_horizontalpodautoscaler_status_desired_replicas|kube_job.*|kube_namespace_status_phase|kube_node.*|kube_persistentvolume_status_phase|kube_persistentvolumeclaim_access_mode|kube_persistentvolumeclaim_info|kube_persistentvolumeclaim_labels|kube_persistentvolumeclaim_resource_requests_storage_bytes|kube_persistentvolumeclaim_status_phase|kube_pod_container_info|kube_pod_container_resource_limits|kube_pod_container_resource_requests|kube_pod_container_status_last_terminated_reason|kube_pod_container_status_restarts_total|kube_pod_container_status_waiting_reason|kube_pod_info|kube_pod_owner|kube_pod_spec_volumes_persistentvolumeclaims_info|kube_pod_start_time|kube_pod_status_phase|kube_pod_status_reason|kube_replicaset.*|kube_resourcequota|kube_secret_metadata_resource_version|kube_statefulset.*"
scheme = "http"
scrape_interval = "60s"
Expand Down
Loading

0 comments on commit 5bf28d0

Please sign in to comment.