diff --git a/charts/k8s-monitoring/tests/platform/eks-with-windows/.rendered/output.yaml b/charts/k8s-monitoring/tests/platform/eks-with-windows/.rendered/output.yaml index 689fe9414..a1b6ed1d8 100644 --- a/charts/k8s-monitoring/tests/platform/eks-with-windows/.rendered/output.yaml +++ b/charts/k8s-monitoring/tests/platform/eks-with-windows/.rendered/output.yaml @@ -937,6 +937,7 @@ data: # TYPE grafana_kubernetes_monitoring_feature_info gauge grafana_kubernetes_monitoring_feature_info{deployments="kube-state-metrics,node-exporter,windows-exporter", feature="clusterMetrics", sources="kubelet,kubeletResource,cadvisor,kube-state-metrics,node-exporter,windows-exporter", version="1.0.0"} 1 grafana_kubernetes_monitoring_feature_info{feature="clusterEvents", version="1.0.0"} 1 + grafana_kubernetes_monitoring_feature_info{feature="nodeLogs", version="1.0.0"} 1 grafana_kubernetes_monitoring_feature_info{feature="podLogs", method="volumes", version="1.0.0"} 1 grafana_kubernetes_monitoring_feature_info{feature="integrations", sources="alloy", version="1.0.0"} 1 --- @@ -979,6 +980,159 @@ data: namespace = "default" } + // Feature: Node Logs + declare "node_logs" { + argument "logs_destinations" { + comment = "Must be a list of log destinations where collected logs should be forwarded to" + } + + loki.relabel "journal" { + + // copy all journal labels and make them available to the pipeline stages as labels, there is a label + // keep defined to filter out unwanted labels, these pipeline labels can be set as structured metadata + // as well, the following labels are available: + // - boot_id + // - cap_effective + // - cmdline + // - comm + // - exe + // - gid + // - hostname + // - machine_id + // - pid + // - stream_id + // - systemd_cgroup + // - systemd_invocation_id + // - systemd_slice + // - systemd_unit + // - transport + // - uid + // + // More Info: https://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html + rule { + action = "labelmap" + regex = 
"__journal__(.+)" + } + + rule { + action = "replace" + source_labels = ["__journal__systemd_unit"] + replacement = "$1" + target_label = "unit" + } + + // the service_name label will be set automatically in loki if not set, and the unit label + // will not allow service_name to be set automatically. + rule { + action = "replace" + source_labels = ["__journal__systemd_unit"] + replacement = "$1" + target_label = "service_name" + } + + forward_to = [] // No forward_to is used in this component, the defined rules are used in the loki.source.journal component + } + + loki.source.journal "worker" { + path = "/var/log/journal" + format_as_json = false + max_age = "8h" + relabel_rules = loki.relabel.journal.rules + labels = { + job = "integrations/kubernetes/journal", + instance = sys.env("HOSTNAME"), + } + forward_to = [loki.process.journal_logs.receiver] + } + + loki.process "journal_logs" { + stage.static_labels { + values = { + // add a static source label to the logs so they can be differentiated / restricted if necessary + "source" = "journal", + // default level to unknown + level = "unknown", + } + } + + // Attempt to determine the log level, most k8s workers are either in logfmt or klog formats + // check to see if the log line matches the klog format (https://github.com/kubernetes/klog) + stage.match { + // unescaped regex: ([IWED][0-9]{4}\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+) + selector = "{level=\"unknown\"} |~ \"([IWED][0-9]{4}\\\\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\\\\.[0-9]+)\"" + + // extract log level, klog uses a single letter code for the level followed by the month and day i.e. 
I0119 + stage.regex { + expression = "((?P<level>[A-Z])[0-9])" + } + + // if the extracted level is I set INFO + stage.replace { + source = "level" + expression = "(I)" + replace = "INFO" + } + + // if the extracted level is W set WARN + stage.replace { + source = "level" + expression = "(W)" + replace = "WARN" + } + + // if the extracted level is E set ERROR + stage.replace { + source = "level" + expression = "(E)" + replace = "ERROR" + } + + // if the extracted level is D set DEBUG + stage.replace { + source = "level" + expression = "(D)" + replace = "DEBUG" + } + + // set the extracted level to be a label + stage.labels { + values = { + level = "", + } + } + } + + // if the level is still unknown, do one last attempt at detecting it based on common levels + stage.match { + selector = "{level=\"unknown\"}" + + // unescaped regex: (?i)(?:"(?:level|loglevel|levelname|lvl|levelText|SeverityText)":\s*"|\s*(?:level|loglevel|levelText|lvl)="?|\s+\[?)(?P<level>(DEBUG?|DBG|INFO?(RMATION)?|WA?RN(ING)?|ERR(OR)?|CRI?T(ICAL)?|FATAL|FTL|NOTICE|TRACE|TRC|PANIC|PNC|ALERT|EMERGENCY))("|\s+|-|\s*\]) + stage.regex { + expression = "(?i)(?:\"(?:level|loglevel|levelname|lvl|levelText|SeverityText)\":\\s*\"|\\s*(?:level|loglevel|levelText|lvl)=\"?|\\s+\\[?)(?P<level>(DEBUG?|DBG|INFO?(RMATION)?|WA?RN(ING)?|ERR(OR)?|CRI?T(ICAL)?|FATAL|FTL|NOTICE|TRACE|TRC|PANIC|PNC|ALERT|EMERGENCY))(\"|\\s+|-|\\s*\\])" + } + + // set the extracted level to be a label + stage.labels { + values = { + level = "", + } + } + } + + // Only keep the labels that are defined in the `keepLabels` list. 
+ stage.label_keep { + values = ["instance","job","level","name","unit","service_name","source"] + } + + forward_to = argument.logs_destinations.value + } + } + node_logs "feature" { + logs_destinations = [ + loki.write.grafana_cloud_logs.receiver, + ] + } + // Feature: Pod Logs declare "pod_logs" { argument "logs_destinations" { diff --git a/charts/k8s-monitoring/tests/platform/eks-with-windows/Makefile b/charts/k8s-monitoring/tests/platform/eks-with-windows/Makefile index 20ca9c2ea..d49a35f6e 100644 --- a/charts/k8s-monitoring/tests/platform/eks-with-windows/Makefile +++ b/charts/k8s-monitoring/tests/platform/eks-with-windows/Makefile @@ -17,3 +17,6 @@ deployments/grafana-cloud-credentials.yaml: --from-literal=LOKI_USER="$$GRAFANA_CLOUD_LOGS_USERNAME" \ --from-literal=LOKI_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \ -o yaml --dry-run=client >> $@ + +run-test: + ../../../../../scripts/run-cluster-test.sh . diff --git a/charts/k8s-monitoring/tests/platform/eks-with-windows/deployments/query-test.yaml b/charts/k8s-monitoring/tests/platform/eks-with-windows/deployments/query-test.yaml index 1ba5ac4e9..286198128 100644 --- a/charts/k8s-monitoring/tests/platform/eks-with-windows/deployments/query-test.yaml +++ b/charts/k8s-monitoring/tests/platform/eks-with-windows/deployments/query-test.yaml @@ -41,6 +41,8 @@ spec: type: promql - query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="clusterEvents"} type: promql + - query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="nodeLogs"} + type: promql - query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="podLogs", method="volumes"} type: promql - query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="integrations", sources="alloy"} @@ -83,6 +85,10 @@ spec: - query: count_over_time({cluster="$CLUSTER", job="integrations/kubernetes/eventhandler"}[1h]) type: logql + # Node logs + - query: count_over_time({cluster="$CLUSTER", 
job="integrations/kubernetes/journal"}[1h]) + type: logql + # Pod logs - - query: count_over_time({cluster="$CLUSTER", job!="integrations/kubernetes/eventhandler"}[1h]) + - query: count_over_time({cluster="$CLUSTER", job!~"integrations/kubernetes/eventhandler|integrations/kubernetes/journal"}[1h]) type: logql diff --git a/charts/k8s-monitoring/tests/platform/eks-with-windows/values.yaml b/charts/k8s-monitoring/tests/platform/eks-with-windows/values.yaml index e9582f791..9350b1122 100644 --- a/charts/k8s-monitoring/tests/platform/eks-with-windows/values.yaml +++ b/charts/k8s-monitoring/tests/platform/eks-with-windows/values.yaml @@ -34,6 +34,9 @@ clusterMetrics: clusterEvents: enabled: true +nodeLogs: + enabled: true + podLogs: enabled: true diff --git a/charts/k8s-monitoring/tests/platform/gke-autopilot/Makefile b/charts/k8s-monitoring/tests/platform/gke-autopilot/Makefile index 20ca9c2ea..d49a35f6e 100644 --- a/charts/k8s-monitoring/tests/platform/gke-autopilot/Makefile +++ b/charts/k8s-monitoring/tests/platform/gke-autopilot/Makefile @@ -17,3 +17,6 @@ deployments/grafana-cloud-credentials.yaml: --from-literal=LOKI_USER="$$GRAFANA_CLOUD_LOGS_USERNAME" \ --from-literal=LOKI_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \ -o yaml --dry-run=client >> $@ + +run-test: + ../../../../../scripts/run-cluster-test.sh . diff --git a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/Makefile b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/Makefile index 20ca9c2ea..4306406cf 100644 --- a/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/Makefile +++ b/charts/k8s-monitoring/tests/platform/grafana-cloud-features/k8s-monitoring/Makefile @@ -17,3 +17,6 @@ deployments/grafana-cloud-credentials.yaml: --from-literal=LOKI_USER="$$GRAFANA_CLOUD_LOGS_USERNAME" \ --from-literal=LOKI_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \ -o yaml --dry-run=client >> $@ + +run-test: + ../../../../../../scripts/run-cluster-test.sh . 
diff --git a/charts/k8s-monitoring/tests/platform/otlp-gateway/Makefile b/charts/k8s-monitoring/tests/platform/otlp-gateway/Makefile index 89f578963..fe4c0e07a 100644 --- a/charts/k8s-monitoring/tests/platform/otlp-gateway/Makefile +++ b/charts/k8s-monitoring/tests/platform/otlp-gateway/Makefile @@ -19,3 +19,6 @@ deployments/grafana-cloud-credentials.yaml: --from-literal=LOKI_USER="$$GRAFANA_CLOUD_LOGS_USERNAME" \ --from-literal=LOKI_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \ -o yaml --dry-run=client >> $@ + +run-test: + ../../../../../scripts/run-cluster-test.sh . diff --git a/charts/k8s-monitoring/tests/platform/remote-config/Makefile b/charts/k8s-monitoring/tests/platform/remote-config/Makefile index 570e413d6..ff437adca 100644 --- a/charts/k8s-monitoring/tests/platform/remote-config/Makefile +++ b/charts/k8s-monitoring/tests/platform/remote-config/Makefile @@ -17,3 +17,6 @@ deployments/grafana-cloud-credentials.yaml: --from-literal=PROMETHEUS_USER="$$GRAFANA_CLOUD_METRICS_USERNAME" \ --from-literal=PROMETHEUS_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \ -o yaml --dry-run=client >> $@ + +run-test: + ../../../../../scripts/run-cluster-test.sh .