Add the Node Logs feature to a test. (#1060)
* Add the Node Logs feature to a test.

Also add a run-test target to the platform test Makefiles to make it easier to run them locally.

* Update charts/k8s-monitoring/tests/platform/eks-with-windows/deployments/query-test.yaml

Co-authored-by: Stephen Lang <[email protected]>

---------

Co-authored-by: Stephen Lang <[email protected]>
petewall and skl authored Jan 7, 2025
1 parent bfeaad7 commit 8d3b3d8
Showing 8 changed files with 179 additions and 1 deletion.
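
In short, this enables the Node Logs feature in the eks-with-windows platform test and verifies that systemd journal logs reach Loki. A minimal sketch of the two pieces, drawn from the test's values file and query-test.yaml diffs below:

  # test values: enable the feature
  nodeLogs:
    enabled: true

  # query-test.yaml: expect journal logs in Loki
  - query: count_over_time({cluster="$CLUSTER", job="integrations/kubernetes/journal"}[1h])
    type: logql

The new run-test target lets each platform test be run locally: running make run-test from a test's directory invokes scripts/run-cluster-test.sh against that directory.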
@@ -937,6 +937,7 @@ data:
# TYPE grafana_kubernetes_monitoring_feature_info gauge
grafana_kubernetes_monitoring_feature_info{deployments="kube-state-metrics,node-exporter,windows-exporter", feature="clusterMetrics", sources="kubelet,kubeletResource,cadvisor,kube-state-metrics,node-exporter,windows-exporter", version="1.0.0"} 1
grafana_kubernetes_monitoring_feature_info{feature="clusterEvents", version="1.0.0"} 1
grafana_kubernetes_monitoring_feature_info{feature="nodeLogs", version="1.0.0"} 1
grafana_kubernetes_monitoring_feature_info{feature="podLogs", method="volumes", version="1.0.0"} 1
grafana_kubernetes_monitoring_feature_info{feature="integrations", sources="alloy", version="1.0.0"} 1
---
@@ -979,6 +980,159 @@ data:
namespace = "default"
}
// Feature: Node Logs
declare "node_logs" {
argument "logs_destinations" {
comment = "Must be a list of log destinations where collected logs should be forwarded to"
}
loki.relabel "journal" {
// copy all journal labels and make them available to the pipeline stages as labels. A label_keep
// stage is defined below to filter out unwanted labels; these pipeline labels can also be set as
// structured metadata. The following labels are available:
// - boot_id
// - cap_effective
// - cmdline
// - comm
// - exe
// - gid
// - hostname
// - machine_id
// - pid
// - stream_id
// - systemd_cgroup
// - systemd_invocation_id
// - systemd_slice
// - systemd_unit
// - transport
// - uid
//
// More Info: https://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html
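// e.g. the labelmap rule below turns __journal__systemd_unit="kubelet.service" into systemd_unit="kubelet.service"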
rule {
action = "labelmap"
regex = "__journal__(.+)"
}
rule {
action = "replace"
source_labels = ["__journal__systemd_unit"]
replacement = "$1"
target_label = "unit"
}
// the service_name label will be set automatically by Loki if it is not already set, but it will not
// be derived from the unit label, so set it explicitly here.
rule {
action = "replace"
source_labels = ["__journal__systemd_unit"]
replacement = "$1"
target_label = "service_name"
}
forward_to = [] // forward_to is required but unused here; the relabel rules defined above are consumed by the loki.source.journal component
}
loki.source.journal "worker" {
path = "/var/log/journal"
format_as_json = false
max_age = "8h"
relabel_rules = loki.relabel.journal.rules
labels = {
job = "integrations/kubernetes/journal",
instance = sys.env("HOSTNAME"),
}
forward_to = [loki.process.journal_logs.receiver]
}
loki.process "journal_logs" {
stage.static_labels {
values = {
// add a static source label to the logs so they can be differentiated / restricted if necessary
"source" = "journal",
// default level to unknown
level = "unknown",
}
}
// Attempt to determine the log level; most k8s node services log in either logfmt or klog format.
// First check whether the log line matches the klog format (https://github.com/kubernetes/klog)
stage.match {
// unescaped regex: ([IWED][0-9]{4}\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+)
selector = "{level=\"unknown\"} |~ \"([IWED][0-9]{4}\\\\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\\\\.[0-9]+)\""
// extract the log level; klog uses a single-letter code for the level followed by the month and day, e.g. I0119
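// e.g. a line like "I0119 10:15:30.123456 1 controller.go:85] Starting workers" yields level="I", which the stages below map to INFO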
stage.regex {
expression = "((?P<level>[A-Z])[0-9])"
}
// if the extracted level is I set INFO
stage.replace {
source = "level"
expression = "(I)"
replace = "INFO"
}
// if the extracted level is W set WARN
stage.replace {
source = "level"
expression = "(W)"
replace = "WARN"
}
// if the extracted level is E set ERROR
stage.replace {
source = "level"
expression = "(E)"
replace = "ERROR"
}
// if the extracted level is D set DEBUG
stage.replace {
source = "level"
expression = "(D)"
replace = "DEBUG"
}
// set the extracted level to be a label
stage.labels {
values = {
level = "",
}
}
}
// if the level is still unknown, make one last attempt at detecting it based on common level keywords
stage.match {
selector = "{level=\"unknown\"}"
// unescaped regex: (?i)(?:"(?:level|loglevel|levelname|lvl|levelText|SeverityText)":\s*"|\s*(?:level|loglevel|levelText|lvl)="?|\s+\[?)(?P<level>(DEBUG?|DBG|INFO?(RMATION)?|WA?RN(ING)?|ERR(OR)?|CRI?T(ICAL)?|FATAL|FTL|NOTICE|TRACE|TRC|PANIC|PNC|ALERT|EMERGENCY))("|\s+|-|\s*\])
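// e.g. this matches level=info in logfmt lines, "level": "ERROR" in JSON lines, and [WARN] in bracket-prefixed lines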
stage.regex {
expression = "(?i)(?:\"(?:level|loglevel|levelname|lvl|levelText|SeverityText)\":\\s*\"|\\s*(?:level|loglevel|levelText|lvl)=\"?|\\s+\\[?)(?P<level>(DEBUG?|DBG|INFO?(RMATION)?|WA?RN(ING)?|ERR(OR)?|CRI?T(ICAL)?|FATAL|FTL|NOTICE|TRACE|TRC|PANIC|PNC|ALERT|EMERGENCY))(\"|\\s+|-|\\s*\\])"
}
// set the extracted level to be a label
stage.labels {
values = {
level = "",
}
}
}
// Only keep the labels that are defined in the `keepLabels` list.
stage.label_keep {
values = ["instance","job","level","name","unit","service_name","source"]
}
forward_to = argument.logs_destinations.value
}
}
node_logs "feature" {
logs_destinations = [
loki.write.grafana_cloud_logs.receiver,
]
}
// Feature: Pod Logs
declare "pod_logs" {
argument "logs_destinations" {
@@ -17,3 +17,6 @@ deployments/grafana-cloud-credentials.yaml:
--from-literal=LOKI_USER="$$GRAFANA_CLOUD_LOGS_USERNAME" \
--from-literal=LOKI_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \
-o yaml --dry-run=client >> $@

run-test:
../../../../../scripts/run-cluster-test.sh .
@@ -41,6 +41,8 @@ spec:
type: promql
- query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="clusterEvents"}
type: promql
- query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="nodeLogs"}
type: promql
- query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="podLogs", method="volumes"}
type: promql
- query: grafana_kubernetes_monitoring_feature_info{cluster="$CLUSTER", feature="integrations", sources="alloy"}
@@ -83,6 +85,10 @@ spec:
- query: count_over_time({cluster="$CLUSTER", job="integrations/kubernetes/eventhandler"}[1h])
type: logql

# Node logs
- query: count_over_time({cluster="$CLUSTER", job="integrations/kubernetes/journal"}[1h])
type: logql

# Pod logs
- query: count_over_time({cluster="$CLUSTER", job!="integrations/kubernetes/eventhandler"}[1h])
- query: count_over_time({cluster="$CLUSTER", job!~"integrations/kubernetes/eventhandler|integrations/kubernetes/journal"}[1h])
type: logql
@@ -34,6 +34,9 @@ clusterMetrics:
clusterEvents:
enabled: true

nodeLogs:
enabled: true

podLogs:
enabled: true

3 changes: 3 additions & 0 deletions charts/k8s-monitoring/tests/platform/gke-autopilot/Makefile
@@ -17,3 +17,6 @@ deployments/grafana-cloud-credentials.yaml:
--from-literal=LOKI_USER="$$GRAFANA_CLOUD_LOGS_USERNAME" \
--from-literal=LOKI_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \
-o yaml --dry-run=client >> $@

run-test:
../../../../../scripts/run-cluster-test.sh .
@@ -17,3 +17,6 @@ deployments/grafana-cloud-credentials.yaml:
--from-literal=LOKI_USER="$$GRAFANA_CLOUD_LOGS_USERNAME" \
--from-literal=LOKI_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \
-o yaml --dry-run=client >> $@

run-test:
../../../../../../scripts/run-cluster-test.sh .
3 changes: 3 additions & 0 deletions charts/k8s-monitoring/tests/platform/otlp-gateway/Makefile
@@ -19,3 +19,6 @@ deployments/grafana-cloud-credentials.yaml:
--from-literal=LOKI_USER="$$GRAFANA_CLOUD_LOGS_USERNAME" \
--from-literal=LOKI_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \
-o yaml --dry-run=client >> $@

run-test:
../../../../../scripts/run-cluster-test.sh .
3 changes: 3 additions & 0 deletions charts/k8s-monitoring/tests/platform/remote-config/Makefile
@@ -17,3 +17,6 @@ deployments/grafana-cloud-credentials.yaml:
--from-literal=PROMETHEUS_USER="$$GRAFANA_CLOUD_METRICS_USERNAME" \
--from-literal=PROMETHEUS_PASS="$$GRAFANA_CLOUD_RW_POLICY_TOKEN" \
-o yaml --dry-run=client >> $@

run-test:
../../../../../scripts/run-cluster-test.sh .
