Skip to content

Commit

Permalink
CI Localenv -> improve container log formats and promtail log parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
reimarstier committed Dec 20, 2024
1 parent 394b571 commit fd8120c
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 44 deletions.
5 changes: 5 additions & 0 deletions .ci/deploy/localenv/data/coturn/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,18 @@ TURN_TLS_PORT=${TURN_TLS_PORT:-5349}
TURN_PORT_START=${TURN_PORT_START:-49152}
TURN_PORT_END=${TURN_PORT_END:-65535}

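# Timestamp format justification: candidate strftime formats for
# --new-log-timestamp-format. None of them carries sub-second precision,
# so the milliseconds are lost in every case.
# %FT%T%z -> 2024-12-19T13:06:00+0000 (numeric UTC offset; the format used below)
# %FT%T%Z -> 2024-12-19T13:08:13UTC (time zone abbreviation)
# %s -> 1734614558 (seconds since the epoch)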

# Replace this shell with the turnserver process (exec), configured from the
# TURN_* variables set earlier in this script.
# Log lines are prefixed with a timestamp rendered from "%FT%T%z: "
# (e.g. "2024-12-19T13:06:00+0000: ") and written to ${TURN_LOG_FILE}.
# NOTE(review): "${TURN_EXTRA_ARGS}" is quoted, so its whole value reaches
# turnserver as one single argument (an empty argument if the variable is
# empty) -- confirm that this is intended.
exec turnserver -a -v -L 0.0.0.0 \
--server-name "${TURN_DOMAIN}" \
--realm="${TURN_REALM}" --listening-port "${TURN_PORT}" \
--user="${TURN_USER}:${TURN_PASSWORD}" \
--no-cli --no-software-attribute \
--fingerprint --lt-cred-mech \
--new-log-timestamp --new-log-timestamp-format="%FT%T%z: " \
--min-port "${TURN_PORT_START}" --max-port "${TURN_PORT_END}" \
--log-file="${TURN_LOG_FILE}" --simple-log "${TURN_EXTRA_ARGS}"

Expand Down
4 changes: 4 additions & 0 deletions .ci/deploy/localenv/data/keycloak/Dockerfile-keycloak-server
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,7 @@ RUN /opt/keycloak/bin/kc.sh show-config

ENTRYPOINT ["/opt/keycloak/bin/kc.sh"]

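# https://www.keycloak.org/server/logging#_console_log_handler
# The default Keycloak console format is '%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c] (%t) %s%e%n'.
# Its comma before the milliseconds and its missing time zone are incompatible with RFC 3339,
# so the format below uses a dot before the milliseconds and appends the time zone (z).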
CMD ["start", "--optimized", "--log-console-format='%d{yyyy-MM-dd HH:mm:ss.SSSz} %-5p [%c] (%t) %s%e%n'"]
177 changes: 136 additions & 41 deletions .ci/deploy/localenv/data/telemetry/promtail.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ clients:

# promtail watches docker for containers with the following labels: logging="promtail" and logging_job="job_name"

# General observations:
# - Loki does not preserve the order of log lines that share the same timestamp!
# - YAML processes backslash escapes inside double-quoted strings, so every
#   backslash in a regex expression must be doubled when the expression is written in double quotes.
#   https://grafana.com/docs/loki/latest/send-data/promtail/stages/regex/#schema
# Promtail stages:
#   docker: https://grafana.com/docs/loki/latest/send-data/promtail/stages/docker/
#   timestamp: https://grafana.com/docs/loki/latest/send-data/promtail/stages/timestamp/#reference-time
#   labels: adds fields that can be used for filtering in Grafana

scrape_configs:
- job_name: docker_logs_scrape
docker_sd_configs:
Expand All @@ -27,48 +37,149 @@ scrape_configs:
- source_labels: ['__meta_docker_container_label_logging_job']
target_label: 'job'
pipeline_stages:
# process listed containers and extract the "level" information
- match:
docker: { }
selector: '{job=~"traefik|netbird-coturn|netbird-dashboard|netbird-management|netbird-management-init|netbird-signal|keycloak|keycloak-init"}'
selector: '{job=~"traefik"}'
stages:
- static_labels:
level: DEFAULT
- json: #traefik
- json:
expressions:
level: level
msg: message # rename field 'msg' to 'message'
time: timestamp
message: msg # rename 'msg' from json to message
time: time
entryPointName: entryPointName
- regex: #traefik
expression: '.*level=(?P<level>[a-zA-Z]+).*'
- regex: #netbird-coturn
expression: '.*\([0-9]+\): (?P<level>[A-Z]+):.*'
- regex: #netbird-dashboard, keycloak
expression: '.*[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]+ (?P<level>[A-Z]+) .*'
- regex: #netbird-dashboard, keycloak
expression: '.*[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]+ (?P<level>[A-Z]+) .*'
- regex: #netbird-management, netbird-management-init, netbird-signal
expression: '.*(?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z) (?P<level>[A-Z]+) .*'
- regex: #keycloak-init
expression: '^(?P<level>(WARNING)?(INFO)?(DEBUG)?(ERROR)?(WARN)?):? .*'
ClientAddr: ClientAddr
RequestHost: RequestHost
RequestPath: RequestPath
ServiceName: ServiceName
- template: # cast the level value to upper case
source: level
template: '{{ ToUpper .Value }}'
# make labels available for filtering in loki
- timestamp:
source: time
format: RFC3339
# omitting output stage here because extracting 'msg' field will drop access logs of traefik which don't have this field
- labels:
level:
message:
timestamp:
entryPointName:
message:
ClientAddr:
RequestHost:
RequestPath:
ServiceName:

- match:
    docker: { }
    selector: '{job=~"netbird-coturn"}'
    stages:
      # Label applied first; the labels stage at the end replaces it when the
      # regex below extracted a level.
      - static_labels:
          level: DEFAULT
      # Expected line shape:
      #   <time>: (<thread>): <LEVEL>:[ session <id>:] <message>
      - regex:
          expression: '(?P<time>.+): \((?P<thread>[0-9]+)\): (?P<level>[A-Z]+):( session (?P<session>[0-9]+):)? (?P<message>.*)'
      # A strftime "%z" offset is printed as "+0000" (no colon), which the
      # RFC3339 layout does not accept, so spell out the Go reference layout
      # that matches e.g. "2024-12-19T13:06:00+0000".
      - timestamp:
          source: time
          format: '2006-01-02T15:04:05-0700'
      # Forward only the message part as the log line.
      - output:
          source: message
      - labels:
          level:
          session:

- match:
    docker: { }
    selector: '{job=~"netbird-management"}'
    stages:
      # Label applied first; the labels stage at the end replaces it when the
      # regex below extracted a level.
      - static_labels:
          level: DEFAULT
      # Expected line shape: <YYYY-MM-DDTHH:MM:SSZ> <LEVEL> <message>
      - regex:
          expression: '(?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z) (?P<level>[A-Z]+) (?P<message>.*)'
      - timestamp:
          source: timestamp
          format: RFC3339
      - template:
          source: level
          # netbird management log level has only four characters.
          # The pattern is anchored so that only the abbreviated "ERRO" is
          # expanded; an already complete "ERROR" must not become "ERRORR".
          template: '{{ regexReplaceAllLiteral "^ERRO$" .level "ERROR" }}'
      # Forward only the message part as the log line.
      - output:
          source: message
      - labels:
          level:

- match:
    docker: { }
    selector: '{job=~"keycloak"}'
    stages:
      # Label applied first; the labels stage at the end replaces it when the
      # regex below extracted a level.
      - static_labels:
          level: DEFAULT
      - regex:
          # this assumes following keycloak format: --log-console-format='%d{yyyy-MM-dd HH:mm:ss.SSSz} %-5p [%c] (%t) %s%e%n'
          # The dot before the milliseconds is escaped so that it only matches a literal ".".
          expression: '(?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+[A-Z]+)\s+(?P<level>[A-Z]+)\s+\[(?P<category>[^\]]+)\] \((?P<thread>[^\)]+)\) (?P<message>.*)'
      # The captured value looks like "2024-12-19 13:06:00.123UTC": a space
      # instead of "T" and a zone abbreviation instead of an offset, so it is
      # not RFC3339. Use the matching Go reference layout instead.
      # NOTE(review): assumes exactly three fractional digits (SSS) and an
      # upper-case zone abbreviation such as "UTC" -- confirm against real output.
      - timestamp:
          source: timestamp
          format: '2006-01-02 15:04:05.000MST'
      # Forward only the message part as the log line.
      - output:
          source: message
      - labels:
          level:
          category:
          thread:

# netbird-dashboard contains shell script output and nginx access log, regex TBD.
# Until a regex exists, no fields are extracted from these lines; the static
# label below is the only level information attached to them.
- match:
    docker: { }
    selector: '{job=~"netbird-dashboard"}'
    stages:
      - static_labels:
          level: DEBUG
      - labels:
          level:

- match:
    docker: { }
    selector: '{job=~"keycloak-init"}'
    stages:
      # there is no log level for most messages here, so start from DEBUG
      - static_labels:
          level: DEBUG
      # Optional leading level keyword, optional ": " separator(s), then the
      # rest of the line as the message.
      - regex:
          expression: '^(?P<level>WARNING|INFO|DEBUG|ERROR|WARN)?(:\s)*(?P<message>.+)'
      # Discard lines that are empty or contain only whitespace.
      - drop:
          expression: "^\\s*$"
      # Normalize WARNING to WARN.
      - template:
          source: level
          template: '{{ regexReplaceAllLiteral "WARNING" .level "WARN" }}'
      # Forward only the message part as the log line.
      - output:
          source: message
      - labels:
          level:

- match:
    docker: { }
    selector: '{job=~"netbird-signal"}'
    stages:
      # Label applied first; the labels stage at the end replaces it when the
      # regex below extracted a level.
      - static_labels:
          level: DEFAULT
      # Expected line shape:
      #   <YYYY-MM-DDTHH:MM:SSZ> <LEVEL> <location>:<line number>: <message>
      - regex:
          expression: '(?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z) (?P<level>[A-Z]+) (?P<location>[^:]+:\d+): (?P<message>.*)'
      - timestamp:
          source: timestamp
          format: RFC3339
      # Forward only the message part as the log line.
      - output:
          source: message
      - labels:
          level:
          location:

- match:
docker: { }
selector: '{job=~"keycloak-postgres|carl-postgres"}'
stages:
- multiline:
firstline: '^\d{4}-\d{2}-\d{2}'
max_wait_time: 3s
- regex:
expression: '^(?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+ [A-Z]+) \[[0-9]+\] (?P<level>[A-Z]+): (?P<message>.*)$'
# rename additional debug levels of postgres to one debug level
# https://www.postgresql.org/docs/current/runtime-config-logging.html#RUNTIME-CONFIG-SEVERITY-LEVELS
- template:
source: level
Expand All @@ -79,26 +190,10 @@ scrape_configs:
- template:
source: level
template: '{{ regexReplaceAllLiteral "ERROR|FATAL|PANIC" .level "ERROR" }}'
# The regex of this match block captures the timestamp as
# "YYYY-MM-DD HH:MM:SS.<fraction> <ZONE>" (space separated, zone abbreviation),
# which the RFC3339 layout cannot parse. Use the matching Go reference layout.
# NOTE(review): assumes a three-digit millisecond fraction -- confirm against
# the configured postgres log_line_prefix.
- timestamp:
    source: timestamp
    format: '2006-01-02 15:04:05.000 MST'
- output:
source: message
- labels:
level:
message:
timestamp:

# change level WARNING to WARN for all containers
- match:
docker: { }
selector: '{level="WARNING"}'
stages:
- static_labels:
level: WARN
# search for non-empty lines
- match:
docker: { }
selector: '{job="keycloak-init"} |~ "^\\s*$"'
stages:
- static_labels:
emptyline: EMPTYLINE
# drop lines not matched by previous condition
- match:
selector: '{job="keycloak-init", emptyline="EMPTYLINE"}'
action: drop
8 changes: 5 additions & 3 deletions .ci/deploy/localenv/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ services:
dockerfile: ./.ci/deploy/localenv/data/keycloak/Dockerfile-keycloak-server
args:
KEYCLOAK_VERSION: 22.0.2
command: ['start', '--optimized']
#command: ['start', '--optimized']
restart: unless-stopped
depends_on:
- keycloak-postgres
Expand Down Expand Up @@ -118,9 +118,11 @@ services:
- "--entrypoints.web.http.redirections.entryPoint.to=websecure"
- "--providers.file.directory=/etc/traefik/dynamic"
- "--providers.file.watch=true"
- "--log.level=DEBUG" # TODO: reduce level
# https://doc.traefik.io/traefik/observability/logs/#level default level=ERROR
- "--log.level=INFO"
- "--log.format=json"
- "--accesslog" # TODO: remove access log?
# https://doc.traefik.io/traefik/observability/access-logs/#format default is no access log
- "--accesslog"
- "--accesslog.format=json"
environment:
- "LEGO_CA_CERTIFICATES=/pki/insecure-development-ca.pem"
Expand Down

0 comments on commit fd8120c

Please sign in to comment.