Skip to content

Commit

Permalink
Add perfdata support
Browse files Browse the repository at this point in the history
This commit reworks the condition- and result-handling code
to add support for perfdata and multi-line output.
  • Loading branch information
Robert Wikman authored Jan 23, 2020
1 parent 3918448 commit f0aee9f
Show file tree
Hide file tree
Showing 17 changed files with 370 additions and 249 deletions.
23 changes: 9 additions & 14 deletions check_k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,13 @@
import traceback
import json

from collections import namedtuple
from urllib.error import URLError, HTTPError

from k8s.components import MAPPINGS
from k8s.cli import parse_cmdline
from k8s.http import build_url, request
from k8s.consts import NAGIOS_MSG, Severity
from k8s.exceptions import PluginException


Output = namedtuple("Output", ["state", "message", "channel"])
from k8s.consts import NAGIOS_MSG, NaemonState
from k8s.result import Output


def main():
Expand All @@ -38,28 +34,27 @@ def main():
# Request and check health data
try:
response, status = request(url, token=parsed.token, insecure=parsed.insecure)
result = health_check(response)
output = Output(Severity.OK, result, sys.stdout)
except PluginException as e:
output = Output(e.state, e.message, sys.stderr)
output = health_check(response).output
if not isinstance(output, Output):
raise TypeError("Unknown health check format")
except HTTPError as e:
body = json.loads(e.read().decode("utf8"))
output = Output(
Severity.UNKNOWN,
NaemonState.UNKNOWN,
"{0}: {1}".format(e.code, body.get("message")),
sys.stderr
)
except URLError as e:
output = Output(Severity.UNKNOWN, e.reason, sys.stderr)
output = Output(NaemonState.UNKNOWN, e.reason, sys.stderr)
except Exception as e:
if parsed.debug:
exc_type, exc_value, exc_traceback = sys.exc_info()
traceback.print_tb(exc_traceback, file=sys.stdout)

output = Output(Severity.UNKNOWN, e, sys.stderr)
output = Output(NaemonState.UNKNOWN, e, sys.stderr)

msg = NAGIOS_MSG.format(state=output.state.name, message=output.message)
print(msg, file=output.channel)
output.channel.write(msg)
sys.exit(output.state.value)


Expand Down
19 changes: 2 additions & 17 deletions k8s/components/deployment/check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from k8s.exceptions import NagiosCritical, NagiosWarning
from k8s.result import Result

from .resource import Deployment

Expand All @@ -14,19 +14,4 @@ def check_deployments(items):
:return: Deployments health summary
"""

for item in items:
deployment = Deployment(item)
reps = deployment.replicas

if deployment.alerts_critical:
raise NagiosCritical(deployment.alerts_critical[0])
elif deployment.alerts_warning:
raise NagiosWarning(deployment.alerts_warning[0])

if reps.available < reps.total or reps.updated < reps.total:
if reps.available != 0 and reps.updated != 0:
raise NagiosWarning("Deployment degraded", **deployment.meta)

raise NagiosCritical("Deployment unavailable", **deployment.meta)

return "Found {} healthy Deployments".format(len(items))
return Result(Deployment, items)
40 changes: 29 additions & 11 deletions k8s/components/deployment/resource.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,40 @@
from collections import namedtuple
from enum import Enum

from k8s.consts import NaemonState

from ..resource import Resource, NaemonStatus

from k8s.resource import Resource
from k8s.consts import Severity

Replicas = namedtuple("Replicas", ["total", "ready", "updated", "available"])


class Deployment(Resource):
def __init__(self, data):
super(Deployment, self).__init__(data)
# Labels for the states a Deployment can be reported in.
# NOTE(review): _get_status reads these names through self.perf, so the
# Resource base class presumably exposes this enum as self.perf and uses the
# values as perfdata keys — confirm in ..resource.
class PerfMap(Enum):
AVAILABLE = "available"
UNAVAILABLE = "unavailable"
DEGRADED = "degraded"
NOREPS = "noreps"

def __init__(self, data, *args, **kwargs):
super(Deployment, self).__init__(data, *args, **kwargs)

self.replicas = Replicas(
self._status["replicas"],
self._status["readyReplicas"],
self._status["updatedReplicas"],
self._status["availableReplicas"]
self._status.get("replicas", 0),
self._status.get("readyReplicas", 0),
self._status.get("updatedReplicas", 0),
self._status.get("availableReplicas", 0)
)

def _condition_severity(self, _type, status):
if _type == "Available" and status != "True":
return Severity.CRITICAL
def _get_status(self, cnd_type, cnd_status):
    """Translate one Deployment condition into a NaemonStatus.

    An "Available" condition is decided by its status alone. Any other
    condition type is judged by the replica counters instead.

    :param cnd_type: Condition type; only "Available" is matched explicitly
    :param cnd_status: Condition status; compared against the string "True"
    :return: NaemonStatus, or None when the replicas are fully rolled out
    """

    counts = self.replicas

    if cnd_type == "Available":
        if cnd_status != "True":
            return NaemonStatus(NaemonState.CRITICAL, self.perf.UNAVAILABLE)
        return NaemonStatus(NaemonState.OK, self.perf.AVAILABLE)

    # Fewer available or updated replicas than the total means trouble.
    lagging = counts.available < counts.total or counts.updated < counts.total
    if not lagging:
        return None

    # Nothing available or nothing updated at all is worse than a partial rollout.
    if counts.available == 0 or counts.updated == 0:
        return NaemonStatus(NaemonState.CRITICAL, self.perf.NOREPS)

    return NaemonStatus(NaemonState.WARNING, self.perf.DEGRADED)
15 changes: 2 additions & 13 deletions k8s/components/node/check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from k8s.exceptions import NagiosCritical, NagiosWarning
from k8s.result import Result

from .resource import Node

Expand All @@ -14,15 +14,4 @@ def check_nodes(items):
:return: Nodes health summary
"""

for item in items:
node = Node(item)

if node.alerts_critical:
raise NagiosCritical(node.alerts_critical[0])
elif node.alerts_warning:
raise NagiosWarning(node.alerts_warning[0])

if node.unschedulable:
raise NagiosWarning("Node {} is ready, but unschedulable".format(node.meta["name"]))

return "Found {} healthy Nodes".format(len(items))
return Result(Node, items)
36 changes: 27 additions & 9 deletions k8s/components/node/resource.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,34 @@
from k8s.resource import Resource
from k8s.consts import Severity
from enum import Enum

from k8s.consts import NaemonState

from ..resource import Resource, NaemonStatus


class Node(Resource):
def __init__(self, data):
super(Node, self).__init__(data)
# Labels for the states a Node can be reported in.
# NOTE(review): _get_status reads these names through self.perf, so the
# Resource base class presumably exposes this enum as self.perf and uses the
# values as perfdata keys — confirm in ..resource.
class PerfMap(Enum):
AVAILABLE = "available"
UNAVAILABLE = "unavailable"
DEGRADED = "degraded"
UNSCHEDULABLE = "unschedulable"

def __init__(self, data, *args, **kwargs):
super(Node, self).__init__(data, *args, **kwargs)

# https://kubernetes.io/docs/concepts/architecture/nodes/#manual-node-administration
self.unschedulable = data["spec"].get("unschedulable", False)

def _condition_severity(self, _type, status):
if _type == "Ready" and status != "True":
return Severity.CRITICAL
elif _type != "Ready" and status == "True":
return Severity.WARNING
def _get_status(self, cnd_type, cnd_status):
    """Translate one Node condition into a NaemonStatus.

    An unschedulable Node always yields a warning, whatever the condition.
    Otherwise "Ready" must be "True", and any other condition type that is
    "True" marks the Node as degraded.

    :param cnd_type: Condition type; only "Ready" is matched explicitly
    :param cnd_status: Condition status; compared against the string "True"
    :return: NaemonStatus, or None for a non-"Ready" condition that is not "True"
    """

    if self.unschedulable:
        note = "Node {} is ready, but unschedulable".format(self.meta["name"])
        return NaemonStatus(NaemonState.WARNING, self.perf.UNSCHEDULABLE, note)

    if cnd_type == "Ready":
        if cnd_status != "True":
            return NaemonStatus(NaemonState.CRITICAL, self.perf.UNAVAILABLE)
        return NaemonStatus(NaemonState.OK, self.perf.AVAILABLE)

    # Only non-"Ready" condition types reach this point.
    if cnd_status == "True":
        return NaemonStatus(NaemonState.WARNING, self.perf.DEGRADED)

    return None
18 changes: 2 additions & 16 deletions k8s/components/pod/check.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from k8s.exceptions import NagiosCritical, NagiosWarning
from k8s.result import Result

from .resource import Pod
from .consts import Phase


def check_pods(items):
Expand All @@ -15,17 +14,4 @@ def check_pods(items):
:return: Pods health summary
"""

for item in items:
pod = Pod(item)

if pod.phase == Phase.pending:
raise NagiosWarning("{kind} {name} is {0}".format(pod.phase.value, **pod.meta))
elif pod.phase != Phase.running and pod.phase != Phase.succeeded:
raise NagiosCritical("Unexpected Phase for {kind} {name}: {0}".format(pod.phase.value, **pod.meta))

if pod.alerts_critical:
raise NagiosCritical(pod.alerts_critical[0])
elif pod.alerts_warning:
raise NagiosWarning(pod.alerts_warning[0])

return "Found {} healthy Pods".format(len(items))
return Result(Pod, items)
2 changes: 1 addition & 1 deletion k8s/components/pod/consts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from enum import Enum

CONDITIONS_HEALTHY = ["Ready", "Initialized", "PodScheduled", "ContainersReady"]
STATUSES = ["Ready", "Initialized", "PodScheduled", "ContainersReady"]


class ContainerState(Enum):
Expand Down
45 changes: 31 additions & 14 deletions k8s/components/pod/resource.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from k8s.resource import Resource
from k8s.consts import Severity
from enum import Enum

from .consts import ContainerState, Phase, CONDITIONS_HEALTHY
from k8s.consts import NaemonState

from ..resource import Resource, NaemonStatus

from .consts import ContainerState, Phase, STATUSES


class Container:
Expand All @@ -12,21 +15,35 @@ def __init__(self, data):
# Container State is a single-item dict, with a nested dict value.
# https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.15/#containerstate-v1-core
state = list(data["state"].keys())

# Ensure state is known
self.state = ContainerState(state[0])


class Pod(Resource):
def __init__(self, data):
super(Pod, self).__init__(data)
# Labels for the states a Pod can be reported in.
# NOTE(review): _get_status reads these names through self.perf, so the
# Resource base class presumably exposes this enum as self.perf and uses the
# values as perfdata keys — confirm in ..resource.
class PerfMap(Enum):
AVAILABLE = "available"
UNAVAILABLE = "unavailable"
DEGRADED = "degraded"
PENDING = "pending"

self.containers = [Container(c) for c in self._status["containerStatuses"]]
self.phase = Phase(self._status["phase"])
def __init__(self, data, *args, **kwargs):
super(Pod, self).__init__(data, *args, **kwargs)

def _condition_severity(self, _type, status):
if _type in CONDITIONS_HEALTHY and status != "True":
return Severity.CRITICAL
elif _type not in CONDITIONS_HEALTHY and status == "True":
return Severity.WARNING
self.containers = [Container(c) for c in self._status.get("containerStatuses", [])]
self.phase = Phase(self._status["phase"])

def _get_status(self, cnd_type, cnd_status):
    """Translate one Pod condition into a NaemonStatus.

    The Pod phase is checked before the condition: a pending Pod is a
    warning, and any other phase besides running or succeeded is critical.
    Only then is the condition evaluated: types listed in STATUSES must be
    "True", and any other type that is "True" marks the Pod as degraded.

    :param cnd_type: Condition type; checked for membership in STATUSES
    :param cnd_status: Condition status; compared against the string "True"
    :return: NaemonStatus, or None when the condition is not in STATUSES
             and is not "True"
    """

    # Pending has to be tested before the generic phase check below: it is
    # neither running nor succeeded, so it would otherwise be reported as
    # CRITICAL and the PENDING warning could never be returned.
    if self.phase == Phase.pending:
        return NaemonStatus(NaemonState.WARNING, self.perf.PENDING)

    if self.phase != Phase.running and self.phase != Phase.succeeded:
        return NaemonStatus(
            NaemonState.CRITICAL,
            self.perf.UNAVAILABLE,
            "Unexpected Phase for {kind} {name}: {0}".format(self.phase.value, **self.meta)
        )

    if cnd_type in STATUSES:
        if cnd_status == "True":
            return NaemonStatus(NaemonState.OK, self.perf.AVAILABLE)
        return NaemonStatus(NaemonState.CRITICAL, self.perf.UNAVAILABLE)

    # Only condition types outside STATUSES reach this point.
    if cnd_status == "True":
        return NaemonStatus(NaemonState.WARNING, self.perf.DEGRADED)

    return None
Loading

0 comments on commit f0aee9f

Please sign in to comment.