Skip to content

Commit

Permalink
simplify
Browse files Browse the repository at this point in the history
  • Loading branch information
stavros-k committed Apr 29, 2024
1 parent 37350fd commit 622f37e
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 165 deletions.
163 changes: 80 additions & 83 deletions tools/docker-health-check/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,110 +88,109 @@ func main() {
// normalizeContainerName returns the first entry of n with every leading "/"
// removed. It returns an empty string when n has no entries instead of
// panicking on the out-of-range index.
func normalizeContainerName(n []string) string {
	if len(n) == 0 {
		return ""
	}

	return strings.TrimLeft(n[0], "/")
}
func checkContainer(c types.Container, checksCh chan utils.Result) {

func checkContainerWithHealthCheck(c types.Container) (utils.Result, error) {
start := time.Now()
var res utils.Result
res.Name = normalizeContainerName(c.Names)

res.HasCheck, _ = utils.HasHealthCheck(c.ID)
if !res.HasCheck { // If there is no HC, and container exited with 0, mark it healthy
fmt.Printf("[WARN] Container [%s] has no health check\n", res.Name)
exited, _ := utils.IsExited(c.ID)
exitCode, _ := utils.GetExitCode(c.ID)
if exited {
if exitCode == 0 {
fmt.Printf("Container [%s] has exited with a zero exit code\n", res.Name)
handleHealthy(&c, &res)
checksCh <- res
return
} else {
handleNonZeroExitCode(exitCode, &c, &res)
checksCh <- res
return
}
}

result := utils.Result{
Name: normalizeContainerName(c.Names),
HasCheck: true,
}

running, _ := utils.IsRunning(c.ID)
if !running {
exitCode, _ := utils.GetExitCode(c.ID)
if res.ExitCode != 0 {
handleNonZeroExitCode(exitCode, &c, &res)
checksCh <- res
return
} else if !res.HasCheck {
fmt.Printf("Container [%s] is not running and has no health check\n", res.Name)
handleHealthy(&c, &res)
checksCh <- res
return
var err error
for {
result.InspectData, err = utils.GetInspectData(c.ID)
if err != nil {
return result, err
}
}

// If its running, and does not have a health check
// assume it is healthy and stop checking
health, _ := utils.GetHealth(c.ID)
if !res.HasCheck {
count := 10
// There are cases where health is empty initially, so check a few times
for health == "" && count > 0 {
fmt.Printf("Container [%s] has an empty health state\n", res.Name)
health, _ = utils.GetHealth(c.ID)
time.Sleep(2 * time.Second)
count--
result.ExitCode = utils.GetExitCode(result.InspectData)
if utils.IsHealthy(result.InspectData) {
handleHealthy(&c, &result)
return result, nil
}
if health == "running" {
res.Healthy = true
res.Logs, _ = utils.GetLogs(c.ID)
checksCh <- res
return
} else if health == "" {
// Ignore this case for now
fmt.Printf("Container [%s] has no health check and has an empty health state, Doing nothing\n", res.Name)
} else {
// Log any other states so we can see how to handle them
fmt.Printf("Container [%s] has a health state of [%s]\n", res.Name, health)
res.Healthy = false
res.ExitCode, _ = utils.GetExitCode(c.ID)
res.Logs, _ = utils.GetLogs(c.ID)
res.InspectData, _ = utils.GetInspectData(c.ID)
checksCh <- res
return

if time.Since(start) > timeout {
handleTimeout(&c, &result)
return result, nil
}

// Wait 2 seconds to avoid spamming
time.Sleep(2 * time.Second)
}
}

// If its running and has a health check, keep checking
// until it is healthy or timeout is reached
func checkContainerWithNoHealthCheck(c types.Container) (utils.Result, error) {
start := time.Now()

result := utils.Result{
Name: normalizeContainerName(c.Names),
HasCheck: false,
}

var err error
for {
health, _ = utils.GetHealth(c.ID)
result.InspectData, err = utils.GetInspectData(c.ID)
if err != nil {
return result, err
}

result.ExitCode = utils.GetExitCode(result.InspectData)
if utils.IsExited(result.InspectData) {
if utils.IsZeroExitCode(result.InspectData) {
handleHealthy(&c, &result)
return result, nil
}

handleNonZeroExitCode(&c, &result)

return result, nil
}

// If its healthy, stop checking
if health == "healthy" {
handleHealthy(&c, &res)
checksCh <- res
return
// TODO: For a container without health check that has no exited
// we cannot really be sure if its a stuck init container or
// just a container that should run for ever but missing a health check
// For now we assume it is healthy
if utils.IsRunning(result.InspectData) {
result.Healthy = true
result.Logs, _ = utils.GetLogs(c.ID)
return result, nil
}

// Stop after the timeout is reached
if time.Since(start) > timeout {
handleTimeout(&c, &res)
checksCh <- res
return
result.Healthy = false
result.Logs, _ = utils.GetLogs(c.ID)
return result, nil
}

// Sleep for 2 seconds to avoid spamming the API
// Wait 2 seconds to avoid spamming
time.Sleep(2 * time.Second)
}
}

func handleNonZeroExitCode(exitCode int, c *types.Container, res *utils.Result) {
fmt.Printf("Container [%s] has a non-zero exit code, will be marked unhealthy\n", c.ID)
// checkContainer inspects the container once to decide whether it defines a
// health check, runs the matching check routine, and sends exactly one result
// on checksCh (as the previous implementation did on every path).
//
// Errors are reported on stdout rather than dropped. When the initial inspect
// fails, a result carrying only the container name (Healthy left false) is
// sent instead of evaluating the health check on data that was never filled in.
func checkContainer(c types.Container, checksCh chan utils.Result) {
	name := normalizeContainerName(c.Names)

	container, err := utils.GetInspectData(c.ID)
	if err != nil {
		fmt.Printf("[ERROR] Container [%s] could not be inspected: %v\n", name, err)
		checksCh <- utils.Result{Name: name}
		return
	}

	var result utils.Result
	if utils.HasHealthCheck(container) {
		result, err = checkContainerWithHealthCheck(c)
	} else {
		fmt.Printf("[WARN] Container [%s] has no health check\n", name)
		result, err = checkContainerWithNoHealthCheck(c)
	}
	if err != nil {
		// The check routines return the partially filled result alongside the
		// error; it is still forwarded below, with Healthy left false.
		fmt.Printf("[ERROR] Container [%s] check failed: %v\n", name, err)
	}

	checksCh <- result
}

// handleNonZeroExitCode marks res as unhealthy and fatal for a container that
// stopped with a non-zero exit code, and attaches the container logs. The exit
// code is read from res.ExitCode, which the caller is expected to have set.
// Failed probe output is attached only when the container has a health check.
func handleNonZeroExitCode(c *types.Container, res *utils.Result) {
	fmt.Printf("Container [%s] has a non-zero [%d] exit code, will be marked unhealthy\n", c.ID, res.ExitCode)
	res.Healthy = false
	res.Fatal = true

	// Best effort: whatever log output could be read is kept even on error.
	res.Logs, _ = utils.GetLogs(c.ID)

	// Refresh the inspect data, but keep what the caller already stored if the
	// refresh fails instead of overwriting it.
	if data, err := utils.GetInspectData(c.ID); err == nil {
		res.InspectData = data
	}

	if res.HasCheck {
		// Probe logs are derived from the inspect data already on res.
		res.ProbeLogs, _ = utils.GetFailedProbeLogs(res.InspectData)
	}
}

Expand All @@ -200,13 +199,11 @@ func handleTimeout(c *types.Container, res *utils.Result) {
res.Healthy = false
res.TimedOut = true
res.Logs, _ = utils.GetLogs(c.ID)
res.ProbeLogs, _ = utils.GetFailedProbeLogs(c.ID)
res.InspectData, _ = utils.GetInspectData(c.ID)
res.ProbeLogs, _ = utils.GetFailedProbeLogs(res.InspectData)
}

// handleHealthy marks res as healthy and attaches the container logs and the
// latest inspect data.
func handleHealthy(c *types.Container, res *utils.Result) {
	fmt.Printf("Container [%s] is marked healthy\n", c.ID)
	res.Healthy = true

	// Best effort: whatever log output could be read is kept even on error.
	res.Logs, _ = utils.GetLogs(c.ID)

	// Only replace the stored inspect data when the refresh succeeds, so a
	// failed call cannot wipe data the caller already collected.
	if data, err := utils.GetInspectData(c.ID); err == nil {
		res.InspectData = data
	}
}
43 changes: 43 additions & 0 deletions tools/docker-health-check/pkg/utils/container.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package utils

import (
"strings"

"github.com/docker/docker/api/types"
)

// IsHealthy reports whether the container's health status is "healthy".
// It returns false when the inspect data carries no state or no health
// section, rather than dereferencing a nil pointer.
func IsHealthy(c types.ContainerJSON) bool {
	if c.ContainerJSONBase == nil || c.State == nil || c.State.Health == nil {
		return false
	}

	return c.State.Health.Status == "healthy"
}

// IsExited reports whether the container's state status is "exited".
func IsExited(c types.ContainerJSON) bool {
	status := c.State.Status
	return status == "exited"
}

// IsZeroExitCode reports whether the exit code recorded in the container
// state is 0.
func IsZeroExitCode(c types.ContainerJSON) bool {
	code := c.State.ExitCode
	return code == 0
}

// GetExitCode returns the exit code recorded in the container state.
func GetExitCode(c types.ContainerJSON) int {
	state := c.State
	return state.ExitCode
}

// IsRunning reports whether the container's state status is "running".
func IsRunning(c types.ContainerJSON) bool {
	status := c.State.Status
	return status == "running"
}

// HasHealthCheck reports whether the container configuration defines a health
// check with a non-empty test command. It returns false when the inspect data
// has no configuration at all (for example a zero-value struct), rather than
// dereferencing a nil pointer.
func HasHealthCheck(c types.ContainerJSON) bool {
	if c.Config == nil || c.Config.Healthcheck == nil {
		return false
	}

	return len(c.Config.Healthcheck.Test) > 0
}

// GetFailedProbeLogs concatenates the output of every health probe entry that
// has a non-zero exit code and non-empty output, one entry per line.
// It returns an empty string when the inspect data carries no state or no
// health section. The returned error is always nil; it is kept so the
// signature stays compatible with existing callers.
func GetFailedProbeLogs(c types.ContainerJSON) (string, error) {
	if c.ContainerJSONBase == nil || c.State == nil || c.State.Health == nil {
		return "", nil
	}

	var buf strings.Builder
	for _, probe := range c.State.Health.Log {
		if probe == nil || probe.ExitCode == 0 || probe.Output == "" {
			continue
		}
		buf.WriteString(probe.Output)
		buf.WriteString("\n")
	}

	return buf.String(), nil
}
82 changes: 0 additions & 82 deletions tools/docker-health-check/pkg/utils/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"io"
"log"
"strings"

"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
Expand Down Expand Up @@ -35,22 +34,6 @@ func GetInspectData(cID string) (types.ContainerJSON, error) {
return container, nil
}

// HasHealthCheck inspects the container identified by cID and reports whether
// its configuration defines a health check with a non-empty test command.
// An inspect failure is returned wrapped, together with false.
func HasHealthCheck(cID string) (bool, error) {
	inspected, err := GetInspectData(cID)
	if err != nil {
		return false, fmt.Errorf("failed to check if container has a health check: %w", err)
	}

	hc := inspected.Config.Healthcheck
	return hc != nil && len(hc.Test) > 0, nil
}

// GetLogs returns the logs of the container
func GetLogs(cID string) (string, error) {
body, err := apiClient.ContainerLogs(context.Background(), cID,
Expand All @@ -70,71 +53,6 @@ func GetLogs(cID string) (string, error) {
return string(out), err
}

// GetExitCode inspects the container identified by cID and returns the exit
// code recorded in its state. An inspect failure is returned wrapped,
// together with 0.
func GetExitCode(cID string) (int, error) {
	inspected, err := GetInspectData(cID)
	if err == nil {
		return inspected.State.ExitCode, nil
	}

	return 0, fmt.Errorf("failed to get exit code: %w", err)
}

// GetFailedProbeLogs inspects the container identified by cID and concatenates
// the output of every health probe entry that has a non-zero exit code and
// non-empty output, one entry per line. An inspect failure is returned wrapped.
// A container whose state has no health section yields an empty string,
// matching the nil check GetHealth performs.
func GetFailedProbeLogs(cID string) (string, error) {
	inspected, err := GetInspectData(cID)
	if err != nil {
		return "", fmt.Errorf("failed to get failed probe logs: %w", err)
	}
	if inspected.State.Health == nil {
		return "", nil
	}

	var buf strings.Builder
	for _, probe := range inspected.State.Health.Log {
		if probe.ExitCode != 0 && probe.Output != "" {
			buf.WriteString(probe.Output)
			buf.WriteString("\n")
		}
	}

	return buf.String(), nil
}

// IsRunning reports whether the state of the container identified by cID is
// "running". A failure to read the state is returned wrapped, together with
// false.
func IsRunning(cID string) (bool, error) {
	status, err := GetState(cID)
	if err == nil {
		return status == "running", nil
	}

	return false, fmt.Errorf("failed to get running state: %w", err)
}

// IsExited reports whether the state of the container identified by cID is
// "exited". A failure to read the state is returned wrapped, together with
// false.
func IsExited(cID string) (bool, error) {
	status, err := GetState(cID)
	if err == nil {
		return status == "exited", nil
	}

	return false, fmt.Errorf("failed to get exited status: %w", err)
}

// GetState inspects the container identified by cID and returns the status
// string from its state. An inspect failure is returned wrapped, together
// with an empty string.
func GetState(cID string) (string, error) {
	inspected, err := GetInspectData(cID)
	if err == nil {
		return inspected.State.Status, nil
	}

	return "", fmt.Errorf("failed to get state of container: %w", err)
}

// GetHealth inspects the container identified by cID and returns its health
// status string. It returns an empty string when the state has no health
// section. An inspect failure is returned wrapped.
func GetHealth(cID string) (string, error) {
	inspected, err := GetInspectData(cID)
	if err != nil {
		return "", fmt.Errorf("failed to get health status of container: %w", err)
	}

	if health := inspected.State.Health; health != nil {
		return health.Status, nil
	}

	return "", nil
}

// GetContainersFromProject returns the containers from the compose project based on the label
func GetContainersFromProject(composeName string) ([]types.Container, error) {
containers, err := apiClient.ContainerList(
Expand Down

0 comments on commit 622f37e

Please sign in to comment.