diff --git a/CHANGELOG.md b/CHANGELOG.md index f54b02e..fb90803 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ * Require two consecutive ping failures before restarting an actor. +### 🔧 Fixed + +* Added a timeout to the `lvmbeat set overwatcher` command to prevent it from hanging indefinitely. + ## 1.4.0 - January 1, 2025 diff --git a/src/gort/overwatcher/health.py b/src/gort/overwatcher/health.py index 7c1594c..1f688a8 100644 --- a/src/gort/overwatcher/health.py +++ b/src/gort/overwatcher/health.py @@ -15,6 +15,8 @@ from lvmopstools.retrier import Retrier from lvmopstools.utils import Trigger, with_timeout +from clu.tools import CommandStatus + from gort.overwatcher.core import OverwatcherModule, OverwatcherModuleTask from gort.overwatcher.helpers import get_actor_ping, restart_actors from gort.tools import decap @@ -40,8 +42,15 @@ async def task(self): while True: try: - cmd = await self.gort.send_command("lvmbeat", "set overwatcher") + cmd = await self.gort.send_command( + "lvmbeat", + "set overwatcher", + time_limit=5, + ) + if cmd.status.did_fail: + if cmd.status & CommandStatus.TIMEDOUT: + raise RuntimeError("Timed out setting overwatcher heartbeat.") raise RuntimeError("Failed to set overwatcher heartbeat.") except Exception as err: