Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make persistence work + experimental Systemd process manager. #31

Merged
merged 8 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- When using the "Systemd" process manager, the unit name of the service process.
-- The column is declared without NOT NULL or DEFAULT, so it is nullable.
ALTER TABLE ServerInstance ADD COLUMN PersistedSystemdUnit TEXT;

99 changes: 94 additions & 5 deletions SS14.Watchdog/Components/ProcessManagement/IProcessManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace SS14.Watchdog.Components.ProcessManagement;
/// Responsible for managing game server processes: start, stop, persistence.
/// </summary>
/// <seealso cref="IProcessHandle"/>
/// <seealso cref="ProcessOptions"/>
public interface IProcessManager
{
bool CanPersist { get; }
Expand All @@ -29,6 +30,7 @@ Task<IProcessHandle> StartServer(
/// </summary>
/// <param name="Program">The program to run to launch the game server. Full path.</param>
/// <param name="WorkingDirectory">The working directory of the launched process.</param>
/// <seealso cref="IProcessHandle"/>
public sealed record ProcessStartData(
string Program,
string WorkingDirectory,
Expand All @@ -41,12 +43,99 @@ public sealed record ProcessStartData(
/// </summary>
// NOTE(review): this listing appears to come from a diff view, so removed and added members are
// interleaved (both `void Kill()` and `Task Kill()` appear, as do two closing braces).
// Confirm against the actual file which members remain.
public interface IProcessHandle
{
// The basic process manager's handle forwarded these two members to the underlying Process object.
bool HasExited { get; }
int ExitCode { get; }

// NOTE(review): presumably writes a dump of the process to `file`; the semantics of DumpType are not visible here.
void DumpProcess(string file, DumpType type);

/// <summary>
/// Waits asynchronously until the process has exited.
/// </summary>
/// <param name="cancel">Token that can be used to abandon the wait.</param>
Task WaitForExitAsync(CancellationToken cancel = default);

void Kill();
}
/// <summary>
/// Kills the process. The basic process manager kills the entire process tree.
/// </summary>
Task Kill();

/// <summary>
/// Gets the status describing how the process exited.
/// The basic process manager returns <c>null</c> while the process has not exited yet.
/// </summary>
Task<ProcessExitStatus?> GetExitStatusAsync();
}

/// <summary>
/// Status for how a process has exited.
/// </summary>
/// <param name="Reason">The reason why the process exited. Check the enum for possible values.</param>
/// <param name="Status">
/// Reason-specific value.
/// For <see cref="ProcessExitReason.ExitCode"/> this is the exit code.
/// For <see cref="ProcessExitReason.Signal"/> and <see cref="ProcessExitReason.CoreDump"/> this is the signal that killed the process.
/// </param>
/// <seealso cref="IProcessHandle"/>
public sealed record ProcessExitStatus(ProcessExitReason Reason, int Status)
{
    /// <summary>
    /// Creates a status for a reason that carries no reason-specific value.
    /// <see cref="Status"/> is set to 0.
    /// </summary>
    /// <param name="reason">The reason why the process exited.</param>
    public ProcessExitStatus(ProcessExitReason reason) : this(reason, 0)
    {
    }

    /// <summary>
    /// Whether this exit is treated as clean. That is the case when the reason is
    /// <see cref="ProcessExitReason.ReasonUnavailable"/> or <see cref="ProcessExitReason.Success"/>,
    /// or when it is <see cref="ProcessExitReason.ExitCode"/> with an exit code of 0.
    /// Every other reason is not clean.
    /// </summary>
    public bool IsClean => Reason switch
    {
        ProcessExitReason.ReasonUnavailable or ProcessExitReason.Success => true,
        // The exit code is only meaningful for this reason; 0 is the only clean code.
        ProcessExitReason.ExitCode => Status == 0,
        _ => false,
    };
}

/// <summary>
/// Reason values for <see cref="ProcessExitStatus"/>.
/// </summary>
/// <remarks>
/// Members have no explicit values, so their numeric values follow declaration order.
/// </remarks>
public enum ProcessExitReason
{
// These somewhat correspond to systemd's values for "Result" on a Service, kinda.
// https://www.freedesktop.org/software/systemd/man/org.freedesktop.systemd1.html#Properties2

/// <summary>
/// Exit reason could not be determined.
/// </summary>
/// <remarks>
/// <para>
/// This happens on POSIX with the "basic" process manager after restarting the watchdog,
/// as it is not possible to get the exit status of persisted processes.
/// </para>
/// </remarks>
ReasonUnavailable,

/// <summary>
/// Process exited "successfully" according to systemd.
/// </summary>
/// <remarks>
/// This probably means exit code 0, but the two are kept distinct as technically they're not equal.
/// </remarks>
Success,

/// <summary>
/// Process exited with a recorded exit code.
/// </summary>
ExitCode,

/// <summary>
/// Process was killed by an uncaught signal.
/// </summary>
/// <remarks>
/// This won't apply if the process is killed with SIGTERM,
/// as the game handles that and manually returns exit code signum + 128.
/// </remarks>
Signal,

/// <summary>
/// Process crashed and dumped core.
/// </summary>
CoreDump,

/// <summary>
/// Systemd operation failed.
/// </summary>
SystemdFailed,

/// <summary>
/// Timeout executing service operation.
/// </summary>
Timeout,

/// <summary>
/// Process was killed by the Linux OOM killer.
/// </summary>
OomKill,

/// <summary>
/// Catch-all for other unhandled status codes.
/// </summary>
Other,
}
28 changes: 22 additions & 6 deletions SS14.Watchdog/Components/ProcessManagement/ProcessManagerBasic.cs
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,13 @@ private void PersistPid(IServerInstance instance, Process process)

_logger.LogDebug("Process looks good, guess we're using this!");

return Task.FromResult<IProcessHandle?>(new Handle(process));
return Task.FromResult<IProcessHandle?>(new Handle(process) { IsRecovered = true });
}

private sealed class Handle : IProcessHandle
{
private readonly Process _process;

public bool HasExited => _process.HasExited;
public int ExitCode => _process.ExitCode;
public bool IsRecovered;

public Handle(Process process)
{
Expand All @@ -187,9 +185,27 @@ public async Task WaitForExitAsync(CancellationToken cancel = default)
await _process.WaitForExitAsync(cancel);
}

public void Kill()
/// <summary>
/// Reports how the wrapped process exited.
/// </summary>
/// <returns>
/// A completed task holding <c>null</c> while the process is still running, and otherwise
/// either the process exit code or <see cref="ProcessExitReason.ReasonUnavailable"/>.
/// </returns>
public Task<ProcessExitStatus?> GetExitStatusAsync()
{
    ProcessExitStatus? status = null;

    if (_process.HasExited)
    {
        // On POSIX the exit code can only be fetched for our immediate children.
        // A process started by a previous watchdog instance and "recovered" from
        // persistence is not one, so its exit code is unknowable here.
        // Windows has no such restriction.
        if (!OperatingSystem.IsWindows() && IsRecovered)
        {
            status = new ProcessExitStatus(ProcessExitReason.ReasonUnavailable);
        }
        else
        {
            status = new ProcessExitStatus(ProcessExitReason.ExitCode, _process.ExitCode);
        }
    }

    return Task.FromResult<ProcessExitStatus?>(status);
}

/// <summary>
/// Kills the wrapped process together with its whole process tree.
/// </summary>
/// <returns>An already-completed task; the kill request itself is issued synchronously.</returns>
public Task Kill()
{
    // Take down child processes as well, not just the root process.
    const bool includeProcessTree = true;

    _process.Kill(includeProcessTree);
    return Task.CompletedTask;
}
}
}
}
Loading
Loading