Skip to content

Commit

Permalink
Merge pull request os-autoinst#2469 from mdoucha/ipmi_sol_err
Browse files Browse the repository at this point in the history
Automatically reconnect graphical IPMI SOL console on error
  • Loading branch information
mergify[bot] authored Mar 7, 2024
2 parents c0c1cf8 + 444a040 commit be01b33
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 12 deletions.
9 changes: 6 additions & 3 deletions consoles/localXvnc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ sub callxterm ($self, $command, $window_name) {
die('Missing "xterm"') unless which('xterm');
if ($self->{args}->{log}) {
mkpath 'ulogs';
$command = "script -f ulogs/hardware-console-log.txt -c \"$command\"";
$command = "script -af ulogs/hardware-console-log.txt -c \"$command\"";
}
eval { system("DISPLAY=$display $xterm_vt_cmd -title $window_name -e bash -c '$command' & echo \"xterm PID is \$!\""); };
die "cant' start xterm on $display (err: $! retval: $?)" if $@;
my $pid = fork();
exec("DISPLAY=$display $xterm_vt_cmd -title $window_name -e bash -c '$command'") # uncoverable statement
unless $pid;
bmwqemu::diag("Xterm PID: $pid");
return $pid;
}

sub fullscreen ($self, $args) {
Expand Down
30 changes: 25 additions & 5 deletions consoles/sshXtermIPMI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@ use Mojo::Base 'consoles::localXvnc', -signatures;
use autodie ':all';
require IPC::System::Simple;
use File::Which;
use Time::HiRes qw(usleep);
use POSIX qw(waitpid WNOHANG);

sub activate ($self) {
# start Xvnc
$self->SUPER::activate;

sub start_sol ($self) {
my $testapi_console = $self->{testapi_console};

my @command = $self->backend->ipmi_cmdline;
Expand All @@ -29,7 +28,14 @@ sub activate ($self) {
($ipmi_response =~ /SOL payload already de-activated/);
}

$self->callxterm($cstr, "ipmitool:$testapi_console");
$self->{xterm_pid} = $self->callxterm($cstr, "ipmitool:$testapi_console");
}

# Activate the console: run the parent class activation, launch the IPMI SOL
# session via start_sol, and reset the reconnect counter that current_screen
# increments whenever it has to restart a dead SOL session.
sub activate ($self) {
# start Xvnc
$self->SUPER::activate;
# start_sol stores the PID returned by callxterm in $self->{xterm_pid}
$self->start_sol;
# fresh activation: no reconnects have been consumed yet
$self->{reconnects} = 0;
}

sub reset ($self) {
Expand All @@ -55,4 +61,18 @@ sub do_mc_reset ($self) {
return;
}

# Fetch the current screen from the parent class and make sure the xterm
# process stored in $self->{xterm_pid} is still running. If it is not, restart
# the SOL session through start_sol and read the screen again. The number of
# reconnects is accumulated in $self->{reconnects} and capped by the
# IPMI_SOL_MAX_RECONNECTS variable (5 when unset); exceeding the cap is fatal.
sub current_screen ($self) {
    my $reconnect_limit = $bmwqemu::vars{IPMI_SOL_MAX_RECONNECTS} // 5;

    for (my $attempt = 0; ; $attempt++) {
        my $screen = $self->SUPER::current_screen;

        # A non-blocking waitpid yields 0 while the child is still running;
        # any other result is handled as a dead SOL connection.
        my $child_status = waitpid($self->{xterm_pid}, WNOHANG);
        if ($child_status == 0) {
            return $screen;
        }

        my $reconnect_count = ++$self->{reconnects};
        if ($reconnect_count > $reconnect_limit) {
            die 'Too many IPMI SOL errors';
        }
        bmwqemu::fctwarn("IPMI SOL connection died, reconnect $self->{reconnects} / $reconnect_limit");

        # Linear back-off: no delay before the first restart in this call,
        # then half a second more for each further one.
        usleep(500_000 * $attempt);
        $self->start_sol;
    }
}

1;
1 change: 1 addition & 0 deletions doc/backend_vars.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ IPMI_MC_RESET_TIMEOUT;integer;60;Counts to try to reach IPMI interface after mc
IPMI_MC_RESET_PING_COUNT;integer;1;Ping counts that must be successful after mc reset
IPMI_MC_RESET_IPMI_TRIES;integer;3;Maximum number of IPMI command tries that are conducted after mc reset
IPMI_SOL_PERSISTENT_CONSOLE;boolean;1;Make SOL console persistent and don't reset it, enabled by default
IPMI_SOL_MAX_RECONNECTS;integer;5;Maximum number of SOL reconnects on connection failure
IPMI_$_;;;Internal iterator variable
WORKER_HOSTNAME;string;undef;Worker hostname
|====================
Expand Down
5 changes: 3 additions & 2 deletions t/27-consoles-local_xvnc.t
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ chdir $dir;
my $cleanup = scope_guard sub { chdir $Bin; undef $dir };

BEGIN { *consoles::localXvnc::system = sub { 1 } }
BEGIN { *consoles::localXvnc::exec = sub { _exit(0) } }
BEGIN { *CORE::GLOBAL::sleep = sub { 1 } }

# mock external tool for testing
Expand All @@ -42,11 +43,11 @@ my $local_xvnc_mock = Test::MockModule->new('consoles::localXvnc');
# uncoverable statement count:2
$local_xvnc_mock->redefine(start_xvnc => sub { _exit(0) });
stderr_like { $c->activate } qr/Connected to Xvnc/, 'can call activate';
is $c->callxterm('true', 'window1'), '', 'can call callxterm';
ok $c->callxterm('true', 'window1'), 'can call callxterm';
$vnc_mock->called_pos_ok(0, 'check_vnc_stalls', 'VNC stall detection configured');
$vnc_mock->called_args_pos_is(0, 2, 0, 'VNC stall detection disabled');
$c->{args}->{log} = 1;
is $c->callxterm('true', 'window1'), '', 'can call callxterm';
ok $c->callxterm('true', 'window1'), 'can call callxterm';
is $c->fullscreen({window_name => 'foo'}), 1, 'can call fullscreen';
is $c->disable, undef, 'can call disable';

Expand Down
35 changes: 33 additions & 2 deletions t/29-backend-ipmi.t
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,17 @@ use Test::MockModule;
use Test::MockObject;
use Test::Output qw(combined_like stderr_like);
use Test::Warnings qw(:all :report_warnings);
use POSIX qw(waitpid _exit);

BEGIN { *backend::ipmi::system = sub { 1 } }
BEGIN { *consoles::localXvnc::system = sub { "@_" =~ /hardware-console-log/ ? 1 : 0 } }
BEGIN { *consoles::localXvnc::system = sub { 0 } }
BEGIN { *consoles::localXvnc::exec = sub { _exit("@_" =~ /hardware-console-log/ ? 1 : 0); } }

use backend::ipmi; # SUT

$bmwqemu::vars{WORKER_HOSTNAME} = 'localhost';
$bmwqemu::vars{"HARDWARE_CONSOLE_LOG"} = 1;
$bmwqemu::vars{IPMI_SOL_MAX_RECONNECTS} = 1;
ok my $backend = backend::ipmi->new(), 'backend can be created';
$bmwqemu::vars{"IPMI_$_"} = "fake_$_" foreach qw(HOSTNAME USER PASSWORD);
my @ipmi_cmdline = $backend->ipmi_cmdline;
Expand Down Expand Up @@ -50,7 +53,9 @@ ok $backend->get_mc_status, 'can call get_mc_status';

is $testapi::distri->{consoles}->{sol}->{args}->{log}, '1';
$testapi::distri->{consoles}->{sol}->{DISPLAY} = "display";
ok !$testapi::distri->{consoles}->{sol}->callxterm('ipmi', "console"), "can create console with log enabled";
my $pid = $testapi::distri->{consoles}->{sol}->callxterm('ipmi', "console");
is waitpid($pid, 0), $pid, 'can start xterm subprocess';
is $?, 0x100, "can create console with log enabled";

subtest 'cold reset' => sub {
# reduce retries for testing
Expand All @@ -72,6 +77,32 @@ subtest 'dell sleep' => sub {
is time, $start + 4, 'slept 4 seconds';
};

# Exercise the reconnect handling of the SOL console's current_screen:
# first with waitpid mocked to report a missing child, then with waitpid
# mocked to report a running child.
subtest 'sol reconnect' => sub {
my $localXvnc_mock = Test::MockModule->new('consoles::localXvnc');
my $sol_mock = Test::MockModule->new('consoles::sshXtermIPMI');
# counts how often the mocked parent current_screen gets invoked
my $screen_calls = 0;

# Replace the parent class activate with a no-op for this subtest
$localXvnc_mock->noop('activate');
# Parent screen read is replaced by a counter returning fixed data
$localXvnc_mock->redefine(current_screen => sub {
$screen_calls++;
return 'image data';
});
# waitpid as seen from consoles::sshXtermIPMI now always returns -1
$sol_mock->redefine(waitpid => -1);
$testapi::distri->{consoles}->{sol}->activate;

# Pretend the subprocess is dead, screen read should fail after
# 1 reconnect attempt
# (IPMI_SOL_MAX_RECONNECTS is set to 1 near the top of this test file, so the
# screen is read once before the reconnect and once more before giving up)
throws_ok { $testapi::distri->{consoles}->{sol}->current_screen; } qr/Too many IPMI SOL errors/, 'dies on reconnect failure';
is $screen_calls, 2, 'SOL reconnect count is correct';

# Pretend the subprocess is still running and check that screen read
# returns the correct data
$screen_calls = 0;
# a waitpid result of 0 makes current_screen return right after the first read
$sol_mock->redefine(waitpid => 0);
is $testapi::distri->{consoles}->{sol}->current_screen, 'image data', 'can read screen buffer';
is $screen_calls, 1, 'screen buffer read without reconnect';
};

done_testing;

END {
Expand Down

0 comments on commit be01b33

Please sign in to comment.