Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a delay between killing teamd processes #3325

Merged
merged 24 commits into master from teamd-delay-kill
Jan 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
e5683d5
Add a delay between killing teamd processes
saiarcot895 Oct 13, 2024
f4fd3ab
Update LAG removal code to use the same logic as cleaning up all LAGs
saiarcot895 Oct 22, 2024
7b6fc53
Update tests to test LAG cleanup and to test with the new code
saiarcot895 Oct 22, 2024
27f6d3c
Merge remote-tracking branch 'origin/master' into teamd-delay-kill
saiarcot895 Oct 22, 2024
bdd47c7
Merge remote-tracking branch 'origin/master' into teamd-delay-kill
saiarcot895 Oct 22, 2024
c5d84cf
Add more tests to cover more cases
saiarcot895 Oct 23, 2024
1dd20a0
Merge branch 'master' into teamd-delay-kill
dgsudharsan Oct 28, 2024
8f71480
Merge branch 'master' into teamd-delay-kill
dgsudharsan Nov 4, 2024
f39d60f
Merge branch 'master' into teamd-delay-kill
dgsudharsan Nov 11, 2024
e7ce08d
Wait 200ms instead of 100ms, and fix teamd wait code
saiarcot895 Nov 13, 2024
ce1fdae
Merge remote-tracking branch 'refs/remotes/personal/teamd-delay-kill'…
saiarcot895 Nov 13, 2024
726f800
Merge remote-tracking branch 'origin/master' into teamd-delay-kill
saiarcot895 Nov 13, 2024
d2ccbb3
Merge remote-tracking branch 'origin/master' into teamd-delay-kill
saiarcot895 Nov 25, 2024
181bdb9
Merge branch 'master' into teamd-delay-kill
saiarcot895 Nov 26, 2024
4d99bf4
Merge branch 'master' into teamd-delay-kill
saiarcot895 Dec 2, 2024
6472d33
Merge branch 'master' into teamd-delay-kill
dgsudharsan Dec 3, 2024
fff96a3
Merge branch 'master' into teamd-delay-kill
saiarcot895 Dec 10, 2024
cd38a76
Merge branch 'master' into teamd-delay-kill
judyjoseph Dec 15, 2024
9f6ab64
Try 10ms sleep
saiarcot895 Jan 3, 2025
9aaf399
Merge remote-tracking branch 'origin/master' into teamd-delay-kill
saiarcot895 Jan 6, 2025
702ffda
Fix code style
saiarcot895 Jan 8, 2025
a4a5d83
Merge remote-tracking branch 'origin/master' into teamd-delay-kill
saiarcot895 Jan 8, 2025
e4b838b
Some more coding style fixes
saiarcot895 Jan 10, 2025
3adfe66
Merge branch 'master' into teamd-delay-kill
saiarcot895 Jan 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 39 additions & 61 deletions cfgmgr/teammgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <signal.h>


Expand Down Expand Up @@ -171,18 +173,29 @@ void TeamMgr::cleanTeamProcesses()
SWSS_LOG_ENTER();
SWSS_LOG_NOTICE("Cleaning up LAGs during shutdown...");

std::unordered_map<std::string, pid_t> aliasPidMap;
std::unordered_map<std::string, int> aliasPidMap;

for (const auto& alias: m_lagList)
{
std::string res;
pid_t pid;
// Sleep for 10 milliseconds so as to not overwhelm the netlink
// socket buffers with events about interfaces going down
std::this_thread::sleep_for(std::chrono::milliseconds(10));

try
{
std::stringstream cmd;
cmd << "cat " << shellquote("/var/run/teamd/" + alias + ".pid");
EXEC_WITH_ERROR_THROW(cmd.str(), res);
ifstream pidFile("/var/run/teamd/" + alias + ".pid");
if (pidFile.is_open())
{
pidFile >> pid;
aliasPidMap[alias] = pid;
SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
}
else
{
SWSS_LOG_NOTICE("Unable to read pid file for %s, skipping...", alias.c_str());
continue;
}
}
catch (const std::exception &e)
{
Expand All @@ -191,46 +204,28 @@ void TeamMgr::cleanTeamProcesses()
continue;
}

try
{
pid = static_cast<pid_t>(std::stoul(res, nullptr, 10));
aliasPidMap[alias] = pid;

SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
}
catch (const std::exception &e)
if (kill(pid, SIGTERM))
{
SWSS_LOG_ERROR("Failed to read port channel %s pid: %s", alias.c_str(), e.what());
continue;
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, strerror(errno));
aliasPidMap.erase(alias);
}

try
else
{
std::stringstream cmd;
cmd << "kill -TERM " << pid;
EXEC_WITH_ERROR_THROW(cmd.str(), res);

SWSS_LOG_NOTICE("Sent SIGTERM to port channel %s pid %d", alias.c_str(), pid);
}
catch (const std::exception &e)
{
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, e.what());
aliasPidMap.erase(alias);
}
}

for (const auto& cit: aliasPidMap)
{
const auto &alias = cit.first;
const auto &pid = cit.second;

std::stringstream cmd;
std::string res;

SWSS_LOG_NOTICE("Waiting for port channel %s pid %d to stop...", alias.c_str(), pid);

cmd << "tail -f --pid=" << pid << " /dev/null";
EXEC_WITH_ERROR_THROW(cmd.str(), res);
while (!kill(pid, 0))
{
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
}

SWSS_LOG_NOTICE("LAGs cleanup is done");
Expand Down Expand Up @@ -658,42 +653,25 @@ bool TeamMgr::removeLag(const string &alias)
{
SWSS_LOG_ENTER();

stringstream cmd;
string res;
pid_t pid;

try
{
std::stringstream cmd;
cmd << "cat " << shellquote("/var/run/teamd/" + alias + ".pid");
EXEC_WITH_ERROR_THROW(cmd.str(), res);
}
catch (const std::exception &e)
{
SWSS_LOG_NOTICE("Failed to remove non-existent port channel %s pid...", alias.c_str());
return false;
}

try
{
pid = static_cast<pid_t>(std::stoul(res, nullptr, 10));
SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
}
catch (const std::exception &e)
{
SWSS_LOG_ERROR("Failed to read port channel %s pid: %s", alias.c_str(), e.what());
return false;
ifstream pidfile("/var/run/teamd/" + alias + ".pid");
if (pidfile.is_open())
{
pidfile >> pid;
SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
}
else
{
SWSS_LOG_NOTICE("Failed to remove non-existent port channel %s pid...", alias.c_str());
return false;
}
}

try
{
std::stringstream cmd;
cmd << "kill -TERM " << pid;
EXEC_WITH_ERROR_THROW(cmd.str(), res);
}
catch (const std::exception &e)
if (kill(pid, SIGTERM))
{
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, e.what());
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, strerror(errno));
return false;
}

Expand Down
4 changes: 2 additions & 2 deletions tests/mock_tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,8 @@ tests_teammgrd_SOURCES = teammgrd/teammgr_ut.cpp \
tests_teammgrd_INCLUDES = $(tests_INCLUDES) -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/lib
tests_teammgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI)
tests_teammgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI) $(tests_teammgrd_INCLUDES)
tests_teammgrd_LDADD = $(LDADD_GTEST) $(LDADD_SAI) -lnl-genl-3 -lhiredis -lhiredis \
-lswsscommon -lswsscommon -lgtest -lgtest_main -lzmq -lnl-3 -lnl-route-3 -lpthread -lgmock -lgmock_main
tests_teammgrd_LDADD = $(LDADD_GTEST) $(LDADD_SAI) -ldl -lhiredis \
-lswsscommon -lgtest -lgtest_main -lzmq -lpthread -lgmock -lgmock_main

## fpmsyncd unit tests

Expand Down
Loading
Loading