Skip to content

Commit

Permalink
host-device: use temp network namespace for rename
Browse files Browse the repository at this point in the history
Using a temporary name / doing a fast rename causes
some race conditions with udev and NetworkManager:
https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/1599

Signed-off-by: Etienne Champetier <[email protected]>
  • Loading branch information
champtar committed Aug 16, 2024
1 parent 96a9f56 commit 49e807e
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 129 deletions.
53 changes: 52 additions & 1 deletion pkg/ns/ns_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ func GetCurrentNS() (NetNS, error) {
// return an unexpected network namespace.
runtime.LockOSThread()
defer runtime.UnlockOSThread()
return getCurrentNSNoLock()
}

func getCurrentNSNoLock() (NetNS, error) {
return GetNS(getCurrentThreadNetNSPath())
}

Expand Down Expand Up @@ -152,6 +156,53 @@ func GetNS(nspath string) (NetNS, error) {
return &netNS{file: fd}, nil
}

// Returns a new empty NetNS.
func TempNetNS() (NetNS, error) {
var tempNS NetNS
var err error
var wg sync.WaitGroup
wg.Add(1)

// Create the new namespace in a new goroutine so that if we later fail
// to switch the namespace back to the original one, we can safely
// leave the thread locked to die without a risk of the current thread
// left lingering with incorrect namespace.
go func() {
defer wg.Done()
runtime.LockOSThread()

var threadNS NetNS
// save a handle to current network namespace
threadNS, err = getCurrentNSNoLock()
if err != nil {
err = fmt.Errorf("failed to open current namespace: %v", err)
return
}
defer threadNS.Close()

// create the temporary network namespace
err = unix.Unshare(unix.CLONE_NEWNET)
if err != nil {
return
}

// get a handle to the temporary network namespace
tempNS, err = getCurrentNSNoLock()

err2 := threadNS.Set()
if err2 == nil {
// Unlock the current thread only when we successfully switched back
// to the original namespace; otherwise leave the thread locked which
// will force the runtime to scrap the current thread, that is maybe
// not as optimal but at least always safe to do.
runtime.UnlockOSThread()
}
}()

wg.Wait()
return tempNS, err
}

func (ns *netNS) Path() string {
return ns.file.Name()
}
Expand All @@ -173,7 +224,7 @@ func (ns *netNS) Do(toRun func(NetNS) error) error {
}

containedCall := func(hostNS NetNS) error {
threadNS, err := GetCurrentNS()
threadNS, err := getCurrentNSNoLock()
if err != nil {
return fmt.Errorf("failed to open current netns: %v", err)
}
Expand Down
244 changes: 116 additions & 128 deletions plugins/main/host-device/host-device.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,117 +230,93 @@ func cmdDel(args *skel.CmdArgs) error {
return nil
}

// setTempName sets a temporary name for netdevice, returns updated Link object or error
// if occurred.
func setTempName(dev netlink.Link) (netlink.Link, error) {
tempName := fmt.Sprintf("%s%d", "temp_", dev.Attrs().Index)

// rename to tempName
if err := netlink.LinkSetName(dev, tempName); err != nil {
return nil, fmt.Errorf("failed to rename device %q to %q: %v", dev.Attrs().Name, tempName, err)
}

// Get updated Link obj
tempDev, err := netlink.LinkByName(tempName)
if err != nil {
return nil, fmt.Errorf("failed to find %q after rename to %q: %v", dev.Attrs().Name, tempName, err)
}

return tempDev, nil
}

func moveLinkIn(hostDev netlink.Link, containerNs ns.NetNS, ifName string) (netlink.Link, error) {
origLinkFlags := hostDev.Attrs().Flags
hostDevName := hostDev.Attrs().Name
defaultNs, err := ns.GetCurrentNS()

tempNS, err := ns.TempNetNS()
if err != nil {
return nil, fmt.Errorf("failed to get host namespace: %v", err)
return nil, fmt.Errorf("failed to create temp NetNS: %v", err)
}
defer tempNS.Close()

// Devices can be renamed only when down
if err = netlink.LinkSetDown(hostDev); err != nil {
return nil, fmt.Errorf("failed to set %q down: %v", hostDev.Attrs().Name, err)
if err = netlink.LinkSetNsFd(hostDev, int(tempNS.Fd())); err != nil {
return nil, fmt.Errorf("failed to move %q to temp NS: %v", hostDev.Attrs().Name, err)
}
// detroying tempNS will move the interface back to the host NS

// restore original link state in case of error
defer func() {
if err = tempNS.Do(func(_ ns.NetNS) error {
tempNsDev, err := netlink.LinkByName(hostDevName)
if err != nil {
if origLinkFlags&net.FlagUp == net.FlagUp && hostDev != nil {
_ = netlink.LinkSetUp(hostDev)
}
}
}()

hostDev, err = setTempName(hostDev)
if err != nil {
return nil, fmt.Errorf("failed to rename device %q to temporary name: %v", hostDevName, err)
}

// restore original netdev name in case of error
defer func() {
if err != nil && hostDev != nil {
_ = netlink.LinkSetName(hostDev, hostDevName)
return fmt.Errorf("failed to find %q in temp NS: %v", hostDevName, err)
}
}()

if err = netlink.LinkSetNsFd(hostDev, int(containerNs.Fd())); err != nil {
return nil, fmt.Errorf("failed to move %q to container ns: %v", hostDev.Attrs().Name, err)
}

var contDev netlink.Link
tempDevName := hostDev.Attrs().Name
if err = containerNs.Do(func(_ ns.NetNS) error {
var err error
contDev, err = netlink.LinkByName(tempDevName)
if err != nil {
return fmt.Errorf("failed to find %q: %v", tempDevName, err)
// Live device rename is only supported since kernel 6.2
// https://github.com/torvalds/linux/commit/bd039b5ea2a91ea707ee8539df26456bd5be80af
if err = netlink.LinkSetDown(tempNsDev); err != nil {
return fmt.Errorf("failed to set %q down: %v", tempNsDev.Attrs().Name, err)
}

// move netdev back to host namespace in case of error
// restore original link state in case of error
defer func() {
if err != nil {
_ = netlink.LinkSetNsFd(contDev, int(defaultNs.Fd()))
// we need to get updated link object as link was moved back to host namepsace
_ = defaultNs.Do(func(_ ns.NetNS) error {
hostDev, _ = netlink.LinkByName(tempDevName)
return nil
})
origLinkFlags := hostDev.Attrs().Flags
if origLinkFlags&net.FlagUp == net.FlagUp && tempNsDev != nil {
_ = netlink.LinkSetUp(tempNsDev)
}
}
}()

// Save host device name into the container device's alias property
if err = netlink.LinkSetAlias(contDev, hostDevName); err != nil {
return fmt.Errorf("failed to set alias to %q: %v", tempDevName, err)
}
// Rename container device to respect args.IfName
if err = netlink.LinkSetName(contDev, ifName); err != nil {
return fmt.Errorf("failed to rename device %q to %q: %v", tempDevName, ifName, err)
if err = netlink.LinkSetName(tempNsDev, ifName); err != nil {
return fmt.Errorf("failed to rename device %q to %q: %v", hostDevName, ifName, err)
}

// restore tempDevName in case of error
// restore hostDevName in case of error
defer func() {
if err != nil {
_ = netlink.LinkSetName(contDev, tempDevName)
if err != nil && tempNsDev != nil {
_ = netlink.LinkSetName(tempNsDev, hostDevName)
}
}()

// Bring container device up
if err = netlink.LinkSetUp(contDev); err != nil {
return fmt.Errorf("failed to set %q up: %v", ifName, err)
// Retrieve link again to get up-to-date name and attributes
tempNsDev, err = netlink.LinkByName(ifName)
if err != nil {
return fmt.Errorf("failed to find %q in temp NS: %v", ifName, err)
}

// Save host device name into the container device's alias property
if err = netlink.LinkSetAlias(tempNsDev, hostDevName); err != nil {
return fmt.Errorf("failed to set alias to %q: %v", hostDevName, err)
}

// bring device down in case of error
defer func() {
if err != nil {
_ = netlink.LinkSetDown(contDev)
_ = netlink.LinkSetAlias(tempNsDev, "")
}
}()

// Retrieve link again to get up-to-date name and attributes
if err = netlink.LinkSetNsFd(tempNsDev, int(containerNs.Fd())); err != nil {
return fmt.Errorf("failed to move %q (host: %q) to container NS: %v", ifName, hostDevName, err)
}

return nil
}); err != nil {
return nil, err
}

var contDev netlink.Link
if err = containerNs.Do(func(_ ns.NetNS) error {
var err error
contDev, err = netlink.LinkByName(ifName)
if err != nil {
return fmt.Errorf("failed to find %q: %v", ifName, err)
return fmt.Errorf("failed to find %q in container NS: %v", ifName, err)
}

// Bring container device up
if err = netlink.LinkSetUp(contDev); err != nil {
return fmt.Errorf("failed to set %q up: %v", ifName, err)
}

return nil
}); err != nil {
return nil, err
Expand All @@ -350,79 +326,91 @@ func moveLinkIn(hostDev netlink.Link, containerNs ns.NetNS, ifName string) (netl
}

func moveLinkOut(containerNs ns.NetNS, ifName string) error {
tempNS, err := ns.TempNetNS()
if err != nil {
return fmt.Errorf("failed to create temp NetNS: %v", err)
}
defer tempNS.Close()

var contDev netlink.Link
err = containerNs.Do(func(_ ns.NetNS) error {
var err error
contDev, err = netlink.LinkByName(ifName)
if err != nil {
return fmt.Errorf("failed to find %q in container NS: %v", ifName, err)
}
return nil
})
if err != nil {
return err
}

hostDevName := contDev.Attrs().Alias
if hostDevName == "" {
return fmt.Errorf("failed to find original ifname for %q", ifName)
}
_, err = netlink.LinkByName(hostDevName)
if err == nil {
return fmt.Errorf("%q already exist host", hostDevName)
}
if _, ok := err.(netlink.LinkNotFoundError); !ok {
return fmt.Errorf("failed to check the absence of %q on the host: %v", hostDevName, err)
}

err = containerNs.Do(func(_ ns.NetNS) error {
if err = netlink.LinkSetNsFd(contDev, int(tempNS.Fd())); err != nil {
return fmt.Errorf("failed to move %q to temp NS: %v", ifName, err)
}
return nil
})
if err != nil {
return err
}

defaultNs, err := ns.GetCurrentNS()
if err != nil {
return err
}
defer defaultNs.Close()

var tempName string
var origDev netlink.Link
err = containerNs.Do(func(_ ns.NetNS) error {
dev, err := netlink.LinkByName(ifName)
err = tempNS.Do(func(_ ns.NetNS) error {
tempNsDev, err := netlink.LinkByName(ifName)
if err != nil {
return fmt.Errorf("failed to find %q: %v", ifName, err)
return fmt.Errorf("failed to find %q in container NS: %v", ifName, err)
}
origDev = dev

// Devices can be renamed only when down
if err = netlink.LinkSetDown(dev); err != nil {
return fmt.Errorf("failed to set %q down: %v", ifName, err)
// Live device rename is only supported since kernel 6.2
// https://github.com/torvalds/linux/commit/bd039b5ea2a91ea707ee8539df26456bd5be80af
if err = netlink.LinkSetDown(tempNsDev); err != nil {
return fmt.Errorf("failed to set %q down: %v", tempNsDev.Attrs().Name, err)
}

defer func() {
// If moving the device to the host namespace fails, set its name back to ifName so that this
// function can be retried. Also bring the device back up, unless it was already down before.
if err != nil {
_ = netlink.LinkSetName(dev, ifName)
if dev.Attrs().Flags&net.FlagUp == net.FlagUp {
_ = netlink.LinkSetUp(dev)
}
}
}()
// Rename container device to hostDevName
if err = netlink.LinkSetName(tempNsDev, hostDevName); err != nil {
return fmt.Errorf("failed to rename device %q to %q: %v", ifName, hostDevName, err)
}

newLink, err := setTempName(dev)
// Retrieve link again to get up-to-date name and attributes
tempNsDev, err = netlink.LinkByName(hostDevName)
if err != nil {
return fmt.Errorf("failed to rename device %q to temporary name: %v", ifName, err)
return fmt.Errorf("failed to find %q in temp NS: %v", hostDevName, err)
}
dev = newLink
tempName = dev.Attrs().Name

if err = netlink.LinkSetNsFd(dev, int(defaultNs.Fd())); err != nil {
return fmt.Errorf("failed to move %q to host netns: %v", tempName, err)
// Unset device's alias property
if err = netlink.LinkSetAlias(tempNsDev, ""); err != nil {
return fmt.Errorf("failed to unset alias to %q: %v", hostDevName, err)
}

if err = netlink.LinkSetNsFd(tempNsDev, int(defaultNs.Fd())); err != nil {
return fmt.Errorf("failed to move %q to host NS: %v", hostDevName, err)
}

return nil
})

if err != nil {
return err
}

// Rename the device to its original name from the host namespace
tempDev, err := netlink.LinkByName(tempName)
if err != nil {
return fmt.Errorf("failed to find %q in host namespace: %v", tempName, err)
}

if err = netlink.LinkSetName(tempDev, tempDev.Attrs().Alias); err != nil {
// move device back to container ns so it may be retired
defer func() {
_ = netlink.LinkSetNsFd(tempDev, int(containerNs.Fd()))
_ = containerNs.Do(func(_ ns.NetNS) error {
lnk, err := netlink.LinkByName(tempName)
if err != nil {
return err
}
_ = netlink.LinkSetName(lnk, ifName)
if origDev.Attrs().Flags&net.FlagUp == net.FlagUp {
_ = netlink.LinkSetUp(lnk)
}
return nil
})
}()
return fmt.Errorf("failed to restore %q to original name %q: %v", tempName, tempDev.Attrs().Alias, err)
}

return nil
}

Expand Down

0 comments on commit 49e807e

Please sign in to comment.