From 669ed188a88f28e5d503af737b41157bf41d56e5 Mon Sep 17 00:00:00 2001 From: Etienne Champetier Date: Tue, 13 Aug 2024 17:32:21 -0400 Subject: [PATCH] host-device: use temp network namespace for rename Using a temporary name / doing a fast rename causes some race conditions with udev and NetworkManager: https://gitlab.freedesktop.org/NetworkManager/NetworkManager/-/issues/1599 Signed-off-by: Etienne Champetier --- pkg/ns/ns_linux.go | 53 ++++- plugins/main/host-device/host-device.go | 244 +++++++++++------------- 2 files changed, 168 insertions(+), 129 deletions(-) diff --git a/pkg/ns/ns_linux.go b/pkg/ns/ns_linux.go index f260f2813..df2086ccb 100644 --- a/pkg/ns/ns_linux.go +++ b/pkg/ns/ns_linux.go @@ -31,6 +31,10 @@ func GetCurrentNS() (NetNS, error) { // return an unexpected network namespace. runtime.LockOSThread() defer runtime.UnlockOSThread() + return getCurrentNSNoLock() +} + +func getCurrentNSNoLock() (NetNS, error) { return GetNS(getCurrentThreadNetNSPath()) } @@ -152,6 +156,53 @@ func GetNS(nspath string) (NetNS, error) { return &netNS{file: fd}, nil } +// Returns a new empty NetNS. +func TempNetNS() (NetNS, error) { + var tempNS NetNS + var err error + var wg sync.WaitGroup + wg.Add(1) + + // Create the new namespace in a new goroutine so that if we later fail + // to switch the namespace back to the original one, we can safely + // leave the thread locked to die without a risk of the current thread + // left lingering with incorrect namespace. + go func() { + defer wg.Done() + runtime.LockOSThread() + + var threadNS NetNS + // save a handle to current network namespace + threadNS, err = getCurrentNSNoLock() + if err != nil { + err = fmt.Errorf("failed to open current namespace: %v", err) + return + } + defer threadNS.Close() + + // create the temporary network namespace + err = unix.Unshare(unix.CLONE_NEWNET) + if err != nil { + return + } + + // get a handle to the temporary network namespace + tempNS, err = getCurrentNSNoLock() + + err2 := threadNS.Set() + if err2 == nil { + // Unlock the current thread only when we successfully switched back + // to the original namespace; otherwise leave the thread locked which + // will force the runtime to scrap the current thread, that is maybe + // not as optimal but at least always safe to do. + runtime.UnlockOSThread() + } + }() + + wg.Wait() + return tempNS, err +} + func (ns *netNS) Path() string { return ns.file.Name() } @@ -173,7 +224,7 @@ func (ns *netNS) Do(toRun func(NetNS) error) error { } containedCall := func(hostNS NetNS) error { - threadNS, err := GetCurrentNS() + threadNS, err := getCurrentNSNoLock() if err != nil { return fmt.Errorf("failed to open current netns: %v", err) } diff --git a/plugins/main/host-device/host-device.go b/plugins/main/host-device/host-device.go index 33f91414e..b282dfbc4 100644 --- a/plugins/main/host-device/host-device.go +++ b/plugins/main/host-device/host-device.go @@ -230,117 +230,93 @@ func cmdDel(args *skel.CmdArgs) error { return nil } -// setTempName sets a temporary name for netdevice, returns updated Link object or error -// if occurred. -func setTempName(dev netlink.Link) (netlink.Link, error) { - tempName := fmt.Sprintf("%s%d", "temp_", dev.Attrs().Index) - - // rename to tempName - if err := netlink.LinkSetName(dev, tempName); err != nil { - return nil, fmt.Errorf("failed to rename device %q to %q: %v", dev.Attrs().Name, tempName, err) - } - - // Get updated Link obj - tempDev, err := netlink.LinkByName(tempName) - if err != nil { - return nil, fmt.Errorf("failed to find %q after rename to %q: %v", dev.Attrs().Name, tempName, err) - } - - return tempDev, nil -} - func moveLinkIn(hostDev netlink.Link, containerNs ns.NetNS, ifName string) (netlink.Link, error) { - origLinkFlags := hostDev.Attrs().Flags hostDevName := hostDev.Attrs().Name - defaultNs, err := ns.GetCurrentNS() + + tempNS, err := ns.TempNetNS() if err != nil { - return nil, fmt.Errorf("failed to get host namespace: %v", err) + return nil, fmt.Errorf("failed to create temp NetNS: %v", err) } + defer tempNS.Close() - // Devices can be renamed only when down - if err = netlink.LinkSetDown(hostDev); err != nil { - return nil, fmt.Errorf("failed to set %q down: %v", hostDev.Attrs().Name, err) + if err = netlink.LinkSetNsFd(hostDev, int(tempNS.Fd())); err != nil { + return nil, fmt.Errorf("failed to move %q to temp NS: %v", hostDev.Attrs().Name, err) } + // detroying tempNS will move the interface back to the host NS - // restore original link state in case of error - defer func() { + if err = tempNS.Do(func(_ ns.NetNS) error { + tempNsDev, err := netlink.LinkByName(hostDevName) if err != nil { - if origLinkFlags&net.FlagUp == net.FlagUp && hostDev != nil { - _ = netlink.LinkSetUp(hostDev) - } - } - }() - - hostDev, err = setTempName(hostDev) - if err != nil { - return nil, fmt.Errorf("failed to rename device %q to temporary name: %v", hostDevName, err) - } - - // restore original netdev name in case of error - defer func() { - if err != nil && hostDev != nil { - _ = netlink.LinkSetName(hostDev, hostDevName) + return fmt.Errorf("failed to find %q in temp NS: %v", hostDevName, err) } - }() - if err = netlink.LinkSetNsFd(hostDev, int(containerNs.Fd())); err != nil { - return nil, fmt.Errorf("failed to move %q to container ns: %v", hostDev.Attrs().Name, err) - } - - var contDev netlink.Link - tempDevName := hostDev.Attrs().Name - if err = containerNs.Do(func(_ ns.NetNS) error { - var err error - contDev, err = netlink.LinkByName(tempDevName) - if err != nil { - return fmt.Errorf("failed to find %q: %v", tempDevName, err) + // Live device rename is only supported since kernel 6.2 + // https://github.com/torvalds/linux/commit/bd039b5ea2a91ea707ee8539df26456bd5be80af + if err = netlink.LinkSetDown(tempNsDev); err != nil { + return fmt.Errorf("failed to set %q down: %v", tempNsDev.Attrs().Name, err) } - // move netdev back to host namespace in case of error + // restore original link state in case of error defer func() { if err != nil { - _ = netlink.LinkSetNsFd(contDev, int(defaultNs.Fd())) - // we need to get updated link object as link was moved back to host namepsace - _ = defaultNs.Do(func(_ ns.NetNS) error { - hostDev, _ = netlink.LinkByName(tempDevName) - return nil - }) + origLinkFlags := hostDev.Attrs().Flags + if origLinkFlags&net.FlagUp == net.FlagUp && tempNsDev != nil { + _ = netlink.LinkSetUp(tempNsDev) + } } }() - // Save host device name into the container device's alias property - if err = netlink.LinkSetAlias(contDev, hostDevName); err != nil { - return fmt.Errorf("failed to set alias to %q: %v", tempDevName, err) - } // Rename container device to respect args.IfName - if err = netlink.LinkSetName(contDev, ifName); err != nil { - return fmt.Errorf("failed to rename device %q to %q: %v", tempDevName, ifName, err) + if err = netlink.LinkSetName(tempNsDev, ifName); err != nil { + return fmt.Errorf("failed to rename device %q to %q: %v", hostDevName, ifName, err) } - // restore tempDevName in case of error + // restore hostDevName in case of error defer func() { - if err != nil { - _ = netlink.LinkSetName(contDev, tempDevName) + if err != nil && tempNsDev != nil { + _ = netlink.LinkSetName(tempNsDev, hostDevName) } }() - // Bring container device up - if err = netlink.LinkSetUp(contDev); err != nil { - return fmt.Errorf("failed to set %q up: %v", ifName, err) + // Retrieve link again to get up-to-date name and attributes + tempNsDev, err = netlink.LinkByName(ifName) + if err != nil { + return fmt.Errorf("failed to find %q in temp NS: %v", ifName, err) + } + + // Save host device name into the container device's alias property + if err = netlink.LinkSetAlias(tempNsDev, hostDevName); err != nil { + return fmt.Errorf("failed to set alias to %q: %v", hostDevName, err) } - // bring device down in case of error defer func() { if err != nil { - _ = netlink.LinkSetDown(contDev) + _ = netlink.LinkSetAlias(tempNsDev, "") } }() - // Retrieve link again to get up-to-date name and attributes + if err = netlink.LinkSetNsFd(tempNsDev, int(containerNs.Fd())); err != nil { + return fmt.Errorf("failed to move %q (host: %q) to container NS: %v", ifName, hostDevName, err) + } + + return nil + }); err != nil { + return nil, err + } + + var contDev netlink.Link + if err = containerNs.Do(func(_ ns.NetNS) error { + var err error contDev, err = netlink.LinkByName(ifName) if err != nil { - return fmt.Errorf("failed to find %q: %v", ifName, err) + return fmt.Errorf("failed to find %q in container NS: %v", ifName, err) } + + // Bring container device up + if err = netlink.LinkSetUp(contDev); err != nil { + return fmt.Errorf("failed to set %q up: %v", ifName, err) + } + return nil }); err != nil { return nil, err @@ -350,79 +326,91 @@ func moveLinkIn(hostDev netlink.Link, containerNs ns.NetNS, ifName string) (netl } func moveLinkOut(containerNs ns.NetNS, ifName string) error { + tempNS, err := ns.TempNetNS() + if err != nil { + return fmt.Errorf("failed to create temp NetNS: %v", err) + } + defer tempNS.Close() + + var contDev netlink.Link + err = containerNs.Do(func(_ ns.NetNS) error { + var err error + contDev, err = netlink.LinkByName(ifName) + if err != nil { + return fmt.Errorf("failed to find %q in container NS: %v", ifName, err) + } + return nil + }) + if err != nil { + return err + } + + hostDevName := contDev.Attrs().Alias + if hostDevName == "" { + return fmt.Errorf("failed to find original ifname for %q", ifName) + } + _, err = netlink.LinkByName(hostDevName) + if err == nil { + return fmt.Errorf("%q already exist host", hostDevName) + } + if _, ok := err.(netlink.LinkNotFoundError); !ok { + return fmt.Errorf("failed to check the absence of %q on the host: %v", hostDevName, err) + } + + err = containerNs.Do(func(_ ns.NetNS) error { + if err = netlink.LinkSetNsFd(contDev, int(tempNS.Fd())); err != nil { + return fmt.Errorf("failed to move %q to temp NS: %v", ifName, err) + } + return nil + }) + if err != nil { + return err + } + defaultNs, err := ns.GetCurrentNS() if err != nil { return err } defer defaultNs.Close() - var tempName string - var origDev netlink.Link - err = containerNs.Do(func(_ ns.NetNS) error { - dev, err := netlink.LinkByName(ifName) + err = tempNS.Do(func(_ ns.NetNS) error { + tempNsDev, err := netlink.LinkByName(ifName) if err != nil { - return fmt.Errorf("failed to find %q: %v", ifName, err) + return fmt.Errorf("failed to find %q in container NS: %v", ifName, err) } - origDev = dev - // Devices can be renamed only when down - if err = netlink.LinkSetDown(dev); err != nil { - return fmt.Errorf("failed to set %q down: %v", ifName, err) + // Live device rename is only supported since kernel 6.2 + // https://github.com/torvalds/linux/commit/bd039b5ea2a91ea707ee8539df26456bd5be80af + if err = netlink.LinkSetDown(tempNsDev); err != nil { + return fmt.Errorf("failed to set %q down: %v", tempNsDev.Attrs().Name, err) } - defer func() { - // If moving the device to the host namespace fails, set its name back to ifName so that this - // function can be retried. Also bring the device back up, unless it was already down before. - if err != nil { - _ = netlink.LinkSetName(dev, ifName) - if dev.Attrs().Flags&net.FlagUp == net.FlagUp { - _ = netlink.LinkSetUp(dev) - } - } - }() + // Rename container device to hostDevName + if err = netlink.LinkSetName(tempNsDev, hostDevName); err != nil { + return fmt.Errorf("failed to rename device %q to %q: %v", ifName, hostDevName, err) + } - newLink, err := setTempName(dev) + // Retrieve link again to get up-to-date name and attributes + tempNsDev, err = netlink.LinkByName(hostDevName) if err != nil { - return fmt.Errorf("failed to rename device %q to temporary name: %v", ifName, err) + return fmt.Errorf("failed to find %q in temp NS: %v", hostDevName, err) } - dev = newLink - tempName = dev.Attrs().Name - if err = netlink.LinkSetNsFd(dev, int(defaultNs.Fd())); err != nil { - return fmt.Errorf("failed to move %q to host netns: %v", tempName, err) + // Unset device's alias property + if err = netlink.LinkSetAlias(tempNsDev, ""); err != nil { + return fmt.Errorf("failed to unset alias to %q: %v", hostDevName, err) } + + if err = netlink.LinkSetNsFd(tempNsDev, int(defaultNs.Fd())); err != nil { + return fmt.Errorf("failed to move %q to host NS: %v", hostDevName, err) + } + return nil }) - if err != nil { return err } - // Rename the device to its original name from the host namespace - tempDev, err := netlink.LinkByName(tempName) - if err != nil { - return fmt.Errorf("failed to find %q in host namespace: %v", tempName, err) - } - - if err = netlink.LinkSetName(tempDev, tempDev.Attrs().Alias); err != nil { - // move device back to container ns so it may be retired - defer func() { - _ = netlink.LinkSetNsFd(tempDev, int(containerNs.Fd())) - _ = containerNs.Do(func(_ ns.NetNS) error { - lnk, err := netlink.LinkByName(tempName) - if err != nil { - return err - } - _ = netlink.LinkSetName(lnk, ifName) - if origDev.Attrs().Flags&net.FlagUp == net.FlagUp { - _ = netlink.LinkSetUp(lnk) - } - return nil - }) - }() - return fmt.Errorf("failed to restore %q to original name %q: %v", tempName, tempDev.Attrs().Alias, err) - } - return nil }