From f28be507f6ed2b43169a9b1deaf5630a228e1a80 Mon Sep 17 00:00:00 2001 From: Nate Brown Date: Thu, 8 Feb 2024 14:30:15 -0600 Subject: [PATCH 1/4] linux --- overlay/route.go | 17 ++++ overlay/tun.go | 59 +++++-------- overlay/tun_linux.go | 200 +++++++++++++++++++++++++++++++------------ 3 files changed, 181 insertions(+), 95 deletions(-) diff --git a/overlay/route.go b/overlay/route.go index 793c8fd..9633d32 100644 --- a/overlay/route.go +++ b/overlay/route.go @@ -1,6 +1,7 @@ package overlay import ( + "bytes" "fmt" "math" "net" @@ -21,6 +22,22 @@ type Route struct { Install bool } +func (r Route) Equal(t Route) bool { + if !r.Cidr.IP.Equal(t.Cidr.IP) { + return false + } + if !bytes.Equal(r.Cidr.Mask, t.Cidr.Mask) { + return false + } + if r.Metric != t.Metric { + return false + } + if r.MTU != t.MTU { + return false + } + return true +} + func makeRouteTree(l *logrus.Logger, routes []Route, allowMTU bool) (*cidr.Tree4[iputil.VpnIp], error) { routeTree := cidr.NewTree4[iputil.VpnIp]() for _, r := range routes { diff --git a/overlay/tun.go b/overlay/tun.go index ca1a64a..27f43e1 100644 --- a/overlay/tun.go +++ b/overlay/tun.go @@ -5,65 +5,46 @@ import ( "github.com/sirupsen/logrus" "github.com/slackhq/nebula/config" - "github.com/slackhq/nebula/util" ) const DefaultMTU = 1300 +// TODO: We may be able to remove routines type DeviceFactory func(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) func NewDeviceFromConfig(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) { - routes, err := parseRoutes(c, tunCidr) - if err != nil { - return nil, util.NewContextualError("Could not parse tun.routes", nil, err) - } - - unsafeRoutes, err := parseUnsafeRoutes(c, tunCidr) - if err != nil { - return nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err) - } - routes = append(routes, unsafeRoutes...) - switch { case c.GetBool("tun.disabled", false): tun := newDisabledTun(tunCidr, c.GetInt("tun.tx_queue", 500), c.GetBool("stats.message_metrics", false), l) return tun, nil default: - return newTun( - l, - c.GetString("tun.dev", ""), - tunCidr, - c.GetInt("tun.mtu", DefaultMTU), - routes, - c.GetInt("tun.tx_queue", 500), - routines > 1, - c.GetBool("tun.use_system_route_table", false), - ) + return newTun(c, l, tunCidr, routines > 1) } } func NewFdDeviceFromConfig(fd *int) DeviceFactory { return func(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) { - routes, err := parseRoutes(c, tunCidr) - if err != nil { - return nil, util.NewContextualError("Could not parse tun.routes", nil, err) - } + return newTunFromFd(c, l, *fd, tunCidr) + } +} - unsafeRoutes, err := parseUnsafeRoutes(c, tunCidr) - if err != nil { - return nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err) +func findRemovedRoutes(newRoutes, oldRoutes []Route) []Route { + var removed []Route + has := func(entry Route) bool { + for _, check := range newRoutes { + if check.Equal(entry) { + return true + } } - routes = append(routes, unsafeRoutes...) - return newTunFromFd( - l, - *fd, - tunCidr, - c.GetInt("tun.mtu", DefaultMTU), - routes, - c.GetInt("tun.tx_queue", 500), - c.GetBool("tun.use_system_route_table", false), - ) + return false + } + for _, oldEntry := range oldRoutes { + if !has(oldEntry) { + removed = append(removed, oldEntry) + } } + + return removed } diff --git a/overlay/tun_linux.go b/overlay/tun_linux.go index a576bf3..fcd4aab 100644 --- a/overlay/tun_linux.go +++ b/overlay/tun_linux.go @@ -15,21 +15,24 @@ import ( "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" ) type tun struct { io.ReadWriteCloser - fd int - Device string - cidr *net.IPNet - MaxMTU int - DefaultMTU int - TXQueueLen int - - Routes []Route + fd int + Device string + cidr *net.IPNet + MaxMTU int + DefaultMTU int + TXQueueLen int + deviceIndex int + + Routes atomic.Pointer[[]Route] routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] routeChan chan struct{} useSystemRoutes bool @@ -61,30 +64,20 @@ type ifreqQLEN struct { pad [8]byte } -func newTunFromFd(l *logrus.Logger, deviceFd int, cidr *net.IPNet, defaultMTU int, routes []Route, txQueueLen int, useSystemRoutes bool) (*tun, error) { - routeTree, err := makeRouteTree(l, routes, true) +func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) { + file := os.NewFile(uintptr(deviceFd), "/dev/net/tun") + + t, err := newTunGeneric(c, l, file, cidr) if err != nil { return nil, err } - file := os.NewFile(uintptr(deviceFd), "/dev/net/tun") + t.Device = "tun0" - t := &tun{ - ReadWriteCloser: file, - fd: int(file.Fd()), - Device: "tun0", - cidr: cidr, - DefaultMTU: defaultMTU, - TXQueueLen: txQueueLen, - Routes: routes, - useSystemRoutes: useSystemRoutes, - l: l, - } - t.routeTree.Store(routeTree) return t, nil } -func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, txQueueLen int, multiqueue bool, useSystemRoutes bool) (*tun, error) { +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, multiqueue bool) (*tun, error) { fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0) if err != nil { return nil, err @@ -95,46 +88,99 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int if multiqueue { req.Flags |= unix.IFF_MULTI_QUEUE } - copy(req.Name[:], deviceName) + copy(req.Name[:], c.GetString("tun.dev", "")) if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil { return nil, err } name := strings.Trim(string(req.Name[:]), "\x00") file := os.NewFile(uintptr(fd), "/dev/net/tun") - - maxMTU := defaultMTU - for _, r := range routes { - if r.MTU == 0 { - r.MTU = defaultMTU - } - - if r.MTU > maxMTU { - maxMTU = r.MTU - } - } - - routeTree, err := makeRouteTree(l, routes, true) + t, err := newTunGeneric(c, l, file, cidr) if err != nil { return nil, err } + t.Device = name + + return t, nil +} + +func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr *net.IPNet) (*tun, error) { t := &tun{ ReadWriteCloser: file, fd: int(file.Fd()), - Device: name, cidr: cidr, - MaxMTU: maxMTU, - DefaultMTU: defaultMTU, - TXQueueLen: txQueueLen, - Routes: routes, - useSystemRoutes: useSystemRoutes, + DefaultMTU: c.GetInt("tun.mtu", DefaultMTU), + TXQueueLen: c.GetInt("tun.tx_queue", 500), + useSystemRoutes: c.GetBool("tun.use_system_route_table", false), l: l, } - t.routeTree.Store(routeTree) + + err := t.reload(c, true) + if err != nil { + return nil, err + } + + c.RegisterReloadCallback(func(c *config.C) { + //TODO: do we want to log the addition/removal of routes on reload? + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + return t, nil } +func (t *tun) reload(c *config.C, initial bool) error { + routes, err := parseRoutes(c, t.cidr) + if err != nil { + return util.NewContextualError("Could not parse tun.routes", nil, err) + } + + unsafeRoutes, err := parseUnsafeRoutes(c, t.cidr) + if err != nil { + return util.NewContextualError("Could not parse tun.unsafe_routes", nil, err) + } + + routes = append(routes, unsafeRoutes...) + routeTree, err := makeRouteTree(t.l, routes, true) + if err != nil { + return err + } + + for i, r := range routes { + if r.MTU == 0 { + //TODO: This was horribly broken before, I have doubts anyone is using it + routes[i].MTU = t.DefaultMTU + } + + if r.MTU > t.MaxMTU { + //TODO: This needs to be atomic but it is not used so maybe its fine? + //TODO: this is also not handled since it would adjust the main route and device mtu + t.MaxMTU = r.MTU + } + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // This should never be called since addRoutes should log its own errors in a reload condition + util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l) + } + } + + return nil +} + func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) { fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0) if err != nil { @@ -261,10 +307,12 @@ func (t *tun) Activate() error { return fmt.Errorf("failed to get tun device link: %s", err) } + t.deviceIndex = link.Attrs().Index + // Default route dr := &net.IPNet{IP: t.cidr.IP.Mask(t.cidr.Mask), Mask: t.cidr.Mask} nr := netlink.Route{ - LinkIndex: link.Attrs().Index, + LinkIndex: t.deviceIndex, Dst: dr, MTU: t.DefaultMTU, AdvMSS: t.advMSS(Route{}), @@ -279,14 +327,30 @@ func (t *tun) Activate() error { return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err) } + err = t.addRoutes(false) + if err != nil { + return err + } + + // Run the interface + ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING + if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil { + return fmt.Errorf("failed to run tun device: %s", err) + } + + return nil +} + +func (t *tun) addRoutes(logErrors bool) error { // Path routes - for _, r := range t.Routes { + routes := *t.Routes.Load() + for _, r := range routes { if !r.Install { continue } nr := netlink.Route{ - LinkIndex: link.Attrs().Index, + LinkIndex: t.deviceIndex, Dst: r.Cidr, MTU: r.MTU, AdvMSS: t.advMSS(r), @@ -297,21 +361,45 @@ func (t *tun) Activate() error { nr.Priority = r.Metric } - err = netlink.RouteAdd(&nr) + err := netlink.RouteReplace(&nr) if err != nil { - return fmt.Errorf("failed to set mtu %v on route %v; %v", r.MTU, r.Cidr, err) + retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } } } - // Run the interface - ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING - if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil { - return fmt.Errorf("failed to run tun device: %s", err) - } - return nil } +func (t *tun) removeRoutes(routes []Route) { + for _, r := range routes { + if !r.Install { + continue + } + + nr := netlink.Route{ + LinkIndex: t.deviceIndex, + Dst: r.Cidr, + MTU: r.MTU, + AdvMSS: t.advMSS(r), + Scope: unix.RT_SCOPE_LINK, + } + + if r.Metric > 0 { + nr.Priority = r.Metric + } + + err := netlink.RouteDel(&nr) + if err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } + } +} + func (t *tun) Cidr() *net.IPNet { return t.cidr } From f5fee9819f93c7bc7e695646d34083767bffe522 Mon Sep 17 00:00:00 2001 From: Nate Brown Date: Thu, 8 Feb 2024 15:58:08 -0600 Subject: [PATCH 2/4] darwin --- overlay/route.go | 8 ++ overlay/tun.go | 20 +++++ overlay/tun_darwin.go | 192 +++++++++++++++++++++++++++++++++++------- overlay/tun_linux.go | 15 ++-- 4 files changed, 198 insertions(+), 37 deletions(-) diff --git a/overlay/route.go b/overlay/route.go index 9633d32..467e362 100644 --- a/overlay/route.go +++ b/overlay/route.go @@ -38,6 +38,14 @@ func (r Route) Equal(t Route) bool { return true } +func (r Route) String() string { + s := r.Cidr.String() + if r.Metric != 0 { + s += fmt.Sprintf(" metric: %v", r.Metric) + } + return s +} + func makeRouteTree(l *logrus.Logger, routes []Route, allowMTU bool) (*cidr.Tree4[iputil.VpnIp], error) { routeTree := cidr.NewTree4[iputil.VpnIp]() for _, r := range routes { diff --git a/overlay/tun.go b/overlay/tun.go index 27f43e1..defeb94 100644 --- a/overlay/tun.go +++ b/overlay/tun.go @@ -5,6 +5,7 @@ import ( "github.com/sirupsen/logrus" "github.com/slackhq/nebula/config" + "github.com/slackhq/nebula/util" ) const DefaultMTU = 1300 @@ -29,6 +30,25 @@ func NewFdDeviceFromConfig(fd *int) DeviceFactory { } } +func getAllRoutesFromConfig(c *config.C, cidr *net.IPNet) (bool, []Route, error) { + if !c.HasChanged("tun.routes") && !c.HasChanged("tun.unsafe_routes") { + return false, nil, nil + } + + routes, err := parseRoutes(c, cidr) + if err != nil { + return true, nil, util.NewContextualError("Could not parse tun.routes", nil, err) + } + + unsafeRoutes, err := parseUnsafeRoutes(c, cidr) + if err != nil { + return true, nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err) + } + + routes = append(routes, unsafeRoutes...) + return true, routes, nil +} + func findRemovedRoutes(newRoutes, oldRoutes []Route) []Route { var removed []Route has := func(entry Route) bool { diff --git a/overlay/tun_darwin.go b/overlay/tun_darwin.go index caec580..55b7be2 100644 --- a/overlay/tun_darwin.go +++ b/overlay/tun_darwin.go @@ -9,12 +9,15 @@ import ( "io" "net" "os" + "sync/atomic" "syscall" "unsafe" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" netroute "golang.org/x/net/route" "golang.org/x/sys/unix" ) @@ -24,8 +27,9 @@ type tun struct { Device string cidr *net.IPNet DefaultMTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] + linkAddr *netroute.LinkAddr l *logrus.Logger // cache out buffer since we need to prepend 4 bytes for tun metadata @@ -69,12 +73,8 @@ type ifreqMTU struct { pad [8]byte } -func newTun(l *logrus.Logger, name string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) { - routeTree, err := makeRouteTree(l, routes, false) - if err != nil { - return nil, err - } - +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) { + name := c.GetString("tun.dev", "") ifIndex := -1 if name != "" && name != "utun" { _, err := fmt.Sscanf(name, "utun%d", &ifIndex) @@ -142,17 +142,27 @@ func newTun(l *logrus.Logger, name string, cidr *net.IPNet, defaultMTU int, rout file := os.NewFile(uintptr(fd), "") - tun := &tun{ + t := &tun{ ReadWriteCloser: file, Device: name, cidr: cidr, - DefaultMTU: defaultMTU, - Routes: routes, - routeTree: routeTree, + DefaultMTU: c.GetInt("tun.mtu", DefaultMTU), l: l, } - return tun, nil + err = t.reload(c, true) + if err != nil { + return nil, err + } + + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil } func (t *tun) deviceBytes() (o [16]byte) { @@ -162,7 +172,7 @@ func (t *tun) deviceBytes() (o [16]byte) { return } -func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) { +func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) { return nil, fmt.Errorf("newTunFromFd not supported in Darwin") } @@ -260,6 +270,7 @@ func (t *tun) Activate() error { if linkAddr == nil { return fmt.Errorf("unable to discover link_addr for tun interface") } + t.linkAddr = linkAddr copy(routeAddr.IP[:], addr[:]) copy(maskAddr.IP[:], mask[:]) @@ -278,33 +289,48 @@ func (t *tun) Activate() error { } // Unsafe path routes - for _, r := range t.Routes { - if r.Via == nil || !r.Install { - // We don't allow route MTUs so only install routes with a via - continue - } + return t.addRoutes(false) +} - copy(routeAddr.IP[:], r.Cidr.IP.To4()) - copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4()) +func (t *tun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr) + if err != nil { + return err + } + + if !initial && !change { + return nil + } - err = addRoute(routeSock, routeAddr, maskAddr, linkAddr) + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) if err != nil { - if errors.Is(err, unix.EEXIST) { - t.l.WithField("route", r.Cidr). - Warnf("unable to add unsafe_route, identical route already exists") - } else { - return err - } + util.LogWithContextIfNeeded("Failed to remove routes", err, t.l) } - // TODO how to set metric + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to add routes", err, t.l) + } } return nil } func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - ok, r := t.routeTree.MostSpecificContains(ip) + ok, r := t.routeTree.Load().MostSpecificContains(ip) if ok { return r } @@ -340,6 +366,88 @@ func getLinkAddr(name string) (*netroute.LinkAddr, error) { return nil, nil } +func (t *tun) addRoutes(logErrors bool) error { + routeSock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC) + if err != nil { + return fmt.Errorf("unable to create AF_ROUTE socket: %v", err) + } + + defer func() { + unix.Shutdown(routeSock, unix.SHUT_RDWR) + err := unix.Close(routeSock) + if err != nil { + t.l.WithError(err).Error("failed to close AF_ROUTE socket") + } + }() + + routeAddr := &netroute.Inet4Addr{} + maskAddr := &netroute.Inet4Addr{} + routes := *t.Routes.Load() + for _, r := range routes { + if r.Via == nil || !r.Install { + // We don't allow route MTUs so only install routes with a via + continue + } + + copy(routeAddr.IP[:], r.Cidr.IP.To4()) + copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4()) + + err := addRoute(routeSock, routeAddr, maskAddr, t.linkAddr) + if err != nil { + if errors.Is(err, unix.EEXIST) { + t.l.WithField("route", r.Cidr). + Warnf("unable to add unsafe_route, identical route already exists") + } else { + retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } + } + } else { + t.l.WithField("route", r).Info("Added route") + } + } + + return nil +} + +func (t *tun) removeRoutes(routes []Route) error { + routeSock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC) + if err != nil { + return fmt.Errorf("unable to create AF_ROUTE socket: %v", err) + } + + defer func() { + unix.Shutdown(routeSock, unix.SHUT_RDWR) + err := unix.Close(routeSock) + if err != nil { + t.l.WithError(err).Error("failed to close AF_ROUTE socket") + } + }() + + routeAddr := &netroute.Inet4Addr{} + maskAddr := &netroute.Inet4Addr{} + + for _, r := range routes { + if !r.Install { + continue + } + + copy(routeAddr.IP[:], r.Cidr.IP.To4()) + copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4()) + + err := delRoute(routeSock, routeAddr, maskAddr, t.linkAddr) + if err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } + return nil +} + func addRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) error { r := netroute.RouteMessage{ Version: unix.RTM_VERSION, @@ -365,6 +473,30 @@ func addRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) return nil } +func delRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) error { + r := netroute.RouteMessage{ + Version: unix.RTM_VERSION, + Type: unix.RTM_DELETE, + Seq: 1, + Addrs: []netroute.Addr{ + unix.RTAX_DST: addr, + unix.RTAX_GATEWAY: link, + unix.RTAX_NETMASK: mask, + }, + } + + data, err := r.Marshal() + if err != nil { + return fmt.Errorf("failed to create route.RouteMessage: %w", err) + } + _, err = unix.Write(sock, data[:]) + if err != nil { + return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err) + } + + return nil +} + func (t *tun) Read(to []byte) (int, error) { buf := make([]byte, len(to)+4) diff --git a/overlay/tun_linux.go b/overlay/tun_linux.go index fcd4aab..0b2ccb5 100644 --- a/overlay/tun_linux.go +++ b/overlay/tun_linux.go @@ -122,7 +122,6 @@ func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr *net.IPNet } c.RegisterReloadCallback(func(c *config.C) { - //TODO: do we want to log the addition/removal of routes on reload? err := t.reload(c, false) if err != nil { util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) @@ -133,17 +132,15 @@ func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr *net.IPNet } func (t *tun) reload(c *config.C, initial bool) error { - routes, err := parseRoutes(c, t.cidr) + routes, err := getAllRoutesFromConfig(c, t.cidr) if err != nil { - return util.NewContextualError("Could not parse tun.routes", nil, err) + return err } - unsafeRoutes, err := parseUnsafeRoutes(c, t.cidr) - if err != nil { - return util.NewContextualError("Could not parse tun.unsafe_routes", nil, err) + if !initial && !change { + return nil } - routes = append(routes, unsafeRoutes...) routeTree, err := makeRouteTree(t.l, routes, true) if err != nil { return err @@ -369,6 +366,8 @@ func (t *tun) addRoutes(logErrors bool) error { } else { return retErr } + } else { + t.l.WithField("route", r).Info("Added route") } } @@ -396,6 +395,8 @@ func (t *tun) removeRoutes(routes []Route) { err := netlink.RouteDel(&nr) if err != nil { t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") } } } From 92d8b55650faae14fff4a58288cc09ac0317b7ad Mon Sep 17 00:00:00 2001 From: Nate Brown Date: Thu, 22 Feb 2024 11:34:34 -0600 Subject: [PATCH 3/4] Windows --- overlay/tun.go | 4 +- overlay/tun_darwin.go | 2 +- overlay/tun_linux.go | 2 +- overlay/tun_water_windows.go | 122 +++++++++++++++++++++---- overlay/tun_windows.go | 9 +- overlay/tun_wintun_windows.go | 166 ++++++++++++++++++++++++++-------- 6 files changed, 242 insertions(+), 63 deletions(-) diff --git a/overlay/tun.go b/overlay/tun.go index defeb94..dc09e33 100644 --- a/overlay/tun.go +++ b/overlay/tun.go @@ -30,8 +30,8 @@ func NewFdDeviceFromConfig(fd *int) DeviceFactory { } } -func getAllRoutesFromConfig(c *config.C, cidr *net.IPNet) (bool, []Route, error) { - if !c.HasChanged("tun.routes") && !c.HasChanged("tun.unsafe_routes") { +func getAllRoutesFromConfig(c *config.C, cidr *net.IPNet, initial bool) (bool, []Route, error) { + if !initial && !c.HasChanged("tun.routes") && !c.HasChanged("tun.unsafe_routes") { return false, nil, nil } diff --git a/overlay/tun_darwin.go b/overlay/tun_darwin.go index 55b7be2..1c63828 100644 --- a/overlay/tun_darwin.go +++ b/overlay/tun_darwin.go @@ -293,7 +293,7 @@ func (t *tun) Activate() error { } func (t *tun) reload(c *config.C, initial bool) error { - change, routes, err := getAllRoutesFromConfig(c, t.cidr) + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) if err != nil { return err } diff --git a/overlay/tun_linux.go b/overlay/tun_linux.go index 0b2ccb5..1e5f592 100644 --- a/overlay/tun_linux.go +++ b/overlay/tun_linux.go @@ -132,7 +132,7 @@ func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr *net.IPNet } func (t *tun) reload(c *config.C, initial bool) error { - routes, err := getAllRoutesFromConfig(c, t.cidr) + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) if err != nil { return err } diff --git a/overlay/tun_water_windows.go b/overlay/tun_water_windows.go index e27cff2..a1acd2b 100644 --- a/overlay/tun_water_windows.go +++ b/overlay/tun_water_windows.go @@ -6,10 +6,13 @@ import ( "net" "os/exec" "strconv" + "sync/atomic" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" "github.com/songgao/water" ) @@ -17,25 +20,34 @@ type waterTun struct { Device string cidr *net.IPNet MTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] - + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] + l *logrus.Logger + f *net.Interface *water.Interface } -func newWaterTun(l *logrus.Logger, cidr *net.IPNet, defaultMTU int, routes []Route) (*waterTun, error) { - routeTree, err := makeRouteTree(l, routes, false) +func newWaterTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*waterTun, error) { + // NOTE: You cannot set the deviceName under Windows, so you must check tun.Device after calling .Activate() + t := &waterTun{ + cidr: cidr, + MTU: c.GetInt("tun.mtu", DefaultMTU), + l: l, + } + + err := t.reload(c, true) if err != nil { return nil, err } - // NOTE: You cannot set the deviceName under Windows, so you must check tun.Device after calling .Activate() - return &waterTun{ - cidr: cidr, - MTU: defaultMTU, - Routes: routes, - routeTree: routeTree, - }, nil + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil } func (t *waterTun) Activate() error { @@ -74,30 +86,104 @@ func (t *waterTun) Activate() error { return fmt.Errorf("failed to run 'netsh' to set MTU: %s", err) } - iface, err := net.InterfaceByName(t.Device) + t.f, err = net.InterfaceByName(t.Device) if err != nil { return fmt.Errorf("failed to find interface named %s: %v", t.Device, err) } - for _, r := range t.Routes { + err = t.addRoutes(false) + if err != nil { + return err + } + + return nil +} + +func (t *waterTun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to set routes", err, t.l) + } else { + for _, r := range findRemovedRoutes(routes, *oldRoutes) { + t.l.WithField("route", r).Info("Removed route") + } + } + } + + return nil +} + +func (t *waterTun) addRoutes(logErrors bool) error { + // Path routes + routes := *t.Routes.Load() + for _, r := range routes { if r.Via == nil || !r.Install { // We don't allow route MTUs so only install routes with a via continue } - err = exec.Command( - "C:\\Windows\\System32\\route.exe", "add", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(iface.Index), "METRIC", strconv.Itoa(r.Metric), + err := exec.Command( + "C:\\Windows\\System32\\route.exe", "add", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(t.f.Index), "METRIC", strconv.Itoa(r.Metric), ).Run() + if err != nil { - return fmt.Errorf("failed to add the unsafe_route %s: %v", r.Cidr.String(), err) + retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } + } else { + t.l.WithField("route", r).Info("Added route") } } return nil } +func (t *waterTun) removeRoutes(routes []Route) { + for _, r := range routes { + if !r.Install { + continue + } + + err := exec.Command( + "C:\\Windows\\System32\\route.exe", "delete", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(t.f.Index), "METRIC", strconv.Itoa(r.Metric), + ).Run() + if err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } +} + func (t *waterTun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) + _, r := t.routeTree.Load().MostSpecificContains(ip) return r } diff --git a/overlay/tun_windows.go b/overlay/tun_windows.go index 57d90cb..f85ee9c 100644 --- a/overlay/tun_windows.go +++ b/overlay/tun_windows.go @@ -12,13 +12,14 @@ import ( "syscall" "github.com/sirupsen/logrus" + "github.com/slackhq/nebula/config" ) -func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (Device, error) { +func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (Device, error) { return nil, fmt.Errorf("newTunFromFd not supported in Windows") } -func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (Device, error) { +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, multiqueue bool) (Device, error) { useWintun := true if err := checkWinTunExists(); err != nil { l.WithError(err).Warn("Check Wintun driver failed, fallback to wintap driver") @@ -26,14 +27,14 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int } if useWintun { - device, err := newWinTun(l, deviceName, cidr, defaultMTU, routes) + device, err := newWinTun(c, l, cidr, multiqueue) if err != nil { return nil, fmt.Errorf("create Wintun interface failed, %w", err) } return device, nil } - device, err := newWaterTun(l, cidr, defaultMTU, routes) + device, err := newWaterTun(c, l, cidr, multiqueue) if err != nil { return nil, fmt.Errorf("create wintap driver failed, %w", err) } diff --git a/overlay/tun_wintun_windows.go b/overlay/tun_wintun_windows.go index 9647024..197e3a7 100644 --- a/overlay/tun_wintun_windows.go +++ b/overlay/tun_wintun_windows.go @@ -6,11 +6,14 @@ import ( "io" "net" "net/netip" + "sync/atomic" "unsafe" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" "github.com/slackhq/nebula/wintun" "golang.org/x/sys/windows" "golang.zx2c4.com/wireguard/windows/tunnel/winipcfg" @@ -23,8 +26,9 @@ type winTun struct { cidr *net.IPNet prefix netip.Prefix MTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] + l *logrus.Logger tun *wintun.NativeTun } @@ -48,83 +52,148 @@ func generateGUIDByDeviceName(name string) (*windows.GUID, error) { return (*windows.GUID)(unsafe.Pointer(&sum[0])), nil } -func newWinTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route) (*winTun, error) { +func newWinTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*winTun, error) { + deviceName := c.GetString("tun.dev", "") guid, err := generateGUIDByDeviceName(deviceName) if err != nil { return nil, fmt.Errorf("generate GUID failed: %w", err) } + prefix, err := iputil.ToNetIpPrefix(*cidr) + if err != nil { + return nil, err + } + + t := &winTun{ + Device: deviceName, + cidr: cidr, + prefix: prefix, + MTU: c.GetInt("tun.mtu", DefaultMTU), + l: l, + } + + err = t.reload(c, true) + if err != nil { + return nil, err + } + var tunDevice wintun.Device - tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, defaultMTU) + tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, t.MTU) if err != nil { // Windows 10 has an issue with unclean shutdowns not fully cleaning up the wintun device. // Trying a second time resolves the issue. l.WithError(err).Debug("Failed to create wintun device, retrying") - tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, defaultMTU) + tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, t.MTU) if err != nil { return nil, fmt.Errorf("create TUN device failed: %w", err) } } + t.tun = tunDevice.(*wintun.NativeTun) + + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil +} - routeTree, err := makeRouteTree(l, routes, false) +func (t *winTun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) if err != nil { - return nil, err + return err } - prefix, err := iputil.ToNetIpPrefix(*cidr) + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) if err != nil { - return nil, err + return err } - return &winTun{ - Device: deviceName, - cidr: cidr, - prefix: prefix, - MTU: defaultMTU, - Routes: routes, - routeTree: routeTree, + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) - tun: tunDevice.(*wintun.NativeTun), - }, nil + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + if err != nil { + util.LogWithContextIfNeeded("Failed to remove routes", err, t.l) + } + + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to add routes", err, t.l) + } + } + + return nil } func (t *winTun) Activate() error { luid := winipcfg.LUID(t.tun.LUID()) - if err := luid.SetIPAddresses([]netip.Prefix{t.prefix}); err != nil { + err := luid.SetIPAddresses([]netip.Prefix{t.prefix}) + if err != nil { return fmt.Errorf("failed to set address: %w", err) } + err = t.addRoutes(false) + if err != nil { + return err + } + + return nil +} + +func (t *winTun) addRoutes(logErrors bool) error { + luid := winipcfg.LUID(t.tun.LUID()) + routes := *t.Routes.Load() foundDefault4 := false - routes := make([]*winipcfg.RouteData, 0, len(t.Routes)+1) - for _, r := range t.Routes { + for _, r := range routes { if r.Via == nil || !r.Install { // We don't allow route MTUs so only install routes with a via continue } - if !foundDefault4 { - if ones, bits := r.Cidr.Mask.Size(); ones == 0 && bits != 0 { - foundDefault4 = true - } - } - prefix, err := iputil.ToNetIpPrefix(*r.Cidr) if err != nil { - return err + retErr := util.NewContextualError("Failed to parse cidr to netip prefix, ignoring route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + continue + } else { + return retErr + } } // Add our unsafe route - routes = append(routes, &winipcfg.RouteData{ - Destination: prefix, - NextHop: r.Via.ToNetIpAddr(), - Metric: uint32(r.Metric), - }) - } + err = luid.AddRoute(prefix, r.Via.ToNetIpAddr(), uint32(r.Metric)) + if err != nil { + retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + continue + } else { + return retErr + } + } else { + t.l.WithField("route", r).Info("Added route") + } - if err := luid.AddRoutes(routes); err != nil { - return fmt.Errorf("failed to add routes: %w", err) + if !foundDefault4 { + if ones, bits := r.Cidr.Mask.Size(); ones == 0 && bits != 0 { + foundDefault4 = true + } + } } ipif, err := luid.IPInterface(windows.AF_INET) @@ -141,12 +210,35 @@ func (t *winTun) Activate() error { if err := ipif.Set(); err != nil { return fmt.Errorf("failed to set ip interface: %w", err) } + return nil +} + +func (t *winTun) removeRoutes(routes []Route) error { + luid := winipcfg.LUID(t.tun.LUID()) + + for _, r := range routes { + if !r.Install { + continue + } + prefix, err := iputil.ToNetIpPrefix(*r.Cidr) + if err != nil { + t.l.WithError(err).WithField("route", r).Info("Failed to convert cidr to netip prefix") + continue + } + + err = luid.DeleteRoute(prefix, r.Via.ToNetIpAddr()) + if err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } return nil } func (t *winTun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) + _, r := t.routeTree.Load().MostSpecificContains(ip) return r } From 74380b4f18e8cdbf0e4e03760b815178ac6cbd01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 19:09:43 +0000 Subject: [PATCH 4/4] Bump actions/download-artifact from 3 to 4 Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b5b8ced..5c3b984 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -117,7 +117,7 @@ jobs: - uses: actions/checkout@v4 - name: Download artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: artifacts