diff --git a/cmd/docker-proxy/main_linux.go b/cmd/docker-proxy/main_linux.go index 2c1cd4e1a5..b4e61c32dc 100644 --- a/cmd/docker-proxy/main_linux.go +++ b/cmd/docker-proxy/main_linux.go @@ -11,6 +11,8 @@ import ( "github.com/docker/docker/dockerversion" "github.com/ishidawataru/sctp" + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" ) // The caller is expected to pass-in open file descriptors ... @@ -59,9 +61,9 @@ func main() { } func newProxy(config ProxyConfig) (p Proxy, err error) { - ipv := ipv4 + ipv := ip4 if config.HostIP.To4() == nil { - ipv = ipv6 + ipv = ip6 } switch config.Proto { @@ -96,6 +98,21 @@ func newProxy(config ProxyConfig) (p Proxy, err error) { if err != nil { return nil, fmt.Errorf("failed to listen on %s: %w", hostAddr, err) } + // We need to setsockopt(IP_PKTINFO) on the listener to get the destination address as an ancillary + // message. The daddr will be used as the source address when sending back replies coming from the + // container to the client. If we don't do this, the kernel will have to pick a source address for us, and + // it might not pick what the client expects. That would result in ICMP Port Unreachable. 
+ if ipv == ip4 { + pc := ipv4.NewPacketConn(listener) + if err := pc.SetControlMessage(ipv4.FlagDst, true); err != nil { + return nil, fmt.Errorf("failed to setsockopt(IP_PKTINFO): %w", err) + } + } else { + pc := ipv6.NewPacketConn(listener) + if err := pc.SetControlMessage(ipv6.FlagDst, true); err != nil { + return nil, fmt.Errorf("failed to setsockopt(IPV6_RECVPKTINFO): %w", err) + } + } } else { l, err := net.FilePacketConn(config.ListenSock) if err != nil { @@ -108,7 +125,7 @@ func newProxy(config ProxyConfig) (p Proxy, err error) { } } container := &net.UDPAddr{IP: config.ContainerIP, Port: config.ContainerPort} - p, err = NewUDPProxy(listener, container) + p, err = NewUDPProxy(listener, container, ipv) case "sctp": var listener *sctp.SCTPListener if config.ListenSock != nil { diff --git a/cmd/docker-proxy/proxy_linux.go b/cmd/docker-proxy/proxy_linux.go index b3e8050c50..34287ebb8f 100644 --- a/cmd/docker-proxy/proxy_linux.go +++ b/cmd/docker-proxy/proxy_linux.go @@ -7,9 +7,9 @@ type ipVersion string const ( // IPv4 is version 4 - ipv4 ipVersion = "4" + ip4 ipVersion = "4" // IPv4 is version 6 - ipv6 ipVersion = "6" + ip6 ipVersion = "6" ) // Proxy defines the behavior of a proxy. It forwards traffic back and forth diff --git a/cmd/docker-proxy/udp_proxy_linux.go b/cmd/docker-proxy/udp_proxy_linux.go index 585cc3616e..3796473165 100644 --- a/cmd/docker-proxy/udp_proxy_linux.go +++ b/cmd/docker-proxy/udp_proxy_linux.go @@ -2,12 +2,15 @@ package main import ( "encoding/binary" + "errors" "log" "net" - "strings" "sync" "syscall" "time" + + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" ) const ( @@ -51,19 +54,21 @@ type UDPProxy struct { backendAddr *net.UDPAddr connTrackTable connTrackMap connTrackLock sync.Mutex + ipVer ipVersion } // NewUDPProxy creates a new UDPProxy. 
-func NewUDPProxy(listener *net.UDPConn, backendAddr *net.UDPAddr) (*UDPProxy, error) { +func NewUDPProxy(listener *net.UDPConn, backendAddr *net.UDPAddr, ipVer ipVersion) (*UDPProxy, error) { return &UDPProxy{ listener: listener, frontendAddr: listener.LocalAddr().(*net.UDPAddr), backendAddr: backendAddr, connTrackTable: make(connTrackMap), + ipVer: ipVer, }, nil } -func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr, clientKey *connTrackKey) { +func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, serverAddr net.IP, clientAddr *net.UDPAddr, clientKey *connTrackKey) { defer func() { proxy.connTrackLock.Lock() delete(proxy.connTrackTable, *clientKey) @@ -71,6 +76,15 @@ func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr proxyConn.Close() }() + var oob []byte + if proxy.ipVer == ip4 { + cm := &ipv4.ControlMessage{Src: serverAddr} + oob = cm.Marshal() + } else { + cm := &ipv6.ControlMessage{Src: serverAddr} + oob = cm.Marshal() + } + readBuf := make([]byte, UDPBufSize) for { proxyConn.SetReadDeadline(time.Now().Add(UDPConnTrackTimeout)) @@ -88,7 +102,7 @@ func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr return } for i := 0; i != read; { - written, err := proxy.listener.WriteToUDP(readBuf[i:read], clientAddr) + written, _, err := proxy.listener.WriteMsgUDP(readBuf[i:read], oob, clientAddr) if err != nil { return } @@ -100,13 +114,19 @@ func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr // Run starts forwarding the traffic using UDP. 
func (proxy *UDPProxy) Run() { readBuf := make([]byte, UDPBufSize) + var oob []byte + if proxy.ipVer == ip4 { + oob = ipv4.NewControlMessage(ipv4.FlagDst) + } else { + oob = ipv6.NewControlMessage(ipv6.FlagDst) + } + for { - read, from, err := proxy.listener.ReadFromUDP(readBuf) + read, _, _, from, err := proxy.listener.ReadMsgUDP(readBuf, oob) if err != nil { - // NOTE: Apparently ReadFrom doesn't return - // ECONNREFUSED like Read do (see comment in - // UDPProxy.replyLoop) - if !isClosedError(err) { + // The frontend listener socket might be closed by the signal + // handler. In that case, don't log anything - it's not an error. + if !errors.Is(err, net.ErrClosed) { log.Printf("Stopping proxy on udp/%v for udp/%v (%s)", proxy.frontendAddr, proxy.backendAddr, err) } break @@ -123,7 +143,15 @@ func (proxy *UDPProxy) Run() { continue } proxy.connTrackTable[*fromKey] = proxyConn - go proxy.replyLoop(proxyConn, from, fromKey) + daddr, err := readDestFromCmsg(oob, proxy.ipVer) + if err != nil { + log.Printf("Failed to parse control message: %v", err) + delete(proxy.connTrackTable, *fromKey) // no replyLoop will ever own this entry + proxyConn.Close() + proxy.connTrackLock.Unlock() + continue + } + go proxy.replyLoop(proxyConn, daddr, from, fromKey) } proxy.connTrackLock.Unlock() for i := 0; i != read; { @@ -137,6 +165,35 @@ func (proxy *UDPProxy) Run() { } } +func readDestFromCmsg(oob []byte, ipVer ipVersion) (_ net.IP, err error) { + defer func() { + // In case of partial upgrade / downgrade, docker-proxy could read + // control messages from a socket which doesn't have the sockopt + // IP_PKTINFO enabled. In that case, the control message will be all-0 + // and Go's ControlMessage.Parse() will report an 'invalid header + // length' error. In that case, ignore the error and return an empty + // daddr - the kernel will pick a source address for us anyway (but + // maybe it'll be the wrong one). 
+ if err != nil && err.Error() == "invalid header length" { + err = nil + } + }() + + if ipVer == ip4 { + cm := &ipv4.ControlMessage{} + if err := cm.Parse(oob); err != nil { + return nil, err + } + return cm.Dst, nil + } + + cm := &ipv6.ControlMessage{} + if err := cm.Parse(oob); err != nil { + return nil, err + } + return cm.Dst, nil +} + // Close stops forwarding the traffic. func (proxy *UDPProxy) Close() { proxy.listener.Close() @@ -146,13 +203,3 @@ func (proxy *UDPProxy) Close() { conn.Close() } } - -func isClosedError(err error) bool { - /* This comparison is ugly, but unfortunately, net.go doesn't export errClosing. - * See: - * http://golang.org/src/pkg/net/net.go - * https://code.google.com/p/go/issues/detail?id=4337 - * https://groups.google.com/forum/#!msg/golang-nuts/0_aaCvBmOcM/SptmDyX1XJMJ - */ - return strings.HasSuffix(err.Error(), "use of closed network connection") -} diff --git a/integration/networking/port_mapping_linux_test.go b/integration/networking/port_mapping_linux_test.go index b882a3f49b..c217596f76 100644 --- a/integration/networking/port_mapping_linux_test.go +++ b/integration/networking/port_mapping_linux_test.go @@ -128,9 +128,9 @@ func TestDisableNAT(t *testing.T) { } } -// Check that a container on one network can reach a service in a container on -// another network, via a mapped port on the host. -func TestPortMappedHairpin(t *testing.T) { +// Check that a container on one network can reach a TCP service in a container +// on another network, via a mapped port on the host. +func TestPortMappedHairpinTCP(t *testing.T) { skip.If(t, testEnv.IsRootless) ctx := setupTest(t) @@ -174,6 +174,56 @@ func TestPortMappedHairpin(t *testing.T) { assert.Check(t, is.Contains(res.Stderr.String(), "404 Not Found")) } +// Check that a container on one network can reach a UDP service in a container +// on another network, via a mapped port on the host. +// Regression test for https://github.com/moby/libnetwork/issues/1729. 
+func TestPortMappedHairpinUDP(t *testing.T) { + skip.If(t, testEnv.IsRootless) + + ctx := setupTest(t) + d := daemon.New(t) + d.StartWithBusybox(ctx, t) + defer d.Stop(t) + c := d.NewClientT(t) + defer c.Close() + + // Find an address on the test host. + conn, err := net.Dial("tcp4", "hub.docker.com:80") + assert.NilError(t, err) + hostAddr := conn.LocalAddr().(*net.TCPAddr).IP.String() + conn.Close() + + const serverNetName = "servernet" + network.CreateNoError(ctx, t, c, serverNetName) + defer network.RemoveNoError(ctx, t, c, serverNetName) + const clientNetName = "clientnet" + network.CreateNoError(ctx, t, c, clientNetName) + defer network.RemoveNoError(ctx, t, c, clientNetName) + + serverId := container.Run(ctx, t, c, + container.WithNetworkMode(serverNetName), + container.WithExposedPorts("54/udp"), + container.WithPortMap(nat.PortMap{"54/udp": {{HostIP: "0.0.0.0"}}}), + container.WithCmd("/bin/sh", "-c", "echo 'foobar.internal 192.168.155.23' | dnsd -c - -p 54"), + ) + defer c.ContainerRemove(ctx, serverId, containertypes.RemoveOptions{Force: true}) + + inspect := container.Inspect(ctx, t, c, serverId) + hostPort := inspect.NetworkSettings.Ports["54/udp"][0].HostPort + + // nslookup gets an answer quickly from the DNS server, but then tries to + // query another DNS server (for some unknown reason) and times out. Hence, + // we need >5s to execute this test. + clientCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + res := container.RunAttach(clientCtx, t, c, + container.WithNetworkMode(clientNetName), + container.WithCmd("nslookup", "foobar.internal", net.JoinHostPort(hostAddr, hostPort)), + container.WithAutoRemove, + ) + assert.Check(t, is.Contains(res.Stdout.String(), "192.168.155.23")) +} + // Check that a container on an IPv4-only network can have a port mapping // from a specific IPv6 host address (using docker-proxy). 
// Regression test for https://github.com/moby/moby/issues/48067 (which diff --git a/libnetwork/drivers/bridge/port_mapping_linux.go b/libnetwork/drivers/bridge/port_mapping_linux.go index 0c3e3b89c7..9d0fa67d65 100644 --- a/libnetwork/drivers/bridge/port_mapping_linux.go +++ b/libnetwork/drivers/bridge/port_mapping_linux.go @@ -605,6 +605,22 @@ func bindTCPOrUDP(cfg portBindingReq, port, typ, proto int) (_ portBinding, retE if domain == syscall.AF_INET6 { syscall.SetsockoptInt(sd, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 1) } + if typ == syscall.SOCK_DGRAM { + // Enable IP_PKTINFO for UDP sockets to get the destination address. + // The destination address will be used as the source address when + // sending back replies coming from the container. + lvl := syscall.IPPROTO_IP + opt := syscall.IP_PKTINFO + optName := "IP_PKTINFO" + if domain == syscall.AF_INET6 { + lvl = syscall.IPPROTO_IPV6 + opt = syscall.IPV6_RECVPKTINFO + optName = "IPV6_RECVPKTINFO" + } + if err := syscall.SetsockoptInt(sd, lvl, opt, 1); err != nil { + return portBinding{}, fmt.Errorf("failed to setsockopt(%s) for %s: %w", optName, cfg, err) + } + } if err := syscall.Bind(sd, sa); err != nil { if cfg.HostPort == cfg.HostPortEnd { return portBinding{}, fmt.Errorf("failed to bind host port for %s: %w", cfg, err)