From 0f7436ca54c9db2d5460bd54e7613f64f4727102 Mon Sep 17 00:00:00 2001 From: Maxim Perevedentsev Date: Tue, 20 Oct 2015 18:44:18 +0300 Subject: [PATCH] network: wait for DAD to finish for bridge IPv6 addresses commit db488c79 assumed that dnsmasq would complete IPv6 DAD before daemonizing, but in reality it doesn't wait, which creates problems when libvirt's bridge driver sets the matching "dummy tap device" to IFF_DOWN prior to DAD completing. This patch waits for DAD completion by periodically polling the kernel using netlink to check whether there are any IPv6 addresses assigned to bridge which have a 'tentative' state (if there are any in this state, then DAD hasn't yet finished). After DAD is finished, execution continues. To avoid an endless hang in case something was wrong with the kernel's DAD, we wait a maximum of 5 seconds. --- src/libvirt_private.syms | 1 + src/network/bridge_driver.c | 36 +++++++++++- src/util/virnetdev.c | 110 ++++++++++++++++++++++++++++++++++++ src/util/virnetdev.h | 2 + 4 files changed, 147 insertions(+), 2 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4b7e1657f8..a835f18e9e 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1792,6 +1792,7 @@ virNetDevSetRcvMulti; virNetDevSetupControl; virNetDevSysfsFile; virNetDevValidateConfig; +virNetDevWaitDadFinish; # util/virnetdevbandwidth.h diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c index cd9a51e1c2..f838671ad4 100644 --- a/src/network/bridge_driver.c +++ b/src/network/bridge_driver.c @@ -2025,6 +2025,32 @@ networkAddRouteToBridge(virNetworkObjPtr network, return 0; } +static int +networkWaitDadFinish(virNetworkObjPtr network) +{ + virNetworkIpDefPtr ipdef; + virSocketAddrPtr *addrs = NULL, addr = NULL; + size_t naddrs = 0; + int ret = -1; + + VIR_DEBUG("Begin waiting for IPv6 DAD on network %s", network->def->name); + + while ((ipdef = virNetworkDefGetIpByIndex(network->def, + AF_INET6, naddrs))) { + addr = &ipdef->address; + if (VIR_APPEND_ELEMENT_COPY(addrs, naddrs, addr) < 0) + goto cleanup; + } + + ret = (naddrs == 0) ? 0 : virNetDevWaitDadFinish(addrs, naddrs); + + cleanup: + VIR_FREE(addrs); + VIR_DEBUG("Finished waiting for IPv6 DAD on network %s with status %d", + network->def->name, ret); + return ret; +} + static int networkStartNetworkVirtual(virNetworkDriverStatePtr driver, virNetworkObjPtr network) @@ -2159,8 +2185,14 @@ networkStartNetworkVirtual(virNetworkDriverStatePtr driver, if (v6present && networkStartRadvd(driver, network) < 0) goto err4; - /* DAD has happened (dnsmasq waits for it), dnsmasq is now bound to the - * bridge's IPv6 address, so we can now set the dummy tun down. + /* dnsmasq does not wait for DAD to complete before daemonizing, + * so we need to wait for it ourselves. + */ + if (v6present && networkWaitDadFinish(network) < 0) + goto err4; + + /* DAD has finished, dnsmasq is now bound to the + * bridge's IPv6 address, so we can set the dummy tun down. */ if (tapfd >= 0) { if (virNetDevSetOnline(macTapIfName, false) < 0) diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index 579ff3bd7c..b84437e464 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -96,6 +96,7 @@ VIR_LOG_INIT("util.netdev"); # define FEATURE_BIT_IS_SET(blocks, index, field) \ (FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index)) #endif +#define VIR_DAD_WAIT_TIMEOUT 5 /* seconds */ typedef enum { VIR_MCAST_TYPE_INDEX_TOKEN, @@ -1305,6 +1306,106 @@ int virNetDevClearIPAddress(const char *ifname, return ret; } +/* return true if there is a known address with 'tentative' flag set */ +static bool +virNetDevParseDadStatus(struct nlmsghdr *nlh, int len, + virSocketAddrPtr *addrs, size_t count) +{ + struct ifaddrmsg *ifaddrmsg_ptr; + unsigned int ifaddrmsg_len; + struct rtattr *rtattr_ptr; + size_t i; + struct in6_addr *addr; + for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) { + if (NLMSG_PAYLOAD(nlh, 0) < sizeof(struct ifaddrmsg)) { + /* Message without payload is the last one. */ + break; + } + + ifaddrmsg_ptr = (struct ifaddrmsg *)NLMSG_DATA(nlh); + if (!(ifaddrmsg_ptr->ifa_flags & IFA_F_TENTATIVE)) { + /* Not tentative: we are not interested in this entry. */ + continue; + } + + ifaddrmsg_len = IFA_PAYLOAD(nlh); + rtattr_ptr = (struct rtattr *) IFA_RTA(ifaddrmsg_ptr); + for (; RTA_OK(rtattr_ptr, ifaddrmsg_len); + rtattr_ptr = RTA_NEXT(rtattr_ptr, ifaddrmsg_len)) { + if (RTA_PAYLOAD(rtattr_ptr) != sizeof(struct in6_addr)) { + /* No address: ignore. */ + continue; + } + + /* We check only known addresses. */ + for (i = 0; i < count; i++) { + addr = &addrs[i]->data.inet6.sin6_addr; + if (!memcmp(addr, RTA_DATA(rtattr_ptr), + sizeof(struct in6_addr))) { + /* We found matching tentative address. */ + return true; + } + } + } + } + return false; +} + +/* return after DAD finishes for all known IPv6 addresses or an error */ +int +virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count) +{ + struct nl_msg *nlmsg = NULL; + struct ifaddrmsg ifa; + struct nlmsghdr *resp = NULL; + unsigned int recvbuflen; + int ret = -1; + bool dad = true; + time_t max_time = time(NULL) + VIR_DAD_WAIT_TIMEOUT; + + if (!(nlmsg = nlmsg_alloc_simple(RTM_GETADDR, + NLM_F_REQUEST | NLM_F_DUMP))) { + virReportOOMError(); + return -1; + } + + memset(&ifa, 0, sizeof(ifa)); + /* DAD is for IPv6 adresses only. */ + ifa.ifa_family = AF_INET6; + if (nlmsg_append(nlmsg, &ifa, sizeof(ifa), NLMSG_ALIGNTO) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("allocated netlink buffer is too small")); + goto cleanup; + } + + /* Periodically query netlink until DAD finishes on all known addresses. */ + while (dad && time(NULL) < max_time) { + if (virNetlinkCommand(nlmsg, &resp, &recvbuflen, 0, 0, + NETLINK_ROUTE, 0) < 0) + goto cleanup; + + if (virNetlinkGetErrorCode(resp, recvbuflen) < 0) { + virReportError(VIR_ERR_SYSTEM_ERROR, "%s", + _("error reading DAD state information")); + goto cleanup; + } + + /* Parse response. */ + dad = virNetDevParseDadStatus(resp, recvbuflen, addrs, count); + if (dad) + usleep(1000 * 10); + + VIR_FREE(resp); + } + /* Check timeout. */ + ret = dad ? -1 : 0; + + cleanup: + VIR_FREE(resp); + nlmsg_free(nlmsg); + return ret; +} + #else /* defined(__linux__) && defined(HAVE_LIBNL) */ int virNetDevSetIPAddress(const char *ifname, @@ -1424,6 +1525,15 @@ int virNetDevClearIPAddress(const char *ifname, return ret; } +/* return after DAD finishes for all known IPv6 addresses or an error */ +int +virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count) +{ + virReportSystemError(ENOSYS, "%s", + _("Unable to wait for IPv6 DAD on this platform")); + return -1; +} + #endif /* defined(__linux__) && defined(HAVE_LIBNL) */ /** diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index a27504b8d9..9108be6b61 100644 --- a/src/util/virnetdev.h +++ b/src/util/virnetdev.h @@ -105,6 +105,8 @@ int virNetDevClearIPAddress(const char *ifname, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; int virNetDevGetIPAddress(const char *ifname, virSocketAddrPtr addr) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; +int virNetDevWaitDadFinish(virSocketAddrPtr *addrs, size_t count) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); int virNetDevSetMAC(const char *ifname, -- GitLab