/* * nwfilter_learnipaddr.c: support for learning IP address used by a VM * on an interface * * Copyright (C) 2011, 2013, 2014 Red Hat, Inc. * Copyright (C) 2010 IBM Corp. * Copyright (C) 2010 Stefan Berger * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see * . * * Author: Stefan Berger */ #include #ifdef HAVE_LIBPCAP # include #endif #include #include #include #include #include #include #include #include "internal.h" #include "intprops.h" #include "virbuffer.h" #include "viralloc.h" #include "virlog.h" #include "datatypes.h" #include "virnetdev.h" #include "virerror.h" #include "virthread.h" #include "conf/nwfilter_params.h" #include "conf/domain_conf.h" #include "nwfilter_gentech_driver.h" #include "nwfilter_ebiptables_driver.h" #include "nwfilter_ipaddrmap.h" #include "nwfilter_learnipaddr.h" #include "virstring.h" #define VIR_FROM_THIS VIR_FROM_NWFILTER VIR_LOG_INIT("nwfilter.nwfilter_learnipaddr"); #define IFINDEX2STR(VARNAME, ifindex) \ char VARNAME[INT_BUFSIZE_BOUND(ifindex)]; \ snprintf(VARNAME, sizeof(VARNAME), "%d", ifindex); #define PKT_TIMEOUT_MS 500 /* ms */ /* structure of an ARP request/reply message */ struct f_arphdr { struct arphdr arphdr; uint8_t ar_sha[ETH_ALEN]; uint32_t ar_sip; uint8_t ar_tha[ETH_ALEN]; uint32_t ar_tip; } ATTRIBUTE_PACKED; struct dhcp_option { uint8_t code; uint8_t len; uint8_t value[0]; /* length varies */ } ATTRIBUTE_PACKED; /* structure representing DHCP message */ struct dhcp { uint8_t op; uint8_t htype; uint8_t hlen; uint8_t hops; uint32_t xid; uint16_t secs; uint16_t flags; uint32_t ciaddr; uint32_t yiaddr; uint32_t siaddr; uint32_t giaddr; uint8_t chaddr[16]; uint8_t zeroes[192]; uint32_t magic; struct dhcp_option options[0]; } ATTRIBUTE_PACKED; #define DHCP_MSGT_DHCPOFFER 2 #define DHCP_MSGT_DHCPACK 5 #define DHCP_OPT_BCASTADDRESS 28 #define DHCP_OPT_MESSAGETYPE 53 struct ether_vlan_header { uint8_t dhost[ETH_ALEN]; uint8_t shost[ETH_ALEN]; uint16_t vlan_type; uint16_t vlan_flags; uint16_t ether_type; } ATTRIBUTE_PACKED; static virMutex pendingLearnReqLock = VIR_MUTEX_INITIALIZER; static virHashTablePtr pendingLearnReq; static virMutex ifaceMapLock = VIR_MUTEX_INITIALIZER; static virHashTablePtr ifaceLockMap; typedef struct _virNWFilterIfaceLock virNWFilterIfaceLock; typedef virNWFilterIfaceLock *virNWFilterIfaceLockPtr; struct _virNWFilterIfaceLock { char ifname[IF_NAMESIZE]; virMutex lock; int refctr; }; static bool threadsTerminate; int virNWFilterLockIface(const char *ifname) { virNWFilterIfaceLockPtr ifaceLock; virMutexLock(&ifaceMapLock); ifaceLock = virHashLookup(ifaceLockMap, ifname); if (!ifaceLock) { if (VIR_ALLOC(ifaceLock) < 0) goto err_exit; if (virMutexInitRecursive(&ifaceLock->lock) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("mutex initialization failed")); VIR_FREE(ifaceLock); goto err_exit; } if (virStrcpyStatic(ifaceLock->ifname, ifname) == NULL) { virReportError(VIR_ERR_INTERNAL_ERROR, _("interface name %s does not fit into " "buffer "), ifaceLock->ifname); VIR_FREE(ifaceLock); goto err_exit; } while (virHashAddEntry(ifaceLockMap, ifname, ifaceLock)) { VIR_FREE(ifaceLock); goto err_exit; } ifaceLock->refctr = 0; } ifaceLock->refctr++; virMutexUnlock(&ifaceMapLock); virMutexLock(&ifaceLock->lock); return 0; err_exit: virMutexUnlock(&ifaceMapLock); return -1; } void virNWFilterUnlockIface(const char *ifname) { virNWFilterIfaceLockPtr ifaceLock; virMutexLock(&ifaceMapLock); ifaceLock = virHashLookup(ifaceLockMap, ifname); if (ifaceLock) { virMutexUnlock(&ifaceLock->lock); ifaceLock->refctr--; if (ifaceLock->refctr == 0) virHashRemoveEntry(ifaceLockMap, ifname); } virMutexUnlock(&ifaceMapLock); } static void virNWFilterIPAddrLearnReqFree(virNWFilterIPAddrLearnReqPtr req) { if (!req) return; VIR_FREE(req->filtername); virNWFilterHashTableFree(req->filterparams); VIR_FREE(req); } #if HAVE_LIBPCAP static int virNWFilterRegisterLearnReq(virNWFilterIPAddrLearnReqPtr req) { int res = -1; IFINDEX2STR(ifindex_str, req->ifindex); virMutexLock(&pendingLearnReqLock); if (!virHashLookup(pendingLearnReq, ifindex_str)) res = virHashAddEntry(pendingLearnReq, ifindex_str, req); virMutexUnlock(&pendingLearnReqLock); return res; } #endif int virNWFilterTerminateLearnReq(const char *ifname) { int rc = -1; int ifindex; virNWFilterIPAddrLearnReqPtr req; /* It's possible that it's already been removed as a result of * virNWFilterDeregisterLearnReq during learnIPAddressThread() exit */ if (virNetDevExists(ifname) != 1) { virResetLastError(); return 0; } if (virNetDevGetIndex(ifname, &ifindex) < 0) { virResetLastError(); return rc; } IFINDEX2STR(ifindex_str, ifindex); virMutexLock(&pendingLearnReqLock); req = virHashLookup(pendingLearnReq, ifindex_str); if (req) { rc = 0; req->terminate = true; } virMutexUnlock(&pendingLearnReqLock); return rc; } virNWFilterIPAddrLearnReqPtr virNWFilterLookupLearnReq(int ifindex) { void *res; IFINDEX2STR(ifindex_str, ifindex); virMutexLock(&pendingLearnReqLock); res = virHashLookup(pendingLearnReq, ifindex_str); virMutexUnlock(&pendingLearnReqLock); return res; } static void freeLearnReqEntry(void *payload, const void *name ATTRIBUTE_UNUSED) { virNWFilterIPAddrLearnReqFree(payload); } #ifdef HAVE_LIBPCAP static virNWFilterIPAddrLearnReqPtr virNWFilterDeregisterLearnReq(int ifindex) { virNWFilterIPAddrLearnReqPtr res; IFINDEX2STR(ifindex_str, ifindex); virMutexLock(&pendingLearnReqLock); res = virHashSteal(pendingLearnReq, ifindex_str); virMutexUnlock(&pendingLearnReqLock); return res; } #endif #ifdef HAVE_LIBPCAP static void procDHCPOpts(struct dhcp *dhcp, int dhcp_opts_len, uint32_t *vmaddr, uint32_t *bcastaddr, enum howDetect *howDetected) { struct dhcp_option *dhcpopt = &dhcp->options[0]; while (dhcp_opts_len >= 2) { switch (dhcpopt->code) { case DHCP_OPT_BCASTADDRESS: /* Broadcast address */ if (dhcp_opts_len >= 6) { VIR_WARNINGS_NO_CAST_ALIGN uint32_t *tmp = (uint32_t *)&dhcpopt->value; VIR_WARNINGS_RESET (*bcastaddr) = ntohl(*tmp); } break; case DHCP_OPT_MESSAGETYPE: /* Message type */ if (dhcp_opts_len >= 3) { uint8_t *val = (uint8_t *)&dhcpopt->value; switch (*val) { case DHCP_MSGT_DHCPACK: case DHCP_MSGT_DHCPOFFER: *vmaddr = dhcp->yiaddr; *howDetected = DETECT_DHCP; break; } } } dhcp_opts_len -= (2 + dhcpopt->len); dhcpopt = (struct dhcp_option*)((char *)dhcpopt + 2 + dhcpopt->len); } } /** * learnIPAddressThread * arg: pointer to virNWFilterIPAddrLearnReq structure * * Learn the IP address being used on an interface. Use ARP Request and * Reply messages, DHCP offers and the first IP packet being sent from * the VM to detect the IP address it is using. Detects only one IP address * per interface (IP aliasing not supported). The method on how the * IP address is detected can be chosen through flags. DETECT_DHCP will * require that the IP address is detected from a DHCP OFFER, DETECT_STATIC * will require that the IP address was taken from an ARP packet or an IPv4 * packet. Both flags can be set at the same time. */ static void learnIPAddressThread(void *arg) { char errbuf[PCAP_ERRBUF_SIZE] = {0}; pcap_t *handle = NULL; struct bpf_program fp; struct pcap_pkthdr header; const u_char *packet; struct ether_header *ether_hdr; struct ether_vlan_header *vlan_hdr; virNWFilterIPAddrLearnReqPtr req = arg; uint32_t vmaddr = 0, bcastaddr = 0; unsigned int ethHdrSize; char *listen_if = (strlen(req->linkdev) != 0) ? req->linkdev : req->ifname; int dhcp_opts_len; char macaddr[VIR_MAC_STRING_BUFLEN]; virBuffer buf = VIR_BUFFER_INITIALIZER; char *filter = NULL; uint16_t etherType; bool showError = true; enum howDetect howDetected = 0; virNWFilterTechDriverPtr techdriver = req->techdriver; if (virNWFilterLockIface(req->ifname) < 0) goto err_no_lock; req->status = 0; /* anything change to the VM's interface -- check at least once */ if (virNetDevValidateConfig(req->ifname, NULL, req->ifindex) <= 0) { virResetLastError(); req->status = ENODEV; goto done; } handle = pcap_open_live(listen_if, BUFSIZ, 0, PKT_TIMEOUT_MS, errbuf); if (handle == NULL) { VIR_DEBUG("Couldn't open device %s: %s", listen_if, errbuf); req->status = ENODEV; goto done; } virMacAddrFormat(&req->macaddr, macaddr); switch (req->howDetect) { case DETECT_DHCP: if (techdriver->applyDHCPOnlyRules(req->ifname, &req->macaddr, NULL, false) < 0) { req->status = EINVAL; goto done; } virBufferAddLit(&buf, "src port 67 and dst port 68"); break; default: if (techdriver->applyBasicRules(req->ifname, &req->macaddr) < 0) { req->status = EINVAL; goto done; } virBufferAsprintf(&buf, "ether host %s or ether dst ff:ff:ff:ff:ff:ff", macaddr); } if (virBufferError(&buf)) { req->status = ENOMEM; goto done; } filter = virBufferContentAndReset(&buf); if (pcap_compile(handle, &fp, filter, 1, 0) != 0) { VIR_DEBUG("Couldn't compile filter '%s'", filter); req->status = EINVAL; goto done; } if (pcap_setfilter(handle, &fp) != 0) { VIR_DEBUG("Couldn't set filter '%s'", filter); req->status = EINVAL; pcap_freecode(&fp); goto done; } pcap_freecode(&fp); while (req->status == 0 && vmaddr == 0) { packet = pcap_next(handle, &header); if (!packet) { if (threadsTerminate || req->terminate) { req->status = ECANCELED; showError = false; break; } /* check whether VM's dev is still there */ if (virNetDevValidateConfig(req->ifname, NULL, req->ifindex) <= 0) { virResetLastError(); req->status = ENODEV; showError = false; break; } continue; } if (header.len >= sizeof(struct ether_header)) { ether_hdr = (struct ether_header*)packet; switch (ntohs(ether_hdr->ether_type)) { case ETHERTYPE_IP: ethHdrSize = sizeof(struct ether_header); etherType = ntohs(ether_hdr->ether_type); break; case ETHERTYPE_VLAN: ethHdrSize = sizeof(struct ether_vlan_header); vlan_hdr = (struct ether_vlan_header *)packet; if (ntohs(vlan_hdr->ether_type) != ETHERTYPE_IP || header.len < ethHdrSize) continue; etherType = ntohs(vlan_hdr->ether_type); break; default: continue; } if (virMacAddrCmpRaw(&req->macaddr, ether_hdr->ether_shost) == 0) { /* packets from the VM */ if (etherType == ETHERTYPE_IP && (header.len >= ethHdrSize + sizeof(struct iphdr))) { VIR_WARNINGS_NO_CAST_ALIGN struct iphdr *iphdr = (struct iphdr*)(packet + ethHdrSize); VIR_WARNINGS_RESET vmaddr = iphdr->saddr; /* skip mcast addresses (224.0.0.0 - 239.255.255.255), * class E (240.0.0.0 - 255.255.255.255, includes eth. * bcast) and zero address in DHCP Requests */ if ((ntohl(vmaddr) & 0xe0000000) == 0xe0000000 || vmaddr == 0) { vmaddr = 0; continue; } howDetected = DETECT_STATIC; } else if (etherType == ETHERTYPE_ARP && (header.len >= ethHdrSize + sizeof(struct f_arphdr))) { VIR_WARNINGS_NO_CAST_ALIGN struct f_arphdr *arphdr = (struct f_arphdr*)(packet + ethHdrSize); VIR_WARNINGS_RESET switch (ntohs(arphdr->arphdr.ar_op)) { case ARPOP_REPLY: vmaddr = arphdr->ar_sip; howDetected = DETECT_STATIC; break; case ARPOP_REQUEST: vmaddr = arphdr->ar_tip; howDetected = DETECT_STATIC; break; } } } else if (virMacAddrCmpRaw(&req->macaddr, ether_hdr->ether_dhost) == 0 || /* allow Broadcast replies from DHCP server */ virMacAddrIsBroadcastRaw(ether_hdr->ether_dhost)) { /* packets to the VM */ if (etherType == ETHERTYPE_IP && (header.len >= ethHdrSize + sizeof(struct iphdr))) { VIR_WARNINGS_NO_CAST_ALIGN struct iphdr *iphdr = (struct iphdr*)(packet + ethHdrSize); VIR_WARNINGS_RESET if ((iphdr->protocol == IPPROTO_UDP) && (header.len >= ethHdrSize + iphdr->ihl * 4 + sizeof(struct udphdr))) { VIR_WARNINGS_NO_CAST_ALIGN struct udphdr *udphdr = (struct udphdr *) ((char *)iphdr + iphdr->ihl * 4); VIR_WARNINGS_RESET if (ntohs(udphdr->source) == 67 && ntohs(udphdr->dest) == 68 && header.len >= ethHdrSize + iphdr->ihl * 4 + sizeof(struct udphdr) + sizeof(struct dhcp)) { struct dhcp *dhcp = (struct dhcp *) ((char *)udphdr + sizeof(udphdr)); if (dhcp->op == 2 /* BOOTREPLY */ && virMacAddrCmpRaw( &req->macaddr, &dhcp->chaddr[0]) == 0) { dhcp_opts_len = header.len - (ethHdrSize + iphdr->ihl * 4 + sizeof(struct udphdr) + sizeof(struct dhcp)); procDHCPOpts(dhcp, dhcp_opts_len, &vmaddr, &bcastaddr, &howDetected); } } } } } } if (vmaddr && (req->howDetect & howDetected) == 0) { vmaddr = 0; howDetected = 0; } } /* while */ done: VIR_FREE(filter); if (handle) pcap_close(handle); if (req->status == 0) { int ret; virSocketAddr sa; sa.len = sizeof(sa.data.inet4); sa.data.inet4.sin_family = AF_INET; sa.data.inet4.sin_addr.s_addr = vmaddr; char *inetaddr; /* It is necessary to unlock interface here to avoid updateMutex and * interface ordering deadlocks. Otherwise we are going to * instantiate the filter, which will try to lock updateMutex, and * some other thread instantiating a filter in parallel is holding * updateMutex and is trying to lock interface, both will deadlock. * Also it is safe to unlock interface here because we stopped * capturing and applied necessary rules on the interface, while * instantiating a new filter doesn't require a locked interface.*/ virNWFilterUnlockIface(req->ifname); if ((inetaddr = virSocketAddrFormat(&sa)) != NULL) { if (virNWFilterIPAddrMapAddIPAddr(req->ifname, inetaddr) < 0) { VIR_ERROR(_("Failed to add IP address %s to IP address " "cache for interface %s"), inetaddr, req->ifname); } ret = virNWFilterInstantiateFilterLate(req->driver, NULL, req->ifname, req->ifindex, req->linkdev, &req->macaddr, req->filtername, req->filterparams); VIR_DEBUG("Result from applying firewall rules on " "%s with IP addr %s : %d", req->ifname, inetaddr, ret); VIR_FREE(inetaddr); } } else { if (showError) virReportSystemError(req->status, _("encountered an error on interface %s " "index %d"), req->ifname, req->ifindex); techdriver->applyDropAllRules(req->ifname); virNWFilterUnlockIface(req->ifname); } VIR_DEBUG("pcap thread terminating for interface %s", req->ifname); err_no_lock: virNWFilterDeregisterLearnReq(req->ifindex); virNWFilterIPAddrLearnReqFree(req); } /** * virNWFilterLearnIPAddress * @techdriver : driver to build firewalls * @ifname: the name of the interface * @ifindex: the index of the interface * @linkdev : the name of the link device; currently only used in case of a * macvtap device * @macaddr : the MAC address of the interface * @filtername : the name of the top-level filter to apply to the interface * once its IP address has been detected * @driver : the network filter driver * @howDetect : the method on how the thread is supposed to detect the * IP address; must choose any of the available flags * * Instruct to learn the IP address being used on a given interface (ifname). * Unless there already is a thread attempting to learn the IP address * being used on the interface, a thread is started that will listen on * the traffic being sent on the interface (or link device) with the * MAC address that is provided. Will then launch the application of the * firewall rules on the interface. */ int virNWFilterLearnIPAddress(virNWFilterTechDriverPtr techdriver, const char *ifname, int ifindex, const char *linkdev, const virMacAddr *macaddr, const char *filtername, virNWFilterHashTablePtr filterparams, virNWFilterDriverStatePtr driver, enum howDetect howDetect) { int rc; virThread thread; virNWFilterIPAddrLearnReqPtr req = NULL; virNWFilterHashTablePtr ht = NULL; if (howDetect == 0) return -1; if (!techdriver->canApplyBasicRules()) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("IP parameter must be provided since " "snooping the IP address does not work " "possibly due to missing tools")); return -1; } if (VIR_ALLOC(req) < 0) goto err_no_req; ht = virNWFilterHashTableCreate(0); if (ht == NULL) goto err_free_req; if (virNWFilterHashTablePutAll(filterparams, ht) < 0) goto err_free_ht; if (VIR_STRDUP(req->filtername, filtername) < 0) goto err_free_ht; if (virStrcpyStatic(req->ifname, ifname) == NULL) { virReportError(VIR_ERR_INTERNAL_ERROR, _("Destination buffer for ifname ('%s') " "not large enough"), ifname); goto err_free_ht; } if (linkdev) { if (virStrcpyStatic(req->linkdev, linkdev) == NULL) { virReportError(VIR_ERR_INTERNAL_ERROR, _("Destination buffer for linkdev ('%s') " "not large enough"), linkdev); goto err_free_ht; } } req->ifindex = ifindex; virMacAddrSet(&req->macaddr, macaddr); req->driver = driver; req->filterparams = ht; ht = NULL; req->howDetect = howDetect; req->techdriver = techdriver; rc = virNWFilterRegisterLearnReq(req); if (rc < 0) goto err_free_req; if (virThreadCreate(&thread, false, learnIPAddressThread, req) != 0) goto err_dereg_req; return 0; err_dereg_req: virNWFilterDeregisterLearnReq(ifindex); err_free_ht: virNWFilterHashTableFree(ht); err_free_req: virNWFilterIPAddrLearnReqFree(req); err_no_req: return -1; } #else int virNWFilterLearnIPAddress(virNWFilterTechDriverPtr techdriver ATTRIBUTE_UNUSED, const char *ifname ATTRIBUTE_UNUSED, int ifindex ATTRIBUTE_UNUSED, const char *linkdev ATTRIBUTE_UNUSED, const virMacAddr *macaddr ATTRIBUTE_UNUSED, const char *filtername ATTRIBUTE_UNUSED, virNWFilterHashTablePtr filterparams ATTRIBUTE_UNUSED, virNWFilterDriverStatePtr driver ATTRIBUTE_UNUSED, enum howDetect howDetect ATTRIBUTE_UNUSED) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("IP parameter must be given since libvirt " "was not compiled with IP address learning " "support")); return -1; } #endif /* HAVE_LIBPCAP */ /** * virNWFilterLearnInit * Initialization of this layer */ int virNWFilterLearnInit(void) { if (pendingLearnReq) return 0; VIR_DEBUG("Initializing IP address learning"); threadsTerminate = false; pendingLearnReq = virHashCreate(0, freeLearnReqEntry); if (!pendingLearnReq) return -1; ifaceLockMap = virHashCreate(0, virHashValueFree); if (!ifaceLockMap) { virNWFilterLearnShutdown(); return -1; } return 0; } void virNWFilterLearnThreadsTerminate(bool allowNewThreads) { threadsTerminate = true; while (virHashSize(pendingLearnReq) != 0) usleep((PKT_TIMEOUT_MS * 1000) / 3); if (allowNewThreads) threadsTerminate = false; } /** * virNWFilterLearnShutdown * Shutdown of this layer */ void virNWFilterLearnShutdown(void) { if (!pendingLearnReq) return; virNWFilterLearnThreadsTerminate(false); virHashFree(pendingLearnReq); pendingLearnReq = NULL; virHashFree(ifaceLockMap); ifaceLockMap = NULL; }