diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index f6a1cd9d72a617e69efa5ee15c1da045fb81503f..72ab9ac34a300328c63db3558b154d87e179a1af 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -174,11 +174,9 @@ struct igb_tx_buffer {
 };
 
 struct igb_rx_buffer {
-	struct sk_buff *skb;
 	dma_addr_t dma;
 	struct page *page;
-	dma_addr_t page_dma;
-	u32 page_offset;
+	unsigned int page_offset;
 };
 
 struct igb_tx_queue_stats {
@@ -251,6 +249,7 @@ struct igb_ring {
 		};
 		/* RX */
 		struct {
+			struct sk_buff *skb;
 			struct igb_rx_queue_stats rx_stats;
 			struct u64_stats_sync rx_syncp;
 		};
@@ -451,13 +450,11 @@ static inline void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector,
 				       union e1000_adv_rx_desc *rx_desc,
 				       struct sk_buff *skb)
 {
-	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
-		igb_ptp_rx_pktstamp(q_vector, skb->data, skb);
-		skb_pull(skb, IGB_TS_HDR_LEN);
-	} else if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS)) {
+	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS) &&
+	    !igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))
 		igb_ptp_rx_rgtstamp(q_vector, skb);
-	}
 }
+
 extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev,
 				  struct ifreq *ifr, int cmd);
 #endif /* CONFIG_IGB_PTP */
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 2ea012849825224af910ba7189aa850b58610806..0faac423bd5b5e4502ab8e38c59050f8f3878f69 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include
 
 #include "igb.h"
 
@@ -1685,16 +1686,24 @@ static void igb_create_lbtest_frame(struct sk_buff *skb,
 	memset(&skb->data[frame_size + 12], 0xAF, 1);
 }
 
-static int igb_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size)
+static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer,
+				  unsigned int frame_size)
 {
-	frame_size /= 2;
-	if (*(skb->data + 3) == 0xFF) {
-		if ((*(skb->data + frame_size + 10) == 0xBE) &&
-		    (*(skb->data + frame_size + 12) == 0xAF)) {
-			return 0;
-		}
-	}
-	return 13;
+	unsigned char *data;
+	bool match = true;
+
+	frame_size >>= 1;
+
+	data = kmap(rx_buffer->page) + rx_buffer->page_offset;
+
+	if (data[3] != 0xFF ||
+	    data[frame_size + 10] != 0xBE ||
+	    data[frame_size + 12] != 0xAF)
+		match = false;
+
+	kunmap(rx_buffer->page);
+
+	return match;
 }
 
 static int igb_clean_test_rings(struct igb_ring *rx_ring,
@@ -1720,12 +1729,12 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring,
 		/* unmap rx buffer, will be remapped by alloc_rx_buffers */
 		dma_unmap_single(rx_ring->dev,
 				 rx_buffer_info->dma,
-				 IGB_RX_HDR_LEN,
+				 PAGE_SIZE / 2,
 				 DMA_FROM_DEVICE);
 		rx_buffer_info->dma = 0;
 
 		/* verify contents of skb */
-		if (!igb_check_lbtest_frame(rx_buffer_info->skb, size))
+		if (igb_check_lbtest_frame(rx_buffer_info, size))
 			count++;
 
 		/* unmap buffer on tx side */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c611cffa788ff4d8c1018a9b705b6affa97443db..665eafa401d4b8e861f8680657bed653a4aadad3 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -534,25 +534,21 @@ static void igb_dump(struct igb_adapter *adapter)
 		if (staterr & E1000_RXD_STAT_DD) {
 			/* Descriptor Done */
-			pr_info("%s[0x%03X] %016llX %016llX -------"
-				"--------- %p%s\n", "RWB", i,
+			pr_info("%s[0x%03X] %016llX %016llX ---------------- %s\n",
+				"RWB", i,
 				le64_to_cpu(u0->a),
 				le64_to_cpu(u0->b),
-				buffer_info->skb, next_desc);
+				next_desc);
 		} else {
-			pr_info("%s[0x%03X] %016llX %016llX %016llX"
-				" %p%s\n", "R  ", i,
+			pr_info("%s[0x%03X] %016llX %016llX %016llX %s\n",
+				"R  ", i,
 				le64_to_cpu(u0->a),
 				le64_to_cpu(u0->b),
 				(u64)buffer_info->dma,
-				buffer_info->skb, next_desc);
+				next_desc);
 
 			if (netif_msg_pktdata(adapter) &&
-			    buffer_info->dma && buffer_info->skb) {
-				print_hex_dump(KERN_INFO, "",
-				  DUMP_PREFIX_ADDRESS,
-				  16, 1, buffer_info->skb->data,
-				  IGB_RX_HDR_LEN, true);
+			    buffer_info->dma && buffer_info->page) {
 				print_hex_dump(KERN_INFO, "",
 					       DUMP_PREFIX_ADDRESS,
 					       16, 1,
@@ -3111,7 +3107,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
 #else
 	srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 #endif
-	srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+	srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
 #ifdef CONFIG_IGB_PTP
 	if (hw->mac.type >= e1000_82580)
 		srrctl |= E1000_SRRCTL_TIMESTAMP;
@@ -3305,36 +3301,27 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
 	unsigned long size;
 	u16 i;
 
+	if (rx_ring->skb)
+		dev_kfree_skb(rx_ring->skb);
+	rx_ring->skb = NULL;
+
 	if (!rx_ring->rx_buffer_info)
 		return;
 
 	/* Free all the Rx ring sk_buffs */
 	for (i = 0; i < rx_ring->count; i++) {
 		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
-		if (buffer_info->dma) {
-			dma_unmap_single(rx_ring->dev,
-					 buffer_info->dma,
-					 IGB_RX_HDR_LEN,
-					 DMA_FROM_DEVICE);
-			buffer_info->dma = 0;
-		}
 
-		if (buffer_info->skb) {
-			dev_kfree_skb(buffer_info->skb);
-			buffer_info->skb = NULL;
-		}
-		if (buffer_info->page_dma) {
+		if (buffer_info->dma)
 			dma_unmap_page(rx_ring->dev,
-				       buffer_info->page_dma,
+				       buffer_info->dma,
 				       PAGE_SIZE / 2,
 				       DMA_FROM_DEVICE);
-			buffer_info->page_dma = 0;
-		}
-		if (buffer_info->page) {
-			put_page(buffer_info->page);
-			buffer_info->page = NULL;
-			buffer_info->page_offset = 0;
-		}
+		buffer_info->dma = 0;
+		if (buffer_info->page)
+			__free_page(buffer_info->page);
+		buffer_info->page = NULL;
+		buffer_info->page_offset = 0;
 	}
 
 	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
@@ -5906,23 +5893,219 @@ static void igb_rx_vlan(struct igb_ring *ring,
 	}
 }
 
-static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
+/**
+ * igb_get_headlen - determine size of header for LRO/GRO
+ * @data: pointer to the start of the headers
+ * @max_len: total length of section to find headers in
+ *
+ * This function is meant to determine the length of headers that will
+ * be recognized by hardware for LRO, and GRO offloads. The main
+ * motivation of doing this is to only perform one pull for IPv4 TCP
+ * packets so that we can do basic things like calculating the gso_size
+ * based on the average data per packet.
+ **/
+static unsigned int igb_get_headlen(unsigned char *data,
+				    unsigned int max_len)
+{
+	union {
+		unsigned char *network;
+		/* l2 headers */
+		struct ethhdr *eth;
+		struct vlan_hdr *vlan;
+		/* l3 headers */
+		struct iphdr *ipv4;
+		struct ipv6hdr *ipv6;
+	} hdr;
+	__be16 protocol;
+	u8 nexthdr = 0;	/* default to not TCP */
+	u8 hlen;
+
+	/* this should never happen, but better safe than sorry */
+	if (max_len < ETH_HLEN)
+		return max_len;
+
+	/* initialize network frame pointer */
+	hdr.network = data;
+
+	/* set first protocol and move network header forward */
+	protocol = hdr.eth->h_proto;
+	hdr.network += ETH_HLEN;
+
+	/* handle any vlan tag if present */
+	if (protocol == __constant_htons(ETH_P_8021Q)) {
+		if ((hdr.network - data) > (max_len - VLAN_HLEN))
+			return max_len;
+
+		protocol = hdr.vlan->h_vlan_encapsulated_proto;
+		hdr.network += VLAN_HLEN;
+	}
+
+	/* handle L3 protocols */
+	if (protocol == __constant_htons(ETH_P_IP)) {
+		if ((hdr.network - data) > (max_len - sizeof(struct iphdr)))
+			return max_len;
+
+		/* access ihl as a u8 to avoid unaligned access on ia64 */
+		hlen = (hdr.network[0] & 0x0F) << 2;
+
+		/* verify hlen meets minimum size requirements */
+		if (hlen < sizeof(struct iphdr))
+			return hdr.network - data;
+
+		/* record next protocol */
+		nexthdr = hdr.ipv4->protocol;
+		hdr.network += hlen;
+	} else if (protocol == __constant_htons(ETH_P_IPV6)) {
+		if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr)))
+			return max_len;
+
+		/* record next protocol */
+		nexthdr = hdr.ipv6->nexthdr;
+		hdr.network += sizeof(struct ipv6hdr);
+	} else {
+		return hdr.network - data;
+	}
+
+	/* finally sort out TCP */
+	if (nexthdr == IPPROTO_TCP) {
+		if ((hdr.network - data) > (max_len - sizeof(struct tcphdr)))
+			return max_len;
+
+		/* access doff as a u8 to avoid unaligned access on ia64 */
+		hlen = (hdr.network[12] & 0xF0) >> 2;
+
+		/* verify hlen meets minimum size requirements */
+		if (hlen < sizeof(struct tcphdr))
+			return hdr.network - data;
+
+		hdr.network += hlen;
+	} else if (nexthdr == IPPROTO_UDP) {
+		if ((hdr.network - data) > (max_len - sizeof(struct udphdr)))
+			return max_len;
+
+		hdr.network += sizeof(struct udphdr);
+	}
+
+	/*
+	 * If everything has gone correctly hdr.network should be the
+	 * data section of the packet and will be the end of the header.
+	 * If not then it probably represents the end of the last recognized
+	 * header.
+	 */
+	if ((hdr.network - data) < max_len)
+		return hdr.network - data;
+	else
+		return max_len;
+}
+
+/**
+ * igb_pull_tail - igb specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an igb specific version of __pskb_pull_tail. The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void igb_pull_tail(struct igb_ring *rx_ring,
+			  union e1000_adv_rx_desc *rx_desc,
+			  struct sk_buff *skb)
 {
-	/* HW will not DMA in data larger than the given buffer, even if it
-	 * parses the (NFS, of course) header to be larger.  In that case, it
-	 * fills the header buffer and spills the rest into the page.
+	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+	unsigned char *va;
+	unsigned int pull_len;
+
+	/*
+	 * it is valid to use page_address instead of kmap since we are
+	 * working with pages allocated out of the lomem pool per
+	 * alloc_page(GFP_ATOMIC)
 	 */
-	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
-		   E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
-	if (hlen > IGB_RX_HDR_LEN)
-		hlen = IGB_RX_HDR_LEN;
-	return hlen;
+	va = skb_frag_address(frag);
+
+#ifdef CONFIG_IGB_PTP
+	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+		/* retrieve timestamp from buffer */
+		igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
+
+		/* update pointers to remove timestamp header */
+		skb_frag_size_sub(frag, IGB_TS_HDR_LEN);
+		frag->page_offset += IGB_TS_HDR_LEN;
+		skb->data_len -= IGB_TS_HDR_LEN;
+		skb->len -= IGB_TS_HDR_LEN;
+
+		/* move va to start of packet data */
+		va += IGB_TS_HDR_LEN;
+	}
+
+#endif
+	/*
+	 * we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	skb_frag_size_sub(frag, pull_len);
+	frag->page_offset += pull_len;
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+}
+
+/**
+ * igb_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool igb_cleanup_headers(struct igb_ring *rx_ring,
+				union e1000_adv_rx_desc *rx_desc,
+				struct sk_buff *skb)
+{
+
+	if (unlikely((igb_test_staterr(rx_desc,
+				       E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
+		struct net_device *netdev = rx_ring->netdev;
+		if (!(netdev->features & NETIF_F_RXALL)) {
+			dev_kfree_skb_any(skb);
+			return true;
+		}
+	}
+
+	/* place header in linear portion of buffer */
+	if (skb_is_nonlinear(skb))
+		igb_pull_tail(rx_ring, rx_desc, skb);
+
+	/* if skb_pad returns an error the skb was freed */
+	if (unlikely(skb->len < 60)) {
+		int pad_len = 60 - skb->len;
+
+		if (skb_pad(skb, pad_len))
+			return true;
+		__skb_put(skb, pad_len);
+	}
+
+	return false;
 }
 
 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 {
 	struct igb_ring *rx_ring = q_vector->rx.ring;
 	union e1000_adv_rx_desc *rx_desc;
+	struct sk_buff *skb = rx_ring->skb;
 	const int current_node = numa_node_id();
 	unsigned int total_bytes = 0, total_packets = 0;
 	u16 cleaned_count = igb_desc_unused(rx_ring);
@@ -5932,12 +6115,9 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 
 	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
 		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
-		struct sk_buff *skb = buffer_info->skb;
+		struct page *page;
 		union e1000_adv_rx_desc *next_rxd;
 
-		buffer_info->skb = NULL;
-		prefetch(skb->data);
-
 		i++;
 		if (i == rx_ring->count)
 			i = 0;
@@ -5952,52 +6132,57 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 		 */
 		rmb();
 
-		if (!skb_is_nonlinear(skb)) {
-			__skb_put(skb, igb_get_hlen(rx_desc));
-			dma_unmap_single(rx_ring->dev, buffer_info->dma,
-					 IGB_RX_HDR_LEN,
-					 DMA_FROM_DEVICE);
-			buffer_info->dma = 0;
-		}
-
-		if (rx_desc->wb.upper.length) {
-			u16 length = le16_to_cpu(rx_desc->wb.upper.length);
+		page = buffer_info->page;
+		prefetchw(page);
 
-			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
-					   buffer_info->page,
-					   buffer_info->page_offset,
-					   length);
+		if (likely(!skb)) {
+			void *page_addr = page_address(page) +
+					  buffer_info->page_offset;
 
-			skb->len += length;
-			skb->data_len += length;
-			skb->truesize += PAGE_SIZE / 2;
+			/* prefetch first cache line of first page */
+			prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+			prefetch(page_addr + L1_CACHE_BYTES);
+#endif
 
-			if ((page_count(buffer_info->page) != 1) ||
-			    (page_to_nid(buffer_info->page) != current_node))
-				buffer_info->page = NULL;
-			else
-				get_page(buffer_info->page);
+			/* allocate a skb to store the frags */
+			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+							IGB_RX_HDR_LEN);
+			if (unlikely(!skb)) {
+				rx_ring->rx_stats.alloc_failed++;
+				break;
+			}
 
-			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
-				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
-			buffer_info->page_dma = 0;
+			/*
+			 * we will be copying header into skb->data in
+			 * pskb_may_pull so it is in our interest to prefetch
+			 * it now to avoid a possible cache miss
+			 */
+			prefetchw(skb->data);
 		}
 
-		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
-			struct igb_rx_buffer *next_buffer;
-			next_buffer = &rx_ring->rx_buffer_info[i];
-			buffer_info->skb = next_buffer->skb;
-			buffer_info->dma = next_buffer->dma;
-			next_buffer->skb = skb;
-			next_buffer->dma = 0;
-			goto next_desc;
-		}
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+				buffer_info->page_offset,
+				le16_to_cpu(rx_desc->wb.upper.length),
+				PAGE_SIZE / 2);
 
-		if (unlikely((igb_test_staterr(rx_desc,
-		    E1000_RXDEXT_ERR_FRAME_ERR_MASK))
-		    && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
-			dev_kfree_skb_any(skb);
+		if ((page_count(buffer_info->page) != 1) ||
+		    (page_to_nid(buffer_info->page) != current_node))
+			buffer_info->page = NULL;
+		else
+			get_page(buffer_info->page);
+
+		dma_unmap_page(rx_ring->dev, buffer_info->dma,
+			       PAGE_SIZE / 2, DMA_FROM_DEVICE);
+		buffer_info->dma = 0;
+
+		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))
 			goto next_desc;
+
+		/* verify the packet layout is correct */
+		if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
 		}
 
 #ifdef CONFIG_IGB_PTP
@@ -6010,10 +6195,14 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 		total_bytes += skb->len;
 		total_packets++;
 
+		skb_record_rx_queue(skb, rx_ring->queue_index);
 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 
 		napi_gro_receive(&q_vector->napi, skb);
+		/* reset skb pointer */
+		skb = NULL;
+
 		budget--;
 next_desc:
 		if (!budget)
 			break;
@@ -6030,6 +6219,9 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 		rx_desc = next_rxd;
 	}
 
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
 	rx_ring->next_to_clean = i;
 	u64_stats_update_begin(&rx_ring->rx_syncp);
 	rx_ring->rx_stats.packets += total_packets;
@@ -6044,70 +6236,37 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
 	return !!budget;
 }
 
-static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
-				 struct igb_rx_buffer *bi)
-{
-	struct sk_buff *skb = bi->skb;
-	dma_addr_t dma = bi->dma;
-
-	if (dma)
-		return true;
-
-	if (likely(!skb)) {
-		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
-						IGB_RX_HDR_LEN);
-		bi->skb = skb;
-		if (!skb) {
-			rx_ring->rx_stats.alloc_failed++;
-			return false;
-		}
-
-		/* initialize skb for ring */
-		skb_record_rx_queue(skb, rx_ring->queue_index);
-	}
-
-	dma = dma_map_single(rx_ring->dev, skb->data,
-			     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
-
-	if (dma_mapping_error(rx_ring->dev, dma)) {
-		rx_ring->rx_stats.alloc_failed++;
-		return false;
-	}
-
-	bi->dma = dma;
-	return true;
-}
-
 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
 				  struct igb_rx_buffer *bi)
 {
 	struct page *page = bi->page;
-	dma_addr_t page_dma = bi->page_dma;
+	dma_addr_t dma = bi->dma;
 	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
 
-	if (page_dma)
+	if (dma)
 		return true;
 
 	if (!page) {
-		page = __skb_alloc_page(GFP_ATOMIC, bi->skb);
-		bi->page = page;
+		page = __skb_alloc_page(GFP_ATOMIC | __GFP_COLD, NULL);
 		if (unlikely(!page)) {
 			rx_ring->rx_stats.alloc_failed++;
 			return false;
 		}
+		bi->page = page;
 	}
 
-	page_dma = dma_map_page(rx_ring->dev, page,
-				page_offset, PAGE_SIZE / 2,
-				DMA_FROM_DEVICE);
+	dma = dma_map_page(rx_ring->dev, page,
			   page_offset, PAGE_SIZE / 2,
			   DMA_FROM_DEVICE);
 
-	if (dma_mapping_error(rx_ring->dev, page_dma)) {
+	if (dma_mapping_error(rx_ring->dev, dma)) {
 		rx_ring->rx_stats.alloc_failed++;
 		return false;
 	}
 
-	bi->page_dma = page_dma;
+	bi->dma = dma;
 	bi->page_offset = page_offset;
+
 	return true;
 }
 
@@ -6126,17 +6285,12 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
 		i -= rx_ring->count;
 
 	while (cleaned_count--) {
-		if (!igb_alloc_mapped_skb(rx_ring, bi))
+		if (!igb_alloc_mapped_page(rx_ring, bi))
 			break;
 
 		/* Refresh the desc even if buffer_addrs didn't change
 		 * because each write-back erases this info. */
-		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
-
-		if (!igb_alloc_mapped_page(rx_ring, bi))
-			break;
-
-		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
 
 		rx_desc++;
 		bi++;