diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index f069c1b1075357f92532985b9f281aa585b73ff0..b1e3baf8d450755ea17630132aa42418cbd5dd40 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -72,12 +72,6 @@ /* Supported Rx Buffer Sizes */ #define IXGBE_RXBUFFER_512 512 /* Used for packet split */ -#define IXGBE_RXBUFFER_2K 2048 -#define IXGBE_RXBUFFER_3K 3072 -#define IXGBE_RXBUFFER_4K 4096 -#define IXGBE_RXBUFFER_7K 7168 -#define IXGBE_RXBUFFER_8K 8192 -#define IXGBE_RXBUFFER_15K 15360 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ /* @@ -168,7 +162,6 @@ struct ixgbe_rx_buffer { struct sk_buff *skb; dma_addr_t dma; struct page *page; - dma_addr_t page_dma; unsigned int page_offset; }; @@ -193,21 +186,15 @@ struct ixgbe_rx_queue_stats { u64 csum_err; }; -enum ixbge_ring_state_t { +enum ixgbe_ring_state_t { __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_DETECT_HANG, __IXGBE_HANG_CHECK_ARMED, - __IXGBE_RX_PS_ENABLED, __IXGBE_RX_RSC_ENABLED, __IXGBE_RX_CSUM_UDP_ZERO_ERR, + __IXGBE_RX_FCOE_BUFSZ, }; -#define ring_is_ps_enabled(ring) \ - test_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) -#define set_ring_ps_enabled(ring) \ - set_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) -#define clear_ring_ps_enabled(ring) \ - clear_bit(__IXGBE_RX_PS_ENABLED, &(ring)->state) #define check_for_tx_hang(ring) \ test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) #define set_check_for_tx_hang(ring) \ @@ -233,7 +220,6 @@ struct ixgbe_ring { u8 __iomem *tail; u16 count; /* amount of descriptors */ - u16 rx_buf_len; u8 queue_index; /* needed for multiqueue queue management */ u8 reg_idx; /* holds the special value that gets @@ -241,8 +227,13 @@ struct ixgbe_ring { * associated with this ring, which is * different for DCB and RSS modes */ - u8 atr_sample_rate; - u8 atr_count; + union { + struct { + u8 atr_sample_rate; + u8 atr_count; + }; + u16 next_to_alloc; + }; u16 next_to_use; u16 next_to_clean; @@ -287,6 +278,22 @@ struct ixgbe_ring_feature { int mask; } ____cacheline_internodealigned_in_smp; +/* + * FCoE requires that all Rx buffers be over 2200 bytes in length. Since + * this is twice the size of a half page we need to double the page order + * for FCoE enabled Rx queues. + */ +#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ + return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0; +} +#else +#define ixgbe_rx_pg_order(_ring) 0 +#endif +#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) +#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) + struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -554,7 +561,7 @@ struct ixgbe_cb { }; dma_addr_t dma; u16 append_cnt; - bool delay_unmap; + bool page_released; }; #define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 24f7291dbe5763bd9c8b97fee5cd62d2de869c91..b09e67cc9d6e5a81b9a0b3f638a849e46efacff9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "ixgbe.h" @@ -1615,7 +1616,6 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter) rx_ring->dev = &adapter->pdev->dev; rx_ring->netdev = adapter->netdev; rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx; - rx_ring->rx_buf_len = IXGBE_RXBUFFER_2K; err = ixgbe_setup_rx_resources(rx_ring); if (err) { @@ -1718,13 +1718,15 @@ static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer, frame_size >>= 1; - data = rx_buffer->skb->data; + data = kmap(rx_buffer->page) + rx_buffer->page_offset; if (data[3] != 0xFF || data[frame_size + 10] != 0xBE || data[frame_size + 12] != 0xAF) match = false; + kunmap(rx_buffer->page); + return match; } @@ -1746,17 +1748,22 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, /* check Rx buffer */ rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; - /* unmap Rx buffer, will be remapped by alloc_rx_buffers */ - dma_unmap_single(rx_ring->dev, - rx_buffer->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - rx_buffer->dma = 0; + /* sync Rx buffer for CPU read */ + dma_sync_single_for_cpu(rx_ring->dev, + rx_buffer->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); /* verify contents of skb */ if (ixgbe_check_lbtest_frame(rx_buffer, size)) count++; + /* sync Rx buffer for device write */ + dma_sync_single_for_device(rx_ring->dev, + rx_buffer->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + /* unmap buffer on Tx side */ tx_buffer = &tx_ring->tx_buffer_info[tx_ntc]; ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index cae763c57dc7b8203b4b368561f237741962ac5c..e97ef4591adec6064b554571926b50281135f0ff 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -469,17 +469,7 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, phys_to_virt(rx_buffer_info->dma), - rx_ring->rx_buf_len, true); - - if (rx_ring->rx_buf_len - < IXGBE_RXBUFFER_2K) - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt( - rx_buffer_info->page_dma + - rx_buffer_info->page_offset - ), - PAGE_SIZE/2, true); + ixgbe_rx_bufsz(rx_ring), true); } } @@ -1006,6 +996,7 @@ static inline void ixgbe_rx_hash(struct ixgbe_ring *ring, skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); } +#ifdef IXGBE_FCOE /** * ixgbe_rx_is_fcoe - check the rx desc for incoming pkt type * @adapter: address of board private structure @@ -1024,6 +1015,7 @@ static inline bool ixgbe_rx_is_fcoe(struct ixgbe_adapter *adapter, IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT))); } +#endif /* IXGBE_FCOE */ /** * ixgbe_rx_checksum - indicate in skb if hw indicated a good cksum * @ring: structure containing ring specific data @@ -1051,7 +1043,7 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, return; if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { - u16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; /* * 82599 errata, UDP frames with a 0 checksum can be marked as @@ -1072,6 +1064,9 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, static inline void ixgbe_release_rx_desc(struct ixgbe_ring *rx_ring, u32 val) { rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; /* * Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -1082,67 +1077,46 @@ static inline void ixgbe_release_rx_desc(struct ixgbe_ring *rx_ring, u32 val) writel(val, rx_ring->tail); } -static bool ixgbe_alloc_mapped_skb(struct ixgbe_ring *rx_ring, - struct ixgbe_rx_buffer *bi) -{ - struct sk_buff *skb = bi->skb; - dma_addr_t dma = bi->dma; - - if (dma) - return true; - - if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); - bi->skb = skb; - if (!skb) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return false; - } - } - - dma = dma_map_single(rx_ring->dev, skb->data, - rx_ring->rx_buf_len, DMA_FROM_DEVICE); - - if (dma_mapping_error(rx_ring->dev, dma)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return false; - } - - bi->dma = dma; - return true; -} - static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *bi) { struct page *page = bi->page; - dma_addr_t page_dma = bi->page_dma; - unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2); + dma_addr_t dma = bi->dma; - if (page_dma) + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(dma)) return true; - if (!page) { - page = alloc_page(GFP_ATOMIC | __GFP_COLD); - bi->page = page; + /* alloc new page for storage */ + if (likely(!page)) { + page = alloc_pages(GFP_ATOMIC | __GFP_COLD, + ixgbe_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } + bi->page = page; } - page_dma = dma_map_page(rx_ring->dev, page, - page_offset, PAGE_SIZE / 2, - DMA_FROM_DEVICE); + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + + /* + * if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + put_page(page); + bi->page = NULL; - if (dma_mapping_error(rx_ring->dev, page_dma)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } - bi->page_dma = page_dma; - bi->page_offset = page_offset; + bi->dma = dma; + bi->page_offset ^= ixgbe_rx_bufsz(rx_ring); + return true; } @@ -1157,30 +1131,23 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) struct ixgbe_rx_buffer *bi; u16 i = rx_ring->next_to_use; - /* nothing to do or no valid netdev defined */ - if (!cleaned_count || !rx_ring->netdev) + /* nothing to do */ + if (!cleaned_count) return; rx_desc = IXGBE_RX_DESC(rx_ring, i); bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; - while (cleaned_count--) { - if (!ixgbe_alloc_mapped_skb(rx_ring, bi)) + do { + if (!ixgbe_alloc_mapped_page(rx_ring, bi)) break; - /* Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. */ - if (ring_is_ps_enabled(rx_ring)) { - rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); - - if (!ixgbe_alloc_mapped_page(rx_ring, bi)) - break; - - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); - } else { - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); - } + /* + * Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; @@ -1193,7 +1160,9 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) /* clear the hdr_addr for the next_to_use descriptor */ rx_desc->read.hdr_addr = 0; - } + + cleaned_count--; + } while (cleaned_count); i += rx_ring->count; @@ -1201,90 +1170,6 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) ixgbe_release_rx_desc(rx_ring, i); } -static inline u16 ixgbe_get_hlen(union ixgbe_adv_rx_desc *rx_desc) -{ - /* HW will not DMA in data larger than the given buffer, even if it - * parses the (NFS, of course) header to be larger. In that case, it - * fills the header buffer and spills the rest into the page. - */ - u16 hdr_info = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info); - u16 hlen = (hdr_info & IXGBE_RXDADV_HDRBUFLEN_MASK) >> - IXGBE_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > IXGBE_RX_HDR_SIZE) - hlen = IXGBE_RX_HDR_SIZE; - return hlen; -} - -/** - * ixgbe_merge_active_tail - merge active tail into lro skb - * @tail: pointer to active tail in frag_list - * - * This function merges the length and data of an active tail into the - * skb containing the frag_list. It resets the tail's pointer to the head, - * but it leaves the heads pointer to tail intact. - **/ -static inline struct sk_buff *ixgbe_merge_active_tail(struct sk_buff *tail) -{ - struct sk_buff *head = IXGBE_CB(tail)->head; - - if (!head) - return tail; - - head->len += tail->len; - head->data_len += tail->len; - head->truesize += tail->len; - - IXGBE_CB(tail)->head = NULL; - - return head; -} - -/** - * ixgbe_add_active_tail - adds an active tail into the skb frag_list - * @head: pointer to the start of the skb - * @tail: pointer to active tail to add to frag_list - * - * This function adds an active tail to the end of the frag list. This tail - * will still be receiving data so we cannot yet ad it's stats to the main - * skb. That is done via ixgbe_merge_active_tail. - **/ -static inline void ixgbe_add_active_tail(struct sk_buff *head, - struct sk_buff *tail) -{ - struct sk_buff *old_tail = IXGBE_CB(head)->tail; - - if (old_tail) { - ixgbe_merge_active_tail(old_tail); - old_tail->next = tail; - } else { - skb_shinfo(head)->frag_list = tail; - } - - IXGBE_CB(tail)->head = head; - IXGBE_CB(head)->tail = tail; -} - -/** - * ixgbe_close_active_frag_list - cleanup pointers on a frag_list skb - * @head: pointer to head of an active frag list - * - * This function will clear the frag_tail_tracker pointer on an active - * frag_list and returns true if the pointer was actually set - **/ -static inline bool ixgbe_close_active_frag_list(struct sk_buff *head) -{ - struct sk_buff *tail = IXGBE_CB(head)->tail; - - if (!tail) - return false; - - ixgbe_merge_active_tail(tail); - - IXGBE_CB(head)->tail = NULL; - - return true; -} - /** * ixgbe_get_headlen - determine size of header for RSC/LRO/GRO/FCOE * @data: pointer to the start of the headers @@ -1346,7 +1231,7 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, /* record next protocol */ nexthdr = hdr.ipv4->protocol; hdr.network += hlen; -#ifdef CONFIG_FCOE +#ifdef IXGBE_FCOE } else if (protocol == __constant_htons(ETH_P_FCOE)) { if ((hdr.network - data) > (max_len - FCOE_HEADER_LEN)) return max_len; @@ -1409,7 +1294,7 @@ static void ixgbe_get_rsc_cnt(struct ixgbe_ring *rx_ring, static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, struct sk_buff *skb) { - u16 hdr_len = ixgbe_get_headlen(skb->data, skb_headlen(skb)); + u16 hdr_len = skb_headlen(skb); /* set gso_size to avoid messing up TCP MSS */ skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len), @@ -1473,150 +1358,346 @@ static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, netif_rx(skb); } +/** + * ixgbe_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IXGBE_RX_DESC(rx_ring, ntc)); + + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + + /* append_cnt indicates packet is RSC, if so fetch nextp */ + if (IXGBE_CB(skb)->append_cnt) { + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= IXGBE_RXDADV_NEXTP_MASK; + ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + } + + /* place skb in next buffer to be received */ + rx_ring->rx_buffer_info[ntc].skb = skb; + rx_ring->rx_stats.non_eop_descs++; + + return true; +} + +/** + * ixgbe_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Check for corrupted packet headers caused by senders on the local L2 + * embedded NIC switch not setting up their Tx Descriptors right. These + * should be very rare. + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + struct net_device *netdev = rx_ring->netdev; + unsigned char *va; + unsigned int pull_len; + + /* if the page was released unmap it, else just sync our portion */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } else { + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + frag->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + IXGBE_CB(skb)->dma = 0; + + /* verify that the packet does not have any known errors */ + if (unlikely(ixgbe_test_staterr(rx_desc, + IXGBE_RXDADV_ERR_FRAME_ERR_MASK) && + !(netdev->features & NETIF_F_RXALL))) { + dev_kfree_skb_any(skb); + return true; + } + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = skb_frag_size(frag); + if (pull_len > 256) + pull_len = ixgbe_get_headlen(va, pull_len); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; + + /* + * if we sucked the frag empty then we should free it, + * if there are other frags here something is screwed up in hardware + */ + if (skb_frag_size(frag) == 0) { + BUG_ON(skb_shinfo(skb)->nr_frags != 1); + skb_shinfo(skb)->nr_frags = 0; + __skb_frag_unref(frag); + skb->truesize -= ixgbe_rx_bufsz(rx_ring); + } + + /* if skb_pad returns an error the skb was freed */ + if (unlikely(skb->len < 60)) { + int pad_len = 60 - skb->len; + + if (skb_pad(skb, pad_len)) + return true; + __skb_put(skb, pad_len); + } + + return false; +} + +/** + * ixgbe_can_reuse_page - determine if we can reuse a page + * @rx_buffer: pointer to rx_buffer containing the page we want to reuse + * + * Returns true if page can be reused in another Rx buffer + **/ +static inline bool ixgbe_can_reuse_page(struct ixgbe_rx_buffer *rx_buffer) +{ + struct page *page = rx_buffer->page; + + /* if we are only owner of page and it is local we can reuse it */ + return likely(page_count(page) == 1) && + likely(page_to_nid(page) == numa_node_id()); +} + +/** + * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Syncronizes page for reuse by the adapter + **/ +static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *old_buff) +{ + struct ixgbe_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + u16 bufsz = ixgbe_rx_bufsz(rx_ring); + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_buff->page = old_buff->page; + new_buff->dma = old_buff->dma; + + /* flip page offset to other buffer and store to new_buff */ + new_buff->page_offset = old_buff->page_offset ^ bufsz; + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, + new_buff->page_offset, bufsz, + DMA_FROM_DEVICE); + + /* bump ref count on page before it is given to the stack */ + get_page(new_buff->page); +} + +/** + * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into + * + * This function is based on skb_add_rx_frag. I would have used that + * function however it doesn't handle the truesize case correctly since we + * are allocating more memory than might be used for a single receive. + **/ +static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + struct sk_buff *skb, int size) +{ + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buffer->page, rx_buffer->page_offset, + size); + skb->len += size; + skb->data_len += size; + skb->truesize += ixgbe_rx_bufsz(rx_ring); +} + +/** + * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf + * @q_vector: structure containing interrupt and ring information + * @rx_ring: rx descriptor ring to transact packets on + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the syste. + * + * Returns true if all work is completed without reaching budget + **/ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *rx_ring, int budget) { - union ixgbe_adv_rx_desc *rx_desc, *next_rxd; - struct ixgbe_rx_buffer *rx_buffer_info; - struct sk_buff *skb; unsigned int total_rx_bytes = 0, total_rx_packets = 0; - const int current_node = numa_node_id(); - struct ixgbe_adapter *adapter = q_vector->adapter; #ifdef IXGBE_FCOE + struct ixgbe_adapter *adapter = q_vector->adapter; int ddp_bytes = 0; #endif /* IXGBE_FCOE */ - u16 i; - u16 cleaned_count = 0; + u16 cleaned_count = ixgbe_desc_unused(rx_ring); - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC(rx_ring, i); + do { + struct ixgbe_rx_buffer *rx_buffer; + union ixgbe_adv_rx_desc *rx_desc; + struct sk_buff *skb; + struct page *page; + u16 ntc; - while (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) { - u32 upper_len = 0; + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { + ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } - rmb(); /* read descriptor and rx_buffer_info after status DD */ + ntc = rx_ring->next_to_clean; + rx_desc = IXGBE_RX_DESC(rx_ring, ntc); + rx_buffer = &rx_ring->rx_buffer_info[ntc]; - rx_buffer_info = &rx_ring->rx_buffer_info[i]; + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) + break; - skb = rx_buffer_info->skb; - rx_buffer_info->skb = NULL; - prefetch(skb->data); + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); - /* linear means we are building an skb from multiple pages */ - if (!skb_is_nonlinear(skb)) { - u16 hlen; - if (ring_is_ps_enabled(rx_ring)) { - hlen = ixgbe_get_hlen(rx_desc); - upper_len = le16_to_cpu(rx_desc->wb.upper.length); - } else { - hlen = le16_to_cpu(rx_desc->wb.upper.length); + page = rx_buffer->page; + prefetchw(page); + + skb = rx_buffer->skb; + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + break; } - skb_put(skb, hlen); + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); /* * Delay unmapping of the first packet. It carries the * header information, HW may still access the header - * after writeback. Only unmap it when EOP is reached + * after the writeback. Only unmap it when EOP is + * reached */ - if (!IXGBE_CB(skb)->head) { - IXGBE_CB(skb)->delay_unmap = true; - IXGBE_CB(skb)->dma = rx_buffer_info->dma; - } else { - skb = ixgbe_merge_active_tail(skb); - dma_unmap_single(rx_ring->dev, - rx_buffer_info->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - } - rx_buffer_info->dma = 0; + IXGBE_CB(skb)->dma = rx_buffer->dma; } else { - /* assume packet split since header is unmapped */ - upper_len = le16_to_cpu(rx_desc->wb.upper.length); + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); } - if (upper_len) { - dma_unmap_page(rx_ring->dev, - rx_buffer_info->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_buffer_info->page_dma = 0; - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buffer_info->page, - rx_buffer_info->page_offset, - upper_len); - - if ((page_count(rx_buffer_info->page) == 1) && - (page_to_nid(rx_buffer_info->page) == current_node)) - get_page(rx_buffer_info->page); - else - rx_buffer_info->page = NULL; + /* pull page into skb */ + ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, + le16_to_cpu(rx_desc->wb.upper.length)); - skb->len += upper_len; - skb->data_len += upper_len; - skb->truesize += PAGE_SIZE / 2; + if (ixgbe_can_reuse_page(rx_buffer)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); } - ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); + /* clear contents of buffer_info */ + rx_buffer->skb = NULL; + rx_buffer->dma = 0; + rx_buffer->page = NULL; - i++; - if (i == rx_ring->count) - i = 0; + ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); - next_rxd = IXGBE_RX_DESC(rx_ring, i); - prefetch(next_rxd); cleaned_count++; - if ((!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) { - struct ixgbe_rx_buffer *next_buffer; - u32 nextp; - - if (IXGBE_CB(skb)->append_cnt) { - nextp = le32_to_cpu( - rx_desc->wb.upper.status_error); - nextp >>= IXGBE_RXDADV_NEXTP_SHIFT; - } else { - nextp = i; - } - - next_buffer = &rx_ring->rx_buffer_info[nextp]; - - if (ring_is_ps_enabled(rx_ring)) { - rx_buffer_info->skb = next_buffer->skb; - rx_buffer_info->dma = next_buffer->dma; - next_buffer->skb = skb; - next_buffer->dma = 0; - } else { - struct sk_buff *next_skb = next_buffer->skb; - ixgbe_add_active_tail(skb, next_skb); - IXGBE_CB(next_skb)->head = skb; - } - rx_ring->rx_stats.non_eop_descs++; - goto next_desc; - } - - dma_unmap_single(rx_ring->dev, - IXGBE_CB(skb)->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - IXGBE_CB(skb)->dma = 0; - IXGBE_CB(skb)->delay_unmap = false; - - if (ixgbe_close_active_frag_list(skb) && - !IXGBE_CB(skb)->append_cnt) { - /* if we got here without RSC the packet is invalid */ - dev_kfree_skb_any(skb); - goto next_desc; - } + /* place incomplete frames back on ring for completion */ + if (ixgbe_is_non_eop(rx_ring, rx_desc, skb)) + continue; - /* ERR_MASK will only have valid bits if EOP set */ - if (unlikely(ixgbe_test_staterr(rx_desc, - IXGBE_RXDADV_ERR_FRAME_ERR_MASK) && - !(adapter->netdev->features & NETIF_F_RXALL))) { - dev_kfree_skb_any(skb); - goto next_desc; - } + /* verify the packet layout is correct */ + if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb)) + continue; /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1631,32 +1712,16 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb); if (!ddp_bytes) { dev_kfree_skb_any(skb); - goto next_desc; + continue; } } + #endif /* IXGBE_FCOE */ ixgbe_rx_skb(q_vector, skb); + /* update budget accounting */ budget--; -next_desc: - if (!budget) - break; - - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { - ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; - } - - /* use prefetched values */ - rx_desc = next_rxd; - } - - rx_ring->next_to_clean = i; - cleaned_count = ixgbe_desc_unused(rx_ring); - - if (cleaned_count) - ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + } while (likely(budget)); #ifdef IXGBE_FCOE /* include DDPed FCoE data */ @@ -1671,8 +1736,8 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_bytes += ddp_bytes; total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); } -#endif /* IXGBE_FCOE */ +#endif /* IXGBE_FCOE */ u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -1680,6 +1745,9 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, q_vector->rx.total_packets += total_rx_packets; q_vector->rx.total_bytes += total_rx_bytes; + if (cleaned_count) + ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + return !!budget; } @@ -2635,18 +2703,12 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl |= (IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) & IXGBE_SRRCTL_BSIZEHDR_MASK; - if (ring_is_ps_enabled(rx_ring)) { -#if (PAGE_SIZE / 2) > IXGBE_MAX_RXBUFFER - srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; +#if PAGE_SIZE > IXGBE_MAX_RXBUFFER + srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; #else - srrctl |= (PAGE_SIZE / 2) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; #endif - srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; - } else { - srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; - srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - } + srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(&adapter->hw, IXGBE_SRRCTL(reg_idx), srrctl); } @@ -2729,13 +2791,11 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, { struct ixgbe_hw *hw = &adapter->hw; u32 rscctrl; - int rx_buf_len; u8 reg_idx = ring->reg_idx; if (!ring_is_rsc_enabled(ring)) return; - rx_buf_len = ring->rx_buf_len; rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(reg_idx)); rscctrl |= IXGBE_RSCCTL_RSCEN; /* @@ -2743,24 +2803,13 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, * total size of max desc * buf_len is not greater * than 65536 */ - if (ring_is_ps_enabled(ring)) { -#if (PAGE_SIZE < 8192) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (PAGE_SIZE < 16384) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#elif (PAGE_SIZE < 32768) - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; +#if (PAGE_SIZE <= 8192) + rscctrl |= IXGBE_RSCCTL_MAXDESC_16; +#elif (PAGE_SIZE <= 16384) + rscctrl |= IXGBE_RSCCTL_MAXDESC_8; #else - rscctrl |= IXGBE_RSCCTL_MAXDESC_1; + rscctrl |= IXGBE_RSCCTL_MAXDESC_4; #endif - } else { - if (rx_buf_len <= IXGBE_RXBUFFER_4K) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; - else if (rx_buf_len <= IXGBE_RXBUFFER_8K) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; - else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; - } IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl); } @@ -2977,23 +3026,10 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - int rx_buf_len; struct ixgbe_ring *rx_ring; int i; u32 mhadd, hlreg0; - /* Decide whether to use packet split mode or not */ - /* On by default */ - adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED; - - /* Do not use packet split if we're in SR-IOV Mode */ - if (adapter->num_vfs) - adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; - - /* Disable packet split due to 82599 erratum #45 */ - if (hw->mac.type == ixgbe_mac_82599EB) - adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; - #ifdef IXGBE_FCOE /* adjust max frame to be able to do baby jumbo for FCoE */ if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && @@ -3012,27 +3048,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) /* MHADD will allow an extra 4 bytes past for vlan tagged frames */ max_frame += VLAN_HLEN; - /* Set the RX buffer length according to the mode */ - if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) { - rx_buf_len = IXGBE_RX_HDR_SIZE; - } else { - if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; - /* - * Make best use of allocation by using all but 1K of a - * power of 2 allocation that will be used for skb->head. - */ - else if (max_frame <= IXGBE_RXBUFFER_3K) - rx_buf_len = IXGBE_RXBUFFER_3K; - else if (max_frame <= IXGBE_RXBUFFER_7K) - rx_buf_len = IXGBE_RXBUFFER_7K; - else if (max_frame <= IXGBE_RXBUFFER_15K) - rx_buf_len = IXGBE_RXBUFFER_15K; - else - rx_buf_len = IXGBE_MAX_RXBUFFER; - } - hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); /* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */ hlreg0 |= IXGBE_HLREG0_JUMBOEN; @@ -3044,32 +3059,16 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) */ for (i = 0; i < adapter->num_rx_queues; i++) { rx_ring = adapter->rx_ring[i]; - rx_ring->rx_buf_len = rx_buf_len; - - if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) - set_ring_ps_enabled(rx_ring); - else - clear_ring_ps_enabled(rx_ring); - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_ring_rsc_enabled(rx_ring); else clear_ring_rsc_enabled(rx_ring); - #ifdef IXGBE_FCOE if (netdev->features & NETIF_F_FCOE_MTU) { struct ixgbe_ring_feature *f; f = &adapter->ring_feature[RING_F_FCOE]; - if ((i >= f->mask) && (i < f->mask + f->indices)) { - clear_ring_ps_enabled(rx_ring); - if (rx_buf_len < IXGBE_FCOE_JUMBO_FRAME_SIZE) - rx_ring->rx_buf_len = - IXGBE_FCOE_JUMBO_FRAME_SIZE; - } else if (!ring_is_rsc_enabled(rx_ring) && - !ring_is_ps_enabled(rx_ring)) { - rx_ring->rx_buf_len = - IXGBE_FCOE_JUMBO_FRAME_SIZE; - } + if ((i >= f->mask) && (i < f->mask + f->indices)) + set_bit(__IXGBE_RX_FCOE_BUFSZ, &rx_ring->state); } #endif /* IXGBE_FCOE */ } @@ -3990,6 +3989,27 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) IXGBE_RAH_AV); } +/** + * ixgbe_init_rx_page_offset - initialize page offset values for Rx buffers + * @rx_ring: ring to setup + * + * On many IA platforms the L1 cache has a critical stride of 4K, this + * results in each receive buffer starting in the same cache set. To help + * reduce the pressure on this cache set we can interleave the offsets so + * that only every other buffer will be in the same cache set. + **/ +static void ixgbe_init_rx_page_offset(struct ixgbe_ring *rx_ring) +{ + struct ixgbe_rx_buffer *rx_buffer = rx_ring->rx_buffer_info; + u16 i; + + for (i = 0; i < rx_ring->count; i += 2) { + rx_buffer[0].page_offset = 0; + rx_buffer[1].page_offset = ixgbe_rx_bufsz(rx_ring); + rx_buffer = &rx_buffer[2]; + } +} + /** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from @@ -4006,49 +4026,40 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { - struct ixgbe_rx_buffer *rx_buffer_info; - - rx_buffer_info = &rx_ring->rx_buffer_info[i]; - if (rx_buffer_info->dma) { - dma_unmap_single(rx_ring->dev, rx_buffer_info->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - rx_buffer_info->dma = 0; - } - if (rx_buffer_info->skb) { - struct sk_buff *skb = rx_buffer_info->skb; - rx_buffer_info->skb = NULL; - /* We need to clean up RSC frag lists */ - skb = ixgbe_merge_active_tail(skb); - ixgbe_close_active_frag_list(skb); - if (IXGBE_CB(skb)->delay_unmap) { - dma_unmap_single(dev, - IXGBE_CB(skb)->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - IXGBE_CB(skb)->dma = 0; - IXGBE_CB(skb)->delay_unmap = false; + struct ixgbe_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[i]; + if (rx_buffer->skb) { + struct sk_buff *skb = rx_buffer->skb; + if (IXGBE_CB(skb)->page_released) { + dma_unmap_page(dev, + IXGBE_CB(skb)->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; } dev_kfree_skb(skb); } - if (!rx_buffer_info->page) - continue; - if (rx_buffer_info->page_dma) { - dma_unmap_page(dev, rx_buffer_info->page_dma, - PAGE_SIZE / 2, DMA_FROM_DEVICE); - rx_buffer_info->page_dma = 0; - } - put_page(rx_buffer_info->page); - rx_buffer_info->page = NULL; - rx_buffer_info->page_offset = 0; + rx_buffer->skb = NULL; + if (rx_buffer->dma) + dma_unmap_page(dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + rx_buffer->dma = 0; + if (rx_buffer->page) + put_page(rx_buffer->page); + rx_buffer->page = NULL; } size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; memset(rx_ring->rx_buffer_info, 0, size); + ixgbe_init_rx_page_offset(rx_ring); + /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -5412,6 +5423,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + ixgbe_init_rx_page_offset(rx_ring); + return 0; err: vfree(rx_ring->rx_buffer_info);