提交 036d61f0 编写于 作者: Anirban Chakraborty 提交者: David S. Miller

qlcnic: Code optimization patch

The optimized code achieves lower CPU utilization on the transmit path
and higher throughput for small (64-byte) packets.
Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 b1fc6d3c
...@@ -434,50 +434,49 @@ struct qlcnic_adapter_stats { ...@@ -434,50 +434,49 @@ struct qlcnic_adapter_stats {
* be one Rcv Descriptor for normal packets, one for jumbo and may be others. * be one Rcv Descriptor for normal packets, one for jumbo and may be others.
*/ */
struct qlcnic_host_rds_ring { struct qlcnic_host_rds_ring {
u32 producer; void __iomem *crb_rcv_producer;
struct rcv_desc *desc_head;
struct qlcnic_rx_buffer *rx_buf_arr;
u32 num_desc; u32 num_desc;
u32 producer;
u32 dma_size; u32 dma_size;
u32 skb_size; u32 skb_size;
u32 flags; u32 flags;
void __iomem *crb_rcv_producer;
struct rcv_desc *desc_head;
struct qlcnic_rx_buffer *rx_buf_arr;
struct list_head free_list; struct list_head free_list;
spinlock_t lock; spinlock_t lock;
dma_addr_t phys_addr; dma_addr_t phys_addr;
}; } ____cacheline_internodealigned_in_smp;
struct qlcnic_host_sds_ring { struct qlcnic_host_sds_ring {
u32 consumer; u32 consumer;
u32 num_desc; u32 num_desc;
void __iomem *crb_sts_consumer; void __iomem *crb_sts_consumer;
void __iomem *crb_intr_mask;
struct status_desc *desc_head; struct status_desc *desc_head;
struct qlcnic_adapter *adapter; struct qlcnic_adapter *adapter;
struct napi_struct napi; struct napi_struct napi;
struct list_head free_list[NUM_RCV_DESC_RINGS]; struct list_head free_list[NUM_RCV_DESC_RINGS];
void __iomem *crb_intr_mask;
int irq; int irq;
dma_addr_t phys_addr; dma_addr_t phys_addr;
char name[IFNAMSIZ+4]; char name[IFNAMSIZ+4];
}; } ____cacheline_internodealigned_in_smp;
struct qlcnic_host_tx_ring { struct qlcnic_host_tx_ring {
u32 producer; u32 producer;
__le32 *hw_consumer;
u32 sw_consumer; u32 sw_consumer;
void __iomem *crb_cmd_producer;
u32 num_desc; u32 num_desc;
void __iomem *crb_cmd_producer;
struct netdev_queue *txq;
struct qlcnic_cmd_buffer *cmd_buf_arr;
struct cmd_desc_type0 *desc_head; struct cmd_desc_type0 *desc_head;
struct qlcnic_cmd_buffer *cmd_buf_arr;
__le32 *hw_consumer;
dma_addr_t phys_addr; dma_addr_t phys_addr;
dma_addr_t hw_cons_phys_addr; dma_addr_t hw_cons_phys_addr;
}; struct netdev_queue *txq;
} ____cacheline_internodealigned_in_smp;
/* /*
* Receive context. There is one such structure per instance of the * Receive context. There is one such structure per instance of the
...@@ -1328,8 +1327,7 @@ static const struct qlcnic_brdinfo qlcnic_boards[] = { ...@@ -1328,8 +1327,7 @@ static const struct qlcnic_brdinfo qlcnic_boards[] = {
static inline u32 qlcnic_tx_avail(struct qlcnic_host_tx_ring *tx_ring) static inline u32 qlcnic_tx_avail(struct qlcnic_host_tx_ring *tx_ring)
{ {
smp_mb(); if (likely(tx_ring->producer < tx_ring->sw_consumer))
if (tx_ring->producer < tx_ring->sw_consumer)
return tx_ring->sw_consumer - tx_ring->producer; return tx_ring->sw_consumer - tx_ring->producer;
else else
return tx_ring->sw_consumer + tx_ring->num_desc - return tx_ring->sw_consumer + tx_ring->num_desc -
......
...@@ -1861,6 +1861,7 @@ static void qlcnic_change_filter(struct qlcnic_adapter *adapter, ...@@ -1861,6 +1861,7 @@ static void qlcnic_change_filter(struct qlcnic_adapter *adapter,
vlan_req->vlan_id = vlan_id; vlan_req->vlan_id = vlan_id;
tx_ring->producer = get_next_index(producer, tx_ring->num_desc); tx_ring->producer = get_next_index(producer, tx_ring->num_desc);
smp_mb();
} }
#define QLCNIC_MAC_HASH(MAC)\ #define QLCNIC_MAC_HASH(MAC)\
...@@ -1921,58 +1922,122 @@ qlcnic_send_filter(struct qlcnic_adapter *adapter, ...@@ -1921,58 +1922,122 @@ qlcnic_send_filter(struct qlcnic_adapter *adapter,
spin_unlock(&adapter->mac_learn_lock); spin_unlock(&adapter->mac_learn_lock);
} }
static void static int
qlcnic_tso_check(struct net_device *netdev, qlcnic_tx_pkt(struct qlcnic_adapter *adapter,
struct qlcnic_host_tx_ring *tx_ring,
struct cmd_desc_type0 *first_desc, struct cmd_desc_type0 *first_desc,
struct sk_buff *skb) struct sk_buff *skb)
{ {
u8 opcode = TX_ETHER_PKT; u8 opcode = 0, hdr_len = 0;
__be16 protocol = skb->protocol; u16 flags = 0, vlan_tci = 0;
u16 flags = 0; int copied, offset, copy_len;
int copied, offset, copy_len, hdr_len = 0, tso = 0;
struct cmd_desc_type0 *hwdesc; struct cmd_desc_type0 *hwdesc;
struct vlan_ethhdr *vh; struct vlan_ethhdr *vh;
struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring;
u16 protocol = ntohs(skb->protocol);
u32 producer = tx_ring->producer; u32 producer = tx_ring->producer;
__le16 vlan_oob = first_desc->flags_opcode &
cpu_to_le16(FLAGS_VLAN_OOB); if (protocol == ETH_P_8021Q) {
vh = (struct vlan_ethhdr *)skb->data;
flags = FLAGS_VLAN_TAGGED;
vlan_tci = vh->h_vlan_TCI;
} else if (vlan_tx_tag_present(skb)) {
flags = FLAGS_VLAN_OOB;
vlan_tci = vlan_tx_tag_get(skb);
}
if (unlikely(adapter->pvid)) {
if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED))
return -EIO;
if (vlan_tci && (adapter->flags & QLCNIC_TAGGING_ENABLED))
goto set_flags;
flags = FLAGS_VLAN_OOB;
vlan_tci = adapter->pvid;
}
set_flags:
qlcnic_set_tx_vlan_tci(first_desc, vlan_tci);
qlcnic_set_tx_flags_opcode(first_desc, flags, opcode);
if (*(skb->data) & BIT_0) { if (*(skb->data) & BIT_0) {
flags |= BIT_0; flags |= BIT_0;
memcpy(&first_desc->eth_addr, skb->data, ETH_ALEN); memcpy(&first_desc->eth_addr, skb->data, ETH_ALEN);
} }
opcode = TX_ETHER_PKT;
if ((netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) && if ((adapter->netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) &&
skb_shinfo(skb)->gso_size > 0) { skb_shinfo(skb)->gso_size > 0) {
hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
first_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); first_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
first_desc->total_hdr_length = hdr_len; first_desc->total_hdr_length = hdr_len;
if (vlan_oob) {
opcode = (protocol == ETH_P_IPV6) ? TX_TCP_LSO6 : TX_TCP_LSO;
/* For LSO, we need to copy the MAC/IP/TCP headers into
* the descriptor ring */
copied = 0;
offset = 2;
if (flags & FLAGS_VLAN_OOB) {
first_desc->total_hdr_length += VLAN_HLEN; first_desc->total_hdr_length += VLAN_HLEN;
first_desc->tcp_hdr_offset = VLAN_HLEN; first_desc->tcp_hdr_offset = VLAN_HLEN;
first_desc->ip_hdr_offset = VLAN_HLEN; first_desc->ip_hdr_offset = VLAN_HLEN;
/* Only in case of TSO on vlan device */ /* Only in case of TSO on vlan device */
flags |= FLAGS_VLAN_TAGGED; flags |= FLAGS_VLAN_TAGGED;
/* Create a TSO vlan header template for firmware */
hwdesc = &tx_ring->desc_head[producer];
tx_ring->cmd_buf_arr[producer].skb = NULL;
copy_len = min((int)sizeof(struct cmd_desc_type0) -
offset, hdr_len + VLAN_HLEN);
vh = (struct vlan_ethhdr *)((char *) hwdesc + 2);
skb_copy_from_linear_data(skb, vh, 12);
vh->h_vlan_proto = htons(ETH_P_8021Q);
vh->h_vlan_TCI = htons(vlan_tci);
skb_copy_from_linear_data_offset(skb, 12,
(char *)vh + 16, copy_len - 16);
copied = copy_len - VLAN_HLEN;
offset = 0;
producer = get_next_index(producer, tx_ring->num_desc);
} }
opcode = (protocol == cpu_to_be16(ETH_P_IPV6)) ? while (copied < hdr_len) {
TX_TCP_LSO6 : TX_TCP_LSO;
tso = 1; copy_len = min((int)sizeof(struct cmd_desc_type0) -
offset, (hdr_len - copied));
hwdesc = &tx_ring->desc_head[producer];
tx_ring->cmd_buf_arr[producer].skb = NULL;
skb_copy_from_linear_data_offset(skb, copied,
(char *) hwdesc + offset, copy_len);
copied += copy_len;
offset = 0;
producer = get_next_index(producer, tx_ring->num_desc);
}
tx_ring->producer = producer;
smp_mb();
adapter->stats.lso_frames++;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
u8 l4proto; u8 l4proto;
if (protocol == cpu_to_be16(ETH_P_IP)) { if (protocol == ETH_P_IP) {
l4proto = ip_hdr(skb)->protocol; l4proto = ip_hdr(skb)->protocol;
if (l4proto == IPPROTO_TCP) if (l4proto == IPPROTO_TCP)
opcode = TX_TCP_PKT; opcode = TX_TCP_PKT;
else if (l4proto == IPPROTO_UDP) else if (l4proto == IPPROTO_UDP)
opcode = TX_UDP_PKT; opcode = TX_UDP_PKT;
} else if (protocol == cpu_to_be16(ETH_P_IPV6)) { } else if (protocol == ETH_P_IPV6) {
l4proto = ipv6_hdr(skb)->nexthdr; l4proto = ipv6_hdr(skb)->nexthdr;
if (l4proto == IPPROTO_TCP) if (l4proto == IPPROTO_TCP)
...@@ -1981,63 +2046,11 @@ qlcnic_tso_check(struct net_device *netdev, ...@@ -1981,63 +2046,11 @@ qlcnic_tso_check(struct net_device *netdev,
opcode = TX_UDPV6_PKT; opcode = TX_UDPV6_PKT;
} }
} }
first_desc->tcp_hdr_offset += skb_transport_offset(skb); first_desc->tcp_hdr_offset += skb_transport_offset(skb);
first_desc->ip_hdr_offset += skb_network_offset(skb); first_desc->ip_hdr_offset += skb_network_offset(skb);
qlcnic_set_tx_flags_opcode(first_desc, flags, opcode); qlcnic_set_tx_flags_opcode(first_desc, flags, opcode);
if (!tso) return 0;
return;
/* For LSO, we need to copy the MAC/IP/TCP headers into
* the descriptor ring
*/
copied = 0;
offset = 2;
if (vlan_oob) {
/* Create a TSO vlan header template for firmware */
hwdesc = &tx_ring->desc_head[producer];
tx_ring->cmd_buf_arr[producer].skb = NULL;
copy_len = min((int)sizeof(struct cmd_desc_type0) - offset,
hdr_len + VLAN_HLEN);
vh = (struct vlan_ethhdr *)((char *)hwdesc + 2);
skb_copy_from_linear_data(skb, vh, 12);
vh->h_vlan_proto = htons(ETH_P_8021Q);
vh->h_vlan_TCI = (__be16)swab16((u16)first_desc->vlan_TCI);
skb_copy_from_linear_data_offset(skb, 12,
(char *)vh + 16, copy_len - 16);
copied = copy_len - VLAN_HLEN;
offset = 0;
producer = get_next_index(producer, tx_ring->num_desc);
}
while (copied < hdr_len) {
copy_len = min((int)sizeof(struct cmd_desc_type0) - offset,
(hdr_len - copied));
hwdesc = &tx_ring->desc_head[producer];
tx_ring->cmd_buf_arr[producer].skb = NULL;
skb_copy_from_linear_data_offset(skb, copied,
(char *)hwdesc + offset, copy_len);
copied += copy_len;
offset = 0;
producer = get_next_index(producer, tx_ring->num_desc);
}
tx_ring->producer = producer;
barrier();
adapter->stats.lso_frames++;
} }
static int static int
...@@ -2088,39 +2101,21 @@ qlcnic_map_tx_skb(struct pci_dev *pdev, ...@@ -2088,39 +2101,21 @@ qlcnic_map_tx_skb(struct pci_dev *pdev,
return -ENOMEM; return -ENOMEM;
} }
static int static void
qlcnic_check_tx_tagging(struct qlcnic_adapter *adapter, qlcnic_unmap_buffers(struct pci_dev *pdev, struct sk_buff *skb,
struct sk_buff *skb, struct qlcnic_cmd_buffer *pbuf)
struct cmd_desc_type0 *first_desc)
{ {
u8 opcode = 0; struct qlcnic_skb_frag *nf = &pbuf->frag_array[0];
u16 flags = 0; int nr_frags = skb_shinfo(skb)->nr_frags;
__be16 protocol = skb->protocol; int i;
struct vlan_ethhdr *vh;
if (protocol == cpu_to_be16(ETH_P_8021Q)) { for (i = 0; i < nr_frags; i++) {
vh = (struct vlan_ethhdr *)skb->data; nf = &pbuf->frag_array[i+1];
protocol = vh->h_vlan_encapsulated_proto; pci_unmap_page(pdev, nf->dma, nf->length, PCI_DMA_TODEVICE);
flags = FLAGS_VLAN_TAGGED;
qlcnic_set_tx_vlan_tci(first_desc, ntohs(vh->h_vlan_TCI));
} else if (vlan_tx_tag_present(skb)) {
flags = FLAGS_VLAN_OOB;
qlcnic_set_tx_vlan_tci(first_desc, vlan_tx_tag_get(skb));
} }
if (unlikely(adapter->pvid)) {
if (first_desc->vlan_TCI &&
!(adapter->flags & QLCNIC_TAGGING_ENABLED))
return -EIO;
if (first_desc->vlan_TCI &&
(adapter->flags & QLCNIC_TAGGING_ENABLED))
goto set_flags;
flags = FLAGS_VLAN_OOB; nf = &pbuf->frag_array[0];
qlcnic_set_tx_vlan_tci(first_desc, adapter->pvid); pci_unmap_single(pdev, nf->dma, skb_headlen(skb), PCI_DMA_TODEVICE);
}
set_flags:
qlcnic_set_tx_flags_opcode(first_desc, flags, opcode);
return 0;
} }
static inline void static inline void
...@@ -2144,7 +2139,7 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ...@@ -2144,7 +2139,7 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
int i, k; int i, k;
u32 producer; u32 producer;
int frag_count, no_of_desc; int frag_count;
u32 num_txd = tx_ring->num_desc; u32 num_txd = tx_ring->num_desc;
if (!test_bit(__QLCNIC_DEV_UP, &adapter->state)) { if (!test_bit(__QLCNIC_DEV_UP, &adapter->state)) {
...@@ -2161,12 +2156,8 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ...@@ -2161,12 +2156,8 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
frag_count = skb_shinfo(skb)->nr_frags + 1; frag_count = skb_shinfo(skb)->nr_frags + 1;
/* 4 fragments per cmd des */
no_of_desc = (frag_count + 3) >> 2;
if (unlikely(qlcnic_tx_avail(tx_ring) <= TX_STOP_THRESH)) { if (unlikely(qlcnic_tx_avail(tx_ring) <= TX_STOP_THRESH)) {
netif_stop_queue(netdev); netif_stop_queue(netdev);
smp_mb();
if (qlcnic_tx_avail(tx_ring) > TX_STOP_THRESH) if (qlcnic_tx_avail(tx_ring) > TX_STOP_THRESH)
netif_start_queue(netdev); netif_start_queue(netdev);
else { else {
...@@ -2183,9 +2174,6 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ...@@ -2183,9 +2174,6 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
first_desc = hwdesc = &tx_ring->desc_head[producer]; first_desc = hwdesc = &tx_ring->desc_head[producer];
qlcnic_clear_cmddesc((u64 *)hwdesc); qlcnic_clear_cmddesc((u64 *)hwdesc);
if (qlcnic_check_tx_tagging(adapter, skb, first_desc))
goto drop_packet;
if (qlcnic_map_tx_skb(pdev, skb, pbuf)) { if (qlcnic_map_tx_skb(pdev, skb, pbuf)) {
adapter->stats.tx_dma_map_error++; adapter->stats.tx_dma_map_error++;
goto drop_packet; goto drop_packet;
...@@ -2229,8 +2217,10 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ...@@ -2229,8 +2217,10 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
} }
tx_ring->producer = get_next_index(producer, num_txd); tx_ring->producer = get_next_index(producer, num_txd);
smp_mb();
qlcnic_tso_check(netdev, tx_ring, first_desc, skb); if (unlikely(qlcnic_tx_pkt(adapter, first_desc, skb)))
goto unwind_buff;
if (qlcnic_mac_learn) if (qlcnic_mac_learn)
qlcnic_send_filter(adapter, tx_ring, first_desc, skb); qlcnic_send_filter(adapter, tx_ring, first_desc, skb);
...@@ -2242,6 +2232,8 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) ...@@ -2242,6 +2232,8 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK; return NETDEV_TX_OK;
unwind_buff:
qlcnic_unmap_buffers(pdev, skb, pbuf);
drop_packet: drop_packet:
adapter->stats.txdropped++; adapter->stats.txdropped++;
dev_kfree_skb_any(skb); dev_kfree_skb_any(skb);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册