提交 76a540d4 编写于 作者: A Alexander Duyck 提交者: Jeff Kirsher

fm10k: Add support for netdev offloads

This patch adds support for basic offloads including TSO, Tx checksum, Rx
checksum, Rx hash, and the same features applied to VXLAN/NVGRE tunnels.
Signed-off-by: NAlexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: NJeff Kirsher <jeffrey.t.kirsher@intel.com>
上级 aa3ac822
...@@ -342,6 +342,59 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring, ...@@ -342,6 +342,59 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
return skb; return skb;
} }
static inline void fm10k_rx_checksum(struct fm10k_ring *ring,
union fm10k_rx_desc *rx_desc,
struct sk_buff *skb)
{
skb_checksum_none_assert(skb);
/* Rx checksum disabled via ethtool */
if (!(ring->netdev->features & NETIF_F_RXCSUM))
return;
/* TCP/UDP checksum error bit is set */
if (fm10k_test_staterr(rx_desc,
FM10K_RXD_STATUS_L4E |
FM10K_RXD_STATUS_L4E2 |
FM10K_RXD_STATUS_IPE |
FM10K_RXD_STATUS_IPE2)) {
ring->rx_stats.csum_err++;
return;
}
/* It must be a TCP or UDP packet with a valid checksum */
if (fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS2))
skb->encapsulation = true;
else if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS))
return;
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
#define FM10K_RSS_L4_TYPES_MASK \
((1ul << FM10K_RSSTYPE_IPV4_TCP) | \
(1ul << FM10K_RSSTYPE_IPV4_UDP) | \
(1ul << FM10K_RSSTYPE_IPV6_TCP) | \
(1ul << FM10K_RSSTYPE_IPV6_UDP))
static inline void fm10k_rx_hash(struct fm10k_ring *ring,
union fm10k_rx_desc *rx_desc,
struct sk_buff *skb)
{
u16 rss_type;
if (!(ring->netdev->features & NETIF_F_RXHASH))
return;
rss_type = le16_to_cpu(rx_desc->w.pkt_info) & FM10K_RXD_RSSTYPE_MASK;
if (!rss_type)
return;
skb_set_hash(skb, le32_to_cpu(rx_desc->d.rss),
(FM10K_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
}
/** /**
* fm10k_process_skb_fields - Populate skb header fields from Rx descriptor * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
* @rx_ring: rx descriptor ring packet is being transacted on * @rx_ring: rx descriptor ring packet is being transacted on
...@@ -358,6 +411,10 @@ static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring, ...@@ -358,6 +411,10 @@ static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
{ {
unsigned int len = skb->len; unsigned int len = skb->len;
fm10k_rx_hash(rx_ring, rx_desc, skb);
fm10k_rx_checksum(rx_ring, rx_desc, skb);
FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan; FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;
skb_record_rx_queue(skb, rx_ring->queue_index); skb_record_rx_queue(skb, rx_ring->queue_index);
...@@ -569,6 +626,240 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, ...@@ -569,6 +626,240 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
return total_packets < budget; return total_packets < budget;
} }
#define VXLAN_HLEN (sizeof(struct udphdr) + 8)
static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb)
{
struct fm10k_intfc *interface = netdev_priv(skb->dev);
struct fm10k_vxlan_port *vxlan_port;
/* we can only offload a vxlan if we recognize it as such */
vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
struct fm10k_vxlan_port, list);
if (!vxlan_port)
return NULL;
if (vxlan_port->port != udp_hdr(skb)->dest)
return NULL;
/* return offset of udp_hdr plus 8 bytes for VXLAN header */
return (struct ethhdr *)(skb_transport_header(skb) + VXLAN_HLEN);
}
#define FM10K_NVGRE_RESERVED0_FLAGS htons(0x9FFF)
#define NVGRE_TNI htons(0x2000)
struct fm10k_nvgre_hdr {
__be16 flags;
__be16 proto;
__be32 tni;
};
static struct ethhdr *fm10k_gre_is_nvgre(struct sk_buff *skb)
{
struct fm10k_nvgre_hdr *nvgre_hdr;
int hlen = ip_hdrlen(skb);
/* currently only IPv4 is supported due to hlen above */
if (vlan_get_protocol(skb) != htons(ETH_P_IP))
return NULL;
/* our transport header should be NVGRE */
nvgre_hdr = (struct fm10k_nvgre_hdr *)(skb_network_header(skb) + hlen);
/* verify all reserved flags are 0 */
if (nvgre_hdr->flags & FM10K_NVGRE_RESERVED0_FLAGS)
return NULL;
/* verify protocol is transparent Ethernet bridging */
if (nvgre_hdr->proto != htons(ETH_P_TEB))
return NULL;
/* report start of ethernet header */
if (nvgre_hdr->flags & NVGRE_TNI)
return (struct ethhdr *)(nvgre_hdr + 1);
return (struct ethhdr *)(&nvgre_hdr->tni);
}
static __be16 fm10k_tx_encap_offload(struct sk_buff *skb)
{
struct ethhdr *eth_hdr;
u8 l4_hdr = 0;
switch (vlan_get_protocol(skb)) {
case htons(ETH_P_IP):
l4_hdr = ip_hdr(skb)->protocol;
break;
case htons(ETH_P_IPV6):
l4_hdr = ipv6_hdr(skb)->nexthdr;
break;
default:
return 0;
}
switch (l4_hdr) {
case IPPROTO_UDP:
eth_hdr = fm10k_port_is_vxlan(skb);
break;
case IPPROTO_GRE:
eth_hdr = fm10k_gre_is_nvgre(skb);
break;
default:
return 0;
}
if (!eth_hdr)
return 0;
switch (eth_hdr->h_proto) {
case htons(ETH_P_IP):
case htons(ETH_P_IPV6):
break;
default:
return 0;
}
return eth_hdr->h_proto;
}
static int fm10k_tso(struct fm10k_ring *tx_ring,
struct fm10k_tx_buffer *first)
{
struct sk_buff *skb = first->skb;
struct fm10k_tx_desc *tx_desc;
unsigned char *th;
u8 hdrlen;
if (skb->ip_summed != CHECKSUM_PARTIAL)
return 0;
if (!skb_is_gso(skb))
return 0;
/* compute header lengths */
if (skb->encapsulation) {
if (!fm10k_tx_encap_offload(skb))
goto err_vxlan;
th = skb_inner_transport_header(skb);
} else {
th = skb_transport_header(skb);
}
/* compute offset from SOF to transport header and add header len */
hdrlen = (th - skb->data) + (((struct tcphdr *)th)->doff << 2);
first->tx_flags |= FM10K_TX_FLAGS_CSUM;
/* update gso size and bytecount with header size */
first->gso_segs = skb_shinfo(skb)->gso_segs;
first->bytecount += (first->gso_segs - 1) * hdrlen;
/* populate Tx descriptor header size and mss */
tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
tx_desc->hdrlen = hdrlen;
tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
return 1;
err_vxlan:
tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
if (!net_ratelimit())
netdev_err(tx_ring->netdev,
"TSO requested for unsupported tunnel, disabling offload\n");
return -1;
}
static void fm10k_tx_csum(struct fm10k_ring *tx_ring,
struct fm10k_tx_buffer *first)
{
struct sk_buff *skb = first->skb;
struct fm10k_tx_desc *tx_desc;
union {
struct iphdr *ipv4;
struct ipv6hdr *ipv6;
u8 *raw;
} network_hdr;
__be16 protocol;
u8 l4_hdr = 0;
if (skb->ip_summed != CHECKSUM_PARTIAL)
goto no_csum;
if (skb->encapsulation) {
protocol = fm10k_tx_encap_offload(skb);
if (!protocol) {
if (skb_checksum_help(skb)) {
dev_warn(tx_ring->dev,
"failed to offload encap csum!\n");
tx_ring->tx_stats.csum_err++;
}
goto no_csum;
}
network_hdr.raw = skb_inner_network_header(skb);
} else {
protocol = vlan_get_protocol(skb);
network_hdr.raw = skb_network_header(skb);
}
switch (protocol) {
case htons(ETH_P_IP):
l4_hdr = network_hdr.ipv4->protocol;
break;
case htons(ETH_P_IPV6):
l4_hdr = network_hdr.ipv6->nexthdr;
break;
default:
if (unlikely(net_ratelimit())) {
dev_warn(tx_ring->dev,
"partial checksum but ip version=%x!\n",
protocol);
}
tx_ring->tx_stats.csum_err++;
goto no_csum;
}
switch (l4_hdr) {
case IPPROTO_TCP:
case IPPROTO_UDP:
break;
case IPPROTO_GRE:
if (skb->encapsulation)
break;
default:
if (unlikely(net_ratelimit())) {
dev_warn(tx_ring->dev,
"partial checksum but l4 proto=%x!\n",
l4_hdr);
}
tx_ring->tx_stats.csum_err++;
goto no_csum;
}
/* update TX checksum flag */
first->tx_flags |= FM10K_TX_FLAGS_CSUM;
no_csum:
/* populate Tx descriptor header size and mss */
tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
tx_desc->hdrlen = 0;
tx_desc->mss = 0;
}
#define FM10K_SET_FLAG(_input, _flag, _result) \
((_flag <= _result) ? \
((u32)(_input & _flag) * (_result / _flag)) : \
((u32)(_input & _flag) / (_flag / _result)))
static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags)
{
/* set type for advanced descriptor with frame checksum insertion */
u32 desc_flags = 0;
/* set checksum offload bits */
desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM,
FM10K_TXD_FLAG_CSUM);
return desc_flags;
}
static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring, static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
struct fm10k_tx_desc *tx_desc, u16 i, struct fm10k_tx_desc *tx_desc, u16 i,
dma_addr_t dma, unsigned int size, u8 desc_flags) dma_addr_t dma, unsigned int size, u8 desc_flags)
...@@ -596,8 +887,9 @@ static void fm10k_tx_map(struct fm10k_ring *tx_ring, ...@@ -596,8 +887,9 @@ static void fm10k_tx_map(struct fm10k_ring *tx_ring,
unsigned char *data; unsigned char *data;
dma_addr_t dma; dma_addr_t dma;
unsigned int data_len, size; unsigned int data_len, size;
u32 tx_flags = first->tx_flags;
u16 i = tx_ring->next_to_use; u16 i = tx_ring->next_to_use;
u8 flags = 0; u8 flags = fm10k_tx_desc_flags(skb, tx_flags);
tx_desc = FM10K_TX_DESC(tx_ring, i); tx_desc = FM10K_TX_DESC(tx_ring, i);
...@@ -732,6 +1024,7 @@ netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, ...@@ -732,6 +1024,7 @@ netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
struct fm10k_ring *tx_ring) struct fm10k_ring *tx_ring)
{ {
struct fm10k_tx_buffer *first; struct fm10k_tx_buffer *first;
int tso;
u32 tx_flags = 0; u32 tx_flags = 0;
#if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD #if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
unsigned short f; unsigned short f;
...@@ -763,10 +1056,22 @@ netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, ...@@ -763,10 +1056,22 @@ netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
/* record initial flags and protocol */ /* record initial flags and protocol */
first->tx_flags = tx_flags; first->tx_flags = tx_flags;
tso = fm10k_tso(tx_ring, first);
if (tso < 0)
goto out_drop;
else if (!tso)
fm10k_tx_csum(tx_ring, first);
fm10k_tx_map(tx_ring, first); fm10k_tx_map(tx_ring, first);
fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED); fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);
return NETDEV_TX_OK;
out_drop:
dev_kfree_skb_any(first->skb);
first->skb = NULL;
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
......
...@@ -20,6 +20,9 @@ ...@@ -20,6 +20,9 @@
#include "fm10k.h" #include "fm10k.h"
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#if IS_ENABLED(CONFIG_VXLAN)
#include <net/vxlan.h>
#endif /* CONFIG_VXLAN */
/** /**
* fm10k_setup_tx_resources - allocate Tx resources (Descriptors) * fm10k_setup_tx_resources - allocate Tx resources (Descriptors)
...@@ -368,6 +371,128 @@ static void fm10k_request_glort_range(struct fm10k_intfc *interface) ...@@ -368,6 +371,128 @@ static void fm10k_request_glort_range(struct fm10k_intfc *interface)
interface->glort_count = mask + 1; interface->glort_count = mask + 1;
} }
/**
* fm10k_del_vxlan_port_all
* @interface: board private structure
*
* This function frees the entire vxlan_port list
**/
static void fm10k_del_vxlan_port_all(struct fm10k_intfc *interface)
{
struct fm10k_vxlan_port *vxlan_port;
/* flush all entries from list */
vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
struct fm10k_vxlan_port, list);
while (vxlan_port) {
list_del(&vxlan_port->list);
kfree(vxlan_port);
vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
struct fm10k_vxlan_port,
list);
}
}
/**
* fm10k_restore_vxlan_port
* @interface: board private structure
*
* This function restores the value in the tunnel_cfg register after reset
**/
static void fm10k_restore_vxlan_port(struct fm10k_intfc *interface)
{
struct fm10k_hw *hw = &interface->hw;
struct fm10k_vxlan_port *vxlan_port;
/* only the PF supports configuring tunnels */
if (hw->mac.type != fm10k_mac_pf)
return;
vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
struct fm10k_vxlan_port, list);
/* restore tunnel configuration register */
fm10k_write_reg(hw, FM10K_TUNNEL_CFG,
(vxlan_port ? ntohs(vxlan_port->port) : 0) |
(ETH_P_TEB << FM10K_TUNNEL_CFG_NVGRE_SHIFT));
}
/**
* fm10k_add_vxlan_port
* @netdev: network interface device structure
* @sa_family: Address family of new port
* @port: port number used for VXLAN
*
* This funciton is called when a new VXLAN interface has added a new port
* number to the range that is currently in use for VXLAN. The new port
* number is always added to the tail so that the port number list should
* match the order in which the ports were allocated. The head of the list
* is always used as the VXLAN port number for offloads.
**/
static void fm10k_add_vxlan_port(struct net_device *dev,
sa_family_t sa_family, __be16 port) {
struct fm10k_intfc *interface = netdev_priv(dev);
struct fm10k_vxlan_port *vxlan_port;
/* only the PF supports configuring tunnels */
if (interface->hw.mac.type != fm10k_mac_pf)
return;
/* existing ports are pulled out so our new entry is always last */
fm10k_vxlan_port_for_each(vxlan_port, interface) {
if ((vxlan_port->port == port) &&
(vxlan_port->sa_family == sa_family)) {
list_del(&vxlan_port->list);
goto insert_tail;
}
}
/* allocate memory to track ports */
vxlan_port = kmalloc(sizeof(*vxlan_port), GFP_ATOMIC);
if (!vxlan_port)
return;
vxlan_port->port = port;
vxlan_port->sa_family = sa_family;
insert_tail:
/* add new port value to list */
list_add_tail(&vxlan_port->list, &interface->vxlan_port);
fm10k_restore_vxlan_port(interface);
}
/**
* fm10k_del_vxlan_port
* @netdev: network interface device structure
* @sa_family: Address family of freed port
* @port: port number used for VXLAN
*
* This funciton is called when a new VXLAN interface has freed a port
* number from the range that is currently in use for VXLAN. The freed
* port is removed from the list and the new head is used to determine
* the port number for offloads.
**/
static void fm10k_del_vxlan_port(struct net_device *dev,
sa_family_t sa_family, __be16 port) {
struct fm10k_intfc *interface = netdev_priv(dev);
struct fm10k_vxlan_port *vxlan_port;
if (interface->hw.mac.type != fm10k_mac_pf)
return;
/* find the port in the list and free it */
fm10k_vxlan_port_for_each(vxlan_port, interface) {
if ((vxlan_port->port == port) &&
(vxlan_port->sa_family == sa_family)) {
list_del(&vxlan_port->list);
kfree(vxlan_port);
break;
}
}
fm10k_restore_vxlan_port(interface);
}
/** /**
* fm10k_open - Called when a network interface is made active * fm10k_open - Called when a network interface is made active
* @netdev: network interface device structure * @netdev: network interface device structure
...@@ -410,6 +535,11 @@ int fm10k_open(struct net_device *netdev) ...@@ -410,6 +535,11 @@ int fm10k_open(struct net_device *netdev)
if (err) if (err)
goto err_set_queues; goto err_set_queues;
#if IS_ENABLED(CONFIG_VXLAN)
/* update VXLAN port configuration */
vxlan_get_rx_port(netdev);
#endif
fm10k_up(interface); fm10k_up(interface);
return 0; return 0;
...@@ -443,6 +573,8 @@ int fm10k_close(struct net_device *netdev) ...@@ -443,6 +573,8 @@ int fm10k_close(struct net_device *netdev)
fm10k_qv_free_irq(interface); fm10k_qv_free_irq(interface);
fm10k_del_vxlan_port_all(interface);
fm10k_free_all_tx_resources(interface); fm10k_free_all_tx_resources(interface);
fm10k_free_all_rx_resources(interface); fm10k_free_all_rx_resources(interface);
...@@ -892,6 +1024,9 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) ...@@ -892,6 +1024,9 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
/* record updated xcast mode state */ /* record updated xcast mode state */
interface->xcast_mode = xcast_mode; interface->xcast_mode = xcast_mode;
/* Restore tunnel configuration */
fm10k_restore_vxlan_port(interface);
} }
void fm10k_reset_rx_state(struct fm10k_intfc *interface) void fm10k_reset_rx_state(struct fm10k_intfc *interface)
...@@ -1026,6 +1161,8 @@ static const struct net_device_ops fm10k_netdev_ops = { ...@@ -1026,6 +1161,8 @@ static const struct net_device_ops fm10k_netdev_ops = {
.ndo_set_rx_mode = fm10k_set_rx_mode, .ndo_set_rx_mode = fm10k_set_rx_mode,
.ndo_get_stats64 = fm10k_get_stats64, .ndo_get_stats64 = fm10k_get_stats64,
.ndo_setup_tc = fm10k_setup_tc, .ndo_setup_tc = fm10k_setup_tc,
.ndo_add_vxlan_port = fm10k_add_vxlan_port,
.ndo_del_vxlan_port = fm10k_del_vxlan_port,
}; };
#define DEFAULT_DEBUG_LEVEL_SHIFT 3 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
...@@ -1048,7 +1185,15 @@ struct net_device *fm10k_alloc_netdev(void) ...@@ -1048,7 +1185,15 @@ struct net_device *fm10k_alloc_netdev(void)
interface->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1; interface->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
/* configure default features */ /* configure default features */
dev->features |= NETIF_F_SG; dev->features |= NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM |
NETIF_F_SG |
NETIF_F_TSO |
NETIF_F_TSO6 |
NETIF_F_TSO_ECN |
NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_RXHASH |
NETIF_F_RXCSUM;
/* all features defined to this point should be changeable */ /* all features defined to this point should be changeable */
dev->hw_features |= dev->features; dev->hw_features |= dev->features;
...@@ -1057,7 +1202,13 @@ struct net_device *fm10k_alloc_netdev(void) ...@@ -1057,7 +1202,13 @@ struct net_device *fm10k_alloc_netdev(void)
dev->vlan_features |= dev->features; dev->vlan_features |= dev->features;
/* configure tunnel offloads */ /* configure tunnel offloads */
dev->hw_enc_features = NETIF_F_SG; dev->hw_enc_features = NETIF_F_IP_CSUM |
NETIF_F_TSO |
NETIF_F_TSO6 |
NETIF_F_TSO_ECN |
NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_IPV6_CSUM |
NETIF_F_SG;
/* we want to leave these both on as we cannot disable VLAN tag /* we want to leave these both on as we cannot disable VLAN tag
* insertion or stripping on the hardware since it is contained * insertion or stripping on the hardware since it is contained
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册