diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h index 17d83f37fbf26cbeee5e7aa4fe1c66d923d41a58..41ad07d45144ec79f373052e3ee5742545ba5eed 100644 --- a/drivers/net/ethernet/sfc/bitfield.h +++ b/drivers/net/ethernet/sfc/bitfield.h @@ -433,6 +433,9 @@ typedef union efx_oword { (oword).u64[1] = (from).u64[1] & (mask).u64[1]; \ } while (0) +#define EFX_AND_QWORD(qword, from, mask) \ + (qword).u64[0] = (from).u64[0] & (mask).u64[0] + #define EFX_OR_OWORD(oword, from, mask) \ do { \ (oword).u64[0] = (from).u64[0] | (mask).u64[0]; \ diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index dec0c8083ff33db5365416c220a1d66b7162ee75..6bba2d206d82a7566ba3b1894b7142b00d0b2c9c 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -132,6 +132,7 @@ static int efx_ef10_filter_add_vlan(struct efx_nic *efx, u16 vid); static void efx_ef10_filter_del_vlan_internal(struct efx_nic *efx, struct efx_ef10_filter_vlan *vlan); static void efx_ef10_filter_del_vlan(struct efx_nic *efx, u16 vid); +static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading); static int efx_ef10_get_warm_boot_count(struct efx_nic *efx) { @@ -624,6 +625,8 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc) goto fail2; + mutex_init(&nic_data->udp_tunnels_lock); + /* Reset (most) configuration for this function */ rc = efx_mcdi_reset(efx, RESET_TYPE_ALL); if (rc) @@ -712,6 +715,14 @@ static int efx_ef10_probe(struct efx_nic *efx) fail4: device_remove_file(&efx->pci_dev->dev, &dev_attr_link_control_flag); fail3: + efx_mcdi_detach(efx); + + mutex_lock(&nic_data->udp_tunnels_lock); + memset(nic_data->udp_tunnels, 0, sizeof(nic_data->udp_tunnels)); + (void)efx_ef10_set_udp_tnl_ports(efx, true); + mutex_unlock(&nic_data->udp_tunnels_lock); + mutex_destroy(&nic_data->udp_tunnels_lock); + efx_mcdi_fini(efx); fail2: efx_nic_free_buffer(efx, &nic_data->mcdi_buf); @@ -981,6 +992,15 @@ static void efx_ef10_remove(struct efx_nic *efx) device_remove_file(&efx->pci_dev->dev, &dev_attr_primary_flag); device_remove_file(&efx->pci_dev->dev, &dev_attr_link_control_flag); + efx_mcdi_detach(efx); + + memset(nic_data->udp_tunnels, 0, sizeof(nic_data->udp_tunnels)); + mutex_lock(&nic_data->udp_tunnels_lock); + (void)efx_ef10_set_udp_tnl_ports(efx, true); + mutex_unlock(&nic_data->udp_tunnels_lock); + + mutex_destroy(&nic_data->udp_tunnels_lock); + efx_mcdi_fini(efx); efx_nic_free_buffer(efx, &nic_data->mcdi_buf); kfree(nic_data); @@ -3154,13 +3174,103 @@ static void efx_ef10_handle_rx_abort(struct efx_rx_queue *rx_queue) ++efx_rx_queue_channel(rx_queue)->n_rx_nodesc_trunc; } +static u16 efx_ef10_handle_rx_event_errors(struct efx_channel *channel, + unsigned int n_packets, + unsigned int rx_encap_hdr, + unsigned int rx_l3_class, + unsigned int rx_l4_class, + const efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + + if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)) { + if (!efx->loopback_selftest) + channel->n_rx_eth_crc_err += n_packets; + return EFX_RX_PKT_DISCARD; + } + if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR)) { + if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN && + rx_l3_class != ESE_DZ_L3_CLASS_IP4 && + rx_l3_class != ESE_DZ_L3_CLASS_IP4_FRAG && + rx_l3_class != ESE_DZ_L3_CLASS_IP6 && + rx_l3_class != ESE_DZ_L3_CLASS_IP6_FRAG)) + netdev_WARN(efx->net_dev, + "invalid class for RX_IPCKSUM_ERR: event=" + EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); + if (!efx->loopback_selftest) + *(rx_encap_hdr ? + &channel->n_rx_outer_ip_hdr_chksum_err : + &channel->n_rx_ip_hdr_chksum_err) += n_packets; + return 0; + } + if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_TCPUDP_CKSUM_ERR)) { + if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN && + ((rx_l3_class != ESE_DZ_L3_CLASS_IP4 && + rx_l3_class != ESE_DZ_L3_CLASS_IP6) || + (rx_l4_class != ESE_DZ_L4_CLASS_TCP && + rx_l4_class != ESE_DZ_L4_CLASS_UDP)))) + netdev_WARN(efx->net_dev, + "invalid class for RX_TCPUDP_CKSUM_ERR: event=" + EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); + if (!efx->loopback_selftest) + *(rx_encap_hdr ? + &channel->n_rx_outer_tcp_udp_chksum_err : + &channel->n_rx_tcp_udp_chksum_err) += n_packets; + return 0; + } + if (EFX_QWORD_FIELD(*event, ESF_EZ_RX_IP_INNER_CHKSUM_ERR)) { + if (unlikely(!rx_encap_hdr)) + netdev_WARN(efx->net_dev, + "invalid encapsulation type for RX_IP_INNER_CHKSUM_ERR: event=" + EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); + else if (unlikely(rx_l3_class != ESE_DZ_L3_CLASS_IP4 && + rx_l3_class != ESE_DZ_L3_CLASS_IP4_FRAG && + rx_l3_class != ESE_DZ_L3_CLASS_IP6 && + rx_l3_class != ESE_DZ_L3_CLASS_IP6_FRAG)) + netdev_WARN(efx->net_dev, + "invalid class for RX_IP_INNER_CHKSUM_ERR: event=" + EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); + if (!efx->loopback_selftest) + channel->n_rx_inner_ip_hdr_chksum_err += n_packets; + return 0; + } + if (EFX_QWORD_FIELD(*event, ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR)) { + if (unlikely(!rx_encap_hdr)) + netdev_WARN(efx->net_dev, + "invalid encapsulation type for RX_TCP_UDP_INNER_CHKSUM_ERR: event=" + EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); + else if (unlikely((rx_l3_class != ESE_DZ_L3_CLASS_IP4 && + rx_l3_class != ESE_DZ_L3_CLASS_IP6) || + (rx_l4_class != ESE_DZ_L4_CLASS_TCP && + rx_l4_class != ESE_DZ_L4_CLASS_UDP))) + netdev_WARN(efx->net_dev, + "invalid class for RX_TCP_UDP_INNER_CHKSUM_ERR: event=" + EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); + if (!efx->loopback_selftest) + channel->n_rx_inner_tcp_udp_chksum_err += n_packets; + return 0; + } + + WARN_ON(1); /* No error bits were recognised */ + return 0; +} + static int efx_ef10_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) { - unsigned int rx_bytes, next_ptr_lbits, rx_queue_label, rx_l4_class; + unsigned int rx_bytes, next_ptr_lbits, rx_queue_label; + unsigned int rx_l3_class, rx_l4_class, rx_encap_hdr; unsigned int n_descs, n_packets, i; struct efx_nic *efx = channel->efx; + struct efx_ef10_nic_data *nic_data = efx->nic_data; struct efx_rx_queue *rx_queue; + efx_qword_t errors; bool rx_cont; u16 flags = 0; @@ -3171,8 +3281,14 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel, rx_bytes = EFX_QWORD_FIELD(*event, ESF_DZ_RX_BYTES); next_ptr_lbits = EFX_QWORD_FIELD(*event, ESF_DZ_RX_DSC_PTR_LBITS); rx_queue_label = EFX_QWORD_FIELD(*event, ESF_DZ_RX_QLABEL); + rx_l3_class = EFX_QWORD_FIELD(*event, ESF_DZ_RX_L3_CLASS); rx_l4_class = EFX_QWORD_FIELD(*event, ESF_DZ_RX_L4_CLASS); rx_cont = EFX_QWORD_FIELD(*event, ESF_DZ_RX_CONT); + rx_encap_hdr = + nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN) ? + EFX_QWORD_FIELD(*event, ESF_EZ_RX_ENCAP_HDR) : + ESE_EZ_ENCAP_HDR_NONE; if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_DROP_EVENT)) netdev_WARN(efx->net_dev, "saw RX_DROP_EVENT: event=" @@ -3232,17 +3348,37 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel, n_packets = 1; } - if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR))) - flags |= EFX_RX_PKT_DISCARD; - - if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR))) { - channel->n_rx_ip_hdr_chksum_err += n_packets; - } else if (unlikely(EFX_QWORD_FIELD(*event, - ESF_DZ_RX_TCPUDP_CKSUM_ERR))) { - channel->n_rx_tcp_udp_chksum_err += n_packets; - } else if (rx_l4_class == ESE_DZ_L4_CLASS_TCP || - rx_l4_class == ESE_DZ_L4_CLASS_UDP) { - flags |= EFX_RX_PKT_CSUMMED; + EFX_POPULATE_QWORD_5(errors, ESF_DZ_RX_ECRC_ERR, 1, + ESF_DZ_RX_IPCKSUM_ERR, 1, + ESF_DZ_RX_TCPUDP_CKSUM_ERR, 1, + ESF_EZ_RX_IP_INNER_CHKSUM_ERR, 1, + ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR, 1); + EFX_AND_QWORD(errors, *event, errors); + if (unlikely(!EFX_QWORD_IS_ZERO(errors))) { + flags |= efx_ef10_handle_rx_event_errors(channel, n_packets, + rx_l3_class, rx_l4_class, + rx_encap_hdr, event); + } else { + bool tcpudp = rx_l4_class == ESE_DZ_L4_CLASS_TCP || + rx_l4_class == ESE_DZ_L4_CLASS_UDP; + + switch (rx_encap_hdr) { + case ESE_EZ_ENCAP_HDR_VXLAN: /* VxLAN or GENEVE */ + flags |= EFX_RX_PKT_CSUMMED; /* outer UDP csum */ + if (tcpudp) + flags |= EFX_RX_PKT_CSUM_LEVEL; /* inner L4 */ + break; + case ESE_EZ_ENCAP_HDR_GRE: + case ESE_EZ_ENCAP_HDR_NONE: + if (tcpudp) + flags |= EFX_RX_PKT_CSUMMED; + break; + default: + netdev_WARN(efx->net_dev, + "unknown encapsulation type: event=" + EFX_QWORD_FMT "\n", + EFX_QWORD_VAL(*event)); + } } if (rx_l4_class == ESE_DZ_L4_CLASS_TCP) @@ -5950,6 +6086,271 @@ static int efx_ef10_vlan_rx_kill_vid(struct efx_nic *efx, __be16 proto, u16 vid) return efx_ef10_del_vlan(efx, vid); } +/* We rely on the MCDI wiping out our TX rings if it made any changes to the + * ports table, ensuring that any TSO descriptors that were made on a now- + * removed tunnel port will be blown away and won't break things when we try + * to transmit them using the new ports table. + */ +static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_LENMAX); + MCDI_DECLARE_BUF(outbuf, MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_LEN); + bool will_reset = false; + size_t num_entries = 0; + size_t inlen, outlen; + size_t i; + int rc; + efx_dword_t flags_and_num_entries; + + WARN_ON(!mutex_is_locked(&nic_data->udp_tunnels_lock)); + + nic_data->udp_tunnels_dirty = false; + + if (!(nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))) { + netif_device_attach(efx->net_dev); + return 0; + } + + BUILD_BUG_ON(ARRAY_SIZE(nic_data->udp_tunnels) > + MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_ENTRIES_MAXNUM); + + for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i) { + if (nic_data->udp_tunnels[i].count && + nic_data->udp_tunnels[i].port) { + efx_dword_t entry; + + EFX_POPULATE_DWORD_2(entry, + TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT, + ntohs(nic_data->udp_tunnels[i].port), + TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL, + nic_data->udp_tunnels[i].type); + *_MCDI_ARRAY_DWORD(inbuf, + SET_TUNNEL_ENCAP_UDP_PORTS_IN_ENTRIES, + num_entries++) = entry; + } + } + + BUILD_BUG_ON((MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_NUM_ENTRIES_OFST - + MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_FLAGS_OFST) * 8 != + EFX_WORD_1_LBN); + BUILD_BUG_ON(MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_NUM_ENTRIES_LEN * 8 != + EFX_WORD_1_WIDTH); + EFX_POPULATE_DWORD_2(flags_and_num_entries, + MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_UNLOADING, + !!unloading, + EFX_WORD_1, num_entries); + *_MCDI_DWORD(inbuf, SET_TUNNEL_ENCAP_UDP_PORTS_IN_FLAGS) = + flags_and_num_entries; + + inlen = MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_LEN(num_entries); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS, + inbuf, inlen, outbuf, sizeof(outbuf), &outlen); + if (rc == -EIO) { + /* Most likely the MC rebooted due to another function also + * setting its tunnel port list. Mark the tunnel port list as + * dirty, so it will be pushed upon coming up from the reboot. + */ + nic_data->udp_tunnels_dirty = true; + return 0; + } + + if (rc) { + /* expected not available on unprivileged functions */ + if (rc != -EPERM) + netif_warn(efx, drv, efx->net_dev, + "Unable to set UDP tunnel ports; rc=%d.\n", rc); + } else if (MCDI_DWORD(outbuf, SET_TUNNEL_ENCAP_UDP_PORTS_OUT_FLAGS) & + (1 << MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_RESETTING_LBN)) { + netif_info(efx, drv, efx->net_dev, + "Rebooting MC due to UDP tunnel port list change\n"); + will_reset = true; + if (unloading) + /* Delay for the MC reset to complete. This will make + * unloading other functions a bit smoother. This is a + * race, but the other unload will work whichever way + * it goes, this just avoids an unnecessary error + * message. + */ + msleep(100); + } + if (!will_reset && !unloading) { + /* The caller will have detached, relying on the MC reset to + * trigger a re-attach. Since there won't be an MC reset, we + * have to do the attach ourselves. + */ + netif_device_attach(efx->net_dev); + } + + return rc; +} + +static int efx_ef10_udp_tnl_push_ports(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc = 0; + + mutex_lock(&nic_data->udp_tunnels_lock); + if (nic_data->udp_tunnels_dirty) { + /* Make sure all TX are stopped while we modify the table, else + * we might race against an efx_features_check(). + */ + efx_device_detach_sync(efx); + rc = efx_ef10_set_udp_tnl_ports(efx, false); + } + mutex_unlock(&nic_data->udp_tunnels_lock); + return rc; +} + +static struct efx_udp_tunnel *__efx_ef10_udp_tnl_lookup_port(struct efx_nic *efx, + __be16 port) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t i; + + for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i) { + if (!nic_data->udp_tunnels[i].count) + continue; + if (nic_data->udp_tunnels[i].port == port) + return &nic_data->udp_tunnels[i]; + } + return NULL; +} + +static int efx_ef10_udp_tnl_add_port(struct efx_nic *efx, + struct efx_udp_tunnel tnl) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_udp_tunnel *match; + char typebuf[8]; + size_t i; + int rc; + + if (!(nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))) + return 0; + + efx_get_udp_tunnel_type_name(tnl.type, typebuf, sizeof(typebuf)); + netif_dbg(efx, drv, efx->net_dev, "Adding UDP tunnel (%s) port %d\n", + typebuf, ntohs(tnl.port)); + + mutex_lock(&nic_data->udp_tunnels_lock); + /* Make sure all TX are stopped while we add to the table, else we + * might race against an efx_features_check(). + */ + efx_device_detach_sync(efx); + + match = __efx_ef10_udp_tnl_lookup_port(efx, tnl.port); + if (match != NULL) { + if (match->type == tnl.type) { + netif_dbg(efx, drv, efx->net_dev, + "Referencing existing tunnel entry\n"); + match->count++; + /* No need to cause an MCDI update */ + rc = 0; + goto unlock_out; + } + efx_get_udp_tunnel_type_name(match->type, + typebuf, sizeof(typebuf)); + netif_dbg(efx, drv, efx->net_dev, + "UDP port %d is already in use by %s\n", + ntohs(tnl.port), typebuf); + rc = -EEXIST; + goto unlock_out; + } + + for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i) + if (!nic_data->udp_tunnels[i].count) { + nic_data->udp_tunnels[i] = tnl; + nic_data->udp_tunnels[i].count = 1; + rc = efx_ef10_set_udp_tnl_ports(efx, false); + goto unlock_out; + } + + netif_dbg(efx, drv, efx->net_dev, + "Unable to add UDP tunnel (%s) port %d; insufficient resources.\n", + typebuf, ntohs(tnl.port)); + + rc = -ENOMEM; + +unlock_out: + mutex_unlock(&nic_data->udp_tunnels_lock); + return rc; +} + +/* Called under the TX lock with the TX queue running, hence no-one can be + * in the middle of updating the UDP tunnels table. However, they could + * have tried and failed the MCDI, in which case they'll have set the dirty + * flag before dropping their locks. + */ +static bool efx_ef10_udp_tnl_has_port(struct efx_nic *efx, __be16 port) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + if (!(nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))) + return false; + + if (nic_data->udp_tunnels_dirty) + /* SW table may not match HW state, so just assume we can't + * use any UDP tunnel offloads. + */ + return false; + + return __efx_ef10_udp_tnl_lookup_port(efx, port) != NULL; +} + +static int efx_ef10_udp_tnl_del_port(struct efx_nic *efx, + struct efx_udp_tunnel tnl) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_udp_tunnel *match; + char typebuf[8]; + int rc; + + if (!(nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))) + return 0; + + efx_get_udp_tunnel_type_name(tnl.type, typebuf, sizeof(typebuf)); + netif_dbg(efx, drv, efx->net_dev, "Removing UDP tunnel (%s) port %d\n", + typebuf, ntohs(tnl.port)); + + mutex_lock(&nic_data->udp_tunnels_lock); + /* Make sure all TX are stopped while we remove from the table, else we + * might race against an efx_features_check(). + */ + efx_device_detach_sync(efx); + + match = __efx_ef10_udp_tnl_lookup_port(efx, tnl.port); + if (match != NULL) { + if (match->type == tnl.type) { + if (--match->count) { + /* Port is still in use, so nothing to do */ + netif_dbg(efx, drv, efx->net_dev, + "UDP tunnel port %d remains active\n", + ntohs(tnl.port)); + rc = 0; + goto out_unlock; + } + rc = efx_ef10_set_udp_tnl_ports(efx, false); + goto out_unlock; + } + efx_get_udp_tunnel_type_name(match->type, + typebuf, sizeof(typebuf)); + netif_warn(efx, drv, efx->net_dev, + "UDP port %d is actually in use by %s, not removing\n", + ntohs(tnl.port), typebuf); + } + rc = -ENOENT; + +out_unlock: + mutex_unlock(&nic_data->udp_tunnels_lock); + return rc; +} + #define EF10_OFFLOAD_FEATURES \ (NETIF_F_IP_CSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | \ @@ -6153,6 +6554,10 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .ptp_set_ts_config = efx_ef10_ptp_set_ts_config, .vlan_rx_add_vid = efx_ef10_vlan_rx_add_vid, .vlan_rx_kill_vid = efx_ef10_vlan_rx_kill_vid, + .udp_tnl_push_ports = efx_ef10_udp_tnl_push_ports, + .udp_tnl_add_port = efx_ef10_udp_tnl_add_port, + .udp_tnl_has_port = efx_ef10_udp_tnl_has_port, + .udp_tnl_del_port = efx_ef10_udp_tnl_del_port, #ifdef CONFIG_SFC_SRIOV .sriov_configure = efx_ef10_sriov_configure, .sriov_init = efx_ef10_sriov_init, diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index c28cd9daf94963d20c391c7508560a429f2eff37..cb8e2c3f806a66c6eb59a43caa4bab5dd9b7dfa8 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -23,12 +23,15 @@ #include #include #include "net_driver.h" +#include +#include #include "efx.h" #include "nic.h" #include "selftest.h" #include "sriov.h" #include "mcdi.h" +#include "mcdi_pcol.h" #include "workarounds.h" /************************************************************************** @@ -88,6 +91,21 @@ const char *const efx_reset_type_names[] = { [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", }; +/* UDP tunnel type names */ +static const char *const efx_udp_tunnel_type_names[] = { + [TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan", + [TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve", +}; + +void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen) +{ + if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) && + efx_udp_tunnel_type_names[type] != NULL) + snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]); + else + snprintf(buf, buflen, "type %d", type); +} + /* Reset workqueue. If any NIC has a hardware failure then a reset will be * queued onto this work queue. This is not a per-nic work queue, because * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. @@ -2336,6 +2354,52 @@ static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vi return -EOPNOTSUPP; } +static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in) +{ + switch (in) { + case UDP_TUNNEL_TYPE_VXLAN: + return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN; + case UDP_TUNNEL_TYPE_GENEVE: + return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE; + default: + return -1; + } +} + +static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti) +{ + struct efx_nic *efx = netdev_priv(dev); + struct efx_udp_tunnel tnl; + int efx_tunnel_type; + + efx_tunnel_type = efx_udp_tunnel_type_map(ti->type); + if (efx_tunnel_type < 0) + return; + + tnl.type = (u16)efx_tunnel_type; + tnl.port = ti->port; + + if (efx->type->udp_tnl_add_port) + (void)efx->type->udp_tnl_add_port(efx, tnl); +} + +static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti) +{ + struct efx_nic *efx = netdev_priv(dev); + struct efx_udp_tunnel tnl; + int efx_tunnel_type; + + efx_tunnel_type = efx_udp_tunnel_type_map(ti->type); + if (efx_tunnel_type < 0) + return; + + tnl.type = (u16)efx_tunnel_type; + tnl.port = ti->port; + + if (efx->type->udp_tnl_add_port) + (void)efx->type->udp_tnl_del_port(efx, tnl); +} + static const struct net_device_ops efx_netdev_ops = { .ndo_open = efx_net_open, .ndo_stop = efx_net_stop, @@ -2366,6 +2430,8 @@ static const struct net_device_ops efx_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = efx_filter_rfs, #endif + .ndo_udp_tunnel_add = efx_udp_tunnel_add, + .ndo_udp_tunnel_del = efx_udp_tunnel_del, }; static void efx_update_name(struct efx_nic *efx) @@ -2605,6 +2671,9 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) efx_start_all(efx); + if (efx->type->udp_tnl_push_ports) + efx->type->udp_tnl_push_ports(efx); + return 0; fail: @@ -3136,6 +3205,51 @@ static int efx_pci_probe_main(struct efx_nic *efx) return rc; } +static int efx_pci_probe_post_io(struct efx_nic *efx) +{ + struct net_device *net_dev = efx->net_dev; + int rc = efx_pci_probe_main(efx); + + if (rc) + return rc; + + if (efx->type->sriov_init) { + rc = efx->type->sriov_init(efx); + if (rc) + netif_err(efx, probe, efx->net_dev, + "SR-IOV can't be enabled rc %d\n", rc); + } + + /* Determine netdevice features */ + net_dev->features |= (efx->type->offload_features | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_RXCSUM); + if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) + net_dev->features |= NETIF_F_TSO6; + /* Check whether device supports TSO */ + if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) + net_dev->features &= ~NETIF_F_ALL_TSO; + /* Mask for features that also apply to VLAN devices */ + net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | + NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | + NETIF_F_RXCSUM); + + net_dev->hw_features = net_dev->features & ~efx->fixed_features; + + /* Disable VLAN filtering by default. It may be enforced if + * the feature is fixed (i.e. VLAN filters are required to + * receive VLAN tagged packets due to vPort restrictions). + */ + net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + net_dev->features |= efx->fixed_features; + + rc = efx_register_netdev(efx); + if (!rc) + return 0; + + efx_pci_remove_main(efx); + return rc; +} + /* NIC initialisation * * This is called at module load (or hotplug insertion, @@ -3178,42 +3292,28 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (rc) goto fail2; - rc = efx_pci_probe_main(efx); + rc = efx_pci_probe_post_io(efx); + if (rc) { + /* On failure, retry once immediately. + * If we aborted probe due to a scheduled reset, dismiss it. + */ + efx->reset_pending = 0; + rc = efx_pci_probe_post_io(efx); + if (rc) { + /* On another failure, retry once more + * after a 50-305ms delay. + */ + unsigned char r; + + get_random_bytes(&r, 1); + msleep((unsigned int)r + 50); + efx->reset_pending = 0; + rc = efx_pci_probe_post_io(efx); + } + } if (rc) goto fail3; - net_dev->features |= (efx->type->offload_features | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_RXCSUM); - if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) - net_dev->features |= NETIF_F_TSO6; - /* Check whether device supports TSO */ - if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) - net_dev->features &= ~NETIF_F_ALL_TSO; - /* Mask for features that also apply to VLAN devices */ - net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | - NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | - NETIF_F_RXCSUM); - - net_dev->hw_features = net_dev->features & ~efx->fixed_features; - - /* Disable VLAN filtering by default. It may be enforced if - * the feature is fixed (i.e. VLAN filters are required to - * receive VLAN tagged packets due to vPort restrictions). - */ - net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - net_dev->features |= efx->fixed_features; - - rc = efx_register_netdev(efx); - if (rc) - goto fail4; - - if (efx->type->sriov_init) { - rc = efx->type->sriov_init(efx); - if (rc) - netif_err(efx, probe, efx->net_dev, - "SR-IOV can't be enabled rc %d\n", rc); - } - netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); /* Try to create MTDs, but allow this to fail */ @@ -3230,10 +3330,11 @@ static int efx_pci_probe(struct pci_dev *pci_dev, "PCIE error reporting unavailable (%d).\n", rc); + if (efx->type->udp_tnl_push_ports) + efx->type->udp_tnl_push_ports(efx); + return 0; - fail4: - efx_pci_remove_main(efx); fail3: efx_fini_io(efx); fail2: diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index adddf70780adcba76b76905701d8572787350421..3747b564411073350c930305c941df8e713edd8c 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -77,6 +77,11 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 24b271b9c2601c018caa010f1d13c6d0fb075fb3..b9422450deb8e91b4a34bee26e8648d6e3a6ac8b 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -128,7 +128,7 @@ int efx_mcdi_init(struct efx_nic *efx) return rc; } -void efx_mcdi_fini(struct efx_nic *efx) +void efx_mcdi_detach(struct efx_nic *efx) { if (!efx->mcdi) return; @@ -137,6 +137,12 @@ void efx_mcdi_fini(struct efx_nic *efx) /* Relinquish the device (back to the BMC, if this is a LOM) */ efx_mcdi_drv_attach(efx, false, NULL); +} + +void efx_mcdi_fini(struct efx_nic *efx) +{ + if (!efx->mcdi) + return; #ifdef CONFIG_SFC_MCDI_LOGGING free_page((unsigned long)efx->mcdi->iface.logging_buffer); @@ -716,8 +722,11 @@ static int _efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned int cmd, if (cmd == MC_CMD_REBOOT && rc == -EIO) { /* Don't reset if MC_CMD_REBOOT returns EIO */ } else if (rc == -EIO || rc == -EINTR) { - netif_err(efx, hw, efx->net_dev, "MC fatal error %d\n", - -rc); + netif_err(efx, hw, efx->net_dev, "MC reboot detected\n"); + netif_dbg(efx, hw, efx->net_dev, "MC rebooted during command %d rc %d\n", + cmd, -rc); + if (efx->type->mcdi_reboot_detected) + efx->type->mcdi_reboot_detected(efx); efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE); } else if (proxy_handle && (rc == -EPROTO) && efx_mcdi_get_proxy_handle(efx, hdr_len, data_len, diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 4472107ca8c144ee611a3bf9dc0b6982d45d232e..154ef41d19275ded25c7b34a84ac8769b99be013 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -142,6 +142,7 @@ static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx) #endif int efx_mcdi_init(struct efx_nic *efx); +void efx_mcdi_detach(struct efx_nic *efx); void efx_mcdi_fini(struct efx_nic *efx); int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const efx_dword_t *inbuf, diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index 35cc3d4fa5f692a3ae58503ecd94c995b260fc0e..47ced8a898ca305885136acc4c54dd8b95786a11 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -11913,6 +11913,27 @@ #define MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_RESETTING_LBN 0 #define MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_RESETTING_WIDTH 1 +/* TUNNEL_ENCAP_UDP_PORT_ENTRY structuredef */ +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_LEN 4 +/* UDP port (the standard ports are named below but any port may be used) */ +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_OFST 0 +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LEN 2 +/* enum: the IANA allocated UDP port for VXLAN */ +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT 0x12b5 +/* enum: the IANA allocated UDP port for Geneve */ +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT 0x17c1 +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LBN 0 +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_WIDTH 16 +/* tunnel encapsulation protocol (only those named below are supported) */ +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_OFST 2 +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LEN 2 +/* enum: VXLAN */ +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN 0x0 +/* enum: Geneve */ +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE 0x1 +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LBN 16 +#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_WIDTH 16 + /***********************************/ /* MC_CMD_RX_BALANCING diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 73810d2d630ccf4b410002b2d3a72e04c955a7f9..5cb8112b1cf3d3871f95bc395945c76a63e4a3a4 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -307,6 +307,7 @@ struct efx_rx_buffer { #define EFX_RX_PKT_DISCARD 0x0004 #define EFX_RX_PKT_TCP 0x0040 #define EFX_RX_PKT_PREFIX_LEN 0x0080 /* length is in prefix only */ +#define EFX_RX_PKT_CSUM_LEVEL 0x0200 /** * struct efx_rx_page_state - Page-based rx buffer state @@ -469,13 +470,18 @@ struct efx_channel { u32 *rps_flow_id; #endif - unsigned n_rx_tobe_disc; - unsigned n_rx_ip_hdr_chksum_err; - unsigned n_rx_tcp_udp_chksum_err; - unsigned n_rx_mcast_mismatch; - unsigned n_rx_frm_trunc; - unsigned n_rx_overlength; - unsigned n_skbuff_leaks; + unsigned int n_rx_tobe_disc; + unsigned int n_rx_ip_hdr_chksum_err; + unsigned int n_rx_tcp_udp_chksum_err; + unsigned int n_rx_outer_ip_hdr_chksum_err; + unsigned int n_rx_outer_tcp_udp_chksum_err; + unsigned int n_rx_inner_ip_hdr_chksum_err; + unsigned int n_rx_inner_tcp_udp_chksum_err; + unsigned int n_rx_eth_crc_err; + unsigned int n_rx_mcast_mismatch; + unsigned int n_rx_frm_trunc; + unsigned int n_rx_overlength; + unsigned int n_skbuff_leaks; unsigned int n_rx_nodesc_trunc; unsigned int n_rx_merge_events; unsigned int n_rx_merge_packets; @@ -548,6 +554,8 @@ extern const unsigned int efx_reset_type_max; #define RESET_TYPE(type) \ STRING_TABLE_LOOKUP(type, efx_reset_type) +void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen); + enum efx_int_mode { /* Be careful if altering to correct macro below */ EFX_INT_MODE_MSIX = 0, @@ -987,6 +995,15 @@ struct efx_mtd_partition { char name[IFNAMSIZ + 20]; }; +struct efx_udp_tunnel { + u16 type; /* TUNNEL_ENCAP_UDP_PORT_ENTRY_foo, see mcdi_pcol.h */ + __be16 port; + /* Count of repeated adds of the same port. Used only inside the list, + * not in request arguments. + */ + u16 count; +}; + /** * struct efx_nic_type - Efx device type definition * @mem_bar: Get the memory BAR @@ -1107,6 +1124,10 @@ struct efx_mtd_partition { * @set_mac_address: Set the MAC address of the device * @tso_versions: Returns mask of firmware-assisted TSO versions supported. * If %NULL, then device does not support any TSO version. + * @udp_tnl_push_ports: Push the list of UDP tunnel ports to the NIC if required. + * @udp_tnl_add_port: Add a UDP tunnel port + * @udp_tnl_has_port: Check if a port has been added as UDP tunnel + * @udp_tnl_del_port: Remove a UDP tunnel port * @revision: Hardware architecture revision * @txd_ptr_tbl_base: TX descriptor ring base address * @rxd_ptr_tbl_base: RX descriptor ring base address @@ -1266,6 +1287,10 @@ struct efx_nic_type { int (*get_mac_address)(struct efx_nic *efx, unsigned char *perm_addr); int (*set_mac_address)(struct efx_nic *efx); u32 (*tso_versions)(struct efx_nic *efx); + int (*udp_tnl_push_ports)(struct efx_nic *efx); + int (*udp_tnl_add_port)(struct efx_nic *efx, struct efx_udp_tunnel tnl); + bool (*udp_tnl_has_port)(struct efx_nic *efx, __be16 port); + int (*udp_tnl_del_port)(struct efx_nic *efx, struct efx_udp_tunnel tnl); int revision; unsigned int txd_ptr_tbl_base; diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 85cf131288b7c0bd959e0d6837c7ad135f66ef35..7b916aa21bdef18896debc4bdd7384696e50f3a3 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -369,6 +369,10 @@ enum { * @vport_mac: The MAC address on the vport, only for PFs; VFs will be zero * @vlan_list: List of VLANs added over the interface. Serialised by vlan_lock. * @vlan_lock: Lock to serialize access to vlan_list. + * @udp_tunnels: UDP tunnel port numbers and types. + * @udp_tunnels_dirty: flag indicating a reboot occurred while pushing + * @udp_tunnels to hardware and thus the push must be re-done. + * @udp_tunnels_lock: Serialises writes to @udp_tunnels and @udp_tunnels_dirty. */ struct efx_ef10_nic_data { struct efx_buffer mcdi_buf; @@ -405,6 +409,9 @@ struct efx_ef10_nic_data { u8 vport_mac[ETH_ALEN]; struct list_head vlan_list; struct mutex vlan_lock; + struct efx_udp_tunnel udp_tunnels[16]; + bool udp_tunnels_dirty; + struct mutex udp_tunnels_lock; }; int efx_init_sriov(void); diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 31587f4066ab08f52409b2c666df0b92b4707781..42443f434569b22865a450b0a3e249753fe86d76 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -434,6 +434,7 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, PKT_HASH_TYPE_L3); skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE); + skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); for (;;) { skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, @@ -621,8 +622,10 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, /* Set the SKB flags */ skb_checksum_none_assert(skb); - if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) + if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) { skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); + } efx_rx_skb_attach_timestamp(channel, skb); diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index af7cd8565a41ee7ce740f1159178727a6240ed14..b1d36df71ecb91cf2b2a86cabf5e61bbc8b687b8 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -326,6 +326,7 @@ static int siena_probe_nic(struct efx_nic *efx) efx_nic_free_buffer(efx, &efx->irq_status); fail4: fail3: + efx_mcdi_detach(efx); efx_mcdi_fini(efx); fail1: kfree(efx->nic_data); @@ -450,6 +451,7 @@ static void siena_remove_nic(struct efx_nic *efx) efx_mcdi_reset(efx, RESET_TYPE_ALL); + efx_mcdi_detach(efx); efx_mcdi_fini(efx); /* Tear down the private nic state */