提交 ccfdf21b 编写于 作者: D David S. Miller

Merge branch 'mlxsw-Offloading-GRE-tunnels'

Jiri Pirko says:

====================
mlxsw: Offloading GRE tunnels

Petr says:

This patch series adds support to the mlxsw driver for offloading
IP-in-IP tunnels in general, and for (a subset of) GRE in particular.

This patchset supports two ways of configuring GRE:

- So called "hierarchical configuration", where the GRE device has a bound
  dummy device, which is in a different VRF. The VRF with host traffic is
  called "overlay", the one with encapsulated traffic is called "underlay".

- So called "flat configuration", where the GRE device doesn't have a bound
  device, and overlay and underlay are both in the same VRF (possibly the
  default one).

Two routes are then interesting: a route that directs traffic to a GRE
device (which would typically be in overlay VRF, but could be in another
one), and a local route for the tunnel's local address (in underlay).
Handling of these two route types is then introduced as patches to support,
respectively, IPv4 and IPv6 encapsulation and IPv4 decapsulation.

The encap and decap routes then reference a loopback device, a new type of
RIF introduced by this patchset for the specific use of offloading tunnels.

The encap and decap code is abstract with respect to the particulars of
individual L3 tunnel types. This patchset introduces support for GRE
tunnels in particular.

Limitations:

- Each tunnel needs to have a different local address (within a given VRF).
  When two tunnels are used that are in conflict, FIB abort is triggered
  and the driver ceases offloading FIBs. Full handling of such
  configurations needs special setup in the hardware, such that the tunnels
  that share an address are dispatched correctly according to their key (or
  lack thereof). That's currently not implemented, and to keep things
  deterministic, the driver triggers FIB abort.

- A next hop that uses an incompletely-specified tunnel (e.g. one used for
  LWT) is not offloaded but, unlike the above, doesn't trigger FIB abort.
  If such routes end up being in a de facto conflict with other
  tunnels, then if there already is an offload for that address, the
  traffic for the conflicting tunnel will end up mismatching the
  configuration of the offloaded tunnel, and thus gets to slow path through
  an error trap.

- GRE checksumming and sequence numbers are not supported and TTL and TOS
  need to be set to inherit. Tunnels with a different configuration are not
  offloaded and their traffic is trapped to the slow path.

  Note in particular that TOS of inherit is not the default configuration
  and needs to be explicitly specified when the tunnel is created.

- The only case that is not handled gracefully is a change made to the
  tunnel, e.g. through "ip tunnel change": such changes are not reflected
  in the driver. There is currently no notification mechanism for
  these changes. Introduction of this mechanism and its leverage in the
  driver will be subject of follow-up work. For now this limitation can be
  worked around by removing and re-adding the encap route.

---
v1->v2:
-fix order of patch 5
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -16,8 +16,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_switchdev.o spectrum_router.o \
spectrum_kvdl.o spectrum_acl_tcam.o \
spectrum_acl.o spectrum_flower.o \
spectrum_cnt.o \
spectrum_fid.o
spectrum_cnt.o spectrum_fid.o \
spectrum_ipip.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
......
......@@ -5,6 +5,7 @@
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
* Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
......@@ -3998,6 +3999,8 @@ enum mlxsw_reg_ritr_if_type {
MLXSW_REG_RITR_FID_IF,
/* Sub-port interface. */
MLXSW_REG_RITR_SP_IF,
/* Loopback Interface. */
MLXSW_REG_RITR_LOOPBACK_IF,
};
/* reg_ritr_type
......@@ -4129,6 +4132,67 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16);
*/
MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12);
/* Loopback Interface */
/* Values for the RITR loopback_protocol field: the kind of loopback
 * interface, distinguished by the underlay IP version.
 */
enum mlxsw_reg_ritr_loopback_protocol {
/* IPinIP IPv4 underlay Unicast */
MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4,
/* IPinIP IPv6 underlay Unicast */
MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6,
};
/* reg_ritr_loopback_protocol
 * Loopback protocol, as per enum mlxsw_reg_ritr_loopback_protocol.
 * Access: RW
 */
MLXSW_ITEM32(reg, ritr, loopback_protocol, 0x08, 28, 4);
/* Values for the RITR loopback_ipip_type field (encapsulation type). */
enum mlxsw_reg_ritr_loopback_ipip_type {
/* Tunnel is IPinIP. */
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_IP,
/* Tunnel is GRE, no key. */
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP,
/* Tunnel is GRE, with a key. */
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP,
};
/* reg_ritr_loopback_ipip_type
* Encapsulation type.
* Access: RW
*/
MLXSW_ITEM32(reg, ritr, loopback_ipip_type, 0x10, 24, 4);
/* Values for the RITR loopback_ipip_options field. */
enum mlxsw_reg_ritr_loopback_ipip_options {
/* The key is defined by gre_key. */
MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
};
/* reg_ritr_loopback_ipip_options
 * Tunnel options, as per enum mlxsw_reg_ritr_loopback_ipip_options.
 * Access: RW
 */
MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4);
/* reg_ritr_loopback_ipip_uvr
* Underlay Virtual Router ID.
* Range is 0..cap_max_virtual_routers-1.
* Reserved for Spectrum-2.
* Access: RW
*/
MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16);
/* reg_ritr_loopback_ipip_usip*
* Encapsulation Underlay source IP.
* Access: RW
*/
MLXSW_ITEM_BUF(reg, ritr, loopback_ipip_usip6, 0x18, 16);
MLXSW_ITEM32(reg, ritr, loopback_ipip_usip4, 0x24, 0, 32);
/* reg_ritr_loopback_ipip_gre_key
* GRE Key.
* Reserved when ipip_type is not IP_IN_GRE_KEY_IN_IP.
* Access: RW
*/
MLXSW_ITEM32(reg, ritr, loopback_ipip_gre_key, 0x28, 0, 32);
/* Shared between ingress/egress */
enum mlxsw_reg_ritr_counter_set_type {
/* No Count. */
......@@ -4199,8 +4263,7 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag,
static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
enum mlxsw_reg_ritr_if_type type,
u16 rif, u16 vr_id, u16 mtu,
const char *mac)
u16 rif, u16 vr_id, u16 mtu)
{
bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL;
......@@ -4216,9 +4279,38 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
mlxsw_reg_ritr_lb_en_set(payload, 1);
mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
mlxsw_reg_ritr_mtu_set(payload, mtu);
}
/* Store the router interface MAC address @mac in the RITR @payload. */
static inline void
mlxsw_reg_ritr_mac_pack(char *payload, const char *mac)
{
	mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
}
/* Fill in the RITR loopback fields that do not depend on the underlay IP
 * version: encapsulation type, options, underlay virtual router and GRE key.
 */
static inline void
mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload,
			    enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
			    enum mlxsw_reg_ritr_loopback_ipip_options options,
			    u16 uvr_id, u32 gre_key)
{
	/* Each setter touches its own field, so the order is immaterial. */
	mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key);
	mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id);
	mlxsw_reg_ritr_loopback_ipip_options_set(payload, options);
	mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type);
}
/* Fill in the RITR loopback fields for an IP-in-IP tunnel with an IPv4
 * underlay: the common tunnel fields, the IPv4 loopback protocol and the
 * underlay source IP @usip.
 */
static inline void
mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
			    enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
			    enum mlxsw_reg_ritr_loopback_ipip_options options,
			    u16 uvr_id, u32 usip, u32 gre_key)
{
	mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options,
						 uvr_id, gre_key);
	mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
	mlxsw_reg_ritr_loopback_protocol_set(payload,
				MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4);
}
/* RATR - Router Adjacency Table Register
* --------------------------------------
* The RATR register is used to configure the Router Adjacency (next-hop)
......@@ -4274,6 +4366,38 @@ MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1);
*/
MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1);
/* Values for the RATR type field (adjacency entry type). */
enum mlxsw_reg_ratr_type {
/* Ethernet */
MLXSW_REG_RATR_TYPE_ETHERNET,
/* IPoIB Unicast without GRH.
* Reserved for Spectrum.
*/
MLXSW_REG_RATR_TYPE_IPOIB_UC,
/* IPoIB Unicast with GRH. Supported only in table 0 (Ethernet unicast
* adjacency).
* Reserved for Spectrum.
*/
MLXSW_REG_RATR_TYPE_IPOIB_UC_W_GRH,
/* IPoIB Multicast.
* Reserved for Spectrum.
*/
MLXSW_REG_RATR_TYPE_IPOIB_MC,
/* MPLS.
* Reserved for SwitchX/-2.
*/
MLXSW_REG_RATR_TYPE_MPLS,
/* IPinIP Encap.
* Reserved for SwitchX/-2.
*/
MLXSW_REG_RATR_TYPE_IPIP,
};
/* reg_ratr_type
* Adjacency entry type.
* Access: RW
*/
MLXSW_ITEM32(reg, ratr, type, 0x04, 28, 4);
/* reg_ratr_adjacency_index_low
* Bits 15:0 of index into the adjacency table.
* For SwitchX and SwitchX-2, the adjacency table is linear and
......@@ -4303,17 +4427,17 @@ enum mlxsw_reg_ratr_trap_action {
*/
MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4);
enum mlxsw_reg_ratr_trap_id {
MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0,
MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1,
};
/* reg_ratr_adjacency_index_high
* Bits 23:16 of the adjacency_index.
* Access: Index
*/
MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8);
enum mlxsw_reg_ratr_trap_id {
MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0,
MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1,
};
/* reg_ratr_trap_id
* Trap ID to be reported to CPU.
* Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
......@@ -4328,14 +4452,44 @@ MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8);
*/
MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6);
/* Values for the RATR ipip_type field (underlay destination IP type). */
enum mlxsw_reg_ratr_ipip_type {
/* IPv4, address set by mlxsw_reg_ratr_ipip_ipv4_udip. */
MLXSW_REG_RATR_IPIP_TYPE_IPV4,
/* IPv6, address set by mlxsw_reg_ratr_ipip_ipv6_ptr. */
MLXSW_REG_RATR_IPIP_TYPE_IPV6,
};
/* reg_ratr_ipip_type
* Underlay destination ip type.
* Note: the type field must match the protocol of the router interface.
* Access: RW
*/
MLXSW_ITEM32(reg, ratr, ipip_type, 0x10, 16, 4);
/* reg_ratr_ipip_ipv4_udip
* Underlay ipv4 dip.
* Reserved when ipip_type is IPv6.
* Access: RW
*/
MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32);
/* reg_ratr_ipip_ipv6_ptr
* Pointer to IPv6 underlay destination ip address.
* For Spectrum: Pointer to KVD linear space.
* Access: RW
*/
MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24);
static inline void
mlxsw_reg_ratr_pack(char *payload,
enum mlxsw_reg_ratr_op op, bool valid,
enum mlxsw_reg_ratr_type type,
u32 adjacency_index, u16 egress_rif)
{
MLXSW_REG_ZERO(ratr, payload);
mlxsw_reg_ratr_op_set(payload, op);
mlxsw_reg_ratr_v_set(payload, valid);
mlxsw_reg_ratr_type_set(payload, type);
mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index);
mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16);
mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif);
......@@ -4347,6 +4501,12 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload,
mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac);
}
/* Fill in the IP-in-IP part of a RATR entry for an IPv4 underlay:
 * @ipv4_udip is the underlay destination IP.
 */
static inline void
mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip)
{
	mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip);
	mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV4);
}
/* RICNT - Router Interface Counter Register
* -----------------------------------------
* The RICNT register retrieves per port performance counters
......@@ -4900,6 +5060,15 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload)
MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
}
/* Pack an IP2ME action that also carries a tunnel pointer: the action type
 * is IP2ME, the ip2me_v field is set to 1 and @tunnel_ptr is stored in
 * ip2me_tunnel_ptr.
 */
static inline void
mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr)
{
	mlxsw_reg_ralue_action_type_set(payload,
					MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);

	mlxsw_reg_ralue_ip2me_v_set(payload, 1);
	mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr);
}
/* RAUHT - Router Algorithmic LPM Unicast Host Table Register
* ----------------------------------------------------------
* The RAUHT register is used to configure and query the Unicast Host table in
......@@ -5300,6 +5469,133 @@ static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload,
mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip);
}
/* RTDP - Routing Tunnel Decap Properties Register
* -----------------------------------------------
* The RTDP register is used for configuring the tunnel decap properties of NVE
* and IPinIP.
*/
#define MLXSW_REG_RTDP_ID 0x8020
#define MLXSW_REG_RTDP_LEN 0x44
MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN);
/* Values for the RTDP type field: the kind of tunnel whose decap properties
 * the entry describes (NVE or IPinIP).
 */
enum mlxsw_reg_rtdp_type {
MLXSW_REG_RTDP_TYPE_NVE,
MLXSW_REG_RTDP_TYPE_IPIP,
};
/* reg_rtdp_type
* Type of the RTDP entry as per enum mlxsw_reg_rtdp_type.
* Access: RW
*/
MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4);
/* reg_rtdp_tunnel_index
* Index to the Decap entry.
* For Spectrum, Index to KVD Linear.
* Access: Index
*/
MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24);
/* IPinIP */
/* reg_rtdp_ipip_irif
* Ingress Router Interface for the overlay router
* Access: RW
*/
MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16);
/* Values for the RTDP ipip_sip_check field: which underlay source IP check
 * is performed on decapsulation. Note that the IPv6 value is given
 * explicitly as 3; the value 2 is not used by this enum.
 */
enum mlxsw_reg_rtdp_ipip_sip_check {
/* No sip checks. */
MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO,
/* Filter packet if underlay is not IPv4 or if underlay SIP does not
* equal ipv4_usip.
*/
MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
/* Filter packet if underlay is not IPv6 or if underlay SIP does not
* equal ipv6_usip.
*/
MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3,
};
/* reg_rtdp_ipip_sip_check
* SIP check to perform. If decapsulation failed due to these configurations
* then trap_id is IPIP_DECAP_ERROR.
* Access: RW
*/
MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3);
/* If set, allow decapsulation of IPinIP (without GRE). */
#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP BIT(0)
/* If set, allow decapsulation of IPinGREinIP without a key. */
#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE BIT(1)
/* If set, allow decapsulation of IPinGREinIP with a key. */
#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY BIT(2)
/* reg_rtdp_ipip_type_check
* Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation failed due to
* these configurations then trap_id is IPIP_DECAP_ERROR.
* Access: RW
*/
MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3);
/* reg_rtdp_ipip_gre_key_check
* Whether GRE key should be checked. When check is enabled:
* - A packet received as IPinIP (without GRE) will always pass.
* - A packet received as IPinGREinIP without a key will not pass the check.
* - A packet received as IPinGREinIP with a key will pass the check only if the
* key in the packet is equal to expected_gre_key.
* If decapsulation failed due to GRE key then trap_id is IPIP_DECAP_ERROR.
* Access: RW
*/
MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1);
/* reg_rtdp_ipip_ipv4_usip
* Underlay IPv4 address for ipv4 source address check.
* Reserved when sip_check is not '1'.
* Access: RW
*/
MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32);
/* reg_rtdp_ipip_ipv6_usip_ptr
* This field is valid when sip_check is "sipv6 check explicitly". This is a
* pointer to the IPv6 DIP which is configured by RIPS. For Spectrum, the index
* is to the KVD linear.
* Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6.
* Access: RW
*/
MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24);
/* reg_rtdp_ipip_expected_gre_key
* GRE key for checking.
* Reserved when gre_key_check is '0'.
* Access: RW
*/
MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32);
/* Start a fresh RTDP payload: zero it, then record the entry @type and the
 * decap entry index @tunnel_index.
 */
static inline void
mlxsw_reg_rtdp_pack(char *payload, enum mlxsw_reg_rtdp_type type,
		    u32 tunnel_index)
{
	MLXSW_REG_ZERO(rtdp, payload);

	mlxsw_reg_rtdp_tunnel_index_set(payload, tunnel_index);
	mlxsw_reg_rtdp_type_set(payload, type);
}
/* Fill in the IPinIP decap properties of an RTDP payload for an IPv4
 * underlay.
 *
 * @irif:             ingress router interface for the overlay router
 * @sip_check:        underlay source IP check to perform
 * @type_check:       MLXSW_REG_RTDP_IPIP_TYPE_CHECK_* flags
 * @gre_key_check:    whether the GRE key should be checked
 * @ipv4_usip:        underlay IPv4 address for the source address check
 * @expected_gre_key: GRE key to check against
 */
static inline void
mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif,
			  enum mlxsw_reg_rtdp_ipip_sip_check sip_check,
			  unsigned int type_check, bool gre_key_check,
			  u32 ipv4_usip, u32 expected_gre_key)
{
	/* Ingress RIF. */
	mlxsw_reg_rtdp_ipip_irif_set(payload, irif);

	/* Tunnel type and GRE key checks. */
	mlxsw_reg_rtdp_ipip_type_check_set(payload, type_check);
	mlxsw_reg_rtdp_ipip_gre_key_check_set(payload, gre_key_check);
	mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key);

	/* Underlay source IP check. */
	mlxsw_reg_rtdp_ipip_sip_check_set(payload, sip_check);
	mlxsw_reg_rtdp_ipip_ipv4_usip_set(payload, ipv4_usip);
}
/* MFCR - Management Fan Control Register
* --------------------------------------
* This register controls the settings of the Fan Speed PWM mechanism.
......@@ -6561,6 +6857,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(rgcr),
MLXSW_REG(ritr),
MLXSW_REG(ratr),
MLXSW_REG(rtdp),
MLXSW_REG(ricnt),
MLXSW_REG(ralta),
MLXSW_REG(ralst),
......
......@@ -3400,6 +3400,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
/* PKT Sample trap */
MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
false, SP_IP2ME, DISCARD),
......
......@@ -77,6 +77,7 @@ enum mlxsw_sp_rif_type {
MLXSW_SP_RIF_TYPE_SUBPORT,
MLXSW_SP_RIF_TYPE_VLAN,
MLXSW_SP_RIF_TYPE_FID,
MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */
MLXSW_SP_RIF_TYPE_MAX,
};
......
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <net/ip_tunnels.h>
#include "spectrum_ipip.h"
static bool
mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return !!(tun->parms.i_flags & TUNNEL_KEY);
}
static bool
mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return !!(tun->parms.o_flags & TUNNEL_KEY);
}
static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ?
be32_to_cpu(tun->parms.i_key) : 0;
}
static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
{
struct ip_tunnel *tun = netdev_priv(ol_dev);
return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
be32_to_cpu(tun->parms.o_key) : 0;
}
/* Write the RATR adjacency entry at @adj_index as an IPinIP entry. The entry
 * uses the tunnel's loopback RIF as egress RIF and the tunnel device's IPv4
 * destination address as underlay destination IP.
 *
 * Returns the result of the register write.
 */
static int
mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	char ratr_pl[MLXSW_REG_RATR_LEN];
	u16 lb_rif;
	u32 udip;

	lb_rif = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
	udip = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev));

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, MLXSW_REG_RATR_TYPE_IPIP,
			    adj_index, lb_rif);
	mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, udip);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
/* Write the RTDP (tunnel decap properties) entry at @tunnel_index for an
 * IPv4 GRE tunnel.
 *
 * Returns the result of the register write.
 */
static int
mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
				     u32 tunnel_index,
				     struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;
	char rtdp_pl[MLXSW_REG_RTDP_LEN];
	unsigned int allowed_types;
	u32 expected_key;
	bool check_key;
	u32 usip;
	u16 irif;

	check_key = mlxsw_sp_ipip_netdev_has_ikey(ol_dev);
	irif = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
	expected_key = mlxsw_sp_ipip_netdev_ikey(ol_dev);

	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);

	/* A keyed tunnel only accepts keyed GRE, an unkeyed one only
	 * key-less GRE.
	 */
	if (check_key)
		allowed_types = MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY;
	else
		allowed_types = MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE;

	/* Linux picks the tunnel by the packet's SIP, which has to equal the
	 * tunnel's remote IP. Program decap accordingly: anything that is
	 * not IPv4, or that carries a different SIP, is filtered and raises
	 * the IPIP_DECAP_ERROR trap. Such packets then go through the slow
	 * path, where Linux answers with ICMP destination unreachable.
	 */
	usip = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ol_dev));
	mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, irif,
				  MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
				  allowed_types, check_key, usip,
				  expected_key);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
}
/* Apply @op to the IPv4 RALUE entry @dip/@prefix_len in virtual router
 * @ul_vr_id, with an IP2ME action that points at decap entry @tunnel_index.
 *
 * Returns the result of the register write.
 */
static int
mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp,
				      u32 dip, u8 prefix_len, u16 ul_vr_id,
				      enum mlxsw_reg_ralue_op op,
				      u32 tunnel_index)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op,
			      ul_vr_id, prefix_len, dip);
	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
/* Perform @op on the decap route of an IPv4 GRE tunnel. The RTDP decap
 * entry at @tunnel_index is written first; then @op is applied to the /32
 * RALUE entry for the tunnel's IPv4 source address in the underlay virtual
 * router.
 *
 * Returns 0 on success or the error from the failing register write.
 */
static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					enum mlxsw_reg_ralue_op op,
					u32 tunnel_index)
{
	union mlxsw_sp_l3addr ul_saddr;
	u16 ul_vr_id;
	int err;

	ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb);

	err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index,
						   ipip_entry);
	if (err)
		return err;

	ul_saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
					      ipip_entry->ol_dev);

	return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp,
					be32_to_cpu(ul_saddr.addr4),
					32, ul_vr_id, op, tunnel_index);
}
static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev)
{
union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
union mlxsw_sp_l3addr naddr = {0};
/* Tunnels with unset local or remote address are valid in Linux and
* used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
* (NBMA) tunnels. In principle these can be offloaded, but the driver
* currently doesn't support this. So punt.
*/
return memcmp(&saddr, &naddr, sizeof(naddr)) &&
memcmp(&daddr, &naddr, sizeof(naddr));
}
static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev,
enum mlxsw_sp_l3proto ol_proto)
{
struct ip_tunnel *tunnel = netdev_priv(ol_dev);
__be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */
bool inherit_ttl = tunnel->parms.iph.ttl == 0;
bool inherit_tos = tunnel->parms.iph.tos & 0x1;
return (tunnel->parms.i_flags & ~okflags) == 0 &&
(tunnel->parms.o_flags & ~okflags) == 0 &&
inherit_ttl && inherit_tos &&
mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev);
}
static struct mlxsw_sp_rif_ipip_lb_config
mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev)
{
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
return (struct mlxsw_sp_rif_ipip_lb_config){
.lb_ipipt = lb_ipipt,
.okey = mlxsw_sp_ipip_netdev_okey(ol_dev),
.ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
.saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
ol_dev),
};
}
/* Tunnel operations for GRE over an IPv4 underlay (ARPHRD_IPGRE devices). */
static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
.dev_type = ARPHRD_IPGRE,
.ul_proto = MLXSW_SP_L3_PROTO_IPV4,
.nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4,
.fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4,
.can_offload = mlxsw_sp_ipip_can_offload_gre4,
.ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4,
};
/* Per-tunnel-type operations, indexed by enum mlxsw_sp_ipip_type. */
const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
[MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops,
};
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
* Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the names of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* Alternatively, this software may be distributed under the terms of the
* GNU General Public License ("GPL") version 2 as published by the Free
* Software Foundation.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _MLXSW_IPIP_H_
#define _MLXSW_IPIP_H_
#include "spectrum_router.h"
#include <net/ip_fib.h>
/* Tunnel types known to the driver; used as the index into
 * mlxsw_sp_ipip_ops_arr[].
 */
enum mlxsw_sp_ipip_type {
MLXSW_SP_IPIP_TYPE_GRE4,
MLXSW_SP_IPIP_TYPE_MAX, /* Number of tunnel types; not a type itself. */
};
/* Driver-side record of one IP-in-IP tunnel.
 * NOTE(review): the lifetime and the list that ipip_list_node links into are
 * managed outside this file — confirm in the router code.
 */
struct mlxsw_sp_ipip_entry {
enum mlxsw_sp_ipip_type ipipt; /* Tunnel type. */
struct net_device *ol_dev; /* Overlay. */
struct mlxsw_sp_rif_ipip_lb *ol_lb; /* Loopback RIF used by the tunnel. */
unsigned int ref_count; /* Number of next hops using the tunnel. */
struct mlxsw_sp_fib_entry *decap_fib_entry; /* presumably the decap route, if any — confirm */
struct list_head ipip_list_node;
};
/* Operations that abstract the particulars of one L3 tunnel type. */
struct mlxsw_sp_ipip_ops {
int dev_type; /* ARPHRD_* type of the tunnel netdevice. */
enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
/* Program the adjacency entry at adj_index for a next hop that uses the
 * tunnel described by ipip_entry.
 */
int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_ipip_entry *ipip_entry);
/* Tell whether the tunnel on ol_dev can be offloaded. */
bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev,
enum mlxsw_sp_l3proto ol_proto);
/* Return a configuration for creating an overlay loopback RIF. */
struct mlxsw_sp_rif_ipip_lb_config
(*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp,
const struct net_device *ol_dev);
/* Apply RALUE operation op to the tunnel's decap route, using the decap
 * entry at tunnel_index.
 */
int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_ipip_entry *ipip_entry,
enum mlxsw_reg_ralue_op op,
u32 tunnel_index);
};
extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
#endif /* _MLXSW_IPIP_H_*/
......@@ -36,6 +36,25 @@
#define _MLXSW_ROUTER_H_
#include "spectrum.h"
#include "reg.h"
/* L3 protocol selector; tells which member of union mlxsw_sp_l3addr is
 * meaningful.
 */
enum mlxsw_sp_l3proto {
MLXSW_SP_L3_PROTO_IPV4,
MLXSW_SP_L3_PROTO_IPV6,
};
/* An IPv4 or IPv6 address. The union does not record which member is valid;
 * that is conveyed separately through enum mlxsw_sp_l3proto.
 */
union mlxsw_sp_l3addr {
__be32 addr4; /* IPv4 address, network byte order. */
struct in6_addr addr6; /* IPv6 address. */
};
struct mlxsw_sp_rif_ipip_lb;
/* Configuration of an IP-in-IP loopback RIF. */
struct mlxsw_sp_rif_ipip_lb_config {
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; /* Encapsulation type. */
u32 okey; /* Output (GRE) key, CPU byte order. */
enum mlxsw_sp_l3proto ul_protocol; /* Underlay. */
union mlxsw_sp_l3addr saddr; /* Tunnel source address. */
};
enum mlxsw_sp_rif_counter_dir {
MLXSW_SP_RIF_COUNTER_INGRESS,
......@@ -47,6 +66,8 @@ struct mlxsw_sp_neigh_entry;
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif,
......@@ -79,5 +100,12 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
bool adding);
bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry);
/* Return the source (local) address of the tunnel on ol_dev, for underlay
 * protocol proto.
 */
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
/* Return the destination (remote) address of the tunnel on ol_dev, for
 * underlay protocol proto.
 */
union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
/* Return the IPv4 destination (remote) address of the tunnel on ol_dev, in
 * network byte order.
 */
__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev);
#endif /* _MLXSW_ROUTER_H_*/
......@@ -85,6 +85,7 @@ enum {
MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
MLXSW_TRAP_ID_ACL0 = 0x1C0,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册