提交 cf4828d1 编写于 作者: D David S. Miller

Merge branch 'ipv6-sr-updates'

David Lebrun says:

====================
net: updates for IPv6 Segment Routing

v2: seg6_lwt_headroom() is not relevant for lwtunnel_input_redirect()
    use cases, and L2ENCAP only uses this redirection. Fix incoherence
    between arbitrary MAC header size support and fixed headroom
    computation by setting only LWTUNNEL_STATE_INPUT_REDIRECT for L2ENCAP
    mode.

This patch series provides several updates for the SRv6 implementation. The
first patch leverages the existing infrastructure to support encapsulation
of IPv4 packets. The second patch implements the T.Encaps.L2 SR function,
which makes it possible to encapsulate an L2 Ethernet frame within an IPv6+SRH packet.
The last three patches update the seg6local lightweight tunnel, and mainly
implement four new actions: End.T, End.DX2, End.DX4 and End.DT6.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
......@@ -60,7 +60,8 @@ extern int seg6_local_init(void);
extern void seg6_local_exit(void);
extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto);
extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
#endif
......@@ -33,16 +33,26 @@ struct seg6_iptunnel_encap {
enum {
SEG6_IPTUN_MODE_INLINE,
SEG6_IPTUN_MODE_ENCAP,
SEG6_IPTUN_MODE_L2ENCAP,
};
#ifdef __KERNEL__
static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP);
return ((tuninfo->srh->hdrlen + 1) << 3) +
(encap * sizeof(struct ipv6hdr));
int head = 0;
switch (tuninfo->mode) {
case SEG6_IPTUN_MODE_INLINE:
break;
case SEG6_IPTUN_MODE_ENCAP:
head = sizeof(struct ipv6hdr);
break;
case SEG6_IPTUN_MODE_L2ENCAP:
return 0;
}
return ((tuninfo->srh->hdrlen + 1) << 3) + head;
}
#endif
......
......@@ -308,6 +308,7 @@ config IPV6_SEG6_LWTUNNEL
depends on IPV6
select LWTUNNEL
select DST_CACHE
select IPV6_MULTIPLE_TABLES
---help---
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
......
......@@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
}
/* encapsulate a packet within an outer IPv6 header with a given SRH; proto is written to the SRH's nexthdr field to identify the inner payload */
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct ipv6hdr *hdr, *inner_hdr;
......@@ -116,15 +116,22 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
* hlim will be decremented in ip6_forward() afterwards and
* decapsulation will overwrite inner hlim with outer hlim
*/
if (skb->protocol == htons(ETH_P_IPV6)) {
ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
ip6_flowlabel(inner_hdr));
hdr->hop_limit = inner_hdr->hop_limit;
} else {
ip6_flow_hdr(hdr, 0, 0);
hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
}
hdr->nexthdr = NEXTHDR_ROUTING;
isrh = (void *)hdr + sizeof(*hdr);
memcpy(isrh, osrh, hdrlen);
isrh->nexthdr = NEXTHDR_IPV6;
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
......@@ -199,7 +206,7 @@ static int seg6_do_srh(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
int err = 0;
int proto, err = 0;
tinfo = seg6_encap_lwtunnel(dst->lwtstate);
......@@ -210,17 +217,47 @@ static int seg6_do_srh(struct sk_buff *skb)
switch (tinfo->mode) {
case SEG6_IPTUN_MODE_INLINE:
if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
err = seg6_do_srh_inline(skb, tinfo->srh);
if (err)
return err;
skb_reset_inner_headers(skb);
break;
case SEG6_IPTUN_MODE_ENCAP:
err = seg6_do_srh_encap(skb, tinfo->srh);
if (skb->protocol == htons(ETH_P_IPV6))
proto = IPPROTO_IPV6;
else if (skb->protocol == htons(ETH_P_IP))
proto = IPPROTO_IPIP;
else
return -EINVAL;
err = seg6_do_srh_encap(skb, tinfo->srh, proto);
if (err)
return err;
skb->protocol = htons(ETH_P_IPV6);
break;
}
case SEG6_IPTUN_MODE_L2ENCAP:
if (!skb_mac_header_was_set(skb))
return -EINVAL;
if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
return -ENOMEM;
skb_mac_header_rebuild(skb);
skb_push(skb, skb->mac_len);
err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
if (err)
return err;
skb->protocol = htons(ETH_P_IPV6);
break;
}
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
......@@ -334,6 +371,9 @@ static int seg6_build_state(struct nlattr *nla,
struct seg6_lwt *slwt;
int err;
if (family != AF_INET && family != AF_INET6)
return -EINVAL;
err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
seg6_iptunnel_policy, extack);
......@@ -356,9 +396,14 @@ static int seg6_build_state(struct nlattr *nla,
switch (tuninfo->mode) {
case SEG6_IPTUN_MODE_INLINE:
if (family != AF_INET6)
return -EINVAL;
break;
case SEG6_IPTUN_MODE_ENCAP:
break;
case SEG6_IPTUN_MODE_L2ENCAP:
break;
default:
return -EINVAL;
}
......@@ -382,8 +427,11 @@ static int seg6_build_state(struct nlattr *nla,
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
newts->type = LWTUNNEL_ENCAP_SEG6;
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
LWTUNNEL_STATE_INPUT_REDIRECT;
newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
newts->headroom = seg6_lwt_headroom(tuninfo);
*ts = newts;
......
......@@ -30,6 +30,7 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <linux/etherdevice.h>
struct seg6_local_lwt;
......@@ -99,23 +100,105 @@ static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
return srh;
}
/* Strip the outer headers of @skb up to the inner header of type @proto.
 *
 * Returns false when an SRH is present with segments left, when HMAC
 * validation (if built in) fails, when no @proto header is found, or when
 * the pull fails. On success the network and transport header offsets are
 * reset to the new start of data and the encapsulation flag is cleared.
 */
static bool decap_and_validate(struct sk_buff *skb, int proto)
{
	unsigned int inner_off = 0;
	struct ipv6_sr_hdr *srh = get_srh(skb);

	/* an SRH is optional, but when present it must be fully consumed */
	if (srh) {
		if (srh->segments_left > 0)
			return false;
#ifdef CONFIG_IPV6_SEG6_HMAC
		if (!seg6_hmac_validate_skb(skb))
			return false;
#endif
	}

	/* inner_off receives the offset of the @proto header */
	if (ipv6_find_hdr(skb, &inner_off, proto, NULL, NULL) < 0)
		return false;

	if (!pskb_pull(skb, inner_off))
		return false;

	/* account for the pulled bytes in the skb checksum */
	skb_postpull_rcsum(skb, skb_network_header(skb), inner_off);

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->encapsulation = 0;

	return true;
}
/* Decrement segments_left in @srh and copy the segment it now indexes
 * into @daddr.
 */
static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
{
	srh->segments_left--;
	*daddr = srh->segments[srh->segments_left];
}
/* Perform a route lookup for @skb and attach the resulting dst to it.
 *
 * @skb:    packet whose IPv6 header supplies the flow keys
 * @nhaddr: explicit next hop to look up, or NULL to use the destination
 *          address found in the packet's IPv6 header
 * @tbl_id: id of the table to look in; 0 goes through
 *          ip6_route_input_lookup() instead of a specific table
 *
 * Any dst already attached to @skb is dropped. If the requested table does
 * not exist, or the lookup yields a non-error route through a loopback
 * device, the netns' ip6_blk_hole_entry is attached instead.
 */
static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
			   u32 tbl_id)
{
	struct net *net = dev_net(skb->dev);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct dst_entry *dst = NULL;
	struct rt6_info *rt;
	/* Zero the flow key: only some of its fields are assigned below
	 * (flowi6_flags only when a next hop is given), and the lookup must
	 * not read stack garbage from the remaining ones.
	 */
	struct flowi6 fl6 = {};

	fl6.flowi6_iif = skb->dev->ifindex;
	fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
	fl6.saddr = hdr->saddr;
	fl6.flowlabel = ip6_flowinfo(hdr);
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = hdr->nexthdr;

	if (nhaddr)
		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;

	if (!tbl_id) {
		dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
	} else {
		struct fib6_table *table;

		table = fib6_get_table(net, tbl_id);
		if (!table)
			goto out;

		rt = ip6_pol_route(net, table, 0, &fl6, flags);
		dst = &rt->dst;
	}

	/* discard a successful route that points at a loopback device */
	if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
		dst_release(dst);
		dst = NULL;
	}

out:
	/* no usable route: fall back to the blackhole entry */
	if (!dst) {
		rt = net->ipv6.ip6_blk_hole_entry;
		dst = &rt->dst;
		dst_hold(dst);
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);
}
/* regular endpoint function */
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
struct ipv6_sr_hdr *srh;
struct in6_addr *addr;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
srh->segments_left--;
addr = srh->segments + srh->segments_left;
ipv6_hdr(skb)->daddr = *addr;
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
skb_dst_drop(skb);
ip6_route_input(skb);
lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
......@@ -127,41 +210,34 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
/* regular endpoint, and forward to specified nexthop */
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
struct net *net = dev_net(skb->dev);
struct ipv6_sr_hdr *srh;
struct dst_entry *dst;
struct in6_addr *addr;
struct ipv6hdr *hdr;
struct flowi6 fl6;
int flags;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
srh->segments_left--;
addr = srh->segments + srh->segments_left;
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
hdr = ipv6_hdr(skb);
hdr->daddr = *addr;
lookup_nexthop(skb, &slwt->nh6, 0);
skb_dst_drop(skb);
return dst_input(skb);
fl6.flowi6_iif = skb->dev->ifindex;
fl6.daddr = slwt->nh6;
fl6.saddr = hdr->saddr;
fl6.flowlabel = ip6_flowinfo(hdr);
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = hdr->nexthdr;
drop:
kfree_skb(skb);
return -EINVAL;
}
flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE |
RT6_LOOKUP_F_REACHABLE;
static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
struct ipv6_sr_hdr *srh;
dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
if (dst->dev->flags & IFF_LOOPBACK)
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
skb_dst_set(skb, dst);
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
......@@ -170,45 +246,78 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
return -EINVAL;
}
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
/* decapsulate and forward inner L2 frame on specified interface */
static int input_action_end_dx2(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *inner_hdr;
struct ipv6_sr_hdr *srh;
struct dst_entry *dst;
unsigned int off = 0;
struct flowi6 fl6;
bool use_nh;
int flags;
struct net_device *odev;
struct ethhdr *eth;
/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
if (!decap_and_validate(skb, NEXTHDR_NONE))
goto drop;
if (!pskb_may_pull(skb, ETH_HLEN))
goto drop;
skb_reset_mac_header(skb);
eth = (struct ethhdr *)skb->data;
/* To determine the frame's protocol, we assume it is 802.3. This avoids
* a call to eth_type_trans(), which is not really relevant for our
* use case.
*/
if (!eth_proto_is_802_3(eth->h_proto))
goto drop;
srh = get_srh(skb);
if (srh && srh->segments_left > 0)
odev = dev_get_by_index_rcu(net, slwt->oif);
if (!odev)
goto drop;
#ifdef CONFIG_IPV6_SEG6_HMAC
if (srh && !seg6_hmac_validate_skb(skb))
/* As we accept Ethernet frames, make sure the egress device is of
* the correct type.
*/
if (odev->type != ARPHRD_ETHER)
goto drop;
#endif
if (ipv6_find_hdr(skb, &off, IPPROTO_IPV6, NULL, NULL) < 0)
if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
goto drop;
if (!pskb_pull(skb, off))
skb_orphan(skb);
if (skb_warn_if_lro(skb))
goto drop;
skb_postpull_rcsum(skb, skb_network_header(skb), off);
skb_forward_csum(skb);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->encapsulation = 0;
if (skb->len - ETH_HLEN > odev->mtu)
goto drop;
skb->dev = odev;
skb->protocol = eth->h_proto;
return dev_queue_xmit(skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct in6_addr *nhaddr = NULL;
/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
*/
inner_hdr = ipv6_hdr(skb);
if (!decap_and_validate(skb, IPPROTO_IPV6))
goto drop;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
......@@ -217,28 +326,62 @@ static int input_action_end_dx6(struct sk_buff *skb,
* inner packet's DA. Otherwise, use the specified nexthop.
*/
use_nh = !ipv6_addr_any(&slwt->nh6);
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
static int input_action_end_dx4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct iphdr *iph;
__be32 nhaddr;
int err;
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
skb->protocol = htons(ETH_P_IP);
iph = ip_hdr(skb);
nhaddr = slwt->nh4.s_addr ?: iph->daddr;
skb_dst_drop(skb);
fl6.flowi6_iif = skb->dev->ifindex;
fl6.daddr = use_nh ? slwt->nh6 : inner_hdr->daddr;
fl6.saddr = inner_hdr->saddr;
fl6.flowlabel = ip6_flowinfo(inner_hdr);
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = inner_hdr->nexthdr;
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
if (err)
goto drop;
flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_REACHABLE;
if (use_nh)
flags |= RT6_LOOKUP_F_IFACE;
return dst_input(skb);
dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
if (dst->dev->flags & IFF_LOOPBACK)
drop:
kfree_skb(skb);
return -EINVAL;
}
static int input_action_end_dt6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
if (!decap_and_validate(skb, IPPROTO_IPV6))
goto drop;
skb_dst_set(skb, dst);
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
drop:
kfree_skb(skb);
return -EINVAL;
......@@ -261,8 +404,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
skb_dst_drop(skb);
ip6_route_input(skb);
lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
......@@ -276,29 +418,25 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct ipv6_sr_hdr *srh;
struct in6_addr *addr;
int err = -EINVAL;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
srh->segments_left--;
addr = srh->segments + srh->segments_left;
ipv6_hdr(skb)->daddr = *addr;
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
err = seg6_do_srh_encap(skb, slwt->srh);
err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
if (err)
goto drop;
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
skb_dst_drop(skb);
ip6_route_input(skb);
lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
......@@ -318,11 +456,31 @@ static struct seg6_action_desc seg6_action_table[] = {
.attrs = (1 << SEG6_LOCAL_NH6),
.input = input_action_end_x,
},
{
.action = SEG6_LOCAL_ACTION_END_T,
.attrs = (1 << SEG6_LOCAL_TABLE),
.input = input_action_end_t,
},
{
.action = SEG6_LOCAL_ACTION_END_DX2,
.attrs = (1 << SEG6_LOCAL_OIF),
.input = input_action_end_dx2,
},
{
.action = SEG6_LOCAL_ACTION_END_DX6,
.attrs = (1 << SEG6_LOCAL_NH6),
.input = input_action_end_dx6,
},
{
.action = SEG6_LOCAL_ACTION_END_DX4,
.attrs = (1 << SEG6_LOCAL_NH4),
.input = input_action_end_dx4,
},
{
.action = SEG6_LOCAL_ACTION_END_DT6,
.attrs = (1 << SEG6_LOCAL_TABLE),
.input = input_action_end_dt6,
},
{
.action = SEG6_LOCAL_ACTION_END_B6,
.attrs = (1 << SEG6_LOCAL_SRH),
......@@ -357,6 +515,11 @@ static int seg6_local_input(struct sk_buff *skb)
struct seg6_action_desc *desc;
struct seg6_local_lwt *slwt;
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
......@@ -623,6 +786,9 @@ static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
struct seg6_local_lwt *slwt;
int err;
if (family != AF_INET6)
return -EINVAL;
err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
extack);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册