diff --git a/include/net/seg6.h b/include/net/seg6.h index 5379f550f521d841de956ad4d3a8c5cc4285e275..099bad59dc90150457f02e64d87b1354140ef0f3 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -60,7 +60,8 @@ extern int seg6_local_init(void); extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); -extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); +extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, + int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); #endif diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h index b6e5a0a1afd7a180938ba75250c7858c65868dc5..b23df9f58354e6e2d0174ddf356494c89a1dbc9b 100644 --- a/include/uapi/linux/seg6_iptunnel.h +++ b/include/uapi/linux/seg6_iptunnel.h @@ -33,16 +33,26 @@ struct seg6_iptunnel_encap { enum { SEG6_IPTUN_MODE_INLINE, SEG6_IPTUN_MODE_ENCAP, + SEG6_IPTUN_MODE_L2ENCAP, }; #ifdef __KERNEL__ static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) { - int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); - - return ((tuninfo->srh->hdrlen + 1) << 3) + - (encap * sizeof(struct ipv6hdr)); + int head = 0; + + switch (tuninfo->mode) { + case SEG6_IPTUN_MODE_INLINE: + break; + case SEG6_IPTUN_MODE_ENCAP: + head = sizeof(struct ipv6hdr); + break; + case SEG6_IPTUN_MODE_L2ENCAP: + return 0; + } + + return ((tuninfo->srh->hdrlen + 1) << 3) + head; } #endif diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 0d722396dce65f1dbc691d51cea3be9160dc06b2..ea71e4b0ab7aea80fc6b564fddeea7a6b01feaeb 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -308,6 +308,7 @@ config IPV6_SEG6_LWTUNNEL depends on IPV6 select LWTUNNEL select DST_CACHE + select IPV6_MULTIPLE_TABLES ---help--- Support for encapsulation of packets within an outer IPv6 header and a Segment Routing Header using the lightweight diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 5012330405702939c728fd546ce83022cc787ca5..bd6cc688bd199ae98cc1be8d0851b08cb6709486 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev, } /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ -int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { struct net *net = dev_net(skb_dst(skb)->dev); struct ipv6hdr *hdr, *inner_hdr; @@ -116,15 +116,22 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) * hlim will be decremented in ip6_forward() afterwards and * decapsulation will overwrite inner hlim with outer hlim */ - ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), - ip6_flowlabel(inner_hdr)); - hdr->hop_limit = inner_hdr->hop_limit; + + if (skb->protocol == htons(ETH_P_IPV6)) { + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + ip6_flowlabel(inner_hdr)); + hdr->hop_limit = inner_hdr->hop_limit; + } else { + ip6_flow_hdr(hdr, 0, 0); + hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + } + hdr->nexthdr = NEXTHDR_ROUTING; isrh = (void *)hdr + sizeof(*hdr); memcpy(isrh, osrh, hdrlen); - isrh->nexthdr = NEXTHDR_IPV6; + isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); @@ -199,7 +206,7 @@ static int seg6_do_srh(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct seg6_iptunnel_encap *tinfo; - int err = 0; + int proto, err = 0; tinfo = seg6_encap_lwtunnel(dst->lwtstate); @@ -210,16 +217,46 @@ static int seg6_do_srh(struct sk_buff *skb) switch (tinfo->mode) { case SEG6_IPTUN_MODE_INLINE: + if (skb->protocol != htons(ETH_P_IPV6)) + return -EINVAL; + err = seg6_do_srh_inline(skb, tinfo->srh); + if (err) + return err; + skb_reset_inner_headers(skb); break; case SEG6_IPTUN_MODE_ENCAP: - err = seg6_do_srh_encap(skb, tinfo->srh); + if (skb->protocol == htons(ETH_P_IPV6)) + proto = IPPROTO_IPV6; + else if (skb->protocol == htons(ETH_P_IP)) + proto = IPPROTO_IPIP; + else + return -EINVAL; + + err = seg6_do_srh_encap(skb, tinfo->srh, proto); + if (err) + return err; + + skb->protocol = htons(ETH_P_IPV6); break; - } + case SEG6_IPTUN_MODE_L2ENCAP: + if (!skb_mac_header_was_set(skb)) + return -EINVAL; - if (err) - return err; + if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0) + return -ENOMEM; + + skb_mac_header_rebuild(skb); + skb_push(skb, skb->mac_len); + + err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE); + if (err) + return err; + + skb->protocol = htons(ETH_P_IPV6); + break; + } ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); @@ -334,6 +371,9 @@ static int seg6_build_state(struct nlattr *nla, struct seg6_lwt *slwt; int err; + if (family != AF_INET && family != AF_INET6) + return -EINVAL; + err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, seg6_iptunnel_policy, extack); @@ -356,9 +396,14 @@ static int seg6_build_state(struct nlattr *nla, switch (tuninfo->mode) { case SEG6_IPTUN_MODE_INLINE: + if (family != AF_INET6) + return -EINVAL; + break; case SEG6_IPTUN_MODE_ENCAP: break; + case SEG6_IPTUN_MODE_L2ENCAP: + break; default: return -EINVAL; } @@ -382,8 +427,11 @@ static int seg6_build_state(struct nlattr *nla, memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); newts->type = LWTUNNEL_ENCAP_SEG6; - newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | - LWTUNNEL_STATE_INPUT_REDIRECT; + newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; + + if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + newts->headroom = seg6_lwt_headroom(tuninfo); *ts = newts; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 147680e7a00c8f9dda7edd9e9262d57f1a78f078..9c1a885ee48216c766616f10eb4d945f5249fd23 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -30,6 +30,7 @@ #ifdef CONFIG_IPV6_SEG6_HMAC #include #endif +#include struct seg6_local_lwt; @@ -99,23 +100,105 @@ static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb) return srh; } +static bool decap_and_validate(struct sk_buff *skb, int proto) +{ + struct ipv6_sr_hdr *srh; + unsigned int off = 0; + + srh = get_srh(skb); + if (srh && srh->segments_left > 0) + return false; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (srh && !seg6_hmac_validate_skb(skb)) + return false; +#endif + + if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0) + return false; + + if (!pskb_pull(skb, off)) + return false; + + skb_postpull_rcsum(skb, skb_network_header(skb), off); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + return true; +} + +static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) +{ + struct in6_addr *addr; + + srh->segments_left--; + addr = srh->segments + srh->segments_left; + *daddr = *addr; +} + +static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, + u32 tbl_id) +{ + struct net *net = dev_net(skb->dev); + struct ipv6hdr *hdr = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct dst_entry *dst = NULL; + struct rt6_info *rt; + struct flowi6 fl6; + + fl6.flowi6_iif = skb->dev->ifindex; + fl6.daddr = nhaddr ? *nhaddr : hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + if (nhaddr) + fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; + + if (!tbl_id) { + dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); + } else { + struct fib6_table *table; + + table = fib6_get_table(net, tbl_id); + if (!table) + goto out; + + rt = ip6_pol_route(net, table, 0, &fl6, flags); + dst = &rt->dst; + } + + if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) { + dst_release(dst); + dst = NULL; + } + +out: + if (!dst) { + rt = net->ipv6.ip6_blk_hole_entry; + dst = &rt->dst; + dst_hold(dst); + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); +} + /* regular endpoint function */ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct ipv6_sr_hdr *srh; - struct in6_addr *addr; srh = get_and_validate_srh(skb); if (!srh) goto drop; - srh->segments_left--; - addr = srh->segments + srh->segments_left; - - ipv6_hdr(skb)->daddr = *addr; + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); - skb_dst_drop(skb); - ip6_route_input(skb); + lookup_nexthop(skb, NULL, 0); return dst_input(skb); @@ -127,41 +210,34 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) /* regular endpoint, and forward to specified nexthop */ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - struct net *net = dev_net(skb->dev); struct ipv6_sr_hdr *srh; - struct dst_entry *dst; - struct in6_addr *addr; - struct ipv6hdr *hdr; - struct flowi6 fl6; - int flags; srh = get_and_validate_srh(skb); if (!srh) goto drop; - srh->segments_left--; - addr = srh->segments + srh->segments_left; + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); - hdr = ipv6_hdr(skb); - hdr->daddr = *addr; + lookup_nexthop(skb, &slwt->nh6, 0); - skb_dst_drop(skb); + return dst_input(skb); - fl6.flowi6_iif = skb->dev->ifindex; - fl6.daddr = slwt->nh6; - fl6.saddr = hdr->saddr; - fl6.flowlabel = ip6_flowinfo(hdr); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = hdr->nexthdr; +drop: + kfree_skb(skb); + return -EINVAL; +} - flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE | - RT6_LOOKUP_F_REACHABLE; +static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; - dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); - if (dst->dev->flags & IFF_LOOPBACK) + srh = get_and_validate_srh(skb); + if (!srh) goto drop; - skb_dst_set(skb, dst); + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); + + lookup_nexthop(skb, NULL, slwt->table); return dst_input(skb); @@ -170,45 +246,78 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) return -EINVAL; } -/* decapsulate and forward to specified nexthop */ -static int input_action_end_dx6(struct sk_buff *skb, +/* decapsulate and forward inner L2 frame on specified interface */ +static int input_action_end_dx2(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct net *net = dev_net(skb->dev); - struct ipv6hdr *inner_hdr; - struct ipv6_sr_hdr *srh; - struct dst_entry *dst; - unsigned int off = 0; - struct flowi6 fl6; - bool use_nh; - int flags; + struct net_device *odev; + struct ethhdr *eth; - /* this function accepts IPv6 encapsulated packets, with either - * an SRH with SL=0, or no SRH. + if (!decap_and_validate(skb, NEXTHDR_NONE)) + goto drop; + + if (!pskb_may_pull(skb, ETH_HLEN)) + goto drop; + + skb_reset_mac_header(skb); + eth = (struct ethhdr *)skb->data; + + /* To determine the frame's protocol, we assume it is 802.3. This avoids + * a call to eth_type_trans(), which is not really relevant for our + * use case. */ + if (!eth_proto_is_802_3(eth->h_proto)) + goto drop; - srh = get_srh(skb); - if (srh && srh->segments_left > 0) + odev = dev_get_by_index_rcu(net, slwt->oif); + if (!odev) goto drop; -#ifdef CONFIG_IPV6_SEG6_HMAC - if (srh && !seg6_hmac_validate_skb(skb)) + /* As we accept Ethernet frames, make sure the egress device is of + * the correct type. + */ + if (odev->type != ARPHRD_ETHER) goto drop; -#endif - if (ipv6_find_hdr(skb, &off, IPPROTO_IPV6, NULL, NULL) < 0) + if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev)) goto drop; - if (!pskb_pull(skb, off)) + skb_orphan(skb); + + if (skb_warn_if_lro(skb)) goto drop; - skb_postpull_rcsum(skb, skb_network_header(skb), off); + skb_forward_csum(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb->encapsulation = 0; + if (skb->len - ETH_HLEN > odev->mtu) + goto drop; - inner_hdr = ipv6_hdr(skb); + skb->dev = odev; + skb->protocol = eth->h_proto; + + return dev_queue_xmit(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +/* decapsulate and forward to specified nexthop */ +static int input_action_end_dx6(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct in6_addr *nhaddr = NULL; + + /* this function accepts IPv6 encapsulated packets, with either + * an SRH with SL=0, or no SRH. + */ + + if (!decap_and_validate(skb, IPPROTO_IPV6)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto drop; /* The inner packet is not associated to any local interface, * so we do not call netif_rx(). @@ -217,28 +326,62 @@ static int input_action_end_dx6(struct sk_buff *skb, * inner packet's DA. Otherwise, use the specified nexthop. */ - use_nh = !ipv6_addr_any(&slwt->nh6); + if (!ipv6_addr_any(&slwt->nh6)) + nhaddr = &slwt->nh6; + + lookup_nexthop(skb, nhaddr, 0); + + return dst_input(skb); +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int input_action_end_dx4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct iphdr *iph; + __be32 nhaddr; + int err; + + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb->protocol = htons(ETH_P_IP); + + iph = ip_hdr(skb); + + nhaddr = slwt->nh4.s_addr ?: iph->daddr; skb_dst_drop(skb); - fl6.flowi6_iif = skb->dev->ifindex; - fl6.daddr = use_nh ? slwt->nh6 : inner_hdr->daddr; - fl6.saddr = inner_hdr->saddr; - fl6.flowlabel = ip6_flowinfo(inner_hdr); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = inner_hdr->nexthdr; + err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); + if (err) + goto drop; - flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_REACHABLE; - if (use_nh) - flags |= RT6_LOOKUP_F_IFACE; + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int input_action_end_dt6(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + if (!decap_and_validate(skb, IPPROTO_IPV6)) + goto drop; - dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); - if (dst->dev->flags & IFF_LOOPBACK) + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; - skb_dst_set(skb, dst); + lookup_nexthop(skb, NULL, slwt->table); return dst_input(skb); + drop: kfree_skb(skb); return -EINVAL; @@ -261,8 +404,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); - skb_dst_drop(skb); - ip6_route_input(skb); + lookup_nexthop(skb, NULL, 0); return dst_input(skb); @@ -276,29 +418,25 @@ static int input_action_end_b6_encap(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct ipv6_sr_hdr *srh; - struct in6_addr *addr; int err = -EINVAL; srh = get_and_validate_srh(skb); if (!srh) goto drop; - srh->segments_left--; - addr = srh->segments + srh->segments_left; - ipv6_hdr(skb)->daddr = *addr; + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); skb_reset_inner_headers(skb); skb->encapsulation = 1; - err = seg6_do_srh_encap(skb, slwt->srh); + err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6); if (err) goto drop; ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); - skb_dst_drop(skb); - ip6_route_input(skb); + lookup_nexthop(skb, NULL, 0); return dst_input(skb); @@ -318,11 +456,31 @@ static struct seg6_action_desc seg6_action_table[] = { .attrs = (1 << SEG6_LOCAL_NH6), .input = input_action_end_x, }, + { + .action = SEG6_LOCAL_ACTION_END_T, + .attrs = (1 << SEG6_LOCAL_TABLE), + .input = input_action_end_t, + }, + { + .action = SEG6_LOCAL_ACTION_END_DX2, + .attrs = (1 << SEG6_LOCAL_OIF), + .input = input_action_end_dx2, + }, { .action = SEG6_LOCAL_ACTION_END_DX6, .attrs = (1 << SEG6_LOCAL_NH6), .input = input_action_end_dx6, }, + { + .action = SEG6_LOCAL_ACTION_END_DX4, + .attrs = (1 << SEG6_LOCAL_NH4), + .input = input_action_end_dx4, + }, + { + .action = SEG6_LOCAL_ACTION_END_DT6, + .attrs = (1 << SEG6_LOCAL_TABLE), + .input = input_action_end_dt6, + }, { .action = SEG6_LOCAL_ACTION_END_B6, .attrs = (1 << SEG6_LOCAL_SRH), @@ -357,6 +515,11 @@ static int seg6_local_input(struct sk_buff *skb) struct seg6_action_desc *desc; struct seg6_local_lwt *slwt; + if (skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return -EINVAL; + } + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); desc = slwt->desc; @@ -623,6 +786,9 @@ static int seg6_local_build_state(struct nlattr *nla, unsigned int family, struct seg6_local_lwt *slwt; int err; + if (family != AF_INET6) + return -EINVAL; + err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy, extack);