Merge branch 'l2tp-auto-mtu'

R. Parameswaran says: ==================== L2TP:Adjust intf MTU, add underlay L3, L2 hdrs. Existing L2TP kernel code does not derive the optimal MTU for Ethernet pseudowires and instead leaves this to a userspace L2TP daemon or operator. If an MTU is not specified, the existing kernel code chooses an MTU that does not take account of all tunnel header overheads, which can lead to unwanted IP fragmentation. When L2TP is used without a control plane (userspace daemon), we would prefer that the kernel does a better job of choosing a default pseudowire MTU, taking account of all tunnel header overheads, including IP header options, if any. This patch addresses this. Change-set is organized as a two part patch series, with one patch introducing a new kernel function to compute the IP overhead on a socket, and the other patch using this new kernel function to compute the default L2TP MTU for an Ethernet pseudowire. Existing code also seems to assume an Ethernet (non-jumbo) underlay. The change proposed here uses the PMTU mechanism and the dst entry in the L2TP tunnel socket to directly pull up the underlay MTU (as the baseline number on top of which the encapsulation headers are factored in). An default MTU value of 1500 bytes is assumed as a fallback only if this fails. Fixed the kbuild test robot error in the previous posting. ==================== Signed-off-by: N David S. Miller <davem@davemloft.net>

Merge branch 'l2tp-auto-mtu'
R. Parameswaran says: ==================== L2TP:Adjust intf MTU, add underlay L3, L2 hdrs. Existing L2TP kernel code does not derive the optimal MTU for Ethernet pseudowires and instead leaves this to a userspace L2TP daemon or operator. If an MTU is not specified, the existing kernel code chooses an MTU that does not take account of all tunnel header overheads, which can lead to unwanted IP fragmentation. When L2TP is used without a control plane (userspace daemon), we would prefer that the kernel does a better job of choosing a default pseudowire MTU, taking account of all tunnel header overheads, including IP header options, if any. This patch addresses this. Change-set is organized as a two part patch series, with one patch introducing a new kernel function to compute the IP overhead on a socket, and the other patch using this new kernel function to compute the default L2TP MTU for an Ethernet pseudowire. Existing code also seems to assume an Ethernet (non-jumbo) underlay. The change proposed here uses the PMTU mechanism and the dst entry in the L2TP tunnel socket to directly pull up the underlay MTU (as the baseline number on top of which the encapsulation headers are factored in). An default MTU value of 1500 bytes is assumed as a fallback only if this fails. Fixed the kbuild test robot error in the previous posting. ==================== Signed-off-by: N David S. Miller <davem@davemloft.net>
6d2d34a9 · David S. Miller · 129858fa · b784e7eb · 6d2d34a9 · 6d2d34a9
显示空白变更内容
内联并排

Showing with 100 addition and 4 deletion

include/linux/net.h include/linux/net.h +3 -0

net/l2tp/l2tp_eth.c net/l2tp/l2tp_eth.c +51 -4

net/socket.c net/socket.c +46 -0

未找到文件。
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);

+/* Following routine returns the IP overhead imposed by a socket.  */
+u32 kernel_sock_ip_overhead(struct sock *sk);
+
 #define MODULE_ALIAS_NETPROTO(proto) \
 	MODULE_ALIAS("net-pf-" __stringify(proto))


--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
 #include <net/xfrm.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>

 #include "l2tp_core.h"

@@ -204,6 +207,53 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 }
 #endif

+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+				struct l2tp_session *session,
+				struct net_device *dev)
+{
+	unsigned int overhead = 0;
+	struct dst_entry *dst;
+	u32 l3_overhead = 0;
+
+	/* if the encap is UDP, account for UDP header size */
+	if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+		overhead += sizeof(struct udphdr);
+		dev->needed_headroom += sizeof(struct udphdr);
+	}
+	if (session->mtu != 0) {
+		dev->mtu = session->mtu;
+		dev->needed_headroom += session->hdr_len;
+		return;
+	}
+	l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+	if (l3_overhead == 0) {
+		/* L3 Overhead couldn't be identified, this could be
+		 * because tunnel->sock was NULL or the socket's
+		 * address family was not IPv4 or IPv6,
+		 * dev mtu stays at 1500.
+		 */
+		return;
+	}
+	/* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
+	 * UDP overhead, if any, was already factored in above.
+	 */
+	overhead += session->hdr_len + ETH_HLEN + l3_overhead;
+
+	/* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+	dst = sk_dst_get(tunnel->sock);
+	if (dst) {
+		/* dst_mtu will use PMTU if found, else fallback to intf MTU */
+		u32 pmtu = dst_mtu(dst);
+
+		if (pmtu != 0)
+			dev->mtu = pmtu;
+		dst_release(dst);
+	}
+	session->mtu = dev->mtu - overhead;
+	dev->mtu = session->mtu;
+	dev->needed_headroom += session->hdr_len;
+}
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
 {
 	struct net_device *dev;
@@ -247,12 +297,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 	}

 	dev_net_set(dev, net);
-	if (session->mtu == 0)
-		session->mtu = dev->mtu - session->hdr_len;
-	dev->mtu = session->mtu;
-	dev->needed_headroom += session->hdr_len;
 	dev->min_mtu = 0;
 	dev->max_mtu = ETH_MAX_MTU;
+	l2tp_eth_adjust_mtu(tunnel, session, dev);

 	priv = netdev_priv(dev);
 	priv->dev = dev;

--- a/net/socket.c
+++ b/net/socket.c
@@ -3356,3 +3356,49 @@ int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
 	return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+	struct inet_sock *inet;
+	struct ip_options_rcu *opt;
+	u32 overhead = 0;
+	bool owned_by_user;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct ipv6_pinfo *np;
+	struct ipv6_txoptions *optv6 = NULL;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+	if (!sk)
+		return overhead;
+
+	owned_by_user = sock_owned_by_user(sk);
+	switch (sk->sk_family) {
+	case AF_INET:
+		inet = inet_sk(sk);
+		overhead += sizeof(struct iphdr);
+		opt = rcu_dereference_protected(inet->inet_opt,
+						owned_by_user);
+		if (opt)
+			overhead += opt->opt.optlen;
+		return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		np = inet6_sk(sk);
+		overhead += sizeof(struct ipv6hdr);
+		if (np)
+			optv6 = rcu_dereference_protected(np->opt,
+							  owned_by_user);
+		if (optv6)
+			overhead += (optv6->opt_flen + optv6->opt_nflen);
+		return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+	default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+		return overhead;
+	}
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);