• E
    net: loopback: fix a dst refcounting issue · 794ed393
    Eric Dumazet 提交于
    Ben Greear reported crashes in ip_rcv_finish() on a stress
    test involving many macvlans.
    
    We tracked the bug to a dst use after free. ip_rcv_finish()
    was calling dst->input() and got garbage for dst->input value.
    
    It appears the bug is in loopback driver, lacking
    a skb_dst_force() before calling netif_rx().
    
    As a result, a non refcounted dst, normally protected by a
    RCU read_lock section, was escaping this section and could
    be freed before the packet being processed.
    
      [<ffffffff813a3c4d>] loopback_xmit+0x64/0x83
      [<ffffffff81477364>] dev_hard_start_xmit+0x26c/0x35e
      [<ffffffff8147771a>] dev_queue_xmit+0x2c4/0x37c
      [<ffffffff81477456>] ? dev_hard_start_xmit+0x35e/0x35e
      [<ffffffff8148cfa6>] ? eth_header+0x28/0xb6
      [<ffffffff81480f09>] neigh_resolve_output+0x176/0x1a7
      [<ffffffff814ad835>] ip_finish_output2+0x297/0x30d
      [<ffffffff814ad6d5>] ? ip_finish_output2+0x137/0x30d
      [<ffffffff814ad90e>] ip_finish_output+0x63/0x68
      [<ffffffff814ae412>] ip_output+0x61/0x67
      [<ffffffff814ab904>] dst_output+0x17/0x1b
      [<ffffffff814adb6d>] ip_local_out+0x1e/0x23
      [<ffffffff814ae1c4>] ip_queue_xmit+0x315/0x353
      [<ffffffff814adeaf>] ? ip_send_unicast_reply+0x2cc/0x2cc
      [<ffffffff814c018f>] tcp_transmit_skb+0x7ca/0x80b
      [<ffffffff814c3571>] tcp_connect+0x53c/0x587
      [<ffffffff810c2f0c>] ? getnstimeofday+0x44/0x7d
      [<ffffffff810c2f56>] ? ktime_get_real+0x11/0x3e
      [<ffffffff814c6f9b>] tcp_v4_connect+0x3c2/0x431
      [<ffffffff814d6913>] __inet_stream_connect+0x84/0x287
      [<ffffffff814d6b38>] ? inet_stream_connect+0x22/0x49
      [<ffffffff8108d695>] ? _local_bh_enable_ip+0x84/0x9f
      [<ffffffff8108d6c8>] ? local_bh_enable+0xd/0x11
      [<ffffffff8146763c>] ? lock_sock_nested+0x6e/0x79
      [<ffffffff814d6b38>] ? inet_stream_connect+0x22/0x49
      [<ffffffff814d6b49>] inet_stream_connect+0x33/0x49
      [<ffffffff814632c6>] sys_connect+0x75/0x98
    
    This bug was introduced in linux-2.6.35, in commit
    7fee226a (net: add a noref bit on skb dst)
    
    skb_dst_force() is enforced in dev_queue_xmit() for devices having a
    qdisc.
    Reported-by: Ben Greear <greearb@candelatech.com>
    Signed-off-by: Eric Dumazet <edumazet@google.com>
    Tested-by: Ben Greear <greearb@candelatech.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
    794ed393
loopback.c 5.5 KB
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Pseudo-driver for the loopback interface.
 *
 * Version:	@(#)loopback.c	1.0.4b	08/16/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@scyld.com>
 *
 *		Alan Cox	:	Fixed oddments for NET3.014
 *		Alan Cox	:	Rejig for NET3.029 snap #3
 *		Alan Cox	: 	Fixed NET3.029 bugs and sped up
 *		Larry McVoy	:	Tiny tweak to double performance
 *		Alan Cox	:	Backed out LMV's tweak - the linux mm
 *					can't take it...
 *              Michael Griffith:       Don't bother computing the checksums
 *                                      on packets received on the loopback
 *                                      interface.
 *		Alexey Kuznetsov:	Potential hang under some extreme
 *					cases removed.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/in.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/io.h>

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <linux/if_ether.h>	/* For the statistics structure. */
#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/percpu.h>
#include <net/net_namespace.h>
#include <linux/u64_stats_sync.h>

/* Per-cpu loopback traffic counters (allocated in loopback_dev_init()). */
struct pcpu_lstats {
	u64			packets;	/* packets accepted by netif_rx() in loopback_xmit() */
	u64			bytes;		/* sum of skb->len for those packets */
	struct u64_stats_sync	syncp;		/* brackets updates and reads of the two counters */
};

/*
 * Transmit handler of the loopback device: the packet is handed
 * straight to netif_rx().
 *
 * The higher levels take care of making this non-reentrant (it is
 * called with BHs disabled).  Always returns NETDEV_TX_OK; the per-cpu
 * counters are only updated when netif_rx() reports NET_RX_SUCCESS.
 */
static netdev_tx_t loopback_xmit(struct sk_buff *skb,
				 struct net_device *dev)
{
	struct pcpu_lstats *stats;
	int pkt_len;

	skb_orphan(skb);

	/* The dst has to be refcounted before this packet
	 * is queued to netif_rx().
	 */
	skb_dst_force(skb);

	skb->protocol = eth_type_trans(skb, dev);

	/* this_cpu_ptr() is fine here because BHs are off */
	stats = this_cpu_ptr(dev->lstats);

	/* Sample the length before the skb is given to netif_rx() */
	pkt_len = skb->len;
	if (unlikely(netif_rx(skb) != NET_RX_SUCCESS))
		return NETDEV_TX_OK;

	u64_stats_update_begin(&stats->syncp);
	stats->bytes += pkt_len;
	stats->packets++;
	u64_stats_update_end(&stats->syncp);

	return NETDEV_TX_OK;
}

/*
 * ndo_get_stats64 hook: sum the per-cpu packet and byte counters over
 * every possible CPU and report the totals as both the rx and the tx
 * figures.  Returns @stats.
 */
static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev,
						      struct rtnl_link_stats64 *stats)
{
	u64 total_bytes = 0, total_packets = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct pcpu_lstats *pcpu = per_cpu_ptr(dev->lstats, cpu);
		u64 cpu_bytes, cpu_packets;
		unsigned int seq;

		/* Re-read both counters for as long as the retry check asks */
		do {
			seq = u64_stats_fetch_begin_bh(&pcpu->syncp);
			cpu_bytes = pcpu->bytes;
			cpu_packets = pcpu->packets;
		} while (u64_stats_fetch_retry_bh(&pcpu->syncp, seq));

		total_bytes += cpu_bytes;
		total_packets += cpu_packets;
	}

	/* The same totals are reported for both directions */
	stats->rx_packets = stats->tx_packets = total_packets;
	stats->rx_bytes = stats->tx_bytes = total_bytes;

	return stats;
}

/* ethtool get_link callback: unconditionally reports 1, whatever @dev is. */
static u32 always_on(struct net_device *dev)
{
	const u32 link_up = 1;

	return link_up;
}

/*
 * ethtool operations for the loopback device. Only get_link is
 * provided, and always_on() makes it return 1 unconditionally.
 */
static const struct ethtool_ops loopback_ethtool_ops = {
	.get_link		= always_on,
};

/*
 * ndo_init hook: allocate the per-cpu statistics block.
 * Returns 0 on success, or -ENOMEM if the allocation fails.
 */
static int loopback_dev_init(struct net_device *dev)
{
	dev->lstats = alloc_percpu(struct pcpu_lstats);

	return dev->lstats ? 0 : -ENOMEM;
}

/*
 * Device destructor (installed as dev->destructor by loopback_setup()):
 * release the per-cpu counters allocated in loopback_dev_init(), then
 * the net_device itself.  lstats goes first because it is reached
 * through dev.
 */
static void loopback_dev_free(struct net_device *dev)
{
	free_percpu(dev->lstats);
	free_netdev(dev);
}

/* net_device operations implemented by the loopback driver. */
static const struct net_device_ops loopback_ops = {
	.ndo_init		= loopback_dev_init,
	.ndo_start_xmit		= loopback_xmit,
	.ndo_get_stats64	= loopback_get_stats64,
};

/*
 * The loopback device is special. There is only one instance
 * per network namespace.
 *
 * Setup callback passed to alloc_netdev(): fills in the link
 * parameters, the feature flags and the operation tables of @dev.
 */
static void loopback_setup(struct net_device *dev)
{
	/* Operation tables and destructor */
	dev->netdev_ops		= &loopback_ops;
	dev->ethtool_ops	= &loopback_ethtool_ops;
	dev->header_ops		= &eth_header_ops;
	dev->destructor		= loopback_dev_free;

	/* Link-level parameters */
	dev->type		= ARPHRD_LOOPBACK;
	dev->flags		= IFF_LOOPBACK;
	dev->priv_flags	       &= ~IFF_XMIT_DST_RELEASE;
	dev->mtu		= 64 * 1024;
	dev->hard_header_len	= ETH_HLEN;	/* 14 */
	dev->addr_len		= ETH_ALEN;	/* 6 */
	dev->tx_queue_len	= 0;

	/* Everything listed in hw_features is also switched on in features */
	dev->hw_features	= NETIF_F_ALL_TSO | NETIF_F_UFO;
	dev->features		= dev->hw_features |
				  NETIF_F_SG |
				  NETIF_F_FRAGLIST |
				  NETIF_F_HW_CSUM |
				  NETIF_F_RXCSUM |
				  NETIF_F_HIGHDMA |
				  NETIF_F_LLTX |
				  NETIF_F_NETNS_LOCAL |
				  NETIF_F_VLAN_CHALLENGED |
				  NETIF_F_LOOPBACK;
}

/*
 * Per-namespace init: set up and register the loopback device of @net.
 *
 * On success the device is stored in net->loopback_dev and 0 is
 * returned.  On failure the error code is returned, except for
 * init_net, where a failure is fatal and triggers a panic.
 */
static __net_init int loopback_net_init(struct net *net)
{
	struct net_device *lo;
	int err = -ENOMEM;

	lo = alloc_netdev(0, "lo", loopback_setup);
	if (!lo)
		goto fail;

	dev_net_set(lo, net);
	err = register_netdev(lo);
	if (err) {
		free_netdev(lo);
		goto fail;
	}

	BUG_ON(lo->ifindex != LOOPBACK_IFINDEX);
	net->loopback_dev = lo;
	return 0;

fail:
	if (net_eq(net, &init_net))
		panic("loopback: Failed to register netdevice: %d\n", err);
	return err;
}

/*
 * Per-namespace operations of the loopback driver; only an init hook
 * is provided.  Registered in net/core/dev.c
 */
struct pernet_operations __net_initdata loopback_net_ops = {
	.init	= loopback_net_init,
};
反馈
建议
客服 返回
顶部