/* drivers/net/ifb.c:

	The purpose of this driver is to provide a device that allows
	for sharing of resources:

	1) qdiscs/policies that are per device as opposed to system wide.
	ifb allows for a device which can be redirected to thus providing
	an impression of sharing.

	2) Allows for queueing incoming traffic for shaping instead of
	dropping.

	The original concept is based on what is known as the IMQ
	driver initially written by Martin Devera, later rewritten
	by Patrick McHardy and then maintained by Andre Correa.

	You need the tc action mirror or redirect to feed this device
	packets.

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version
	2 of the License, or (at your option) any later version.

	Authors:	Jamal Hadi Salim (2005)

*/


#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>

/* Initial tx_queue_len assigned to every ifb device in ifb_setup().
 * ifb_xmit() stops a TX queue once its pending list has reached the
 * device's tx_queue_len.
 */
#define TX_Q_LIMIT    32
struct ifb_q_private {
	struct net_device	*dev;
43
	struct tasklet_struct   ifb_tasklet;
E
Eric Dumazet 已提交
44 45
	int			tasklet_pending;
	int			txqnum;
46
	struct sk_buff_head     rq;
E
Eric Dumazet 已提交
47 48 49
	u64			rx_packets;
	u64			rx_bytes;
	struct u64_stats_sync	rsync;
S
stephen hemminger 已提交
50 51

	struct u64_stats_sync	tsync;
E
Eric Dumazet 已提交
52 53
	u64			tx_packets;
	u64			tx_bytes;
54
	struct sk_buff_head     tq;
E
Eric Dumazet 已提交
55
} ____cacheline_aligned_in_smp;
/* Per-device private area (what netdev_priv() returns for an ifb device). */
struct ifb_dev_private {
	/* Array with one entry per TX queue; allocated in ifb_dev_init()
	 * and released in ifb_dev_free().
	 */
	struct ifb_q_private *tx_private;
};
/* Forward declarations: these handlers are referenced by ifb_netdev_ops
 * before their definitions appear further down in the file.
 */
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
static int ifb_open(struct net_device *dev);
static int ifb_close(struct net_device *dev);

static void ifb_ri_tasklet(unsigned long _txp)
66
{
E
Eric Dumazet 已提交
67
	struct ifb_q_private *txp = (struct ifb_q_private *)_txp;
68
	struct netdev_queue *txq;
69 70
	struct sk_buff *skb;

E
Eric Dumazet 已提交
71 72 73 74
	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		if (!__netif_tx_trylock(txq))
75
			goto resched;
E
Eric Dumazet 已提交
76 77
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
78 79
	}

E
Eric Dumazet 已提交
80
	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
81 82 83 84
		u32 from = G_TC_FROM(skb->tc_verd);

		skb->tc_verd = 0;
		skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
S
stephen hemminger 已提交
85

E
Eric Dumazet 已提交
86 87 88 89
		u64_stats_update_begin(&txp->tsync);
		txp->tx_packets++;
		txp->tx_bytes += skb->len;
		u64_stats_update_end(&txp->tsync);
90

91
		rcu_read_lock();
E
Eric Dumazet 已提交
92
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
93
		if (!skb->dev) {
94
			rcu_read_unlock();
95
			dev_kfree_skb(skb);
E
Eric Dumazet 已提交
96 97
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
98
				goto resched;
99 100
			break;
		}
101
		rcu_read_unlock();
E
Eric Dumazet 已提交
102
		skb->skb_iif = txp->dev->ifindex;
103

104 105 106
		if (from & AT_EGRESS) {
			dev_queue_xmit(skb);
		} else if (from & AT_INGRESS) {
107
			skb_pull(skb, skb->mac_len);
108
			netif_receive_skb(skb);
109 110
		} else
			BUG();
111 112
	}

113
	if (__netif_tx_trylock(txq)) {
E
Eric Dumazet 已提交
114 115 116 117 118
		skb = skb_peek(&txp->rq);
		if (!skb) {
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
119
		} else {
120
			__netif_tx_unlock(txq);
121 122
			goto resched;
		}
123
		__netif_tx_unlock(txq);
124 125
	} else {
resched:
E
Eric Dumazet 已提交
126 127
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
128 129 130 131
	}

}

static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev,
					     struct rtnl_link_stats64 *stats)
{
E
Eric Dumazet 已提交
135 136
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private;
S
stephen hemminger 已提交
137
	unsigned int start;
E
Eric Dumazet 已提交
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
	u64 packets, bytes;
	int i;

	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
		do {
			start = u64_stats_fetch_begin_irq(&txp->rsync);
			packets = txp->rx_packets;
			bytes = txp->rx_bytes;
		} while (u64_stats_fetch_retry_irq(&txp->rsync, start));
		stats->rx_packets += packets;
		stats->rx_bytes += bytes;

		do {
			start = u64_stats_fetch_begin_irq(&txp->tsync);
			packets = txp->tx_packets;
			bytes = txp->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&txp->tsync, start));
		stats->tx_packets += packets;
		stats->tx_bytes += bytes;
	}
S
stephen hemminger 已提交
158 159 160 161 162 163
	stats->rx_dropped = dev->stats.rx_dropped;
	stats->tx_dropped = dev->stats.tx_dropped;

	return stats;
}

/* ndo_init handler: allocate and initialise the per-queue state.
 *
 * @dev: ifb device being initialised.
 *
 * Allocates one zeroed struct ifb_q_private per TX queue, stores the array
 * in the device's private area, then sets up each entry's queues, stats
 * sync objects and tasklet and starts the matching TX queue.
 *
 * Returns 0 on success or -ENOMEM when the array cannot be allocated.
 */
static int ifb_dev_init(struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *queues;
	int qidx;

	queues = kcalloc(dev->num_tx_queues, sizeof(*queues), GFP_KERNEL);
	if (!queues)
		return -ENOMEM;

	dp->tx_private = queues;

	for (qidx = 0; qidx < dev->num_tx_queues; qidx++) {
		struct ifb_q_private *q = &queues[qidx];

		q->txqnum = qidx;
		q->dev = dev;
		__skb_queue_head_init(&q->rq);
		__skb_queue_head_init(&q->tq);
		u64_stats_init(&q->rsync);
		u64_stats_init(&q->tsync);
		/* The tasklet receives its own queue state as argument. */
		tasklet_init(&q->ifb_tasklet, ifb_ri_tasklet,
			     (unsigned long)q);
		netif_tx_start_queue(netdev_get_tx_queue(dev, qidx));
	}

	return 0;
}
static const struct net_device_ops ifb_netdev_ops = {
	.ndo_open	= ifb_open,
	.ndo_stop	= ifb_close,
S
stephen hemminger 已提交
191
	.ndo_get_stats64 = ifb_stats64,
192 193
	.ndo_start_xmit	= ifb_xmit,
	.ndo_validate_addr = eth_validate_addr,
E
Eric Dumazet 已提交
194
	.ndo_init	= ifb_dev_init,
S
Stephen Hemminger 已提交
195 196
};

/* Feature flags advertised by ifb devices; ifb_setup() ORs them into
 * dev->features and, minus the VLAN TX offload bits, into
 * dev->vlan_features.
 */
#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG  | NETIF_F_FRAGLIST	| \
		      NETIF_F_TSO_ECN | NETIF_F_TSO | NETIF_F_TSO6	| \
		      NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX		| \
		      NETIF_F_HW_VLAN_STAG_TX)
/* Device destructor (installed as dev->destructor by ifb_setup()).
 *
 * @dev: ifb device being torn down.
 *
 * For every TX queue: kills its tasklet and purges both packet lists.
 * Afterwards the per-queue array and the net_device itself are freed.
 */
static void ifb_dev_free(struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *queues = dp->tx_private;
	int qidx;

	for (qidx = 0; qidx < dev->num_tx_queues; qidx++) {
		struct ifb_q_private *q = &queues[qidx];

		/* Stop the tasklet first so nothing touches the lists
		 * while they are being emptied.
		 */
		tasklet_kill(&q->ifb_tasklet);
		__skb_queue_purge(&q->rq);
		__skb_queue_purge(&q->tq);
	}

	kfree(dp->tx_private);
	free_netdev(dev);
}

static void ifb_setup(struct net_device *dev)
218 219
{
	/* Initialize the device structure. */
S
Stephen Hemminger 已提交
220
	dev->netdev_ops = &ifb_netdev_ops;
221 222 223 224

	/* Fill in device structure with ethernet-generic values. */
	ether_setup(dev);
	dev->tx_queue_len = TX_Q_LIMIT;
S
Stephen Hemminger 已提交
225

E
Eric Dumazet 已提交
226
	dev->features |= IFB_FEATURES;
227 228
	dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
					       NETIF_F_HW_VLAN_STAG_TX);
E
Eric Dumazet 已提交
229

230 231
	dev->flags |= IFF_NOARP;
	dev->flags &= ~IFF_MULTICAST;
232 233
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	netif_keep_dst(dev);
234
	eth_hw_addr_random(dev);
E
Eric Dumazet 已提交
235
	dev->destructor = ifb_dev_free;
236 237
}

static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
239
{
E
Eric Dumazet 已提交
240
	struct ifb_dev_private *dp = netdev_priv(dev);
241
	u32 from = G_TC_FROM(skb->tc_verd);
E
Eric Dumazet 已提交
242
	struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
243

E
Eric Dumazet 已提交
244 245 246 247
	u64_stats_update_begin(&txp->rsync);
	txp->rx_packets++;
	txp->rx_bytes += skb->len;
	u64_stats_update_end(&txp->rsync);
248

249
	if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
250
		dev_kfree_skb(skb);
S
stephen hemminger 已提交
251
		dev->stats.rx_dropped++;
252
		return NETDEV_TX_OK;
253 254
	}

E
Eric Dumazet 已提交
255 256
	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
257

E
Eric Dumazet 已提交
258 259 260 261
	__skb_queue_tail(&txp->rq, skb);
	if (!txp->tasklet_pending) {
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
262 263
	}

264
	return NETDEV_TX_OK;
265 266 267 268
}

/* ndo_stop handler: stop every TX queue of @dev.  Always returns 0. */
static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	return 0;
}

/* ndo_open handler: start every TX queue of @dev.  Always returns 0. */
static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);
	return 0;
}

/* rtnl link-ops validate callback.
 *
 * @tb:   generic link attributes; only IFLA_ADDRESS is examined.
 * @data: type-specific attributes (unused).
 *
 * Returns 0 when no address is supplied or the supplied one is acceptable,
 * -EINVAL when its length is not ETH_ALEN, and -EADDRNOTAVAIL when it is
 * not a valid Ethernet address.
 */
static int ifb_validate(struct nlattr *tb[], struct nlattr *data[])
{
	struct nlattr *addr = tb[IFLA_ADDRESS];

	/* The address attribute is optional. */
	if (!addr)
		return 0;

	if (nla_len(addr) != ETH_ALEN)
		return -EINVAL;

	if (!is_valid_ether_addr(nla_data(addr)))
		return -EADDRNOTAVAIL;

	return 0;
}

/* rtnetlink link type "ifb"; registered in ifb_init_module() and attached
 * to the devices created by ifb_init_one().
 */
static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.priv_size	= sizeof(struct ifb_dev_private),
	.setup		= ifb_setup,
	.validate	= ifb_validate,
};

/* Number of ifb devices to be set up by this module.
 * Note that these legacy devices have one queue.
 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
 */
static int numifbs = 2;
302 303 304
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");

/* Create and register one legacy ifb device at module load.
 *
 * @index: position in the module-load loop; not used here, the device
 *         name comes from the "ifb%d" template.
 *
 * Called from ifb_init_module(), which holds the rtnl lock around it.
 *
 * Returns 0 on success, -ENOMEM when the device cannot be allocated, or
 * the negative error returned by register_netdevice().
 */
static int __init ifb_init_one(int index)
{
	struct net_device *dev_ifb;
	int err;

	dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
			       NET_NAME_UNKNOWN, ifb_setup);

	if (!dev_ifb)
		return -ENOMEM;

	dev_ifb->rtnl_link_ops = &ifb_link_ops;
	err = register_netdevice(dev_ifb);
	if (err < 0)
		goto err;

	return 0;

err:
	/* NOTE(review): only the net_device is released here.  If
	 * register_netdevice() can fail after ifb_dev_init() has run, the
	 * tx_private array it allocated would not be freed -- confirm
	 * against the register_netdevice() error paths.
	 */
	free_netdev(dev_ifb);
	return err;
}

/* Module entry point.
 *
 * Registers the "ifb" link type and then creates @numifbs legacy devices,
 * all under the rtnl lock.  Device creation stops at the first failure, in
 * which case the link type is unregistered again.
 *
 * Returns 0 on success or the first negative error encountered.
 */
static int __init ifb_init_module(void)
{
	int i, err;

	rtnl_lock();
	err = __rtnl_link_register(&ifb_link_ops);
	if (err < 0)
		goto out;

	for (i = 0; i < numifbs && !err; i++) {
		err = ifb_init_one(i);
		/* numifbs is user supplied; yield between devices. */
		cond_resched();
	}
	if (err)
		__rtnl_link_unregister(&ifb_link_ops);

out:
	rtnl_unlock();

	return err;
}

/* Module exit point: unregister the "ifb" link type. */
static void __exit ifb_cleanup_module(void)
{
	rtnl_link_unregister(&ifb_link_ops);
}

/* Module registration and metadata. */
module_init(ifb_init_module);
module_exit(ifb_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_ALIAS_RTNL_LINK("ifb");