ifb.c 8.6 KB
Newer Older
1
/* drivers/net/ifb.c:

	The purpose of this driver is to provide a device that allows
	for sharing of resources:

	1) qdiscs/policies that are per device as opposed to system wide.
	ifb provides a device that traffic can be redirected to, thus
	giving an impression of sharing.

	2) Allows for queueing incoming traffic for shaping instead of
	dropping.

	The original concept is based on what is known as the IMQ
	driver initially written by Martin Devera, later rewritten
	by Patrick McHardy and then maintained by Andre Correa.

	You need the tc action mirror or redirect to feed this device
	packets.

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version
	2 of the License, or (at your option) any later version.

	Authors:	Jamal Hadi Salim (2005)

*/


#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
35
#include <linux/interrupt.h>
36
#include <linux/moduleparam.h>
37
#include <net/pkt_sched.h>
38
#include <net/net_namespace.h>
39 40

/*
 * Initial tx_queue_len given to every ifb device in ifb_setup().
 * ifb_xmit() stops a tx queue once its pending list has reached
 * dev->tx_queue_len packets.
 */
#define TX_Q_LIMIT    32
E
Eric Dumazet 已提交
41 42
struct ifb_q_private {
	struct net_device	*dev;
43
	struct tasklet_struct   ifb_tasklet;
E
Eric Dumazet 已提交
44 45
	int			tasklet_pending;
	int			txqnum;
46
	struct sk_buff_head     rq;
E
Eric Dumazet 已提交
47 48 49
	u64			rx_packets;
	u64			rx_bytes;
	struct u64_stats_sync	rsync;
S
stephen hemminger 已提交
50 51

	struct u64_stats_sync	tsync;
E
Eric Dumazet 已提交
52 53
	u64			tx_packets;
	u64			tx_bytes;
54
	struct sk_buff_head     tq;
E
Eric Dumazet 已提交
55
} ____cacheline_aligned_in_smp;
56

E
Eric Dumazet 已提交
57 58 59
/* Private area of an ifb net_device, reached through netdev_priv(). */
struct ifb_dev_private {
	/* array with one entry per tx queue, allocated in ifb_dev_init() */
	struct ifb_q_private *tx_private;
};
60

61
/* Forward declarations: ifb_netdev_ops refers to these before they are defined. */
static int ifb_open(struct net_device *dev);
static int ifb_close(struct net_device *dev);
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);

E
Eric Dumazet 已提交
65
static void ifb_ri_tasklet(unsigned long _txp)
66
{
E
Eric Dumazet 已提交
67
	struct ifb_q_private *txp = (struct ifb_q_private *)_txp;
68
	struct netdev_queue *txq;
69 70
	struct sk_buff *skb;

E
Eric Dumazet 已提交
71 72 73 74
	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		if (!__netif_tx_trylock(txq))
75
			goto resched;
E
Eric Dumazet 已提交
76 77
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
78 79
	}

E
Eric Dumazet 已提交
80
	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
81 82 83 84
		u32 from = G_TC_FROM(skb->tc_verd);

		skb->tc_verd = 0;
		skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
S
stephen hemminger 已提交
85

E
Eric Dumazet 已提交
86 87 88 89
		u64_stats_update_begin(&txp->tsync);
		txp->tx_packets++;
		txp->tx_bytes += skb->len;
		u64_stats_update_end(&txp->tsync);
90

91
		rcu_read_lock();
E
Eric Dumazet 已提交
92
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
93
		if (!skb->dev) {
94
			rcu_read_unlock();
95
			dev_kfree_skb(skb);
E
Eric Dumazet 已提交
96 97
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
98
				goto resched;
99 100
			break;
		}
101
		rcu_read_unlock();
E
Eric Dumazet 已提交
102
		skb->skb_iif = txp->dev->ifindex;
103

104 105 106
		if (from & AT_EGRESS) {
			dev_queue_xmit(skb);
		} else if (from & AT_INGRESS) {
107
			skb_pull(skb, skb->mac_len);
108
			netif_receive_skb(skb);
109 110
		} else
			BUG();
111 112
	}

113
	if (__netif_tx_trylock(txq)) {
E
Eric Dumazet 已提交
114 115 116 117 118
		skb = skb_peek(&txp->rq);
		if (!skb) {
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
119
		} else {
120
			__netif_tx_unlock(txq);
121 122
			goto resched;
		}
123
		__netif_tx_unlock(txq);
124 125
	} else {
resched:
E
Eric Dumazet 已提交
126 127
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
128 129 130 131
	}

}

S
stephen hemminger 已提交
132 133 134
static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev,
					     struct rtnl_link_stats64 *stats)
{
E
Eric Dumazet 已提交
135 136
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp = dp->tx_private;
S
stephen hemminger 已提交
137
	unsigned int start;
E
Eric Dumazet 已提交
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
	u64 packets, bytes;
	int i;

	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
		do {
			start = u64_stats_fetch_begin_irq(&txp->rsync);
			packets = txp->rx_packets;
			bytes = txp->rx_bytes;
		} while (u64_stats_fetch_retry_irq(&txp->rsync, start));
		stats->rx_packets += packets;
		stats->rx_bytes += bytes;

		do {
			start = u64_stats_fetch_begin_irq(&txp->tsync);
			packets = txp->tx_packets;
			bytes = txp->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&txp->tsync, start));
		stats->tx_packets += packets;
		stats->tx_bytes += bytes;
	}
S
stephen hemminger 已提交
158 159 160 161 162 163
	stats->rx_dropped = dev->stats.rx_dropped;
	stats->tx_dropped = dev->stats.tx_dropped;

	return stats;
}

E
Eric Dumazet 已提交
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
/*
 * ndo_init handler: allocate and initialise the per-queue state.
 *
 * One zeroed struct ifb_q_private is set up per tx queue (queue lists,
 * stats sync objects, tasklet) and the matching tx queue is started.
 *
 * Returns 0 on success or -ENOMEM when the array cannot be allocated.
 */
static int ifb_dev_init(struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *queues;
	int qi;

	queues = kcalloc(dev->num_tx_queues, sizeof(*queues), GFP_KERNEL);
	if (!queues)
		return -ENOMEM;
	dp->tx_private = queues;

	for (qi = 0; qi < dev->num_tx_queues; qi++) {
		struct ifb_q_private *q = &queues[qi];

		q->dev = dev;
		q->txqnum = qi;
		__skb_queue_head_init(&q->rq);
		__skb_queue_head_init(&q->tq);
		u64_stats_init(&q->rsync);
		u64_stats_init(&q->tsync);
		/* The tasklet receives its own queue structure as argument. */
		tasklet_init(&q->ifb_tasklet, ifb_ri_tasklet,
			     (unsigned long)q);
		netif_tx_start_queue(netdev_get_tx_queue(dev, qi));
	}

	return 0;
}
S
stephen hemminger 已提交
187

S
Stephen Hemminger 已提交
188 189 190
static const struct net_device_ops ifb_netdev_ops = {
	.ndo_open	= ifb_open,
	.ndo_stop	= ifb_close,
S
stephen hemminger 已提交
191
	.ndo_get_stats64 = ifb_stats64,
192 193
	.ndo_start_xmit	= ifb_xmit,
	.ndo_validate_addr = eth_validate_addr,
E
Eric Dumazet 已提交
194
	.ndo_init	= ifb_dev_init,
S
Stephen Hemminger 已提交
195 196
};

197
/* Feature flags that ifb_setup() enables on every ifb device. */
#define IFB_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA	| \
		      NETIF_F_HW_CSUM					| \
		      NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN	| \
		      NETIF_F_GSO_ENCAP_ALL				| \
		      NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX)
E
Eric Dumazet 已提交
202

E
Eric Dumazet 已提交
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
/*
 * dev->destructor hook (installed by ifb_setup()).
 *
 * Kills the tasklet of every tx queue, frees any packets still sitting on
 * the queue lists, then releases the per-queue array and the net_device.
 */
static void ifb_dev_free(struct net_device *dev)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	int qi;

	for (qi = 0; qi < dev->num_tx_queues; qi++) {
		struct ifb_q_private *q = &dp->tx_private[qi];

		/* Stop the tasklet before touching the lists it works on. */
		tasklet_kill(&q->ifb_tasklet);
		__skb_queue_purge(&q->rq);
		__skb_queue_purge(&q->tq);
	}

	kfree(dp->tx_private);
	free_netdev(dev);
}

P
Patrick McHardy 已提交
218
static void ifb_setup(struct net_device *dev)
219 220
{
	/* Initialize the device structure. */
S
Stephen Hemminger 已提交
221
	dev->netdev_ops = &ifb_netdev_ops;
222 223 224 225

	/* Fill in device structure with ethernet-generic values. */
	ether_setup(dev);
	dev->tx_queue_len = TX_Q_LIMIT;
S
Stephen Hemminger 已提交
226

E
Eric Dumazet 已提交
227
	dev->features |= IFB_FEATURES;
E
Eric Dumazet 已提交
228 229
	dev->hw_features |= dev->features;
	dev->hw_enc_features |= dev->features;
230 231
	dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
					       NETIF_F_HW_VLAN_STAG_TX);
E
Eric Dumazet 已提交
232

233 234
	dev->flags |= IFF_NOARP;
	dev->flags &= ~IFF_MULTICAST;
235 236
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	netif_keep_dst(dev);
237
	eth_hw_addr_random(dev);
E
Eric Dumazet 已提交
238
	dev->destructor = ifb_dev_free;
239 240
}

241
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
242
{
E
Eric Dumazet 已提交
243
	struct ifb_dev_private *dp = netdev_priv(dev);
244
	u32 from = G_TC_FROM(skb->tc_verd);
E
Eric Dumazet 已提交
245
	struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
246

E
Eric Dumazet 已提交
247 248 249 250
	u64_stats_update_begin(&txp->rsync);
	txp->rx_packets++;
	txp->rx_bytes += skb->len;
	u64_stats_update_end(&txp->rsync);
251

252
	if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
253
		dev_kfree_skb(skb);
S
stephen hemminger 已提交
254
		dev->stats.rx_dropped++;
255
		return NETDEV_TX_OK;
256 257
	}

E
Eric Dumazet 已提交
258 259
	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
260

E
Eric Dumazet 已提交
261 262 263 264
	__skb_queue_tail(&txp->rq, skb);
	if (!txp->tasklet_pending) {
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
265 266
	}

267
	return NETDEV_TX_OK;
268 269 270 271
}

/* ndo_stop handler: stop every tx queue of @dev. Always returns 0. */
static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);

	return 0;
}

/* ndo_open handler: start every tx queue of @dev. Always returns 0. */
static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);

	return 0;
}

282 283 284 285 286 287 288 289 290 291 292
/*
 * rtnl_link_ops validate hook.
 *
 * Only an optional IFLA_ADDRESS attribute is checked.
 *
 * Returns 0 when the attribute is absent or valid, -EINVAL when its
 * length is not ETH_ALEN, and -EADDRNOTAVAIL when it is not a valid
 * Ethernet address.
 */
static int ifb_validate(struct nlattr *tb[], struct nlattr *data[])
{
	struct nlattr *addr = tb[IFLA_ADDRESS];

	if (!addr)
		return 0;

	if (nla_len(addr) != ETH_ALEN)
		return -EINVAL;

	if (!is_valid_ether_addr(nla_data(addr)))
		return -EADDRNOTAVAIL;

	return 0;
}

P
Patrick McHardy 已提交
293 294
/* rtnetlink link type "ifb", i.e. "ip link add ... type ifb". */
static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.setup		= ifb_setup,
	.validate	= ifb_validate,
	.priv_size	= sizeof(struct ifb_dev_private),
};

E
Eric Dumazet 已提交
300 301 302 303 304
/* Number of ifb devices to be set up by this module.
 * Note that these legacy devices have one queue.
 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
 */
static int numifbs = 2;
305 306 307
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");

308 309 310 311 312
/*
 * Create and register one ifb device at module load time.
 *
 * @index: ordinal of the device being created; unused, the name comes
 *         from the "ifb%d" template.
 *
 * Returns 0 on success, -ENOMEM when the net_device cannot be allocated,
 * or the negative error returned by register_netdevice().
 */
static int __init ifb_init_one(int index)
{
	struct net_device *dev_ifb;
	struct ifb_dev_private *dp;
	int err;

	dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
			       NET_NAME_UNKNOWN, ifb_setup);
	if (!dev_ifb)
		return -ENOMEM;

	dev_ifb->rtnl_link_ops = &ifb_link_ops;
	err = register_netdevice(dev_ifb);
	if (err < 0)
		goto err;

	return 0;

err:
	/*
	 * dev->destructor (ifb_dev_free) is not invoked for a device whose
	 * registration failed, so the per-queue array that ifb_dev_init()
	 * may already have allocated must be released here or it leaks.
	 * tx_private is still NULL when ->ndo_init never ran or failed
	 * (the private area is zero-initialised), and kfree(NULL) is a
	 * no-op. The tasklets were initialised but never scheduled, so
	 * nothing else needs tearing down.
	 */
	dp = netdev_priv(dev_ifb);
	kfree(dp->tx_private);
	dp->tx_private = NULL;
	free_netdev(dev_ifb);
	return err;
}
330 331

/*
 * Module entry point.
 *
 * Under the rtnl lock, registers the "ifb" link type and creates numifbs
 * devices. If creating any device fails, the link type is unregistered
 * again.
 *
 * Returns 0 on success or the first error encountered.
 */
static int __init ifb_init_module(void)
{
	int created = 0;
	int err;

	rtnl_lock();

	err = __rtnl_link_register(&ifb_link_ops);
	if (err < 0)
		goto out;

	while (created < numifbs && !err) {
		err = ifb_init_one(created);
		created++;
		/* Creating many devices can take a while; let others run. */
		cond_resched();
	}

	if (err)
		__rtnl_link_unregister(&ifb_link_ops);

out:
	rtnl_unlock();

	return err;
}
352 353 354

/* Module exit point: unregister the "ifb" link type. */
static void __exit ifb_cleanup_module(void)
{
	rtnl_link_unregister(&ifb_link_ops);
}

/* Module entry/exit hooks. */
module_init(ifb_init_module);
module_exit(ifb_cleanup_module);

/* Module metadata; the alias lets rtnetlink find this module for "type ifb". */
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ifb");