act_api.c 40.6 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-or-later
L
Linus Torvalds 已提交
2 3 4 5 6 7 8 9 10 11
/*
 * net/sched/act_api.c	Packet action API.
 *
 * Author:	Jamal Hadi Salim
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
12
#include <linux/slab.h>
L
Linus Torvalds 已提交
13 14 15
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
16
#include <linux/err.h>
17
#include <linux/module.h>
18 19
#include <net/net_namespace.h>
#include <net/sock.h>
L
Linus Torvalds 已提交
20
#include <net/sch_generic.h>
21
#include <net/pkt_cls.h>
L
Linus Torvalds 已提交
22
#include <net/act_api.h>
23
#include <net/netlink.h>
L
Linus Torvalds 已提交
24

25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
#ifdef CONFIG_INET
DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count);
EXPORT_SYMBOL_GPL(tcf_frag_xmit_count);
#endif

/* Hand @skb to the transmit routine @xmit.
 *
 * With CONFIG_INET enabled and the tcf_frag_xmit_count static key switched
 * on, the skb is passed to sch_frag_xmit_hook() together with @xmit instead
 * of calling @xmit directly.
 *
 * Returns the value produced by whichever routine was invoked.
 */
int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb))
{
#ifdef CONFIG_INET
	if (!static_branch_unlikely(&tcf_frag_xmit_count))
		return xmit(skb);

	return sch_frag_xmit_hook(skb, xmit);
#else
	return xmit(skb);
#endif
}
EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit);

41 42 43
static void tcf_action_goto_chain_exec(const struct tc_action *a,
				       struct tcf_result *res)
{
44
	const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
45 46 47 48

	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
}

49 50 51 52 53 54 55 56 57 58 59 60 61
/* RCU callback: release a tc_cookie (found through its embedded rcu head)
 * together with its data buffer.
 */
static void tcf_free_cookie_rcu(struct rcu_head *p)
{
	struct tc_cookie *c;

	c = container_of(p, struct tc_cookie, rcu);
	kfree(c->data);
	kfree(c);
}

/* Atomically install @new_cookie (which may be NULL) in *@old_cookie.
 * A previously installed cookie, if any, is freed through call_rcu().
 */
static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
				  struct tc_cookie *new_cookie)
{
	struct tc_cookie *prev;

	prev = xchg((__force struct tc_cookie **)old_cookie, new_cookie);
	if (!prev)
		return;

	call_rcu(&prev->rcu, tcf_free_cookie_rcu);
}

67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
			     struct tcf_chain **newchain,
			     struct netlink_ext_ack *extack)
{
	int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL;
	u32 chain_index;

	if (!opcode)
		ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0;
	else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC)
		ret = 0;
	if (ret) {
		NL_SET_ERR_MSG(extack, "invalid control action");
		goto end;
	}

	if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) {
		chain_index = action & TC_ACT_EXT_VAL_MASK;
		if (!tp || !newchain) {
			ret = -EINVAL;
			NL_SET_ERR_MSG(extack,
				       "can't goto NULL proto/chain");
			goto end;
		}
		*newchain = tcf_chain_get_by_act(tp->chain->block, chain_index);
		if (!*newchain) {
			ret = -ENOMEM;
			NL_SET_ERR_MSG(extack,
				       "can't allocate goto_chain");
		}
	}
end:
	return ret;
}
EXPORT_SYMBOL(tcf_action_check_ctrlact);

struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
104
					 struct tcf_chain *goto_chain)
105 106
{
	a->tcfa_action = action;
107
	goto_chain = rcu_replace_pointer(a->goto_chain, goto_chain, 1);
108
	return goto_chain;
109 110 111
}
EXPORT_SYMBOL(tcf_action_set_ctrlact);

C
Cong Wang 已提交
112 113 114 115 116 117
/* XXX: For standalone actions, we don't need a RCU grace period either, because
 * actions are always connected to filters and filters are already destroyed in
 * RCU callbacks, so after a RCU grace period actions are already disconnected
 * from filters. Readers later can not find us.
 */
static void free_tcf(struct tc_action *p)
118
{
119
	struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
120

121
	free_percpu(p->cpu_bstats);
122
	free_percpu(p->cpu_bstats_hw);
123
	free_percpu(p->cpu_qstats);
124

125
	tcf_set_action_cookie(&p->act_cookie, NULL);
126 127
	if (chain)
		tcf_chain_put_by_act(chain);
128

129 130 131
	kfree(p);
}

132
static void tcf_action_cleanup(struct tc_action *p)
133
{
134 135 136
	if (p->ops->cleanup)
		p->ops->cleanup(p);

137
	gen_kill_estimator(&p->tcfa_rate_est);
C
Cong Wang 已提交
138
	free_tcf(p);
139 140
}

141 142 143 144
/* Drop one reference on @p and, when @bind is set, one bind count as well.
 *
 * When the last reference goes away the action is removed from its idr
 * (under idrinfo->lock) and cleaned up.
 *
 * Returns 1 if the action was destroyed, 0 otherwise.
 */
static int __tcf_action_put(struct tc_action *p, bool bind)
{
	struct tcf_idrinfo *idrinfo = p->idrinfo;

	if (!refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
		/* Other references remain; only the bind count changes. */
		if (bind)
			atomic_dec(&p->tcfa_bindcnt);
		return 0;
	}

	/* Last reference dropped; idrinfo->lock is held at this point. */
	if (bind)
		atomic_dec(&p->tcfa_bindcnt);
	idr_remove(&idrinfo->action_idr, p->tcfa_index);
	mutex_unlock(&idrinfo->lock);

	tcf_action_cleanup(p);
	return 1;
}

161
/* Release a reference on @p (a NULL @p is accepted and is a no-op).
 *
 * Returns ACT_P_DELETED if the action was destroyed, -EPERM if a strict,
 * non-bind release was attempted on an action that is still bound, and 0
 * otherwise.
 */
int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
{
	/* Release with strict==1 and bind==0 is only called through the act
	 * API interface (classifiers always bind). The only case in which an
	 * action with a positive reference count and a zero bind count can
	 * exist is when it was also created with the act API (unbinding the
	 * last classifier destroys the action if it was created by a
	 * classifier). So the only case in which the bind count can change
	 * after the initial check is when an unbound action is destroyed by
	 * the act API while a classifier concurrently binds to an action with
	 * the same id. This results either in the creation of a new action
	 * (same behavior as before), or in reuse of the existing action if
	 * the concurrent process increments the reference count before the
	 * action is deleted. Both scenarios are acceptable.
	 */
	if (!p)
		return 0;

	if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0)
		return -EPERM;

	return __tcf_action_put(p, bind) ? ACT_P_DELETED : 0;
}
187
EXPORT_SYMBOL(__tcf_idr_release);
188

189 190
static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
{
191
	struct tc_cookie *act_cookie;
192 193
	u32 cookie_len = 0;

194 195 196 197 198 199
	rcu_read_lock();
	act_cookie = rcu_dereference(act->act_cookie);

	if (act_cookie)
		cookie_len = nla_total_size(act_cookie->len);
	rcu_read_unlock();
200 201 202 203

	return  nla_total_size(0) /* action number nested */
		+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
		+ cookie_len /* TCA_ACT_COOKIE */
204
		+ nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS */
205
		+ nla_total_size(0) /* TCA_ACT_STATS nested */
206
		+ nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */
207 208
		/* TCA_STATS_BASIC */
		+ nla_total_size_64bit(sizeof(struct gnet_stats_basic))
209 210
		/* TCA_STATS_PKT64 */
		+ nla_total_size_64bit(sizeof(u64))
211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
		/* TCA_STATS_QUEUE */
		+ nla_total_size_64bit(sizeof(struct gnet_stats_queue))
		+ nla_total_size(0) /* TCA_OPTIONS nested */
		+ nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
}

/* Add the fixed message overhead (netlink header, struct tcamsg and the
 * nested TCA_ACT_TAB attribute) to the per-action attribute size @sz.
 */
static size_t tcf_action_full_attrs_size(size_t sz)
{
	size_t hdr;

	hdr = NLMSG_HDRLEN			/* struct nlmsghdr */
	      + sizeof(struct tcamsg);

	return hdr
	       + nla_total_size(0)		/* TCA_ACT_TAB nested */
	       + sz;
}

/* Attribute space needed to dump @act: the shared attributes plus whatever
 * the optional ops->get_fill_size() hook reports.
 */
static size_t tcf_action_fill_size(const struct tc_action *act)
{
	size_t total = tcf_action_shared_attrs_size(act);

	if (act->ops->get_fill_size)
		total += act->ops->get_fill_size(act);

	return total;
}

234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
/* Dump the terse view of action @a into @skb: kind, statistics, the index
 * (only when @from_act is set) and the cookie, if one is attached.
 *
 * Returns 0 on success. On failure the skb is trimmed back to where it was
 * on entry and -1 is returned.
 */
static int
tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act)
{
	unsigned char *start = skb_tail_pointer(skb);
	struct tc_cookie *cookie;
	int failed = 0;

	if (nla_put_string(skb, TCA_KIND, a->ops->kind) ||
	    tcf_action_copy_stats(skb, a, 0) ||
	    (from_act && nla_put_u32(skb, TCA_ACT_INDEX, a->tcfa_index)))
		goto nla_put_failure;

	/* The cookie may be swapped concurrently; read it under RCU. */
	rcu_read_lock();
	cookie = rcu_dereference(a->act_cookie);
	if (cookie)
		failed = nla_put(skb, TCA_ACT_COOKIE, cookie->len,
				 cookie->data);
	rcu_read_unlock();

	if (failed)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	nlmsg_trim(skb, start);
	return -1;
}

264
static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
265
			   struct netlink_callback *cb)
266
{
267
	int err = 0, index = -1, s_i = 0, n_i = 0;
268
	u32 act_flags = cb->args[2];
269
	unsigned long jiffy_since = cb->args[3];
270
	struct nlattr *nest;
271 272 273
	struct idr *idr = &idrinfo->action_idr;
	struct tc_action *p;
	unsigned long id = 1;
274
	unsigned long tmp;
275

276
	mutex_lock(&idrinfo->lock);
277 278 279

	s_i = cb->args[0];

280
	idr_for_each_entry_ul(idr, p, tmp, id) {
281 282 283
		index++;
		if (index < s_i)
			continue;
284 285
		if (IS_ERR(p))
			continue;
286 287 288 289 290 291

		if (jiffy_since &&
		    time_after(jiffy_since,
			       (unsigned long)p->tcfa_tm.lastuse))
			continue;

292
		nest = nla_nest_start_noflag(skb, n_i);
293 294
		if (!nest) {
			index--;
295
			goto nla_put_failure;
296
		}
297
		err = (act_flags & TCA_ACT_FLAG_TERSE_DUMP) ?
298 299
			tcf_action_dump_terse(skb, p, true) :
			tcf_action_dump_1(skb, p, 0, 0);
300 301 302 303
		if (err < 0) {
			index--;
			nlmsg_trim(skb, nest);
			goto done;
304
		}
305 306
		nla_nest_end(skb, nest);
		n_i++;
307
		if (!(act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) &&
308 309
		    n_i >= TCA_ACT_MAX_PRIO)
			goto done;
310 311
	}
done:
312 313 314
	if (index >= 0)
		cb->args[0] = index + 1;

315
	mutex_unlock(&idrinfo->lock);
316
	if (n_i) {
317
		if (act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON)
318 319
			cb->args[1] = n_i;
	}
320 321
	return n_i;

322
nla_put_failure:
323
	nla_nest_cancel(skb, nest);
324 325 326
	goto done;
}

327 328 329 330 331 332 333 334 335 336 337 338 339 340
/* Drop one reference on @p without taking idrinfo->lock here.
 *
 * Returns -EPERM if the action is still bound, ACT_P_DELETED if this was the
 * last reference (the action is then removed from the idr and cleaned up),
 * and 0 if other references remain.
 */
static int tcf_idr_release_unsafe(struct tc_action *p)
{
	if (atomic_read(&p->tcfa_bindcnt) > 0)
		return -EPERM;

	if (!refcount_dec_and_test(&p->tcfa_refcnt))
		return 0;

	idr_remove(&p->idrinfo->action_idr, p->tcfa_index);
	tcf_action_cleanup(p);

	return ACT_P_DELETED;
}

341
static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
342
			  const struct tc_action_ops *ops)
343
{
344
	struct nlattr *nest;
345
	int n_i = 0;
346
	int ret = -EINVAL;
347 348 349
	struct idr *idr = &idrinfo->action_idr;
	struct tc_action *p;
	unsigned long id = 1;
350
	unsigned long tmp;
351

352
	nest = nla_nest_start_noflag(skb, 0);
353 354
	if (nest == NULL)
		goto nla_put_failure;
355
	if (nla_put_string(skb, TCA_KIND, ops->kind))
356
		goto nla_put_failure;
357

358
	mutex_lock(&idrinfo->lock);
359
	idr_for_each_entry_ul(idr, p, tmp, id) {
360 361
		if (IS_ERR(p))
			continue;
362
		ret = tcf_idr_release_unsafe(p);
363
		if (ret == ACT_P_DELETED) {
364
			module_put(ops->owner);
365 366
			n_i++;
		} else if (ret < 0) {
367
			mutex_unlock(&idrinfo->lock);
368
			goto nla_put_failure;
369 370
		}
	}
371
	mutex_unlock(&idrinfo->lock);
372

373 374
	if (nla_put_u32(skb, TCA_FCNT, n_i))
		goto nla_put_failure;
375
	nla_nest_end(skb, nest);
376 377

	return n_i;
378
nla_put_failure:
379
	nla_nest_cancel(skb, nest);
380
	return ret;
381 382
}

383 384
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
		       struct netlink_callback *cb, int type,
385 386
		       const struct tc_action_ops *ops,
		       struct netlink_ext_ack *extack)
387
{
388
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
389

390
	if (type == RTM_DELACTION) {
391
		return tcf_del_walker(idrinfo, skb, ops);
392
	} else if (type == RTM_GETACTION) {
393
		return tcf_dump_walker(idrinfo, skb, cb);
394
	} else {
395 396
		WARN(1, "tcf_generic_walker: unknown command %d\n", type);
		NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command");
397 398 399
		return -EINVAL;
	}
}
400
EXPORT_SYMBOL(tcf_generic_walker);
401

402
/* Look up the action stored under @index in @tn.
 *
 * On a hit a reference is taken on the action, it is stored in *@a and true
 * is returned. ERR_PTR placeholder entries count as "not found". On a miss
 * *@a is left untouched and false is returned.
 */
int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
	struct tc_action *found;

	mutex_lock(&idrinfo->lock);
	found = idr_find(&idrinfo->action_idr, index);
	if (!found || IS_ERR(found))
		found = NULL;
	else
		refcount_inc(&found->tcfa_refcnt);
	mutex_unlock(&idrinfo->lock);

	if (!found)
		return false;

	*a = found;
	return true;
}
421
EXPORT_SYMBOL(tcf_idr_search);
422

423
/* Drop one reference on the action stored under @index in @idrinfo and
 * destroy it if that was the last one.
 *
 * Returns 0 on success (whether or not the action was destroyed), -ENOENT if
 * no action is stored under @index, and -EPERM if the action is still bound.
 * When the action is destroyed, a module reference on its ops owner is
 * dropped as well.
 */
static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index)
{
	struct tc_action *p;

	mutex_lock(&idrinfo->lock);
	p = idr_find(&idrinfo->action_idr, index);
	/* An ERR_PTR entry is only a reservation made by
	 * tcf_idr_check_alloc() for an action that is not set up yet. It must
	 * never be dereferenced, so treat it like a missing entry, as
	 * tcf_idr_search() does.
	 */
	if (IS_ERR_OR_NULL(p)) {
		mutex_unlock(&idrinfo->lock);
		return -ENOENT;
	}

	if (atomic_read(&p->tcfa_bindcnt)) {
		mutex_unlock(&idrinfo->lock);
		return -EPERM;
	}

	if (refcount_dec_and_test(&p->tcfa_refcnt)) {
		/* Read the owner now; @p is freed by the cleanup below. */
		struct module *owner = p->ops->owner;

		WARN_ON(p != idr_remove(&idrinfo->action_idr,
					p->tcfa_index));
		mutex_unlock(&idrinfo->lock);

		tcf_action_cleanup(p);
		module_put(owner);
		return 0;
	}

	mutex_unlock(&idrinfo->lock);
	return 0;
}

456 457
/* Allocate and initialise a new action of @ops->size bytes.
 *
 * @tn:       per-netns action state; its idrinfo is recorded in the action
 * @index:    index stored in the action (tcfa_index)
 * @est:      optional rate estimator attribute; when set a new estimator is
 *            created for the action
 * @a:        receives the new action on success
 * @ops:      action operations, stored in the action
 * @bind:     when non-zero the bind count starts at 1
 * @cpustats: when true per-cpu statistics are allocated
 * @flags:    stored in tcfa_flags
 *
 * The reference count of the new action starts at 1. The action is not
 * inserted into the idr here.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error returned
 * by gen_new_estimator().
 */
int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
		   struct tc_action **a, const struct tc_action_ops *ops,
		   int bind, bool cpustats, u32 flags)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
	struct tc_action *p;
	int err = -ENOMEM;

	p = kzalloc(ops->size, GFP_KERNEL);
	if (unlikely(!p))
		return -ENOMEM;

	refcount_set(&p->tcfa_refcnt, 1);
	if (bind)
		atomic_set(&p->tcfa_bindcnt, 1);

	if (cpustats) {
		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
		if (!p->cpu_bstats)
			goto free_action;

		p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
		if (!p->cpu_bstats_hw)
			goto free_bstats;

		p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
		if (!p->cpu_qstats)
			goto free_bstats_hw;
	}

	spin_lock_init(&p->tcfa_lock);
	p->tcfa_index = index;
	p->tcfa_tm.install = jiffies;
	p->tcfa_tm.lastuse = jiffies;
	p->tcfa_tm.firstuse = 0;
	p->tcfa_flags = flags;

	if (est) {
		err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
					&p->tcfa_rate_est,
					&p->tcfa_lock, NULL, est);
		if (err)
			goto free_qstats;
	}

	p->idrinfo = idrinfo;
	p->ops = ops;
	*a = p;

	return 0;

	/* Unwind in reverse order of allocation. */
free_qstats:
	free_percpu(p->cpu_qstats);
free_bstats_hw:
	free_percpu(p->cpu_bstats_hw);
free_bstats:
	free_percpu(p->cpu_bstats);
free_action:
	kfree(p);
	return err;
}
509
EXPORT_SYMBOL(tcf_idr_create);
510

511 512 513 514 515 516 517 518 519 520 521
/* Wrapper around tcf_idr_create() that derives the cpustats argument from
 * @flags: per-cpu stats are requested unless TCA_ACT_FLAGS_NO_PERCPU_STATS
 * is set.
 */
int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
			      struct nlattr *est, struct tc_action **a,
			      const struct tc_action_ops *ops, int bind,
			      u32 flags)
{
	bool cpustats = !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS);

	return tcf_idr_create(tn, index, est, a, ops, bind, cpustats, flags);
}
EXPORT_SYMBOL(tcf_idr_create_from_flags);

522 523 524 525 526 527
/* Clean up an idr index that was allocated but never initialised.
 *
 * The entry removed is expected to be the ERR_PTR(-EBUSY) placeholder left
 * by tcf_idr_check_alloc(); anything else triggers a warning.
 */
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
	void *removed;

	mutex_lock(&idrinfo->lock);
	removed = idr_remove(&idrinfo->action_idr, index);
	WARN_ON(!IS_ERR(removed));
	mutex_unlock(&idrinfo->lock);
}
EXPORT_SYMBOL(tcf_idr_cleanup);

/* Check if an action with the specified index exists. If the action is
 * found, increment its reference counter (and its bind counter when @bind is
 * set), store it in *@a and return 1. Otherwise set *@a to NULL, insert a
 * temporary error pointer (to prevent concurrent users from inserting
 * actions with the same index) and return 0, or return the idr_alloc_u32()
 * error if the index could not be reserved.
 *
 * A zero *@index asks for any free index starting from 1; the chosen index
 * is written back to *@index.
 */
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
			struct tc_action **a, int bind)
{
	struct tcf_idrinfo *idrinfo = tn->idrinfo;
	struct idr *idr = &idrinfo->action_idr;
	struct tc_action *p;
	u32 max;
	int ret;

	/* Leave this loop with idrinfo->lock held. */
	for (;;) {
		p = NULL;
		mutex_lock(&idrinfo->lock);
		if (!*index)
			break;

		p = idr_find(idr, *index);
		if (!IS_ERR(p))
			break;

		/* This means that another process allocated the index but
		 * did not assign the pointer yet; drop the lock and retry.
		 */
		mutex_unlock(&idrinfo->lock);
	}

	if (p) {
		refcount_inc(&p->tcfa_refcnt);
		if (bind)
			atomic_inc(&p->tcfa_bindcnt);
		*a = p;
		ret = 1;
	} else {
		if (*index) {
			/* Caller asked for exactly this index. */
			max = *index;
		} else {
			*index = 1;
			max = UINT_MAX;
		}
		*a = NULL;
		ret = idr_alloc_u32(idr, NULL, index, max, GFP_KERNEL);
		if (!ret)
			idr_replace(idr, ERR_PTR(-EBUSY), *index);
	}

	mutex_unlock(&idrinfo->lock);
	return ret;
}
EXPORT_SYMBOL(tcf_idr_check_alloc);

588 589
void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
			 struct tcf_idrinfo *idrinfo)
590
{
591 592 593 594
	struct idr *idr = &idrinfo->action_idr;
	struct tc_action *p;
	int ret;
	unsigned long id = 1;
595
	unsigned long tmp;
596

597
	idr_for_each_entry_ul(idr, p, tmp, id) {
598 599 600 601 602
		ret = __tcf_idr_release(p, false, true);
		if (ret == ACT_P_DELETED)
			module_put(ops->owner);
		else if (ret < 0)
			return;
603
	}
604
	idr_destroy(&idrinfo->action_idr);
605
}
606
EXPORT_SYMBOL(tcf_idrinfo_destroy);
607

608
static LIST_HEAD(act_base);
L
Linus Torvalds 已提交
609 610
static DEFINE_RWLOCK(act_mod_lock);

611 612
int tcf_register_action(struct tc_action_ops *act,
			struct pernet_operations *ops)
L
Linus Torvalds 已提交
613
{
614
	struct tc_action_ops *a;
615
	int ret;
L
Linus Torvalds 已提交
616

617
	if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
618 619
		return -EINVAL;

620 621 622 623 624 625 626 627
	/* We have to register pernet ops before making the action ops visible,
	 * otherwise tcf_action_init_1() could get a partially initialized
	 * netns.
	 */
	ret = register_pernet_subsys(ops);
	if (ret)
		return ret;

L
Linus Torvalds 已提交
628
	write_lock(&act_mod_lock);
629
	list_for_each_entry(a, &act_base, head) {
630
		if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) {
L
Linus Torvalds 已提交
631
			write_unlock(&act_mod_lock);
632
			unregister_pernet_subsys(ops);
L
Linus Torvalds 已提交
633 634 635
			return -EEXIST;
		}
	}
636
	list_add_tail(&act->head, &act_base);
L
Linus Torvalds 已提交
637
	write_unlock(&act_mod_lock);
638

L
Linus Torvalds 已提交
639 640
	return 0;
}
641
EXPORT_SYMBOL(tcf_register_action);
L
Linus Torvalds 已提交
642

643 644
int tcf_unregister_action(struct tc_action_ops *act,
			  struct pernet_operations *ops)
L
Linus Torvalds 已提交
645
{
646
	struct tc_action_ops *a;
L
Linus Torvalds 已提交
647 648 649
	int err = -ENOENT;

	write_lock(&act_mod_lock);
650 651 652 653
	list_for_each_entry(a, &act_base, head) {
		if (a == act) {
			list_del(&act->head);
			err = 0;
L
Linus Torvalds 已提交
654
			break;
655
		}
L
Linus Torvalds 已提交
656 657
	}
	write_unlock(&act_mod_lock);
658 659
	if (!err)
		unregister_pernet_subsys(ops);
L
Linus Torvalds 已提交
660 661
	return err;
}
662
EXPORT_SYMBOL(tcf_unregister_action);
L
Linus Torvalds 已提交
663 664 665 666

/* lookup by name */
static struct tc_action_ops *tc_lookup_action_n(char *kind)
{
667
	struct tc_action_ops *a, *res = NULL;
L
Linus Torvalds 已提交
668 669 670

	if (kind) {
		read_lock(&act_mod_lock);
671
		list_for_each_entry(a, &act_base, head) {
L
Linus Torvalds 已提交
672
			if (strcmp(kind, a->kind) == 0) {
673 674
				if (try_module_get(a->owner))
					res = a;
L
Linus Torvalds 已提交
675 676 677 678 679
				break;
			}
		}
		read_unlock(&act_mod_lock);
	}
680
	return res;
L
Linus Torvalds 已提交
681 682
}

683 684
/* lookup by nlattr */
static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
L
Linus Torvalds 已提交
685
{
686
	struct tc_action_ops *a, *res = NULL;
L
Linus Torvalds 已提交
687 688 689

	if (kind) {
		read_lock(&act_mod_lock);
690
		list_for_each_entry(a, &act_base, head) {
691
			if (nla_strcmp(kind, a->kind) == 0) {
692 693
				if (try_module_get(a->owner))
					res = a;
L
Linus Torvalds 已提交
694 695 696 697 698
				break;
			}
		}
		read_unlock(&act_mod_lock);
	}
699
	return res;
L
Linus Torvalds 已提交
700 701
}

702
/* TCA_ACT_MAX_PRIO is 32, so counts go up to 32 */
703
#define TCA_ACT_MAX_PRIO_MASK 0x1FF
704 705
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
		    int nr_actions, struct tcf_result *res)
L
Linus Torvalds 已提交
706
{
707 708
	u32 jmp_prgcnt = 0;
	u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */
709 710
	int i;
	int ret = TC_ACT_OK;
L
Linus Torvalds 已提交
711

712 713 714
	if (skb_skip_tc_classify(skb))
		return TC_ACT_OK;

715
restart_act_graph:
716 717 718
	for (i = 0; i < nr_actions; i++) {
		const struct tc_action *a = actions[i];

719 720 721 722
		if (jmp_prgcnt > 0) {
			jmp_prgcnt -= 1;
			continue;
		}
L
Linus Torvalds 已提交
723
repeat:
724 725 726
		ret = a->ops->act(skb, a, res);
		if (ret == TC_ACT_REPEAT)
			goto repeat;	/* we need a ttl - JHS */
727

728
		if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
729 730 731 732 733 734 735 736 737 738 739
			jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
			if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) {
				/* faulty opcode, stop pipeline */
				return TC_ACT_OK;
			} else {
				jmp_ttl -= 1;
				if (jmp_ttl > 0)
					goto restart_act_graph;
				else /* faulty graph, stop pipeline */
					return TC_ACT_OK;
			}
740
		} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
741 742 743 744
			if (unlikely(!rcu_access_pointer(a->goto_chain))) {
				net_warn_ratelimited("can't go to NULL chain!\n");
				return TC_ACT_SHOT;
			}
745
			tcf_action_goto_chain_exec(a, res);
746 747
		}

748
		if (ret != TC_ACT_PIPE)
749
			break;
L
Linus Torvalds 已提交
750
	}
751

L
Linus Torvalds 已提交
752 753
	return ret;
}
754
EXPORT_SYMBOL(tcf_action_exec);
L
Linus Torvalds 已提交
755

756
int tcf_action_destroy(struct tc_action *actions[], int bind)
L
Linus Torvalds 已提交
757
{
758
	const struct tc_action_ops *ops;
759 760
	struct tc_action *a;
	int ret = 0, i;
L
Linus Torvalds 已提交
761

762 763 764
	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
		a = actions[i];
		actions[i] = NULL;
765
		ops = a->ops;
766
		ret = __tcf_idr_release(a, bind, true);
767
		if (ret == ACT_P_DELETED)
768
			module_put(ops->owner);
769 770
		else if (ret < 0)
			return ret;
L
Linus Torvalds 已提交
771
	}
772
	return ret;
L
Linus Torvalds 已提交
773 774
}

775 776 777 778 779
static int tcf_action_put(struct tc_action *p)
{
	return __tcf_action_put(p, false);
}

780
/* Put all actions in this array, skip those NULL's. */
781
static void tcf_action_put_many(struct tc_action *actions[])
782
{
783
	int i;
784

785
	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
786
		struct tc_action *a = actions[i];
787
		const struct tc_action_ops *ops;
788

789 790 791
		if (!a)
			continue;
		ops = a->ops;
792 793 794 795 796
		if (tcf_action_put(a))
			module_put(ops->owner);
	}
}

L
Linus Torvalds 已提交
797 798 799 800 801 802
int
tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
	return a->ops->dump(skb, a, bind, ref);
}

803 804 805 806 807 808 809
int
tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
	int err = -EINVAL;
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nest;

810
	if (tcf_action_dump_terse(skb, a, false))
811 812
		goto nla_put_failure;

813 814 815 816
	if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
	    nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
			       a->hw_stats, TCA_ACT_HW_STATS_ANY))
		goto nla_put_failure;
817

818 819 820 821 822
	if (a->used_hw_stats_valid &&
	    nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS,
			       a->used_hw_stats, TCA_ACT_HW_STATS_ANY))
		goto nla_put_failure;

823 824 825 826
	if (a->tcfa_flags &&
	    nla_put_bitfield32(skb, TCA_ACT_FLAGS,
			       a->tcfa_flags, a->tcfa_flags))
		goto nla_put_failure;
827

828
	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
829 830
	if (nest == NULL)
		goto nla_put_failure;
E
Eric Dumazet 已提交
831 832
	err = tcf_action_dump_old(skb, a, bind, ref);
	if (err > 0) {
833
		nla_nest_end(skb, nest);
L
Linus Torvalds 已提交
834 835 836
		return err;
	}

837
nla_put_failure:
838
	nlmsg_trim(skb, b);
L
Linus Torvalds 已提交
839 840
	return -1;
}
841
EXPORT_SYMBOL(tcf_action_dump_1);
L
Linus Torvalds 已提交
842

843
/* Dump the actions in @actions (up to the first NULL slot or
 * TCA_ACT_MAX_PRIO entries) into @skb, each in its own nested attribute
 * numbered from 1.
 *
 * With @terse set only the terse attributes are dumped; otherwise the full
 * tcf_action_dump_1() output is produced.
 *
 * Returns 0 on success. On failure the nest being built is cancelled and
 * -EINVAL (nest could not be started) or the dump error is returned.
 */
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
		    int bind, int ref, bool terse)
{
	struct nlattr *nest;
	struct tc_action *a;
	int err = -EINVAL;
	int i;

	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
		a = actions[i];

		nest = nla_nest_start_noflag(skb, i + 1);
		if (!nest) {
			err = -EINVAL;
			goto cancel;
		}

		if (terse)
			err = tcf_action_dump_terse(skb, a, false);
		else
			err = tcf_action_dump_1(skb, a, bind, ref);
		if (err < 0)
			goto cancel;

		nla_nest_end(skb, nest);
	}

	return 0;

cancel:
	nla_nest_cancel(skb, nest);
	return err;
}

871
static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
872
{
873 874 875 876 877 878 879 880
	struct tc_cookie *c = kzalloc(sizeof(*c), GFP_KERNEL);
	if (!c)
		return NULL;

	c->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL);
	if (!c->data) {
		kfree(c);
		return NULL;
881
	}
882
	c->len = nla_len(tb[TCA_ACT_COOKIE]);
883

884
	return c;
885 886
}

887
static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr)
888
{
889
	struct nla_bitfield32 hw_stats_bf;
890 891 892 893 894

	/* If the user did not pass the attr, that means he does
	 * not care about the type. Return "any" in that case
	 * which is setting on all supported types.
	 */
895 896 897 898
	if (!hw_stats_attr)
		return TCA_ACT_HW_STATS_ANY;
	hw_stats_bf = nla_get_bitfield32(hw_stats_attr);
	return hw_stats_bf.value;
899 900
}

901
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
902
	[TCA_ACT_KIND]		= { .type = NLA_STRING },
903 904 905 906
	[TCA_ACT_INDEX]		= { .type = NLA_U32 },
	[TCA_ACT_COOKIE]	= { .type = NLA_BINARY,
				    .len = TC_COOKIE_MAX_SIZE },
	[TCA_ACT_OPTIONS]	= { .type = NLA_NESTED },
907 908
	[TCA_ACT_FLAGS]		= NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS),
	[TCA_ACT_HW_STATS]	= NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
909 910
};

911
void tcf_idr_insert_many(struct tc_action *actions[])
912
{
913
	int i;
914

915 916 917 918 919 920 921 922 923 924 925 926 927 928
	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
		struct tc_action *a = actions[i];
		struct tcf_idrinfo *idrinfo;

		if (!a)
			continue;
		idrinfo = a->idrinfo;
		mutex_lock(&idrinfo->lock);
		/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if
		 * it is just created, otherwise this is just a nop.
		 */
		idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
		mutex_unlock(&idrinfo->lock);
	}
929 930
}

931 932 933
/* Find the action ops for an action, loading the module if necessary.
 *
 * @name:      action kind; when NULL the kind is taken from the
 *             TCA_ACT_KIND attribute nested in @nla
 * @nla:       nested action attributes, parsed only when @name is NULL
 * @rtnl_held: true if the caller holds RTNL; it is then dropped around the
 *             module load request
 * @extack:    receives a message describing the failure, if any
 *
 * On success the ops are returned with a module reference held. Otherwise an
 * ERR_PTR is returned: the attribute parsing error, -EINVAL if the kind is
 * missing or does not fit in IFNAMSIZ bytes, -EAGAIN if the ops only became
 * available after requesting the module (the caller has to replay the
 * request), or -ENOENT if the ops could not be found.
 */
struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
					 bool rtnl_held,
					 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_ACT_MAX + 1];
	struct tc_action_ops *a_o;
	char act_name[IFNAMSIZ];
	struct nlattr *kind;
	int err;

	if (name == NULL) {
		err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
						  tcf_action_policy, extack);
		if (err < 0)
			return ERR_PTR(err);
		err = -EINVAL;
		kind = tb[TCA_ACT_KIND];
		if (!kind) {
			NL_SET_ERR_MSG(extack, "TC action kind must be specified");
			return ERR_PTR(err);
		}
		if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) {
			NL_SET_ERR_MSG(extack, "TC action name too long");
			return ERR_PTR(err);
		}
	} else {
		/* strscpy() reports truncation with a negative return value
		 * and, unlike the deprecated strlcpy(), never reads the
		 * source beyond the destination size.
		 */
		if (strscpy(act_name, name, IFNAMSIZ) < 0) {
			NL_SET_ERR_MSG(extack, "TC action name too long");
			return ERR_PTR(-EINVAL);
		}
	}

	a_o = tc_lookup_action_n(act_name);
	if (a_o == NULL) {
#ifdef CONFIG_MODULES
		if (rtnl_held)
			rtnl_unlock();
		request_module("act_%s", act_name);
		if (rtnl_held)
			rtnl_lock();

		a_o = tc_lookup_action_n(act_name);

		/* We dropped the RTNL semaphore in order to
		 * perform the module load.  So, even if we
		 * succeeded in loading the module we have to
		 * tell the caller to replay the request.  We
		 * indicate this using -EAGAIN.
		 */
		if (a_o != NULL) {
			module_put(a_o->owner);
			return ERR_PTR(-EAGAIN);
		}
#endif
		NL_SET_ERR_MSG(extack, "Failed to load TC action module");
		return ERR_PTR(-ENOENT);
	}

	return a_o;
}

/* Initialise a single action through the init() callback of @a_o.
 *
 * When @name is NULL, @nla is parsed as a nested set of TCA_ACT_*
 * attributes: the cookie, hw stats and flags are taken from it and
 * TCA_ACT_OPTIONS is what gets passed to init(). When @name is set, @nla is
 * handed to init() unchanged.
 *
 * On success the action is returned and *@init_res holds the value returned
 * by init(). Unless that value is ACT_P_CREATED, one module reference on
 * @a_o->owner is dropped. On failure an ERR_PTR is returned and a cookie
 * allocated here is freed.
 */
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
				    struct nlattr *nla, struct nlattr *est,
				    char *name, int ovr, int bind,
				    struct tc_action_ops *a_o, int *init_res,
				    bool rtnl_held,
				    struct netlink_ext_ack *extack)
{
	struct nla_bitfield32 flags = { 0, 0 };
	u8 hw_stats = TCA_ACT_HW_STATS_ANY;
	struct nlattr *tb[TCA_ACT_MAX + 1];
	struct tc_cookie *cookie = NULL;
	struct nlattr *opts = nla;
	struct tc_action *a;
	int err;

	/* backward compatibility for policer */
	if (!name) {
		err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
						  tcf_action_policy, extack);
		if (err < 0)
			return ERR_PTR(err);

		if (tb[TCA_ACT_COOKIE]) {
			cookie = nla_memdup_cookie(tb);
			if (!cookie) {
				NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
				err = -ENOMEM;
				goto err_out;
			}
		}
		hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
		if (tb[TCA_ACT_FLAGS])
			flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);

		opts = tb[TCA_ACT_OPTIONS];
	}

	err = a_o->init(net, opts, est, &a, ovr, bind, rtnl_held, tp,
			flags.value, extack);
	if (err < 0)
		goto err_out;
	*init_res = err;

	if (!name) {
		/* A cookie exists here exactly when TCA_ACT_COOKIE was
		 * passed; a failed duplication has already bailed out.
		 */
		if (cookie)
			tcf_set_action_cookie(&a->act_cookie, cookie);
		a->hw_stats = hw_stats;
	}

	/* module count goes up only when brand new policy is created
	 * if it exists and is only bound to in a_o->init() then
	 * ACT_P_CREATED is not returned (a zero is).
	 */
	if (err != ACT_P_CREATED)
		module_put(a_o->owner);

	return a;

err_out:
	if (cookie) {
		kfree(cookie->data);
		kfree(cookie);
	}
	return ERR_PTR(err);
}

1057 1058
/* Returns the number of initialized actions or a negative error. */

1059 1060
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
		    struct nlattr *est, char *name, int ovr, int bind,
1061
		    struct tc_action *actions[], int init_res[], size_t *attr_size,
1062
		    bool rtnl_held, struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
1063
{
1064
	struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
E
Eric Dumazet 已提交
1065
	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
1066
	struct tc_action *act;
1067
	size_t sz = 0;
1068
	int err;
L
Linus Torvalds 已提交
1069 1070
	int i;

1071 1072
	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
					  extack);
1073
	if (err < 0)
1074
		return err;
L
Linus Torvalds 已提交
1075

1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086
	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
		struct tc_action_ops *a_o;

		a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
		if (IS_ERR(a_o)) {
			err = PTR_ERR(a_o);
			goto err_mod;
		}
		ops[i - 1] = a_o;
	}

1087
	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
1088
		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
1089 1090
					ops[i - 1], &init_res[i - 1], rtnl_held,
					extack);
1091 1092
		if (IS_ERR(act)) {
			err = PTR_ERR(act);
L
Linus Torvalds 已提交
1093
			goto err;
1094
		}
1095
		sz += tcf_action_fill_size(act);
1096 1097
		/* Start from index 0 */
		actions[i - 1] = act;
L
Linus Torvalds 已提交
1098
	}
1099

1100 1101 1102 1103 1104
	/* We have to commit them all together, because if any error happened in
	 * between, we could not handle the failure gracefully.
	 */
	tcf_idr_insert_many(actions);

1105
	*attr_size = tcf_action_full_attrs_size(sz);
1106
	return i - 1;
L
Linus Torvalds 已提交
1107 1108

err:
1109
	tcf_action_destroy(actions, bind);
1110 1111 1112 1113 1114
err_mod:
	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
		if (ops[i])
			module_put(ops[i]->owner);
	}
1115
	return err;
L
Linus Torvalds 已提交
1116 1117
}

1118 1119
/* Account @bytes, @packets and @drops to action @a.  When @hw is set the
 * byte/packet counts are also added to the hardware counters.  Per-CPU
 * counters are used when @a has them, the embedded counters otherwise.
 */
void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
			     u64 drops, bool hw)
{
	if (!a->cpu_bstats) {
		/* No per-CPU stats: update the counters embedded in @a. */
		_bstats_update(&a->tcfa_bstats, bytes, packets);
		a->tcfa_qstats.drops += drops;
		if (hw)
			_bstats_update(&a->tcfa_bstats_hw, bytes, packets);
		return;
	}

	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
	this_cpu_ptr(a->cpu_qstats)->drops += drops;
	if (hw)
		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
				   bytes, packets);
}
EXPORT_SYMBOL(tcf_action_update_stats);

1139
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
L
Linus Torvalds 已提交
1140 1141 1142 1143
			  int compat_mode)
{
	int err = 0;
	struct gnet_dump d;
1144

1145
	if (p == NULL)
L
Linus Torvalds 已提交
1146 1147 1148
		goto errout;

	/* compat_mode being true specifies a call that is supposed
1149
	 * to add additional backward compatibility statistic TLVs.
L
Linus Torvalds 已提交
1150 1151
	 */
	if (compat_mode) {
1152
		if (p->type == TCA_OLD_COMPAT)
L
Linus Torvalds 已提交
1153
			err = gnet_stats_start_copy_compat(skb, 0,
1154 1155
							   TCA_STATS,
							   TCA_XSTATS,
1156
							   &p->tcfa_lock, &d,
1157
							   TCA_PAD);
L
Linus Torvalds 已提交
1158 1159 1160 1161
		else
			return 0;
	} else
		err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
1162
					    &p->tcfa_lock, &d, TCA_ACT_PAD);
L
Linus Torvalds 已提交
1163 1164 1165 1166

	if (err < 0)
		goto errout;

1167
	if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
1168 1169
	    gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
				     &p->tcfa_bstats_hw) < 0 ||
1170
	    gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
1171
	    gnet_stats_copy_queue(&d, p->cpu_qstats,
1172 1173
				  &p->tcfa_qstats,
				  p->tcfa_qstats.qlen) < 0)
L
Linus Torvalds 已提交
1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184
		goto errout;

	if (gnet_stats_finish_copy(&d) < 0)
		goto errout;

	return 0;

errout:
	return -1;
}

1185
/* Build one netlink message of type @event in @skb: a struct tcamsg header
 * followed by a TCA_ACT_TAB nest holding the dump of @actions.
 *
 * Returns skb->len on success.  On failure everything appended by this
 * function is trimmed off again and -1 is returned.
 */
static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
			u32 portid, u32 seq, u16 flags, int event, int bind,
			int ref)
{
	unsigned char *msg_start = skb_tail_pointer(skb);
	struct nlmsghdr *nlh;
	struct nlattr *nest;
	struct tcamsg *hdr;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct tcamsg), flags);
	if (!nlh)
		goto trim;

	hdr = nlmsg_data(nlh);
	hdr->tca_family = AF_UNSPEC;
	hdr->tca__pad1 = 0;
	hdr->tca__pad2 = 0;

	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
	if (!nest || tcf_action_dump(skb, actions, bind, ref, false) < 0)
		goto trim;

	nla_nest_end(skb, nest);

	/* Final length covers everything written since msg_start. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - msg_start;
	return skb->len;

trim:
	nlmsg_trim(skb, msg_start);
	return -1;
}

static int
1220
tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
1221
	       struct tc_action *actions[], int event,
1222
	       struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
1223 1224 1225 1226 1227 1228
{
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;
1229
	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
1230
			 0, 1) <= 0) {
1231
		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
L
Linus Torvalds 已提交
1232 1233 1234
		kfree_skb(skb);
		return -EINVAL;
	}
1235

1236
	return rtnl_unicast(skb, net, portid);
L
Linus Torvalds 已提交
1237 1238
}

1239
/* Look up one existing action described by the nested attribute @nla
 * (TCA_ACT_KIND selects the ops, TCA_ACT_INDEX the instance).
 *
 * Returns the action, or an ERR_PTR: the parse error, -EINVAL for a
 * missing/short index or an unknown kind, -ENOENT when the lookup finds
 * nothing.  The module reference obtained from tc_lookup_action() is
 * dropped before returning in every case.
 */
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
					  struct nlmsghdr *n, u32 portid,
					  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_ACT_MAX + 1];
	const struct tc_action_ops *ops;
	struct tc_action *a;
	int index;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
					  tcf_action_policy, extack);
	if (err < 0)
		return ERR_PTR(err);

	if (!tb[TCA_ACT_INDEX] ||
	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
		NL_SET_ERR_MSG(extack, "Invalid TC action index value");
		return ERR_PTR(-EINVAL);
	}
	index = nla_get_u32(tb[TCA_ACT_INDEX]);

	ops = tc_lookup_action(tb[TCA_ACT_KIND]);
	if (!ops) {
		/* could happen in batch of actions */
		NL_SET_ERR_MSG(extack, "Specified TC action kind not found");
		return ERR_PTR(-EINVAL);
	}

	if (ops->lookup(net, &a, index) == 0) {
		NL_SET_ERR_MSG(extack, "TC action with specified index not found");
		a = ERR_PTR(-ENOENT);
	}

	module_put(ops->owner);
	return a;
}

1283
/* Flush all actions of the kind named in the nested attribute @nla and
 * send an RTM_DELACTION notification built while walking them.
 *
 * Returns 0 on success (or when the walk reports nothing), a negative
 * errno otherwise.
 */
static int tca_action_flush(struct net *net, struct nlattr *nla,
			    struct nlmsghdr *n, u32 portid,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_ACT_MAX + 1];
	const struct tc_action_ops *ops;
	/* NOTE(review): dcb is handed to ->walk() without being initialized */
	struct netlink_callback dcb;
	unsigned char *msg_start;
	struct nlmsghdr *nlh;
	struct nlattr *nest;
	struct sk_buff *skb;
	struct tcamsg *hdr;
	int err;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	msg_start = skb_tail_pointer(skb);

	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
					  tcf_action_policy, extack);
	if (err < 0)
		goto free_skb;

	/* Every failure up to the walk is reported as -EINVAL. */
	err = -EINVAL;
	ops = tc_lookup_action(tb[TCA_ACT_KIND]);
	if (!ops) {
		/* request to flush an action kind that is not known */
		NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
		goto free_skb;
	}

	nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
			sizeof(struct tcamsg), 0);
	if (!nlh) {
		NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
		goto put_module;
	}

	hdr = nlmsg_data(nlh);
	hdr->tca_family = AF_UNSPEC;
	hdr->tca__pad1 = 0;
	hdr->tca__pad2 = 0;

	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
	if (!nest) {
		NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
		goto put_module;
	}

	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack);
	if (err <= 0) {
		nla_nest_cancel(skb, nest);
		goto put_module;
	}

	nla_nest_end(skb, nest);

	nlh->nlmsg_len = skb_tail_pointer(skb) - msg_start;
	nlh->nlmsg_flags |= NLM_F_ROOT;
	module_put(ops->owner);

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		return 0;
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");

	return err;

put_module:
	module_put(ops->owner);
free_skb:
	kfree_skb(skb);
	return err;
}

1361
static int tcf_action_delete(struct net *net, struct tc_action *actions[])
1362
{
1363
	int i;
1364

1365 1366
	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
		struct tc_action *a = actions[i];
1367 1368 1369 1370
		const struct tc_action_ops *ops = a->ops;
		/* Actions can be deleted concurrently so we must save their
		 * type and id to search again after reference is released.
		 */
1371 1372
		struct tcf_idrinfo *idrinfo = a->idrinfo;
		u32 act_index = a->tcfa_index;
1373

1374
		actions[i] = NULL;
1375 1376 1377 1378
		if (tcf_action_put(a)) {
			/* last reference, action was deleted concurrently */
			module_put(ops->owner);
		} else  {
1379 1380
			int ret;

1381
			/* now do the delete */
1382
			ret = tcf_idr_delete_index(idrinfo, act_index);
1383
			if (ret < 0)
1384 1385 1386 1387 1388 1389
				return ret;
		}
	}
	return 0;
}

1390
static int
1391
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
1392
	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
1393 1394 1395 1396
{
	int ret;
	struct sk_buff *skb;

1397 1398
	skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
			GFP_KERNEL);
1399 1400 1401 1402
	if (!skb)
		return -ENOBUFS;

	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
1403
			 0, 2) <= 0) {
1404
		NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
1405 1406 1407 1408 1409
		kfree_skb(skb);
		return -EINVAL;
	}

	/* now do the delete */
1410
	ret = tcf_action_delete(net, actions);
1411
	if (ret < 0) {
1412
		NL_SET_ERR_MSG(extack, "Failed to delete TC action");
1413 1414 1415
		kfree_skb(skb);
		return ret;
	}
1416 1417 1418 1419 1420 1421 1422 1423

	ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (ret > 0)
		return 0;
	return ret;
}

L
Linus Torvalds 已提交
1424
static int
1425
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
1426
	      u32 portid, int event, struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
1427
{
1428
	int i, ret;
E
Eric Dumazet 已提交
1429
	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
1430
	struct tc_action *act;
1431
	size_t attr_size = 0;
1432
	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
L
Linus Torvalds 已提交
1433

1434 1435
	ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
					  extack);
1436 1437
	if (ret < 0)
		return ret;
L
Linus Torvalds 已提交
1438

E
Eric Dumazet 已提交
1439
	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
1440
		if (tb[1])
1441
			return tca_action_flush(net, tb[1], n, portid, extack);
1442

1443
		NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
1444
		return -EINVAL;
L
Linus Torvalds 已提交
1445 1446
	}

1447
	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
1448
		act = tcf_action_get_1(net, tb[i], n, portid, extack);
1449 1450
		if (IS_ERR(act)) {
			ret = PTR_ERR(act);
L
Linus Torvalds 已提交
1451
			goto err;
1452
		}
1453
		attr_size += tcf_action_fill_size(act);
1454
		actions[i - 1] = act;
L
Linus Torvalds 已提交
1455
	}
1456 1457

	attr_size = tcf_action_full_attrs_size(attr_size);
L
Linus Torvalds 已提交
1458 1459

	if (event == RTM_GETACTION)
1460
		ret = tcf_get_notify(net, portid, n, actions, event, extack);
L
Linus Torvalds 已提交
1461
	else { /* delete */
1462
		ret = tcf_del_notify(net, n, actions, portid, attr_size, extack);
1463
		if (ret)
L
Linus Torvalds 已提交
1464
			goto err;
1465
		return 0;
L
Linus Torvalds 已提交
1466 1467
	}
err:
1468
	tcf_action_put_many(actions);
L
Linus Torvalds 已提交
1469 1470 1471
	return ret;
}

1472
static int
1473
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
1474
	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
1475 1476 1477 1478
{
	struct sk_buff *skb;
	int err = 0;

1479 1480
	skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
			GFP_KERNEL);
L
Linus Torvalds 已提交
1481 1482 1483
	if (!skb)
		return -ENOBUFS;

1484 1485
	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
			 RTM_NEWACTION, 0, 0) <= 0) {
1486
		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
1487 1488 1489
		kfree_skb(skb);
		return -EINVAL;
	}
1490

1491 1492
	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
L
Linus Torvalds 已提交
1493 1494 1495 1496 1497
	if (err > 0)
		err = 0;
	return err;
}

J
Jamal Hadi Salim 已提交
1498
/* Create (or, with @ovr, replace) the actions described by @nla and send
 * the RTM_NEWACTION notification.  tcf_action_init() is retried while it
 * returns -EAGAIN, for at most 10 calls in total.
 */
static int tcf_action_add(struct net *net, struct nlattr *nla,
			  struct nlmsghdr *n, u32 portid, int ovr,
			  struct netlink_ext_ack *extack)
{
	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
	int init_res[TCA_ACT_MAX_PRIO] = {};
	size_t attr_size = 0;
	int attempts = 0;
	int ret, i;

	do {
		ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
				      actions, init_res, &attr_size, true, extack);
	} while (ret == -EAGAIN && ++attempts < 10);

	if (ret < 0)
		return ret;

	ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);

	/* only put existing actions */
	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
		if (init_res[i] == ACT_P_CREATED)
			actions[i] = NULL;
	}
	tcf_action_put_many(actions);

	return ret;
}

1527
static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
1528 1529
	[TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAG_LARGE_DUMP_ON |
						 TCA_ACT_FLAG_TERSE_DUMP),
1530
	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
1531 1532
};

1533 1534
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
L
Linus Torvalds 已提交
1535
{
1536
	struct net *net = sock_net(skb->sk);
1537
	struct nlattr *tca[TCA_ROOT_MAX + 1];
G
Gaurav Singh 已提交
1538
	u32 portid = NETLINK_CB(skb).portid;
L
Linus Torvalds 已提交
1539 1540
	int ret = 0, ovr = 0;

1541 1542
	if ((n->nlmsg_type != RTM_GETACTION) &&
	    !netlink_capable(skb, CAP_NET_ADMIN))
1543 1544
		return -EPERM;

1545 1546
	ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca,
				     TCA_ROOT_MAX, NULL, extack);
1547 1548 1549 1550
	if (ret < 0)
		return ret;

	if (tca[TCA_ACT_TAB] == NULL) {
1551
		NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
L
Linus Torvalds 已提交
1552 1553 1554
		return -EINVAL;
	}

E
Eric Dumazet 已提交
1555
	/* n->nlmsg_flags & NLM_F_CREATE */
L
Linus Torvalds 已提交
1556 1557 1558
	switch (n->nlmsg_type) {
	case RTM_NEWACTION:
		/* we are going to assume all other flags
L
Lucas De Marchi 已提交
1559
		 * imply create only if it doesn't exist
L
Linus Torvalds 已提交
1560 1561 1562 1563
		 * Note that CREATE | EXCL implies that
		 * but since we want avoid ambiguity (eg when flags
		 * is zero) then just set this
		 */
E
Eric Dumazet 已提交
1564
		if (n->nlmsg_flags & NLM_F_REPLACE)
L
Linus Torvalds 已提交
1565
			ovr = 1;
1566 1567
		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
				     extack);
L
Linus Torvalds 已提交
1568 1569
		break;
	case RTM_DELACTION:
1570
		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
1571
				    portid, RTM_DELACTION, extack);
L
Linus Torvalds 已提交
1572 1573
		break;
	case RTM_GETACTION:
1574
		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
1575
				    portid, RTM_GETACTION, extack);
L
Linus Torvalds 已提交
1576 1577 1578 1579 1580 1581 1582 1583
		break;
	default:
		BUG();
	}

	return ret;
}

1584
static struct nlattr *find_dump_kind(struct nlattr **nla)
L
Linus Torvalds 已提交
1585
{
E
Eric Dumazet 已提交
1586
	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
1587 1588
	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
	struct nlattr *kind;
L
Linus Torvalds 已提交
1589

1590
	tb1 = nla[TCA_ACT_TAB];
L
Linus Torvalds 已提交
1591 1592 1593
	if (tb1 == NULL)
		return NULL;

1594
	if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0)
L
Linus Torvalds 已提交
1595 1596
		return NULL;

1597 1598
	if (tb[1] == NULL)
		return NULL;
1599
	if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], tcf_action_policy, NULL) < 0)
L
Linus Torvalds 已提交
1600
		return NULL;
1601
	kind = tb2[TCA_ACT_KIND];
L
Linus Torvalds 已提交
1602

1603
	return kind;
L
Linus Torvalds 已提交
1604 1605
}

J
Jamal Hadi Salim 已提交
1606
static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
L
Linus Torvalds 已提交
1607
{
1608
	struct net *net = sock_net(skb->sk);
L
Linus Torvalds 已提交
1609
	struct nlmsghdr *nlh;
1610
	unsigned char *b = skb_tail_pointer(skb);
1611
	struct nlattr *nest;
L
Linus Torvalds 已提交
1612 1613
	struct tc_action_ops *a_o;
	int ret = 0;
1614
	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
1615 1616
	struct nlattr *tb[TCA_ROOT_MAX + 1];
	struct nlattr *count_attr = NULL;
1617
	unsigned long jiffy_since = 0;
1618 1619
	struct nlattr *kind = NULL;
	struct nla_bitfield32 bf;
1620
	u32 msecs_since = 0;
1621 1622
	u32 act_count = 0;

1623 1624
	ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb,
				     TCA_ROOT_MAX, tcaa_policy, cb->extack);
1625 1626
	if (ret < 0)
		return ret;
L
Linus Torvalds 已提交
1627

1628
	kind = find_dump_kind(tb);
L
Linus Torvalds 已提交
1629
	if (kind == NULL) {
1630
		pr_info("tc_dump_action: action bad kind\n");
L
Linus Torvalds 已提交
1631 1632 1633
		return 0;
	}

1634
	a_o = tc_lookup_action(kind);
E
Eric Dumazet 已提交
1635
	if (a_o == NULL)
L
Linus Torvalds 已提交
1636 1637
		return 0;

1638 1639 1640 1641 1642 1643
	cb->args[2] = 0;
	if (tb[TCA_ROOT_FLAGS]) {
		bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]);
		cb->args[2] = bf.value;
	}

1644 1645 1646 1647
	if (tb[TCA_ROOT_TIME_DELTA]) {
		msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
	}

1648
	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1649 1650 1651
			cb->nlh->nlmsg_type, sizeof(*t), 0);
	if (!nlh)
		goto out_module_put;
1652

1653 1654 1655
	if (msecs_since)
		jiffy_since = jiffies - msecs_to_jiffies(msecs_since);

1656
	t = nlmsg_data(nlh);
L
Linus Torvalds 已提交
1657
	t->tca_family = AF_UNSPEC;
1658 1659
	t->tca__pad1 = 0;
	t->tca__pad2 = 0;
1660
	cb->args[3] = jiffy_since;
1661 1662 1663
	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
	if (!count_attr)
		goto out_module_put;
L
Linus Torvalds 已提交
1664

1665
	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
1666
	if (nest == NULL)
1667
		goto out_module_put;
L
Linus Torvalds 已提交
1668

1669
	ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL);
L
Linus Torvalds 已提交
1670
	if (ret < 0)
1671
		goto out_module_put;
L
Linus Torvalds 已提交
1672 1673

	if (ret > 0) {
1674
		nla_nest_end(skb, nest);
L
Linus Torvalds 已提交
1675
		ret = skb->len;
1676 1677 1678
		act_count = cb->args[1];
		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
		cb->args[1] = 0;
L
Linus Torvalds 已提交
1679
	} else
1680
		nlmsg_trim(skb, b);
L
Linus Torvalds 已提交
1681

1682
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1683
	if (NETLINK_CB(cb->skb).portid && ret)
L
Linus Torvalds 已提交
1684 1685 1686 1687
		nlh->nlmsg_flags |= NLM_F_MULTI;
	module_put(a_o->owner);
	return skb->len;

1688
out_module_put:
L
Linus Torvalds 已提交
1689
	module_put(a_o->owner);
1690
	nlmsg_trim(skb, b);
L
Linus Torvalds 已提交
1691 1692 1693 1694 1695
	return skb->len;
}

/* Register the rtnetlink handlers of the action API.  All three message
 * types share tc_ctl_action() as doit handler; only RTM_GETACTION also
 * has a dump handler.
 */
static int __init tc_action_init(void)
{
	/* create / replace */
	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
	/* delete / flush */
	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
	/* get / dump */
	rtnl_register(PF_UNSPEC, RTM_GETACTION,
		      tc_ctl_action, tc_dump_action, 0);

	return 0;
}

subsys_initcall(tc_action_init);