// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
13
#include <linux/vmalloc.h>
N
NeilBrown 已提交
14
#include <linux/rhashtable.h>
P
Patrick McHardy 已提交
15 16 17
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
18
#include <net/netfilter/nf_flow_table.h>
P
Patrick McHardy 已提交
19 20
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
21
#include <net/netfilter/nf_tables_offload.h>
22
#include <net/net_namespace.h>
P
Patrick McHardy 已提交
23 24 25
#include <net/sock.h>

static LIST_HEAD(nf_tables_expressions);
26
static LIST_HEAD(nf_tables_objects);
27
static LIST_HEAD(nf_tables_flowtables);
28 29
static LIST_HEAD(nf_tables_destroy_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
30
static u64 table_handle;
P
Patrick McHardy 已提交
31

32 33 34 35 36 37
/*
 * Values held in net->nft.validate_state; nft_validate_state_update()
 * decides which transitions between them are applied.
 */
enum {
	NFT_VALIDATE_SKIP	= 0,
	NFT_VALIDATE_NEED,
	NFT_VALIDATE_DO,
};

38 39
/* Hash list table for objects; its parameters are nft_objname_ht_params. */
static struct rhltable nft_objname_ht;

/* rhashtable callbacks for the per-table chain hash (keyed by chain name). */
static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);

/* rhashtable callbacks for the object hash (keyed by table pointer + name). */
static u32 nft_objname_hash(const void *data, u32 len, u32 seed);
static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed);
static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *);

48 49 50 51 52 53 54 55 56
/*
 * rhashtable parameters for the chain hash: entries are struct nft_chain
 * linked through ->rhlhead and keyed on ->name, with custom hash and
 * compare callbacks.
 */
static const struct rhashtable_params nft_chain_ht_params = {
	.head_offset		= offsetof(struct nft_chain, rhlhead),
	.key_offset		= offsetof(struct nft_chain, name),
	.hashfn			= nft_chain_hash,
	.obj_hashfn		= nft_chain_hash_obj,
	.obj_cmpfn		= nft_chain_hash_cmp,
	.automatic_shrinking	= true,
};

57 58 59 60 61 62 63 64 65
/*
 * rhashtable parameters for the object hash: entries are struct nft_object
 * linked through ->rhlhead and keyed on ->key, with custom hash and compare
 * callbacks.
 */
static const struct rhashtable_params nft_objname_ht_params = {
	.head_offset		= offsetof(struct nft_object, rhlhead),
	.key_offset		= offsetof(struct nft_object, key),
	.hashfn			= nft_objname_hash,
	.obj_hashfn		= nft_objname_hash_obj,
	.obj_cmpfn		= nft_objname_hash_cmp,
	.automatic_shrinking	= true,
};

66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
/*
 * Store a new validation state in @net.
 *
 * A request to move from NFT_VALIDATE_DO to NFT_VALIDATE_NEED is ignored.
 * Every other request overwrites the stored state; moving from
 * NFT_VALIDATE_SKIP directly to NFT_VALIDATE_DO additionally triggers a
 * one-time warning.
 */
static void nft_validate_state_update(struct net *net, u8 new_validate_state)
{
	if (net->nft.validate_state == NFT_VALIDATE_DO &&
	    new_validate_state == NFT_VALIDATE_NEED)
		return;

	if (net->nft.validate_state == NFT_VALIDATE_SKIP)
		WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO);

	net->nft.validate_state = new_validate_state;
}
81 82
/* Work item bound to nf_tables_trans_destroy_work(), forward-declared here. */
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
83

84
static void nft_ctx_init(struct nft_ctx *ctx,
85
			 struct net *net,
86 87
			 const struct sk_buff *skb,
			 const struct nlmsghdr *nlh,
88
			 u8 family,
89 90 91 92
			 struct nft_table *table,
			 struct nft_chain *chain,
			 const struct nlattr * const *nla)
{
93
	ctx->net	= net;
94
	ctx->family	= family;
95
	ctx->level	= 0;
96 97 98 99 100
	ctx->table	= table;
	ctx->chain	= chain;
	ctx->nla   	= nla;
	ctx->portid	= NETLINK_CB(skb).portid;
	ctx->report	= nlmsg_report(nlh);
101
	ctx->flags	= nlh->nlmsg_flags;
102
	ctx->seq	= nlh->nlmsg_seq;
103 104
}

105 106
/*
 * Allocate a zeroed transaction with @size extra bytes of per-message
 * payload following the struct nft_trans header.
 *
 * The message type and a copy of @ctx are stored in the transaction.  The
 * list head is initialised to an empty list so that nft_trans_destroy(),
 * which unconditionally calls list_del(), is also safe on a transaction
 * that has not been queued on any list yet.
 *
 * Returns the new transaction, or NULL if the allocation fails.
 */
static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
					     int msg_type, u32 size, gfp_t gfp)
{
	struct nft_trans *trans;

	trans = kzalloc(sizeof(struct nft_trans) + size, gfp);
	if (trans == NULL)
		return NULL;

	/* A zeroed list_head would make a later list_del() dereference NULL. */
	INIT_LIST_HEAD(&trans->list);
	trans->msg_type = msg_type;
	trans->ctx	= *ctx;

	return trans;
}

120 121 122 123 124 125
/* Convenience wrapper: allocate a transaction with GFP_KERNEL. */
static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
					 int msg_type, u32 size)
{
	return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}

126 127 128 129 130 131
/*
 * Unlink @trans from the list it is queued on and free it.
 * NOTE(review): list_del() is unconditional, so callers presumably must
 * only pass a transaction whose ->list is linked - confirm per call site.
 */
static void nft_trans_destroy(struct nft_trans *trans)
{
	list_del(&trans->list);
	kfree(trans);
}

132 133 134 135 136 137 138 139 140 141 142
/*
 * Mark an anonymous @set as bound if the pending commit list contains the
 * NFT_MSG_NEWSET transaction that created it.  The list is walked from the
 * most recently queued transaction backwards.  Non-anonymous sets are left
 * untouched.
 */
static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
{
	struct nft_trans *trans;

	if (!nft_set_is_anonymous(set))
		return;

	list_for_each_entry_reverse(trans, &ctx->net->nft.commit_list, list) {
		if (trans->msg_type != NFT_MSG_NEWSET)
			continue;
		if (nft_trans_set(trans) != set)
			continue;

		set->bound = true;
		return;
	}
}

149 150 151
static int nf_tables_register_hook(struct net *net,
				   const struct nft_table *table,
				   struct nft_chain *chain)
152
{
153
	const struct nft_base_chain *basechain;
154
	const struct nf_hook_ops *ops;
155

156
	if (table->flags & NFT_TABLE_F_DORMANT ||
157
	    !nft_is_base_chain(chain))
158 159
		return 0;

160 161
	basechain = nft_base_chain(chain);
	ops = &basechain->ops;
162

163 164 165
	if (basechain->type->ops_register)
		return basechain->type->ops_register(net, ops);

166
	return nf_register_net_hook(net, ops);
167 168
}

169 170 171
static void nf_tables_unregister_hook(struct net *net,
				      const struct nft_table *table,
				      struct nft_chain *chain)
172
{
173 174 175
	const struct nft_base_chain *basechain;
	const struct nf_hook_ops *ops;

176
	if (table->flags & NFT_TABLE_F_DORMANT ||
177
	    !nft_is_base_chain(chain))
178
		return;
179 180
	basechain = nft_base_chain(chain);
	ops = &basechain->ops;
181

182 183 184 185
	if (basechain->type->ops_unregister)
		return basechain->type->ops_unregister(net, ops);

	nf_unregister_net_hook(net, ops);
186 187
}

188 189 190 191 192 193 194 195 196
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
	struct nft_trans *trans;

	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
	if (trans == NULL)
		return -ENOMEM;

	if (msg_type == NFT_MSG_NEWTABLE)
197
		nft_activate_next(ctx->net, ctx->table);
198 199 200 201 202 203 204 205 206 207 208 209 210

	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
	return 0;
}

static int nft_deltable(struct nft_ctx *ctx)
{
	int err;

	err = nft_trans_table_add(ctx, NFT_MSG_DELTABLE);
	if (err < 0)
		return err;

211
	nft_deactivate_next(ctx->net, ctx->table);
212 213 214
	return err;
}

215
static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
216 217 218 219 220
{
	struct nft_trans *trans;

	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
	if (trans == NULL)
221
		return ERR_PTR(-ENOMEM);
222 223

	if (msg_type == NFT_MSG_NEWCHAIN)
224
		nft_activate_next(ctx->net, ctx->chain);
225 226

	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
227
	return trans;
228 229 230 231
}

static int nft_delchain(struct nft_ctx *ctx)
{
232
	struct nft_trans *trans;
233

234 235 236
	trans = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
237 238

	ctx->table->use--;
239
	nft_deactivate_next(ctx->net, ctx->chain);
240

241
	return 0;
242 243
}

244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
/*
 * Invoke the optional ->activate callback of each expression in @rule.
 * The walk stops at the end of the rule or at the first expression that
 * has no ops attached.
 */
static void nft_rule_expr_activate(const struct nft_ctx *ctx,
				   struct nft_rule *rule)
{
	struct nft_expr *expr;

	for (expr = nft_expr_first(rule);
	     expr != nft_expr_last(rule) && expr->ops;
	     expr = nft_expr_next(expr)) {
		if (expr->ops->activate)
			expr->ops->activate(ctx, expr);
	}
}

static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
259 260
				     struct nft_rule *rule,
				     enum nft_trans_phase phase)
261 262 263 264 265 266
{
	struct nft_expr *expr;

	expr = nft_expr_first(rule);
	while (expr != nft_expr_last(rule) && expr->ops) {
		if (expr->ops->deactivate)
267
			expr->ops->deactivate(ctx, expr, phase);
268 269 270 271 272

		expr = nft_expr_next(expr);
	}
}

273 274 275 276
static int
nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
{
	/* You cannot delete the same rule twice */
277 278
	if (nft_is_active_next(ctx->net, rule)) {
		nft_deactivate_next(ctx->net, rule);
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
		ctx->chain->use--;
		return 0;
	}
	return -ENOENT;
}

static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
					    struct nft_rule *rule)
{
	struct nft_trans *trans;

	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
	if (trans == NULL)
		return NULL;

294 295 296 297
	if (msg_type == NFT_MSG_NEWRULE && ctx->nla[NFTA_RULE_ID] != NULL) {
		nft_trans_rule_id(trans) =
			ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID]));
	}
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
	nft_trans_rule(trans) = rule;
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);

	return trans;
}

static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
{
	struct nft_trans *trans;
	int err;

	trans = nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule);
	if (trans == NULL)
		return -ENOMEM;

	err = nf_tables_delrule_deactivate(ctx, rule);
	if (err < 0) {
		nft_trans_destroy(trans);
		return err;
	}
318
	nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
319 320 321 322 323 324 325 326 327 328

	return 0;
}

/*
 * Queue deletion of every rule of ctx->chain that is still active in the
 * next generation.  Stops at, and returns, the first error.
 */
static int nft_delrule_by_chain(struct nft_ctx *ctx)
{
	struct nft_rule *rule;

	list_for_each_entry(rule, &ctx->chain->rules, list) {
		int err;

		if (!nft_is_active_next(ctx->net, rule))
			continue;

		err = nft_delrule(ctx, rule);
		if (err < 0)
			return err;
	}
	return 0;
}

339
static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
340 341 342 343 344 345 346 347 348 349 350
			     struct nft_set *set)
{
	struct nft_trans *trans;

	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
	if (trans == NULL)
		return -ENOMEM;

	if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
		nft_trans_set_id(trans) =
			ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
351
		nft_activate_next(ctx->net, set);
352 353 354 355 356 357 358
	}
	nft_trans_set(trans) = set;
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);

	return 0;
}

359
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
360 361 362 363 364 365 366
{
	int err;

	err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set);
	if (err < 0)
		return err;

367
	nft_deactivate_next(ctx->net, set);
368 369 370 371 372
	ctx->table->use--;

	return err;
}

373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
/*
 * Queue an object transaction of @msg_type for @obj on the commit list.
 * For NFT_MSG_NEWOBJ the object is also activated in the next generation.
 *
 * Returns 0 on success or -ENOMEM if the transaction cannot be allocated.
 */
static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type,
			     struct nft_object *obj)
{
	struct nft_trans *trans =
		nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj));

	if (!trans)
		return -ENOMEM;

	if (msg_type == NFT_MSG_NEWOBJ)
		nft_activate_next(ctx->net, obj);

	nft_trans_obj(trans) = obj;
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
	return 0;
}

/*
 * Queue deletion of @obj, deactivate it in the next generation and drop
 * the table's use count.  Returns the result of queuing the transaction.
 */
static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
{
	int err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj);

	if (err < 0)
		return err;

	nft_deactivate_next(ctx->net, obj);
	ctx->table->use--;
	return err;
}

405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
/*
 * Queue a flowtable transaction of @msg_type for @flowtable on the commit
 * list.  For NFT_MSG_NEWFLOWTABLE the flowtable is also activated in the
 * next generation.
 *
 * Returns 0 on success or -ENOMEM if the transaction cannot be allocated.
 */
static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
				   struct nft_flowtable *flowtable)
{
	struct nft_trans *trans =
		nft_trans_alloc(ctx, msg_type,
				sizeof(struct nft_trans_flowtable));

	if (!trans)
		return -ENOMEM;

	if (msg_type == NFT_MSG_NEWFLOWTABLE)
		nft_activate_next(ctx->net, flowtable);

	nft_trans_flowtable(trans) = flowtable;
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
	return 0;
}

/*
 * Queue deletion of @flowtable, deactivate it in the next generation and
 * drop the table's use count.  Returns the result of queuing the
 * transaction.
 */
static int nft_delflowtable(struct nft_ctx *ctx,
			    struct nft_flowtable *flowtable)
{
	int err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);

	if (err < 0)
		return err;

	nft_deactivate_next(ctx->net, flowtable);
	ctx->table->use--;
	return err;
}

P
Patrick McHardy 已提交
439 440 441 442
/*
 * Tables
 */

443
/*
 * Find the table in @net whose name matches the @nla attribute, whose
 * family equals @family and which is active for @genmask.
 *
 * Returns the table, ERR_PTR(-EINVAL) if @nla is NULL, or ERR_PTR(-ENOENT)
 * if no table matches.
 */
static struct nft_table *nft_table_lookup(const struct net *net,
					  const struct nlattr *nla,
					  u8 family, u8 genmask)
{
	struct nft_table *table;

	if (!nla)
		return ERR_PTR(-EINVAL);

	list_for_each_entry_rcu(table, &net->nft.tables, list) {
		if (nla_strcmp(nla, table->name))
			continue;
		if (table->family != family)
			continue;
		if (!nft_active_genmask(table, genmask))
			continue;

		return table;
	}

	return ERR_PTR(-ENOENT);
}

462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
/*
 * Find the table in @net whose handle equals the big-endian 64-bit value
 * carried in @nla and which is active for @genmask.
 *
 * @nla is dereferenced unconditionally, so the caller must pass a non-NULL
 * attribute.  The table family is not part of the match.
 *
 * Returns the table or ERR_PTR(-ENOENT) if none matches.
 */
static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
						   const struct nlattr *nla,
						   u8 genmask)
{
	/* Decode the attribute once instead of on every iteration. */
	const u64 handle = be64_to_cpu(nla_get_be64(nla));
	struct nft_table *table;

	list_for_each_entry(table, &net->nft.tables, list) {
		if (table->handle == handle &&
		    nft_active_genmask(table, genmask))
			return table;
	}

	return ERR_PTR(-ENOENT);
}

P
Patrick McHardy 已提交
477 478 479 480 481
/* Return the next handle for @table by pre-incrementing its generator. */
static inline u64 nf_tables_alloc_handle(struct nft_table *table)
{
	return ++table->hgenerator;
}

482
/*
 * Registered chain types, indexed by [family][type].  Slots are filled by
 * nft_register_chain_type() and cleared by nft_unregister_chain_type().
 */
static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
483

484
static const struct nft_chain_type *
485
__nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
486 487 488
{
	int i;

489
	for (i = 0; i < NFT_CHAIN_T_MAX; i++) {
490 491
		if (chain_type[family][i] != NULL &&
		    !nla_strcmp(nla, chain_type[family][i]->name))
492
			return chain_type[family][i];
493
	}
494
	return NULL;
495 496
}

497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
/*
 * Loading a module requires dropping mutex that guards the
 * transaction.
 * We first need to abort any pending transactions as once
 * mutex is unlocked a different client could start a new
 * transaction.  It must not see any 'future generation'
 * changes, as these changes will never happen.
 */
#ifdef CONFIG_MODULES
static int __nf_tables_abort(struct net *net);

static void nft_request_module(struct net *net, const char *fmt, ...)
{
	char module_name[MODULE_NAME_LEN];
	va_list args;
	int ret;

	__nf_tables_abort(net);

	va_start(args, fmt);
	ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
	va_end(args);
	if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret))
		return;

522
	mutex_unlock(&net->nft.commit_mutex);
523
	request_module("%s", module_name);
524
	mutex_lock(&net->nft.commit_mutex);
525 526 527
}
#endif

528 529 530 531 532 533 534
/*
 * With CONFIG_PROVE_LOCKING, warn once if the nfnetlink nftables subsystem
 * mutex is held; otherwise a no-op.
 */
static void lockdep_nfnl_nft_mutex_not_held(void)
{
#ifdef CONFIG_PROVE_LOCKING
	WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
#endif
}

535
static const struct nft_chain_type *
536 537
nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
			    u8 family, bool autoload)
538
{
539
	const struct nft_chain_type *type;
540

541
	type = __nf_tables_chain_type_lookup(nla, family);
542 543
	if (type != NULL)
		return type;
544 545

	lockdep_nfnl_nft_mutex_not_held();
546
#ifdef CONFIG_MODULES
547
	if (autoload) {
548 549
		nft_request_module(net, "nft-chain-%u-%.*s", family,
				   nla_len(nla), (const char *)nla_data(nla));
550
		type = __nf_tables_chain_type_lookup(nla, family);
551 552
		if (type != NULL)
			return ERR_PTR(-EAGAIN);
553 554
	}
#endif
555
	return ERR_PTR(-ENOENT);
556 557
}

P
Patrick McHardy 已提交
558
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
559 560
	[NFTA_TABLE_NAME]	= { .type = NLA_STRING,
				    .len = NFT_TABLE_MAXNAMELEN - 1 },
561
	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
562
	[NFTA_TABLE_HANDLE]	= { .type = NLA_U64 },
P
Patrick McHardy 已提交
563 564
};

565 566 567
/*
 * Append one table message to @skb: an nfgenmsg header followed by the
 * table's name, flags, use count and handle attributes.
 *
 * Returns 0 on success.  On failure the partially written message is
 * trimmed from @skb and -1 is returned.
 */
static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
				     u32 portid, u32 seq, int event, u32 flags,
				     int family, const struct nft_table *table)
{
	struct nfgenmsg *hdr;
	struct nlmsghdr *nlh;

	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
	if (!nlh)
		goto nla_put_failure;

	hdr = nlmsg_data(nlh);
	hdr->nfgen_family = family;
	hdr->version = NFNETLINK_V0;
	hdr->res_id = htons(net->nft.base_seq & 0xffff);

	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
		goto nla_put_failure;
	if (nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
			 NFTA_TABLE_PAD))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, nlh);
	return -1;
}

597
static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
P
Patrick McHardy 已提交
598 599 600 601
{
	struct sk_buff *skb;
	int err;

602 603
	if (!ctx->report &&
	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
604
		return;
P
Patrick McHardy 已提交
605 606 607 608 609

	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto err;

610
	err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
611
					event, 0, ctx->family, ctx->table);
P
Patrick McHardy 已提交
612 613 614 615 616
	if (err < 0) {
		kfree_skb(skb);
		goto err;
	}

617 618 619
	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
		       ctx->report, GFP_KERNEL);
	return;
P
Patrick McHardy 已提交
620
err:
621
	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
P
Patrick McHardy 已提交
622 623 624 625 626 627 628 629
}

static int nf_tables_dump_tables(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	const struct nft_table *table;
	unsigned int idx = 0, s_idx = cb->args[0];
630
	struct net *net = sock_net(skb->sk);
P
Patrick McHardy 已提交
631 632
	int family = nfmsg->nfgen_family;

633
	rcu_read_lock();
634 635
	cb->seq = net->nft.base_seq;

636
	list_for_each_entry_rcu(table, &net->nft.tables, list) {
637
		if (family != NFPROTO_UNSPEC && family != table->family)
P
Patrick McHardy 已提交
638 639
			continue;

640 641 642 643 644 645 646 647 648 649 650
		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));
		if (!nft_is_active(net, table))
			continue;
		if (nf_tables_fill_table_info(skb, net,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      NFT_MSG_NEWTABLE, NLM_F_MULTI,
651
					      table->family, table) < 0)
652 653 654
			goto done;

		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
P
Patrick McHardy 已提交
655
cont:
656
		idx++;
P
Patrick McHardy 已提交
657 658
	}
done:
659
	rcu_read_unlock();
P
Patrick McHardy 已提交
660 661 662 663
	cb->args[0] = idx;
	return skb->len;
}

664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681
/*
 * Start a netlink dump from a context that holds rcu_read_lock().
 *
 * A reference on this module is taken for the duration of the call; if
 * that fails -EINVAL is returned.  The RCU read lock is dropped around
 * netlink_dump_start() and re-taken before returning its result.
 */
static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
				      struct netlink_dump_control *c)
{
	int err;

	/* Pin the module while the RCU read lock is not held. */
	if (!try_module_get(THIS_MODULE))
		return -EINVAL;

	rcu_read_unlock();
	err = netlink_dump_start(nlsk, skb, nlh, c);
	rcu_read_lock();
	module_put(THIS_MODULE);

	return err;
}

/* called with rcu_read_lock held */
682 683
static int nf_tables_gettable(struct net *net, struct sock *nlsk,
			      struct sk_buff *skb, const struct nlmsghdr *nlh,
684 685
			      const struct nlattr * const nla[],
			      struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
686 687
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
688
	u8 genmask = nft_genmask_cur(net);
P
Patrick McHardy 已提交
689 690 691 692 693 694 695 696
	const struct nft_table *table;
	struct sk_buff *skb2;
	int family = nfmsg->nfgen_family;
	int err;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.dump = nf_tables_dump_tables,
697
			.module = THIS_MODULE,
P
Patrick McHardy 已提交
698
		};
699 700

		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
P
Patrick McHardy 已提交
701 702
	}

703
	table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask);
704 705
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]);
P
Patrick McHardy 已提交
706
		return PTR_ERR(table);
707
	}
P
Patrick McHardy 已提交
708

709
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
P
Patrick McHardy 已提交
710 711 712
	if (!skb2)
		return -ENOMEM;

713
	err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
P
Patrick McHardy 已提交
714 715 716 717 718 719 720 721 722 723 724 725
					nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
					family, table);
	if (err < 0)
		goto err;

	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);

err:
	kfree_skb(skb2);
	return err;
}

726
/*
 * Unregister the hooks of the base chains of @table that are active in the
 * next generation.  When @cnt is non-zero only the first @cnt such chains
 * are processed; zero means all of them.
 */
static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
{
	struct nft_chain *chain;
	u32 seen = 0;

	list_for_each_entry(chain, &table->chains, list) {
		if (!nft_is_active_next(net, chain) ||
		    !nft_is_base_chain(chain))
			continue;

		if (cnt && seen++ == cnt)
			break;

		nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
	}
}

744
static int nf_tables_table_enable(struct net *net, struct nft_table *table)
745 746 747 748 749
{
	struct nft_chain *chain;
	int err, i = 0;

	list_for_each_entry(chain, &table->chains, list) {
750 751
		if (!nft_is_active_next(net, chain))
			continue;
752
		if (!nft_is_base_chain(chain))
753 754
			continue;

755
		err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
756 757 758 759 760 761 762
		if (err < 0)
			goto err;

		i++;
	}
	return 0;
err:
763
	if (i)
764
		nft_table_disable(net, table, i);
765 766 767
	return err;
}

768
/* Unregister the hooks of all active base chains of @table (no limit). */
static void nf_tables_table_disable(struct net *net, struct nft_table *table)
{
	nft_table_disable(net, table, 0);
}

773
static int nf_tables_updtable(struct nft_ctx *ctx)
774
{
775
	struct nft_trans *trans;
776
	u32 flags;
777
	int ret = 0;
778

779 780
	if (!ctx->nla[NFTA_TABLE_FLAGS])
		return 0;
781

782 783 784 785
	flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
	if (flags & ~NFT_TABLE_F_DORMANT)
		return -EINVAL;

786 787 788
	if (flags == ctx->table->flags)
		return 0;

789 790 791 792
	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
				sizeof(struct nft_trans_table));
	if (trans == NULL)
		return -ENOMEM;
793

794 795
	if ((flags & NFT_TABLE_F_DORMANT) &&
	    !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
796
		nft_trans_table_enable(trans) = false;
797 798
	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
799
		ret = nf_tables_table_enable(ctx->net, ctx->table);
800
		if (ret >= 0) {
801
			ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
802
			nft_trans_table_enable(trans) = true;
803 804
		}
	}
805 806
	if (ret < 0)
		goto err;
807

808 809 810
	nft_trans_table_update(trans) = true;
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
	return 0;
811
err:
812
	nft_trans_destroy(trans);
813 814 815
	return ret;
}

816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
/*
 * Hash a chain name.  @data is a NUL-terminated string; @len is ignored and
 * the string length is used instead.
 */
static u32 nft_chain_hash(const void *data, u32 len, u32 seed)
{
	const char *chain_name = data;
	size_t name_len = strlen(chain_name);

	return jhash(chain_name, name_len, seed);
}

/* Hash a struct nft_chain by its name; @len is unused. */
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct nft_chain *chain = data;

	return nft_chain_hash(chain->name, 0, seed);
}

/*
 * rhashtable compare callback: returns 0 when the chain's name equals the
 * lookup key (a NUL-terminated string), non-zero otherwise.
 */
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
			      const void *ptr)
{
	const char *wanted = arg->key;
	const struct nft_chain *candidate = ptr;

	return strcmp(candidate->name, wanted);
}

839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866
/*
 * Hash an object key: the owning table pointer is folded into the seed and
 * the NUL-terminated object name is then hashed.  @len is ignored.
 */
static u32 nft_objname_hash(const void *data, u32 len, u32 seed)
{
	const struct nft_object_hash_key *key = data;
	u32 mixed_seed = seed ^ hash_ptr(key->table, 32);

	return jhash(key->name, strlen(key->name), mixed_seed);
}

/* Hash a struct nft_object by its embedded key; @len is unused. */
static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct nft_object *obj = data;

	return nft_objname_hash(&obj->key, 0, seed);
}

/*
 * rhashtable compare callback: returns 0 when the object belongs to the
 * same table as the lookup key and has the same name.  A table mismatch
 * yields -1; otherwise the strcmp() result on the names is returned.
 */
static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct nft_object_hash_key *wanted = arg->key;
	const struct nft_object *candidate = ptr;

	if (candidate->key.table == wanted->table)
		return strcmp(candidate->key.name, wanted->name);

	return -1;
}

867 868
static int nf_tables_newtable(struct net *net, struct sock *nlsk,
			      struct sk_buff *skb, const struct nlmsghdr *nlh,
869 870
			      const struct nlattr * const nla[],
			      struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
871 872
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
873
	u8 genmask = nft_genmask_next(net);
P
Patrick McHardy 已提交
874
	int family = nfmsg->nfgen_family;
875 876
	const struct nlattr *attr;
	struct nft_table *table;
877
	u32 flags = 0;
878
	struct nft_ctx ctx;
879
	int err;
P
Patrick McHardy 已提交
880

881
	lockdep_assert_held(&net->nft.commit_mutex);
882 883
	attr = nla[NFTA_TABLE_NAME];
	table = nft_table_lookup(net, attr, family, genmask);
P
Patrick McHardy 已提交
884 885 886
	if (IS_ERR(table)) {
		if (PTR_ERR(table) != -ENOENT)
			return PTR_ERR(table);
887
	} else {
888 889
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			NL_SET_BAD_ATTR(extack, attr);
P
Patrick McHardy 已提交
890
			return -EEXIST;
891
		}
P
Patrick McHardy 已提交
892 893
		if (nlh->nlmsg_flags & NLM_F_REPLACE)
			return -EOPNOTSUPP;
894

895
		nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
896
		return nf_tables_updtable(&ctx);
P
Patrick McHardy 已提交
897 898
	}

899 900 901 902 903 904
	if (nla[NFTA_TABLE_FLAGS]) {
		flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
		if (flags & ~NFT_TABLE_F_DORMANT)
			return -EINVAL;
	}

905
	err = -ENOMEM;
906
	table = kzalloc(sizeof(*table), GFP_KERNEL);
907
	if (table == NULL)
908
		goto err_kzalloc;
P
Patrick McHardy 已提交
909

910
	table->name = nla_strdup(attr, GFP_KERNEL);
911
	if (table->name == NULL)
912
		goto err_strdup;
913

914 915 916 917
	err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
	if (err)
		goto err_chain_ht;

P
Patrick McHardy 已提交
918
	INIT_LIST_HEAD(&table->chains);
919
	INIT_LIST_HEAD(&table->sets);
920
	INIT_LIST_HEAD(&table->objects);
921
	INIT_LIST_HEAD(&table->flowtables);
922
	table->family = family;
923
	table->flags = flags;
924
	table->handle = ++table_handle;
925

926
	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
927
	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
928
	if (err < 0)
929
		goto err_trans;
930

931
	list_add_tail_rcu(&table->list, &net->nft.tables);
P
Patrick McHardy 已提交
932
	return 0;
933
err_trans:
934 935
	rhltable_destroy(&table->chains_ht);
err_chain_ht:
936
	kfree(table->name);
937
err_strdup:
938
	kfree(table);
939
err_kzalloc:
940
	return err;
P
Patrick McHardy 已提交
941 942
}

943 944
static int nft_flush_table(struct nft_ctx *ctx)
{
945
	struct nft_flowtable *flowtable, *nft;
946
	struct nft_chain *chain, *nc;
947
	struct nft_object *obj, *ne;
948
	struct nft_set *set, *ns;
949
	int err;
950

951
	list_for_each_entry(chain, &ctx->table->chains, list) {
952 953 954
		if (!nft_is_active_next(ctx->net, chain))
			continue;

955 956 957 958 959 960 961 962
		ctx->chain = chain;

		err = nft_delrule_by_chain(ctx);
		if (err < 0)
			goto out;
	}

	list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
963 964 965
		if (!nft_is_active_next(ctx->net, set))
			continue;

966
		if (nft_set_is_anonymous(set) &&
967 968 969 970 971 972 973 974
		    !list_empty(&set->bindings))
			continue;

		err = nft_delset(ctx, set);
		if (err < 0)
			goto out;
	}

975 976 977 978 979 980
	list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) {
		err = nft_delflowtable(ctx, flowtable);
		if (err < 0)
			goto out;
	}

981 982 983 984 985 986
	list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) {
		err = nft_delobj(ctx, obj);
		if (err < 0)
			goto out;
	}

987
	list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
988 989 990
		if (!nft_is_active_next(ctx->net, chain))
			continue;

991 992 993 994 995 996 997
		ctx->chain = chain;

		err = nft_delchain(ctx);
		if (err < 0)
			goto out;
	}

998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
	err = nft_deltable(ctx);
out:
	return err;
}

static int nft_flush(struct nft_ctx *ctx, int family)
{
	struct nft_table *table, *nt;
	const struct nlattr * const *nla = ctx->nla;
	int err = 0;

1009
	list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) {
1010
		if (family != AF_UNSPEC && table->family != family)
1011 1012
			continue;

1013
		ctx->family = table->family;
1014

1015 1016
		if (!nft_is_active_next(ctx->net, table))
			continue;
1017

1018 1019 1020
		if (nla[NFTA_TABLE_NAME] &&
		    nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
			continue;
1021

1022 1023 1024 1025 1026
		ctx->table = table;

		err = nft_flush_table(ctx);
		if (err < 0)
			goto out;
1027 1028 1029 1030 1031
	}
out:
	return err;
}

1032 1033
static int nf_tables_deltable(struct net *net, struct sock *nlsk,
			      struct sk_buff *skb, const struct nlmsghdr *nlh,
1034 1035
			      const struct nlattr * const nla[],
			      struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
1036 1037
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1038
	u8 genmask = nft_genmask_next(net);
1039
	int family = nfmsg->nfgen_family;
1040 1041
	const struct nlattr *attr;
	struct nft_table *table;
1042
	struct nft_ctx ctx;
P
Patrick McHardy 已提交
1043

1044
	nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
1045 1046
	if (family == AF_UNSPEC ||
	    (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
1047 1048
		return nft_flush(&ctx, family);

1049 1050 1051 1052 1053 1054 1055
	if (nla[NFTA_TABLE_HANDLE]) {
		attr = nla[NFTA_TABLE_HANDLE];
		table = nft_table_lookup_byhandle(net, attr, genmask);
	} else {
		attr = nla[NFTA_TABLE_NAME];
		table = nft_table_lookup(net, attr, family, genmask);
	}
1056

1057 1058
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, attr);
P
Patrick McHardy 已提交
1059
		return PTR_ERR(table);
1060
	}
P
Patrick McHardy 已提交
1061

1062 1063 1064 1065
	if (nlh->nlmsg_flags & NLM_F_NONREC &&
	    table->use > 0)
		return -EBUSY;

1066
	ctx.family = family;
1067
	ctx.table = table;
1068

1069
	return nft_flush_table(&ctx);
P
Patrick McHardy 已提交
1070 1071
}

1072 1073
static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
1074 1075
	if (WARN_ON(ctx->table->use > 0))
		return;
1076

1077
	rhltable_destroy(&ctx->table->chains_ht);
1078
	kfree(ctx->table->name);
1079 1080 1081
	kfree(ctx->table);
}

1082
void nft_register_chain_type(const struct nft_chain_type *ctype)
P
Patrick McHardy 已提交
1083
{
1084
	if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO))
1085
		return;
1086

P
Patrick McHardy 已提交
1087
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
1088 1089 1090
	if (WARN_ON(chain_type[ctype->family][ctype->type] != NULL)) {
		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
		return;
P
Patrick McHardy 已提交
1091
	}
1092
	chain_type[ctype->family][ctype->type] = ctype;
P
Patrick McHardy 已提交
1093 1094
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
1095
EXPORT_SYMBOL_GPL(nft_register_chain_type);
P
Patrick McHardy 已提交
1096

1097
void nft_unregister_chain_type(const struct nft_chain_type *ctype)
P
Patrick McHardy 已提交
1098 1099
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
1100
	chain_type[ctype->family][ctype->type] = NULL;
P
Patrick McHardy 已提交
1101 1102
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
1103
EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
P
Patrick McHardy 已提交
1104 1105 1106 1107 1108 1109

/*
 * Chains
 */

static struct nft_chain *
1110
nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
P
Patrick McHardy 已提交
1111 1112 1113 1114
{
	struct nft_chain *chain;

	list_for_each_entry(chain, &table->chains, list) {
1115 1116
		if (chain->handle == handle &&
		    nft_active_genmask(chain, genmask))
P
Patrick McHardy 已提交
1117 1118 1119 1120 1121 1122
			return chain;
	}

	return ERR_PTR(-ENOENT);
}

1123
static bool lockdep_commit_lock_is_held(const struct net *net)
1124 1125 1126 1127 1128 1129 1130 1131 1132 1133
{
#ifdef CONFIG_PROVE_LOCKING
	return lockdep_is_held(&net->nft.commit_mutex);
#else
	return true;
#endif
}

/*
 * Look up a chain of @table by the name carried in netlink attribute @nla.
 *
 * The name is copied into a local buffer and used as the key into the
 * table's chain hash table; among the entries sharing that name, the first
 * one that is active in @genmask is returned.
 *
 * Warns when called with neither an RCU read-side section nor the commit
 * mutex held.
 *
 * Returns the chain, ERR_PTR(-EINVAL) when @nla is NULL, or
 * ERR_PTR(-ENOENT) when no active chain carries that name.
 */
static struct nft_chain *nft_chain_lookup(struct net *net,
					  struct nft_table *table,
					  const struct nlattr *nla, u8 genmask)
{
	struct nft_chain *match = ERR_PTR(-ENOENT);
	char search[NFT_CHAIN_MAXNAMELEN + 1];
	struct rhlist_head *pos, *bucket;
	struct nft_chain *chain;

	if (!nla)
		return ERR_PTR(-EINVAL);

	nla_strlcpy(search, nla, sizeof(search));

	WARN_ON(!(rcu_read_lock_held() ||
		  lockdep_commit_lock_is_held(net)));

	rcu_read_lock();
	bucket = rhltable_lookup(&table->chains_ht, search, nft_chain_ht_params);
	if (bucket) {
		rhl_for_each_entry_rcu(chain, pos, bucket, rhlhead) {
			if (nft_active_genmask(chain, genmask)) {
				match = chain;
				break;
			}
		}
	}
	rcu_read_unlock();

	return match;
}

static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
1165 1166
	[NFTA_CHAIN_TABLE]	= { .type = NLA_STRING,
				    .len = NFT_TABLE_MAXNAMELEN - 1 },
P
Patrick McHardy 已提交
1167 1168 1169 1170
	[NFTA_CHAIN_HANDLE]	= { .type = NLA_U64 },
	[NFTA_CHAIN_NAME]	= { .type = NLA_STRING,
				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
	[NFTA_CHAIN_HOOK]	= { .type = NLA_NESTED },
1171
	[NFTA_CHAIN_POLICY]	= { .type = NLA_U32 },
1172
	[NFTA_CHAIN_TYPE]	= { .type = NLA_STRING },
1173
	[NFTA_CHAIN_COUNTERS]	= { .type = NLA_NESTED },
1174
	[NFTA_CHAIN_FLAGS]	= { .type = NLA_U32 },
P
Patrick McHardy 已提交
1175 1176 1177 1178 1179
};

static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
	[NFTA_HOOK_HOOKNUM]	= { .type = NLA_U32 },
	[NFTA_HOOK_PRIORITY]	= { .type = NLA_U32 },
1180 1181
	[NFTA_HOOK_DEV]		= { .type = NLA_STRING,
				    .len = IFNAMSIZ - 1 },
P
Patrick McHardy 已提交
1182 1183
};

1184 1185 1186 1187
/*
 * Emit the packet/byte counters of a chain as a nested
 * NFTA_CHAIN_COUNTERS attribute.
 *
 * The per-cpu counters are summed over every possible cpu; each cpu's
 * pair is read inside its u64_stats sequence loop so that both values
 * belong to the same update.
 *
 * Returns 0 on success (also when @stats is NULL, in which case nothing
 * is emitted) and -ENOSPC when the attributes do not fit into @skb.
 */
static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
{
	u64 sum_pkts = 0, sum_bytes = 0;
	struct nlattr *counters;
	int cpu;

	if (!stats)
		return 0;

	for_each_possible_cpu(cpu) {
		struct nft_stats *cpu_stats = per_cpu_ptr(stats, cpu);
		unsigned int seq;
		u64 pkts, bytes;

		do {
			seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
			pkts = cpu_stats->pkts;
			bytes = cpu_stats->bytes;
		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));

		sum_pkts += pkts;
		sum_bytes += bytes;
	}

	counters = nla_nest_start_noflag(skb, NFTA_CHAIN_COUNTERS);
	if (!counters)
		return -ENOSPC;

	if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(sum_pkts),
			 NFTA_COUNTER_PAD) ||
	    nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(sum_bytes),
			 NFTA_COUNTER_PAD))
		return -ENOSPC;

	nla_nest_end(skb, counters);
	return 0;
}

1223 1224 1225
/*
 * Append one netlink message describing @chain to @skb.
 *
 * The message carries the table name, chain handle, chain name and use
 * counter. For base chains it additionally carries the hook description
 * (hook number, priority and, when set, the device name), the policy,
 * the chain type name and the counters.
 *
 * Returns 0 on success. On failure the partially written message is
 * trimmed from @skb and -1 is returned.
 */
static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
				     u32 portid, u32 seq, int event, u32 flags,
				     int family, const struct nft_table *table,
				     const struct nft_chain *chain)
{
	struct nfgenmsg *hdr;
	struct nlmsghdr *nlh;

	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
	if (!nlh)
		goto nla_put_failure;

	hdr = nlmsg_data(nlh);
	hdr->nfgen_family = family;
	hdr->version = NFNETLINK_V0;
	hdr->res_id = htons(net->nft.base_seq & 0xffff);

	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name) ||
	    nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle),
			 NFTA_CHAIN_PAD) ||
	    nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
		goto nla_put_failure;

	if (nft_is_base_chain(chain)) {
		const struct nft_base_chain *base = nft_base_chain(chain);
		const struct nf_hook_ops *ops = &base->ops;
		struct nft_stats __percpu *stats;
		struct nlattr *hook_nest;

		hook_nest = nla_nest_start_noflag(skb, NFTA_CHAIN_HOOK);
		if (!hook_nest)
			goto nla_put_failure;

		if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)) ||
		    nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
			goto nla_put_failure;

		/* the device name is only reported when one is recorded */
		if (base->dev_name[0] &&
		    nla_put_string(skb, NFTA_HOOK_DEV, base->dev_name))
			goto nla_put_failure;

		nla_nest_end(skb, hook_nest);

		if (nla_put_be32(skb, NFTA_CHAIN_POLICY, htonl(base->policy)) ||
		    nla_put_string(skb, NFTA_CHAIN_TYPE, base->type->name))
			goto nla_put_failure;

		stats = rcu_dereference_check(base->stats,
					      lockdep_commit_lock_is_held(net));
		if (nft_dump_stats(skb, stats))
			goto nla_put_failure;
	}

	if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, nlh);
	return -1;
}

1291
static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
P
Patrick McHardy 已提交
1292 1293 1294 1295
{
	struct sk_buff *skb;
	int err;

1296 1297
	if (!ctx->report &&
	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
1298
		return;
P
Patrick McHardy 已提交
1299 1300 1301 1302 1303

	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto err;

1304
	err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
1305
					event, 0, ctx->family, ctx->table,
1306
					ctx->chain);
P
Patrick McHardy 已提交
1307 1308 1309 1310 1311
	if (err < 0) {
		kfree_skb(skb);
		goto err;
	}

1312 1313 1314
	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
		       ctx->report, GFP_KERNEL);
	return;
P
Patrick McHardy 已提交
1315
err:
1316
	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
P
Patrick McHardy 已提交
1317 1318 1319 1320 1321 1322 1323 1324 1325
}

static int nf_tables_dump_chains(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	const struct nft_table *table;
	const struct nft_chain *chain;
	unsigned int idx = 0, s_idx = cb->args[0];
1326
	struct net *net = sock_net(skb->sk);
P
Patrick McHardy 已提交
1327 1328
	int family = nfmsg->nfgen_family;

1329
	rcu_read_lock();
1330 1331
	cb->seq = net->nft.base_seq;

1332
	list_for_each_entry_rcu(table, &net->nft.tables, list) {
1333
		if (family != NFPROTO_UNSPEC && family != table->family)
P
Patrick McHardy 已提交
1334 1335
			continue;

1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348
		list_for_each_entry_rcu(chain, &table->chains, list) {
			if (idx < s_idx)
				goto cont;
			if (idx > s_idx)
				memset(&cb->args[1], 0,
				       sizeof(cb->args) - sizeof(cb->args[0]));
			if (!nft_is_active(net, chain))
				continue;
			if (nf_tables_fill_chain_info(skb, net,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      NFT_MSG_NEWCHAIN,
						      NLM_F_MULTI,
1349
						      table->family, table,
1350 1351
						      chain) < 0)
				goto done;
1352

1353
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
P
Patrick McHardy 已提交
1354
cont:
1355
			idx++;
P
Patrick McHardy 已提交
1356 1357 1358
		}
	}
done:
1359
	rcu_read_unlock();
P
Patrick McHardy 已提交
1360 1361 1362 1363
	cb->args[0] = idx;
	return skb->len;
}

1364
/* called with rcu_read_lock held */
1365 1366
static int nf_tables_getchain(struct net *net, struct sock *nlsk,
			      struct sk_buff *skb, const struct nlmsghdr *nlh,
1367 1368
			      const struct nlattr * const nla[],
			      struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
1369 1370
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1371
	u8 genmask = nft_genmask_cur(net);
P
Patrick McHardy 已提交
1372
	const struct nft_chain *chain;
1373
	struct nft_table *table;
P
Patrick McHardy 已提交
1374 1375 1376 1377 1378 1379 1380
	struct sk_buff *skb2;
	int family = nfmsg->nfgen_family;
	int err;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.dump = nf_tables_dump_chains,
1381
			.module = THIS_MODULE,
P
Patrick McHardy 已提交
1382
		};
1383 1384

		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
P
Patrick McHardy 已提交
1385 1386
	}

1387
	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
1388 1389
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
P
Patrick McHardy 已提交
1390
		return PTR_ERR(table);
1391
	}
P
Patrick McHardy 已提交
1392

1393
	chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask);
1394 1395
	if (IS_ERR(chain)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
P
Patrick McHardy 已提交
1396
		return PTR_ERR(chain);
1397
	}
P
Patrick McHardy 已提交
1398

1399
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
P
Patrick McHardy 已提交
1400 1401 1402
	if (!skb2)
		return -ENOMEM;

1403
	err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
P
Patrick McHardy 已提交
1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415
					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
					family, table, chain);
	if (err < 0)
		goto err;

	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);

err:
	kfree_skb(skb2);
	return err;
}

1416 1417 1418 1419 1420
/*
 * Netlink attribute policy for the nested counter attributes; used by
 * nft_stats_alloc() when parsing NFTA_CHAIN_COUNTERS. Both counters are
 * 64-bit values.
 */
static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
	[NFTA_COUNTER_PACKETS]	= { .type = NLA_U64 },
	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
};

1421
/*
 * Allocate per-cpu chain statistics seeded from the nested counter
 * attribute @attr.
 *
 * Both NFTA_COUNTER_BYTES and NFTA_COUNTER_PACKETS must be present.
 *
 * Returns the new per-cpu statistics, or an ERR_PTR(): the parse error,
 * -EINVAL when a counter is missing, -ENOMEM when allocation fails.
 */
static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
{
	struct nlattr *tb[NFTA_COUNTER_MAX+1];
	struct nft_stats __percpu *newstats;
	struct nft_stats *local;
	u64 bytes, pkts;
	int ret;

	ret = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr,
					  nft_counter_policy, NULL);
	if (ret < 0)
		return ERR_PTR(ret);

	if (!(tb[NFTA_COUNTER_BYTES] && tb[NFTA_COUNTER_PACKETS]))
		return ERR_PTR(-EINVAL);

	newstats = netdev_alloc_pcpu_stats(struct nft_stats);
	if (!newstats)
		return ERR_PTR(-ENOMEM);

	bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
	pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));

	/* Restore old counters on this cpu, no problem. Per-cpu statistics
	 * are not exposed to userspace.
	 */
	preempt_disable();
	local = this_cpu_ptr(newstats);
	local->bytes = bytes;
	local->pkts = pkts;
	preempt_enable();

	return newstats;
}

1452
static void nft_chain_stats_replace(struct nft_trans *trans)
1453
{
1454
	struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain);
1455

1456
	if (!nft_trans_chain_stats(trans))
1457 1458
		return;

1459 1460 1461 1462
	rcu_swap_protected(chain->stats, nft_trans_chain_stats(trans),
			   lockdep_commit_lock_is_held(trans->ctx.net));

	if (!nft_trans_chain_stats(trans))
1463
		static_branch_inc(&nft_counters_enabled);
1464 1465
}

1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479
/*
 * Free the rule arrays of @chain: generation 0, generation 1 (only when
 * it is a different array than generation 0) and any pending next array.
 */
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
{
	struct nft_rule **gen0 = rcu_dereference_raw(chain->rules_gen_0);
	struct nft_rule **gen1 = rcu_dereference_raw(chain->rules_gen_1);

	kvfree(gen0);
	/* both generations may share one array; free it only once */
	if (gen1 != gen0)
		kvfree(gen1);

	/* should be NULL either via abort or via successful commit */
	WARN_ON_ONCE(chain->rules_next);
	kvfree(chain->rules_next);
}

1480
static void nf_tables_chain_destroy(struct nft_ctx *ctx)
1481
{
1482 1483
	struct nft_chain *chain = ctx->chain;

1484 1485
	if (WARN_ON(chain->use > 0))
		return;
1486

1487 1488 1489
	/* no concurrent access possible anymore */
	nf_tables_chain_free_chain_rules(chain);

1490
	if (nft_is_base_chain(chain)) {
1491 1492 1493
		struct nft_base_chain *basechain = nft_base_chain(chain);

		module_put(basechain->type->owner);
1494
		if (rcu_access_pointer(basechain->stats)) {
1495
			static_branch_dec(&nft_counters_enabled);
1496 1497
			free_percpu(rcu_dereference_raw(basechain->stats));
		}
1498
		kfree(chain->name);
1499
		kfree(basechain);
1500
	} else {
1501
		kfree(chain->name);
1502 1503 1504 1505
		kfree(chain);
	}
}

1506 1507
struct nft_chain_hook {
	u32				num;
1508
	s32				priority;
1509
	const struct nft_chain_type	*type;
1510 1511 1512 1513 1514
	struct net_device		*dev;
};

static int nft_chain_parse_hook(struct net *net,
				const struct nlattr * const nla[],
1515
				struct nft_chain_hook *hook, u8 family,
1516
				bool autoload)
1517 1518
{
	struct nlattr *ha[NFTA_HOOK_MAX + 1];
1519
	const struct nft_chain_type *type;
1520 1521 1522
	struct net_device *dev;
	int err;

1523 1524 1525
	lockdep_assert_held(&net->nft.commit_mutex);
	lockdep_nfnl_nft_mutex_not_held();

1526 1527 1528
	err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX,
					  nla[NFTA_CHAIN_HOOK],
					  nft_hook_policy, NULL);
1529 1530 1531 1532 1533 1534 1535 1536 1537 1538
	if (err < 0)
		return err;

	if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
	    ha[NFTA_HOOK_PRIORITY] == NULL)
		return -EINVAL;

	hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
	hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));

1539
	type = chain_type[family][NFT_CHAIN_T_DEFAULT];
1540
	if (nla[NFTA_CHAIN_TYPE]) {
1541
		type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
1542
						   family, autoload);
1543 1544 1545
		if (IS_ERR(type))
			return PTR_ERR(type);
	}
1546
	if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
1547
		return -EOPNOTSUPP;
1548 1549 1550 1551 1552

	if (type->type == NFT_CHAIN_T_NAT &&
	    hook->priority <= NF_IP_PRI_CONNTRACK)
		return -EOPNOTSUPP;

1553 1554 1555 1556 1557 1558
	if (!try_module_get(type->owner))
		return -ENOENT;

	hook->type = type;

	hook->dev = NULL;
1559
	if (family == NFPROTO_NETDEV) {
1560 1561 1562 1563 1564 1565 1566 1567
		char ifname[IFNAMSIZ];

		if (!ha[NFTA_HOOK_DEV]) {
			module_put(type->owner);
			return -EOPNOTSUPP;
		}

		nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
1568
		dev = __dev_get_by_name(net, ifname);
1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586
		if (!dev) {
			module_put(type->owner);
			return -ENOENT;
		}
		hook->dev = dev;
	} else if (ha[NFTA_HOOK_DEV]) {
		module_put(type->owner);
		return -EOPNOTSUPP;
	}

	return 0;
}

static void nft_chain_release_hook(struct nft_chain_hook *hook)
{
	module_put(hook->type->owner);
}

1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607
/*
 * Bookkeeping record for a rule array. nf_tables_chain_alloc_rules()
 * reserves room for one of these in addition to the rule pointers.
 * NOTE(review): presumably used to free a retired array after an RCU
 * grace period; the user of this struct is not in this part of the
 * file, so confirm there.
 */
struct nft_rules_old {
	struct rcu_head h;
	struct nft_rule **start;
};

/*
 * Allocate a rule pointer array able to hold @alloc rules.
 *
 * The allocation has room for @alloc pointers, one terminating NULL
 * pointer and one struct nft_rules_old. Requests whose pointer array
 * would exceed INT_MAX bytes are refused.
 *
 * Returns the array, or NULL on overflow or allocation failure.
 */
static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain,
						     unsigned int alloc)
{
	unsigned int slots, bytes;

	if (alloc > INT_MAX)
		return NULL;

	slots = alloc + 1;	/* NULL, ends rules */
	if (sizeof(struct nft_rule *) > INT_MAX / slots)
		return NULL;

	bytes = slots * sizeof(struct nft_rule *);
	bytes += sizeof(struct nft_rules_old);

	return kvmalloc(bytes, GFP_KERNEL);
}

1608
/*
 * Create a new chain in @ctx->table from the attributes in @ctx->nla.
 *
 * With NFTA_CHAIN_HOOK present a base chain is built: the hook is
 * parsed, optional counters are allocated and the hook ops are filled
 * in. Otherwise a regular chain is allocated. The chain then gets a
 * handle, a name and an empty rule array, its hook is registered, it is
 * inserted into the table's chain hash table and a NFT_MSG_NEWCHAIN
 * transaction is queued. Only then is it linked into the table's chain
 * list and the table use counter incremented.
 *
 * @policy is recorded in the transaction for base chains only; other
 * chains record -1.
 *
 * Returns 0 on success or a negative errno; on failure everything set
 * up so far is undone and the chain is destroyed.
 */
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
			      u8 policy, u32 flags)
{
	const struct nlattr * const *nla = ctx->nla;
	struct nft_table *table = ctx->table;
	struct net *net = ctx->net;
	struct nft_rule **rules;
	struct nft_trans *trans;
	struct nft_chain *chain;
	int err;

	if (table->use == UINT_MAX)
		return -EOVERFLOW;

	if (!nla[NFTA_CHAIN_HOOK]) {
		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
		if (!chain)
			return -ENOMEM;
	} else {
		struct nft_base_chain *basechain;
		struct nft_stats __percpu *stats;
		struct nft_chain_hook hook;
		struct nf_hook_ops *ops;

		err = nft_chain_parse_hook(net, nla, &hook, family, true);
		if (err < 0)
			return err;

		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
		if (!basechain) {
			nft_chain_release_hook(&hook);
			return -ENOMEM;
		}

		if (hook.dev)
			strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);

		if (nla[NFTA_CHAIN_COUNTERS]) {
			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
			if (IS_ERR(stats)) {
				nft_chain_release_hook(&hook);
				kfree(basechain);
				return PTR_ERR(stats);
			}
			rcu_assign_pointer(basechain->stats, stats);
			static_branch_inc(&nft_counters_enabled);
		}

		/* the module reference taken by the parser now belongs to
		 * the chain; nf_tables_chain_destroy() drops it
		 */
		basechain->type = hook.type;
		chain = &basechain->chain;

		ops = &basechain->ops;
		ops->pf = family;
		ops->hooknum = hook.num;
		ops->priority = hook.priority;
		ops->priv = chain;
		ops->hook = hook.type->hooks[ops->hooknum];
		ops->dev = hook.dev;

		chain->flags |= NFT_BASE_CHAIN | flags;
		basechain->policy = NF_ACCEPT;
		flow_block_init(&basechain->flow_block);
	}
	ctx->chain = chain;

	INIT_LIST_HEAD(&chain->rules);
	chain->handle = nf_tables_alloc_handle(table);
	chain->table = table;

	err = -ENOMEM;
	chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
	if (!chain->name)
		goto err_destroy_chain;

	rules = nf_tables_chain_alloc_rules(chain, 0);
	if (!rules)
		goto err_destroy_chain;

	/* both generations start out sharing one empty rule array */
	*rules = NULL;
	rcu_assign_pointer(chain->rules_gen_0, rules);
	rcu_assign_pointer(chain->rules_gen_1, rules);

	err = nf_tables_register_hook(net, table, chain);
	if (err < 0)
		goto err_destroy_chain;

	err = rhltable_insert_key(&table->chains_ht, chain->name,
				  &chain->rhlhead, nft_chain_ht_params);
	if (err)
		goto err_unregister_hook;

	trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		rhltable_remove(&table->chains_ht, &chain->rhlhead,
				nft_chain_ht_params);
		goto err_unregister_hook;
	}

	nft_trans_chain_policy(trans) = nft_is_base_chain(chain) ? policy : -1;

	table->use++;
	list_add_tail_rcu(&chain->list, &table->chains);

	return 0;

err_unregister_hook:
	nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
	nf_tables_chain_destroy(ctx);

	return err;
}

1725 1726
/*
 * Queue an update of the existing chain @ctx->chain.
 *
 * Checks performed before anything is queued:
 *  - @flags must equal the chain's current flags (-EOPNOTSUPP);
 *  - a given NFTA_CHAIN_HOOK must describe exactly the hook the base
 *    chain already uses: same type, hook number, priority and device
 *    (-EBUSY; also -EBUSY when the chain is not a base chain);
 *  - a rename (NFTA_CHAIN_HANDLE together with NFTA_CHAIN_NAME) must not
 *    collide with an existing chain or with a rename already pending in
 *    the commit list (-EEXIST);
 *  - counters are only accepted for base chains (-EOPNOTSUPP).
 *
 * On success a NFT_MSG_NEWCHAIN transaction flagged as update is added to
 * the commit list, carrying the new statistics, the policy (-1 when
 * NFTA_CHAIN_POLICY is absent) and the new name, if any.
 *
 * Returns 0 on success or a negative errno.
 */
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
			      u32 flags)
{
	const struct nlattr * const *nla = ctx->nla;
	struct nft_table *table = ctx->table;
	struct nft_chain *chain = ctx->chain;
	/* per-cpu pointer: comes from nft_stats_alloc(), freed with
	 * free_percpu()
	 */
	struct nft_stats __percpu *stats = NULL;
	struct nft_trans *trans;
	int err;

	if (chain->flags ^ flags)
		return -EOPNOTSUPP;

	if (nla[NFTA_CHAIN_HOOK]) {
		struct nft_base_chain *basechain;
		struct nft_chain_hook hook;
		struct nf_hook_ops *ops;
		bool mismatch;

		if (!nft_is_base_chain(chain))
			return -EBUSY;

		err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
					   false);
		if (err < 0)
			return err;

		basechain = nft_base_chain(chain);
		ops = &basechain->ops;
		mismatch = basechain->type != hook.type ||
			   ops->hooknum != hook.num ||
			   ops->priority != hook.priority ||
			   ops->dev != hook.dev;

		/* the parsed hook was only needed for the comparison */
		nft_chain_release_hook(&hook);
		if (mismatch)
			return -EBUSY;
	}

	if (nla[NFTA_CHAIN_HANDLE] &&
	    nla[NFTA_CHAIN_NAME]) {
		struct nft_chain *chain2;

		chain2 = nft_chain_lookup(ctx->net, table,
					  nla[NFTA_CHAIN_NAME], genmask);
		if (!IS_ERR(chain2))
			return -EEXIST;
	}

	if (nla[NFTA_CHAIN_COUNTERS]) {
		if (!nft_is_base_chain(chain))
			return -EOPNOTSUPP;

		stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
		if (IS_ERR(stats))
			return PTR_ERR(stats);
	}

	err = -ENOMEM;
	trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
				sizeof(struct nft_trans_chain));
	if (trans == NULL)
		goto err;

	nft_trans_chain_stats(trans) = stats;
	nft_trans_chain_update(trans) = true;

	if (nla[NFTA_CHAIN_POLICY])
		nft_trans_chain_policy(trans) = policy;
	else
		nft_trans_chain_policy(trans) = -1;

	if (nla[NFTA_CHAIN_HANDLE] &&
	    nla[NFTA_CHAIN_NAME]) {
		struct nft_trans *tmp;
		char *name;

		err = -ENOMEM;
		name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
		if (!name)
			goto err;

		/* refuse a name that an earlier, still pending rename in
		 * this table has already claimed
		 */
		err = -EEXIST;
		list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) {
			if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
			    tmp->ctx.table == table &&
			    nft_trans_chain_update(tmp) &&
			    nft_trans_chain_name(tmp) &&
			    strcmp(name, nft_trans_chain_name(tmp)) == 0) {
				kfree(name);
				goto err;
			}
		}

		nft_trans_chain_name(trans) = name;
	}
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);

	return 0;
err:
	free_percpu(stats);
	kfree(trans);
	return err;
}

1832 1833
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
			      struct sk_buff *skb, const struct nlmsghdr *nlh,
1834 1835
			      const struct nlattr * const nla[],
			      struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
1836 1837
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1838 1839
	u8 genmask = nft_genmask_next(net);
	int family = nfmsg->nfgen_family;
1840
	const struct nlattr *attr;
P
Patrick McHardy 已提交
1841 1842
	struct nft_table *table;
	struct nft_chain *chain;
1843
	u8 policy = NF_ACCEPT;
1844
	struct nft_ctx ctx;
P
Patrick McHardy 已提交
1845
	u64 handle = 0;
1846
	u32 flags = 0;
P
Patrick McHardy 已提交
1847

1848 1849
	lockdep_assert_held(&net->nft.commit_mutex);

1850
	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
1851 1852
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
P
Patrick McHardy 已提交
1853
		return PTR_ERR(table);
1854
	}
P
Patrick McHardy 已提交
1855 1856

	chain = NULL;
1857
	attr = nla[NFTA_CHAIN_NAME];
P
Patrick McHardy 已提交
1858 1859 1860

	if (nla[NFTA_CHAIN_HANDLE]) {
		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
1861
		chain = nft_chain_lookup_byhandle(table, handle, genmask);
1862 1863
		if (IS_ERR(chain)) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_HANDLE]);
P
Patrick McHardy 已提交
1864
			return PTR_ERR(chain);
1865 1866
		}
		attr = nla[NFTA_CHAIN_HANDLE];
P
Patrick McHardy 已提交
1867
	} else {
1868
		chain = nft_chain_lookup(net, table, attr, genmask);
P
Patrick McHardy 已提交
1869
		if (IS_ERR(chain)) {
1870 1871
			if (PTR_ERR(chain) != -ENOENT) {
				NL_SET_BAD_ATTR(extack, attr);
P
Patrick McHardy 已提交
1872
				return PTR_ERR(chain);
1873
			}
P
Patrick McHardy 已提交
1874 1875 1876 1877
			chain = NULL;
		}
	}

1878
	if (nla[NFTA_CHAIN_POLICY]) {
1879
		if (chain != NULL &&
1880 1881
		    !nft_is_base_chain(chain)) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
1882
			return -EOPNOTSUPP;
1883
		}
1884 1885

		if (chain == NULL &&
1886 1887
		    nla[NFTA_CHAIN_HOOK] == NULL) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_POLICY]);
1888
			return -EOPNOTSUPP;
1889
		}
1890

1891
		policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
1892 1893 1894 1895 1896 1897 1898 1899 1900
		switch (policy) {
		case NF_DROP:
		case NF_ACCEPT:
			break;
		default:
			return -EINVAL;
		}
	}

1901 1902
	if (nla[NFTA_CHAIN_FLAGS])
		flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
1903 1904
	else if (chain)
		flags = chain->flags;
1905

1906
	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
1907

P
Patrick McHardy 已提交
1908
	if (chain != NULL) {
1909 1910
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			NL_SET_BAD_ATTR(extack, attr);
P
Patrick McHardy 已提交
1911
			return -EEXIST;
1912
		}
P
Patrick McHardy 已提交
1913 1914 1915
		if (nlh->nlmsg_flags & NLM_F_REPLACE)
			return -EOPNOTSUPP;

1916
		return nf_tables_updchain(&ctx, genmask, policy, flags);
P
Patrick McHardy 已提交
1917 1918
	}

1919
	return nf_tables_addchain(&ctx, family, genmask, policy, flags);
P
Patrick McHardy 已提交
1920 1921
}

1922 1923
static int nf_tables_delchain(struct net *net, struct sock *nlsk,
			      struct sk_buff *skb, const struct nlmsghdr *nlh,
1924 1925
			      const struct nlattr * const nla[],
			      struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
1926 1927
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1928
	u8 genmask = nft_genmask_next(net);
1929 1930
	int family = nfmsg->nfgen_family;
	const struct nlattr *attr;
P
Patrick McHardy 已提交
1931 1932
	struct nft_table *table;
	struct nft_chain *chain;
1933
	struct nft_rule *rule;
1934
	struct nft_ctx ctx;
1935
	u64 handle;
1936 1937
	u32 use;
	int err;
P
Patrick McHardy 已提交
1938

1939
	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
1940 1941
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
P
Patrick McHardy 已提交
1942
		return PTR_ERR(table);
1943
	}
P
Patrick McHardy 已提交
1944

1945
	if (nla[NFTA_CHAIN_HANDLE]) {
1946 1947
		attr = nla[NFTA_CHAIN_HANDLE];
		handle = be64_to_cpu(nla_get_be64(attr));
1948
		chain = nft_chain_lookup_byhandle(table, handle, genmask);
1949
	} else {
1950
		attr = nla[NFTA_CHAIN_NAME];
1951
		chain = nft_chain_lookup(net, table, attr, genmask);
1952
	}
1953 1954
	if (IS_ERR(chain)) {
		NL_SET_BAD_ATTR(extack, attr);
P
Patrick McHardy 已提交
1955
		return PTR_ERR(chain);
1956
	}
1957 1958 1959

	if (nlh->nlmsg_flags & NLM_F_NONREC &&
	    chain->use > 0)
P
Patrick McHardy 已提交
1960 1961
		return -EBUSY;

1962
	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
1963

1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977
	use = chain->use;
	list_for_each_entry(rule, &chain->rules, list) {
		if (!nft_is_active_next(net, rule))
			continue;
		use--;

		err = nft_delrule(&ctx, rule);
		if (err < 0)
			return err;
	}

	/* There are rules and elements that are still holding references to us,
	 * we cannot do a recursive removal in this case.
	 */
1978 1979
	if (use > 0) {
		NL_SET_BAD_ATTR(extack, attr);
1980
		return -EBUSY;
1981
	}
1982

1983
	return nft_delchain(&ctx);
P
Patrick McHardy 已提交
1984 1985 1986 1987 1988 1989 1990
}

/*
 * Expressions
 */

/**
1991 1992
 *	nft_register_expr - register nf_tables expr type
 *	@ops: expr type
P
Patrick McHardy 已提交
1993
 *
1994
 *	Registers the expr type for use with nf_tables. Returns zero on
P
Patrick McHardy 已提交
1995 1996
 *	success or a negative errno code otherwise.
 */
1997
int nft_register_expr(struct nft_expr_type *type)
P
Patrick McHardy 已提交
1998 1999
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
2000
	if (type->family == NFPROTO_UNSPEC)
2001
		list_add_tail_rcu(&type->list, &nf_tables_expressions);
2002
	else
2003
		list_add_rcu(&type->list, &nf_tables_expressions);
P
Patrick McHardy 已提交
2004 2005 2006 2007 2008 2009
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
	return 0;
}
EXPORT_SYMBOL_GPL(nft_register_expr);

/**
2010 2011
 *	nft_unregister_expr - unregister nf_tables expr type
 *	@ops: expr type
P
Patrick McHardy 已提交
2012
 *
2013
 * 	Unregisters the expr typefor use with nf_tables.
P
Patrick McHardy 已提交
2014
 */
2015
void nft_unregister_expr(struct nft_expr_type *type)
P
Patrick McHardy 已提交
2016 2017
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
2018
	list_del_rcu(&type->list);
P
Patrick McHardy 已提交
2019 2020 2021 2022
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_expr);

2023 2024
/*
 * Find the registered expression type whose name matches @nla.
 *
 * A type registered for @family wins; a family-less type is only used as
 * long as no other candidate has been found. The whole list is walked,
 * so the last family match is the one returned.
 *
 * Returns the type or NULL when no type matches.
 */
static const struct nft_expr_type *__nft_expr_type_get(u8 family,
						       struct nlattr *nla)
{
	const struct nft_expr_type *best = NULL;
	const struct nft_expr_type *cur;

	list_for_each_entry(cur, &nf_tables_expressions, list) {
		if (nla_strcmp(nla, cur->name) != 0)
			continue;

		if ((!cur->family && !best) || cur->family == family)
			best = cur;
	}

	return best;
}

2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051
#ifdef CONFIG_MODULES
/*
 * Request the module "nft-expr-<family>-<name>" for the expression named
 * by @nla.
 *
 * Returns -EAGAIN when the expression type is registered afterwards and
 * 0 otherwise.
 */
static int nft_expr_type_request_module(struct net *net, u8 family,
					struct nlattr *nla)
{
	nft_request_module(net, "nft-expr-%u-%.*s", family,
			   nla_len(nla), (char *)nla_data(nla));

	return __nft_expr_type_get(family, nla) ? -EAGAIN : 0;
}
#endif

2052 2053
/*
 * Resolve the expression type named by @nla and take a reference on the
 * module owning it.
 *
 * When the type is not registered and module support is built in, the
 * family-specific module and then the generic "nft-expr-<name>" module
 * are requested; if the type is registered after either request,
 * ERR_PTR(-EAGAIN) is returned.
 *
 * Returns the type with a module reference held, ERR_PTR(-EINVAL) when
 * @nla is NULL, ERR_PTR(-EAGAIN) as described above, or ERR_PTR(-ENOENT)
 * when the type is unknown or its module reference cannot be taken.
 */
static const struct nft_expr_type *nft_expr_type_get(struct net *net,
						     u8 family,
						     struct nlattr *nla)
{
	const struct nft_expr_type *found;

	if (!nla)
		return ERR_PTR(-EINVAL);

	found = __nft_expr_type_get(family, nla);
	if (found && try_module_get(found->owner))
		return found;

	lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
	if (!found) {
		int rc = nft_expr_type_request_module(net, family, nla);

		if (rc == -EAGAIN)
			return ERR_PTR(-EAGAIN);

		/* fall back to the family-independent module name */
		nft_request_module(net, "nft-expr-%.*s",
				   nla_len(nla), (char *)nla_data(nla));
		if (__nft_expr_type_get(family, nla) != NULL)
			return ERR_PTR(-EAGAIN);
	}
#endif
	return ERR_PTR(-ENOENT);
}
}

/*
 * Netlink attribute policy for one expression: its name and a nested
 * block of expression-specific data. Used by nf_tables_expr_parse().
 */
static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
	[NFTA_EXPR_NAME]	= { .type = NLA_STRING },
	[NFTA_EXPR_DATA]	= { .type = NLA_NESTED },
};

static int nf_tables_fill_expr_info(struct sk_buff *skb,
				    const struct nft_expr *expr)
{
2088
	if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name))
P
Patrick McHardy 已提交
2089 2090 2091
		goto nla_put_failure;

	if (expr->ops->dump) {
2092 2093
		struct nlattr *data = nla_nest_start_noflag(skb,
							    NFTA_EXPR_DATA);
P
Patrick McHardy 已提交
2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106
		if (data == NULL)
			goto nla_put_failure;
		if (expr->ops->dump(skb, expr) < 0)
			goto nla_put_failure;
		nla_nest_end(skb, data);
	}

	return skb->len;

nla_put_failure:
	return -1;
};

2107 2108 2109 2110 2111
/*
 * Dump @expr into @skb wrapped in a nested attribute of type @attr.
 *
 * Returns 0 on success and -1 when the nest cannot be opened or the
 * expression cannot be written.
 */
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
		  const struct nft_expr *expr)
{
	struct nlattr *wrapper = nla_nest_start_noflag(skb, attr);

	if (!wrapper || nf_tables_fill_expr_info(skb, expr) < 0)
		return -1;

	nla_nest_end(skb, wrapper);
	return 0;
}

P
Patrick McHardy 已提交
2124 2125
struct nft_expr_info {
	const struct nft_expr_ops	*ops;
2126
	struct nlattr			*tb[NFT_EXPR_MAXATTR + 1];
P
Patrick McHardy 已提交
2127 2128
};

2129 2130
static int nf_tables_expr_parse(const struct nft_ctx *ctx,
				const struct nlattr *nla,
P
Patrick McHardy 已提交
2131 2132
				struct nft_expr_info *info)
{
2133
	const struct nft_expr_type *type;
P
Patrick McHardy 已提交
2134
	const struct nft_expr_ops *ops;
2135
	struct nlattr *tb[NFTA_EXPR_MAX + 1];
P
Patrick McHardy 已提交
2136 2137
	int err;

2138 2139
	err = nla_parse_nested_deprecated(tb, NFTA_EXPR_MAX, nla,
					  nft_expr_policy, NULL);
P
Patrick McHardy 已提交
2140 2141 2142
	if (err < 0)
		return err;

2143
	type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]);
2144 2145 2146 2147
	if (IS_ERR(type))
		return PTR_ERR(type);

	if (tb[NFTA_EXPR_DATA]) {
2148 2149 2150
		err = nla_parse_nested_deprecated(info->tb, type->maxattr,
						  tb[NFTA_EXPR_DATA],
						  type->policy, NULL);
2151 2152 2153 2154 2155 2156
		if (err < 0)
			goto err1;
	} else
		memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1));

	if (type->select_ops != NULL) {
2157 2158
		ops = type->select_ops(ctx,
				       (const struct nlattr * const *)info->tb);
2159 2160
		if (IS_ERR(ops)) {
			err = PTR_ERR(ops);
2161 2162 2163 2164 2165 2166
#ifdef CONFIG_MODULES
			if (err == -EAGAIN)
				nft_expr_type_request_module(ctx->net,
							     ctx->family,
							     tb[NFTA_EXPR_NAME]);
#endif
2167 2168 2169 2170 2171
			goto err1;
		}
	} else
		ops = type->ops;

P
Patrick McHardy 已提交
2172 2173
	info->ops = ops;
	return 0;
2174 2175 2176 2177

err1:
	module_put(type->owner);
	return err;
P
Patrick McHardy 已提交
2178 2179 2180
}

static int nf_tables_newexpr(const struct nft_ctx *ctx,
2181
			     const struct nft_expr_info *info,
P
Patrick McHardy 已提交
2182 2183 2184 2185 2186 2187 2188
			     struct nft_expr *expr)
{
	const struct nft_expr_ops *ops = info->ops;
	int err;

	expr->ops = ops;
	if (ops->init) {
2189
		err = ops->init(ctx, expr, (const struct nlattr **)info->tb);
P
Patrick McHardy 已提交
2190 2191 2192 2193 2194 2195 2196 2197 2198 2199
		if (err < 0)
			goto err1;
	}

	return 0;
err1:
	expr->ops = NULL;
	return err;
}

2200 2201
static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
				   struct nft_expr *expr)
P
Patrick McHardy 已提交
2202
{
2203 2204
	const struct nft_expr_type *type = expr->ops->type;

P
Patrick McHardy 已提交
2205
	if (expr->ops->destroy)
2206
		expr->ops->destroy(ctx, expr);
2207
	module_put(type->owner);
P
Patrick McHardy 已提交
2208 2209
}

2210 2211 2212 2213 2214
/*
 * Parse, allocate and initialise a stand-alone expression from @nla.
 *
 * Returns the new expression, or an ERR_PTR(). On failure after a
 * successful parse the type's ->release_ops() callback (if any) is called
 * and the module reference taken during parsing is dropped.
 */
struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
			       const struct nlattr *nla)
{
	struct nft_expr_info info;
	struct nft_expr *expr;
	struct module *owner;
	int err;

	err = nf_tables_expr_parse(ctx, nla, &info);
	if (err < 0)
		return ERR_PTR(err);

	expr = kzalloc(info.ops->size, GFP_KERNEL);
	if (!expr) {
		err = -ENOMEM;
		goto release;
	}

	err = nf_tables_newexpr(ctx, &info, expr);
	if (err < 0) {
		kfree(expr);
		goto release;
	}

	return expr;

release:
	/* Sample the owner before ->release_ops() gets to touch the ops. */
	owner = info.ops->type->owner;
	if (info.ops->type->release_ops)
		info.ops->type->release_ops(info.ops);

	module_put(owner);
	return ERR_PTR(err);
}

/* Destroy @expr via nf_tables_expr_destroy() and free its memory. */
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
{
	nf_tables_expr_destroy(ctx, expr);
	kfree(expr);
}

P
Patrick McHardy 已提交
2250 2251 2252 2253
/*
 * Rules
 */

2254 2255
/*
 * Find the rule with the given @handle in @chain.
 *
 * Returns the rule or ERR_PTR(-ENOENT).
 */
static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
					  u64 handle)
{
	struct nft_rule *pos;

	/* FIXME: this is a linear walk over every rule of the chain */
	list_for_each_entry_rcu(pos, &chain->rules, list) {
		if (pos->handle != handle)
			continue;
		return pos;
	}

	return ERR_PTR(-ENOENT);
}

2268 2269
static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
					const struct nlattr *nla)
P
Patrick McHardy 已提交
2270 2271 2272 2273
{
	if (nla == NULL)
		return ERR_PTR(-EINVAL);

2274
	return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
P
Patrick McHardy 已提交
2275 2276 2277
}

static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
2278 2279
	[NFTA_RULE_TABLE]	= { .type = NLA_STRING,
				    .len = NFT_TABLE_MAXNAMELEN - 1 },
P
Patrick McHardy 已提交
2280 2281 2282 2283
	[NFTA_RULE_CHAIN]	= { .type = NLA_STRING,
				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
	[NFTA_RULE_HANDLE]	= { .type = NLA_U64 },
	[NFTA_RULE_EXPRESSIONS]	= { .type = NLA_NESTED },
2284
	[NFTA_RULE_COMPAT]	= { .type = NLA_NESTED },
2285
	[NFTA_RULE_POSITION]	= { .type = NLA_U64 },
2286 2287
	[NFTA_RULE_USERDATA]	= { .type = NLA_BINARY,
				    .len = NFT_USERDATA_MAXLEN },
2288
	[NFTA_RULE_ID]		= { .type = NLA_U32 },
2289
	[NFTA_RULE_POSITION_ID]	= { .type = NLA_U32 },
P
Patrick McHardy 已提交
2290 2291
};

2292 2293 2294
/*
 * Build one rule message in @skb.
 *
 * The message carries the table name, chain name and rule handle, the
 * handle of @prule as NFTA_RULE_POSITION (only when @prule is given and
 * @event is not NFT_MSG_DELRULE), the list of expressions and, when the
 * rule has any, its user data.
 *
 * Returns 0 on success. On failure the message is trimmed back to where it
 * started and -1 is returned.
 */
static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
				    u32 portid, u32 seq, int event,
				    u32 flags, int family,
				    const struct nft_table *table,
				    const struct nft_chain *chain,
				    const struct nft_rule *rule,
				    const struct nft_rule *prule)
{
	u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
	const struct nft_expr *expr, *next;
	struct nfgenmsg *nfmsg;
	struct nlmsghdr *nlh;
	struct nlattr *exprs;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
	if (!nlh)
		goto nla_put_failure;

	nfmsg = nlmsg_data(nlh);
	nfmsg->nfgen_family	= family;
	nfmsg->version		= NFNETLINK_V0;
	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);

	if (nla_put_string(skb, NFTA_RULE_TABLE, table->name) ||
	    nla_put_string(skb, NFTA_RULE_CHAIN, chain->name) ||
	    nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle),
			 NFTA_RULE_PAD))
		goto nla_put_failure;

	if (prule && event != NFT_MSG_DELRULE &&
	    nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(prule->handle),
			 NFTA_RULE_PAD))
		goto nla_put_failure;

	exprs = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS);
	if (!exprs)
		goto nla_put_failure;
	nft_rule_for_each_expr(expr, next, rule) {
		if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
			goto nla_put_failure;
	}
	nla_nest_end(skb, exprs);

	if (rule->udata) {
		struct nft_userdata *udata = nft_userdata(rule);

		/* udata->len holds the length minus one (see nf_tables_newrule) */
		if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1,
			    udata->data) < 0)
			goto nla_put_failure;
	}

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, nlh);
	return -1;
}

2354 2355
static void nf_tables_rule_notify(const struct nft_ctx *ctx,
				  const struct nft_rule *rule, int event)
P
Patrick McHardy 已提交
2356 2357 2358 2359
{
	struct sk_buff *skb;
	int err;

2360 2361
	if (!ctx->report &&
	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
2362
		return;
P
Patrick McHardy 已提交
2363 2364 2365 2366 2367

	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto err;

2368
	err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
2369
				       event, 0, ctx->family, ctx->table,
2370
				       ctx->chain, rule, NULL);
P
Patrick McHardy 已提交
2371 2372 2373 2374 2375
	if (err < 0) {
		kfree_skb(skb);
		goto err;
	}

2376 2377 2378
	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
		       ctx->report, GFP_KERNEL);
	return;
P
Patrick McHardy 已提交
2379
err:
2380
	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
P
Patrick McHardy 已提交
2381 2382
}

2383
/*
 * Optional filter for a rule dump. Both strings are allocated in
 * nf_tables_dump_rules_start() and freed in nf_tables_dump_rules_done().
 */
struct nft_rule_dump_ctx {
	char *table;	/* table name to restrict the dump to, or NULL */
	char *chain;	/* chain name to restrict the dump to, or NULL */
};

2388 2389 2390 2391 2392 2393 2394
static int __nf_tables_dump_rules(struct sk_buff *skb,
				  unsigned int *idx,
				  struct netlink_callback *cb,
				  const struct nft_table *table,
				  const struct nft_chain *chain)
{
	struct net *net = sock_net(skb->sk);
2395
	const struct nft_rule *rule, *prule;
2396 2397
	unsigned int s_idx = cb->args[0];

2398
	prule = NULL;
2399 2400
	list_for_each_entry_rcu(rule, &chain->rules, list) {
		if (!nft_is_active(net, rule))
2401
			goto cont_skip;
2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412
		if (*idx < s_idx)
			goto cont;
		if (*idx > s_idx) {
			memset(&cb->args[1], 0,
					sizeof(cb->args) - sizeof(cb->args[0]));
		}
		if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
					cb->nlh->nlmsg_seq,
					NFT_MSG_NEWRULE,
					NLM_F_MULTI | NLM_F_APPEND,
					table->family,
2413
					table, chain, rule, prule) < 0)
2414
			return 1;
2415 2416 2417

		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
2418 2419
		prule = rule;
cont_skip:
2420 2421
		(*idx)++;
	}
2422
	return 0;
2423 2424
}

P
Patrick McHardy 已提交
2425 2426 2427 2428
static int nf_tables_dump_rules(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
2429
	const struct nft_rule_dump_ctx *ctx = cb->data;
2430
	struct nft_table *table;
P
Patrick McHardy 已提交
2431
	const struct nft_chain *chain;
2432
	unsigned int idx = 0;
2433
	struct net *net = sock_net(skb->sk);
P
Patrick McHardy 已提交
2434 2435
	int family = nfmsg->nfgen_family;

2436
	rcu_read_lock();
2437 2438
	cb->seq = net->nft.base_seq;

2439
	list_for_each_entry_rcu(table, &net->nft.tables, list) {
2440
		if (family != NFPROTO_UNSPEC && family != table->family)
2441 2442 2443
			continue;

		if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
P
Patrick McHardy 已提交
2444 2445
			continue;

2446
		if (ctx && ctx->table && ctx->chain) {
2447
			struct rhlist_head *list, *tmp;
2448

2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459
			list = rhltable_lookup(&table->chains_ht, ctx->chain,
					       nft_chain_ht_params);
			if (!list)
				goto done;

			rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
				if (!nft_is_active(net, chain))
					continue;
				__nf_tables_dump_rules(skb, &idx,
						       cb, table, chain);
				break;
P
Patrick McHardy 已提交
2460
			}
2461
			goto done;
P
Patrick McHardy 已提交
2462
		}
2463 2464 2465 2466 2467 2468 2469 2470

		list_for_each_entry_rcu(chain, &table->chains, list) {
			if (__nf_tables_dump_rules(skb, &idx, cb, table, chain))
				goto done;
		}

		if (ctx && ctx->table)
			break;
P
Patrick McHardy 已提交
2471 2472
	}
done:
2473
	rcu_read_unlock();
2474 2475

	cb->args[0] = idx;
P
Patrick McHardy 已提交
2476 2477 2478
	return skb->len;
}

2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511
/*
 * Dump start callback: cb->data arrives as the request's attribute table
 * and is replaced with a newly allocated filter context, or with NULL when
 * neither a table nor a chain name was supplied.
 *
 * Returns 0 on success, -ENOMEM if an allocation fails.
 */
static int nf_tables_dump_rules_start(struct netlink_callback *cb)
{
	const struct nlattr * const *nla = cb->data;
	struct nft_rule_dump_ctx *ctx;

	if (!nla[NFTA_RULE_TABLE] && !nla[NFTA_RULE_CHAIN]) {
		cb->data = NULL;
		return 0;
	}

	ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
	if (!ctx)
		return -ENOMEM;

	if (nla[NFTA_RULE_TABLE]) {
		ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
		if (!ctx->table)
			goto err_free;
	}
	if (nla[NFTA_RULE_CHAIN]) {
		ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC);
		if (!ctx->chain)
			goto err_free;
	}

	cb->data = ctx;
	return 0;

err_free:
	/* ctx was zero-allocated, so an unset table pointer is NULL here */
	kfree(ctx->table);
	kfree(ctx);
	return -ENOMEM;
}

2512 2513
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
2514 2515 2516 2517
	struct nft_rule_dump_ctx *ctx = cb->data;

	if (ctx) {
		kfree(ctx->table);
2518
		kfree(ctx->chain);
2519 2520
		kfree(ctx);
	}
2521 2522 2523
	return 0;
}

2524
/* called with rcu_read_lock held */
2525 2526
static int nf_tables_getrule(struct net *net, struct sock *nlsk,
			     struct sk_buff *skb, const struct nlmsghdr *nlh,
2527 2528
			     const struct nlattr * const nla[],
			     struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
2529 2530
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2531
	u8 genmask = nft_genmask_cur(net);
P
Patrick McHardy 已提交
2532 2533
	const struct nft_chain *chain;
	const struct nft_rule *rule;
2534
	struct nft_table *table;
P
Patrick McHardy 已提交
2535 2536 2537 2538 2539 2540
	struct sk_buff *skb2;
	int family = nfmsg->nfgen_family;
	int err;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
2541
			.start= nf_tables_dump_rules_start,
P
Patrick McHardy 已提交
2542
			.dump = nf_tables_dump_rules,
2543
			.done = nf_tables_dump_rules_done,
2544
			.module = THIS_MODULE,
2545
			.data = (void *)nla,
P
Patrick McHardy 已提交
2546
		};
2547

2548
		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
P
Patrick McHardy 已提交
2549 2550
	}

2551
	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
2552 2553
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
P
Patrick McHardy 已提交
2554
		return PTR_ERR(table);
2555
	}
P
Patrick McHardy 已提交
2556

2557
	chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
2558 2559
	if (IS_ERR(chain)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
P
Patrick McHardy 已提交
2560
		return PTR_ERR(chain);
2561
	}
P
Patrick McHardy 已提交
2562

2563
	rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
2564 2565
	if (IS_ERR(rule)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
P
Patrick McHardy 已提交
2566
		return PTR_ERR(rule);
2567
	}
P
Patrick McHardy 已提交
2568

2569
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
P
Patrick McHardy 已提交
2570 2571 2572
	if (!skb2)
		return -ENOMEM;

2573
	err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
P
Patrick McHardy 已提交
2574
				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
2575
				       family, table, chain, rule, NULL);
P
Patrick McHardy 已提交
2576 2577 2578 2579 2580 2581 2582 2583 2584 2585
	if (err < 0)
		goto err;

	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);

err:
	kfree_skb(skb2);
	return err;
}

2586 2587
static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
				   struct nft_rule *rule)
P
Patrick McHardy 已提交
2588
{
2589
	struct nft_expr *expr, *next;
P
Patrick McHardy 已提交
2590 2591 2592 2593 2594 2595

	/*
	 * Careful: some expressions might not be initialized in case this
	 * is called on error from nf_tables_newrule().
	 */
	expr = nft_expr_first(rule);
2596
	while (expr != nft_expr_last(rule) && expr->ops) {
2597
		next = nft_expr_next(expr);
2598
		nf_tables_expr_destroy(ctx, expr);
2599
		expr = next;
P
Patrick McHardy 已提交
2600 2601 2602 2603
	}
	kfree(rule);
}

2604 2605 2606
static void nf_tables_rule_release(const struct nft_ctx *ctx,
				   struct nft_rule *rule)
{
2607
	nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
2608 2609 2610
	nf_tables_rule_destroy(ctx, rule);
}

2611 2612 2613 2614 2615 2616 2617
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
	struct nft_expr *expr, *last;
	const struct nft_data *data;
	struct nft_rule *rule;
	int err;

2618 2619 2620
	if (ctx->level == NFT_JUMP_STACK_SIZE)
		return -EMLINK;

2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660
	list_for_each_entry(rule, &chain->rules, list) {
		if (!nft_is_active_next(ctx->net, rule))
			continue;

		nft_rule_for_each_expr(expr, last, rule) {
			if (!expr->ops->validate)
				continue;

			err = expr->ops->validate(ctx, expr, &data);
			if (err < 0)
				return err;
		}
	}

	return 0;
}
EXPORT_SYMBOL_GPL(nft_chain_validate);

/*
 * Validate every base chain of @table with nft_chain_validate().
 *
 * Returns 0, or the first negative value a chain validation returned.
 */
static int nft_table_validate(struct net *net, const struct nft_table *table)
{
	struct nft_ctx ctx = {
		.net	= net,
		.family	= table->family,
	};
	struct nft_chain *chain;
	int err;

	list_for_each_entry(chain, &table->chains, list) {
		if (nft_is_base_chain(chain)) {
			ctx.chain = chain;
			err = nft_chain_validate(&ctx, chain);
			if (err < 0)
				return err;
		}
	}

	return 0;
}

2661 2662 2663
/* Forward declaration: nf_tables_newrule() uses it before the definition. */
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
					     const struct nlattr *nla);

2664 2665
/* Upper bound on the number of expressions nf_tables_newrule() accepts. */
#define NFT_RULE_MAXEXPRS	128

2666 2667
static int nf_tables_newrule(struct net *net, struct sock *nlsk,
			     struct sk_buff *skb, const struct nlmsghdr *nlh,
2668 2669
			     const struct nlattr * const nla[],
			     struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
2670 2671
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2672
	u8 genmask = nft_genmask_next(net);
2673
	struct nft_expr_info *info = NULL;
2674
	int family = nfmsg->nfgen_family;
2675
	struct nft_flow_rule *flow;
P
Patrick McHardy 已提交
2676 2677 2678
	struct nft_table *table;
	struct nft_chain *chain;
	struct nft_rule *rule, *old_rule = NULL;
2679
	struct nft_userdata *udata;
2680
	struct nft_trans *trans = NULL;
P
Patrick McHardy 已提交
2681 2682 2683
	struct nft_expr *expr;
	struct nft_ctx ctx;
	struct nlattr *tmp;
2684
	unsigned int size, i, n, ulen = 0, usize = 0;
P
Patrick McHardy 已提交
2685
	int err, rem;
2686
	u64 handle, pos_handle;
P
Patrick McHardy 已提交
2687

2688 2689
	lockdep_assert_held(&net->nft.commit_mutex);

2690
	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
2691 2692
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
P
Patrick McHardy 已提交
2693
		return PTR_ERR(table);
2694
	}
P
Patrick McHardy 已提交
2695

2696
	chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
2697 2698
	if (IS_ERR(chain)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
P
Patrick McHardy 已提交
2699
		return PTR_ERR(chain);
2700
	}
P
Patrick McHardy 已提交
2701 2702 2703

	if (nla[NFTA_RULE_HANDLE]) {
		handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
2704
		rule = __nft_rule_lookup(chain, handle);
2705 2706
		if (IS_ERR(rule)) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
P
Patrick McHardy 已提交
2707
			return PTR_ERR(rule);
2708
		}
P
Patrick McHardy 已提交
2709

2710 2711
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
P
Patrick McHardy 已提交
2712
			return -EEXIST;
2713
		}
P
Patrick McHardy 已提交
2714 2715 2716 2717 2718
		if (nlh->nlmsg_flags & NLM_F_REPLACE)
			old_rule = rule;
		else
			return -EOPNOTSUPP;
	} else {
2719 2720
		if (!(nlh->nlmsg_flags & NLM_F_CREATE) ||
		    nlh->nlmsg_flags & NLM_F_REPLACE)
P
Patrick McHardy 已提交
2721 2722
			return -EINVAL;
		handle = nf_tables_alloc_handle(table);
2723 2724 2725

		if (chain->use == UINT_MAX)
			return -EOVERFLOW;
2726

2727 2728 2729 2730 2731 2732 2733
		if (nla[NFTA_RULE_POSITION]) {
			pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
			old_rule = __nft_rule_lookup(chain, pos_handle);
			if (IS_ERR(old_rule)) {
				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
				return PTR_ERR(old_rule);
			}
2734 2735 2736 2737 2738 2739
		} else if (nla[NFTA_RULE_POSITION_ID]) {
			old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
			if (IS_ERR(old_rule)) {
				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
				return PTR_ERR(old_rule);
			}
2740
		}
2741 2742
	}

2743
	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
2744

P
Patrick McHardy 已提交
2745 2746 2747
	n = 0;
	size = 0;
	if (nla[NFTA_RULE_EXPRESSIONS]) {
2748 2749 2750 2751 2752 2753
		info = kvmalloc_array(NFT_RULE_MAXEXPRS,
				      sizeof(struct nft_expr_info),
				      GFP_KERNEL);
		if (!info)
			return -ENOMEM;

P
Patrick McHardy 已提交
2754 2755 2756 2757 2758 2759
		nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
			err = -EINVAL;
			if (nla_type(tmp) != NFTA_LIST_ELEM)
				goto err1;
			if (n == NFT_RULE_MAXEXPRS)
				goto err1;
2760
			err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
P
Patrick McHardy 已提交
2761 2762 2763 2764 2765 2766
			if (err < 0)
				goto err1;
			size += info[n].ops->size;
			n++;
		}
	}
2767 2768 2769 2770
	/* Check for overflow of dlen field */
	err = -EFBIG;
	if (size >= 1 << 12)
		goto err1;
P
Patrick McHardy 已提交
2771

2772
	if (nla[NFTA_RULE_USERDATA]) {
2773
		ulen = nla_len(nla[NFTA_RULE_USERDATA]);
2774 2775 2776
		if (ulen > 0)
			usize = sizeof(struct nft_userdata) + ulen;
	}
2777

P
Patrick McHardy 已提交
2778
	err = -ENOMEM;
2779
	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
P
Patrick McHardy 已提交
2780 2781 2782
	if (rule == NULL)
		goto err1;

2783
	nft_activate_next(net, rule);
2784

P
Patrick McHardy 已提交
2785 2786
	rule->handle = handle;
	rule->dlen   = size;
2787
	rule->udata  = ulen ? 1 : 0;
2788

2789 2790 2791 2792 2793
	if (ulen) {
		udata = nft_userdata(rule);
		udata->len = ulen - 1;
		nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen);
	}
P
Patrick McHardy 已提交
2794 2795 2796 2797 2798 2799

	expr = nft_expr_first(rule);
	for (i = 0; i < n; i++) {
		err = nf_tables_newexpr(&ctx, &info[i], expr);
		if (err < 0)
			goto err2;
2800 2801 2802 2803

		if (info[i].ops->validate)
			nft_validate_state_update(net, NFT_VALIDATE_NEED);

2804
		info[i].ops = NULL;
P
Patrick McHardy 已提交
2805 2806 2807 2808
		expr = nft_expr_next(expr);
	}

	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
2809
		trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
2810 2811 2812 2813
		if (trans == NULL) {
			err = -ENOMEM;
			goto err2;
		}
2814 2815 2816
		err = nft_delrule(&ctx, old_rule);
		if (err < 0) {
			nft_trans_destroy(trans);
2817 2818 2819 2820 2821
			goto err2;
		}

		list_add_tail_rcu(&rule->list, &old_rule->list);
	} else {
2822 2823
		trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
		if (!trans) {
2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838
			err = -ENOMEM;
			goto err2;
		}

		if (nlh->nlmsg_flags & NLM_F_APPEND) {
			if (old_rule)
				list_add_rcu(&rule->list, &old_rule->list);
			else
				list_add_tail_rcu(&rule->list, &chain->rules);
		 } else {
			if (old_rule)
				list_add_tail_rcu(&rule->list, &old_rule->list);
			else
				list_add_rcu(&rule->list, &chain->rules);
		}
2839
	}
2840
	kvfree(info);
2841
	chain->use++;
P
Patrick McHardy 已提交
2842

2843 2844 2845
	if (net->nft.validate_state == NFT_VALIDATE_DO)
		return nft_table_validate(net, table);

2846 2847 2848 2849 2850 2851 2852 2853
	if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
		flow = nft_flow_rule_create(rule);
		if (IS_ERR(flow))
			return PTR_ERR(flow);

		nft_trans_flow_rule(trans) = flow;
	}

2854
	return 0;
P
Patrick McHardy 已提交
2855
err2:
2856
	nf_tables_rule_release(&ctx, rule);
P
Patrick McHardy 已提交
2857 2858
err1:
	for (i = 0; i < n; i++) {
2859
		if (info[i].ops) {
2860
			module_put(info[i].ops->type->owner);
2861 2862 2863
			if (info[i].ops->type->release_ops)
				info[i].ops->type->release_ops(info[i].ops);
		}
P
Patrick McHardy 已提交
2864
	}
2865
	kvfree(info);
P
Patrick McHardy 已提交
2866 2867 2868
	return err;
}

2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884
/*
 * Find a rule added earlier in the pending transaction list by the 32-bit
 * id carried in @nla.
 *
 * Returns the rule or ERR_PTR(-ENOENT).
 *
 * NOTE(review): only the message type and the id are compared; nothing
 * here checks which chain the rule belongs to.
 */
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
					     const struct nlattr *nla)
{
	u32 id = ntohl(nla_get_be32(nla));
	struct nft_trans *trans;

	list_for_each_entry(trans, &net->nft.commit_list, list) {
		if (trans->msg_type != NFT_MSG_NEWRULE)
			continue;
		if (nft_trans_rule_id(trans) == id)
			return nft_trans_rule(trans);
	}
	return ERR_PTR(-ENOENT);
}

2885 2886
static int nf_tables_delrule(struct net *net, struct sock *nlsk,
			     struct sk_buff *skb, const struct nlmsghdr *nlh,
2887 2888
			     const struct nlattr * const nla[],
			     struct netlink_ext_ack *extack)
P
Patrick McHardy 已提交
2889 2890
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2891
	u8 genmask = nft_genmask_next(net);
2892
	struct nft_table *table;
2893 2894
	struct nft_chain *chain = NULL;
	struct nft_rule *rule;
2895 2896
	int family = nfmsg->nfgen_family, err = 0;
	struct nft_ctx ctx;
P
Patrick McHardy 已提交
2897

2898
	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
2899 2900
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
P
Patrick McHardy 已提交
2901
		return PTR_ERR(table);
2902
	}
P
Patrick McHardy 已提交
2903

2904
	if (nla[NFTA_RULE_CHAIN]) {
2905 2906
		chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
					 genmask);
2907 2908
		if (IS_ERR(chain)) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
2909
			return PTR_ERR(chain);
2910
		}
2911
	}
P
Patrick McHardy 已提交
2912

2913
	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
2914

2915 2916
	if (chain) {
		if (nla[NFTA_RULE_HANDLE]) {
2917
			rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
2918 2919
			if (IS_ERR(rule)) {
				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
2920
				return PTR_ERR(rule);
2921
			}
P
Patrick McHardy 已提交
2922

2923 2924 2925
			err = nft_delrule(&ctx, rule);
		} else if (nla[NFTA_RULE_ID]) {
			rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
2926 2927
			if (IS_ERR(rule)) {
				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
2928
				return PTR_ERR(rule);
2929
			}
2930

2931
			err = nft_delrule(&ctx, rule);
2932
		} else {
2933
			err = nft_delrule_by_chain(&ctx);
2934 2935 2936
		}
	} else {
		list_for_each_entry(chain, &table->chains, list) {
2937 2938 2939
			if (!nft_is_active_next(net, chain))
				continue;

2940
			ctx.chain = chain;
2941
			err = nft_delrule_by_chain(&ctx);
2942 2943 2944 2945 2946 2947 2948 2949
			if (err < 0)
				break;
		}
	}

	return err;
}

2950 2951 2952 2953
/*
 * Sets
 */

2954
/* Set types added by nft_register_set() and removed by nft_unregister_set(). */
static LIST_HEAD(nf_tables_set_types);
2955

2956
int nft_register_set(struct nft_set_type *type)
2957 2958
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
2959
	list_add_tail_rcu(&type->list, &nf_tables_set_types);
2960 2961 2962 2963 2964
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
	return 0;
}
EXPORT_SYMBOL_GPL(nft_register_set);

2965
void nft_unregister_set(struct nft_set_type *type)
2966 2967
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
2968
	list_del_rcu(&type->list);
2969 2970 2971 2972
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_set);

2973
/* Set flags that a set type must support for it to be a candidate. */
#define NFT_SET_FEATURES	(NFT_SET_INTERVAL | NFT_SET_MAP | \
				 NFT_SET_TIMEOUT | NFT_SET_OBJECT | \
				 NFT_SET_EVAL)
2976

2977
/*
 * A set type is a candidate when it offers every NFT_SET_FEATURES bit
 * present in @flags and @flags contains no bit of the type's features that
 * lies outside NFT_SET_FEATURES.
 */
static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
{
	u32 requested = flags & NFT_SET_FEATURES;
	u32 offered = flags & type->features;

	return offered == requested;
}

2982 2983 2984 2985 2986 2987
/*
 * Select a set implementation based on the data characteristics and the
 * given policy. The total memory use might not be known if no size is
 * given, in that case the amount of memory per element is used.
 */
static const struct nft_set_ops *
2988 2989
nft_select_set_ops(const struct nft_ctx *ctx,
		   const struct nlattr * const nla[],
2990 2991
		   const struct nft_set_desc *desc,
		   enum nft_set_policies policy)
2992
{
2993 2994
	const struct nft_set_ops *ops, *bops;
	struct nft_set_estimate est, best;
2995 2996
	const struct nft_set_type *type;
	u32 flags = 0;
2997

2998 2999
	lockdep_assert_held(&ctx->net->nft.commit_mutex);
	lockdep_nfnl_nft_mutex_not_held();
3000
#ifdef CONFIG_MODULES
3001
	if (list_empty(&nf_tables_set_types)) {
3002
		nft_request_module(ctx->net, "nft-set");
3003
		if (!list_empty(&nf_tables_set_types))
3004 3005 3006
			return ERR_PTR(-EAGAIN);
	}
#endif
3007 3008
	if (nla[NFTA_SET_FLAGS] != NULL)
		flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
3009

3010 3011 3012
	bops	    = NULL;
	best.size   = ~0;
	best.lookup = ~0;
3013
	best.space  = ~0;
3014

3015
	list_for_each_entry(type, &nf_tables_set_types, list) {
3016
		ops = &type->ops;
3017

3018
		if (!nft_set_ops_candidate(type, flags))
3019
			continue;
3020
		if (!ops->estimate(desc, flags, &est))
3021 3022 3023 3024
			continue;

		switch (policy) {
		case NFT_SET_POL_PERFORMANCE:
3025
			if (est.lookup < best.lookup)
3026
				break;
3027 3028 3029
			if (est.lookup == best.lookup &&
			    est.space < best.space)
				break;
3030 3031
			continue;
		case NFT_SET_POL_MEMORY:
3032 3033 3034 3035 3036 3037
			if (!desc->size) {
				if (est.space < best.space)
					break;
				if (est.space == best.space &&
				    est.lookup < best.lookup)
					break;
3038
			} else if (est.size < best.size || !bops) {
3039
				break;
3040
			}
3041 3042 3043 3044 3045
			continue;
		default:
			break;
		}

3046
		if (!try_module_get(type->owner))
3047
			continue;
3048
		if (bops != NULL)
3049
			module_put(to_set_type(bops)->owner);
3050 3051 3052

		bops = ops;
		best = est;
3053 3054
	}

3055 3056 3057
	if (bops != NULL)
		return bops;

3058 3059 3060 3061
	return ERR_PTR(-EOPNOTSUPP);
}

static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
3062 3063
	[NFTA_SET_TABLE]		= { .type = NLA_STRING,
					    .len = NFT_TABLE_MAXNAMELEN - 1 },
3064
	[NFTA_SET_NAME]			= { .type = NLA_STRING,
3065
					    .len = NFT_SET_MAXNAMELEN - 1 },
3066 3067 3068 3069 3070
	[NFTA_SET_FLAGS]		= { .type = NLA_U32 },
	[NFTA_SET_KEY_TYPE]		= { .type = NLA_U32 },
	[NFTA_SET_KEY_LEN]		= { .type = NLA_U32 },
	[NFTA_SET_DATA_TYPE]		= { .type = NLA_U32 },
	[NFTA_SET_DATA_LEN]		= { .type = NLA_U32 },
3071 3072
	[NFTA_SET_POLICY]		= { .type = NLA_U32 },
	[NFTA_SET_DESC]			= { .type = NLA_NESTED },
3073
	[NFTA_SET_ID]			= { .type = NLA_U32 },
3074 3075
	[NFTA_SET_TIMEOUT]		= { .type = NLA_U64 },
	[NFTA_SET_GC_INTERVAL]		= { .type = NLA_U32 },
3076 3077
	[NFTA_SET_USERDATA]		= { .type = NLA_BINARY,
					    .len  = NFT_USERDATA_MAXLEN },
3078
	[NFTA_SET_OBJ_TYPE]		= { .type = NLA_U32 },
3079
	[NFTA_SET_HANDLE]		= { .type = NLA_U64 },
3080 3081 3082 3083
};

/* Netlink policy for the attributes nested inside NFTA_SET_DESC. */
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
	[NFTA_SET_DESC_SIZE]		= { .type = NLA_U32 },
};

3086
static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
3087 3088
				     const struct sk_buff *skb,
				     const struct nlmsghdr *nlh,
3089
				     const struct nlattr * const nla[],
3090
				     struct netlink_ext_ack *extack,
3091
				     u8 genmask)
3092 3093
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
3094
	int family = nfmsg->nfgen_family;
3095
	struct nft_table *table = NULL;
3096 3097

	if (nla[NFTA_SET_TABLE] != NULL) {
3098 3099
		table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
					 genmask);
3100 3101
		if (IS_ERR(table)) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
3102
			return PTR_ERR(table);
3103
		}
3104 3105
	}

3106
	nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
3107 3108 3109
	return 0;
}

3110 3111
/*
 * Find a set of @table by the name in @nla that is active in @genmask.
 *
 * Returns the set, ERR_PTR(-EINVAL) when @nla is missing, or
 * ERR_PTR(-ENOENT) when no such set exists.
 */
static struct nft_set *nft_set_lookup(const struct nft_table *table,
				      const struct nlattr *nla, u8 genmask)
{
	struct nft_set *pos;

	if (!nla)
		return ERR_PTR(-EINVAL);

	list_for_each_entry_rcu(pos, &table->sets, list) {
		if (nla_strcmp(nla, pos->name) != 0)
			continue;
		if (nft_active_genmask(pos, genmask))
			return pos;
	}
	return ERR_PTR(-ENOENT);
}

3126 3127 3128
/*
 * Find a set of @table by the big-endian 64-bit handle in @nla that is
 * active in @genmask. @nla is dereferenced without a NULL check.
 *
 * Returns the set or ERR_PTR(-ENOENT).
 */
static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
					       const struct nlattr *nla,
					       u8 genmask)
{
	u64 handle = be64_to_cpu(nla_get_be64(nla));
	struct nft_set *pos;

	list_for_each_entry(pos, &table->sets, list) {
		if (pos->handle != handle)
			continue;
		if (nft_active_genmask(pos, genmask))
			return pos;
	}
	return ERR_PTR(-ENOENT);
}

3140 3141
/*
 * Find a set by the 32-bit id in @nla among the NFT_MSG_NEWSET entries on
 * @net's commit list.  Only sets active in @genmask are returned.
 *
 * Returns the set or ERR_PTR(-ENOENT).
 */
static struct nft_set *nft_set_lookup_byid(const struct net *net,
					   const struct nlattr *nla, u8 genmask)
{
	const u32 wanted = ntohl(nla_get_be32(nla));
	struct nft_trans *trans;

	list_for_each_entry(trans, &net->nft.commit_list, list) {
		struct nft_set *set;

		if (trans->msg_type != NFT_MSG_NEWSET)
			continue;

		set = nft_trans_set(trans);
		if (wanted == nft_trans_set_id(trans) &&
		    nft_active_genmask(set, genmask))
			return set;
	}

	return ERR_PTR(-ENOENT);
}
3157

3158 3159 3160 3161 3162
/*
 * Look a set up by name in @table and, if that fails and @nla_set_id is
 * given, by id through nft_set_lookup_byid().
 *
 * Returns the set or an ERR_PTR().  Without @nla_set_id the error from the
 * name lookup is returned unchanged.
 */
struct nft_set *nft_set_lookup_global(const struct net *net,
				      const struct nft_table *table,
				      const struct nlattr *nla_set_name,
				      const struct nlattr *nla_set_id,
				      u8 genmask)
{
	struct nft_set *set = nft_set_lookup(table, nla_set_name, genmask);

	if (!IS_ERR(set) || !nla_set_id)
		return set;

	return nft_set_lookup_byid(net, nla_set_id, genmask);
}
EXPORT_SYMBOL_GPL(nft_set_lookup_global);
3176

3177 3178 3179 3180 3181 3182
static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
				    const char *name)
{
	const struct nft_set *i;
	const char *p;
	unsigned long *inuse;
3183
	unsigned int n = 0, min = 0;
3184

3185
	p = strchr(name, '%');
3186 3187 3188 3189 3190 3191 3192
	if (p != NULL) {
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
		if (inuse == NULL)
			return -ENOMEM;
3193
cont:
3194
		list_for_each_entry(i, &ctx->table->sets, list) {
3195 3196
			int tmp;

3197 3198
			if (!nft_is_active_next(ctx->net, set))
				continue;
3199
			if (!sscanf(i->name, name, &tmp))
3200
				continue;
3201
			if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
3202
				continue;
3203

3204
			set_bit(tmp - min, inuse);
3205 3206
		}

3207
		n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
3208 3209 3210 3211 3212
		if (n >= BITS_PER_BYTE * PAGE_SIZE) {
			min += BITS_PER_BYTE * PAGE_SIZE;
			memset(inuse, 0, PAGE_SIZE);
			goto cont;
		}
3213 3214 3215
		free_page((unsigned long)inuse);
	}

3216 3217 3218 3219
	set->name = kasprintf(GFP_KERNEL, name, min + n);
	if (!set->name)
		return -ENOMEM;

3220
	list_for_each_entry(i, &ctx->table->sets, list) {
3221 3222
		if (!nft_is_active_next(ctx->net, i))
			continue;
3223 3224
		if (!strcmp(set->name, i->name)) {
			kfree(set->name);
3225
			return -ENFILE;
3226
		}
3227 3228 3229 3230
	}
	return 0;
}

3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244
/*
 * Convert the big-endian 64-bit millisecond value in @nla to jiffies.
 *
 * Values that would overflow a u64 once scaled to nanoseconds are rejected
 * with -ERANGE; otherwise the result is stored in @result and 0 is returned.
 */
static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
{
	const u64 limit = div_u64((u64)(~((u64)0)), NSEC_PER_MSEC);
	u64 msecs = be64_to_cpu(nla_get_be64(nla));

	if (msecs >= limit)
		return -ERANGE;

	*result = nsecs_to_jiffies64(msecs * NSEC_PER_MSEC);
	return 0;
}

3245
/* Convert @input jiffies to milliseconds, returned in big-endian order. */
static __be64 nf_jiffies64_to_msecs(u64 input)
{
	u64 msecs = jiffies64_to_msecs(input);

	return cpu_to_be64(msecs);
}

3250 3251 3252 3253 3254
/*
 * Append one netlink message describing @set to @skb.
 *
 * The header takes port id and sequence from @ctx; @event is combined with
 * NFNL_SUBSYS_NFTABLES.  Flags, data type/length, object type, timeout, GC
 * interval and policy are only emitted when they apply or differ from the
 * default; NFTA_SET_DESC_SIZE is only emitted for a non-zero size.
 *
 * Returns 0 on success.  On failure the partially built message is trimmed
 * from @skb and -1 is returned.
 */
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
			      const struct nft_set *set, u16 event, u16 flags)
{
	struct nlattr *desc_nest;
	struct nfgenmsg *hdr;
	struct nlmsghdr *nlh;

	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
	nlh = nlmsg_put(skb, ctx->portid, ctx->seq, event,
			sizeof(struct nfgenmsg), flags);
	if (!nlh)
		goto nla_put_failure;

	hdr = nlmsg_data(nlh);
	hdr->nfgen_family = ctx->family;
	hdr->version = NFNETLINK_V0;
	hdr->res_id = htons(ctx->net->nft.base_seq & 0xffff);

	/* Identification: table, name, handle. */
	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name) ||
	    nla_put_string(skb, NFTA_SET_NAME, set->name) ||
	    nla_put_be64(skb, NFTA_SET_HANDLE, cpu_to_be64(set->handle),
			 NFTA_SET_PAD))
		goto nla_put_failure;

	if (set->flags != 0 &&
	    nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
		goto nla_put_failure;

	/* Key description is always present. */
	if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype)) ||
	    nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen)))
		goto nla_put_failure;

	/* Data description only for maps. */
	if ((set->flags & NFT_SET_MAP) &&
	    (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype)) ||
	     nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))))
		goto nla_put_failure;

	if ((set->flags & NFT_SET_OBJECT) &&
	    nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
		goto nla_put_failure;

	if (set->timeout &&
	    nla_put_be64(skb, NFTA_SET_TIMEOUT,
			 nf_jiffies64_to_msecs(set->timeout), NFTA_SET_PAD))
		goto nla_put_failure;

	if (set->gc_int &&
	    nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
		goto nla_put_failure;

	if (set->policy != NFT_SET_POL_PERFORMANCE &&
	    nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
		goto nla_put_failure;

	if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata))
		goto nla_put_failure;

	desc_nest = nla_nest_start_noflag(skb, NFTA_SET_DESC);
	if (!desc_nest)
		goto nla_put_failure;
	if (set->size &&
	    nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
		goto nla_put_failure;
	nla_nest_end(skb, desc_nest);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, nlh);
	return -1;
}

3328 3329 3330
/*
 * Send an @event notification for @set to the NFNLGRP_NFTABLES group.
 *
 * Nothing is sent when the request did not ask for a report and the group
 * has no listeners.  If the message cannot be allocated or filled, -ENOBUFS
 * is signalled via nfnetlink_set_err() instead.
 */
static void nf_tables_set_notify(const struct nft_ctx *ctx,
				 const struct nft_set *set, int event,
				 gfp_t gfp_flags)
{
	const u32 portid = ctx->portid;
	struct sk_buff *skb;

	if (!ctx->report &&
	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
		return;

	skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
	if (!skb)
		goto err;

	if (nf_tables_fill_set(skb, ctx, set, event, 0) < 0) {
		kfree_skb(skb);
		goto err;
	}

	nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report,
		       gfp_flags);
	return;
err:
	nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}

3357
static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
3358 3359 3360 3361 3362
{
	const struct nft_set *set;
	unsigned int idx, s_idx = cb->args[0];
	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
	struct net *net = sock_net(skb->sk);
3363
	struct nft_ctx *ctx = cb->data, ctx_set;
3364 3365 3366 3367

	if (cb->args[1])
		return skb->len;

3368
	rcu_read_lock();
3369 3370
	cb->seq = net->nft.base_seq;

3371 3372
	list_for_each_entry_rcu(table, &net->nft.tables, list) {
		if (ctx->family != NFPROTO_UNSPEC &&
3373
		    ctx->family != table->family)
3374 3375 3376
			continue;

		if (ctx->table && ctx->table != table)
3377 3378
			continue;

3379 3380
		if (cur_table) {
			if (cur_table != table)
3381 3382
				continue;

3383
			cur_table = NULL;
3384
		}
3385 3386 3387 3388 3389 3390
		idx = 0;
		list_for_each_entry_rcu(set, &table->sets, list) {
			if (idx < s_idx)
				goto cont;
			if (!nft_is_active(net, set))
				goto cont;
3391

3392 3393
			ctx_set = *ctx;
			ctx_set.table = table;
3394
			ctx_set.family = table->family;
3395

3396 3397 3398 3399 3400 3401
			if (nf_tables_fill_set(skb, &ctx_set, set,
					       NFT_MSG_NEWSET,
					       NLM_F_MULTI) < 0) {
				cb->args[0] = idx;
				cb->args[2] = (unsigned long) table;
				goto done;
3402
			}
3403
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
3404
cont:
3405
			idx++;
3406
		}
3407 3408
		if (s_idx)
			s_idx = 0;
3409 3410 3411
	}
	cb->args[1] = 1;
done:
3412
	rcu_read_unlock();
3413 3414 3415
	return skb->len;
}

3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427
/*
 * Dump start hook: replace @cb->data with a heap copy of the struct nft_ctx
 * it points to.  The copy is freed by nf_tables_dump_sets_done().
 *
 * Returns 0 on success or -ENOMEM.
 */
static int nf_tables_dump_sets_start(struct netlink_callback *cb)
{
	struct nft_ctx *copy = kmemdup(cb->data, sizeof(*copy), GFP_ATOMIC);

	if (!copy)
		return -ENOMEM;

	cb->data = copy;
	return 0;
}

3428
static int nf_tables_dump_sets_done(struct netlink_callback *cb)
3429
{
3430 3431
	kfree(cb->data);
	return 0;
3432 3433
}

3434
/* called with rcu_read_lock held */
3435 3436
static int nf_tables_getset(struct net *net, struct sock *nlsk,
			    struct sk_buff *skb, const struct nlmsghdr *nlh,
3437 3438
			    const struct nlattr * const nla[],
			    struct netlink_ext_ack *extack)
3439
{
3440
	u8 genmask = nft_genmask_cur(net);
3441 3442 3443
	const struct nft_set *set;
	struct nft_ctx ctx;
	struct sk_buff *skb2;
3444
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
3445 3446
	int err;

S
stephen hemminger 已提交
3447
	/* Verify existence before starting dump */
3448 3449
	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
					genmask);
3450 3451 3452 3453 3454
	if (err < 0)
		return err;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
3455
			.start = nf_tables_dump_sets_start,
3456
			.dump = nf_tables_dump_sets,
3457
			.done = nf_tables_dump_sets_done,
3458
			.data = &ctx,
3459
			.module = THIS_MODULE,
3460
		};
3461

3462
		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
3463 3464
	}

3465 3466 3467
	/* Only accept unspec with dump */
	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
		return -EAFNOSUPPORT;
3468 3469
	if (!nla[NFTA_SET_TABLE])
		return -EINVAL;
3470

3471
	set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
3472 3473 3474
	if (IS_ERR(set))
		return PTR_ERR(set);

3475
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489
	if (skb2 == NULL)
		return -ENOMEM;

	err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0);
	if (err < 0)
		goto err;

	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);

err:
	kfree_skb(skb2);
	return err;
}

3490
static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
3491 3492 3493 3494 3495
				    const struct nlattr *nla)
{
	struct nlattr *da[NFTA_SET_DESC_MAX + 1];
	int err;

3496 3497
	err = nla_parse_nested_deprecated(da, NFTA_SET_DESC_MAX, nla,
					  nft_set_desc_policy, NULL);
3498 3499 3500 3501 3502 3503 3504 3505 3506
	if (err < 0)
		return err;

	if (da[NFTA_SET_DESC_SIZE] != NULL)
		desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));

	return 0;
}

3507 3508
static int nf_tables_newset(struct net *net, struct sock *nlsk,
			    struct sk_buff *skb, const struct nlmsghdr *nlh,
3509 3510
			    const struct nlattr * const nla[],
			    struct netlink_ext_ack *extack)
3511 3512
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
3513
	u8 genmask = nft_genmask_next(net);
3514
	int family = nfmsg->nfgen_family;
3515 3516 3517 3518
	const struct nft_set_ops *ops;
	struct nft_table *table;
	struct nft_set *set;
	struct nft_ctx ctx;
3519
	char *name;
3520
	u64 size;
3521
	u64 timeout;
3522
	u32 ktype, dtype, flags, policy, gc_int, objtype;
3523
	struct nft_set_desc desc;
3524 3525
	unsigned char *udata;
	u16 udlen;
3526 3527 3528 3529
	int err;

	if (nla[NFTA_SET_TABLE] == NULL ||
	    nla[NFTA_SET_NAME] == NULL ||
3530 3531
	    nla[NFTA_SET_KEY_LEN] == NULL ||
	    nla[NFTA_SET_ID] == NULL)
3532 3533
		return -EINVAL;

3534 3535
	memset(&desc, 0, sizeof(desc));

3536 3537 3538 3539 3540 3541 3542
	ktype = NFT_DATA_VALUE;
	if (nla[NFTA_SET_KEY_TYPE] != NULL) {
		ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
		if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
			return -EINVAL;
	}

3543
	desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
3544
	if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN)
3545 3546 3547 3548 3549 3550
		return -EINVAL;

	flags = 0;
	if (nla[NFTA_SET_FLAGS] != NULL) {
		flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
3551
			      NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
3552 3553
			      NFT_SET_MAP | NFT_SET_EVAL |
			      NFT_SET_OBJECT))
3554
			return -EINVAL;
3555 3556 3557
		/* Only one of these operations is supported */
		if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) ==
			     (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT))
3558
			return -EOPNOTSUPP;
3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573
	}

	dtype = 0;
	if (nla[NFTA_SET_DATA_TYPE] != NULL) {
		if (!(flags & NFT_SET_MAP))
			return -EINVAL;

		dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
		if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
		    dtype != NFT_DATA_VERDICT)
			return -EINVAL;

		if (dtype != NFT_DATA_VERDICT) {
			if (nla[NFTA_SET_DATA_LEN] == NULL)
				return -EINVAL;
3574
			desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
3575
			if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN)
3576 3577
				return -EINVAL;
		} else
3578
			desc.dlen = sizeof(struct nft_verdict);
3579 3580 3581
	} else if (flags & NFT_SET_MAP)
		return -EINVAL;

3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594
	if (nla[NFTA_SET_OBJ_TYPE] != NULL) {
		if (!(flags & NFT_SET_OBJECT))
			return -EINVAL;

		objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
		if (objtype == NFT_OBJECT_UNSPEC ||
		    objtype > NFT_OBJECT_MAX)
			return -EINVAL;
	} else if (flags & NFT_SET_OBJECT)
		return -EINVAL;
	else
		objtype = NFT_OBJECT_UNSPEC;

3595 3596 3597 3598
	timeout = 0;
	if (nla[NFTA_SET_TIMEOUT] != NULL) {
		if (!(flags & NFT_SET_TIMEOUT))
			return -EINVAL;
3599 3600 3601 3602

		err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
		if (err)
			return err;
3603 3604 3605 3606 3607 3608 3609 3610
	}
	gc_int = 0;
	if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
		if (!(flags & NFT_SET_TIMEOUT))
			return -EINVAL;
		gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
	}

3611 3612 3613 3614 3615
	policy = NFT_SET_POL_PERFORMANCE;
	if (nla[NFTA_SET_POLICY] != NULL)
		policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));

	if (nla[NFTA_SET_DESC] != NULL) {
3616
		err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
3617 3618 3619 3620
		if (err < 0)
			return err;
	}

3621
	table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask);
3622 3623
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
3624
		return PTR_ERR(table);
3625
	}
3626

3627
	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
3628

3629
	set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
3630
	if (IS_ERR(set)) {
3631 3632
		if (PTR_ERR(set) != -ENOENT) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
3633
			return PTR_ERR(set);
3634
		}
3635
	} else {
3636 3637
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
3638
			return -EEXIST;
3639
		}
3640 3641
		if (nlh->nlmsg_flags & NLM_F_REPLACE)
			return -EOPNOTSUPP;
3642

3643 3644 3645 3646 3647 3648
		return 0;
	}

	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;

3649
	ops = nft_select_set_ops(&ctx, nla, &desc, policy);
3650 3651 3652
	if (IS_ERR(ops))
		return PTR_ERR(ops);

3653 3654 3655 3656
	udlen = 0;
	if (nla[NFTA_SET_USERDATA])
		udlen = nla_len(nla[NFTA_SET_USERDATA]);

3657 3658
	size = 0;
	if (ops->privsize != NULL)
3659
		size = ops->privsize(nla, &desc);
3660

3661 3662 3663
	set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
	if (!set) {
		err = -ENOMEM;
3664
		goto err1;
3665
	}
3666

3667 3668 3669 3670 3671 3672
	name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
	if (!name) {
		err = -ENOMEM;
		goto err2;
	}

3673
	err = nf_tables_set_alloc_name(&ctx, set, name);
3674
	kfree(name);
3675 3676 3677
	if (err < 0)
		goto err2;

3678 3679 3680 3681 3682 3683
	udata = NULL;
	if (udlen) {
		udata = set->data + size;
		nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
	}

3684
	INIT_LIST_HEAD(&set->bindings);
3685 3686
	set->table = table;
	write_pnet(&set->net, net);
3687 3688
	set->ops   = ops;
	set->ktype = ktype;
3689
	set->klen  = desc.klen;
3690
	set->dtype = dtype;
3691
	set->objtype = objtype;
3692
	set->dlen  = desc.dlen;
3693
	set->flags = flags;
3694
	set->size  = desc.size;
3695
	set->policy = policy;
3696 3697
	set->udlen  = udlen;
	set->udata  = udata;
3698 3699
	set->timeout = timeout;
	set->gc_int = gc_int;
3700
	set->handle = nf_tables_alloc_handle(table);
3701

3702
	err = ops->init(set, &desc, nla);
3703
	if (err < 0)
3704
		goto err3;
3705

3706
	err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
3707
	if (err < 0)
3708
		goto err4;
3709

3710
	list_add_tail_rcu(&set->list, &table->sets);
3711
	table->use++;
3712 3713
	return 0;

3714
err4:
3715
	ops->destroy(set);
3716 3717
err3:
	kfree(set->name);
3718
err2:
3719
	kvfree(set);
3720
err1:
3721
	module_put(to_set_type(ops)->owner);
3722 3723 3724
	return err;
}

3725
static void nft_set_destroy(struct nft_set *set)
3726
{
3727 3728 3729
	if (WARN_ON(set->use > 0))
		return;

3730
	set->ops->destroy(set);
3731
	module_put(to_set_type(set->ops)->owner);
3732
	kfree(set->name);
3733
	kvfree(set);
3734 3735
}

3736 3737
static int nf_tables_delset(struct net *net, struct sock *nlsk,
			    struct sk_buff *skb, const struct nlmsghdr *nlh,
3738 3739
			    const struct nlattr * const nla[],
			    struct netlink_ext_ack *extack)
3740
{
3741
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
3742
	u8 genmask = nft_genmask_next(net);
3743
	const struct nlattr *attr;
3744 3745 3746 3747
	struct nft_set *set;
	struct nft_ctx ctx;
	int err;

3748 3749
	if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
		return -EAFNOSUPPORT;
3750 3751 3752
	if (nla[NFTA_SET_TABLE] == NULL)
		return -EINVAL;

3753 3754
	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
					genmask);
3755 3756 3757
	if (err < 0)
		return err;

3758 3759 3760 3761 3762 3763 3764
	if (nla[NFTA_SET_HANDLE]) {
		attr = nla[NFTA_SET_HANDLE];
		set = nft_set_lookup_byhandle(ctx.table, attr, genmask);
	} else {
		attr = nla[NFTA_SET_NAME];
		set = nft_set_lookup(ctx.table, attr, genmask);
	}
3765

3766 3767 3768 3769
	if (IS_ERR(set)) {
		NL_SET_BAD_ATTR(extack, attr);
		return PTR_ERR(set);
	}
3770
	if (set->use ||
3771 3772
	    (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
		NL_SET_BAD_ATTR(extack, attr);
3773
		return -EBUSY;
3774
	}
3775

3776
	return nft_delset(&ctx, set);
3777 3778 3779
}

static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
3780
					struct nft_set *set,
3781
					const struct nft_set_iter *iter,
3782
					struct nft_set_elem *elem)
3783
{
3784
	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
3785 3786 3787
	enum nft_registers dreg;

	dreg = nft_type_to_reg(set->dtype);
3788 3789 3790 3791
	return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext),
					   set->dtype == NFT_DATA_VERDICT ?
					   NFT_DATA_VERDICT : NFT_DATA_VALUE,
					   set->dlen);
3792 3793 3794 3795 3796 3797 3798 3799
}

int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
		       struct nft_set_binding *binding)
{
	struct nft_set_binding *i;
	struct nft_set_iter iter;

3800 3801 3802
	if (set->use == UINT_MAX)
		return -EOVERFLOW;

3803
	if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
3804 3805
		return -EBUSY;

3806
	if (binding->flags & NFT_SET_MAP) {
3807 3808 3809 3810
		/* If the set is already bound to the same chain all
		 * jumps are already validated for that chain.
		 */
		list_for_each_entry(i, &set->bindings, list) {
3811
			if (i->flags & NFT_SET_MAP &&
3812
			    i->chain == binding->chain)
3813 3814 3815
				goto bind;
		}

3816
		iter.genmask	= nft_genmask_next(ctx->net);
3817 3818 3819 3820 3821 3822
		iter.skip 	= 0;
		iter.count	= 0;
		iter.err	= 0;
		iter.fn		= nf_tables_bind_check_setelem;

		set->ops->walk(ctx, set, &iter);
3823
		if (iter.err < 0)
3824 3825 3826 3827
			return iter.err;
	}
bind:
	binding->chain = ctx->chain;
3828
	list_add_tail_rcu(&binding->list, &set->bindings);
3829
	nft_set_trans_bind(ctx, set);
3830
	set->use++;
3831

3832 3833
	return 0;
}
3834
EXPORT_SYMBOL_GPL(nf_tables_bind_set);
3835

3836 3837
/*
 * Remove @binding from @set.  When that was the last binding of an anonymous
 * set, the set is also unlinked from its table list and, if @event is true,
 * an NFT_MSG_DELSET notification is sent.
 */
static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
				 struct nft_set_binding *binding, bool event)
{
	list_del_rcu(&binding->list);

	if (!list_empty(&set->bindings) || !nft_set_is_anonymous(set))
		return;

	list_del_rcu(&set->list);
	if (event)
		nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL);
}

3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867
/*
 * Drop a binding's claim on @set according to the transaction @phase:
 *
 *   NFT_TRANS_PREPARE         - only the use counter is decremented
 *   NFT_TRANS_ABORT / RELEASE - the use counter is decremented and the
 *                               binding is removed
 *   any other phase           - the binding is removed; a notification is
 *                               requested only for NFT_TRANS_COMMIT
 */
void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
			      struct nft_set_binding *binding,
			      enum nft_trans_phase phase)
{
	if (phase == NFT_TRANS_PREPARE) {
		set->use--;
		return;
	}

	if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_RELEASE)
		set->use--;

	nf_tables_unbind_set(ctx, set, binding, phase == NFT_TRANS_COMMIT);
}
EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);

3868 3869
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
{
3870
	if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
3871 3872 3873 3874
		nft_set_destroy(set);
}
EXPORT_SYMBOL_GPL(nf_tables_destroy_set);

3875 3876
const struct nft_set_ext_type nft_set_ext_types[] = {
	[NFT_SET_EXT_KEY]		= {
3877
		.align	= __alignof__(u32),
3878 3879
	},
	[NFT_SET_EXT_DATA]		= {
3880
		.align	= __alignof__(u32),
3881
	},
3882 3883 3884
	[NFT_SET_EXT_EXPR]		= {
		.align	= __alignof__(struct nft_expr),
	},
3885 3886 3887 3888
	[NFT_SET_EXT_OBJREF]		= {
		.len	= sizeof(struct nft_object *),
		.align	= __alignof__(struct nft_object *),
	},
3889 3890 3891 3892
	[NFT_SET_EXT_FLAGS]		= {
		.len	= sizeof(u8),
		.align	= __alignof__(u8),
	},
3893 3894 3895 3896 3897
	[NFT_SET_EXT_TIMEOUT]		= {
		.len	= sizeof(u64),
		.align	= __alignof__(u64),
	},
	[NFT_SET_EXT_EXPIRATION]	= {
3898 3899
		.len	= sizeof(u64),
		.align	= __alignof__(u64),
3900
	},
3901 3902 3903 3904
	[NFT_SET_EXT_USERDATA]		= {
		.len	= sizeof(struct nft_userdata),
		.align	= __alignof__(struct nft_userdata),
	},
3905 3906 3907
};
EXPORT_SYMBOL_GPL(nft_set_ext_types);

3908 3909 3910 3911 3912 3913 3914 3915
/*
 * Set elements
 */

static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
	[NFTA_SET_ELEM_KEY]		= { .type = NLA_NESTED },
	[NFTA_SET_ELEM_DATA]		= { .type = NLA_NESTED },
	[NFTA_SET_ELEM_FLAGS]		= { .type = NLA_U32 },
3916
	[NFTA_SET_ELEM_TIMEOUT]		= { .type = NLA_U64 },
3917
	[NFTA_SET_ELEM_EXPIRATION]	= { .type = NLA_U64 },
3918 3919
	[NFTA_SET_ELEM_USERDATA]	= { .type = NLA_BINARY,
					    .len = NFT_USERDATA_MAXLEN },
3920 3921
	[NFTA_SET_ELEM_EXPR]		= { .type = NLA_NESTED },
	[NFTA_SET_ELEM_OBJREF]		= { .type = NLA_STRING },
3922 3923 3924
};

static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
3925 3926 3927 3928
	[NFTA_SET_ELEM_LIST_TABLE]	= { .type = NLA_STRING,
					    .len = NFT_TABLE_MAXNAMELEN - 1 },
	[NFTA_SET_ELEM_LIST_SET]	= { .type = NLA_STRING,
					    .len = NFT_SET_MAXNAMELEN - 1 },
3929
	[NFTA_SET_ELEM_LIST_ELEMENTS]	= { .type = NLA_NESTED },
3930
	[NFTA_SET_ELEM_LIST_SET_ID]	= { .type = NLA_U32 },
3931 3932
};

3933
static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
3934 3935
				      const struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
3936
				      const struct nlattr * const nla[],
3937
				      struct netlink_ext_ack *extack,
3938
				      u8 genmask)
3939 3940
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
3941
	int family = nfmsg->nfgen_family;
3942
	struct nft_table *table;
3943

3944 3945
	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
				 genmask);
3946 3947
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
3948
		return PTR_ERR(table);
3949
	}
3950

3951
	nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
3952 3953 3954 3955 3956 3957 3958
	return 0;
}

static int nf_tables_fill_setelem(struct sk_buff *skb,
				  const struct nft_set *set,
				  const struct nft_set_elem *elem)
{
3959
	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
3960 3961 3962
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nest;

3963
	nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM);
3964 3965 3966
	if (nest == NULL)
		goto nla_put_failure;

3967 3968
	if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
			  NFT_DATA_VALUE, set->klen) < 0)
3969 3970
		goto nla_put_failure;

3971 3972
	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
	    nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
3973 3974 3975 3976
			  set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
			  set->dlen) < 0)
		goto nla_put_failure;

3977 3978 3979 3980
	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) &&
	    nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0)
		goto nla_put_failure;

3981 3982
	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
	    nla_put_string(skb, NFTA_SET_ELEM_OBJREF,
3983
			   (*nft_set_ext_obj(ext))->key.name) < 0)
3984 3985
		goto nla_put_failure;

3986 3987 3988 3989
	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
	    nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
		         htonl(*nft_set_ext_flags(ext))))
		goto nla_put_failure;
3990

3991 3992
	if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
	    nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
3993
			 nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
3994
			 NFTA_SET_ELEM_PAD))
3995 3996 3997
		goto nla_put_failure;

	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
3998
		u64 expires, now = get_jiffies_64();
3999 4000

		expires = *nft_set_ext_expiration(ext);
4001
		if (time_before64(now, expires))
4002 4003 4004 4005 4006
			expires -= now;
		else
			expires = 0;

		if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
4007
				 nf_jiffies64_to_msecs(expires),
4008
				 NFTA_SET_ELEM_PAD))
4009 4010 4011
			goto nla_put_failure;
	}

4012 4013 4014 4015 4016 4017 4018 4019 4020
	if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
		struct nft_userdata *udata;

		udata = nft_set_ext_userdata(ext);
		if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
			    udata->len + 1, udata->data))
			goto nla_put_failure;
	}

4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035
	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}

/*
 * Arguments for one set element dump walk.  The iterator is embedded so that
 * nf_tables_dump_setelem() can get back to this structure with
 * container_of() on the iterator it is handed.
 */
struct nft_set_dump_args {
	const struct netlink_callback	*cb;	/* dump callback state */
	struct nft_set_iter		iter;	/* iterator passed to ->walk() */
	struct sk_buff			*skb;	/* message being filled */
};

static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
4036
				  struct nft_set *set,
4037
				  const struct nft_set_iter *iter,
4038
				  struct nft_set_elem *elem)
4039 4040 4041 4042 4043 4044 4045
{
	struct nft_set_dump_args *args;

	args = container_of(iter, struct nft_set_dump_args, iter);
	return nf_tables_fill_setelem(args->skb, set, elem);
}

4046 4047 4048 4049 4050
/*
 * State of a set element dump, kept in netlink_callback->data:
 * nf_tables_dump_set_start() duplicates it and nf_tables_dump_set() reads it.
 */
struct nft_set_dump_ctx {
	const struct nft_set	*set;	/* set whose elements are dumped */
	struct nft_ctx		ctx;	/* request context (family, table) */
};

4051 4052
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
4053
	struct nft_set_dump_ctx *dump_ctx = cb->data;
4054
	struct net *net = sock_net(skb->sk);
4055
	struct nft_table *table;
4056
	struct nft_set *set;
4057
	struct nft_set_dump_args args;
4058
	bool set_found = false;
4059 4060 4061 4062
	struct nfgenmsg *nfmsg;
	struct nlmsghdr *nlh;
	struct nlattr *nest;
	u32 portid, seq;
4063
	int event;
4064

4065
	rcu_read_lock();
4066 4067
	list_for_each_entry_rcu(table, &net->nft.tables, list) {
		if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
4068
		    dump_ctx->ctx.family != table->family)
4069
			continue;
4070

4071 4072
		if (table != dump_ctx->ctx.table)
			continue;
4073

4074 4075 4076 4077
		list_for_each_entry_rcu(set, &table->sets, list) {
			if (set == dump_ctx->set) {
				set_found = true;
				break;
4078 4079 4080 4081 4082 4083 4084 4085 4086
			}
		}
		break;
	}

	if (!set_found) {
		rcu_read_unlock();
		return -ENOENT;
	}
4087

4088
	event  = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWSETELEM);
4089 4090 4091 4092 4093 4094 4095 4096 4097
	portid = NETLINK_CB(cb->skb).portid;
	seq    = cb->nlh->nlmsg_seq;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
			NLM_F_MULTI);
	if (nlh == NULL)
		goto nla_put_failure;

	nfmsg = nlmsg_data(nlh);
4098
	nfmsg->nfgen_family = table->family;
4099
	nfmsg->version      = NFNETLINK_V0;
4100
	nfmsg->res_id	    = htons(net->nft.base_seq & 0xffff);
4101

4102
	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
4103 4104 4105 4106
		goto nla_put_failure;
	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
		goto nla_put_failure;

4107
	nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
4108 4109 4110
	if (nest == NULL)
		goto nla_put_failure;

4111 4112
	args.cb			= cb;
	args.skb		= skb;
4113
	args.iter.genmask	= nft_genmask_cur(net);
4114 4115 4116 4117
	args.iter.skip		= cb->args[0];
	args.iter.count		= 0;
	args.iter.err		= 0;
	args.iter.fn		= nf_tables_dump_setelem;
4118 4119
	set->ops->walk(&dump_ctx->ctx, set, &args.iter);
	rcu_read_unlock();
4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132

	nla_nest_end(skb, nest);
	nlmsg_end(skb, nlh);

	if (args.iter.err && args.iter.err != -EMSGSIZE)
		return args.iter.err;
	if (args.iter.count == cb->args[0])
		return 0;

	cb->args[0] = args.iter.count;
	return skb->len;

nla_put_failure:
4133
	rcu_read_unlock();
4134 4135 4136
	return -ENOSPC;
}

4137 4138 4139 4140 4141 4142 4143 4144 4145
/*
 * Dump start hook: replace @cb->data with a heap copy of the
 * struct nft_set_dump_ctx it points to.  Returns 0 or -ENOMEM.
 */
static int nf_tables_dump_set_start(struct netlink_callback *cb)
{
	struct nft_set_dump_ctx *orig = cb->data;

	cb->data = kmemdup(orig, sizeof(*orig), GFP_ATOMIC);
	if (!cb->data)
		return -ENOMEM;

	return 0;
}

4146 4147 4148 4149 4150 4151
/* Dump done hook: free the context copy made by nf_tables_dump_set_start(). */
static int nf_tables_dump_set_done(struct netlink_callback *cb)
{
	kfree(cb->data);

	return 0;
}

4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162
/*
 * Build a complete netlink message for a single element of @set in @skb:
 * header, table name, set name and an NFTA_SET_ELEM_LIST_ELEMENTS nest
 * holding @elem.
 *
 * Returns 0 on success.  On failure the partially built message is trimmed
 * from @skb and -1 is returned.
 */
static int nf_tables_fill_setelem_info(struct sk_buff *skb,
				       const struct nft_ctx *ctx, u32 seq,
				       u32 portid, int event, u16 flags,
				       const struct nft_set *set,
				       const struct nft_set_elem *elem)
{
	struct nlattr *elems_nest;
	struct nfgenmsg *hdr;
	struct nlmsghdr *nlh;

	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
			flags);
	if (!nlh)
		goto nla_put_failure;

	hdr = nlmsg_data(nlh);
	hdr->nfgen_family = ctx->family;
	hdr->version = NFNETLINK_V0;
	hdr->res_id = htons(ctx->net->nft.base_seq & 0xffff);

	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name) ||
	    nla_put_string(skb, NFTA_SET_NAME, set->name))
		goto nla_put_failure;

	elems_nest = nla_nest_start_noflag(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
	if (!elems_nest)
		goto nla_put_failure;

	if (nf_tables_fill_setelem(skb, set, elem) < 0)
		goto nla_put_failure;

	nla_nest_end(skb, elems_nest);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, nlh);
	return -1;
}

4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223
/*
 * Decode the element flags in @attr into @flags and validate them.
 *
 * A NULL @attr leaves @flags untouched and succeeds.  Only
 * NFT_SET_ELEM_INTERVAL_END is accepted, and only for sets created with
 * NFT_SET_INTERVAL.  Returns 0 or -EINVAL; @flags is written even when the
 * value is rejected.
 */
static int nft_setelem_parse_flags(const struct nft_set *set,
				   const struct nlattr *attr, u32 *flags)
{
	bool interval_set = set->flags & NFT_SET_INTERVAL;

	if (!attr)
		return 0;

	*flags = ntohl(nla_get_be32(attr));

	if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
		return -EINVAL;
	if ((*flags & NFT_SET_ELEM_INTERVAL_END) && !interval_set)
		return -EINVAL;

	return 0;
}

static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
			    const struct nlattr *attr)
{
	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
	struct nft_data_desc desc;
	struct nft_set_elem elem;
	struct sk_buff *skb;
	uint32_t flags = 0;
	void *priv;
	int err;

4224 4225
	err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
					  nft_set_elem_policy, NULL);
4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251
	if (err < 0)
		return err;

	if (!nla[NFTA_SET_ELEM_KEY])
		return -EINVAL;

	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
	if (err < 0)
		return err;

	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
			    nla[NFTA_SET_ELEM_KEY]);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
		return err;

	priv = set->ops->get(ctx->net, set, &elem, flags);
	if (IS_ERR(priv))
		return PTR_ERR(priv);

	elem.priv = priv;

	err = -ENOMEM;
4252
	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273
	if (skb == NULL)
		goto err1;

	err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
					  NFT_MSG_NEWSETELEM, 0, set, &elem);
	if (err < 0)
		goto err2;

	err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT);
	/* This avoids a loop in nfnetlink. */
	if (err < 0)
		goto err1;

	return 0;
err2:
	kfree_skb(skb);
err1:
	/* this avoids a loop in nfnetlink. */
	return err == -EAGAIN ? -ENOBUFS : err;
}

4274
/* called with rcu_read_lock held */
4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285
static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
				struct sk_buff *skb, const struct nlmsghdr *nlh,
				const struct nlattr * const nla[],
				struct netlink_ext_ack *extack)
{
	u8 genmask = nft_genmask_cur(net);
	struct nft_set *set;
	struct nlattr *attr;
	struct nft_ctx ctx;
	int rem, err = 0;

4286 4287
	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
					 genmask);
4288 4289 4290
	if (err < 0)
		return err;

4291
	set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
4292 4293 4294 4295 4296
	if (IS_ERR(set))
		return PTR_ERR(set);

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
4297
			.start = nf_tables_dump_set_start,
4298 4299
			.dump = nf_tables_dump_set,
			.done = nf_tables_dump_set_done,
4300
			.module = THIS_MODULE,
4301
		};
4302 4303 4304 4305
		struct nft_set_dump_ctx dump_ctx = {
			.set = set,
			.ctx = ctx,
		};
4306

4307
		c.data = &dump_ctx;
4308
		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322
	}

	if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
		return -EINVAL;

	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
		err = nft_get_set_elem(&ctx, set, attr);
		if (err < 0)
			break;
	}

	return err;
}

4323 4324 4325 4326
/* Send an element event to the NFNLGRP_NFTABLES multicast group.
 *
 * Nothing is done when the request did not ask for a report and nobody is
 * listening on the group. If the message cannot be allocated or filled,
 * -ENOBUFS is signalled to the group instead.
 */
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
				     const struct nft_set *set,
				     const struct nft_set_elem *elem,
				     int event, u16 flags)
{
	struct net *net = ctx->net;
	u32 portid = ctx->portid;
	struct sk_buff *skb;

	if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
		return;

	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		goto notify_failed;

	if (nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
					set, elem) < 0) {
		kfree_skb(skb);
		goto notify_failed;
	}

	nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
		       GFP_KERNEL);
	return;

notify_failed:
	nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}

4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367
/* Allocate a transaction sized for a set element operation and record the
 * set it applies to. Returns NULL if the allocation fails.
 */
static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
					      int msg_type,
					      struct nft_set *set)
{
	struct nft_trans *trans =
		nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));

	if (trans)
		nft_trans_elem_set(trans) = set;

	return trans;
}

4368 4369
/* Allocate a zeroed set element and populate its extension area.
 *
 * @set:        set the element belongs to (supplies elemsize, klen, dlen)
 * @tmpl:       extension template describing which extensions are present
 * @key:        key bytes, set->klen of them are copied
 * @data:       data bytes, only read when the DATA extension exists
 * @timeout:    stored in the TIMEOUT extension when present
 * @expiration: remaining lifetime; when zero, @timeout is used instead to
 *              compute the EXPIRATION extension
 * @gfp:        allocation flags
 *
 * Returns the new element or NULL on allocation failure.
 */
void *nft_set_elem_init(const struct nft_set *set,
			const struct nft_set_ext_tmpl *tmpl,
			const u32 *key, const u32 *data,
			u64 timeout, u64 expiration, gfp_t gfp)
{
	struct nft_set_ext *ext;
	void *elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);

	if (!elem)
		return NULL;

	ext = nft_set_elem_ext(set, elem);
	nft_set_ext_init(ext, tmpl);

	memcpy(nft_set_ext_key(ext), key, set->klen);

	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
		memcpy(nft_set_ext_data(ext), data, set->dlen);

	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
		u64 deadline = get_jiffies_64() + expiration;

		/* No explicit expiration given: run for the full timeout. */
		if (expiration == 0)
			deadline += timeout;

		*nft_set_ext_expiration(ext) = deadline;
	}

	if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
		*nft_set_ext_timeout(ext) = timeout;

	return elem;
}

4397 4398
/* Release everything a set element references and free it.
 *
 * The key and (if present) data are released, the attached expression is
 * torn down when @destroy_expr is true, and the use counter of a referenced
 * object is dropped before the element memory is freed.
 */
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
			  bool destroy_expr)
{
	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
	struct nft_ctx ctx = {
		.net	= read_pnet(&set->net),
		.family	= set->table->family,
	};
	struct nft_expr *expr;

	nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);

	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
		nft_data_release(nft_set_ext_data(ext), set->dtype);

	if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) {
		expr = nft_set_ext_expr(ext);

		if (!expr->ops->destroy_clone) {
			nf_tables_expr_destroy(&ctx, expr);
		} else {
			/* The clone hook does not drop the module
			 * reference, so it is put explicitly here.
			 */
			expr->ops->destroy_clone(&ctx, expr);
			module_put(expr->ops->type->owner);
		}
	}

	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
		(*nft_set_ext_obj(ext))->use--;

	kfree(elem);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);

4425 4426 4427
/* Only called from commit path, nft_set_elem_deactivate() already deals with
 * the refcounting from the preparation phase.
 */
4428 4429
static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
				       const struct nft_set *set, void *elem)
4430 4431 4432 4433
{
	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);

	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
4434
		nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext));
4435 4436 4437
	kfree(elem);
}

4438
/* Parse one element attribute and queue its insertion into @set.
 *
 * The element is built from the nested attributes in @attr (key, optional
 * flags, data, timeout, expiration, object reference and userdata), inserted
 * into the set backend marked busy, and a NFT_MSG_NEWSETELEM transaction is
 * appended to the commit list.
 *
 * An already existing element yields -EBUSY if it differs in data or object
 * reference, -EEXIST if NLM_F_EXCL was requested in @nlmsg_flags, and 0
 * otherwise (the new element is discarded).
 *
 * Returns 0 or a negative errno; every resource taken here is released on
 * failure.
 */
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
			    const struct nlattr *attr, u32 nlmsg_flags)
{
	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
	u8 genmask = nft_genmask_next(ctx->net);
	struct nft_data_desc key_desc, data_desc;
	struct nft_set_ext *ext, *old_ext;
	struct nft_set_binding *binding;
	struct nft_object *obj = NULL;
	struct nft_set_ext_tmpl tmpl;
	struct nft_userdata *udata;
	struct nft_set_elem elem;
	enum nft_registers dreg;
	struct nft_trans *trans;
	struct nft_data data;
	u64 expiration = 0;
	u64 timeout = 0;
	u32 flags = 0;
	u8 ulen = 0;
	int err;

	err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
					  nft_set_elem_policy, NULL);
	if (err < 0)
		return err;

	if (!nla[NFTA_SET_ELEM_KEY])
		return -EINVAL;

	nft_set_ext_prepare(&tmpl);

	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
	if (err < 0)
		return err;
	if (flags)
		nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);

	if (set->flags & NFT_SET_MAP) {
		/* Map elements carry data unless they are an interval end,
		 * and interval ends never carry data.
		 */
		if (!nla[NFTA_SET_ELEM_DATA] &&
		    !(flags & NFT_SET_ELEM_INTERVAL_END))
			return -EINVAL;
		if (nla[NFTA_SET_ELEM_DATA] &&
		    (flags & NFT_SET_ELEM_INTERVAL_END))
			return -EINVAL;
	} else if (nla[NFTA_SET_ELEM_DATA]) {
		return -EINVAL;
	}

	if (nla[NFTA_SET_ELEM_TIMEOUT]) {
		if (!(set->flags & NFT_SET_TIMEOUT))
			return -EINVAL;
		err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_TIMEOUT],
					    &timeout);
		if (err)
			return err;
	} else if (set->flags & NFT_SET_TIMEOUT) {
		/* Fall back to the set-wide default. */
		timeout = set->timeout;
	}

	if (nla[NFTA_SET_ELEM_EXPIRATION]) {
		if (!(set->flags & NFT_SET_TIMEOUT))
			return -EINVAL;
		err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION],
					    &expiration);
		if (err)
			return err;
	}

	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &key_desc,
			    nla[NFTA_SET_ELEM_KEY]);
	if (err < 0)
		return err;

	if (key_desc.type != NFT_DATA_VALUE || key_desc.len != set->klen) {
		err = -EINVAL;
		goto release_key;
	}

	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, key_desc.len);
	if (timeout > 0) {
		nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
		/* Only store a per-element timeout when it differs from
		 * the set default.
		 */
		if (timeout != set->timeout)
			nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
	}

	if (nla[NFTA_SET_ELEM_OBJREF]) {
		if (!(set->flags & NFT_SET_OBJECT)) {
			err = -EINVAL;
			goto release_key;
		}
		obj = nft_obj_lookup(ctx->net, ctx->table,
				     nla[NFTA_SET_ELEM_OBJREF],
				     set->objtype, genmask);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto release_key;
		}
		nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
	}

	if (nla[NFTA_SET_ELEM_DATA]) {
		err = nft_data_init(ctx, &data, sizeof(data), &data_desc,
				    nla[NFTA_SET_ELEM_DATA]);
		if (err < 0)
			goto release_key;

		if (set->dtype != NFT_DATA_VERDICT &&
		    data_desc.len != set->dlen) {
			err = -EINVAL;
			goto release_data;
		}

		/* Validate the data against every map binding of the set. */
		dreg = nft_type_to_reg(set->dtype);
		list_for_each_entry(binding, &set->bindings, list) {
			struct nft_ctx bind_ctx = {
				.net	= ctx->net,
				.family	= ctx->family,
				.table	= ctx->table,
				.chain	= (struct nft_chain *)binding->chain,
			};

			if (!(binding->flags & NFT_SET_MAP))
				continue;

			err = nft_validate_register_store(&bind_ctx, dreg,
							  &data,
							  data_desc.type,
							  data_desc.len);
			if (err < 0)
				goto release_data;

			if (data_desc.type == NFT_DATA_VERDICT &&
			    (data.verdict.code == NFT_GOTO ||
			     data.verdict.code == NFT_JUMP))
				nft_validate_state_update(ctx->net,
							  NFT_VALIDATE_NEED);
		}

		nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, data_desc.len);
	}

	/* The full maximum length of userdata can exceed the maximum
	 * offset value (U8_MAX) for following extensions, therefore it
	 * must be the last extension added.
	 *
	 * NOTE(review): nla_len() is narrowed to u8 here; confirm that the
	 * attribute policy bounds the length so this cannot wrap.
	 */
	if (nla[NFTA_SET_ELEM_USERDATA]) {
		ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
		if (ulen > 0)
			nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
					       ulen);
	}

	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
				      timeout, expiration, GFP_KERNEL);
	if (!elem.priv) {
		err = -ENOMEM;
		goto release_data;
	}

	ext = nft_set_elem_ext(set, elem.priv);
	if (flags)
		*nft_set_ext_flags(ext) = flags;
	if (ulen > 0) {
		udata = nft_set_ext_userdata(ext);
		/* The stored length is one less than the byte count. */
		udata->len = ulen - 1;
		nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
	}
	if (obj) {
		*nft_set_ext_obj(ext) = obj;
		obj->use++;
	}

	trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
	if (!trans) {
		err = -ENOMEM;
		goto free_elem;
	}

	ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
	err = set->ops->insert(ctx->net, set, &elem, &old_ext);
	if (err) {
		if (err == -EEXIST) {
			/* Presence of data / objref must match exactly. */
			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
			    nft_set_ext_exists(old_ext, NFT_SET_EXT_DATA) ||
			    nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
			    nft_set_ext_exists(old_ext, NFT_SET_EXT_OBJREF)) {
				err = -EBUSY;
				goto free_trans;
			}
			/* Same shape: contents must match too. */
			if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
			     nft_set_ext_exists(old_ext, NFT_SET_EXT_DATA) &&
			     memcmp(nft_set_ext_data(ext),
				    nft_set_ext_data(old_ext),
				    set->dlen) != 0) ||
			    (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
			     nft_set_ext_exists(old_ext, NFT_SET_EXT_OBJREF) &&
			     *nft_set_ext_obj(ext) != *nft_set_ext_obj(old_ext)))
				err = -EBUSY;
			else if (!(nlmsg_flags & NLM_F_EXCL))
				err = 0;
		}
		goto free_trans;
	}

	if (set->size &&
	    !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
		err = -ENFILE;
		goto remove_elem;
	}

	nft_trans_elem(trans) = elem;
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
	return 0;

remove_elem:
	set->ops->remove(ctx->net, set, &elem);
free_trans:
	kfree(trans);
free_elem:
	if (obj)
		obj->use--;
	kfree(elem.priv);
release_data:
	if (nla[NFTA_SET_ELEM_DATA] != NULL)
		nft_data_release(&data, data_desc.type);
release_key:
	nft_data_release(&elem.key.val, key_desc.type);
	return err;
}

4664 4665
static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
				struct sk_buff *skb, const struct nlmsghdr *nlh,
4666 4667
				const struct nlattr * const nla[],
				struct netlink_ext_ack *extack)
4668
{
4669
	u8 genmask = nft_genmask_next(net);
4670 4671 4672
	const struct nlattr *attr;
	struct nft_set *set;
	struct nft_ctx ctx;
4673
	int rem, err;
4674

4675 4676 4677
	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
		return -EINVAL;

4678 4679
	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
					 genmask);
4680 4681 4682
	if (err < 0)
		return err;

4683 4684 4685 4686
	set = nft_set_lookup_global(net, ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
				    nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
	if (IS_ERR(set))
		return PTR_ERR(set);
4687

4688 4689 4690 4691
	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
		return -EBUSY;

	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
4692
		err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
4693
		if (err < 0)
4694
			return err;
4695
	}
4696 4697 4698 4699 4700

	if (net->nft.validate_state == NFT_VALIDATE_DO)
		return nft_table_validate(net, ctx.table);

	return 0;
4701 4702
}

4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713
/**
 *	nft_data_hold - hold a nft_data item
 *
 *	@data: struct nft_data to release
 *	@type: type of data
 *
 *	Hold a nft_data item. NFT_DATA_VALUE types can be silently discarded,
 *	NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and
 *	NFT_GOTO verdicts. This function must be called on active data objects
 *	from the second phase of the commit protocol.
 */
4714
void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749
{
	if (type == NFT_DATA_VERDICT) {
		switch (data->verdict.code) {
		case NFT_JUMP:
		case NFT_GOTO:
			data->verdict.chain->use++;
			break;
		}
	}
}

static void nft_set_elem_activate(const struct net *net,
				  const struct nft_set *set,
				  struct nft_set_elem *elem)
{
	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);

	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
		nft_data_hold(nft_set_ext_data(ext), set->dtype);
	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
		(*nft_set_ext_obj(ext))->use++;
}

static void nft_set_elem_deactivate(const struct net *net,
				    const struct nft_set *set,
				    struct nft_set_elem *elem)
{
	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);

	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
		nft_data_release(nft_set_ext_data(ext), set->dtype);
	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
		(*nft_set_ext_obj(ext))->use--;
}

4750
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
4751 4752 4753
			   const struct nlattr *attr)
{
	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
4754
	struct nft_set_ext_tmpl tmpl;
4755 4756
	struct nft_data_desc desc;
	struct nft_set_elem elem;
4757
	struct nft_set_ext *ext;
4758
	struct nft_trans *trans;
4759 4760
	u32 flags = 0;
	void *priv;
4761 4762
	int err;

4763 4764
	err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
					  nft_set_elem_policy, NULL);
4765 4766 4767 4768 4769 4770 4771
	if (err < 0)
		goto err1;

	err = -EINVAL;
	if (nla[NFTA_SET_ELEM_KEY] == NULL)
		goto err1;

4772 4773 4774 4775 4776 4777 4778 4779
	nft_set_ext_prepare(&tmpl);

	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
	if (err < 0)
		return err;
	if (flags != 0)
		nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);

4780
	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
4781
			    nla[NFTA_SET_ELEM_KEY]);
4782 4783 4784 4785 4786 4787 4788
	if (err < 0)
		goto err1;

	err = -EINVAL;
	if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
		goto err2;

4789 4790 4791 4792
	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len);

	err = -ENOMEM;
	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0,
4793
				      0, GFP_KERNEL);
4794 4795 4796 4797 4798 4799 4800
	if (elem.priv == NULL)
		goto err2;

	ext = nft_set_elem_ext(set, elem.priv);
	if (flags)
		*nft_set_ext_flags(ext) = flags;

4801
	trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
4802 4803
	if (trans == NULL) {
		err = -ENOMEM;
4804
		goto err3;
4805
	}
4806

4807
	priv = set->ops->deactivate(ctx->net, set, &elem);
4808
	if (priv == NULL) {
4809
		err = -ENOENT;
4810
		goto err4;
4811
	}
4812 4813
	kfree(elem.priv);
	elem.priv = priv;
4814

4815 4816
	nft_set_elem_deactivate(ctx->net, set, &elem);

4817
	nft_trans_elem(trans) = elem;
4818
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
4819
	return 0;
4820

4821
err4:
4822
	kfree(trans);
4823 4824
err3:
	kfree(elem.priv);
4825
err2:
4826
	nft_data_release(&elem.key.val, desc.type);
4827 4828 4829 4830
err1:
	return err;
}

4831
static int nft_flush_set(const struct nft_ctx *ctx,
4832
			 struct nft_set *set,
4833
			 const struct nft_set_iter *iter,
4834
			 struct nft_set_elem *elem)
4835 4836 4837 4838 4839 4840 4841 4842 4843
{
	struct nft_trans *trans;
	int err;

	trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
				    sizeof(struct nft_trans_elem), GFP_ATOMIC);
	if (!trans)
		return -ENOMEM;

4844
	if (!set->ops->flush(ctx->net, set, elem->priv)) {
4845 4846 4847
		err = -ENOENT;
		goto err1;
	}
4848
	set->ndeact++;
4849

4850
	nft_set_elem_deactivate(ctx->net, set, elem);
4851 4852
	nft_trans_elem_set(trans) = set;
	nft_trans_elem(trans) = *elem;
4853 4854 4855 4856 4857 4858 4859 4860
	list_add_tail(&trans->list, &ctx->net->nft.commit_list);

	return 0;
err1:
	kfree(trans);
	return err;
}

4861 4862
static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
				struct sk_buff *skb, const struct nlmsghdr *nlh,
4863 4864
				const struct nlattr * const nla[],
				struct netlink_ext_ack *extack)
4865
{
4866
	u8 genmask = nft_genmask_next(net);
4867 4868 4869
	const struct nlattr *attr;
	struct nft_set *set;
	struct nft_ctx ctx;
4870
	int rem, err = 0;
4871

4872 4873
	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
					 genmask);
4874 4875 4876
	if (err < 0)
		return err;

4877
	set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
4878 4879 4880 4881 4882
	if (IS_ERR(set))
		return PTR_ERR(set);
	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
		return -EBUSY;

4883
	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
4884 4885 4886
		struct nft_set_iter iter = {
			.genmask	= genmask,
			.fn		= nft_flush_set,
4887
		};
4888
		set->ops->walk(&ctx, set, &iter);
4889

4890
		return iter.err;
4891 4892
	}

4893 4894 4895
	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
		err = nft_del_setelem(&ctx, set, attr);
		if (err < 0)
4896
			break;
4897

4898
		set->ndeact++;
4899
	}
4900
	return err;
4901 4902
}

4903 4904 4905 4906 4907 4908 4909
void nft_set_gc_batch_release(struct rcu_head *rcu)
{
	struct nft_set_gc_batch *gcb;
	unsigned int i;

	gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
	for (i = 0; i < gcb->head.cnt; i++)
4910
		nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927
	kfree(gcb);
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);

/* Allocate a zeroed garbage-collection batch bound to @set.
 * Returns NULL if the allocation fails.
 */
struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
						gfp_t gfp)
{
	struct nft_set_gc_batch *gcb = kzalloc(sizeof(*gcb), gfp);

	if (gcb)
		gcb->head.set = set;

	return gcb;
}
EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);

4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964
/*
 * Stateful objects
 */

/**
 *	nft_register_obj - register nf_tables stateful object type
 *	@obj_type: object type
 *
 *	Registers the object type for use with nf_tables by adding it to the
 *	nf_tables_objects list under the nftables subsystem lock. Returns zero
 *	on success, or -EINVAL when the type is NFT_OBJECT_UNSPEC.
 */
int nft_register_obj(struct nft_object_type *obj_type)
{
	if (obj_type->type == NFT_OBJECT_UNSPEC)
		return -EINVAL;

	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_add_rcu(&obj_type->list, &nf_tables_objects);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
	return 0;
}
EXPORT_SYMBOL_GPL(nft_register_obj);

/**
 *	nft_unregister_obj - unregister nf_tables object type
 *	@obj_type: object type
 *
 *	Unregisters the object type for use with nf_tables by removing it
 *	from its list under the nftables subsystem lock.
 */
void nft_unregister_obj(struct nft_object_type *obj_type)
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_del_rcu(&obj_type->list);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_obj);

4965 4966
/* Find a stateful object by (table, name) in the object name hash table.
 *
 * Among the entries sharing that key, the first one whose type equals
 * @objtype and which is active in @genmask is returned. Warns once if the
 * caller holds neither the RCU read lock nor the commit lock.
 *
 * Returns the object or ERR_PTR(-ENOENT).
 */
struct nft_object *nft_obj_lookup(const struct net *net,
				  const struct nft_table *table,
				  const struct nlattr *nla, u32 objtype,
				  u8 genmask)
{
	struct nft_object_hash_key k = { .table = table };
	struct nft_object *found = ERR_PTR(-ENOENT);
	char search[NFT_OBJ_MAXNAMELEN];
	struct rhlist_head *tmp, *list;
	struct nft_object *obj;

	nla_strlcpy(search, nla, sizeof(search));
	k.name = search;

	WARN_ON_ONCE(!rcu_read_lock_held() &&
		     !lockdep_commit_lock_is_held(net));

	rcu_read_lock();
	list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params);
	if (list) {
		rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) {
			if (objtype != obj->ops->type->type ||
			    !nft_active_genmask(obj, genmask))
				continue;

			found = obj;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
EXPORT_SYMBOL_GPL(nft_obj_lookup);
4998

4999 5000 5001
/* Find a stateful object in @table by the 64-bit handle carried in @nla.
 * The object must also match @objtype and be active in @genmask.
 * Returns the object or ERR_PTR(-ENOENT).
 */
static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table,
						  const struct nlattr *nla,
						  u32 objtype, u8 genmask)
{
	struct nft_object *obj;

	list_for_each_entry(obj, &table->objects, list) {
		if (be64_to_cpu(nla_get_be64(nla)) != obj->handle)
			continue;
		if (objtype != obj->ops->type->type)
			continue;
		if (nft_active_genmask(obj, genmask))
			return obj;
	}

	return ERR_PTR(-ENOENT);
}

5014
static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = {
5015 5016 5017 5018
	[NFTA_OBJ_TABLE]	= { .type = NLA_STRING,
				    .len = NFT_TABLE_MAXNAMELEN - 1 },
	[NFTA_OBJ_NAME]		= { .type = NLA_STRING,
				    .len = NFT_OBJ_MAXNAMELEN - 1 },
5019 5020
	[NFTA_OBJ_TYPE]		= { .type = NLA_U32 },
	[NFTA_OBJ_DATA]		= { .type = NLA_NESTED },
5021
	[NFTA_OBJ_HANDLE]	= { .type = NLA_U64},
5022 5023
};

5024 5025
static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
				       const struct nft_object_type *type,
5026 5027
				       const struct nlattr *attr)
{
5028
	struct nlattr **tb;
5029
	const struct nft_object_ops *ops;
5030
	struct nft_object *obj;
5031 5032 5033 5034 5035
	int err = -ENOMEM;

	tb = kmalloc_array(type->maxattr + 1, sizeof(*tb), GFP_KERNEL);
	if (!tb)
		goto err1;
5036 5037

	if (attr) {
5038 5039
		err = nla_parse_nested_deprecated(tb, type->maxattr, attr,
						  type->policy, NULL);
5040
		if (err < 0)
5041
			goto err2;
5042 5043 5044 5045
	} else {
		memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
	}

5046 5047 5048 5049
	if (type->select_ops) {
		ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
		if (IS_ERR(ops)) {
			err = PTR_ERR(ops);
5050
			goto err2;
5051 5052 5053 5054 5055
		}
	} else {
		ops = type->ops;
	}

5056
	err = -ENOMEM;
5057
	obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
5058 5059
	if (!obj)
		goto err2;
5060

5061
	err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
5062
	if (err < 0)
5063
		goto err3;
5064

5065 5066
	obj->ops = ops;

5067
	kfree(tb);
5068
	return obj;
5069
err3:
5070
	kfree(obj);
5071 5072
err2:
	kfree(tb);
5073 5074 5075 5076 5077
err1:
	return ERR_PTR(err);
}

/* Emit the object's type-specific state nested under attribute @attr by
 * calling its dump operation. Returns 0 on success, -1 if the nest cannot be
 * opened or the dump operation fails; the nest is not cancelled here.
 */
static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
			   struct nft_object *obj, bool reset)
{
	struct nlattr *nest = nla_nest_start_noflag(skb, attr);

	if (!nest)
		return -1;

	if (obj->ops->dump(skb, obj, reset) < 0)
		return -1;

	nla_nest_end(skb, nest);
	return 0;
}

/* Search the registered object types for @objtype.
 * Returns the matching type or NULL; no module reference is taken.
 */
static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
{
	const struct nft_object_type *type;

	list_for_each_entry(type, &nf_tables_objects, list) {
		if (type->type != objtype)
			continue;

		return type;
	}

	return NULL;
}

5105 5106
/* Resolve @objtype to a registered object type and take a reference on its
 * owning module.
 *
 * If the type is not registered and module support is enabled, the module
 * "nft-obj-<type>" is requested; when that makes the type appear,
 * ERR_PTR(-EAGAIN) is returned. In all other failure cases
 * ERR_PTR(-ENOENT) is returned.
 */
static const struct nft_object_type *
nft_obj_type_get(struct net *net, u32 objtype)
{
	const struct nft_object_type *type = __nft_obj_type_get(objtype);

	if (type && try_module_get(type->owner))
		return type;

	lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
	if (!type) {
		nft_request_module(net, "nft-obj-%u", objtype);
		if (__nft_obj_type_get(objtype))
			return ERR_PTR(-EAGAIN);
	}
#endif
	return ERR_PTR(-ENOENT);
}

static int nf_tables_newobj(struct net *net, struct sock *nlsk,
			    struct sk_buff *skb, const struct nlmsghdr *nlh,
5127 5128
			    const struct nlattr * const nla[],
			    struct netlink_ext_ack *extack)
5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	const struct nft_object_type *type;
	u8 genmask = nft_genmask_next(net);
	int family = nfmsg->nfgen_family;
	struct nft_table *table;
	struct nft_object *obj;
	struct nft_ctx ctx;
	u32 objtype;
	int err;

	if (!nla[NFTA_OBJ_TYPE] ||
	    !nla[NFTA_OBJ_NAME] ||
	    !nla[NFTA_OBJ_DATA])
		return -EINVAL;

5145
	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
5146 5147
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
5148
		return PTR_ERR(table);
5149
	}
5150 5151

	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
5152
	obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
5153 5154
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
5155 5156
		if (err != -ENOENT) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
5157
			return err;
5158
		}
5159
	} else {
5160 5161
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
5162
			return -EEXIST;
5163
		}
5164 5165 5166
		return 0;
	}

5167
	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
5168

5169
	type = nft_obj_type_get(net, objtype);
5170 5171 5172
	if (IS_ERR(type))
		return PTR_ERR(type);

5173
	obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
5174 5175 5176 5177
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err1;
	}
5178
	obj->key.table = table;
5179 5180
	obj->handle = nf_tables_alloc_handle(table);

5181 5182
	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
	if (!obj->key.name) {
5183 5184 5185
		err = -ENOMEM;
		goto err2;
	}
5186 5187 5188

	err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
	if (err < 0)
5189
		goto err3;
5190

5191 5192 5193 5194 5195
	err = rhltable_insert(&nft_objname_ht, &obj->rhlhead,
			      nft_objname_ht_params);
	if (err < 0)
		goto err4;

5196 5197 5198
	list_add_tail_rcu(&obj->list, &table->objects);
	table->use++;
	return 0;
5199 5200 5201 5202
err4:
	/* queued in transaction log */
	INIT_LIST_HEAD(&obj->list);
	return err;
5203
err3:
5204
	kfree(obj->key.name);
5205
err2:
5206
	if (obj->ops->destroy)
5207
		obj->ops->destroy(&ctx, obj);
5208 5209 5210 5211 5212 5213 5214 5215 5216
	kfree(obj);
err1:
	module_put(type->owner);
	return err;
}

/* Build one nfnetlink message describing a stateful object: table name,
 * object name, type, use counter, type-specific data and handle, in that
 * order.
 *
 * @reset is forwarded to the object's dump operation.
 *
 * Returns 0 on success. On failure the skb is trimmed back to the start of
 * this message and -1 is returned.
 */
static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
				   u32 portid, u32 seq, int event, u32 flags,
				   int family, const struct nft_table *table,
				   struct nft_object *obj, bool reset)
{
	struct nfgenmsg *genmsg;
	struct nlmsghdr *nlh;

	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*genmsg), flags);
	if (!nlh)
		goto nla_put_failure;

	genmsg = nlmsg_data(nlh);
	genmsg->nfgen_family = family;
	genmsg->version = NFNETLINK_V0;
	genmsg->res_id = htons(net->nft.base_seq & 0xffff);

	if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name))
		goto nla_put_failure;
	if (nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)))
		goto nla_put_failure;
	if (nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
		goto nla_put_failure;
	if (nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
			 NFTA_OBJ_PAD))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, nlh);
	return -1;
}

5249
struct nft_obj_filter {
5250
	char		*table;
5251 5252 5253
	u32		type;
};

5254 5255 5256 5257 5258
static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	const struct nft_table *table;
	unsigned int idx = 0, s_idx = cb->args[0];
5259
	struct nft_obj_filter *filter = cb->data;
5260 5261
	struct net *net = sock_net(skb->sk);
	int family = nfmsg->nfgen_family;
5262 5263 5264 5265 5266
	struct nft_object *obj;
	bool reset = false;

	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
		reset = true;
5267 5268 5269 5270

	rcu_read_lock();
	cb->seq = net->nft.base_seq;

5271
	list_for_each_entry_rcu(table, &net->nft.tables, list) {
5272
		if (family != NFPROTO_UNSPEC && family != table->family)
5273 5274
			continue;

5275 5276 5277 5278 5279 5280 5281 5282
		list_for_each_entry_rcu(obj, &table->objects, list) {
			if (!nft_is_active(net, obj))
				goto cont;
			if (idx < s_idx)
				goto cont;
			if (idx > s_idx)
				memset(&cb->args[1], 0,
				       sizeof(cb->args) - sizeof(cb->args[0]));
5283
			if (filter && filter->table &&
5284 5285 5286 5287 5288 5289
			    strcmp(filter->table, table->name))
				goto cont;
			if (filter &&
			    filter->type != NFT_OBJECT_UNSPEC &&
			    obj->ops->type->type != filter->type)
				goto cont;
5290

5291 5292 5293 5294
			if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NFT_MSG_NEWOBJ,
						    NLM_F_MULTI | NLM_F_APPEND,
5295
						    table->family, table,
5296 5297
						    obj, reset) < 0)
				goto done;
5298

5299
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
5300
cont:
5301
			idx++;
5302 5303 5304 5305 5306 5307 5308 5309 5310
		}
	}
done:
	rcu_read_unlock();

	cb->args[0] = idx;
	return skb->len;
}

5311
static int nf_tables_dump_obj_start(struct netlink_callback *cb)
5312
{
5313 5314
	const struct nlattr * const *nla = cb->data;
	struct nft_obj_filter *filter = NULL;
5315

5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330
	if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) {
		filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
		if (!filter)
			return -ENOMEM;

		if (nla[NFTA_OBJ_TABLE]) {
			filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
			if (!filter->table) {
				kfree(filter);
				return -ENOMEM;
			}
		}

		if (nla[NFTA_OBJ_TYPE])
			filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
5331
	}
5332

5333
	cb->data = filter;
5334 5335 5336
	return 0;
}

5337
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
5338
{
5339
	struct nft_obj_filter *filter = cb->data;
5340

5341 5342 5343
	if (filter) {
		kfree(filter->table);
		kfree(filter);
5344
	}
5345

5346
	return 0;
5347 5348
}

5349
/* called with rcu_read_lock held */
5350 5351
static int nf_tables_getobj(struct net *net, struct sock *nlsk,
			    struct sk_buff *skb, const struct nlmsghdr *nlh,
5352 5353
			    const struct nlattr * const nla[],
			    struct netlink_ext_ack *extack)
5354 5355 5356 5357 5358 5359 5360
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	u8 genmask = nft_genmask_cur(net);
	int family = nfmsg->nfgen_family;
	const struct nft_table *table;
	struct nft_object *obj;
	struct sk_buff *skb2;
5361
	bool reset = false;
5362 5363 5364 5365 5366
	u32 objtype;
	int err;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
5367
			.start = nf_tables_dump_obj_start,
5368
			.dump = nf_tables_dump_obj,
5369
			.done = nf_tables_dump_obj_done,
5370
			.module = THIS_MODULE,
5371
			.data = (void *)nla,
5372
		};
5373

5374
		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
5375 5376 5377 5378 5379 5380
	}

	if (!nla[NFTA_OBJ_NAME] ||
	    !nla[NFTA_OBJ_TYPE])
		return -EINVAL;

5381
	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
5382 5383
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
5384
		return PTR_ERR(table);
5385
	}
5386 5387

	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
5388
	obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
5389 5390
	if (IS_ERR(obj)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
5391
		return PTR_ERR(obj);
5392
	}
5393

5394
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
5395 5396 5397
	if (!skb2)
		return -ENOMEM;

5398 5399 5400
	if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
		reset = true;

5401 5402
	err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
				      nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
5403
				      family, table, obj, reset);
5404 5405 5406 5407 5408 5409 5410 5411 5412
	if (err < 0)
		goto err;

	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
err:
	kfree_skb(skb2);
	return err;
}

5413
static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
5414
{
5415
	if (obj->ops->destroy)
5416
		obj->ops->destroy(ctx, obj);
5417

5418
	module_put(obj->ops->type->owner);
5419
	kfree(obj->key.name);
5420 5421 5422 5423
	kfree(obj);
}

static int nf_tables_delobj(struct net *net, struct sock *nlsk,
5424 5425 5426
			    struct sk_buff *skb, const struct nlmsghdr *nlh,
			    const struct nlattr * const nla[],
			    struct netlink_ext_ack *extack)
5427 5428 5429 5430
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	u8 genmask = nft_genmask_next(net);
	int family = nfmsg->nfgen_family;
5431
	const struct nlattr *attr;
5432 5433 5434 5435 5436 5437
	struct nft_table *table;
	struct nft_object *obj;
	struct nft_ctx ctx;
	u32 objtype;

	if (!nla[NFTA_OBJ_TYPE] ||
5438
	    (!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
5439 5440
		return -EINVAL;

5441
	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
5442 5443
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
5444
		return PTR_ERR(table);
5445
	}
5446 5447

	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
5448 5449 5450 5451 5452
	if (nla[NFTA_OBJ_HANDLE]) {
		attr = nla[NFTA_OBJ_HANDLE];
		obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask);
	} else {
		attr = nla[NFTA_OBJ_NAME];
5453
		obj = nft_obj_lookup(net, table, attr, objtype, genmask);
5454 5455 5456 5457
	}

	if (IS_ERR(obj)) {
		NL_SET_BAD_ATTR(extack, attr);
5458
		return PTR_ERR(obj);
5459 5460 5461
	}
	if (obj->use > 0) {
		NL_SET_BAD_ATTR(extack, attr);
5462
		return -EBUSY;
5463
	}
5464

5465
	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
5466 5467 5468 5469

	return nft_delobj(&ctx, obj);
}

5470
/*
 * Build an object event message and hand it to nfnetlink_send() for the
 * NFNLGRP_NFTABLES group.
 *
 * Nothing is sent when the caller did not request a report and the group has
 * no listeners.  If the message cannot be allocated or filled, -ENOBUFS is
 * signalled through nfnetlink_set_err() instead.
 */
void nft_obj_notify(struct net *net, const struct nft_table *table,
		    struct nft_object *obj, u32 portid, u32 seq, int event,
		    int family, int report, gfp_t gfp)
{
	struct sk_buff *skb;

	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
		return;

	skb = nlmsg_new(NLMSG_GOODSIZE, gfp);
	if (skb != NULL) {
		if (nf_tables_fill_obj_info(skb, net, portid, seq, event, 0,
					    family, table, obj, false) >= 0) {
			nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES,
				       report, gfp);
			return;
		}
		kfree_skb(skb);
	}

	/* allocation or fill failed */
	nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
EXPORT_SYMBOL_GPL(nft_obj_notify);

5499 5500
static void nf_tables_obj_notify(const struct nft_ctx *ctx,
				 struct nft_object *obj, int event)
5501
{
5502
	nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
5503
		       ctx->family, ctx->report, GFP_KERNEL);
5504
}
5505

5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530
/*
 * Flow tables
 */

/*
 * Add a flowtable type to the tail of the nf_tables_flowtables list.  The
 * list update is done with the NFNL_SUBSYS_NFTABLES nfnl lock held.
 */
void nft_register_flowtable_type(struct nf_flowtable_type *type)
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_add_tail_rcu(&type->list, &nf_tables_flowtables);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_register_flowtable_type);

/*
 * Remove a flowtable type from the list it was added to.  The list update is
 * done with the NFNL_SUBSYS_NFTABLES nfnl lock held.
 */
void nft_unregister_flowtable_type(struct nf_flowtable_type *type)
{
	nfnl_lock(NFNL_SUBSYS_NFTABLES);
	list_del_rcu(&type->list);
	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
EXPORT_SYMBOL_GPL(nft_unregister_flowtable_type);

static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
	[NFTA_FLOWTABLE_TABLE]		= { .type = NLA_STRING,
					    .len = NFT_NAME_MAXLEN - 1 },
	[NFTA_FLOWTABLE_NAME]		= { .type = NLA_STRING,
					    .len = NFT_NAME_MAXLEN - 1 },
	[NFTA_FLOWTABLE_HOOK]		= { .type = NLA_NESTED },
5531
	[NFTA_FLOWTABLE_HANDLE]		= { .type = NLA_U64 },
5532 5533
};

5534 5535
/*
 * Find the flowtable in @table whose name equals the string attribute @nla
 * and which is active in @genmask.
 *
 * Returns the flowtable, or ERR_PTR(-ENOENT) when there is no match.
 */
struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
					   const struct nlattr *nla, u8 genmask)
{
	struct nft_flowtable *flowtable;

	list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
		/* nla_strcmp() yields 0 on a name match */
		if (nla_strcmp(nla, flowtable->name))
			continue;
		if (nft_active_genmask(flowtable, genmask))
			return flowtable;
	}

	return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
5547

5548
static struct nft_flowtable *
5549 5550
nft_flowtable_lookup_byhandle(const struct nft_table *table,
			      const struct nlattr *nla, u8 genmask)
5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561
{
       struct nft_flowtable *flowtable;

       list_for_each_entry(flowtable, &table->flowtables, list) {
               if (be64_to_cpu(nla_get_be64(nla)) == flowtable->handle &&
                   nft_active_genmask(flowtable, genmask))
                       return flowtable;
       }
       return ERR_PTR(-ENOENT);
}

5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577
static int nf_tables_parse_devices(const struct nft_ctx *ctx,
				   const struct nlattr *attr,
				   struct net_device *dev_array[], int *len)
{
	const struct nlattr *tmp;
	struct net_device *dev;
	char ifname[IFNAMSIZ];
	int rem, n = 0, err;

	nla_for_each_nested(tmp, attr, rem) {
		if (nla_type(tmp) != NFTA_DEVICE_NAME) {
			err = -EINVAL;
			goto err1;
		}

		nla_strlcpy(ifname, tmp, IFNAMSIZ);
5578
		dev = __dev_get_by_name(ctx->net, ifname);
5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614
		if (!dev) {
			err = -ENOENT;
			goto err1;
		}

		dev_array[n++] = dev;
		if (n == NFT_FLOWTABLE_DEVICE_MAX) {
			err = -EFBIG;
			goto err1;
		}
	}
	if (!len)
		return -EINVAL;

	err = 0;
err1:
	*len = n;
	return err;
}

/* Attribute validation policy for the attributes nested in NFTA_FLOWTABLE_HOOK. */
static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
	[NFTA_FLOWTABLE_HOOK_NUM]	= { .type = NLA_U32 },
	[NFTA_FLOWTABLE_HOOK_PRIORITY]	= { .type = NLA_U32 },
	[NFTA_FLOWTABLE_HOOK_DEVS]	= { .type = NLA_NESTED },
};

static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
					  const struct nlattr *attr,
					  struct nft_flowtable *flowtable)
{
	struct net_device *dev_array[NFT_FLOWTABLE_DEVICE_MAX];
	struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
	struct nf_hook_ops *ops;
	int hooknum, priority;
	int err, n = 0, i;

5615 5616
	err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
					  nft_flowtable_hook_policy, NULL);
5617 5618 5619 5620 5621 5622 5623 5624 5625
	if (err < 0)
		return err;

	if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
	    !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] ||
	    !tb[NFTA_FLOWTABLE_HOOK_DEVS])
		return -EINVAL;

	hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
5626
	if (hooknum != NF_NETDEV_INGRESS)
5627 5628 5629 5630 5631 5632 5633
		return -EINVAL;

	priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));

	err = nf_tables_parse_devices(ctx, tb[NFTA_FLOWTABLE_HOOK_DEVS],
				      dev_array, &n);
	if (err < 0)
5634
		return err;
5635

K
Kees Cook 已提交
5636
	ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
5637 5638
	if (!ops)
		return -ENOMEM;
5639

5640 5641
	flowtable->hooknum	= hooknum;
	flowtable->priority	= priority;
5642 5643 5644 5645 5646 5647 5648
	flowtable->ops		= ops;
	flowtable->ops_len	= n;

	for (i = 0; i < n; i++) {
		flowtable->ops[i].pf		= NFPROTO_NETDEV;
		flowtable->ops[i].hooknum	= hooknum;
		flowtable->ops[i].priority	= priority;
5649
		flowtable->ops[i].priv		= &flowtable->data;
5650 5651 5652 5653 5654 5655 5656
		flowtable->ops[i].hook		= flowtable->data.type->hook;
		flowtable->ops[i].dev		= dev_array[i];
	}

	return err;
}

5657
/*
 * Return the registered flowtable type for @family, or NULL if none is on
 * the nf_tables_flowtables list.  Takes no module reference.
 */
static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
{
	const struct nf_flowtable_type *type;

	list_for_each_entry(type, &nf_tables_flowtables, list) {
		if (type->family != family)
			continue;

		return type;
	}

	return NULL;
}

5668 5669
/*
 * Look up the flowtable type for @family and take a reference on its owning
 * module.
 *
 * Returns the type on success.  With CONFIG_MODULES, when no type is
 * registered the "nf-flowtable-<family>" module is requested and
 * ERR_PTR(-EAGAIN) is returned if the type shows up afterwards.  In every
 * other failure case ERR_PTR(-ENOENT) is returned.
 */
static const struct nf_flowtable_type *
nft_flowtable_type_get(struct net *net, u8 family)
{
	const struct nf_flowtable_type *type = __nft_flowtable_type_get(family);

	if (type && try_module_get(type->owner))
		return type;

	lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
	if (!type) {
		nft_request_module(net, "nf-flowtable-%u", family);
		if (__nft_flowtable_type_get(family) != NULL)
			return ERR_PTR(-EAGAIN);
	}
#endif
	return ERR_PTR(-ENOENT);
}

/*
 * Unregister every hook of @flowtable that still has a device attached;
 * entries whose ->dev is NULL are skipped.
 */
static void nft_unregister_flowtable_net_hooks(struct net *net,
					       struct nft_flowtable *flowtable)
{
	int idx;

	for (idx = 0; idx < flowtable->ops_len; idx++) {
		if (flowtable->ops[idx].dev)
			nf_unregister_net_hook(net, &flowtable->ops[idx]);
	}
}

static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
				  struct sk_buff *skb,
				  const struct nlmsghdr *nlh,
				  const struct nlattr * const nla[],
				  struct netlink_ext_ack *extack)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	const struct nf_flowtable_type *type;
5709
	struct nft_flowtable *flowtable, *ft;
5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720
	u8 genmask = nft_genmask_next(net);
	int family = nfmsg->nfgen_family;
	struct nft_table *table;
	struct nft_ctx ctx;
	int err, i, k;

	if (!nla[NFTA_FLOWTABLE_TABLE] ||
	    !nla[NFTA_FLOWTABLE_NAME] ||
	    !nla[NFTA_FLOWTABLE_HOOK])
		return -EINVAL;

5721 5722
	table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
				 genmask);
5723 5724
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
5725
		return PTR_ERR(table);
5726
	}
5727

5728 5729
	flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
					 genmask);
5730 5731
	if (IS_ERR(flowtable)) {
		err = PTR_ERR(flowtable);
5732 5733
		if (err != -ENOENT) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
5734
			return err;
5735
		}
5736
	} else {
5737 5738
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
5739
			return -EEXIST;
5740
		}
5741 5742 5743 5744

		return 0;
	}

5745
	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
5746 5747 5748 5749 5750 5751

	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
	if (!flowtable)
		return -ENOMEM;

	flowtable->table = table;
5752 5753
	flowtable->handle = nf_tables_alloc_handle(table);

5754 5755 5756 5757 5758 5759
	flowtable->name = nla_strdup(nla[NFTA_FLOWTABLE_NAME], GFP_KERNEL);
	if (!flowtable->name) {
		err = -ENOMEM;
		goto err1;
	}

5760
	type = nft_flowtable_type_get(net, family);
5761 5762 5763 5764 5765 5766
	if (IS_ERR(type)) {
		err = PTR_ERR(type);
		goto err2;
	}

	flowtable->data.type = type;
5767
	err = type->init(&flowtable->data);
5768 5769 5770 5771 5772 5773
	if (err < 0)
		goto err3;

	err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
					     flowtable);
	if (err < 0)
5774
		goto err4;
5775 5776

	for (i = 0; i < flowtable->ops_len; i++) {
5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787
		if (!flowtable->ops[i].dev)
			continue;

		list_for_each_entry(ft, &table->flowtables, list) {
			for (k = 0; k < ft->ops_len; k++) {
				if (!ft->ops[k].dev)
					continue;

				if (flowtable->ops[i].dev == ft->ops[k].dev &&
				    flowtable->ops[i].pf == ft->ops[k].pf) {
					err = -EBUSY;
5788
					goto err5;
5789 5790 5791 5792
				}
			}
		}

5793 5794
		err = nf_register_net_hook(net, &flowtable->ops[i]);
		if (err < 0)
5795
			goto err5;
5796 5797 5798 5799
	}

	err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
	if (err < 0)
5800
		goto err6;
5801 5802 5803 5804 5805

	list_add_tail_rcu(&flowtable->list, &table->flowtables);
	table->use++;

	return 0;
5806
err6:
5807
	i = flowtable->ops_len;
5808
err5:
5809
	for (k = i - 1; k >= 0; k--)
5810
		nf_unregister_net_hook(net, &flowtable->ops[k]);
5811 5812

	kfree(flowtable->ops);
5813 5814
err4:
	flowtable->data.type->free(&flowtable->data);
5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833
err3:
	module_put(type->owner);
err2:
	kfree(flowtable->name);
err1:
	kfree(flowtable);
	return err;
}

static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
				  struct sk_buff *skb,
				  const struct nlmsghdr *nlh,
				  const struct nlattr * const nla[],
				  struct netlink_ext_ack *extack)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	u8 genmask = nft_genmask_next(net);
	int family = nfmsg->nfgen_family;
	struct nft_flowtable *flowtable;
5834
	const struct nlattr *attr;
5835 5836 5837
	struct nft_table *table;
	struct nft_ctx ctx;

5838 5839 5840 5841 5842
	if (!nla[NFTA_FLOWTABLE_TABLE] ||
	    (!nla[NFTA_FLOWTABLE_NAME] &&
	     !nla[NFTA_FLOWTABLE_HANDLE]))
		return -EINVAL;

5843 5844
	table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
				 genmask);
5845 5846
	if (IS_ERR(table)) {
		NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
5847
		return PTR_ERR(table);
5848
	}
5849

5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863
	if (nla[NFTA_FLOWTABLE_HANDLE]) {
		attr = nla[NFTA_FLOWTABLE_HANDLE];
		flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask);
	} else {
		attr = nla[NFTA_FLOWTABLE_NAME];
		flowtable = nft_flowtable_lookup(table, attr, genmask);
	}

	if (IS_ERR(flowtable)) {
		NL_SET_BAD_ATTR(extack, attr);
		return PTR_ERR(flowtable);
	}
	if (flowtable->use > 0) {
		NL_SET_BAD_ATTR(extack, attr);
5864
		return -EBUSY;
5865
	}
5866

5867
	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893

	return nft_delflowtable(&ctx, flowtable);
}

/*
 * Append one flowtable message to @skb: nfgenmsg header, table name,
 * flowtable name, use counter, handle, and a nested NFTA_FLOWTABLE_HOOK
 * attribute holding hook number, priority and the names of the devices that
 * are still attached.
 *
 * Returns 0 on success.  If anything does not fit, the partial message is
 * trimmed from @skb and -1 is returned.
 */
static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
					 u32 portid, u32 seq, int event,
					 u32 flags, int family,
					 struct nft_flowtable *flowtable)
{
	struct nlattr *nest, *nest_devs;
	struct nfgenmsg *nfmsg;
	struct nlmsghdr *nlh;
	int i;

	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
	if (!nlh)
		goto nla_put_failure;

	nfmsg = nlmsg_data(nlh);
	nfmsg->nfgen_family	= family;
	nfmsg->version		= NFNETLINK_V0;
	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);

	if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name))
		goto nla_put_failure;
	if (nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)))
		goto nla_put_failure;
	if (nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE,
			 cpu_to_be64(flowtable->handle), NFTA_FLOWTABLE_PAD))
		goto nla_put_failure;

	nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
	if (!nest)
		goto nla_put_failure;

	if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM,
			 htonl(flowtable->hooknum)))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY,
			 htonl(flowtable->priority)))
		goto nla_put_failure;

	nest_devs = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK_DEVS);
	if (!nest_devs)
		goto nla_put_failure;

	for (i = 0; i < flowtable->ops_len; i++) {
		const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);

		/* slots whose device is gone have a NULL dev; skip them */
		if (!dev)
			continue;

		if (nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
			goto nla_put_failure;
	}
	nla_nest_end(skb, nest_devs);
	nla_nest_end(skb, nest);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, nlh);
	return -1;
}

/*
 * Optional filter for flowtable dumps; hangs off netlink cb->data.
 * @table is the table name a flowtable's table must match.
 */
struct nft_flowtable_filter {
	char		*table;
};

static int nf_tables_dump_flowtable(struct sk_buff *skb,
				    struct netlink_callback *cb)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
	struct nft_flowtable_filter *filter = cb->data;
	unsigned int idx = 0, s_idx = cb->args[0];
	struct net *net = sock_net(skb->sk);
	int family = nfmsg->nfgen_family;
	struct nft_flowtable *flowtable;
	const struct nft_table *table;

	rcu_read_lock();
	cb->seq = net->nft.base_seq;

5946
	list_for_each_entry_rcu(table, &net->nft.tables, list) {
5947
		if (family != NFPROTO_UNSPEC && family != table->family)
5948 5949
			continue;

5950 5951 5952 5953 5954 5955 5956 5957
		list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
			if (!nft_is_active(net, flowtable))
				goto cont;
			if (idx < s_idx)
				goto cont;
			if (idx > s_idx)
				memset(&cb->args[1], 0,
				       sizeof(cb->args) - sizeof(cb->args[0]));
5958
			if (filter && filter->table &&
5959 5960
			    strcmp(filter->table, table->name))
				goto cont;
5961

5962 5963 5964 5965
			if (nf_tables_fill_flowtable_info(skb, net, NETLINK_CB(cb->skb).portid,
							  cb->nlh->nlmsg_seq,
							  NFT_MSG_NEWFLOWTABLE,
							  NLM_F_MULTI | NLM_F_APPEND,
5966
							  table->family, flowtable) < 0)
5967
				goto done;
5968

5969
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
5970
cont:
5971
			idx++;
5972 5973 5974 5975 5976 5977 5978 5979 5980
		}
	}
done:
	rcu_read_unlock();

	cb->args[0] = idx;
	return skb->len;
}

5981
static int nf_tables_dump_flowtable_start(struct netlink_callback *cb)
5982
{
5983 5984
	const struct nlattr * const *nla = cb->data;
	struct nft_flowtable_filter *filter = NULL;
5985

5986 5987 5988 5989
	if (nla[NFTA_FLOWTABLE_TABLE]) {
		filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
		if (!filter)
			return -ENOMEM;
5990

5991 5992 5993 5994 5995 5996 5997
		filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
					   GFP_ATOMIC);
		if (!filter->table) {
			kfree(filter);
			return -ENOMEM;
		}
	}
5998

5999
	cb->data = filter;
6000 6001 6002
	return 0;
}

6003
static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
6004
{
6005
	struct nft_flowtable_filter *filter = cb->data;
6006 6007

	if (!filter)
6008
		return 0;
6009

6010 6011 6012 6013
	kfree(filter->table);
	kfree(filter);

	return 0;
6014 6015
}

6016
/* called with rcu_read_lock held */
6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032
static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
				  struct sk_buff *skb,
				  const struct nlmsghdr *nlh,
				  const struct nlattr * const nla[],
				  struct netlink_ext_ack *extack)
{
	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
	u8 genmask = nft_genmask_cur(net);
	int family = nfmsg->nfgen_family;
	struct nft_flowtable *flowtable;
	const struct nft_table *table;
	struct sk_buff *skb2;
	int err;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
6033
			.start = nf_tables_dump_flowtable_start,
6034 6035
			.dump = nf_tables_dump_flowtable,
			.done = nf_tables_dump_flowtable_done,
6036
			.module = THIS_MODULE,
6037
			.data = (void *)nla,
6038 6039
		};

6040
		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
6041 6042 6043 6044 6045
	}

	if (!nla[NFTA_FLOWTABLE_NAME])
		return -EINVAL;

6046 6047
	table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
				 genmask);
6048 6049 6050
	if (IS_ERR(table))
		return PTR_ERR(table);

6051 6052
	flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
					 genmask);
6053
	if (IS_ERR(flowtable))
6054 6055
		return PTR_ERR(flowtable);

6056
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089
	if (!skb2)
		return -ENOMEM;

	err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
					    nlh->nlmsg_seq,
					    NFT_MSG_NEWFLOWTABLE, 0, family,
					    flowtable);
	if (err < 0)
		goto err;

	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
err:
	kfree_skb(skb2);
	return err;
}

/*
 * Build a flowtable event message and hand it to nfnetlink_send() for the
 * NFNLGRP_NFTABLES group, passing ctx->report through.
 *
 * Nothing is sent when the request did not ask for a report and the group
 * has no listeners.  If the message cannot be allocated or filled, -ENOBUFS
 * is signalled through nfnetlink_set_err() instead.
 */
static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
				       struct nft_flowtable *flowtable,
				       int event)
{
	struct sk_buff *skb;
	int err;

	/*
	 * Skip only when nobody wants the event.  The test used to read
	 * "ctx->report && ...", which dropped explicitly requested reports
	 * whenever the group had no listeners and did the work for nothing
	 * when neither a report nor a listener existed.  This now matches
	 * nft_obj_notify().
	 */
	if (!ctx->report &&
	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
		return;

	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto err;

	err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
					    ctx->seq, event, 0,
					    ctx->family, flowtable);
	if (err < 0) {
		kfree_skb(skb);
		goto err;
	}

	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
		       ctx->report, GFP_KERNEL);
	return;
err:
	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}

static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
6105
	kfree(flowtable->ops);
6106
	kfree(flowtable->name);
6107
	flowtable->data.type->free(&flowtable->data);
6108
	module_put(flowtable->data.type->owner);
6109
	kfree(flowtable);
6110 6111
}

6112 6113 6114 6115 6116
/*
 * Append an NFT_MSG_NEWGEN message to @skb carrying the current generation
 * id (net->nft.base_seq) plus the pid and command name of the current task.
 *
 * Returns 0 on success.  If anything does not fit, the partial message is
 * trimmed from @skb and -EMSGSIZE is returned.
 */
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
				   u32 portid, u32 seq)
{
	int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
	char buf[TASK_COMM_LEN];
	struct nfgenmsg *nfmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
	if (!nlh)
		goto nla_put_failure;

	nfmsg = nlmsg_data(nlh);
	nfmsg->nfgen_family	= AF_UNSPEC;
	nfmsg->version		= NFNETLINK_V0;
	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);

	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
		goto nla_put_failure;
	if (nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))))
		goto nla_put_failure;
	if (nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_trim(skb, nlh);
	return -EMSGSIZE;
}

6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162
/*
 * Detach @dev from @flowtable: unregister the first hook bound to that
 * device and clear its ->dev pointer.  Does nothing if the flowtable has no
 * hook on @dev.  @event is not examined here.
 */
static void nft_flowtable_event(unsigned long event, struct net_device *dev,
				struct nft_flowtable *flowtable)
{
	int idx;

	for (idx = 0; idx < flowtable->ops_len; idx++) {
		if (flowtable->ops[idx].dev == dev) {
			nf_unregister_net_hook(dev_net(dev),
					       &flowtable->ops[idx]);
			flowtable->ops[idx].dev = NULL;
			return;
		}
	}
}

static int nf_tables_flowtable_event(struct notifier_block *this,
				     unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct nft_flowtable *flowtable;
	struct nft_table *table;
6163
	struct net *net;
6164 6165 6166 6167

	if (event != NETDEV_UNREGISTER)
		return 0;

6168
	net = dev_net(dev);
6169
	mutex_lock(&net->nft.commit_mutex);
6170
	list_for_each_entry(table, &net->nft.tables, list) {
6171 6172
		list_for_each_entry(flowtable, &table->flowtables, list) {
			nft_flowtable_event(event, dev, flowtable);
6173 6174
		}
	}
6175
	mutex_unlock(&net->nft.commit_mutex);
6176

6177 6178 6179 6180 6181 6182 6183
	return NOTIFY_DONE;
}

/* Notifier block whose callback is nf_tables_flowtable_event(). */
static struct notifier_block nf_tables_flowtable_notifier = {
	.notifier_call	= nf_tables_flowtable_event,
};

6184 6185
static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
				 int event)
6186 6187 6188 6189 6190 6191 6192
{
	struct nlmsghdr *nlh = nlmsg_hdr(skb);
	struct sk_buff *skb2;
	int err;

	if (nlmsg_report(nlh) &&
	    !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
6193
		return;
6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205

	skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb2 == NULL)
		goto err;

	err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
				      nlh->nlmsg_seq);
	if (err < 0) {
		kfree_skb(skb2);
		goto err;
	}

6206 6207 6208
	nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
		       nlmsg_report(nlh), GFP_KERNEL);
	return;
6209
err:
6210 6211
	nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
			  -ENOBUFS);
6212 6213
}

6214 6215
static int nf_tables_getgen(struct net *net, struct sock *nlsk,
			    struct sk_buff *skb, const struct nlmsghdr *nlh,
6216 6217
			    const struct nlattr * const nla[],
			    struct netlink_ext_ack *extack)
6218 6219 6220 6221
{
	struct sk_buff *skb2;
	int err;

6222
	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236
	if (skb2 == NULL)
		return -ENOMEM;

	err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
				      nlh->nlmsg_seq);
	if (err < 0)
		goto err;

	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
err:
	kfree_skb(skb2);
	return err;
}

P
Patrick McHardy 已提交
6237 6238
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
	[NFT_MSG_NEWTABLE] = {
6239
		.call_batch	= nf_tables_newtable,
P
Patrick McHardy 已提交
6240 6241 6242 6243
		.attr_count	= NFTA_TABLE_MAX,
		.policy		= nft_table_policy,
	},
	[NFT_MSG_GETTABLE] = {
6244
		.call_rcu	= nf_tables_gettable,
P
Patrick McHardy 已提交
6245 6246 6247 6248
		.attr_count	= NFTA_TABLE_MAX,
		.policy		= nft_table_policy,
	},
	[NFT_MSG_DELTABLE] = {
6249
		.call_batch	= nf_tables_deltable,
P
Patrick McHardy 已提交
6250 6251 6252 6253
		.attr_count	= NFTA_TABLE_MAX,
		.policy		= nft_table_policy,
	},
	[NFT_MSG_NEWCHAIN] = {
6254
		.call_batch	= nf_tables_newchain,
P
Patrick McHardy 已提交
6255 6256 6257 6258
		.attr_count	= NFTA_CHAIN_MAX,
		.policy		= nft_chain_policy,
	},
	[NFT_MSG_GETCHAIN] = {
6259
		.call_rcu	= nf_tables_getchain,
P
Patrick McHardy 已提交
6260 6261 6262 6263
		.attr_count	= NFTA_CHAIN_MAX,
		.policy		= nft_chain_policy,
	},
	[NFT_MSG_DELCHAIN] = {
6264
		.call_batch	= nf_tables_delchain,
P
Patrick McHardy 已提交
6265 6266 6267 6268
		.attr_count	= NFTA_CHAIN_MAX,
		.policy		= nft_chain_policy,
	},
	[NFT_MSG_NEWRULE] = {
6269
		.call_batch	= nf_tables_newrule,
P
Patrick McHardy 已提交
6270 6271 6272 6273
		.attr_count	= NFTA_RULE_MAX,
		.policy		= nft_rule_policy,
	},
	[NFT_MSG_GETRULE] = {
6274
		.call_rcu	= nf_tables_getrule,
P
Patrick McHardy 已提交
6275 6276 6277 6278
		.attr_count	= NFTA_RULE_MAX,
		.policy		= nft_rule_policy,
	},
	[NFT_MSG_DELRULE] = {
6279
		.call_batch	= nf_tables_delrule,
P
Patrick McHardy 已提交
6280 6281 6282
		.attr_count	= NFTA_RULE_MAX,
		.policy		= nft_rule_policy,
	},
6283
	[NFT_MSG_NEWSET] = {
6284
		.call_batch	= nf_tables_newset,
6285 6286 6287 6288
		.attr_count	= NFTA_SET_MAX,
		.policy		= nft_set_policy,
	},
	[NFT_MSG_GETSET] = {
6289
		.call_rcu	= nf_tables_getset,
6290 6291 6292 6293
		.attr_count	= NFTA_SET_MAX,
		.policy		= nft_set_policy,
	},
	[NFT_MSG_DELSET] = {
6294
		.call_batch	= nf_tables_delset,
6295 6296 6297 6298
		.attr_count	= NFTA_SET_MAX,
		.policy		= nft_set_policy,
	},
	[NFT_MSG_NEWSETELEM] = {
6299
		.call_batch	= nf_tables_newsetelem,
6300 6301 6302 6303
		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
		.policy		= nft_set_elem_list_policy,
	},
	[NFT_MSG_GETSETELEM] = {
6304
		.call_rcu	= nf_tables_getsetelem,
6305 6306 6307 6308
		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
		.policy		= nft_set_elem_list_policy,
	},
	[NFT_MSG_DELSETELEM] = {
6309
		.call_batch	= nf_tables_delsetelem,
6310 6311 6312
		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
		.policy		= nft_set_elem_list_policy,
	},
6313
	[NFT_MSG_GETGEN] = {
6314
		.call_rcu	= nf_tables_getgen,
6315
	},
6316 6317 6318 6319 6320 6321
	[NFT_MSG_NEWOBJ] = {
		.call_batch	= nf_tables_newobj,
		.attr_count	= NFTA_OBJ_MAX,
		.policy		= nft_obj_policy,
	},
	[NFT_MSG_GETOBJ] = {
6322
		.call_rcu	= nf_tables_getobj,
6323 6324 6325 6326 6327 6328 6329 6330
		.attr_count	= NFTA_OBJ_MAX,
		.policy		= nft_obj_policy,
	},
	[NFT_MSG_DELOBJ] = {
		.call_batch	= nf_tables_delobj,
		.attr_count	= NFTA_OBJ_MAX,
		.policy		= nft_obj_policy,
	},
6331
	[NFT_MSG_GETOBJ_RESET] = {
6332
		.call_rcu	= nf_tables_getobj,
6333 6334 6335
		.attr_count	= NFTA_OBJ_MAX,
		.policy		= nft_obj_policy,
	},
6336 6337 6338 6339 6340 6341
	[NFT_MSG_NEWFLOWTABLE] = {
		.call_batch	= nf_tables_newflowtable,
		.attr_count	= NFTA_FLOWTABLE_MAX,
		.policy		= nft_flowtable_policy,
	},
	[NFT_MSG_GETFLOWTABLE] = {
6342
		.call_rcu	= nf_tables_getflowtable,
6343 6344 6345 6346 6347 6348 6349 6350
		.attr_count	= NFTA_FLOWTABLE_MAX,
		.policy		= nft_flowtable_policy,
	},
	[NFT_MSG_DELFLOWTABLE] = {
		.call_batch	= nf_tables_delflowtable,
		.attr_count	= NFTA_FLOWTABLE_MAX,
		.policy		= nft_flowtable_policy,
	},
P
Patrick McHardy 已提交
6351 6352
};

6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373
/*
 * Validate every table of @net if validation is pending or enabled.
 *
 * In state NFT_VALIDATE_NEED the state is first moved to NFT_VALIDATE_DO.
 * In NFT_VALIDATE_SKIP (or any other state) nothing is validated.
 *
 * Returns 0 on success or when skipped, -EAGAIN if any table fails
 * validation.
 */
static int nf_tables_validate(struct net *net)
{
	struct nft_table *table;

	switch (net->nft.validate_state) {
	case NFT_VALIDATE_NEED:
		nft_validate_state_update(net, NFT_VALIDATE_DO);
		break;
	case NFT_VALIDATE_DO:
		break;
	default:
		/* NFT_VALIDATE_SKIP: nothing to check */
		return 0;
	}

	list_for_each_entry(table, &net->nft.tables, list) {
		if (nft_table_validate(net, table) < 0)
			return -EAGAIN;
	}

	return 0;
}

6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394
/*
 * Applying a drop policy must wait until all rules have been activated:
 * otherwise a large ruleset containing a drop-policy base chain would drop
 * every packet until the whole transaction has been processed.
 *
 * This helper therefore runs once the transaction has been finalized and
 * sets NF_DROP on the base chain, but only when the transaction requested
 * NF_DROP and the chain is a base chain.
 */
static void nft_chain_commit_drop_policy(struct nft_trans *trans)
{
	if (nft_trans_chain_policy(trans) != NF_DROP ||
	    !nft_is_base_chain(trans->ctx.chain))
		return;

	nft_base_chain(trans->ctx.chain)->policy = NF_DROP;
}

6395 6396 6397 6398
static void nft_chain_commit_update(struct nft_trans *trans)
{
	struct nft_base_chain *basechain;

6399 6400 6401 6402
	if (nft_trans_chain_name(trans)) {
		rhltable_remove(&trans->ctx.table->chains_ht,
				&trans->ctx.chain->rhlhead,
				nft_chain_ht_params);
6403
		swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
6404 6405 6406 6407 6408
		rhltable_insert_key(&trans->ctx.table->chains_ht,
				    trans->ctx.chain->name,
				    &trans->ctx.chain->rhlhead,
				    nft_chain_ht_params);
	}
6409

6410
	if (!nft_is_base_chain(trans->ctx.chain))
6411 6412
		return;

6413 6414
	nft_chain_stats_replace(trans);

6415 6416 6417 6418 6419 6420 6421 6422 6423 6424
	basechain = nft_base_chain(trans->ctx.chain);

	switch (nft_trans_chain_policy(trans)) {
	case NF_DROP:
	case NF_ACCEPT:
		basechain->policy = nft_trans_chain_policy(trans);
		break;
	}
}

6425
static void nft_commit_release(struct nft_trans *trans)
6426 6427 6428 6429 6430
{
	switch (trans->msg_type) {
	case NFT_MSG_DELTABLE:
		nf_tables_table_destroy(&trans->ctx);
		break;
6431
	case NFT_MSG_NEWCHAIN:
6432
		free_percpu(nft_trans_chain_stats(trans));
6433 6434
		kfree(nft_trans_chain_name(trans));
		break;
6435
	case NFT_MSG_DELCHAIN:
6436
		nf_tables_chain_destroy(&trans->ctx);
6437 6438 6439 6440 6441 6442 6443
		break;
	case NFT_MSG_DELRULE:
		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
		break;
	case NFT_MSG_DELSET:
		nft_set_destroy(nft_trans_set(trans));
		break;
6444
	case NFT_MSG_DELSETELEM:
6445 6446
		nf_tables_set_elem_destroy(&trans->ctx,
					   nft_trans_elem_set(trans),
6447
					   nft_trans_elem(trans).priv);
6448
		break;
6449
	case NFT_MSG_DELOBJ:
6450
		nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
6451
		break;
6452 6453 6454
	case NFT_MSG_DELFLOWTABLE:
		nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
		break;
6455
	}
6456 6457 6458 6459

	if (trans->put_net)
		put_net(trans->ctx.net);

6460 6461 6462
	kfree(trans);
}

6463
static void nf_tables_trans_destroy_work(struct work_struct *w)
6464 6465
{
	struct nft_trans *trans, *next;
6466 6467 6468 6469 6470
	LIST_HEAD(head);

	spin_lock(&nf_tables_destroy_list_lock);
	list_splice_init(&nf_tables_destroy_list, &head);
	spin_unlock(&nf_tables_destroy_list_lock);
6471

6472
	if (list_empty(&head))
6473 6474 6475 6476
		return;

	synchronize_rcu();

6477
	list_for_each_entry_safe(trans, next, &head, list) {
6478 6479 6480 6481 6482
		list_del(&trans->list);
		nft_commit_release(trans);
	}
}

6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550
/* Build chain->rules_next: a NULL-terminated array holding every rule of
 * @chain that is active in the next generation.
 *
 * Returns 0 on success, or when there is nothing to do (array already built
 * or chain not active in the next generation), and -ENOMEM if the array
 * could not be allocated.
 */
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
{
	struct nft_rule *rule;
	unsigned int alloc = 0;
	int i;

	/* already handled or inactive chain? */
	if (chain->rules_next || !nft_is_active_next(net, chain))
		return 0;

	/* Start the cursor at the list head itself so that the "continue"
	 * iterator below begins with the first real rule.
	 */
	rule = list_entry(&chain->rules, struct nft_rule, list);
	i = 0;

	/* first pass: count the rules that will be active */
	list_for_each_entry_continue(rule, &chain->rules, list) {
		if (nft_is_active_next(net, rule))
			alloc++;
	}

	chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc);
	if (!chain->rules_next)
		return -ENOMEM;

	/* second pass: the first loop left the cursor back at the list head,
	 * so this walks the whole list again and fills the array.
	 */
	list_for_each_entry_continue(rule, &chain->rules, list) {
		if (nft_is_active_next(net, rule))
			chain->rules_next[i++] = rule;
	}

	/* end marker */
	chain->rules_next[i] = NULL;
	return 0;
}

/* Undo nf_tables_commit_chain_prepare(): free and clear rules_next of every
 * chain referenced by a rule add/delete transaction on the commit list.
 */
static void nf_tables_commit_chain_prepare_cancel(struct net *net)
{
	struct nft_trans *trans, *next;

	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
		if (trans->msg_type != NFT_MSG_NEWRULE &&
		    trans->msg_type != NFT_MSG_DELRULE)
			continue;

		/* several transactions may share a chain; the pointer is
		 * cleared after the first free, so later ones free NULL.
		 */
		kvfree(trans->ctx.chain->rules_next);
		trans->ctx.chain->rules_next = NULL;
	}
}

/* RCU callback: free the rule array that the enclosing nft_rules_old
 * records as its start address.
 */
static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h)
{
	kvfree(container_of(h, struct nft_rules_old, h)->start);
}

static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
{
	struct nft_rule **r = rules;
	struct nft_rules_old *old;

	while (*r)
		r++;

	r++;	/* rcu_head is after end marker */
	old = (void *) r;
	old->start = rules;

	call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
}

6551
static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
6552 6553 6554 6555 6556 6557 6558
{
	struct nft_rule **g0, **g1;
	bool next_genbit;

	next_genbit = nft_gencursor_next(net);

	g0 = rcu_dereference_protected(chain->rules_gen_0,
6559
				       lockdep_commit_lock_is_held(net));
6560
	g1 = rcu_dereference_protected(chain->rules_gen_1,
6561
				       lockdep_commit_lock_is_held(net));
6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598

	/* No changes to this chain? */
	if (chain->rules_next == NULL) {
		/* chain had no change in last or next generation */
		if (g0 == g1)
			return;
		/*
		 * chain had no change in this generation; make sure next
		 * one uses same rules as current generation.
		 */
		if (next_genbit) {
			rcu_assign_pointer(chain->rules_gen_1, g0);
			nf_tables_commit_chain_free_rules_old(g1);
		} else {
			rcu_assign_pointer(chain->rules_gen_0, g1);
			nf_tables_commit_chain_free_rules_old(g0);
		}

		return;
	}

	if (next_genbit)
		rcu_assign_pointer(chain->rules_gen_1, chain->rules_next);
	else
		rcu_assign_pointer(chain->rules_gen_0, chain->rules_next);

	chain->rules_next = NULL;

	if (g0 == g1)
		return;

	if (next_genbit)
		nf_tables_commit_chain_free_rules_old(g1);
	else
		nf_tables_commit_chain_free_rules_old(g0);
}

6599 6600
static void nft_obj_del(struct nft_object *obj)
{
6601
	rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params);
6602 6603 6604
	list_del_rcu(&obj->list);
}

6605 6606 6607 6608 6609 6610 6611 6612 6613
/* Unlink @chain: remove it from its table's chain hash table (warning once
 * if that removal reports an error) and from the list it is linked on.
 */
static void nft_chain_del(struct nft_chain *chain)
{
	int err;

	err = rhltable_remove(&chain->table->chains_ht, &chain->rhlhead,
			      nft_chain_ht_params);
	WARN_ON_ONCE(err);

	list_del_rcu(&chain->list);
}

6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644
/* Hand the remaining commit-list entries over to the destroy worker and
 * release the commit mutex.
 *
 * The commit mutex is unlocked on every path through this function.
 */
static void nf_tables_commit_release(struct net *net)
{
	struct nft_trans *trans;

	/* all side effects have to be made visible.
	 * For example, if a chain named 'foo' has been deleted, a
	 * new transaction must not find it anymore.
	 *
	 * Memory reclaim happens asynchronously from work queue
	 * to prevent expensive synchronize_rcu() in commit phase.
	 */
	if (list_empty(&net->nft.commit_list)) {
		mutex_unlock(&net->nft.commit_mutex);
		return;
	}

	/* Take one netns reference for the whole batch and tag the last
	 * transaction with it; nft_commit_release() drops the reference
	 * when it sees put_net set.
	 */
	trans = list_last_entry(&net->nft.commit_list,
				struct nft_trans, list);
	get_net(trans->ctx.net);
	WARN_ON_ONCE(trans->put_net);

	trans->put_net = true;
	spin_lock(&nf_tables_destroy_list_lock);
	list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list);
	spin_unlock(&nf_tables_destroy_list_lock);

	mutex_unlock(&net->nft.commit_mutex);

	schedule_work(&trans_destroy_work);
}

6645
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
6646 6647
{
	struct nft_trans *trans, *next;
6648
	struct nft_trans_elem *te;
6649 6650
	struct nft_chain *chain;
	struct nft_table *table;
6651
	int err;
6652

6653 6654 6655 6656 6657
	if (list_empty(&net->nft.commit_list)) {
		mutex_unlock(&net->nft.commit_mutex);
		return 0;
	}

6658 6659 6660 6661
	/* 0. Validate ruleset, otherwise roll back for error reporting. */
	if (nf_tables_validate(net) < 0)
		return -EAGAIN;

6662 6663 6664 6665
	err = nft_flow_rule_offload_commit(net);
	if (err < 0)
		return err;

6666 6667 6668
	/* 1.  Allocate space for next generation rules_gen_X[] */
	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
		int ret;
6669

6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680
		if (trans->msg_type == NFT_MSG_NEWRULE ||
		    trans->msg_type == NFT_MSG_DELRULE) {
			chain = trans->ctx.chain;

			ret = nf_tables_commit_chain_prepare(net, chain);
			if (ret < 0) {
				nf_tables_commit_chain_prepare_cancel(net);
				return ret;
			}
		}
	}
6681

6682 6683
	/* step 2.  Make rules_gen_X visible to packet path */
	list_for_each_entry(table, &net->nft.tables, list) {
6684 6685
		list_for_each_entry(chain, &table->chains, list)
			nf_tables_commit_chain(net, chain);
6686 6687 6688 6689 6690
	}

	/*
	 * Bump generation counter, invalidate any dump in progress.
	 * Cannot fail after this point.
6691
	 */
6692 6693 6694 6695
	while (++net->nft.base_seq == 0);

	/* step 3. Start new generation, rules_gen_X now in use. */
	net->nft.gencursor = nft_gencursor_next(net);
6696 6697

	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
6698
		switch (trans->msg_type) {
6699 6700 6701
		case NFT_MSG_NEWTABLE:
			if (nft_trans_table_update(trans)) {
				if (!nft_trans_table_enable(trans)) {
6702
					nf_tables_table_disable(net,
6703 6704 6705 6706
								trans->ctx.table);
					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
				}
			} else {
6707
				nft_clear(net, trans->ctx.table);
6708
			}
6709
			nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
6710 6711 6712
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELTABLE:
6713
			list_del_rcu(&trans->ctx.table->list);
6714
			nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
6715
			break;
6716
		case NFT_MSG_NEWCHAIN:
6717
			if (nft_trans_chain_update(trans)) {
6718
				nft_chain_commit_update(trans);
6719 6720 6721
				nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
				/* trans destroyed after rcu grace period */
			} else {
6722
				nft_chain_commit_drop_policy(trans);
6723
				nft_clear(net, trans->ctx.chain);
6724 6725 6726
				nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
				nft_trans_destroy(trans);
			}
6727 6728
			break;
		case NFT_MSG_DELCHAIN:
6729
			nft_chain_del(trans->ctx.chain);
6730
			nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
6731 6732 6733
			nf_tables_unregister_hook(trans->ctx.net,
						  trans->ctx.table,
						  trans->ctx.chain);
6734
			break;
6735
		case NFT_MSG_NEWRULE:
6736
			nft_clear(trans->ctx.net, nft_trans_rule(trans));
6737
			nf_tables_rule_notify(&trans->ctx,
6738
					      nft_trans_rule(trans),
6739
					      NFT_MSG_NEWRULE);
6740
			nft_trans_destroy(trans);
6741 6742 6743
			break;
		case NFT_MSG_DELRULE:
			list_del_rcu(&nft_trans_rule(trans)->list);
6744 6745 6746
			nf_tables_rule_notify(&trans->ctx,
					      nft_trans_rule(trans),
					      NFT_MSG_DELRULE);
6747 6748 6749
			nft_rule_expr_deactivate(&trans->ctx,
						 nft_trans_rule(trans),
						 NFT_TRANS_COMMIT);
6750
			break;
6751
		case NFT_MSG_NEWSET:
6752
			nft_clear(net, nft_trans_set(trans));
6753 6754 6755
			/* This avoids hitting -EBUSY when deleting the table
			 * from the transaction.
			 */
6756
			if (nft_set_is_anonymous(nft_trans_set(trans)) &&
6757 6758 6759
			    !list_empty(&nft_trans_set(trans)->bindings))
				trans->ctx.table->use--;

6760
			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
6761
					     NFT_MSG_NEWSET, GFP_KERNEL);
6762 6763 6764
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELSET:
6765
			list_del_rcu(&nft_trans_set(trans)->list);
6766
			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
6767
					     NFT_MSG_DELSET, GFP_KERNEL);
6768
			break;
6769
		case NFT_MSG_NEWSETELEM:
6770 6771
			te = (struct nft_trans_elem *)trans->data;

6772
			te->set->ops->activate(net, te->set, &te->elem);
6773 6774
			nf_tables_setelem_notify(&trans->ctx, te->set,
						 &te->elem,
6775 6776 6777 6778
						 NFT_MSG_NEWSETELEM, 0);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELSETELEM:
6779
			te = (struct nft_trans_elem *)trans->data;
6780

6781 6782
			nf_tables_setelem_notify(&trans->ctx, te->set,
						 &te->elem,
6783
						 NFT_MSG_DELSETELEM, 0);
6784
			te->set->ops->remove(net, te->set, &te->elem);
6785 6786
			atomic_dec(&te->set->nelems);
			te->set->ndeact--;
6787
			break;
6788 6789 6790 6791 6792 6793 6794
		case NFT_MSG_NEWOBJ:
			nft_clear(net, nft_trans_obj(trans));
			nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
					     NFT_MSG_NEWOBJ);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELOBJ:
6795
			nft_obj_del(nft_trans_obj(trans));
6796 6797 6798
			nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
					     NFT_MSG_DELOBJ);
			break;
6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813
		case NFT_MSG_NEWFLOWTABLE:
			nft_clear(net, nft_trans_flowtable(trans));
			nf_tables_flowtable_notify(&trans->ctx,
						   nft_trans_flowtable(trans),
						   NFT_MSG_NEWFLOWTABLE);
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_DELFLOWTABLE:
			list_del_rcu(&nft_trans_flowtable(trans)->list);
			nf_tables_flowtable_notify(&trans->ctx,
						   nft_trans_flowtable(trans),
						   NFT_MSG_DELFLOWTABLE);
			nft_unregister_flowtable_net_hooks(net,
					nft_trans_flowtable(trans));
			break;
6814 6815 6816
		}
	}

6817
	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
6818
	nf_tables_commit_release(net);
6819 6820 6821 6822

	return 0;
}

6823
static void nf_tables_abort_release(struct nft_trans *trans)
6824 6825 6826 6827 6828 6829
{
	switch (trans->msg_type) {
	case NFT_MSG_NEWTABLE:
		nf_tables_table_destroy(&trans->ctx);
		break;
	case NFT_MSG_NEWCHAIN:
6830
		nf_tables_chain_destroy(&trans->ctx);
6831 6832 6833 6834 6835
		break;
	case NFT_MSG_NEWRULE:
		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
		break;
	case NFT_MSG_NEWSET:
6836
		nft_set_destroy(nft_trans_set(trans));
6837
		break;
6838 6839
	case NFT_MSG_NEWSETELEM:
		nft_set_elem_destroy(nft_trans_elem_set(trans),
6840
				     nft_trans_elem(trans).priv, true);
6841
		break;
6842
	case NFT_MSG_NEWOBJ:
6843
		nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
6844
		break;
6845 6846 6847
	case NFT_MSG_NEWFLOWTABLE:
		nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
		break;
6848 6849 6850 6851
	}
	kfree(trans);
}

6852
static int __nf_tables_abort(struct net *net)
6853 6854
{
	struct nft_trans *trans, *next;
6855
	struct nft_trans_elem *te;
6856

6857 6858
	list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
					 list) {
6859
		switch (trans->msg_type) {
6860 6861 6862
		case NFT_MSG_NEWTABLE:
			if (nft_trans_table_update(trans)) {
				if (nft_trans_table_enable(trans)) {
6863
					nf_tables_table_disable(net,
6864 6865 6866 6867 6868
								trans->ctx.table);
					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
				}
				nft_trans_destroy(trans);
			} else {
6869
				list_del_rcu(&trans->ctx.table->list);
6870 6871 6872
			}
			break;
		case NFT_MSG_DELTABLE:
6873
			nft_clear(trans->ctx.net, trans->ctx.table);
6874 6875
			nft_trans_destroy(trans);
			break;
6876 6877
		case NFT_MSG_NEWCHAIN:
			if (nft_trans_chain_update(trans)) {
6878
				free_percpu(nft_trans_chain_stats(trans));
6879
				kfree(nft_trans_chain_name(trans));
6880 6881
				nft_trans_destroy(trans);
			} else {
6882
				trans->ctx.table->use--;
6883
				nft_chain_del(trans->ctx.chain);
6884 6885 6886
				nf_tables_unregister_hook(trans->ctx.net,
							  trans->ctx.table,
							  trans->ctx.chain);
6887 6888 6889
			}
			break;
		case NFT_MSG_DELCHAIN:
6890
			trans->ctx.table->use++;
6891
			nft_clear(trans->ctx.net, trans->ctx.chain);
6892 6893
			nft_trans_destroy(trans);
			break;
6894
		case NFT_MSG_NEWRULE:
6895
			trans->ctx.chain->use--;
6896
			list_del_rcu(&nft_trans_rule(trans)->list);
6897 6898 6899
			nft_rule_expr_deactivate(&trans->ctx,
						 nft_trans_rule(trans),
						 NFT_TRANS_ABORT);
6900 6901
			break;
		case NFT_MSG_DELRULE:
6902
			trans->ctx.chain->use++;
6903
			nft_clear(trans->ctx.net, nft_trans_rule(trans));
6904
			nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
6905
			nft_trans_destroy(trans);
6906
			break;
6907
		case NFT_MSG_NEWSET:
6908
			trans->ctx.table->use--;
6909 6910 6911 6912 6913
			if (nft_trans_set(trans)->bound) {
				nft_trans_destroy(trans);
				break;
			}
			list_del_rcu(&nft_trans_set(trans)->list);
6914 6915
			break;
		case NFT_MSG_DELSET:
6916
			trans->ctx.table->use++;
6917
			nft_clear(trans->ctx.net, nft_trans_set(trans));
6918 6919
			nft_trans_destroy(trans);
			break;
6920
		case NFT_MSG_NEWSETELEM:
6921 6922 6923 6924
			if (nft_trans_elem_set(trans)->bound) {
				nft_trans_destroy(trans);
				break;
			}
6925
			te = (struct nft_trans_elem *)trans->data;
6926
			te->set->ops->remove(net, te->set, &te->elem);
6927
			atomic_dec(&te->set->nelems);
6928 6929
			break;
		case NFT_MSG_DELSETELEM:
6930 6931
			te = (struct nft_trans_elem *)trans->data;

6932
			nft_set_elem_activate(net, te->set, &te->elem);
6933
			te->set->ops->activate(net, te->set, &te->elem);
6934
			te->set->ndeact--;
6935

6936 6937 6938 6939
			nft_trans_destroy(trans);
			break;
		case NFT_MSG_NEWOBJ:
			trans->ctx.table->use--;
6940
			nft_obj_del(nft_trans_obj(trans));
6941 6942 6943 6944
			break;
		case NFT_MSG_DELOBJ:
			trans->ctx.table->use++;
			nft_clear(trans->ctx.net, nft_trans_obj(trans));
6945 6946
			nft_trans_destroy(trans);
			break;
6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957
		case NFT_MSG_NEWFLOWTABLE:
			trans->ctx.table->use--;
			list_del_rcu(&nft_trans_flowtable(trans)->list);
			nft_unregister_flowtable_net_hooks(net,
					nft_trans_flowtable(trans));
			break;
		case NFT_MSG_DELFLOWTABLE:
			trans->ctx.table->use++;
			nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
			nft_trans_destroy(trans);
			break;
6958 6959 6960
		}
	}

6961 6962
	synchronize_rcu();

6963 6964
	list_for_each_entry_safe_reverse(trans, next,
					 &net->nft.commit_list, list) {
6965
		list_del(&trans->list);
6966
		nf_tables_abort_release(trans);
6967 6968 6969 6970 6971
	}

	return 0;
}

6972 6973 6974 6975 6976
/* Subsystem ->cleanup callback: reset the validation state of @net to
 * NFT_VALIDATE_SKIP.
 */
static void nf_tables_cleanup(struct net *net)
{
	nft_validate_state_update(net, NFT_VALIDATE_SKIP);
}

6977 6978
/* Subsystem ->abort callback: roll back the pending transactions of @net,
 * then release the commit mutex. Returns the result of the rollback.
 */
static int nf_tables_abort(struct net *net, struct sk_buff *skb)
{
	int err;

	err = __nf_tables_abort(net);
	mutex_unlock(&net->nft.commit_mutex);

	return err;
}

6986 6987
/* Subsystem ->valid_genid callback.
 *
 * Takes the commit mutex and checks @genid: 0 always matches, any other
 * value must equal the current base_seq. On a match the mutex stays held
 * and has to be released by the commit or abort function; on a mismatch it
 * is released here.
 */
static bool nf_tables_valid_genid(struct net *net, u32 genid)
{
	mutex_lock(&net->nft.commit_mutex);

	if (genid == 0 || net->nft.base_seq == genid)
		return true;	/* commit mutex intentionally kept */

	mutex_unlock(&net->nft.commit_mutex);
	return false;
}

P
Patrick McHardy 已提交
7000 7001 7002 7003 7004
static const struct nfnetlink_subsystem nf_tables_subsys = {
	.name		= "nf_tables",
	.subsys_id	= NFNL_SUBSYS_NFTABLES,
	.cb_count	= NFT_MSG_MAX,
	.cb		= nf_tables_cb,
7005 7006
	.commit		= nf_tables_commit,
	.abort		= nf_tables_abort,
7007
	.cleanup	= nf_tables_cleanup,
7008
	.valid_genid	= nf_tables_valid_genid,
7009
	.owner		= THIS_MODULE,
P
Patrick McHardy 已提交
7010 7011
};

7012
int nft_chain_validate_dependency(const struct nft_chain *chain,
7013
				  enum nft_chain_types type)
7014 7015 7016
{
	const struct nft_base_chain *basechain;

7017
	if (nft_is_base_chain(chain)) {
7018 7019 7020 7021 7022 7023 7024 7025
		basechain = nft_base_chain(chain);
		if (basechain->type->type != type)
			return -EOPNOTSUPP;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(nft_chain_validate_dependency);

7026 7027 7028 7029 7030
int nft_chain_validate_hooks(const struct nft_chain *chain,
			     unsigned int hook_flags)
{
	struct nft_base_chain *basechain;

7031
	if (nft_is_base_chain(chain)) {
7032 7033
		basechain = nft_base_chain(chain);

7034
		if ((1 << basechain->ops.hooknum) & hook_flags)
7035 7036 7037 7038 7039 7040 7041 7042 7043
			return 0;

		return -EOPNOTSUPP;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);

7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056
/*
 * Loop detection - walk through the ruleset beginning at the destination chain
 * of a new jump until either the source chain is reached (loop) or all
 * reachable chains have been traversed.
 *
 * The loop check is performed whenever a new jump verdict is added to an
 * expression or verdict map or a verdict map is bound to a new chain.
 */

static int nf_tables_check_loops(const struct nft_ctx *ctx,
				 const struct nft_chain *chain);

static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
7057
					struct nft_set *set,
7058
					const struct nft_set_iter *iter,
7059
					struct nft_set_elem *elem)
7060
{
7061 7062 7063 7064 7065
	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
	const struct nft_data *data;

	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
	    *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
7066 7067
		return 0;

7068
	data = nft_set_ext_data(ext);
7069
	switch (data->verdict.code) {
7070 7071
	case NFT_JUMP:
	case NFT_GOTO:
7072
		return nf_tables_check_loops(ctx, data->verdict.chain);
7073 7074 7075 7076 7077 7078 7079 7080 7081 7082
	default:
		return 0;
	}
}

/* Recursively check whether @chain can reach ctx->chain.
 *
 * Follows jump/goto verdicts of "immediate" expressions in the rules of
 * @chain, and jump/goto verdicts stored in verdict maps of ctx->table that
 * are bound to @chain.
 *
 * Returns -ELOOP if ctx->chain is reached, a negative error propagated from
 * the recursion or the set walk, or 0 if no loop was found.
 */
static int nf_tables_check_loops(const struct nft_ctx *ctx,
				 const struct nft_chain *chain)
{
	const struct nft_rule *rule;
	const struct nft_expr *expr, *last;
	struct nft_set *set;
	struct nft_set_binding *binding;
	struct nft_set_iter iter;

	if (ctx->chain == chain)
		return -ELOOP;

	list_for_each_entry(rule, &chain->rules, list) {
		nft_rule_for_each_expr(expr, last, rule) {
			struct nft_immediate_expr *priv;
			const struct nft_data *data;
			int err;

			/* only immediate expressions writing the verdict
			 * register are inspected
			 */
			if (strcmp(expr->ops->type->name, "immediate"))
				continue;

			priv = nft_expr_priv(expr);
			if (priv->dreg != NFT_REG_VERDICT)
				continue;

			data = &priv->data;
			switch (data->verdict.code) {
			case NFT_JUMP:
			case NFT_GOTO:
				err = nf_tables_check_loops(ctx,
							data->verdict.chain);
				if (err < 0)
					return err;
				break;
			default:
				break;
			}
		}
	}

	list_for_each_entry(set, &ctx->table->sets, list) {
		if (!nft_is_active_next(ctx->net, set))
			continue;
		if (!(set->flags & NFT_SET_MAP) ||
		    set->dtype != NFT_DATA_VERDICT)
			continue;

		list_for_each_entry(binding, &set->bindings, list) {
			if (!(binding->flags & NFT_SET_MAP) ||
			    binding->chain != chain)
				continue;

			iter.genmask	= nft_genmask_next(ctx->net);
			iter.skip 	= 0;
			iter.count	= 0;
			iter.err	= 0;
			iter.fn		= nf_tables_loop_check_setelem;

			set->ops->walk(ctx, set, &iter);
			if (iter.err < 0)
				return iter.err;
		}
	}

	return 0;
}

7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155
/**
 *	nft_parse_u32_check - fetch u32 attribute and check for maximum value
 *
 *	@attr: netlink attribute to fetch value from
 *	@max: maximum value to be stored in dest
 *	@dest: pointer to the variable
 *
 *	Parse, check and store a given u32 netlink attribute into variable.
 *	This function returns -ERANGE if the value goes over maximum value.
 *	Otherwise a 0 is returned and the attribute value is stored in the
 *	destination variable.
 */
7156
int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
7157
{
7158
	u32 val;
7159 7160 7161 7162 7163 7164 7165 7166 7167 7168

	val = ntohl(nla_get_be32(attr));
	if (val > max)
		return -ERANGE;

	*dest = val;
	return 0;
}
EXPORT_SYMBOL_GPL(nft_parse_u32_check);

7169 7170 7171 7172 7173 7174 7175 7176 7177
/**
 *	nft_parse_register - parse a register value from a netlink attribute
 *
 *	@attr: netlink attribute
 *
 *	Parse and translate a register value from a netlink attribute.
 *	Registers used to be 128 bit wide, these register numbers will be
 *	mapped to the corresponding 32 bit register numbers.
 */
7178 7179
unsigned int nft_parse_register(const struct nlattr *attr)
{
7180 7181 7182 7183 7184 7185 7186 7187 7188
	unsigned int reg;

	reg = ntohl(nla_get_be32(attr));
	switch (reg) {
	case NFT_REG_VERDICT...NFT_REG_4:
		return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
	default:
		return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
	}
7189 7190 7191
}
EXPORT_SYMBOL_GPL(nft_parse_register);

7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202
/**
 *	nft_dump_register - dump a register value to a netlink attribute
 *
 *	@skb: socket buffer
 *	@attr: attribute number
 *	@reg: register number
 *
 *	Construct a netlink attribute containing the register number. For
 *	compatibility reasons, register numbers being a multiple of 4 are
 *	translated to the corresponding 128 bit register numbers.
 */
7203 7204
int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
{
7205 7206 7207 7208 7209
	if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
		reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
	else
		reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;

7210 7211 7212 7213
	return nla_put_be32(skb, attr, htonl(reg));
}
EXPORT_SYMBOL_GPL(nft_dump_register);

P
Patrick McHardy 已提交
7214
/**
7215
 *	nft_validate_register_load - validate a load from a register
P
Patrick McHardy 已提交
7216 7217
 *
 *	@reg: the register number
7218
 *	@len: the length of the data
P
Patrick McHardy 已提交
7219 7220
 *
 * 	Validate that the input register is one of the general purpose
7221
 * 	registers and that the length of the load is within the bounds.
P
Patrick McHardy 已提交
7222
 */
7223
int nft_validate_register_load(enum nft_registers reg, unsigned int len)
P
Patrick McHardy 已提交
7224
{
7225
	if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
P
Patrick McHardy 已提交
7226
		return -EINVAL;
7227 7228
	if (len == 0)
		return -EINVAL;
7229
	if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
7230
		return -ERANGE;
7231

P
Patrick McHardy 已提交
7232 7233
	return 0;
}
7234
EXPORT_SYMBOL_GPL(nft_validate_register_load);
P
Patrick McHardy 已提交
7235 7236

/**
7237
 *	nft_validate_register_store - validate an expressions' register store
P
Patrick McHardy 已提交
7238 7239 7240 7241 7242
 *
 *	@ctx: context of the expression performing the load
 * 	@reg: the destination register number
 * 	@data: the data to load
 * 	@type: the data type
7243
 * 	@len: the length of the data
P
Patrick McHardy 已提交
7244 7245
 *
 * 	Validate that a data load uses the appropriate data type for
7246 7247
 * 	the destination register and the length is within the bounds.
 * 	A value of NULL for the data means that its runtime gathered
7248
 * 	data.
P
Patrick McHardy 已提交
7249
 */
7250 7251 7252 7253
int nft_validate_register_store(const struct nft_ctx *ctx,
				enum nft_registers reg,
				const struct nft_data *data,
				enum nft_data_types type, unsigned int len)
P
Patrick McHardy 已提交
7254
{
7255 7256
	int err;

P
Patrick McHardy 已提交
7257 7258
	switch (reg) {
	case NFT_REG_VERDICT:
7259
		if (type != NFT_DATA_VERDICT)
P
Patrick McHardy 已提交
7260
			return -EINVAL;
7261

7262
		if (data != NULL &&
7263 7264 7265
		    (data->verdict.code == NFT_GOTO ||
		     data->verdict.code == NFT_JUMP)) {
			err = nf_tables_check_loops(ctx, data->verdict.chain);
7266 7267 7268 7269
			if (err < 0)
				return err;
		}

P
Patrick McHardy 已提交
7270 7271
		return 0;
	default:
7272
		if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
7273
			return -EINVAL;
7274 7275
		if (len == 0)
			return -EINVAL;
7276 7277
		if (reg * NFT_REG32_SIZE + len >
		    FIELD_SIZEOF(struct nft_regs, data))
7278
			return -ERANGE;
7279

P
Patrick McHardy 已提交
7280 7281 7282 7283 7284
		if (data != NULL && type != NFT_DATA_VALUE)
			return -EINVAL;
		return 0;
	}
}
7285
EXPORT_SYMBOL_GPL(nft_validate_register_store);
P
Patrick McHardy 已提交
7286 7287 7288 7289 7290 7291 7292 7293 7294 7295

/* Netlink attribute policy for nested verdict data: a u32 verdict code and
 * a chain name string of at most NFT_CHAIN_MAXNAMELEN - 1 bytes.
 */
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
	[NFTA_VERDICT_CODE]	= { .type = NLA_U32 },
	[NFTA_VERDICT_CHAIN]	= { .type = NLA_STRING,
				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
};

static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
			    struct nft_data_desc *desc, const struct nlattr *nla)
{
7296
	u8 genmask = nft_genmask_next(ctx->net);
P
Patrick McHardy 已提交
7297 7298 7299 7300
	struct nlattr *tb[NFTA_VERDICT_MAX + 1];
	struct nft_chain *chain;
	int err;

7301 7302
	err = nla_parse_nested_deprecated(tb, NFTA_VERDICT_MAX, nla,
					  nft_verdict_policy, NULL);
P
Patrick McHardy 已提交
7303 7304 7305 7306 7307
	if (err < 0)
		return err;

	if (!tb[NFTA_VERDICT_CODE])
		return -EINVAL;
7308
	data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
P
Patrick McHardy 已提交
7309

7310
	switch (data->verdict.code) {
7311
	default:
7312
		switch (data->verdict.code & NF_VERDICT_MASK) {
7313 7314 7315 7316 7317 7318 7319 7320
		case NF_ACCEPT:
		case NF_DROP:
		case NF_QUEUE:
			break;
		default:
			return -EINVAL;
		}
		/* fall through */
P
Patrick McHardy 已提交
7321 7322 7323 7324 7325 7326 7327 7328
	case NFT_CONTINUE:
	case NFT_BREAK:
	case NFT_RETURN:
		break;
	case NFT_JUMP:
	case NFT_GOTO:
		if (!tb[NFTA_VERDICT_CHAIN])
			return -EINVAL;
7329 7330
		chain = nft_chain_lookup(ctx->net, ctx->table,
					 tb[NFTA_VERDICT_CHAIN], genmask);
P
Patrick McHardy 已提交
7331 7332
		if (IS_ERR(chain))
			return PTR_ERR(chain);
7333
		if (nft_is_base_chain(chain))
P
Patrick McHardy 已提交
7334 7335 7336
			return -EOPNOTSUPP;

		chain->use++;
7337
		data->verdict.chain = chain;
P
Patrick McHardy 已提交
7338 7339 7340
		break;
	}

7341
	desc->len = sizeof(data->verdict);
P
Patrick McHardy 已提交
7342 7343 7344 7345 7346 7347
	desc->type = NFT_DATA_VERDICT;
	return 0;
}

static void nft_verdict_uninit(const struct nft_data *data)
{
7348
	switch (data->verdict.code) {
P
Patrick McHardy 已提交
7349 7350
	case NFT_JUMP:
	case NFT_GOTO:
7351
		data->verdict.chain->use--;
P
Patrick McHardy 已提交
7352 7353 7354 7355
		break;
	}
}

7356
int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v)
P
Patrick McHardy 已提交
7357 7358 7359
{
	struct nlattr *nest;

7360
	nest = nla_nest_start_noflag(skb, type);
P
Patrick McHardy 已提交
7361 7362 7363
	if (!nest)
		goto nla_put_failure;

7364
	if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code)))
P
Patrick McHardy 已提交
7365 7366
		goto nla_put_failure;

7367
	switch (v->code) {
P
Patrick McHardy 已提交
7368 7369
	case NFT_JUMP:
	case NFT_GOTO:
7370
		if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
7371
				   v->chain->name))
P
Patrick McHardy 已提交
7372 7373 7374 7375 7376 7377 7378 7379 7380
			goto nla_put_failure;
	}
	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	return -1;
}

7381 7382
static int nft_value_init(const struct nft_ctx *ctx,
			  struct nft_data *data, unsigned int size,
P
Patrick McHardy 已提交
7383 7384 7385 7386 7387 7388 7389
			  struct nft_data_desc *desc, const struct nlattr *nla)
{
	unsigned int len;

	len = nla_len(nla);
	if (len == 0)
		return -EINVAL;
7390
	if (len > size)
P
Patrick McHardy 已提交
7391 7392
		return -EOVERFLOW;

7393
	nla_memcpy(data->data, nla, len);
P
Patrick McHardy 已提交
7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405
	desc->type = NFT_DATA_VALUE;
	desc->len  = len;
	return 0;
}

/* Dump @len bytes of @data as an NFTA_DATA_VALUE attribute into @skb.
 * Returns the result of nla_put().
 */
static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
			  unsigned int len)
{
	return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
}

static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
7406
	[NFTA_DATA_VALUE]	= { .type = NLA_BINARY },
P
Patrick McHardy 已提交
7407 7408 7409 7410 7411 7412 7413 7414
	[NFTA_DATA_VERDICT]	= { .type = NLA_NESTED },
};

/**
 *	nft_data_init - parse nf_tables data netlink attributes
 *
 *	@ctx: context of the expression using the data
 *	@data: destination struct nft_data
7415
 *	@size: maximum data length
P
Patrick McHardy 已提交
7416 7417 7418 7419 7420 7421 7422 7423 7424
 *	@desc: data description
 *	@nla: netlink attribute containing data
 *
 *	Parse the netlink data attributes and initialize a struct nft_data.
 *	The type and length of data are returned in the data description.
 *
 *	The caller can indicate that it only wants to accept data of type
 *	NFT_DATA_VALUE by passing NULL for the ctx argument.
 */
7425 7426
int nft_data_init(const struct nft_ctx *ctx,
		  struct nft_data *data, unsigned int size,
P
Patrick McHardy 已提交
7427 7428 7429 7430 7431
		  struct nft_data_desc *desc, const struct nlattr *nla)
{
	struct nlattr *tb[NFTA_DATA_MAX + 1];
	int err;

7432 7433
	err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla,
					  nft_data_policy, NULL);
P
Patrick McHardy 已提交
7434 7435 7436 7437
	if (err < 0)
		return err;

	if (tb[NFTA_DATA_VALUE])
7438 7439
		return nft_value_init(ctx, data, size, desc,
				      tb[NFTA_DATA_VALUE]);
P
Patrick McHardy 已提交
7440 7441 7442 7443 7444 7445 7446
	if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
		return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(nft_data_init);

/**
7447
 *	nft_data_release - release a nft_data item
P
Patrick McHardy 已提交
7448 7449 7450 7451 7452 7453 7454
 *
 *	@data: struct nft_data to release
 *	@type: type of data
 *
 *	Release a nft_data item. NFT_DATA_VALUE types can be silently discarded,
 *	all others need to be released by calling this function.
 */
7455
void nft_data_release(const struct nft_data *data, enum nft_data_types type)
P
Patrick McHardy 已提交
7456
{
7457
	if (type < NFT_DATA_VERDICT)
P
Patrick McHardy 已提交
7458
		return;
7459
	switch (type) {
P
Patrick McHardy 已提交
7460 7461 7462 7463 7464 7465
	case NFT_DATA_VERDICT:
		return nft_verdict_uninit(data);
	default:
		WARN_ON(1);
	}
}
7466
EXPORT_SYMBOL_GPL(nft_data_release);
P
Patrick McHardy 已提交
7467 7468 7469 7470 7471 7472 7473

int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
		  enum nft_data_types type, unsigned int len)
{
	struct nlattr *nest;
	int err;

7474
	nest = nla_nest_start_noflag(skb, attr);
P
Patrick McHardy 已提交
7475 7476 7477 7478 7479 7480 7481 7482
	if (nest == NULL)
		return -1;

	switch (type) {
	case NFT_DATA_VALUE:
		err = nft_value_dump(skb, data, len);
		break;
	case NFT_DATA_VERDICT:
7483
		err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict);
P
Patrick McHardy 已提交
7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494
		break;
	default:
		err = -EINVAL;
		WARN_ON(1);
	}

	nla_nest_end(skb, nest);
	return err;
}
EXPORT_SYMBOL_GPL(nft_data_dump);

7495 7496 7497 7498
int __nft_release_basechain(struct nft_ctx *ctx)
{
	struct nft_rule *rule, *nr;

7499 7500
	if (WARN_ON(!nft_is_base_chain(ctx->chain)))
		return 0;
7501

7502
	nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
7503 7504 7505
	list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
		list_del(&rule->list);
		ctx->chain->use--;
7506
		nf_tables_rule_release(ctx, rule);
7507
	}
7508
	nft_chain_del(ctx->chain);
7509
	ctx->table->use--;
7510
	nf_tables_chain_destroy(ctx);
7511 7512 7513 7514 7515

	return 0;
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);

7516
static void __nft_release_tables(struct net *net)
7517
{
7518
	struct nft_flowtable *flowtable, *nf;
7519 7520
	struct nft_table *table, *nt;
	struct nft_chain *chain, *nc;
7521
	struct nft_object *obj, *ne;
7522 7523 7524 7525
	struct nft_rule *rule, *nr;
	struct nft_set *set, *ns;
	struct nft_ctx ctx = {
		.net	= net,
7526
		.family	= NFPROTO_NETDEV,
7527 7528
	};

7529
	list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
7530
		ctx.family = table->family;
7531

7532
		list_for_each_entry(chain, &table->chains, list)
7533
			nf_tables_unregister_hook(net, table, chain);
7534 7535 7536 7537 7538 7539 7540
		/* No packets are walking on these chains anymore. */
		ctx.table = table;
		list_for_each_entry(chain, &table->chains, list) {
			ctx.chain = chain;
			list_for_each_entry_safe(rule, nr, &chain->rules, list) {
				list_del(&rule->list);
				chain->use--;
7541
				nf_tables_rule_release(&ctx, rule);
7542 7543
			}
		}
7544 7545 7546 7547 7548
		list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
			list_del(&flowtable->list);
			table->use--;
			nf_tables_flowtable_destroy(flowtable);
		}
7549 7550 7551 7552 7553
		list_for_each_entry_safe(set, ns, &table->sets, list) {
			list_del(&set->list);
			table->use--;
			nft_set_destroy(set);
		}
7554
		list_for_each_entry_safe(obj, ne, &table->objects, list) {
7555
			nft_obj_del(obj);
7556
			table->use--;
7557
			nft_obj_destroy(&ctx, obj);
7558
		}
7559
		list_for_each_entry_safe(chain, nc, &table->chains, list) {
7560
			ctx.chain = chain;
7561
			nft_chain_del(chain);
7562
			table->use--;
7563
			nf_tables_chain_destroy(&ctx);
7564 7565 7566 7567 7568 7569
		}
		list_del(&table->list);
		nf_tables_table_destroy(&ctx);
	}
}

7570 7571 7572 7573
/*
 * Per-namespace init: start with empty table and commit lists, an
 * initialized commit mutex, base sequence 1 and validation skipped.
 * Always returns 0.
 */
static int __net_init nf_tables_init_net(struct net *net)
{
	net->nft.base_seq = 1;
	net->nft.validate_state = NFT_VALIDATE_SKIP;

	INIT_LIST_HEAD(&net->nft.tables);
	INIT_LIST_HEAD(&net->nft.commit_list);
	mutex_init(&net->nft.commit_mutex);

	return 0;
}

/*
 * Per-namespace exit: under the commit mutex, abort any transaction that is
 * still queued on the commit list and then release all tables. Warns once if
 * the table list is not empty afterwards.
 */
static void __net_exit nf_tables_exit_net(struct net *net)
{
	mutex_lock(&net->nft.commit_mutex);

	/* Pending, uncommitted changes are rolled back before teardown. */
	if (!list_empty(&net->nft.commit_list))
		__nf_tables_abort(net);

	__nft_release_tables(net);

	mutex_unlock(&net->nft.commit_mutex);

	WARN_ON_ONCE(!list_empty(&net->nft.tables));
}

7591 7592
static struct pernet_operations nf_tables_net_ops = {
	.init	= nf_tables_init_net,
7593
	.exit	= nf_tables_exit_net,
7594 7595
};

7596 7597 7598 7599 7600
/*
 * Indirect flow block ingress callback entry for nf_tables. It is added with
 * flow_indr_add_block_ing_cb() at module init and removed with
 * flow_indr_del_block_ing_cb() at module exit; its list head starts out
 * self-linked (empty).
 */
static struct flow_indr_block_ing_entry block_ing_entry = {
	.cb = nft_indr_block_get_and_ing_cmd,
	.list = LIST_HEAD_INIT(block_ing_entry.list),
};

P
Patrick McHardy 已提交
7601 7602 7603 7604
/*
 * Module init: bring up the nf_tables pieces one by one and unwind in
 * reverse order if any step fails.
 *
 * Returns the (negative) error of the first failing step, otherwise the
 * result of the final nfnetlink subsystem registration.
 */
static int __init nf_tables_module_init(void)
{
	int err;

	spin_lock_init(&nf_tables_destroy_list_lock);

	err = register_pernet_subsys(&nf_tables_net_ops);
	if (err < 0)
		return err;

	err = nft_chain_filter_init();
	if (err < 0)
		goto err_chain_filter;

	err = nf_tables_core_module_init();
	if (err < 0)
		goto err_core_module;

	err = register_netdevice_notifier(&nf_tables_flowtable_notifier);
	if (err < 0)
		goto err_netdev_notifier;

	err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params);
	if (err < 0)
		goto err_rht_objname;

	/* must be last */
	err = nfnetlink_subsys_register(&nf_tables_subsys);
	if (err < 0)
		goto err_nfnl_subsys;

	nft_chain_route_init();
	flow_indr_add_block_ing_cb(&block_ing_entry);
	return err;

	/* Each label undoes the step that succeeded just before the failure. */
err_nfnl_subsys:
	rhltable_destroy(&nft_objname_ht);
err_rht_objname:
	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
err_netdev_notifier:
	nf_tables_core_module_exit();
err_core_module:
	nft_chain_filter_fini();
err_chain_filter:
	unregister_pernet_subsys(&nf_tables_net_ops);
	return err;
}

/*
 * Module exit: undo everything nf_tables_module_init() set up.
 *
 * NOTE(review): the call order below is kept exactly as it was; it
 * presumably matters (e.g. pending destroy work is cancelled and RCU
 * callbacks are flushed before the object-name table is destroyed), so
 * do not reorder without checking.
 */
static void __exit nf_tables_module_exit(void)
{
	/* Stop taking new work from the outside first. */
	flow_indr_del_block_ing_cb(&block_ing_entry);
	nfnetlink_subsys_unregister(&nf_tables_subsys);
	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);

	nft_chain_filter_fini();
	nft_chain_route_fini();
	unregister_pernet_subsys(&nf_tables_net_ops);

	/* Let queued destroy work and outstanding RCU callbacks finish. */
	cancel_work_sync(&trans_destroy_work);
	rcu_barrier();

	rhltable_destroy(&nft_objname_ht);
	nf_tables_core_module_exit();
}

/* Module entry/exit points and module metadata. */
module_init(nf_tables_module_init);
module_exit(nf_tables_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);