#ifndef _X_TABLES_H
#define _X_TABLES_H


#include <linux/netdevice.h>
#include <linux/static_key.h>
#include <uapi/linux/netfilter/x_tables.h>

/**
 * struct xt_action_param - parameters for matches/targets
 *
 * @match:	the match extension
 * @target:	the target extension
 * @matchinfo:	per-match data
 * @targinfo:	per-target data
 * @net:	network namespace through which the action was invoked
 * @in:		input netdevice
 * @out:	output netdevice
 * @fragoff:	packet is a fragment, this is the data offset
 * @thoff:	position of transport header relative to skb->data
 * @hooknum:	hook number the packet came from
 * @family:	Actual NFPROTO_* through which the function is invoked
 * 		(helpful when match->family == NFPROTO_UNSPEC)
 *
 * Fields written to by extensions:
 *
 * @hotdrop:	drop packet if we had inspection problems
 */
struct xt_action_param {
	union {
		const struct xt_match *match;
		const struct xt_target *target;
	};
	union {
		const void *matchinfo, *targinfo;
	};
	struct net *net;
	const struct net_device *in, *out;
	int fragoff;
	unsigned int thoff;
	unsigned int hooknum;
	u_int8_t family;
	bool hotdrop;
};

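/*
 * Example (illustrative sketch only, not part of this header's API):
 * a minimal match callback wired to xt_action_param.  The extension
 * name "foo" and struct foo_mtinfo are hypothetical.
 *
 *	static bool foo_mt(const struct sk_buff *skb,
 *			   struct xt_action_param *par)
 *	{
 *		const struct foo_mtinfo *info = par->matchinfo;
 *		struct tcphdr _tcph;
 *		const struct tcphdr *th;
 *
 *		th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
 *		if (th == NULL) {
 *			par->hotdrop = true;
 *			return false;
 *		}
 *		return ntohs(th->dest) == info->port;
 *	}
 */
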
/**
 * struct xt_mtchk_param - parameters for match extensions'
 * checkentry functions
 *
 * @net:	network namespace through which the check was invoked
 * @table:	table into which the rule is being inserted
 * @entryinfo:	the family-specific rule data
 * 		(struct ipt_ip, ip6t_ip, arpt_arp or (note) ebt_entry)
 * @match:	struct xt_match through which this function was invoked
 * @matchinfo:	per-match data
 * @hook_mask:	via which hooks the new rule is reachable
 * Other fields as above.
 */
struct xt_mtchk_param {
	struct net *net;
	const char *table;
	const void *entryinfo;
	const struct xt_match *match;
	void *matchinfo;
	unsigned int hook_mask;
	u_int8_t family;
	bool nft_compat;
};

/**
 * struct xt_mtdtor_param - match destructor parameters
 * Fields as above.
 */
struct xt_mtdtor_param {
	struct net *net;
	const struct xt_match *match;
	void *matchinfo;
	u_int8_t family;
};

/**
 * struct xt_tgchk_param - parameters for target extensions'
 * checkentry functions
 *
 * @entryinfo:	the family-specific rule data
 * 		(struct ipt_entry, ip6t_entry, arpt_entry, ebt_entry)
 *
 * Other fields: see above.
 */
struct xt_tgchk_param {
	struct net *net;
	const char *table;
	const void *entryinfo;
	const struct xt_target *target;
	void *targinfo;
	unsigned int hook_mask;
	u_int8_t family;
	bool nft_compat;
};

/* Target destructor parameters */
struct xt_tgdtor_param {
	struct net *net;
	const struct xt_target *target;
	void *targinfo;
	u_int8_t family;
};

struct xt_match {
	struct list_head list;

	const char name[XT_EXTENSION_MAXNAMELEN];
	u_int8_t revision;

	/* Return true or false: return false and set par->hotdrop = true
	   to force immediate packet drop. */
	/* Arguments changed since 2.6.9, as this must now handle
	   non-linear skbs, using skb_header_pointer and
	   skb_make_writable. */
	bool (*match)(const struct sk_buff *skb,
		      struct xt_action_param *);

	/* Called when a user tries to insert an entry of this type. */
	int (*checkentry)(const struct xt_mtchk_param *);

	/* Called when an entry of this type is deleted. */
	void (*destroy)(const struct xt_mtdtor_param *);
#ifdef CONFIG_COMPAT
	/* Called when userspace alignment differs from kernel space one */
	void (*compat_from_user)(void *dst, const void *src);
	int (*compat_to_user)(void __user *dst, const void *src);
#endif
	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
	struct module *me;

	const char *table;
	unsigned int matchsize;
#ifdef CONFIG_COMPAT
	unsigned int compatsize;
#endif
	unsigned int hooks;
	unsigned short proto;

	unsigned short family;
};

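/*
 * Example (illustrative sketch): how a match extension is typically
 * declared and registered.  The names "foo", foo_mt, foo_mt_check and
 * struct foo_mtinfo are hypothetical.
 *
 *	static struct xt_match foo_mt_reg __read_mostly = {
 *		.name		= "foo",
 *		.revision	= 0,
 *		.family		= NFPROTO_UNSPEC,
 *		.match		= foo_mt,
 *		.checkentry	= foo_mt_check,
 *		.matchsize	= sizeof(struct foo_mtinfo),
 *		.me		= THIS_MODULE,
 *	};
 *
 * module_init() then calls xt_register_match(&foo_mt_reg) and
 * module_exit() calls xt_unregister_match(&foo_mt_reg).
 */
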
/* Registration hooks for targets. */
struct xt_target {
	struct list_head list;

	const char name[XT_EXTENSION_MAXNAMELEN];
	u_int8_t revision;

	/* Returns verdict. Argument order changed since 2.6.9, as this
	   must now handle non-linear skbs, using skb_copy_bits and
	   skb_make_writable. */
	unsigned int (*target)(struct sk_buff *skb,
			       const struct xt_action_param *);

	/* Called when a user tries to insert an entry of this type:
	   hook_mask is a bitmask of hooks from which it can be
	   called. */
	/* Should return 0 on success or an error code otherwise (-Exxxx). */
	int (*checkentry)(const struct xt_tgchk_param *);

	/* Called when an entry of this type is deleted. */
	void (*destroy)(const struct xt_tgdtor_param *);
#ifdef CONFIG_COMPAT
	/* Called when userspace alignment differs from kernel space one */
	void (*compat_from_user)(void *dst, const void *src);
	int (*compat_to_user)(void __user *dst, const void *src);
#endif
	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
	struct module *me;

	const char *table;
	unsigned int targetsize;
#ifdef CONFIG_COMPAT
	unsigned int compatsize;
#endif
	unsigned int hooks;
	unsigned short proto;

	unsigned short family;
};

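/*
 * Example (illustrative sketch): a minimal target callback returning a
 * netfilter verdict.  foo_tg and struct foo_tginfo are hypothetical.
 *
 *	static unsigned int foo_tg(struct sk_buff *skb,
 *				   const struct xt_action_param *par)
 *	{
 *		const struct foo_tginfo *info = par->targinfo;
 *
 *		skb->mark = info->mark;
 *		return XT_CONTINUE;	(or NF_DROP, NF_ACCEPT, ...)
 *	}
 */
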
/* Furniture shopping... */
struct xt_table {
	struct list_head list;

	/* What hooks you will enter on */
	unsigned int valid_hooks;

	/* Man behind the curtain... */
	struct xt_table_info *private;

	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
	struct module *me;

	u_int8_t af;		/* address/protocol family */
	int priority;		/* hook order */

	/* called when table is needed in the given netns */
	int (*table_init)(struct net *net);

	/* A unique name... */
	const char name[XT_TABLE_MAXNAMELEN];
};

#include <linux/netfilter_ipv4.h>

/* The table itself */
struct xt_table_info {
	/* Size per table */
	unsigned int size;
	/* Number of entries: FIXME. --RR */
	unsigned int number;
	/* Initial number of entries. Needed for module usage count */
	unsigned int initial_entries;

	/* Entry points and underflows */
	unsigned int hook_entry[NF_INET_NUMHOOKS];
	unsigned int underflow[NF_INET_NUMHOOKS];

	/*
	 * Number of user chains. Since tables cannot have loops, at most
	 * @stacksize jumps (number of user chains) can possibly be made.
	 */
	unsigned int stacksize;
	void ***jumpstack;

	unsigned char entries[0] __aligned(8);
};

int xt_register_target(struct xt_target *target);
void xt_unregister_target(struct xt_target *target);
int xt_register_targets(struct xt_target *target, unsigned int n);
void xt_unregister_targets(struct xt_target *target, unsigned int n);

int xt_register_match(struct xt_match *target);
void xt_unregister_match(struct xt_match *target);
int xt_register_matches(struct xt_match *match, unsigned int n);
void xt_unregister_matches(struct xt_match *match, unsigned int n);

int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
		   bool inv_proto);
int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
		    bool inv_proto);

struct xt_table *xt_register_table(struct net *net,
				   const struct xt_table *table,
				   struct xt_table_info *bootstrap,
				   struct xt_table_info *newinfo);
void *xt_unregister_table(struct xt_table *table);

struct xt_table_info *xt_replace_table(struct xt_table *table,
				       unsigned int num_counters,
				       struct xt_table_info *newinfo,
				       int *error);

struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision);
struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision);
int xt_find_revision(u8 af, const char *name, u8 revision, int target,
		     int *err);

struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
				    const char *name);
void xt_table_unlock(struct xt_table *t);

int xt_proto_init(struct net *net, u_int8_t af);
void xt_proto_fini(struct net *net, u_int8_t af);

struct xt_table_info *xt_alloc_table_info(unsigned int size);
void xt_free_table_info(struct xt_table_info *info);

/**
 * xt_recseq - recursive seqcount for netfilter use
 *
 * Packet processing changes the seqcount only if no recursion happened.
 * get_counters() can use read_seqcount_begin()/read_seqcount_retry(),
 * because we use the normal seqcount convention:
 * the low order bit is set to 1 if a writer is active.
 */
DECLARE_PER_CPU(seqcount_t, xt_recseq);

/* xt_tee_enabled - true if x_tables needs to handle reentrancy
 *
 * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
 */
extern struct static_key xt_tee_enabled;

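/*
 * Usage sketch: the traversal code can skip the extra reentrancy
 * bookkeeping unless a TEE rule is actually loaded, roughly as
 * ip_tables.c does:
 *
 *	if (static_key_false(&xt_tee_enabled))
 *		jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 */
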
/**
 * xt_write_recseq_begin - start of a write section
 *
 * Begin packet processing: all readers must wait until the end.
 * 1) Must be called with preemption disabled
 * 2) softirqs must be disabled too (or we should use this_cpu_add())
 * Returns:
 *  1 if no recursion on this cpu
 *  0 if recursion detected
 */
static inline unsigned int xt_write_recseq_begin(void)
{
	unsigned int addend;

	/*
	 * Low order bit of sequence is set if we already
	 * called xt_write_recseq_begin().
	 */
	addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1;

	/*
	 * This is kind of a write_seqcount_begin(), but addend is 0 or 1.
	 * We don't check the addend value to avoid a test and conditional
	 * jump, since addend is most likely 1.
	 */
	__this_cpu_add(xt_recseq.sequence, addend);
	smp_wmb();

	return addend;
}

/**
 * xt_write_recseq_end - end of a write section
 * @addend: return value from previous xt_write_recseq_begin()
 *
 * End packet processing: all readers can proceed.
 * 1) Must be called with preemption disabled
 * 2) softirqs must be disabled too (or we should use this_cpu_add())
 */
static inline void xt_write_recseq_end(unsigned int addend)
{
	/* this is kind of a write_seqcount_end(), but addend is 0 or 1 */
	smp_wmb();
	__this_cpu_add(xt_recseq.sequence, addend);
}
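
/*
 * Typical usage pattern (sketch; this is how the ip(6)tables traversal
 * code brackets its counter updates):
 *
 *	local_bh_disable();
 *	addend = xt_write_recseq_begin();
 *	... walk the ruleset and update rule counters ...
 *	xt_write_recseq_end(addend);
 *	local_bh_enable();
 */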

/*
 * This helper is performance critical and must be inlined
 */
static inline unsigned long ifname_compare_aligned(const char *_a,
						   const char *_b,
						   const char *_mask)
{
	const unsigned long *a = (const unsigned long *)_a;
	const unsigned long *b = (const unsigned long *)_b;
	const unsigned long *mask = (const unsigned long *)_mask;
	unsigned long ret;

	ret = (a[0] ^ b[0]) & mask[0];
	if (IFNAMSIZ > sizeof(unsigned long))
		ret |= (a[1] ^ b[1]) & mask[1];
	if (IFNAMSIZ > 2 * sizeof(unsigned long))
		ret |= (a[2] ^ b[2]) & mask[2];
	if (IFNAMSIZ > 3 * sizeof(unsigned long))
		ret |= (a[3] ^ b[3]) & mask[3];
	BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
	return ret;
}

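/*
 * Usage sketch: rule traversal compares a device name against the
 * (name, mask) pair stored in a rule; a return value of 0 means the
 * names match under the mask.  Field names here are illustrative:
 *
 *	ret = ifname_compare_aligned(indev, e->ip.iniface,
 *				     e->ip.iniface_mask);
 *	if (ret != 0)
 *		... rule does not match on the input device ...
 */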

/* On SMP, ip(6)t_entry->counters.pcnt holds the address of the
 * real (percpu) counter.  On !SMP, it's just the packet count,
 * so nothing needs to be done there.
 *
 * xt_percpu_counter_alloc returns the address of the percpu
 * counter, or 0 on !SMP. We force an alignment of 16 bytes
 * so that bytes/packets share a common cache line.
 *
 * Hence the caller must use IS_ERR_VALUE to check for an error; this
 * allows us to return 0 for single core systems without forcing
 * callers to deal with SMP vs. NONSMP issues.
 */
static inline u64 xt_percpu_counter_alloc(void)
{
	if (nr_cpu_ids > 1) {
		void __percpu *res = __alloc_percpu(sizeof(struct xt_counters),
						    sizeof(struct xt_counters));

		if (res == NULL)
			return (u64) -ENOMEM;

		return (u64) (__force unsigned long) res;
	}

	return 0;
}

static inline void xt_percpu_counter_free(u64 pcnt)
{
	if (nr_cpu_ids > 1)
		free_percpu((void __percpu *) (unsigned long) pcnt);
}

static inline struct xt_counters *
xt_get_this_cpu_counter(struct xt_counters *cnt)
{
	if (nr_cpu_ids > 1)
		return this_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt);

	return cnt;
}

static inline struct xt_counters *
xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
{
	if (nr_cpu_ids > 1)
		return per_cpu_ptr((void __percpu *) (unsigned long) cnt->pcnt, cpu);

	return cnt;
}

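/*
 * Usage sketch (this mirrors how the ip(6)tables rule-check path
 * consumes the helpers above):
 *
 *	e->counters.pcnt = xt_percpu_counter_alloc();
 *	if (IS_ERR_VALUE(e->counters.pcnt))
 *		return -ENOMEM;
 *	...
 *	xt_percpu_counter_free(e->counters.pcnt);
 */
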
struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);

#ifdef CONFIG_COMPAT
#include <net/compat.h>

struct compat_xt_entry_match {
	union {
		struct {
			u_int16_t match_size;
			char name[XT_FUNCTION_MAXNAMELEN - 1];
			u_int8_t revision;
		} user;
		struct {
			u_int16_t match_size;
			compat_uptr_t match;
		} kernel;
		u_int16_t match_size;
	} u;
	unsigned char data[0];
};

struct compat_xt_entry_target {
	union {
		struct {
			u_int16_t target_size;
			char name[XT_FUNCTION_MAXNAMELEN - 1];
			u_int8_t revision;
		} user;
		struct {
			u_int16_t target_size;
			compat_uptr_t target;
		} kernel;
		u_int16_t target_size;
	} u;
	unsigned char data[0];
};

/* FIXME: this works only on 32 bit tasks
 * need to change whole approach in order to calculate align as function of
 * current task alignment */

struct compat_xt_counters {
	compat_u64 pcnt, bcnt;			/* Packet and byte counters */
};

struct compat_xt_counters_info {
	char name[XT_TABLE_MAXNAMELEN];
	compat_uint_t num_counters;
	struct compat_xt_counters counters[0];
};

struct _compat_xt_align {
	__u8 u8;
	__u16 u16;
	__u32 u32;
	compat_u64 u64;
};

#define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align))

void xt_compat_lock(u_int8_t af);
void xt_compat_unlock(u_int8_t af);

int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
void xt_compat_flush_offsets(u_int8_t af);
void xt_compat_init_offsets(u_int8_t af, unsigned int number);
int xt_compat_calc_jump(u_int8_t af, unsigned int offset);

int xt_compat_match_offset(const struct xt_match *match);
int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
			      unsigned int *size);
int xt_compat_match_to_user(const struct xt_entry_match *m,
			    void __user **dstptr, unsigned int *size);

int xt_compat_target_offset(const struct xt_target *target);
void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
				unsigned int *size);
int xt_compat_target_to_user(const struct xt_entry_target *t,
			     void __user **dstptr, unsigned int *size);
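
/*
 * Example (sketch): xt_compat_match_offset() computes the extra room the
 * native layout needs over the 32-bit compat one, essentially:
 *
 *	u_int16_t csize = match->compatsize ? : match->matchsize;
 *	return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize);
 */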

#endif /* CONFIG_COMPAT */
#endif /* _X_TABLES_H */