/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Author:
 *     Jay Schulist <jschlst@samba.org>
 *
 * Based on the design of:
 *     - The Berkeley Packet Filter
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
28
#include <linux/gfp.h>
L
Linus Torvalds 已提交
29 30
#include <net/ip.h>
#include <net/protocol.h>
31
#include <net/netlink.h>
L
Linus Torvalds 已提交
32 33 34 35 36
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
37
#include <asm/unaligned.h>
L
Linus Torvalds 已提交
38
#include <linux/filter.h>
E
Eric Dumazet 已提交
39
#include <linux/reciprocal_div.h>
40
#include <linux/ratelimit.h>
41
#include <linux/seccomp.h>
L
Linus Torvalds 已提交
42

43 44 45 46 47
/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
L
Linus Torvalds 已提交
48 49 50 51
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
52
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
L
Linus Torvalds 已提交
53
	else if (k >= SKF_LL_OFF)
54
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
L
Linus Torvalds 已提交
55

56
	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
L
Linus Torvalds 已提交
57 58 59 60
		return ptr;
	return NULL;
}

E
Eric Dumazet 已提交
61
/*
 * Return a pointer to @size bytes of packet data at offset @k.
 * Non-negative offsets are resolved by skb_header_pointer(), which is
 * handed @buffer; negative offsets are resolved by
 * bpf_internal_load_pointer_neg_helper().
 */
static inline void *load_pointer(const struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	return k >= 0 ? skb_header_pointer(skb, k, size, buffer)
		      : bpf_internal_load_pointer_neg_helper(skb, k, size);
}

S
Stephen Hemminger 已提交
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to correct size returned by
 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to sk_run_filter. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

90 91
	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
S
Stephen Hemminger 已提交
92
	if (filter) {
93
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
94

S
Stephen Hemminger 已提交
95 96
		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
97
	rcu_read_unlock();
S
Stephen Hemminger 已提交
98 99 100 101 102

	return err;
}
EXPORT_SYMBOL(sk_filter);

L
Linus Torvalds 已提交
103
/**
104
 *	sk_run_filter - run a filter on a socket
L
Linus Torvalds 已提交
105
 *	@skb: buffer to run the filter on
106
 *	@fentry: filter to apply
L
Linus Torvalds 已提交
107 108
 *
 * Decode and apply filter instructions to the skb->data.
E
Eric Dumazet 已提交
109 110 111 112 113
 * Return length to keep, 0 for none. @skb is the data we are
 * filtering, @filter is the array of filter instructions.
 * Because all jumps are guaranteed to be before last instruction,
 * and last instruction guaranteed to be a RET, we dont need to check
 * flen. (We used to pass to this function the length of filter)
L
Linus Torvalds 已提交
114
 */
E
Eric Dumazet 已提交
115 116
unsigned int sk_run_filter(const struct sk_buff *skb,
			   const struct sock_filter *fentry)
L
Linus Torvalds 已提交
117
{
118
	void *ptr;
119 120
	u32 A = 0;			/* Accumulator */
	u32 X = 0;			/* Index Register */
L
Linus Torvalds 已提交
121
	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
122
	u32 tmp;
L
Linus Torvalds 已提交
123 124 125 126 127
	int k;

	/*
	 * Process array of filter instructions.
	 */
E
Eric Dumazet 已提交
128 129 130 131 132 133
	for (;; fentry++) {
#if defined(CONFIG_X86_32)
#define	K (fentry->k)
#else
		const u32 K = fentry->k;
#endif
134

L
Linus Torvalds 已提交
135
		switch (fentry->code) {
136
		case BPF_S_ALU_ADD_X:
L
Linus Torvalds 已提交
137 138
			A += X;
			continue;
139
		case BPF_S_ALU_ADD_K:
E
Eric Dumazet 已提交
140
			A += K;
L
Linus Torvalds 已提交
141
			continue;
142
		case BPF_S_ALU_SUB_X:
L
Linus Torvalds 已提交
143 144
			A -= X;
			continue;
145
		case BPF_S_ALU_SUB_K:
E
Eric Dumazet 已提交
146
			A -= K;
L
Linus Torvalds 已提交
147
			continue;
148
		case BPF_S_ALU_MUL_X:
L
Linus Torvalds 已提交
149 150
			A *= X;
			continue;
151
		case BPF_S_ALU_MUL_K:
E
Eric Dumazet 已提交
152
			A *= K;
L
Linus Torvalds 已提交
153
			continue;
154
		case BPF_S_ALU_DIV_X:
L
Linus Torvalds 已提交
155 156 157 158
			if (X == 0)
				return 0;
			A /= X;
			continue;
159
		case BPF_S_ALU_DIV_K:
E
Eric Dumazet 已提交
160
			A = reciprocal_divide(A, K);
L
Linus Torvalds 已提交
161
			continue;
162
		case BPF_S_ALU_AND_X:
L
Linus Torvalds 已提交
163 164
			A &= X;
			continue;
165
		case BPF_S_ALU_AND_K:
E
Eric Dumazet 已提交
166
			A &= K;
L
Linus Torvalds 已提交
167
			continue;
168
		case BPF_S_ALU_OR_X:
L
Linus Torvalds 已提交
169 170
			A |= X;
			continue;
171
		case BPF_S_ALU_OR_K:
E
Eric Dumazet 已提交
172
			A |= K;
L
Linus Torvalds 已提交
173
			continue;
174
		case BPF_S_ALU_LSH_X:
L
Linus Torvalds 已提交
175 176
			A <<= X;
			continue;
177
		case BPF_S_ALU_LSH_K:
E
Eric Dumazet 已提交
178
			A <<= K;
L
Linus Torvalds 已提交
179
			continue;
180
		case BPF_S_ALU_RSH_X:
L
Linus Torvalds 已提交
181 182
			A >>= X;
			continue;
183
		case BPF_S_ALU_RSH_K:
E
Eric Dumazet 已提交
184
			A >>= K;
L
Linus Torvalds 已提交
185
			continue;
186
		case BPF_S_ALU_NEG:
L
Linus Torvalds 已提交
187 188
			A = -A;
			continue;
189
		case BPF_S_JMP_JA:
E
Eric Dumazet 已提交
190
			fentry += K;
L
Linus Torvalds 已提交
191
			continue;
192
		case BPF_S_JMP_JGT_K:
E
Eric Dumazet 已提交
193
			fentry += (A > K) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
194
			continue;
195
		case BPF_S_JMP_JGE_K:
E
Eric Dumazet 已提交
196
			fentry += (A >= K) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
197
			continue;
198
		case BPF_S_JMP_JEQ_K:
E
Eric Dumazet 已提交
199
			fentry += (A == K) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
200
			continue;
201
		case BPF_S_JMP_JSET_K:
E
Eric Dumazet 已提交
202
			fentry += (A & K) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
203
			continue;
204
		case BPF_S_JMP_JGT_X:
E
Eric Dumazet 已提交
205
			fentry += (A > X) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
206
			continue;
207
		case BPF_S_JMP_JGE_X:
E
Eric Dumazet 已提交
208
			fentry += (A >= X) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
209
			continue;
210
		case BPF_S_JMP_JEQ_X:
E
Eric Dumazet 已提交
211
			fentry += (A == X) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
212
			continue;
213
		case BPF_S_JMP_JSET_X:
E
Eric Dumazet 已提交
214
			fentry += (A & X) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
215
			continue;
216
		case BPF_S_LD_W_ABS:
E
Eric Dumazet 已提交
217
			k = K;
218
load_w:
219 220
			ptr = load_pointer(skb, k, 4, &tmp);
			if (ptr != NULL) {
221
				A = get_unaligned_be32(ptr);
222
				continue;
L
Linus Torvalds 已提交
223
			}
224
			return 0;
225
		case BPF_S_LD_H_ABS:
E
Eric Dumazet 已提交
226
			k = K;
227
load_h:
228 229
			ptr = load_pointer(skb, k, 2, &tmp);
			if (ptr != NULL) {
230
				A = get_unaligned_be16(ptr);
231
				continue;
L
Linus Torvalds 已提交
232
			}
233
			return 0;
234
		case BPF_S_LD_B_ABS:
E
Eric Dumazet 已提交
235
			k = K;
L
Linus Torvalds 已提交
236
load_b:
237 238 239 240
			ptr = load_pointer(skb, k, 1, &tmp);
			if (ptr != NULL) {
				A = *(u8 *)ptr;
				continue;
L
Linus Torvalds 已提交
241
			}
242
			return 0;
243
		case BPF_S_LD_W_LEN:
244
			A = skb->len;
L
Linus Torvalds 已提交
245
			continue;
246
		case BPF_S_LDX_W_LEN:
247
			X = skb->len;
L
Linus Torvalds 已提交
248
			continue;
249
		case BPF_S_LD_W_IND:
E
Eric Dumazet 已提交
250
			k = X + K;
L
Linus Torvalds 已提交
251
			goto load_w;
252
		case BPF_S_LD_H_IND:
E
Eric Dumazet 已提交
253
			k = X + K;
L
Linus Torvalds 已提交
254
			goto load_h;
255
		case BPF_S_LD_B_IND:
E
Eric Dumazet 已提交
256
			k = X + K;
L
Linus Torvalds 已提交
257
			goto load_b;
258
		case BPF_S_LDX_B_MSH:
E
Eric Dumazet 已提交
259
			ptr = load_pointer(skb, K, 1, &tmp);
260 261 262 263 264
			if (ptr != NULL) {
				X = (*(u8 *)ptr & 0xf) << 2;
				continue;
			}
			return 0;
265
		case BPF_S_LD_IMM:
E
Eric Dumazet 已提交
266
			A = K;
L
Linus Torvalds 已提交
267
			continue;
268
		case BPF_S_LDX_IMM:
E
Eric Dumazet 已提交
269
			X = K;
L
Linus Torvalds 已提交
270
			continue;
271
		case BPF_S_LD_MEM:
272
			A = mem[K];
L
Linus Torvalds 已提交
273
			continue;
274
		case BPF_S_LDX_MEM:
275
			X = mem[K];
L
Linus Torvalds 已提交
276
			continue;
277
		case BPF_S_MISC_TAX:
L
Linus Torvalds 已提交
278 279
			X = A;
			continue;
280
		case BPF_S_MISC_TXA:
L
Linus Torvalds 已提交
281 282
			A = X;
			continue;
283
		case BPF_S_RET_K:
E
Eric Dumazet 已提交
284
			return K;
285
		case BPF_S_RET_A:
286
			return A;
287
		case BPF_S_ST:
E
Eric Dumazet 已提交
288
			mem[K] = A;
L
Linus Torvalds 已提交
289
			continue;
290
		case BPF_S_STX:
E
Eric Dumazet 已提交
291
			mem[K] = X;
L
Linus Torvalds 已提交
292
			continue;
293
		case BPF_S_ANC_PROTOCOL:
A
Al Viro 已提交
294
			A = ntohs(skb->protocol);
L
Linus Torvalds 已提交
295
			continue;
296
		case BPF_S_ANC_PKTTYPE:
L
Linus Torvalds 已提交
297 298
			A = skb->pkt_type;
			continue;
299
		case BPF_S_ANC_IFINDEX:
300 301
			if (!skb->dev)
				return 0;
L
Linus Torvalds 已提交
302 303
			A = skb->dev->ifindex;
			continue;
304
		case BPF_S_ANC_MARK:
J
jamal 已提交
305 306
			A = skb->mark;
			continue;
307
		case BPF_S_ANC_QUEUE:
308 309
			A = skb->queue_mapping;
			continue;
310
		case BPF_S_ANC_HATYPE:
311 312 313 314
			if (!skb->dev)
				return 0;
			A = skb->dev->type;
			continue;
315
		case BPF_S_ANC_RXHASH:
316 317
			A = skb->rxhash;
			continue;
318
		case BPF_S_ANC_CPU:
319 320
			A = raw_smp_processor_id();
			continue;
J
Jiri Pirko 已提交
321 322 323
		case BPF_S_ANC_ALU_XOR_X:
			A ^= X;
			continue;
324
		case BPF_S_ANC_NLATTR: {
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = nla_find((struct nlattr *)&skb->data[A],
				       skb->len - A, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
340
		case BPF_S_ANC_NLATTR_NEST: {
341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = (struct nlattr *)&skb->data[A];
			if (nla->nla_len > A - skb->len)
				return 0;

			nla = nla_find_nested(nla, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
359 360 361 362 363
#ifdef CONFIG_SECCOMP_FILTER
		case BPF_S_ANC_SECCOMP_LD_W:
			A = seccomp_bpf_load(fentry->k);
			continue;
#endif
L
Linus Torvalds 已提交
364
		default:
J
Joe Perches 已提交
365 366 367
			WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n",
				       fentry->code, fentry->jt,
				       fentry->jf, fentry->k);
L
Linus Torvalds 已提交
368 369 370 371 372 373
			return 0;
		}
	}

	return 0;
}
374
EXPORT_SYMBOL(sk_run_filter);
L
Linus Torvalds 已提交
375

376 377 378 379 380 381 382
/*
 * Security :
 * A BPF program is able to use 16 cells of memory to store intermediate
 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
 * As we dont want to clear mem[] array for each packet going through
 * sk_run_filter(), we check that filter loaded by user never try to read
 * a cell if not previously written, and we check all branches to be sure
L
Lucas De Marchi 已提交
383
 * a malicious user doesn't try to abuse us.
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
 */
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);
	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;
	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_S_ST:
		case BPF_S_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_S_JMP_JA:
			/* a jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* a jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

L
Linus Torvalds 已提交
436 437 438 439 440 441 442
/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
443 444
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
L
Linus Torvalds 已提交
445
 *
446 447 448
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
L
Linus Torvalds 已提交
449
 */
450
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
L
Linus Torvalds 已提交
451
{
452 453 454 455 456
	/*
	 * Valid instructions are initialized to non-0.
	 * Invalid instructions are initialized to 0.
	 */
	static const u8 codes[] = {
E
Eric Dumazet 已提交
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
		[BPF_RET|BPF_K]          = BPF_S_RET_K,
		[BPF_RET|BPF_A]          = BPF_S_RET_A,
		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
		[BPF_ST]                 = BPF_S_ST,
		[BPF_STX]                = BPF_S_STX,
		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
502
	};
L
Linus Torvalds 已提交
503 504
	int pc;

505
	if (flen == 0 || flen > BPF_MAXINSNS)
L
Linus Torvalds 已提交
506 507 508 509
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
510 511
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
512

513 514 515
		if (code >= ARRAY_SIZE(codes))
			return -EINVAL;
		code = codes[code];
E
Eric Dumazet 已提交
516
		if (!code)
517
			return -EINVAL;
518
		/* Some instructions need special checks */
519 520
		switch (code) {
		case BPF_S_ALU_DIV_K:
521 522
			/* check for division by zero */
			if (ftest->k == 0)
L
Linus Torvalds 已提交
523
				return -EINVAL;
E
Eric Dumazet 已提交
524
			ftest->k = reciprocal_value(ftest->k);
525
			break;
526 527 528 529 530
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
		case BPF_S_ST:
		case BPF_S_STX:
			/* check for invalid memory addresses */
531 532 533
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
534
		case BPF_S_JMP_JA:
535 536 537 538 539
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
540
			if (ftest->k >= (unsigned int)(flen-pc-1))
541
				return -EINVAL;
542 543 544 545 546 547 548 549 550
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
551
			/* for conditionals both must be safe */
552
			if (pc + ftest->jt + 1 >= flen ||
553 554
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
555
			break;
556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
		case BPF_S_LD_W_ABS:
		case BPF_S_LD_H_ABS:
		case BPF_S_LD_B_ABS:
#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
				code = BPF_S_ANC_##CODE;	\
				break
			switch (ftest->k) {
			ANCILLARY(PROTOCOL);
			ANCILLARY(PKTTYPE);
			ANCILLARY(IFINDEX);
			ANCILLARY(NLATTR);
			ANCILLARY(NLATTR_NEST);
			ANCILLARY(MARK);
			ANCILLARY(QUEUE);
			ANCILLARY(HATYPE);
			ANCILLARY(RXHASH);
			ANCILLARY(CPU);
J
Jiri Pirko 已提交
573
			ANCILLARY(ALU_XOR_X);
574
			}
575
		}
576
		ftest->code = code;
577
	}
578

579 580 581 582
	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
583
		return check_load_and_stores(filter, flen);
584 585
	}
	return -EINVAL;
L
Linus Torvalds 已提交
586
}
587
EXPORT_SYMBOL(sk_chk_filter);
L
Linus Torvalds 已提交
588

589
/**
E
Eric Dumazet 已提交
590
 * 	sk_filter_release_rcu - Release a socket filter by rcu_head
591 592
 *	@rcu: rcu_head that contains the sk_filter to free
 */
E
Eric Dumazet 已提交
593
void sk_filter_release_rcu(struct rcu_head *rcu)
594 595 596
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

597
	bpf_jit_free(fp);
E
Eric Dumazet 已提交
598
	kfree(fp);
599
}
E
Eric Dumazet 已提交
600
EXPORT_SYMBOL(sk_filter_release_rcu);
601

602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
/*
 * Validate a freshly filled-in filter and get it ready to run.
 * sk_run_filter() is installed as bpf_func before validation;
 * bpf_jit_compile() is only invoked once sk_chk_filter() accepted the
 * program. Returns 0 on success or the error from sk_chk_filter().
 */
static int __sk_prepare_filter(struct sk_filter *fp)
{
	int rc;

	fp->bpf_func = sk_run_filter;

	rc = sk_chk_filter(fp->insns, fp->len);
	if (!rc)
		bpf_jit_compile(fp);

	return rc;
}

/**
 *	sk_unattached_filter_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. The instructions are
 * copied out of @fprog into a newly allocated sk_filter whose reference
 * count starts at 1, then run through the usual sanity checks so the
 * program does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned and @pfp is left untouched. On
 * success the return is zero and *@pfp points to the new filter.
 */
int sk_unattached_filter_create(struct sk_filter **pfp,
				struct sock_fprog *fprog)
{
	const unsigned int insns_size = sizeof(struct sock_filter) * fprog->len;
	struct sk_filter *new_fp;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (!fprog->filter)
		return -EINVAL;

	new_fp = kmalloc(insns_size + sizeof(*new_fp), GFP_KERNEL);
	if (!new_fp)
		return -ENOMEM;

	memcpy(new_fp->insns, fprog->filter, insns_size);
	atomic_set(&new_fp->refcnt, 1);
	new_fp->len = fprog->len;

	err = __sk_prepare_filter(new_fp);
	if (err) {
		kfree(new_fp);
		return err;
	}

	*pfp = new_fp;
	return 0;
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);

/**
 *	sk_unattached_filter_destroy - destroy an unattached filter
 *	@fp: the filter to release
 *
 * Thin wrapper that hands @fp to sk_filter_release().
 * NOTE(review): presumably the counterpart of
 * sk_unattached_filter_create() - confirm against callers.
 */
void sk_unattached_filter_destroy(struct sk_filter *fp)
{
	sk_filter_release(fp);
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);

L
Linus Torvalds 已提交
663 664 665 666 667 668 669 670 671 672 673 674
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
675
	struct sk_filter *fp, *old_fp;
L
Linus Torvalds 已提交
676 677 678 679
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure new filter is there and in the right amounts. */
680 681
	if (fprog->filter == NULL)
		return -EINVAL;
L
Linus Torvalds 已提交
682 683 684 685 686

	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
687
		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
L
Linus Torvalds 已提交
688 689 690 691 692 693
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

694
	err = __sk_prepare_filter(fp);
695 696 697
	if (err) {
		sk_filter_uncharge(sk, fp);
		return err;
L
Linus Torvalds 已提交
698 699
	}

700 701
	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
702 703
	rcu_assign_pointer(sk->sk_filter, fp);

704
	if (old_fp)
E
Eric Dumazet 已提交
705
		sk_filter_uncharge(sk, old_fp);
706
	return 0;
L
Linus Torvalds 已提交
707
}
708
EXPORT_SYMBOL_GPL(sk_attach_filter);
L
Linus Torvalds 已提交
709

710 711 712 713 714
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

715 716
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
717
	if (filter) {
718
		RCU_INIT_POINTER(sk->sk_filter, NULL);
E
Eric Dumazet 已提交
719
		sk_filter_uncharge(sk, filter);
720 721 722 723
		ret = 0;
	}
	return ret;
}
724
EXPORT_SYMBOL_GPL(sk_detach_filter);