filter.c 17.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Author:
 *     Jay Schulist <jschlst@samba.org>
 *
 * Based on the design of:
 *     - The Berkeley Packet Filter
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
16
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
L
Linus Torvalds 已提交
17 18 19 20 21 22 23 24 25 26 27
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
28
#include <linux/gfp.h>
L
Linus Torvalds 已提交
29 30
#include <net/ip.h>
#include <net/protocol.h>
31
#include <net/netlink.h>
L
Linus Torvalds 已提交
32 33 34 35 36 37
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
38
#include <asm/unaligned.h>
L
Linus Torvalds 已提交
39
#include <linux/filter.h>
E
Eric Dumazet 已提交
40
#include <linux/reciprocal_div.h>
41
#include <linux/ratelimit.h>
L
Linus Torvalds 已提交
42 43

/* No hurry in this branch
 *
 * Resolve a negative ("ancillary") BPF offset @k into a pointer inside
 * the skb's linear data.  SKF_NET_OFF-relative offsets index from the
 * network header, SKF_LL_OFF-relative offsets from the link-level (MAC)
 * header.  Returns NULL if any of the @size bytes would fall outside
 * the linear buffer.
 */
static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;

	/* Only hand the pointer back when all @size bytes lie within
	 * [skb->head, skb_tail_pointer(skb)) -- i.e. the linear area. */
	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;
	return NULL;
}

E
Eric Dumazet 已提交
58
/*
 * Fetch @size bytes at offset @k of the packet.  Non-negative offsets
 * are plain data offsets (handled by skb_header_pointer(), which may
 * copy fragmented data into @buffer); negative offsets select the
 * ancillary header areas via __load_pointer().
 */
static inline void *load_pointer(const struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	if (k < 0)
		return __load_pointer(skb, k, size);
	return skb_header_pointer(skb, k, size, buffer);
}

S
Stephen Hemminger 已提交
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to correct size returned by
 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to sk_run_filter. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

87 88
	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
S
Stephen Hemminger 已提交
89
	if (filter) {
90
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
91

S
Stephen Hemminger 已提交
92 93
		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
94
	rcu_read_unlock();
S
Stephen Hemminger 已提交
95 96 97 98 99

	return err;
}
EXPORT_SYMBOL(sk_filter);

L
Linus Torvalds 已提交
100
/**
101
 *	sk_run_filter - run a filter on a socket
L
Linus Torvalds 已提交
102
 *	@skb: buffer to run the filter on
103
 *	@fentry: filter to apply
L
Linus Torvalds 已提交
104 105
 *
 * Decode and apply filter instructions to the skb->data.
E
Eric Dumazet 已提交
106 107 108 109 110
 * Return length to keep, 0 for none. @skb is the data we are
 * filtering, @filter is the array of filter instructions.
 * Because all jumps are guaranteed to be before last instruction,
 * and last instruction guaranteed to be a RET, we dont need to check
 * flen. (We used to pass to this function the length of filter)
L
Linus Torvalds 已提交
111
 */
E
Eric Dumazet 已提交
112 113
unsigned int sk_run_filter(const struct sk_buff *skb,
			   const struct sock_filter *fentry)
L
Linus Torvalds 已提交
114
{
115
	void *ptr;
116 117
	u32 A = 0;			/* Accumulator */
	u32 X = 0;			/* Index Register */
L
Linus Torvalds 已提交
118
	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
119
	u32 tmp;
L
Linus Torvalds 已提交
120 121 122 123 124
	int k;

	/*
	 * Process array of filter instructions.
	 */
E
Eric Dumazet 已提交
125 126 127 128 129 130
	for (;; fentry++) {
#if defined(CONFIG_X86_32)
#define	K (fentry->k)
#else
		const u32 K = fentry->k;
#endif
131

L
Linus Torvalds 已提交
132
		switch (fentry->code) {
133
		case BPF_S_ALU_ADD_X:
L
Linus Torvalds 已提交
134 135
			A += X;
			continue;
136
		case BPF_S_ALU_ADD_K:
E
Eric Dumazet 已提交
137
			A += K;
L
Linus Torvalds 已提交
138
			continue;
139
		case BPF_S_ALU_SUB_X:
L
Linus Torvalds 已提交
140 141
			A -= X;
			continue;
142
		case BPF_S_ALU_SUB_K:
E
Eric Dumazet 已提交
143
			A -= K;
L
Linus Torvalds 已提交
144
			continue;
145
		case BPF_S_ALU_MUL_X:
L
Linus Torvalds 已提交
146 147
			A *= X;
			continue;
148
		case BPF_S_ALU_MUL_K:
E
Eric Dumazet 已提交
149
			A *= K;
L
Linus Torvalds 已提交
150
			continue;
151
		case BPF_S_ALU_DIV_X:
L
Linus Torvalds 已提交
152 153 154 155
			if (X == 0)
				return 0;
			A /= X;
			continue;
156
		case BPF_S_ALU_DIV_K:
E
Eric Dumazet 已提交
157
			A = reciprocal_divide(A, K);
L
Linus Torvalds 已提交
158
			continue;
159
		case BPF_S_ALU_AND_X:
L
Linus Torvalds 已提交
160 161
			A &= X;
			continue;
162
		case BPF_S_ALU_AND_K:
E
Eric Dumazet 已提交
163
			A &= K;
L
Linus Torvalds 已提交
164
			continue;
165
		case BPF_S_ALU_OR_X:
L
Linus Torvalds 已提交
166 167
			A |= X;
			continue;
168
		case BPF_S_ALU_OR_K:
E
Eric Dumazet 已提交
169
			A |= K;
L
Linus Torvalds 已提交
170
			continue;
171
		case BPF_S_ALU_LSH_X:
L
Linus Torvalds 已提交
172 173
			A <<= X;
			continue;
174
		case BPF_S_ALU_LSH_K:
E
Eric Dumazet 已提交
175
			A <<= K;
L
Linus Torvalds 已提交
176
			continue;
177
		case BPF_S_ALU_RSH_X:
L
Linus Torvalds 已提交
178 179
			A >>= X;
			continue;
180
		case BPF_S_ALU_RSH_K:
E
Eric Dumazet 已提交
181
			A >>= K;
L
Linus Torvalds 已提交
182
			continue;
183
		case BPF_S_ALU_NEG:
L
Linus Torvalds 已提交
184 185
			A = -A;
			continue;
186
		case BPF_S_JMP_JA:
E
Eric Dumazet 已提交
187
			fentry += K;
L
Linus Torvalds 已提交
188
			continue;
189
		case BPF_S_JMP_JGT_K:
E
Eric Dumazet 已提交
190
			fentry += (A > K) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
191
			continue;
192
		case BPF_S_JMP_JGE_K:
E
Eric Dumazet 已提交
193
			fentry += (A >= K) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
194
			continue;
195
		case BPF_S_JMP_JEQ_K:
E
Eric Dumazet 已提交
196
			fentry += (A == K) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
197
			continue;
198
		case BPF_S_JMP_JSET_K:
E
Eric Dumazet 已提交
199
			fentry += (A & K) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
200
			continue;
201
		case BPF_S_JMP_JGT_X:
E
Eric Dumazet 已提交
202
			fentry += (A > X) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
203
			continue;
204
		case BPF_S_JMP_JGE_X:
E
Eric Dumazet 已提交
205
			fentry += (A >= X) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
206
			continue;
207
		case BPF_S_JMP_JEQ_X:
E
Eric Dumazet 已提交
208
			fentry += (A == X) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
209
			continue;
210
		case BPF_S_JMP_JSET_X:
E
Eric Dumazet 已提交
211
			fentry += (A & X) ? fentry->jt : fentry->jf;
L
Linus Torvalds 已提交
212
			continue;
213
		case BPF_S_LD_W_ABS:
E
Eric Dumazet 已提交
214
			k = K;
215
load_w:
216 217
			ptr = load_pointer(skb, k, 4, &tmp);
			if (ptr != NULL) {
218
				A = get_unaligned_be32(ptr);
219
				continue;
L
Linus Torvalds 已提交
220
			}
221
			return 0;
222
		case BPF_S_LD_H_ABS:
E
Eric Dumazet 已提交
223
			k = K;
224
load_h:
225 226
			ptr = load_pointer(skb, k, 2, &tmp);
			if (ptr != NULL) {
227
				A = get_unaligned_be16(ptr);
228
				continue;
L
Linus Torvalds 已提交
229
			}
230
			return 0;
231
		case BPF_S_LD_B_ABS:
E
Eric Dumazet 已提交
232
			k = K;
L
Linus Torvalds 已提交
233
load_b:
234 235 236 237
			ptr = load_pointer(skb, k, 1, &tmp);
			if (ptr != NULL) {
				A = *(u8 *)ptr;
				continue;
L
Linus Torvalds 已提交
238
			}
239
			return 0;
240
		case BPF_S_LD_W_LEN:
241
			A = skb->len;
L
Linus Torvalds 已提交
242
			continue;
243
		case BPF_S_LDX_W_LEN:
244
			X = skb->len;
L
Linus Torvalds 已提交
245
			continue;
246
		case BPF_S_LD_W_IND:
E
Eric Dumazet 已提交
247
			k = X + K;
L
Linus Torvalds 已提交
248
			goto load_w;
249
		case BPF_S_LD_H_IND:
E
Eric Dumazet 已提交
250
			k = X + K;
L
Linus Torvalds 已提交
251
			goto load_h;
252
		case BPF_S_LD_B_IND:
E
Eric Dumazet 已提交
253
			k = X + K;
L
Linus Torvalds 已提交
254
			goto load_b;
255
		case BPF_S_LDX_B_MSH:
E
Eric Dumazet 已提交
256
			ptr = load_pointer(skb, K, 1, &tmp);
257 258 259 260 261
			if (ptr != NULL) {
				X = (*(u8 *)ptr & 0xf) << 2;
				continue;
			}
			return 0;
262
		case BPF_S_LD_IMM:
E
Eric Dumazet 已提交
263
			A = K;
L
Linus Torvalds 已提交
264
			continue;
265
		case BPF_S_LDX_IMM:
E
Eric Dumazet 已提交
266
			X = K;
L
Linus Torvalds 已提交
267
			continue;
268
		case BPF_S_LD_MEM:
269
			A = mem[K];
L
Linus Torvalds 已提交
270
			continue;
271
		case BPF_S_LDX_MEM:
272
			X = mem[K];
L
Linus Torvalds 已提交
273
			continue;
274
		case BPF_S_MISC_TAX:
L
Linus Torvalds 已提交
275 276
			X = A;
			continue;
277
		case BPF_S_MISC_TXA:
L
Linus Torvalds 已提交
278 279
			A = X;
			continue;
280
		case BPF_S_RET_K:
E
Eric Dumazet 已提交
281
			return K;
282
		case BPF_S_RET_A:
283
			return A;
284
		case BPF_S_ST:
E
Eric Dumazet 已提交
285
			mem[K] = A;
L
Linus Torvalds 已提交
286
			continue;
287
		case BPF_S_STX:
E
Eric Dumazet 已提交
288
			mem[K] = X;
L
Linus Torvalds 已提交
289
			continue;
290
		case BPF_S_ANC_PROTOCOL:
A
Al Viro 已提交
291
			A = ntohs(skb->protocol);
L
Linus Torvalds 已提交
292
			continue;
293
		case BPF_S_ANC_PKTTYPE:
L
Linus Torvalds 已提交
294 295
			A = skb->pkt_type;
			continue;
296
		case BPF_S_ANC_IFINDEX:
297 298
			if (!skb->dev)
				return 0;
L
Linus Torvalds 已提交
299 300
			A = skb->dev->ifindex;
			continue;
301
		case BPF_S_ANC_MARK:
J
jamal 已提交
302 303
			A = skb->mark;
			continue;
304
		case BPF_S_ANC_QUEUE:
305 306
			A = skb->queue_mapping;
			continue;
307
		case BPF_S_ANC_HATYPE:
308 309 310 311
			if (!skb->dev)
				return 0;
			A = skb->dev->type;
			continue;
312
		case BPF_S_ANC_RXHASH:
313 314
			A = skb->rxhash;
			continue;
315
		case BPF_S_ANC_CPU:
316 317
			A = raw_smp_processor_id();
			continue;
318
		case BPF_S_ANC_NLATTR: {
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = nla_find((struct nlattr *)&skb->data[A],
				       skb->len - A, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
334
		case BPF_S_ANC_NLATTR_NEST: {
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
			struct nlattr *nla;

			if (skb_is_nonlinear(skb))
				return 0;
			if (A > skb->len - sizeof(struct nlattr))
				return 0;

			nla = (struct nlattr *)&skb->data[A];
			if (nla->nla_len > A - skb->len)
				return 0;

			nla = nla_find_nested(nla, X);
			if (nla)
				A = (void *)nla - (void *)skb->data;
			else
				A = 0;
			continue;
		}
L
Linus Torvalds 已提交
353
		default:
J
Joe Perches 已提交
354 355 356
			WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n",
				       fentry->code, fentry->jt,
				       fentry->jf, fentry->k);
L
Linus Torvalds 已提交
357 358 359 360 361 362
			return 0;
		}
	}

	return 0;
}
363
EXPORT_SYMBOL(sk_run_filter);
L
Linus Torvalds 已提交
364

365 366 367 368 369 370 371
/*
 * Security :
 * A BPF program is able to use 16 cells of memory to store intermediate
 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
 * As we dont want to clear mem[] array for each packet going through
 * sk_run_filter(), we check that filter loaded by user never try to read
 * a cell if not previously written, and we check all branches to be sure
 * a malicious user doesn't try to abuse us.
 *
 * Implementation: a forward dataflow pass. memvalid is a bitmap of mem[]
 * cells proven written on every path reaching the current instruction;
 * masks[pc] accumulates (by AND) the valid-set carried in on each jump
 * edge that targets pc, so a cell counts as initialized only if it is
 * written on ALL incoming paths.
 */
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);
	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;
	/* all-ones == "no constraint yet" for every instruction */
	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		/* merge constraints from jumps landing here */
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_S_ST:
		case BPF_S_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
			/* reading a cell never written on some path? */
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_S_JMP_JA:
			/* a jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* a jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

L
Linus Torvalds 已提交
425 426 427 428 429 430 431
/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 *
 * Note: this also TRANSLATES the filter in place, rewriting each
 * userspace BPF_* opcode into the internal BPF_S_* enum that
 * sk_run_filter() switches on, and pre-computing the reciprocal
 * for constant divisions.
 */
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
	/*
	 * Valid instructions are initialized to non-0.
	 * Invalid instructions are initialized to 0.
	 * (Indexed by the userspace opcode; maps to the internal enum.)
	 */
	static const u8 codes[] = {
		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
		[BPF_RET|BPF_K]          = BPF_S_RET_K,
		[BPF_RET|BPF_A]          = BPF_S_RET_A,
		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
		[BPF_ST]                 = BPF_S_ST,
		[BPF_STX]                = BPF_S_STX,
		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
	};
	int pc;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;

		if (code >= ARRAY_SIZE(codes))
			return -EINVAL;
		code = codes[code];
		if (!code)
			return -EINVAL;
		/* Some instructions need special checks */
		switch (code) {
		case BPF_S_ALU_DIV_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			/* pre-compute for reciprocal_divide() at run time */
			ftest->k = reciprocal_value(ftest->k);
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
		case BPF_S_ST:
		case BPF_S_STX:
			/* check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_S_JMP_JA:
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned)(flen-pc-1))
				return -EINVAL;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* for conditionals both must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_S_LD_W_ABS:
		case BPF_S_LD_H_ABS:
		case BPF_S_LD_B_ABS:
			/* Magic SKF_AD_OFF-based offsets select "ancillary"
			 * loads (protocol, mark, cpu, ...); remap them to
			 * their dedicated internal opcodes here. */
#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
				code = BPF_S_ANC_##CODE;	\
				break
			switch (ftest->k) {
			ANCILLARY(PROTOCOL);
			ANCILLARY(PKTTYPE);
			ANCILLARY(IFINDEX);
			ANCILLARY(NLATTR);
			ANCILLARY(NLATTR_NEST);
			ANCILLARY(MARK);
			ANCILLARY(QUEUE);
			ANCILLARY(HATYPE);
			ANCILLARY(RXHASH);
			ANCILLARY(CPU);
			}
		}
		/* commit the translated opcode */
		ftest->code = code;
	}

	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
		return check_load_and_stores(filter, flen);
	}
	return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
L
Linus Torvalds 已提交
576

577
/**
 * 	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 *
 *	Called after a grace period, so no CPU can still be running the
 *	filter: release any JITed image first, then the filter itself.
 */
void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	bpf_jit_free(fp);
	kfree(fp);
}
EXPORT_SYMBOL(sk_filter_release_rcu);
589

590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650
/*
 * Validate/translate the filter program and pick its execution method:
 * default to the interpreter, then let the JIT override fp->bpf_func
 * with a compiled image if it can.
 */
static int __sk_prepare_filter(struct sk_filter *fp)
{
	int ret;

	fp->bpf_func = sk_run_filter;

	ret = sk_chk_filter(fp->insns, fp->len);
	if (ret)
		return ret;

	bpf_jit_compile(fp);

	return 0;
}

/**
 *	sk_unattached_filter_create - create an unattached filter
 *	@pfp: on success, set to the newly allocated filter
 *	@fprog: the filter program (kernel-space instructions)
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int sk_unattached_filter_create(struct sk_filter **pfp,
				struct sock_fprog *fprog)
{
	struct sk_filter *fp;
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	/* one allocation for the header plus the instruction array */
	fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	memcpy(fp->insns, fprog->filter, fsize);

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = __sk_prepare_filter(fp);
	if (err)
		goto free_mem;

	*pfp = fp;
	return 0;
free_mem:
	kfree(fp);
	return err;
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);

/**
 *	sk_unattached_filter_destroy - destroy an unattached filter
 *	@fp: filter obtained from sk_unattached_filter_create()
 *
 *	Drops a reference; the filter is actually freed (via RCU) once
 *	the last reference goes away.
 */
void sk_unattached_filter_destroy(struct sk_filter *fp)
{
	sk_filter_release(fp);
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);

L
Linus Torvalds 已提交
651 652 653 654 655 656 657 658 659 660 661 662
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
663
	struct sk_filter *fp, *old_fp;
L
Linus Torvalds 已提交
664 665 666 667
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure new filter is there and in the right amounts. */
668 669
	if (fprog->filter == NULL)
		return -EINVAL;
L
Linus Torvalds 已提交
670 671 672 673 674

	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
675
		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
L
Linus Torvalds 已提交
676 677 678 679 680 681
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

682
	err = __sk_prepare_filter(fp);
683 684 685
	if (err) {
		sk_filter_uncharge(sk, fp);
		return err;
L
Linus Torvalds 已提交
686 687
	}

688 689
	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
690 691
	rcu_assign_pointer(sk->sk_filter, fp);

692
	if (old_fp)
E
Eric Dumazet 已提交
693
		sk_filter_uncharge(sk, old_fp);
694
	return 0;
L
Linus Torvalds 已提交
695
}
696
EXPORT_SYMBOL_GPL(sk_attach_filter);
L
Linus Torvalds 已提交
697

698 699 700 701 702
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

703 704
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
705
	if (filter) {
706
		RCU_INIT_POINTER(sk->sk_filter, NULL);
E
Eric Dumazet 已提交
707
		sk_filter_uncharge(sk, filter);
708 709 710 711
		ret = 0;
	}
	return ret;
}
712
EXPORT_SYMBOL_GPL(sk_detach_filter);