/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>

/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to the correct size returned
 * by sk_run_filter. If pkt_len is 0 we toss the packet. If skb->len is
 * smaller than pkt_len we keep the whole skb->data. This is the socket
 * level wrapper to sk_run_filter. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
		return -ENOMEM;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);

		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter);
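
/* Example (illustrative only): a typical receive path invocation, where a
 * non-zero return value means the packet has to be dropped:
 *
 *	if (sk_filter(sk, skb)) {
 *		kfree_skb(skb);
 *		return NET_RX_DROP;
 *	}
 */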

/* Helper to find the offset of pkt_type in sk_buff structure. We want
 * to make sure it's still a 3-bit field starting at a byte boundary;
 * taken from arch/x86/net/bpf_jit_comp.c.
 */
#ifdef __BIG_ENDIAN_BITFIELD
#define PKT_TYPE_MAX	(7 << 5)
#else
#define PKT_TYPE_MAX	7
#endif
static unsigned int pkt_type_offset(void)
{
	struct sk_buff skb_probe = { .pkt_type = ~0, };
	u8 *ct = (u8 *) &skb_probe;
	unsigned int off;

	for (off = 0; off < sizeof(struct sk_buff); off++) {
		if (ct[off] == PKT_TYPE_MAX)
			return off;
	}

	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
	return -1;
}

static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
}

static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return raw_smp_processor_id();
}

/* note that this only generates 32-bit random numbers */
static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return prandom_u32();
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
				    pkt_type_offset());
		if (insn->off < 0)
			return false;
		insn++;
		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		insn++;
		*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
#endif
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
		BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);

		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		/* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_tci));
		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
					      ~VLAN_TAG_PRESENT);
		} else {
			/* A >>= 12 */
			*insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
			/* A &= 1 */
			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
		}
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(__get_random_u32);
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}
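
/* Example (illustrative): a classic ancillary load such as
 *
 *	BPF_STMT(BPF_LD | BPF_H | BPF_ABS, SKF_AD_OFF + SKF_AD_PROTOCOL)
 *
 * is rewritten above into a direct two-byte load from skb->protocol plus
 * an endianness fixup, instead of calling back into a helper at run time.
 */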

/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: buffer where converted program will be stored
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style classic BPF instruction set to 'bpf_insn' style.
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *   new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
 *
 * User BPF's register A is mapped to our BPF register 6, user BPF
 * register X is mapped to BPF register 7; frame pointer is always
 * register 10; Context 'void *ctx' is stored in register 1, that is,
 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
 * ctx == 'struct seccomp_data *'.
 */
int bpf_convert_filter(struct sock_filter *prog, int len,
		       struct bpf_insn *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i;
	struct bpf_insn *new_insn;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = new_prog;
	fp = prog;

	if (new_insn)
		*new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	new_insn++;

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[6] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - new_prog;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_X;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | BPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K, RET_A are remapped into 2 insns. */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			*insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
						BPF_K : BPF_X, BPF_REG_0,
						BPF_REG_A, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -(BPF_MEMWORDS - fp->k) * 4);
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -(BPF_MEMWORDS - fp->k) * 4);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - new_prog;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - new_prog) {
		new_flen = new_insn - new_prog;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

/* Security:
 *
 * A BPF program is able to use 16 cells of memory to store intermediate
 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
 *
 * As we don't want to clear the mem[] array for each packet going through
 * sk_run_filter(), we check that a filter loaded by the user never tries
 * to read a cell if not previously written, and we check all branches to
 * be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}
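
/* Example (illustrative): check_load_and_stores() rejects a classic filter
 * that reads scratch memory before anything has been stored there, e.g.
 *
 *	BPF_STMT(BPF_LD | BPF_MEM, 0),
 *	BPF_STMT(BPF_RET | BPF_K, 0),
 *
 * fails with -EINVAL because cell 0 is read while still marked invalid.
 */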

static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}

/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
{
	bool anc_found;
	int pc;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}
EXPORT_SYMBOL(bpf_check_classic);
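
/* Example (illustrative): the smallest program that passes this check is a
 * single unconditional return, i.e. the classic "accept everything" filter
 *
 *	struct sock_filter accept_all[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffffU),
 *	};
 *
 * with flen == 1.
 */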

static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;
	fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	bpf_release_orig_filter(prog);
	bpf_prog_free(prog);
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 * 	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (atomic_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_inc(&fp->refcnt);
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by bpf_prog_realloc().
		 */
		goto out_err_free;

	bpf_prog_select_runtime(fp);

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = false;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);
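
/* Example (illustrative, error handling trimmed): an in-kernel user with an
 * insns[] array of struct sock_filter and a matching len can build and run
 * an unattached filter roughly as follows:
 *
 *	struct sock_fprog_kern fprog = { .len = len, .filter = insns };
 *	struct bpf_prog *prog;
 *
 *	if (!bpf_prog_create(&prog, &fprog)) {
 *		u32 res = SK_RUN_FILTER(prog, skb);
 *		...
 *		bpf_prog_destroy(prog);
 *	}
 */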

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	unsigned int fsize = bpf_classic_proglen(fprog);
	unsigned int bpf_fsize = bpf_prog_size(fprog->len);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	prog = bpf_prog_alloc(bpf_fsize, 0);
	if (!prog)
		return -ENOMEM;

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		kfree(prog);
		return -EFAULT;
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		kfree(prog);
		return -ENOMEM;
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	prog = bpf_prepare_filter(prog);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp) {
		__bpf_prog_release(prog);
		return -ENOMEM;
	}
	fp->prog = prog;

	atomic_set(&fp->refcnt, 0);

	if (!sk_filter_charge(sk, fp)) {
		__sk_filter_release(fp);
		return -ENOMEM;
	}

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
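
/* Example (illustrative): user space normally reaches this function by
 * attaching a classic BPF program with setsockopt(), e.g.:
 *
 *	struct sock_fprog fprog = { .len = len, .filter = insns };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 */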

int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
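
/* From user space this path is normally reached via setsockopt(fd,
 * SOL_SOCKET, SO_DETACH_FILTER, ...); the option value itself is not used.
 */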

int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that has been originally attached,
	 * so no conversion/decode needed anymore.
	 */
	fprog = filter->prog->orig_prog;

	ret = fprog->len;
	if (!len)
		/* User space only enquires number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
		goto out;

	/* Instead of bytes, the API requests to return the number
	 * of filter blocks.
	 */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}