act_bpf.c 8.5 KB
Newer Older
J
Jiri Pirko 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/filter.h>
16 17
#include <linux/bpf.h>

J
Jiri Pirko 已提交
18 19 20 21 22 23
#include <net/netlink.h>
#include <net/pkt_sched.h>

#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>

24 25 26 27 28 29
/* Handed to tcf_register_action() when the "bpf" action is registered;
 * presumably the mask for the action hash table - TODO confirm.
 */
#define BPF_TAB_MASK		15
/* Maximum length accepted for the TCA_ACT_BPF_NAME string attribute
 * (see act_bpf_policy).
 */
#define ACT_BPF_NAME_LEN	256

struct tcf_bpf_cfg {
	struct bpf_prog *filter;
	struct sock_filter *bpf_ops;
30
	const char *bpf_name;
31 32
	u32 bpf_fd;
	u16 bpf_num_ops;
33
	bool is_ebpf;
34
};
J
Jiri Pirko 已提交
35

36
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
J
Jiri Pirko 已提交
37 38
		   struct tcf_result *res)
{
39
	struct tcf_bpf *prog = act->priv;
40
	int action, filter_res;
41
	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
J
Jiri Pirko 已提交
42

43 44 45
	if (unlikely(!skb_mac_header_was_set(skb)))
		return TC_ACT_UNSPEC;

46
	spin_lock(&prog->tcf_lock);
47

48 49
	prog->tcf_tm.lastuse = jiffies;
	bstats_update(&prog->tcf_bstats, skb);
J
Jiri Pirko 已提交
50

51 52
	/* Needed here for accessing maps. */
	rcu_read_lock();
53 54 55 56 57 58 59
	if (at_ingress) {
		__skb_push(skb, skb->mac_len);
		filter_res = BPF_PROG_RUN(prog->filter, skb);
		__skb_pull(skb, skb->mac_len);
	} else {
		filter_res = BPF_PROG_RUN(prog->filter, skb);
	}
60
	rcu_read_unlock();
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79

	/* A BPF program may overwrite the default action opcode.
	 * Similarly as in cls_bpf, if filter_res == -1 we use the
	 * default action specified from tc.
	 *
	 * In case a different well-known TC_ACT opcode has been
	 * returned, it will overwrite the default one.
	 *
	 * For everything else that is unkown, TC_ACT_UNSPEC is
	 * returned.
	 */
	switch (filter_res) {
	case TC_ACT_PIPE:
	case TC_ACT_RECLASSIFY:
	case TC_ACT_OK:
		action = filter_res;
		break;
	case TC_ACT_SHOT:
		action = filter_res;
80
		prog->tcf_qstats.drops++;
81 82
		break;
	case TC_ACT_UNSPEC:
83
		action = prog->tcf_action;
84 85 86 87
		break;
	default:
		action = TC_ACT_UNSPEC;
		break;
J
Jiri Pirko 已提交
88 89
	}

90
	spin_unlock(&prog->tcf_lock);
J
Jiri Pirko 已提交
91 92 93
	return action;
}

94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog)
{
	return !prog->bpf_ops;
}

/* Append the classic BPF description of @prog to @skb: the instruction
 * count (TCA_ACT_BPF_OPS_LEN) followed by the raw instruction array
 * (TCA_ACT_BPF_OPS).
 *
 * Returns 0 on success, -EMSGSIZE if either attribute could not be added.
 */
static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
				 struct sk_buff *skb)
{
	struct nlattr *ops_attr;

	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops) != 0)
		return -EMSGSIZE;

	/* Reserve room for the whole array, then copy straight into it. */
	ops_attr = nla_reserve(skb, TCA_ACT_BPF_OPS,
			       prog->bpf_num_ops * sizeof(struct sock_filter));
	if (!ops_attr)
		return -EMSGSIZE;

	memcpy(nla_data(ops_attr), prog->bpf_ops, nla_len(ops_attr));
	return 0;
}

/* Append the eBPF description of @prog to @skb: the program fd
 * (TCA_ACT_BPF_FD) and, when one was supplied, the program name
 * (TCA_ACT_BPF_NAME).
 *
 * Returns 0 on success, -EMSGSIZE if an attribute could not be added.
 */
static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
				  struct sk_buff *skb)
{
	if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd) != 0)
		return -EMSGSIZE;

	/* The name is optional; nothing more to emit without one. */
	if (!prog->bpf_name)
		return 0;

	if (nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name) != 0)
		return -EMSGSIZE;

	return 0;
}

static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
J
Jiri Pirko 已提交
131 132 133
			int bind, int ref)
{
	unsigned char *tp = skb_tail_pointer(skb);
134
	struct tcf_bpf *prog = act->priv;
J
Jiri Pirko 已提交
135
	struct tc_act_bpf opt = {
136 137 138 139
		.index   = prog->tcf_index,
		.refcnt  = prog->tcf_refcnt - ref,
		.bindcnt = prog->tcf_bindcnt - bind,
		.action  = prog->tcf_action,
J
Jiri Pirko 已提交
140
	};
141 142
	struct tcf_t tm;
	int ret;
J
Jiri Pirko 已提交
143 144 145 146

	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

147 148 149 150 151
	if (tcf_bpf_is_ebpf(prog))
		ret = tcf_bpf_dump_ebpf_info(prog, skb);
	else
		ret = tcf_bpf_dump_bpf_info(prog, skb);
	if (ret)
J
Jiri Pirko 已提交
152 153
		goto nla_put_failure;

154 155 156
	tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
	tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
	tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
J
Jiri Pirko 已提交
157

158
	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
J
Jiri Pirko 已提交
159
		goto nla_put_failure;
160

J
Jiri Pirko 已提交
161 162 163 164 165 166 167 168 169
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, tp);
	return -1;
}

static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
	[TCA_ACT_BPF_PARMS]	= { .len = sizeof(struct tc_act_bpf) },
170 171
	[TCA_ACT_BPF_FD]	= { .type = NLA_U32 },
	[TCA_ACT_BPF_NAME]	= { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },
J
Jiri Pirko 已提交
172 173 174 175 176
	[TCA_ACT_BPF_OPS_LEN]	= { .type = NLA_U16 },
	[TCA_ACT_BPF_OPS]	= { .type = NLA_BINARY,
				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};

177
static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
J
Jiri Pirko 已提交
178 179
{
	struct sock_filter *bpf_ops;
180
	struct sock_fprog_kern fprog_tmp;
J
Jiri Pirko 已提交
181
	struct bpf_prog *fp;
182
	u16 bpf_size, bpf_num_ops;
J
Jiri Pirko 已提交
183 184 185 186 187 188 189
	int ret;

	bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
	if (bpf_num_ops	> BPF_MAXINSNS || bpf_num_ops == 0)
		return -EINVAL;

	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
190 191 192
	if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
		return -EINVAL;

J
Jiri Pirko 已提交
193
	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
194
	if (bpf_ops == NULL)
J
Jiri Pirko 已提交
195 196 197 198
		return -ENOMEM;

	memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);

199 200
	fprog_tmp.len = bpf_num_ops;
	fprog_tmp.filter = bpf_ops;
J
Jiri Pirko 已提交
201

202 203 204 205 206
	ret = bpf_prog_create(&fp, &fprog_tmp);
	if (ret < 0) {
		kfree(bpf_ops);
		return ret;
	}
J
Jiri Pirko 已提交
207

208 209 210
	cfg->bpf_ops = bpf_ops;
	cfg->bpf_num_ops = bpf_num_ops;
	cfg->filter = fp;
211
	cfg->is_ebpf = false;
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245

	return 0;
}

static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
{
	struct bpf_prog *fp;
	char *name = NULL;
	u32 bpf_fd;

	bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);

	fp = bpf_prog_get(bpf_fd);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
		bpf_prog_put(fp);
		return -EINVAL;
	}

	if (tb[TCA_ACT_BPF_NAME]) {
		name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
			       nla_len(tb[TCA_ACT_BPF_NAME]),
			       GFP_KERNEL);
		if (!name) {
			bpf_prog_put(fp);
			return -ENOMEM;
		}
	}

	cfg->bpf_fd = bpf_fd;
	cfg->bpf_name = name;
	cfg->filter = fp;
246
	cfg->is_ebpf = true;
247 248 249 250

	return 0;
}

251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
/* Release everything referenced by @cfg: the program (dropped with
 * bpf_prog_put() for eBPF, bpf_prog_destroy() for classic BPF), the
 * instruction copy and the name.
 *
 * A configuration without a program is tolerated, so this is safe to
 * call on a snapshot of an action that never had a filter installed.
 */
static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
{
	struct bpf_prog *filter = cfg->filter;

	if (filter) {
		if (cfg->is_ebpf)
			bpf_prog_put(filter);
		else
			bpf_prog_destroy(filter);
	}

	/* kfree() accepts NULL, so neither pointer needs a guard. */
	kfree(cfg->bpf_ops);
	kfree(cfg->bpf_name);
}

/* Snapshot the resources held by @prog into @cfg so that they can later
 * be handed to tcf_bpf_cfg_cleanup(). Only the fields that cleanup reads
 * are filled in; bpf_fd and bpf_num_ops are left untouched.
 */
static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
				  struct tcf_bpf_cfg *cfg)
{
	/* Owned allocations. */
	cfg->bpf_name = prog->bpf_name;
	cfg->bpf_ops = prog->bpf_ops;

	/* Program handle and the flavour that decides how to drop it. */
	cfg->filter = prog->filter;
	cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
}

272 273 274 275 276
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
			struct nlattr *est, struct tc_action *act,
			int replace, int bind)
{
	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
277
	struct tcf_bpf_cfg cfg, old;
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
	struct tc_act_bpf *parm;
	struct tcf_bpf *prog;
	bool is_bpf, is_ebpf;
	int ret;

	if (!nla)
		return -EINVAL;

	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
	if (ret < 0)
		return ret;

	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
	is_ebpf = tb[TCA_ACT_BPF_FD];

	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
	    !tb[TCA_ACT_BPF_PARMS])
		return -EINVAL;

	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);

	memset(&cfg, 0, sizeof(cfg));

	ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
		       tcf_bpf_init_from_efd(tb, &cfg);
	if (ret < 0)
		return ret;

	if (!tcf_hash_check(parm->index, act, bind)) {
		ret = tcf_hash_create(parm->index, est, act,
308
				      sizeof(*prog), bind, false);
309
		if (ret < 0)
J
Jiri Pirko 已提交
310 311 312 313
			goto destroy_fp;

		ret = ACT_P_CREATED;
	} else {
314
		/* Don't override defaults. */
J
Jiri Pirko 已提交
315 316
		if (bind)
			goto destroy_fp;
317 318 319

		tcf_hash_release(act, bind);
		if (!replace) {
J
Jiri Pirko 已提交
320 321 322 323 324
			ret = -EEXIST;
			goto destroy_fp;
		}
	}

325 326 327
	prog = to_bpf(act);
	spin_lock_bh(&prog->tcf_lock);

328 329 330
	if (ret != ACT_P_CREATED)
		tcf_bpf_prog_fill_cfg(prog, &old);

331 332 333 334 335 336 337 338 339 340 341 342
	prog->bpf_ops = cfg.bpf_ops;
	prog->bpf_name = cfg.bpf_name;

	if (cfg.bpf_num_ops)
		prog->bpf_num_ops = cfg.bpf_num_ops;
	if (cfg.bpf_fd)
		prog->bpf_fd = cfg.bpf_fd;

	prog->tcf_action = parm->action;
	prog->filter = cfg.filter;

	spin_unlock_bh(&prog->tcf_lock);
J
Jiri Pirko 已提交
343 344

	if (ret == ACT_P_CREATED)
345
		tcf_hash_insert(act);
346 347
	else
		tcf_bpf_cfg_cleanup(&old);
348

J
Jiri Pirko 已提交
349 350 351
	return ret;

destroy_fp:
352
	tcf_bpf_cfg_cleanup(&cfg);
J
Jiri Pirko 已提交
353 354 355
	return ret;
}

356
static void tcf_bpf_cleanup(struct tc_action *act, int bind)
J
Jiri Pirko 已提交
357
{
358
	struct tcf_bpf_cfg tmp;
359

360 361
	tcf_bpf_prog_fill_cfg(act->priv, &tmp);
	tcf_bpf_cfg_cleanup(&tmp);
J
Jiri Pirko 已提交
362 363
}

364 365 366 367 368 369 370 371
/* Callback table for the "bpf" action; registered in bpf_init_module()
 * and unregistered in bpf_cleanup_module().
 */
static struct tc_action_ops act_bpf_ops __read_mostly = {
	.kind		=	"bpf",
	.type		=	TCA_ACT_BPF,
	.owner		=	THIS_MODULE,
	.act		=	tcf_bpf,
	.dump		=	tcf_bpf_dump,
	.cleanup	=	tcf_bpf_cleanup,
	.init		=	tcf_bpf_init,
};

/* Module entry point: register the "bpf" action and hand back whatever
 * tcf_register_action() reports.
 */
static int __init bpf_init_module(void)
{
	int err;

	err = tcf_register_action(&act_bpf_ops, BPF_TAB_MASK);

	return err;
}

/* Module exit point: undo the registration done in bpf_init_module(). */
static void __exit bpf_cleanup_module(void)
{
	tcf_unregister_action(&act_bpf_ops);

	return;
}

/* Hook the register/unregister functions into module load and unload. */
module_init(bpf_init_module);
module_exit(bpf_cleanup_module);

/* Module metadata. */
MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
MODULE_DESCRIPTION("TC BPF based action");
MODULE_LICENSE("GPL v2");