/*
 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/filter.h>
#include <linux/bpf.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>

24 25 26 27 28 29 30 31 32 33
/* Second argument handed to tcf_register_action() at module init. */
#define BPF_TAB_MASK		15
/* Maximum length accepted by the policy for the TCA_ACT_BPF_NAME string. */
#define ACT_BPF_NAME_LEN	256

/* Scratch description of a freshly parsed BPF program. It is zeroed and
 * filled by one of the tcf_bpf_init_from_*() helpers, then copied field
 * by field into the action's struct tcf_bpf by tcf_bpf_init().
 */
struct tcf_bpf_cfg {
	/* Program handle, from bpf_prog_create() or bpf_prog_get(). */
	struct bpf_prog *filter;
	/* Private copy of the classic BPF instructions; stays NULL for eBPF. */
	struct sock_filter *bpf_ops;
	/* Private copy of TCA_ACT_BPF_NAME (eBPF path only); may be NULL. */
	char *bpf_name;
	/* Value of TCA_ACT_BPF_FD (eBPF path only). */
	u32 bpf_fd;
	/* Instruction count from TCA_ACT_BPF_OPS_LEN (classic path only). */
	u16 bpf_num_ops;
};
J
Jiri Pirko 已提交
34

35
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
J
Jiri Pirko 已提交
36 37
		   struct tcf_result *res)
{
38
	struct tcf_bpf *prog = act->priv;
39
	int action, filter_res;
J
Jiri Pirko 已提交
40

41 42 43
	if (unlikely(!skb_mac_header_was_set(skb)))
		return TC_ACT_UNSPEC;

44
	spin_lock(&prog->tcf_lock);
45

46 47
	prog->tcf_tm.lastuse = jiffies;
	bstats_update(&prog->tcf_bstats, skb);
J
Jiri Pirko 已提交
48

49 50 51 52
	/* Needed here for accessing maps. */
	rcu_read_lock();
	filter_res = BPF_PROG_RUN(prog->filter, skb);
	rcu_read_unlock();
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71

	/* A BPF program may overwrite the default action opcode.
	 * Similarly as in cls_bpf, if filter_res == -1 we use the
	 * default action specified from tc.
	 *
	 * In case a different well-known TC_ACT opcode has been
	 * returned, it will overwrite the default one.
	 *
	 * For everything else that is unkown, TC_ACT_UNSPEC is
	 * returned.
	 */
	switch (filter_res) {
	case TC_ACT_PIPE:
	case TC_ACT_RECLASSIFY:
	case TC_ACT_OK:
		action = filter_res;
		break;
	case TC_ACT_SHOT:
		action = filter_res;
72
		prog->tcf_qstats.drops++;
73 74
		break;
	case TC_ACT_UNSPEC:
75
		action = prog->tcf_action;
76 77 78 79
		break;
	default:
		action = TC_ACT_UNSPEC;
		break;
J
Jiri Pirko 已提交
80 81
	}

82
	spin_unlock(&prog->tcf_lock);
J
Jiri Pirko 已提交
83 84 85
	return action;
}

86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog)
{
	return !prog->bpf_ops;
}

/* Emit the classic BPF description of @prog into @skb: the instruction
 * count (TCA_ACT_BPF_OPS_LEN) followed by the raw instruction array
 * (TCA_ACT_BPF_OPS).
 *
 * Returns 0 on success or -EMSGSIZE when an attribute could not be added.
 */
static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
				 struct sk_buff *skb)
{
	struct nlattr *ops_attr;

	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops) != 0)
		return -EMSGSIZE;

	ops_attr = nla_reserve(skb, TCA_ACT_BPF_OPS,
			       prog->bpf_num_ops * sizeof(struct sock_filter));
	if (!ops_attr)
		return -EMSGSIZE;

	/* Copy exactly as many bytes as were reserved for the payload. */
	memcpy(nla_data(ops_attr), prog->bpf_ops, nla_len(ops_attr));

	return 0;
}

/* Emit the eBPF description of @prog into @skb: the stored fd value
 * (TCA_ACT_BPF_FD) and, when one was supplied, the program name
 * (TCA_ACT_BPF_NAME).
 *
 * Returns 0 on success or -EMSGSIZE when an attribute could not be added.
 */
static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
				  struct sk_buff *skb)
{
	if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
		return -EMSGSIZE;

	/* The name is optional. */
	if (!prog->bpf_name)
		return 0;

	if (nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
		return -EMSGSIZE;

	return 0;
}

static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
J
Jiri Pirko 已提交
123 124 125
			int bind, int ref)
{
	unsigned char *tp = skb_tail_pointer(skb);
126
	struct tcf_bpf *prog = act->priv;
J
Jiri Pirko 已提交
127
	struct tc_act_bpf opt = {
128 129 130 131
		.index   = prog->tcf_index,
		.refcnt  = prog->tcf_refcnt - ref,
		.bindcnt = prog->tcf_bindcnt - bind,
		.action  = prog->tcf_action,
J
Jiri Pirko 已提交
132
	};
133 134
	struct tcf_t tm;
	int ret;
J
Jiri Pirko 已提交
135 136 137 138

	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

139 140 141 142 143
	if (tcf_bpf_is_ebpf(prog))
		ret = tcf_bpf_dump_ebpf_info(prog, skb);
	else
		ret = tcf_bpf_dump_bpf_info(prog, skb);
	if (ret)
J
Jiri Pirko 已提交
144 145
		goto nla_put_failure;

146 147 148
	tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
	tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
	tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
J
Jiri Pirko 已提交
149

150
	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
J
Jiri Pirko 已提交
151
		goto nla_put_failure;
152

J
Jiri Pirko 已提交
153 154 155 156 157 158 159 160 161
	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, tp);
	return -1;
}

static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
	[TCA_ACT_BPF_PARMS]	= { .len = sizeof(struct tc_act_bpf) },
162 163
	[TCA_ACT_BPF_FD]	= { .type = NLA_U32 },
	[TCA_ACT_BPF_NAME]	= { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },
J
Jiri Pirko 已提交
164 165 166 167 168
	[TCA_ACT_BPF_OPS_LEN]	= { .type = NLA_U16 },
	[TCA_ACT_BPF_OPS]	= { .type = NLA_BINARY,
				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};

169
static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
J
Jiri Pirko 已提交
170 171
{
	struct sock_filter *bpf_ops;
172
	struct sock_fprog_kern fprog_tmp;
J
Jiri Pirko 已提交
173
	struct bpf_prog *fp;
174
	u16 bpf_size, bpf_num_ops;
J
Jiri Pirko 已提交
175 176 177 178 179 180 181
	int ret;

	bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
	if (bpf_num_ops	> BPF_MAXINSNS || bpf_num_ops == 0)
		return -EINVAL;

	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
182 183 184
	if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
		return -EINVAL;

J
Jiri Pirko 已提交
185
	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
186
	if (bpf_ops == NULL)
J
Jiri Pirko 已提交
187 188 189 190
		return -ENOMEM;

	memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);

191 192
	fprog_tmp.len = bpf_num_ops;
	fprog_tmp.filter = bpf_ops;
J
Jiri Pirko 已提交
193

194 195 196 197 198
	ret = bpf_prog_create(&fp, &fprog_tmp);
	if (ret < 0) {
		kfree(bpf_ops);
		return ret;
	}
J
Jiri Pirko 已提交
199

200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
	cfg->bpf_ops = bpf_ops;
	cfg->bpf_num_ops = bpf_num_ops;
	cfg->filter = fp;

	return 0;
}

/* Look up the eBPF program behind the fd given in TCA_ACT_BPF_FD and
 * record it in @cfg, together with a private copy of the optional
 * TCA_ACT_BPF_NAME string.
 *
 * The caller must have checked that TCA_ACT_BPF_FD is present.
 *
 * Returns 0 on success. On failure the program reference is dropped
 * again and the result is the error from bpf_prog_get(), -EINVAL when
 * the program is not of type BPF_PROG_TYPE_SCHED_ACT, or -ENOMEM when
 * the name cannot be copied.
 */
static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
{
	struct bpf_prog *filter;
	char *name_copy = NULL;
	u32 fd;
	int err;

	fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);

	filter = bpf_prog_get(fd);
	if (IS_ERR(filter))
		return PTR_ERR(filter);

	if (filter->type != BPF_PROG_TYPE_SCHED_ACT) {
		err = -EINVAL;
		goto put_prog;
	}

	if (tb[TCA_ACT_BPF_NAME]) {
		name_copy = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
				    nla_len(tb[TCA_ACT_BPF_NAME]),
				    GFP_KERNEL);
		if (!name_copy) {
			err = -ENOMEM;
			goto put_prog;
		}
	}

	cfg->filter = filter;
	cfg->bpf_name = name_copy;
	cfg->bpf_fd = fd;

	return 0;

put_prog:
	bpf_prog_put(filter);
	return err;
}

static int tcf_bpf_init(struct net *net, struct nlattr *nla,
			struct nlattr *est, struct tc_action *act,
			int replace, int bind)
{
	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
	struct tc_act_bpf *parm;
	struct tcf_bpf *prog;
	struct tcf_bpf_cfg cfg;
	bool is_bpf, is_ebpf;
	int ret;

	if (!nla)
		return -EINVAL;

	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
	if (ret < 0)
		return ret;

	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
	is_ebpf = tb[TCA_ACT_BPF_FD];

	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
	    !tb[TCA_ACT_BPF_PARMS])
		return -EINVAL;

	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);

	memset(&cfg, 0, sizeof(cfg));

	ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
		       tcf_bpf_init_from_efd(tb, &cfg);
	if (ret < 0)
		return ret;

	if (!tcf_hash_check(parm->index, act, bind)) {
		ret = tcf_hash_create(parm->index, est, act,
				      sizeof(*prog), bind);
		if (ret < 0)
J
Jiri Pirko 已提交
279 280 281 282
			goto destroy_fp;

		ret = ACT_P_CREATED;
	} else {
283
		/* Don't override defaults. */
J
Jiri Pirko 已提交
284 285
		if (bind)
			goto destroy_fp;
286 287 288

		tcf_hash_release(act, bind);
		if (!replace) {
J
Jiri Pirko 已提交
289 290 291 292 293
			ret = -EEXIST;
			goto destroy_fp;
		}
	}

294 295 296 297 298 299 300 301 302 303 304 305 306 307 308
	prog = to_bpf(act);
	spin_lock_bh(&prog->tcf_lock);

	prog->bpf_ops = cfg.bpf_ops;
	prog->bpf_name = cfg.bpf_name;

	if (cfg.bpf_num_ops)
		prog->bpf_num_ops = cfg.bpf_num_ops;
	if (cfg.bpf_fd)
		prog->bpf_fd = cfg.bpf_fd;

	prog->tcf_action = parm->action;
	prog->filter = cfg.filter;

	spin_unlock_bh(&prog->tcf_lock);
J
Jiri Pirko 已提交
309 310

	if (ret == ACT_P_CREATED)
311 312
		tcf_hash_insert(act);

J
Jiri Pirko 已提交
313 314 315
	return ret;

destroy_fp:
316 317 318 319 320 321 322 323
	if (is_ebpf)
		bpf_prog_put(cfg.filter);
	else
		bpf_prog_destroy(cfg.filter);

	kfree(cfg.bpf_ops);
	kfree(cfg.bpf_name);

J
Jiri Pirko 已提交
324 325 326
	return ret;
}

327
static void tcf_bpf_cleanup(struct tc_action *act, int bind)
J
Jiri Pirko 已提交
328
{
329
	const struct tcf_bpf *prog = act->priv;
J
Jiri Pirko 已提交
330

331 332 333 334
	if (tcf_bpf_is_ebpf(prog))
		bpf_prog_put(prog->filter);
	else
		bpf_prog_destroy(prog->filter);
J
Jiri Pirko 已提交
335 336
}

337 338 339 340 341 342 343 344
/* Action operations registered for the "bpf" action kind. */
static struct tc_action_ops act_bpf_ops __read_mostly = {
	.owner		=	THIS_MODULE,
	.kind		=	"bpf",
	.type		=	TCA_ACT_BPF,
	.init		=	tcf_bpf_init,
	.act		=	tcf_bpf,
	.dump		=	tcf_bpf_dump,
	.cleanup	=	tcf_bpf_cleanup,
};

/* Module init: register the "bpf" action ops, passing BPF_TAB_MASK along.
 * Returns whatever tcf_register_action() returns.
 */
static int __init bpf_init_module(void)
{
	return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK);
}

/* Module exit: unregister the "bpf" action ops again. */
static void __exit bpf_cleanup_module(void)
{
	tcf_unregister_action(&act_bpf_ops);
}

/* Wire up the module entry/exit points and declare the module metadata. */
module_init(bpf_init_module);
module_exit(bpf_cleanup_module);

MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
MODULE_DESCRIPTION("TC BPF based action");
MODULE_LICENSE("GPL v2");