/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>

/* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array *array;
24
	u32 elem_size, array_size;
25 26 27 28 29 30 31 32

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	elem_size = round_up(attr->value_size, 8);

33 34 35 36 37 38 39
	/* check round_up into zero and u32 overflow */
	if (elem_size == 0 ||
	    attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
		return ERR_PTR(-ENOMEM);

	array_size = sizeof(*array) + attr->max_entries * elem_size;

40
	/* allocate all map elements and zero-initialize them */
41
	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
42
	if (!array) {
43
		array = vzalloc(array_size);
44 45 46 47 48 49 50 51
		if (!array)
			return ERR_PTR(-ENOMEM);
	}

	/* copy mandatory map attributes */
	array->map.key_size = attr->key_size;
	array->map.value_size = attr->value_size;
	array->map.max_entries = attr->max_entries;
52
	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
	array->elem_size = elem_size;

	return &array->map;
}

/* Called from syscall or from eBPF program.
 *
 * Read @key as a u32 index and return the address of the matching value
 * slot, or NULL when the index is not below max_entries.
 */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *arr = container_of(map, struct bpf_array, map);
	u32 idx = *(u32 *)key;

	if (idx < arr->map.max_entries)
		return arr->value + arr->elem_size * idx;

	return NULL;
}

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

/* Called from syscall or from eBPF program.
 *
 * Copy value_size bytes from @value into the slot selected by the u32 index
 * in @key. Returns -EINVAL for flags above BPF_EXIST, -E2BIG for an index
 * that is not below max_entries, -EEXIST for BPF_NOEXIST, and 0 otherwise.
 */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *arr = container_of(map, struct bpf_array, map);
	u32 idx = *(u32 *)key;
	void *slot;

	/* unknown flags */
	if (map_flags > BPF_EXIST)
		return -EINVAL;

	/* all elements were pre-allocated, cannot insert a new one */
	if (idx >= arr->map.max_entries)
		return -E2BIG;

	/* all elements already exist */
	if (map_flags == BPF_NOEXIST)
		return -EEXIST;

	slot = arr->value + arr->elem_size * idx;
	memcpy(slot, value, map->value_size);
	return 0;
}

/* Called from syscall or from eBPF program.
 *
 * Every delete request is refused with -EINVAL; neither @map nor @key is
 * examined.
 */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	/* By now bpf_prog->aux->refcnt == 0 and map->refcnt == 0, i.e. every
	 * program that used this map (there may be several) has been
	 * disconnected from events. Let the ones still running finish
	 * before the array memory is released.
	 */
	synchronize_rcu();

	kvfree(container_of(map, struct bpf_array, map));
}

static const struct bpf_map_ops array_ops = {
134 135 136 137 138 139 140 141
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
};

/* ties BPF_MAP_TYPE_ARRAY to array_ops; handed to bpf_register_map_type() */
static struct bpf_map_type_list array_type __read_mostly = {
	.type = BPF_MAP_TYPE_ARRAY,
	.ops = &array_ops,
};

/* initcall: register array_type with the bpf core; always returns 0 */
static int __init register_array_map(void)
{
	bpf_register_map_type(&array_type);
	return 0;
}
late_initcall(register_array_map);

static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
155
{
156
	/* only file descriptors can be stored in this type of map */
157 158 159 160 161
	if (attr->value_size != sizeof(u32))
		return ERR_PTR(-EINVAL);
	return array_map_alloc(attr);
}

static void fd_array_map_free(struct bpf_map *map)
163 164 165 166 167 168 169 170
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	synchronize_rcu();

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
171
		BUG_ON(array->ptrs[i] != NULL);
172 173 174
	kvfree(array);
}

/* Lookup hook for fd-based array maps: always returns NULL without looking
 * at @map or @key.
 */
static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* only called from syscall */
181 182
static int fd_array_map_update_elem(struct bpf_map *map, void *key,
				    void *value, u64 map_flags)
183 184
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
185
	void *new_ptr, *old_ptr;
186 187 188 189 190 191 192 193 194
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
195 196 197
	new_ptr = map->ops->map_fd_get_ptr(map, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);
198

199 200 201
	old_ptr = xchg(array->ptrs + index, new_ptr);
	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
202 203 204 205

	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
207 208
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
209
	void *old_ptr;
210 211 212 213 214
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

215 216 217
	old_ptr = xchg(array->ptrs + index, NULL);
	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
218 219 220 221 222 223
		return 0;
	} else {
		return -ENOENT;
	}
}

/* map_fd_get_ptr callback for program arrays: look up the program behind
 * @fd with bpf_prog_get() and return it if bpf_prog_array_compatible()
 * accepts it for this array. An incompatible program is put back and
 * ERR_PTR(-EINVAL) is returned; a failed lookup is returned unchanged.
 */
static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
{
	struct bpf_array *arr = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog;

	prog = bpf_prog_get(fd);
	if (IS_ERR(prog))
		return prog;

	if (bpf_prog_array_compatible(arr, prog))
		return prog;

	bpf_prog_put(prog);
	return ERR_PTR(-EINVAL);
}

/* map_fd_put_ptr callback for program arrays: @ptr is a struct bpf_prog
 * and is passed to bpf_prog_put_rcu().
 */
static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put_rcu(ptr);
}

/* decrement refcnt of all bpf_progs that are stored in this map */
246
void bpf_fd_array_map_clear(struct bpf_map *map)
247 248 249 250 251
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
252
		fd_array_map_delete_elem(map, &i);
253 254 255
}

static const struct bpf_map_ops prog_array_ops = {
256 257
	.map_alloc = fd_array_map_alloc,
	.map_free = fd_array_map_free,
258
	.map_get_next_key = array_map_get_next_key,
259 260 261 262 263
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_update_elem = fd_array_map_update_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
264 265 266 267 268 269 270 271 272 273 274 275 276
};

/* ties BPF_MAP_TYPE_PROG_ARRAY to prog_array_ops */
static struct bpf_map_type_list prog_array_type __read_mostly = {
	.type = BPF_MAP_TYPE_PROG_ARRAY,
	.ops = &prog_array_ops,
};

/* initcall: register prog_array_type with the bpf core; always returns 0 */
static int __init register_prog_array_map(void)
{
	bpf_register_map_type(&prog_array_type);
	return 0;
}
late_initcall(register_prog_array_map);

/* map_free hook for perf event arrays: drop every stored pointer first,
 * then free the map through the common fd-array path.
 */
static void perf_event_array_map_free(struct bpf_map *map)
{
	/* fd_array_map_free() insists on all slots being NULL */
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

/* map_fd_get_ptr callback for perf event arrays.
 *
 * Obtain the perf event behind @fd with perf_event_get() and return it when
 * its attributes do not have inherit set and its type is PERF_TYPE_RAW,
 * PERF_TYPE_HARDWARE, or PERF_TYPE_SOFTWARE with config
 * PERF_COUNT_SW_BPF_OUTPUT. Any other event is released with
 * perf_event_release_kernel() and ERR_PTR(-EINVAL) is returned. A failed
 * perf_event_get() result is returned unchanged.
 */
static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
{
	const struct perf_event_attr *attr;
	struct perf_event *event = perf_event_get(fd);

	if (IS_ERR(event))
		return event;

	attr = perf_event_attrs(event);
	if (IS_ERR(attr) || attr->inherit)
		goto err;

	switch (attr->type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
		return event;
	case PERF_TYPE_SOFTWARE:
		if (attr->config == PERF_COUNT_SW_BPF_OUTPUT)
			return event;
		break;
	default:
		break;
	}
err:
	perf_event_release_kernel(event);
	return ERR_PTR(-EINVAL);
}

/* map_fd_put_ptr callback for perf event arrays: @ptr is a struct
 * perf_event and is passed to perf_event_release_kernel().
 */
static void perf_event_fd_array_put_ptr(void *ptr)
{
	perf_event_release_kernel(ptr);
}

/* operations table for perf event arrays: the generic fd-array handlers
 * with a perf-specific free hook and get/put callbacks
 */
static const struct bpf_map_ops perf_event_array_ops = {
	/* lifetime */
	.map_alloc = fd_array_map_alloc,
	.map_free = perf_event_array_map_free,
	/* element access */
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_update_elem = fd_array_map_update_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_get_next_key = array_map_get_next_key,
	/* fd <-> pointer conversion */
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
};

/* ties BPF_MAP_TYPE_PERF_EVENT_ARRAY to perf_event_array_ops */
static struct bpf_map_type_list perf_event_array_type __read_mostly = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.ops = &perf_event_array_ops,
};

/* initcall: register perf_event_array_type with the bpf core; always
 * returns 0
 */
static int __init register_perf_event_array_map(void)
{
	bpf_register_map_type(&perf_event_array_type);
	return 0;
}
late_initcall(register_perf_event_array_map);