// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

static DEFINE_IDA(umem_ida);

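/* Track a Tx-enabled XDP socket on this umem's Tx list. Additions and
 * removals are serialized by xsk_tx_list_lock; the list is meant to be
 * walked under RCU on the transmit path.
 */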
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_tx_list);
	spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
}

void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
	list_del_rcu(&xs->list);
	spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
}

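/* Release the pinned user pages, marking them dirty, and free the
 * page array.
 */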
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kfree(umem->pgs);
	umem->pgs = NULL;
}

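/* Undo the RLIMIT_MEMLOCK accounting done at registration time and
 * drop the reference on the owning user.
 */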
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

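/* Bind the umem to a netdev queue. A reference on the device is held
 * until xdp_umem_clear_dev() releases it.
 */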
void xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			 u16 queue_id)
{
	umem->dev = dev;
	umem->queue_id = queue_id;

	dev_hold(dev);
}

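/* Drop the device binding and clear the zero-copy flag. */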
void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	dev_put(umem->dev);
	umem->dev = NULL;
	umem->zc = false;
}

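/* Final teardown, called once the last reference is gone: detach from
 * the device, release the id and the fill/completion rings, then unpin
 * and unaccount the pages.
 */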
static void xdp_umem_release(struct xdp_umem *umem)
{
	xdp_umem_clear_dev(umem);

	ida_simple_remove(&umem_ida, umem->id);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users))
		xdp_umem_release(umem);
}

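/* Pin the user memory with FOLL_LONGTERM so the backing pages stay
 * resident for the lifetime of the umem. A partial pin is treated as
 * failure: the already-pinned pages are released and -ENOMEM returned.
 */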
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

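/* Charge the pages to be pinned against the user's RLIMIT_MEMLOCK,
 * unless the caller has CAP_IPC_LOCK. The cmpxchg loop updates
 * locked_vm without having to take a lock.
 */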
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

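/* Validate an XDP_UMEM_REG request from user space and initialize the
 * umem: check chunk size, alignment and headroom, account the memory
 * against RLIMIT_MEMLOCK and pin the user pages.
 */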
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	u64 npgs, addr = mr->addr, size = mr->len;
	unsigned int chunks, chunks_per_page;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
			XDP_UMEM_USES_NEED_WAKEUP))
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* The memory area has to be page size aligned. This keeps
		 * things simple for now and might change later.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = size >> PAGE_SHIFT;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	if (!unaligned_chunks) {
		chunks_per_page = PAGE_SIZE / chunk_size;
		if (chunks < chunks_per_page || chunks % chunks_per_page)
			return -EINVAL;
	}

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = (u32)npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;
	INIT_LIST_HEAD(&umem->xsk_tx_list);
	spin_lock_init(&umem->xsk_tx_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	return 0;

out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

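/* Allocate a umem, give it an id from the IDA and register the user
 * memory described by @mr. Returns an ERR_PTR() on failure.
 */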
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_simple_remove(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

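/* A umem can only be used once both its fill and completion rings
 * have been created.
 */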
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}