/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>

#include <linux/uaccess.h>

#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>

#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
	IB_UVERBS_NUM_FIXED_MINOR = 32,
	IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};

#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;

static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);

static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
				     struct ib_device *ib_dev,
				     const char __user *buf, int in_len,
				     int out_len) = {
	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE]	= ib_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT]		= ib_uverbs_query_port,
	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ]		= ib_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= ib_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ]		= ib_uverbs_destroy_cq,
	[IB_USER_VERBS_CMD_CREATE_QP]		= ib_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP]		= ib_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP]		= ib_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP]		= ib_uverbs_destroy_qp,
	[IB_USER_VERBS_CMD_POST_SEND]		= ib_uverbs_post_send,
	[IB_USER_VERBS_CMD_POST_RECV]		= ib_uverbs_post_recv,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV]	= ib_uverbs_post_srq_recv,
	[IB_USER_VERBS_CMD_CREATE_AH]		= ib_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH]		= ib_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST]	= ib_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST]	= ib_uverbs_detach_mcast,
	[IB_USER_VERBS_CMD_CREATE_SRQ]		= ib_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
};

static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
				    struct ib_device *ib_dev,
				    struct ib_udata *ucore,
				    struct ib_udata *uhw) = {
	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device,
	[IB_USER_VERBS_EX_CMD_CREATE_CQ]	= ib_uverbs_ex_create_cq,
	[IB_USER_VERBS_EX_CMD_CREATE_QP]        = ib_uverbs_ex_create_qp,
	[IB_USER_VERBS_EX_CMD_CREATE_WQ]        = ib_uverbs_ex_create_wq,
	[IB_USER_VERBS_EX_CMD_MODIFY_WQ]        = ib_uverbs_ex_modify_wq,
	[IB_USER_VERBS_EX_CMD_DESTROY_WQ]       = ib_uverbs_ex_destroy_wq,
	[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
	[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
	[IB_USER_VERBS_EX_CMD_MODIFY_QP]        = ib_uverbs_ex_modify_qp,
	[IB_USER_VERBS_EX_CMD_MODIFY_CQ]        = ib_uverbs_ex_modify_cq,
};

static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);

struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
{
	return ufile->ucontext;
}
EXPORT_SYMBOL(ib_uverbs_get_ucontext);

int uverbs_dealloc_mw(struct ib_mw *mw)
{
	struct ib_pd *pd = mw->pd;
	int ret;

	ret = mw->device->dealloc_mw(mw);
	if (!ret)
		atomic_dec(&pd->usecnt);
	return ret;
}

static void ib_uverbs_release_dev(struct kobject *kobj)
{
	struct ib_uverbs_device *dev =
		container_of(kobj, struct ib_uverbs_device, kobj);

	cleanup_srcu_struct(&dev->disassociate_srcu);
	uverbs_free_spec_tree(dev->specs_root);
	kfree(dev);
}

static struct kobj_type ib_uverbs_dev_ktype = {
	.release = ib_uverbs_release_dev,
};

static void ib_uverbs_release_async_event_file(struct kref *ref)
{
	struct ib_uverbs_async_event_file *file =
		container_of(ref, struct ib_uverbs_async_event_file, ref);

	kfree(file);
}

void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			  struct ib_uverbs_completion_event_file *ev_file,
			  struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	if (ev_file) {
		spin_lock_irq(&ev_file->ev_queue.lock);
		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
			list_del(&evt->list);
			kfree(evt);
		}
		spin_unlock_irq(&ev_file->ev_queue.lock);

		uverbs_uobject_put(&ev_file->uobj_file.uobj);
	}

	spin_lock_irq(&file->async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->ev_queue.lock);
}

void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	spin_lock_irq(&file->async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->ev_queue.lock);
}

void ib_uverbs_detach_umcast(struct ib_qp *qp,
			     struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *mcast, *tmp;

	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
		list_del(&mcast->list);
		kfree(mcast);
	}
}

static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file,
				   bool device_removed)
{
	struct ib_ucontext *context = file->ucontext;

	context->closing = 1;
	uverbs_cleanup_ufile(file, device_removed);
	put_pid(context->tgid);

	ib_rdmacg_uncharge(&context->cg_obj, context->device,
			   RDMACG_RESOURCE_HCA_HANDLE);

	return context->device->dealloc_ucontext(context);
}

static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
	complete(&dev->comp);
}

void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);
	struct ib_device *ib_dev;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (ib_dev && !ib_dev->disassociate_ucontext)
		module_put(ib_dev->owner);
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);

	if (atomic_dec_and_test(&file->device->refcount))
		ib_uverbs_comp_dev(file->device);

	kobject_put(&file->device->kobj);
	kfree(file);
}

static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
				    struct ib_uverbs_file *uverbs_file,
				    struct file *filp, char __user *buf,
				    size_t count, loff_t *pos,
				    size_t eventsz)
{
	struct ib_uverbs_event *event;
	int ret = 0;

	spin_lock_irq(&ev_queue->lock);

	while (list_empty(&ev_queue->event_list)) {
		spin_unlock_irq(&ev_queue->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(ev_queue->poll_wait,
					     (!list_empty(&ev_queue->event_list) ||
			/* The barriers built into wait_event_interruptible()
			 * and wake_up() guarantee this will see the null set
			 * without using RCU
			 */
					     !uverbs_file->device->ib_dev)))
			return -ERESTARTSYS;

		/* If the device was disassociated and no event exists, return an error */
		if (list_empty(&ev_queue->event_list) &&
		    !uverbs_file->device->ib_dev)
			return -EIO;

		spin_lock_irq(&ev_queue->lock);
	}

	event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);

	if (eventsz > count) {
		ret   = -EINVAL;
		event = NULL;
	} else {
		list_del(ev_queue->event_list.next);
		if (event->counter) {
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&ev_queue->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	kfree(event);

	return ret;
}

static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
					  size_t count, loff_t *pos)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;

	return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
				    buf, count, pos,
				    sizeof(struct ib_uverbs_async_event_desc));
}

static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
					 size_t count, loff_t *pos)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_read(&comp_ev_file->ev_queue,
				    comp_ev_file->uobj_file.ufile, filp,
				    buf, count, pos,
				    sizeof(struct ib_uverbs_comp_event_desc));
}
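
/*
 * Illustrative userspace sketch (an assumption-labelled example, not part
 * of this file): a completion channel fd, obtained with
 * IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, is drained with poll() + read(),
 * one struct ib_uverbs_comp_event_desc per event:
 *
 *	struct pollfd pfd = { .fd = channel_fd, .events = POLLIN };
 *	struct ib_uverbs_comp_event_desc desc;
 *
 *	if (poll(&pfd, 1, -1) > 0 &&
 *	    read(channel_fd, &desc, sizeof(desc)) == (ssize_t)sizeof(desc))
 *		handle_cq(desc.cq_handle);	// handle_cq is a hypothetical helper
 */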

static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
					 struct file *filp,
					 struct poll_table_struct *wait)
{
	__poll_t pollflags = 0;

	poll_wait(filp, &ev_queue->poll_wait, wait);

	spin_lock_irq(&ev_queue->lock);
	if (!list_empty(&ev_queue->event_list))
		pollflags = EPOLLIN | EPOLLRDNORM;
	spin_unlock_irq(&ev_queue->lock);

	return pollflags;
}

static __poll_t ib_uverbs_async_event_poll(struct file *filp,
					       struct poll_table_struct *wait)
{
	return ib_uverbs_event_poll(filp->private_data, filp, wait);
}

static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
					      struct poll_table_struct *wait)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}

static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_queue *ev_queue = filp->private_data;

	return fasync_helper(fd, filp, on, &ev_queue->async_queue);
}

static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}

static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;
	struct ib_uverbs_file *uverbs_file = file->uverbs_file;
	struct ib_uverbs_event *entry, *tmp;
	int closed_already = 0;

	mutex_lock(&uverbs_file->device->lists_mutex);
	spin_lock_irq(&file->ev_queue.lock);
	closed_already = file->ev_queue.is_closed;
	file->ev_queue.is_closed = 1;
	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->ev_queue.lock);
	if (!closed_already) {
		list_del(&file->list);
		ib_unregister_event_handler(&uverbs_file->event_handler);
	}
	mutex_unlock(&uverbs_file->device->lists_mutex);

	kref_put(&uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&file->ref, ib_uverbs_release_async_event_file);

	return 0;
}

static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_completion_event_file *file = filp->private_data;
	struct ib_uverbs_event *entry, *tmp;

	spin_lock_irq(&file->ev_queue.lock);
	list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->ev_queue.lock);

	uverbs_close_fd(filp);

	return 0;
}

const struct file_operations uverbs_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_comp_event_read,
	.poll    = ib_uverbs_comp_event_poll,
	.release = ib_uverbs_comp_event_close,
	.fasync  = ib_uverbs_comp_event_fasync,
	.llseek	 = no_llseek,
};

static const struct file_operations uverbs_async_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_async_event_read,
	.poll    = ib_uverbs_async_event_poll,
	.release = ib_uverbs_async_event_close,
	.fasync  = ib_uverbs_async_event_fasync,
	.llseek	 = no_llseek,
};

void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_uverbs_event_queue   *ev_queue = cq_context;
	struct ib_ucq_object	       *uobj;
	struct ib_uverbs_event	       *entry;
	unsigned long			flags;

	if (!ev_queue)
		return;

	spin_lock_irqsave(&ev_queue->lock, flags);
	if (ev_queue->is_closed) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;

	list_add_tail(&entry->list, &ev_queue->event_list);
	list_add_tail(&entry->obj_list, &uobj->comp_list);
	spin_unlock_irqrestore(&ev_queue->lock, flags);

	wake_up_interruptible(&ev_queue->poll_wait);
	kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}

static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
	if (file->async_file->ev_queue.is_closed) {
		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->desc.async.reserved   = 0;
	entry->counter               = counter;

	list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);

	wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
	kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);

	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	/* for XRC target QPs, check that the QP is live */
	if (!event->element.qp->uobject)
		return;

	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
						  struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
}

void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
	kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
	file->async_file = NULL;
}

void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
	spin_lock_init(&ev_queue->lock);
	INIT_LIST_HEAD(&ev_queue->event_list);
	init_waitqueue_head(&ev_queue->poll_wait);
	ev_queue->is_closed   = 0;
	ev_queue->async_queue = NULL;
}

struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
					      struct ib_device	*ib_dev)
{
	struct ib_uverbs_async_event_file *ev_file;
	struct file *filp;

	ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	ib_uverbs_init_event_queue(&ev_file->ev_queue);
	ev_file->uverbs_file = uverbs_file;
	kref_get(&ev_file->uverbs_file->ref);
	kref_init(&ev_file->ref);
	filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
				  ev_file, O_RDONLY);
	if (IS_ERR(filp))
		goto err_put_refs;

	mutex_lock(&uverbs_file->device->lists_mutex);
	list_add_tail(&ev_file->list,
		      &uverbs_file->device->uverbs_events_file_list);
	mutex_unlock(&uverbs_file->device->lists_mutex);

	WARN_ON(uverbs_file->async_file);
	uverbs_file->async_file = ev_file;
	kref_get(&uverbs_file->async_file->ref);
	INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
			      ib_dev,
			      ib_uverbs_event_handler);
	ib_register_event_handler(&uverbs_file->event_handler);
	/* At this point the async file is fully set up */

	return filp;

err_put_refs:
	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
	return filp;
}

static bool verify_command_mask(struct ib_device *ib_dev,
				u32 command, bool extended)
{
	if (!extended)
		return ib_dev->uverbs_cmd_mask & BIT_ULL(command);

	return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command);
}
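
/*
 * For context: a provider driver opts commands in by setting these masks
 * when it registers its ib_device. A hedged sketch (illustrative values,
 * not copied from any one driver):
 *
 *	ibdev->uverbs_cmd_mask =
 *		BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
 *		BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
 *		BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ);
 *	ibdev->uverbs_ex_cmd_mask =
 *		BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
 */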

static bool verify_command_idx(u32 command, bool extended)
{
	if (extended)
		return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
		       uverbs_ex_cmd_table[command];

	return command < ARRAY_SIZE(uverbs_cmd_table) &&
	       uverbs_cmd_table[command];
}

static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
			   u32 *command, bool *extended)
{
	if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
				   IB_USER_VERBS_CMD_COMMAND_MASK))
		return -EINVAL;

	*command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
	*extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;

	if (!verify_command_idx(*command, *extended))
		return -EOPNOTSUPP;

	return 0;
}

static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
			  struct ib_uverbs_ex_cmd_hdr *ex_hdr,
			  size_t count, bool extended)
{
	if (extended) {
		count -= sizeof(*hdr) + sizeof(*ex_hdr);

		if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
			return -EINVAL;

		if (ex_hdr->cmd_hdr_reserved)
			return -EINVAL;

		if (ex_hdr->response) {
			if (!hdr->out_words && !ex_hdr->provider_out_words)
				return -EINVAL;

			if (!access_ok(VERIFY_WRITE,
				       u64_to_user_ptr(ex_hdr->response),
				       (hdr->out_words + ex_hdr->provider_out_words) * 8))
				return -EFAULT;
		} else {
			if (hdr->out_words || ex_hdr->provider_out_words)
				return -EINVAL;
		}

		return 0;
	}

	/* not extended command */
	if (hdr->in_words * 4 != count)
		return -EINVAL;

	return 0;
}
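
/*
 * The size accounting above, restated: legacy commands count the whole
 * write() buffer (header included) in 4-byte words, while extended
 * commands count only the payload that follows both headers in 8-byte
 * words. Derived directly from the checks in verify_hdr():
 *
 *	legacy:   hdr.in_words * 4 == count
 *	extended: (hdr.in_words + ex_hdr.provider_in_words) * 8
 *		      == count - sizeof(hdr) - sizeof(ex_hdr)
 */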

static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			     size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_uverbs_ex_cmd_hdr ex_hdr;
	struct ib_device *ib_dev;
	struct ib_uverbs_cmd_hdr hdr;
	bool extended;
	int srcu_key;
	u32 command;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (count < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	ret = process_hdr(&hdr, &command, &extended);
	if (ret)
		return ret;

	if (!file->ucontext &&
	    (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended))
		return -EINVAL;

	if (extended) {
		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
			return -EINVAL;
		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
			return -EFAULT;
	}

	ret = verify_hdr(&hdr, &ex_hdr, count, extended);
	if (ret)
		return ret;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto out;
	}

	if (!verify_command_mask(ib_dev, command, extended)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	buf += sizeof(hdr);

	if (!extended) {
		ret = uverbs_cmd_table[command](file, ib_dev, buf,
						hdr.in_words * 4,
						hdr.out_words * 4);
	} else {
		struct ib_udata ucore;
		struct ib_udata uhw;

		buf += sizeof(ex_hdr);

		ib_uverbs_init_udata_buf_or_null(&ucore, buf,
					u64_to_user_ptr(ex_hdr.response),
					hdr.in_words * 8, hdr.out_words * 8);

		ib_uverbs_init_udata_buf_or_null(&uhw,
					buf + ucore.inlen,
					u64_to_user_ptr(ex_hdr.response) + ucore.outlen,
					ex_hdr.provider_in_words * 8,
					ex_hdr.provider_out_words * 8);

		ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
		ret = (ret) ? : count;
	}

out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}
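
/*
 * Illustrative userspace sketch (hedged; field layout from the exported
 * <rdma/ib_user_verbs.h> ABI, error handling omitted): issuing the legacy
 * GET_CONTEXT command through this write() interface.
 *
 *	struct ib_uverbs_get_context_resp resp;
 *	struct {
 *		struct ib_uverbs_cmd_hdr hdr;
 *		struct ib_uverbs_get_context cmd;
 *	} req = {
 *		.hdr = {
 *			.command   = IB_USER_VERBS_CMD_GET_CONTEXT,
 *			.in_words  = sizeof(req) / 4,
 *			.out_words = sizeof(resp) / 4,
 *		},
 *		.cmd = { .response = (__u64)(uintptr_t)&resp },
 *	};
 *	int fd = open("/dev/infiniband/uverbs0", O_RDWR | O_CLOEXEC);
 *
 *	if (write(fd, &req, sizeof(req)) == (ssize_t)sizeof(req))
 *		use_context(resp.async_fd);	// use_context is a hypothetical consumer
 */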

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_device *ib_dev;
	int ret = 0;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto out;
	}

	if (!file->ucontext)
		ret = -ENODEV;
	else
		ret = ib_dev->mmap(file->ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either immediately fail with -ENXIO, or all
 *    required initialization will be done.
 */
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_device *dev;
	struct ib_uverbs_file *file;
	struct ib_device *ib_dev;
	int ret;
	int module_dependent;
	int srcu_key;

	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
	if (!atomic_inc_not_zero(&dev->refcount))
		return -ENXIO;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	mutex_lock(&dev->lists_mutex);
	ib_dev = srcu_dereference(dev->ib_dev,
				  &dev->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto err;
	}

	/* In case IB device supports disassociate ucontext, there is no hard
	 * dependency between uverbs device and its low level device.
	 */
	module_dependent = !(ib_dev->disassociate_ucontext);

	if (module_dependent) {
		if (!try_module_get(ib_dev->owner)) {
			ret = -ENODEV;
			goto err;
		}
	}

	file = kzalloc(sizeof(*file), GFP_KERNEL);
	if (!file) {
		ret = -ENOMEM;
		if (module_dependent)
			goto err_module;

		goto err;
	}

	file->device	 = dev;
	spin_lock_init(&file->idr_lock);
	idr_init(&file->idr);
	kref_init(&file->ref);
	mutex_init(&file->mutex);
	mutex_init(&file->cleanup_mutex);

	mutex_init(&file->uobjects_lock);
	INIT_LIST_HEAD(&file->uobjects);
	init_rwsem(&file->cleanup_rwsem);

	filp->private_data = file;
	kobject_get(&dev->kobj);
	list_add_tail(&file->list, &dev->uverbs_file_list);
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return nonseekable_open(inode, filp);

err_module:
	module_put(ib_dev->owner);

err:
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
	if (atomic_dec_and_test(&dev->refcount))
		ib_uverbs_comp_dev(dev);

	return ret;
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;

	mutex_lock(&file->cleanup_mutex);
	if (file->ucontext) {
		ib_uverbs_cleanup_ufile(file, false);
		file->ucontext = NULL;
	}
	mutex_unlock(&file->cleanup_mutex);
	idr_destroy(&file->idr);

	mutex_lock(&file->device->lists_mutex);
	if (!file->is_closed) {
		list_del(&file->list);
		file->is_closed = 1;
	}
	mutex_unlock(&file->device->lists_mutex);

	if (file->async_file)
		kref_put(&file->async_file->ref,
			 ib_uverbs_release_async_event_file);

	kref_put(&file->ref, ib_uverbs_release_file);

	return 0;
}

static const struct file_operations uverbs_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = ib_uverbs_ioctl,
};

static const struct file_operations uverbs_mmap_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.mmap    = ib_uverbs_mmap,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = ib_uverbs_ioctl,
};

static struct ib_client uverbs_client = {
	.name   = "uverbs",
	.add    = ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one
};

static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	int ret = -ENODEV;
	int srcu_key;
	struct ib_uverbs_device *dev = dev_get_drvdata(device);
	struct ib_device *ib_dev;

	if (!dev)
		return -ENODEV;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%s\n", ib_dev->name);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_dev_abi_version(struct device *device,
				    struct device_attribute *attr, char *buf)
{
	struct ib_uverbs_device *dev = dev_get_drvdata(device);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	if (!dev)
		return -ENODEV;
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);

static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));

static void ib_uverbs_add_one(struct ib_device *device)
{
	int devnum;
	dev_t base;
	struct ib_uverbs_device *uverbs_dev;
	int ret;

	if (!device->alloc_ucontext)
		return;

	uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
	if (!uverbs_dev)
		return;

	ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
	if (ret) {
		kfree(uverbs_dev);
		return;
	}

	atomic_set(&uverbs_dev->refcount, 1);
	init_completion(&uverbs_dev->comp);
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);
	kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
	mutex_init(&uverbs_dev->lists_mutex);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);

	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
	if (devnum >= IB_UVERBS_MAX_DEVICES)
		goto err;
	uverbs_dev->devnum = devnum;
	set_bit(devnum, dev_map);
	if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
		base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
	else
		base = IB_UVERBS_BASE_DEV + devnum;

	rcu_assign_pointer(uverbs_dev->ib_dev, device);
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;

	cdev_init(&uverbs_dev->cdev, NULL);
	uverbs_dev->cdev.owner = THIS_MODULE;
	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
	cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
	if (cdev_add(&uverbs_dev->cdev, base, 1))
		goto err_cdev;

	uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
					uverbs_dev->cdev.dev, uverbs_dev,
					"uverbs%d", uverbs_dev->devnum);
	if (IS_ERR(uverbs_dev->dev))
		goto err_cdev;

	if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
		goto err_class;

	if (!device->driver_specs_root) {
		const struct uverbs_object_tree_def *default_root[] = {
			uverbs_default_get_objects()};

		uverbs_dev->specs_root = uverbs_alloc_spec_tree(1,
								default_root);
		if (IS_ERR(uverbs_dev->specs_root))
			goto err_class;
	} else {
		uverbs_dev->specs_root = device->driver_specs_root;
		/*
		 * Take responsibility to free the specs allocated by the
		 * driver.
		 */
		device->driver_specs_root = NULL;
	}

	ib_set_client_data(device, &uverbs_client, uverbs_dev);

	return;

err_class:
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);

err_cdev:
	cdev_del(&uverbs_dev->cdev);
	clear_bit(devnum, dev_map);

err:
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	wait_for_completion(&uverbs_dev->comp);
	kobject_put(&uverbs_dev->kobj);
	return;
}

static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
	struct ib_device *ib_dev = ibcontext->device;
	struct task_struct *owning_process  = NULL;
	struct mm_struct   *owning_mm       = NULL;

	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
	if (!owning_process)
		return;

	owning_mm = get_task_mm(owning_process);
	if (!owning_mm) {
		pr_info("no mm, disassociate ucontext is pending task termination\n");
		while (1) {
			put_task_struct(owning_process);
			usleep_range(1000, 2000);
			owning_process = get_pid_task(ibcontext->tgid,
						      PIDTYPE_PID);
			if (!owning_process ||
			    owning_process->state == TASK_DEAD) {
				pr_info("disassociate ucontext done, task was terminated\n");
				/* in case task was dead need to release the
				 * task struct.
				 */
				if (owning_process)
					put_task_struct(owning_process);
				return;
			}
		}
	}

	down_write(&owning_mm->mmap_sem);
	ib_dev->disassociate_ucontext(ibcontext);
	up_write(&owning_mm->mmap_sem);
	mmput(owning_mm);
	put_task_struct(owning_process);
}

static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
					struct ib_device *ib_dev)
{
	struct ib_uverbs_file *file;
	struct ib_uverbs_async_event_file *event_file;
	struct ib_event event;

	/* Pending running commands to terminate */
	synchronize_srcu(&uverbs_dev->disassociate_srcu);
	event.event = IB_EVENT_DEVICE_FATAL;
	event.element.port_num = 0;
	event.device = ib_dev;

	mutex_lock(&uverbs_dev->lists_mutex);
	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
		struct ib_ucontext *ucontext;
		file = list_first_entry(&uverbs_dev->uverbs_file_list,
					struct ib_uverbs_file, list);
		file->is_closed = 1;
		list_del(&file->list);
		kref_get(&file->ref);
		mutex_unlock(&uverbs_dev->lists_mutex);

		mutex_lock(&file->cleanup_mutex);
		ucontext = file->ucontext;
		file->ucontext = NULL;
		mutex_unlock(&file->cleanup_mutex);

		/* At this point ib_uverbs_close cannot be running
		 * ib_uverbs_cleanup_ufile
		 */
		if (ucontext) {
			/* We must release the mutex before going ahead and
			 * calling disassociate_ucontext. disassociate_ucontext
			 * might end up indirectly calling uverbs_close,
			 * for example due to freeing the resources
			 * (e.g mmput).
			 */
			ib_uverbs_event_handler(&file->event_handler, &event);
			ib_uverbs_disassociate_ucontext(ucontext);
			mutex_lock(&file->cleanup_mutex);
			ib_uverbs_cleanup_ufile(file, true);
			mutex_unlock(&file->cleanup_mutex);
		}

		mutex_lock(&uverbs_dev->lists_mutex);
		kref_put(&file->ref, ib_uverbs_release_file);
	}

	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
		event_file = list_first_entry(&uverbs_dev->
					      uverbs_events_file_list,
					      struct ib_uverbs_async_event_file,
					      list);
		spin_lock_irq(&event_file->ev_queue.lock);
		event_file->ev_queue.is_closed = 1;
		spin_unlock_irq(&event_file->ev_queue.lock);

		list_del(&event_file->list);
		ib_unregister_event_handler(
			&event_file->uverbs_file->event_handler);
		event_file->uverbs_file->event_handler.device =
			NULL;

		wake_up_interruptible(&event_file->ev_queue.poll_wait);
		kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
	}
	mutex_unlock(&uverbs_dev->lists_mutex);
}

static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int wait_clients = 1;

	if (!uverbs_dev)
		return;

	dev_set_drvdata(uverbs_dev->dev, NULL);
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
	cdev_del(&uverbs_dev->cdev);
	clear_bit(uverbs_dev->devnum, dev_map);

	if (device->disassociate_ucontext) {
		/* We disassociate HW resources and immediately return.
		 * Userspace will see a EIO errno for all future access.
		 * Upon returning, ib_device may be freed internally and is not
		 * valid any more.
		 * uverbs_device is still available until all clients close
		 * their files, then the uverbs device ref count will be zero
		 * and its resources will be freed.
		 * Note: At this point no more files can be opened since the
		 * cdev was deleted, however active clients can still issue
		 * commands and close their open files.
		 */
		rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
		ib_uverbs_free_hw_resources(uverbs_dev, device);
		wait_clients = 0;
	}

	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	if (wait_clients)
		wait_for_completion(&uverbs_dev->comp);

	kobject_put(&uverbs_dev->kobj);
}

static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
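
/*
 * With the devnode callback above, the character nodes appear as
 * /dev/infiniband/uverbs<N> with mode 0666, so unprivileged processes can
 * open them; opening one enters ib_uverbs_open() above.
 */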

static int __init ib_uverbs_init(void)
{
	int ret;

	ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
				     IB_UVERBS_NUM_FIXED_MINOR,
				     "infiniband_verbs");
	if (ret) {
		pr_err("user_verbs: couldn't register device number\n");
		goto out;
	}

	ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
				  IB_UVERBS_NUM_DYNAMIC_MINOR,
				  "infiniband_verbs");
	if (ret) {
		pr_err("couldn't register dynamic device number\n");
		goto out_alloc;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		ret = PTR_ERR(uverbs_class);
		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
		goto out_chrdev;
	}

	uverbs_class->devnode = uverbs_devnode;

	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
	if (ret) {
		pr_err("user_verbs: couldn't create abi_version attribute\n");
		goto out_class;
	}

	ret = ib_register_client(&uverbs_client);
	if (ret) {
		pr_err("user_verbs: couldn't register client\n");
		goto out_class;
	}

	return 0;

out_class:
	class_destroy(uverbs_class);

out_chrdev:
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);

out_alloc:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);

out:
	return ret;
}

static void __exit ib_uverbs_cleanup(void)
{
	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);