/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>

#include <asm/uaccess.h>

#include <rdma/ib.h>

#include "uverbs.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = 32
};

#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

static struct class *uverbs_class;

DEFINE_SPINLOCK(ib_uverbs_idr_lock);
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);

static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);

static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
				     struct ib_device *ib_dev,
				     const char __user *buf, int in_len,
				     int out_len) = {
	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE]	= ib_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT]		= ib_uverbs_query_port,
	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ]		= ib_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= ib_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ]		= ib_uverbs_destroy_cq,
	[IB_USER_VERBS_CMD_CREATE_QP]		= ib_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP]		= ib_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP]		= ib_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP]		= ib_uverbs_destroy_qp,
	[IB_USER_VERBS_CMD_POST_SEND]		= ib_uverbs_post_send,
	[IB_USER_VERBS_CMD_POST_RECV]		= ib_uverbs_post_recv,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV]	= ib_uverbs_post_srq_recv,
	[IB_USER_VERBS_CMD_CREATE_AH]		= ib_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH]		= ib_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST]	= ib_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST]	= ib_uverbs_detach_mcast,
	[IB_USER_VERBS_CMD_CREATE_SRQ]		= ib_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
};

static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
				    struct ib_device *ib_dev,
				    struct ib_udata *ucore,
				    struct ib_udata *uhw) = {
	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device,
	[IB_USER_VERBS_EX_CMD_CREATE_CQ]	= ib_uverbs_ex_create_cq,
	[IB_USER_VERBS_EX_CMD_CREATE_QP]        = ib_uverbs_ex_create_qp,
};

static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);

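/*
 * Deallocate a memory window and drop the reference it holds on its
 * protection domain.  Kept as a common helper (rather than calling
 * ib_dealloc_mw() directly) so the PD use count stays consistent on
 * both the command path and the ucontext cleanup path below.
 */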
int uverbs_dealloc_mw(struct ib_mw *mw)
{
	struct ib_pd *pd = mw->pd;
	int ret;

	ret = mw->device->dealloc_mw(mw);
	if (!ret)
		atomic_dec(&pd->usecnt);
	return ret;
}

static void ib_uverbs_release_dev(struct kobject *kobj)
{
	struct ib_uverbs_device *dev =
		container_of(kobj, struct ib_uverbs_device, kobj);

	cleanup_srcu_struct(&dev->disassociate_srcu);
	kfree(dev);
}

static struct kobj_type ib_uverbs_dev_ktype = {
	.release = ib_uverbs_release_dev,
};

static void ib_uverbs_release_event_file(struct kref *ref)
{
	struct ib_uverbs_event_file *file =
		container_of(ref, struct ib_uverbs_event_file, ref);

	kfree(file);
}

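/*
 * Discard any completion and async events still queued against a CQ
 * that is being destroyed, and drop the reference taken on its
 * completion event file.
 */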
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			  struct ib_uverbs_event_file *ev_file,
			  struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	if (ev_file) {
		spin_lock_irq(&ev_file->lock);
		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
			list_del(&evt->list);
			kfree(evt);
		}
		spin_unlock_irq(&ev_file->lock);

		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
	}

	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->lock);
}

void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->lock);
}

static void ib_uverbs_detach_umcast(struct ib_qp *qp,
				    struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *mcast, *tmp;

	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
		list_del(&mcast->list);
		kfree(mcast);
	}
}

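/*
 * Destroy every uobject still attached to a ucontext, in dependency
 * order: AHs first, then MWs (before QPs, to support type 2A MWs),
 * flow rules, QPs, SRQs, CQs, MRs, XRCDs and finally PDs.
 */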
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
				      struct ib_ucontext *context)
{
	struct ib_uobject *uobj, *tmp;

	context->closing = 1;

	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
		struct ib_ah *ah = uobj->object;

		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
		ib_destroy_ah(ah);
		kfree(uobj);
	}

	/* Remove MWs before QPs, in order to support type 2A MWs. */
	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
		struct ib_mw *mw = uobj->object;

		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
		uverbs_dealloc_mw(mw);
		kfree(uobj);
	}

	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
		struct ib_flow *flow_id = uobj->object;

		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
		ib_destroy_flow(flow_id);
		kfree(uobj);
	}

	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
		struct ib_qp *qp = uobj->object;
		struct ib_uqp_object *uqp =
			container_of(uobj, struct ib_uqp_object, uevent.uobject);

		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
		if (qp != qp->real_qp) {
			ib_close_qp(qp);
		} else {
			ib_uverbs_detach_umcast(qp, uqp);
			ib_destroy_qp(qp);
		}
		ib_uverbs_release_uevent(file, &uqp->uevent);
		kfree(uqp);
	}

	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
		struct ib_srq *srq = uobj->object;
		struct ib_uevent_object *uevent =
			container_of(uobj, struct ib_uevent_object, uobject);

		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
		ib_destroy_srq(srq);
		ib_uverbs_release_uevent(file, uevent);
		kfree(uevent);
	}

	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
		struct ib_cq *cq = uobj->object;
		struct ib_uverbs_event_file *ev_file = cq->cq_context;
		struct ib_ucq_object *ucq =
			container_of(uobj, struct ib_ucq_object, uobject);

		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
		ib_destroy_cq(cq);
		ib_uverbs_release_ucq(file, ev_file, ucq);
		kfree(ucq);
	}

	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
		struct ib_mr *mr = uobj->object;

		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
		ib_dereg_mr(mr);
		kfree(uobj);
	}

	mutex_lock(&file->device->xrcd_tree_mutex);
	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
		struct ib_xrcd *xrcd = uobj->object;
		struct ib_uxrcd_object *uxrcd =
			container_of(uobj, struct ib_uxrcd_object, uobject);

		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
		ib_uverbs_dealloc_xrcd(file->device, xrcd);
		kfree(uxrcd);
	}
	mutex_unlock(&file->device->xrcd_tree_mutex);

	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
		struct ib_pd *pd = uobj->object;

		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
		ib_dealloc_pd(pd);
		kfree(uobj);
	}

	put_pid(context->tgid);

	return context->device->dealloc_ucontext(context);
}

static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
	complete(&dev->comp);
}

static void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);
	struct ib_device *ib_dev;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (ib_dev && !ib_dev->disassociate_ucontext)
		module_put(ib_dev->owner);
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);

	if (atomic_dec_and_test(&file->device->refcount))
		ib_uverbs_comp_dev(file->device);

	kfree(file);
}

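/*
 * Read one queued event.  Blocks unless O_NONBLOCK is set (in which
 * case it returns -EAGAIN), and returns -EIO once the underlying
 * device has been disassociated and no more events are pending.
 */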
static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
				    size_t count, loff_t *pos)
{
	struct ib_uverbs_event_file *file = filp->private_data;
	struct ib_uverbs_event *event;
	int eventsz;
	int ret = 0;

	spin_lock_irq(&file->lock);

	while (list_empty(&file->event_list)) {
		spin_unlock_irq(&file->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     (!list_empty(&file->event_list) ||
			/* The barriers built into wait_event_interruptible()
			 * and wake_up() guarantee this will see the null set
			 * without using RCU
			 */
					     !file->uverbs_file->device->ib_dev)))
			return -ERESTARTSYS;

		/* If the device was disassociated and no event remains, return an error */
		if (list_empty(&file->event_list) &&
		    !file->uverbs_file->device->ib_dev)
			return -EIO;

		spin_lock_irq(&file->lock);
	}

	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);

	if (file->is_async)
		eventsz = sizeof (struct ib_uverbs_async_event_desc);
	else
		eventsz = sizeof (struct ib_uverbs_comp_event_desc);

	if (eventsz > count) {
		ret   = -EINVAL;
		event = NULL;
	} else {
		list_del(file->event_list.next);
		if (event->counter) {
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&file->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	kfree(event);

	return ret;
}

static unsigned int ib_uverbs_event_poll(struct file *filp,
					 struct poll_table_struct *wait)
{
	unsigned int pollflags = 0;
	struct ib_uverbs_event_file *file = filp->private_data;

	poll_wait(filp, &file->poll_wait, wait);

	spin_lock_irq(&file->lock);
	if (!list_empty(&file->event_list))
		pollflags = POLLIN | POLLRDNORM;
	spin_unlock_irq(&file->lock);

	return pollflags;
}

static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_file *file = filp->private_data;

	return fasync_helper(fd, filp, on, &file->async_queue);
}

static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_event_file *file = filp->private_data;
	struct ib_uverbs_event *entry, *tmp;
	int closed_already = 0;

	mutex_lock(&file->uverbs_file->device->lists_mutex);
	spin_lock_irq(&file->lock);
	closed_already = file->is_closed;
	file->is_closed = 1;
	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->lock);
	if (!closed_already) {
		list_del(&file->list);
		if (file->is_async)
			ib_unregister_event_handler(&file->uverbs_file->
				event_handler);
	}
	mutex_unlock(&file->uverbs_file->device->lists_mutex);

	kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&file->ref, ib_uverbs_release_event_file);

	return 0;
}

static const struct file_operations uverbs_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_event_read,
	.poll    = ib_uverbs_event_poll,
	.release = ib_uverbs_event_close,
	.fasync  = ib_uverbs_event_fasync,
	.llseek	 = no_llseek,
};

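/*
 * Completion event callback: queue an event on the CQ's completion
 * channel and wake up readers.  May run in interrupt context, hence
 * the GFP_ATOMIC allocation and irqsave locking.
 */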
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_uverbs_event_file    *file = cq_context;
	struct ib_ucq_object	       *uobj;
	struct ib_uverbs_event	       *entry;
	unsigned long			flags;

	if (!file)
		return;

	spin_lock_irqsave(&file->lock, flags);
	if (file->is_closed) {
		spin_unlock_irqrestore(&file->lock, flags);
		return;
	}

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->lock, flags);
		return;
	}

	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;

	list_add_tail(&entry->list, &file->event_list);
	list_add_tail(&entry->obj_list, &uobj->comp_list);
	spin_unlock_irqrestore(&file->lock, flags);

	wake_up_interruptible(&file->poll_wait);
	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
}

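/*
 * Common helper for all asynchronous events: queue an entry on the
 * file's async event file and, when a per-object list is given, link
 * it there too so it can be reaped when the object is destroyed.
 */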
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->lock, flags);
	if (file->async_file->is_closed) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->desc.async.reserved   = 0;
	entry->counter               = counter;

	list_add_tail(&entry->list, &file->async_file->event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->lock, flags);

	wake_up_interruptible(&file->async_file->poll_wait);
	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);

	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	/* for XRC target qp's, check that qp is live */
	if (!event->element.qp->uobject || !event->element.qp->uobject->live)
		return;

	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
}

void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
}

void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
	kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
	file->async_file = NULL;
}

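/*
 * Create the anonymous-inode file backing a completion channel or the
 * per-context async event stream.  For the async case this also
 * registers the IB event handler, so only one async file may exist per
 * uverbs file (enforced by the WARN_ON below).
 */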
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
					struct ib_device	*ib_dev,
					int is_async)
{
	struct ib_uverbs_event_file *ev_file;
	struct file *filp;
	int ret;

	ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	kref_init(&ev_file->ref);
	spin_lock_init(&ev_file->lock);
	INIT_LIST_HEAD(&ev_file->event_list);
	init_waitqueue_head(&ev_file->poll_wait);
	ev_file->uverbs_file = uverbs_file;
	kref_get(&ev_file->uverbs_file->ref);
	ev_file->async_queue = NULL;
	ev_file->is_closed   = 0;

	filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
				  ev_file, O_RDONLY);
	if (IS_ERR(filp))
		goto err_put_refs;

	mutex_lock(&uverbs_file->device->lists_mutex);
	list_add_tail(&ev_file->list,
		      &uverbs_file->device->uverbs_events_file_list);
	mutex_unlock(&uverbs_file->device->lists_mutex);

	if (is_async) {
		WARN_ON(uverbs_file->async_file);
		uverbs_file->async_file = ev_file;
		kref_get(&uverbs_file->async_file->ref);
		INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
				      ib_dev,
				      ib_uverbs_event_handler);
		ret = ib_register_event_handler(&uverbs_file->event_handler);
		if (ret)
			goto err_put_file;

		/* At this point the async event file is fully initialized */
		ev_file->is_async = 1;
	}

	return filp;

err_put_file:
	fput(filp);
	kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
	uverbs_file->async_file = NULL;
	return ERR_PTR(ret);

err_put_refs:
	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&ev_file->ref, ib_uverbs_release_event_file);
	return filp;
}

/*
 * Look up a completion event file by FD.  If lookup is successful,
 * takes a ref to the event file struct that it returns; if
 * unsuccessful, returns NULL.
 */
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
	struct ib_uverbs_event_file *ev_file = NULL;
	struct fd f = fdget(fd);

	if (!f.file)
		return NULL;

	if (f.file->f_op != &uverbs_event_fops)
		goto out;

	ev_file = f.file->private_data;
	if (ev_file->is_async) {
		ev_file = NULL;
		goto out;
	}

	kref_get(&ev_file->ref);

out:
	fdput(f);
	return ev_file;
}

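/*
 * A command is permitted only if the low-level driver advertises it in
 * the matching uverbs_cmd_mask (legacy) or uverbs_ex_cmd_mask
 * (extended) bitmap.
 */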
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
	u64 mask;

	if (command <= IB_USER_VERBS_CMD_OPEN_QP)
		mask = ib_dev->uverbs_cmd_mask;
	else
		mask = ib_dev->uverbs_ex_cmd_mask;

	if (mask & ((u64)1 << command))
		return 0;

	return -1;
}

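/*
 * Dispatch one userspace command.  Every write starts with a struct
 * ib_uverbs_cmd_hdr giving the command number and the request/response
 * sizes in 4-byte words; extended commands carry an additional struct
 * ib_uverbs_ex_cmd_hdr and count their core and provider payloads in
 * 8-byte words.
 */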
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			     size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_device *ib_dev;
	struct ib_uverbs_cmd_hdr hdr;
	__u32 command;
	__u32 flags;
	int srcu_key;
	ssize_t ret;

	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
		return -EACCES;

	if (count < sizeof hdr)
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof hdr))
		return -EFAULT;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto out;
	}

	if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
				   IB_USER_VERBS_CMD_COMMAND_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
	if (verify_command_mask(ib_dev, command)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	if (!file->ucontext &&
	    command != IB_USER_VERBS_CMD_GET_CONTEXT) {
		ret = -EINVAL;
		goto out;
	}

	flags = (hdr.command &
		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;

	if (!flags) {
		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
		    !uverbs_cmd_table[command]) {
			ret = -EINVAL;
			goto out;
		}

		if (hdr.in_words * 4 != count) {
			ret = -EINVAL;
			goto out;
		}

		ret = uverbs_cmd_table[command](file, ib_dev,
						 buf + sizeof(hdr),
						 hdr.in_words * 4,
						 hdr.out_words * 4);

	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
		struct ib_uverbs_ex_cmd_hdr ex_hdr;
		struct ib_udata ucore;
		struct ib_udata uhw;
		size_t written_count = count;

		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
		    !uverbs_ex_cmd_table[command]) {
			ret = -ENOSYS;
			goto out;
		}

		if (!file->ucontext) {
			ret = -EINVAL;
			goto out;
		}

		if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
			ret = -EINVAL;
			goto out;
		}

		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
			ret = -EFAULT;
			goto out;
		}

		count -= sizeof(hdr) + sizeof(ex_hdr);
		buf += sizeof(hdr) + sizeof(ex_hdr);

		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
			ret = -EINVAL;
			goto out;
		}

		if (ex_hdr.cmd_hdr_reserved) {
			ret = -EINVAL;
			goto out;
		}

		if (ex_hdr.response) {
			if (!hdr.out_words && !ex_hdr.provider_out_words) {
				ret = -EINVAL;
				goto out;
			}

			if (!access_ok(VERIFY_WRITE,
				       (void __user *) (unsigned long) ex_hdr.response,
				       (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
				ret = -EFAULT;
				goto out;
			}
		} else {
			if (hdr.out_words || ex_hdr.provider_out_words) {
				ret = -EINVAL;
				goto out;
			}
		}

		INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
				       hdr.in_words * 8, hdr.out_words * 8);

		INIT_UDATA_BUF_OR_NULL(&uhw,
				       buf + ucore.inlen,
				       (unsigned long) ex_hdr.response + ucore.outlen,
				       ex_hdr.provider_in_words * 8,
				       ex_hdr.provider_out_words * 8);

		ret = uverbs_ex_cmd_table[command](file,
						   ib_dev,
						   &ucore,
						   &uhw);
		if (!ret)
			ret = written_count;
	} else {
		ret = -ENOSYS;
	}

out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_device *ib_dev;
	int ret = 0;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto out;
	}

	if (!file->ucontext)
		ret = -ENODEV;
	else
		ret = ib_dev->mmap(file->ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either immediately return -ENXIO, or all
 *    required initialization will be done.
 */
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_device *dev;
	struct ib_uverbs_file *file;
	struct ib_device *ib_dev;
	int ret;
	int module_dependent;
	int srcu_key;

	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
	if (!atomic_inc_not_zero(&dev->refcount))
		return -ENXIO;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	mutex_lock(&dev->lists_mutex);
	ib_dev = srcu_dereference(dev->ib_dev,
				  &dev->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto err;
	}

	/* In case IB device supports disassociate ucontext, there is no hard
	 * dependency between uverbs device and its low level device.
	 */
	module_dependent = !(ib_dev->disassociate_ucontext);

	if (module_dependent) {
		if (!try_module_get(ib_dev->owner)) {
			ret = -ENODEV;
			goto err;
		}
	}

	file = kzalloc(sizeof(*file), GFP_KERNEL);
	if (!file) {
		ret = -ENOMEM;
		if (module_dependent)
			goto err_module;

		goto err;
	}

	file->device	 = dev;
	file->ucontext	 = NULL;
	file->async_file = NULL;
	kref_init(&file->ref);
	mutex_init(&file->mutex);

	filp->private_data = file;
	kobject_get(&dev->kobj);
	list_add_tail(&file->list, &dev->uverbs_file_list);
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return nonseekable_open(inode, filp);

err_module:
	module_put(ib_dev->owner);

err:
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
	if (atomic_dec_and_test(&dev->refcount))
		ib_uverbs_comp_dev(dev);

	return ret;
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_uverbs_device *dev = file->device;
	struct ib_ucontext *ucontext = NULL;

	mutex_lock(&file->device->lists_mutex);
	ucontext = file->ucontext;
	file->ucontext = NULL;
	if (!file->is_closed) {
		list_del(&file->list);
		file->is_closed = 1;
	}
	mutex_unlock(&file->device->lists_mutex);
	if (ucontext)
		ib_uverbs_cleanup_ucontext(file, ucontext);

	if (file->async_file)
		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);

	kref_put(&file->ref, ib_uverbs_release_file);
	kobject_put(&dev->kobj);

	return 0;
}

static const struct file_operations uverbs_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
};

static const struct file_operations uverbs_mmap_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.mmap    = ib_uverbs_mmap,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
};

static struct ib_client uverbs_client = {
	.name   = "uverbs",
	.add    = ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one
};

static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	int ret = -ENODEV;
	int srcu_key;
	struct ib_uverbs_device *dev = dev_get_drvdata(device);
	struct ib_device *ib_dev;

	if (!dev)
		return -ENODEV;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%s\n", ib_dev->name);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_dev_abi_version(struct device *device,
				    struct device_attribute *attr, char *buf)
{
	struct ib_uverbs_device *dev = dev_get_drvdata(device);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	if (!dev)
		return -ENODEV;
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);

static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));

static dev_t overflow_maj;
static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);

/*
 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
 * requesting a new major number and doubling the number of max devices we
 * support. It's stupid, but simple.
 */
static int find_overflow_devnum(void)
{
	int ret;

	if (!overflow_maj) {
		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
					  "infiniband_verbs");
		if (ret) {
			pr_err("user_verbs: couldn't register dynamic device number\n");
			return ret;
		}
	}

	ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
	if (ret >= IB_UVERBS_MAX_DEVICES)
		return -1;

	return ret;
}

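/*
 * Device registration: allocate an ib_uverbs_device, claim a minor
 * number (falling back to a dynamically allocated major once the
 * static range is exhausted), and create the character device and its
 * sysfs attributes.
 */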
static void ib_uverbs_add_one(struct ib_device *device)
{
	int devnum;
	dev_t base;
	struct ib_uverbs_device *uverbs_dev;
	int ret;

	if (!device->alloc_ucontext)
		return;

	uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
	if (!uverbs_dev)
		return;

	ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
	if (ret) {
		kfree(uverbs_dev);
		return;
	}

	atomic_set(&uverbs_dev->refcount, 1);
	init_completion(&uverbs_dev->comp);
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);
	kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
	mutex_init(&uverbs_dev->lists_mutex);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);

	spin_lock(&map_lock);
	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
	if (devnum >= IB_UVERBS_MAX_DEVICES) {
		spin_unlock(&map_lock);
		devnum = find_overflow_devnum();
		if (devnum < 0)
			goto err;

		spin_lock(&map_lock);
		uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
		base = devnum + overflow_maj;
		set_bit(devnum, overflow_map);
	} else {
		uverbs_dev->devnum = devnum;
		base = devnum + IB_UVERBS_BASE_DEV;
		set_bit(devnum, dev_map);
	}
	spin_unlock(&map_lock);

	rcu_assign_pointer(uverbs_dev->ib_dev, device);
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;

	cdev_init(&uverbs_dev->cdev, NULL);
	uverbs_dev->cdev.owner = THIS_MODULE;
	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
	uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
	if (cdev_add(&uverbs_dev->cdev, base, 1))
		goto err_cdev;

	uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
					uverbs_dev->cdev.dev, uverbs_dev,
					"uverbs%d", uverbs_dev->devnum);
	if (IS_ERR(uverbs_dev->dev))
		goto err_cdev;

	if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
		goto err_class;

	ib_set_client_data(device, &uverbs_client, uverbs_dev);

	return;

err_class:
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);

err_cdev:
	cdev_del(&uverbs_dev->cdev);
	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(devnum, dev_map);
	else
		clear_bit(devnum, overflow_map);

err:
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	wait_for_completion(&uverbs_dev->comp);
	kobject_put(&uverbs_dev->kobj);
	return;
}

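/*
 * Device disassociation: raise IB_EVENT_DEVICE_FATAL on every open
 * file, detach each ucontext from the hardware and destroy its
 * objects, then mark all event files closed and wake any readers and
 * pollers still waiting on them.
 */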
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
					struct ib_device *ib_dev)
{
	struct ib_uverbs_file *file;
	struct ib_uverbs_event_file *event_file;
	struct ib_event event;

	/* Pending running commands to terminate */
	synchronize_srcu(&uverbs_dev->disassociate_srcu);
	event.event = IB_EVENT_DEVICE_FATAL;
	event.element.port_num = 0;
	event.device = ib_dev;

	mutex_lock(&uverbs_dev->lists_mutex);
	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
		struct ib_ucontext *ucontext;

		file = list_first_entry(&uverbs_dev->uverbs_file_list,
					struct ib_uverbs_file, list);
		file->is_closed = 1;
		ucontext = file->ucontext;
		list_del(&file->list);
		file->ucontext = NULL;
		kref_get(&file->ref);
		mutex_unlock(&uverbs_dev->lists_mutex);
		/* We must release the mutex before going ahead and calling
		 * disassociate_ucontext. disassociate_ucontext might end up
		 * indirectly calling uverbs_close, for example due to freeing
		 * the resources (e.g. mmput).
		 */
		ib_uverbs_event_handler(&file->event_handler, &event);
		if (ucontext) {
			ib_dev->disassociate_ucontext(ucontext);
			ib_uverbs_cleanup_ucontext(file, ucontext);
		}

		mutex_lock(&uverbs_dev->lists_mutex);
		kref_put(&file->ref, ib_uverbs_release_file);
	}

	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
		event_file = list_first_entry(&uverbs_dev->
					      uverbs_events_file_list,
					      struct ib_uverbs_event_file,
					      list);
		spin_lock_irq(&event_file->lock);
		event_file->is_closed = 1;
		spin_unlock_irq(&event_file->lock);

		list_del(&event_file->list);
		if (event_file->is_async) {
			ib_unregister_event_handler(&event_file->uverbs_file->
						    event_handler);
			event_file->uverbs_file->event_handler.device = NULL;
		}

		wake_up_interruptible(&event_file->poll_wait);
		kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
	}
	mutex_unlock(&uverbs_dev->lists_mutex);
}

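/*
 * Device removal.  When the driver supports disassociate_ucontext the
 * HW resources are detached immediately and we return without waiting;
 * otherwise we block until every client has closed its file and the
 * device reference count drops to zero.
 */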
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int wait_clients = 1;

	if (!uverbs_dev)
		return;

	dev_set_drvdata(uverbs_dev->dev, NULL);
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
	cdev_del(&uverbs_dev->cdev);

	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(uverbs_dev->devnum, dev_map);
	else
		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);

	if (device->disassociate_ucontext) {
		/* We disassociate HW resources and immediately return.
		 * Userspace will see a EIO errno for all future access.
		 * Upon returning, ib_device may be freed internally and is not
		 * valid any more.
		 * uverbs_device is still available until all clients close
		 * their files, then the uverbs device ref count will be zero
		 * and its resources will be freed.
		 * Note: At this point no more files can be opened since the
		 * cdev was deleted, however active clients can still issue
		 * commands and close their open files.
		 */
		rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
		ib_uverbs_free_hw_resources(uverbs_dev, device);
		wait_clients = 0;
	}

	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	if (wait_clients)
		wait_for_completion(&uverbs_dev->comp);
	kobject_put(&uverbs_dev->kobj);
}

static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}

static int __init ib_uverbs_init(void)
{
	int ret;

	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
				     "infiniband_verbs");
	if (ret) {
		pr_err("user_verbs: couldn't register device number\n");
		goto out;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		ret = PTR_ERR(uverbs_class);
		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
		goto out_chrdev;
	}

	uverbs_class->devnode = uverbs_devnode;

	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
	if (ret) {
		pr_err("user_verbs: couldn't create abi_version attribute\n");
		goto out_class;
	}

	ret = ib_register_client(&uverbs_client);
	if (ret) {
		pr_err("user_verbs: couldn't register client\n");
		goto out_class;
1308 1309 1310 1311 1312
	}

	return 0;

out_class:
	class_destroy(uverbs_class);

out_chrdev:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);

out:
	return ret;
}

static void __exit ib_uverbs_cleanup(void)
{
	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
	if (overflow_maj)
		unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
	idr_destroy(&ib_uverbs_pd_idr);
	idr_destroy(&ib_uverbs_mr_idr);
	idr_destroy(&ib_uverbs_mw_idr);
	idr_destroy(&ib_uverbs_ah_idr);
	idr_destroy(&ib_uverbs_cq_idr);
	idr_destroy(&ib_uverbs_qp_idr);
	idr_destroy(&ib_uverbs_srq_idr);
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);