/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>

#include <asm/uaccess.h>

#include "uverbs.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

/*
 * Device numbering for uverbs nodes: the major number, the first minor
 * of the range, and the size of the device bitmaps declared in this file.
 */
enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = 32
};

/* dev_t built from the major and base minor above. */
#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

static struct class *uverbs_class;

67
DEFINE_SPINLOCK(ib_uverbs_idr_lock);
68 69 70 71 72 73
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
74
DEFINE_IDR(ib_uverbs_srq_idr);
75
DEFINE_IDR(ib_uverbs_xrcd_idr);
76
DEFINE_IDR(ib_uverbs_rule_idr);
77

78
static DEFINE_SPINLOCK(map_lock);
79 80 81
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);

/*
 * Dispatch table for non-extended commands, indexed by the command number
 * taken from the write() header.  Unlisted slots are NULL and are rejected
 * by ib_uverbs_write().
 */
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
				     struct ib_device *ib_dev,
				     const char __user *buf, int in_len,
				     int out_len) = {
	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE]	= ib_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT]		= ib_uverbs_query_port,
	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ]		= ib_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= ib_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ]		= ib_uverbs_destroy_cq,
	[IB_USER_VERBS_CMD_CREATE_QP]		= ib_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP]		= ib_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP]		= ib_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP]		= ib_uverbs_destroy_qp,
	[IB_USER_VERBS_CMD_POST_SEND]		= ib_uverbs_post_send,
	[IB_USER_VERBS_CMD_POST_RECV]		= ib_uverbs_post_recv,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV]	= ib_uverbs_post_srq_recv,
	[IB_USER_VERBS_CMD_CREATE_AH]		= ib_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH]		= ib_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST]	= ib_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST]	= ib_uverbs_detach_mcast,
	[IB_USER_VERBS_CMD_CREATE_SRQ]		= ib_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
};

static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
123
				    struct ib_device *ib_dev,
124 125 126
				    struct ib_udata *ucore,
				    struct ib_udata *uhw) = {
	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
127
	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
128
	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device,
129
	[IB_USER_VERBS_EX_CMD_CREATE_CQ]	= ib_uverbs_ex_create_cq,
130
	[IB_USER_VERBS_EX_CMD_CREATE_QP]        = ib_uverbs_ex_create_qp,
131 132 133
};

/* Client callbacks, referenced by uverbs_client before their definitions. */
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
/*
 * Free a memory window through the device's dealloc_mw() hook.  When the
 * hook succeeds, decrement the usecnt of the protection domain the MW
 * belonged to.  The PD pointer is read before the hook is called.
 *
 * Returns the value returned by dealloc_mw().
 */
int uverbs_dealloc_mw(struct ib_mw *mw)
{
	struct ib_pd *parent_pd = mw->pd;
	int rc = mw->device->dealloc_mw(mw);

	if (rc)
		return rc;

	atomic_dec(&parent_pd->usecnt);
	return rc;
}

static void ib_uverbs_release_dev(struct kobject *kobj)
148 149
{
	struct ib_uverbs_device *dev =
150
		container_of(kobj, struct ib_uverbs_device, kobj);
151

152
	cleanup_srcu_struct(&dev->disassociate_srcu);
153
	kfree(dev);
154 155
}

/* kobject type for ib_uverbs_device; only a release hook is set. */
static struct kobj_type ib_uverbs_dev_ktype = {
	.release = ib_uverbs_release_dev,
};

/* kref release callback: frees the event file that embeds @ref. */
static void ib_uverbs_release_event_file(struct kref *ref)
{
	kfree(container_of(ref, struct ib_uverbs_event_file, ref));
}

/*
 * Discard events still queued for a CQ object.
 *
 * If @ev_file is non-NULL, every event on uobj->comp_list is unlinked from
 * its event-file list and freed under ev_file->lock, and one reference on
 * @ev_file is dropped.  Events on uobj->async_list are then unlinked and
 * freed under the lock of file->async_file.
 */
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
			  struct ib_uverbs_event_file *ev_file,
			  struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event_file *async_file;
	struct ib_uverbs_event *cur, *next;

	if (ev_file) {
		spin_lock_irq(&ev_file->lock);
		list_for_each_entry_safe(cur, next, &uobj->comp_list, obj_list) {
			list_del(&cur->list);
			kfree(cur);
		}
		spin_unlock_irq(&ev_file->lock);

		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
	}

	async_file = file->async_file;

	spin_lock_irq(&async_file->lock);
	list_for_each_entry_safe(cur, next, &uobj->async_list, obj_list) {
		list_del(&cur->list);
		kfree(cur);
	}
	spin_unlock_irq(&async_file->lock);
}

void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
			      struct ib_uevent_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	spin_lock_irq(&file->async_file->lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&file->async_file->lock);
}

/*
 * Detach @qp from every multicast group recorded on uobj->mcast_list,
 * removing and freeing each list entry as it goes.
 */
static void ib_uverbs_detach_umcast(struct ib_qp *qp,
				    struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *entry, *next;

	list_for_each_entry_safe(entry, next, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &entry->gid, entry->lid);
		list_del(&entry->list);
		kfree(entry);
	}
}

static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
				      struct ib_ucontext *context)
220 221 222
{
	struct ib_uobject *uobj, *tmp;

223 224
	context->closing = 1;

225
	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
226 227 228
		struct ib_ah *ah = uobj->object;

		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
229 230 231
		ib_destroy_ah(ah);
		kfree(uobj);
	}
232

233 234 235 236 237
	/* Remove MWs before QPs, in order to support type 2A MWs. */
	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
		struct ib_mw *mw = uobj->object;

		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
238
		uverbs_dealloc_mw(mw);
239 240 241
		kfree(uobj);
	}

242 243 244 245 246 247 248 249
	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
		struct ib_flow *flow_id = uobj->object;

		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
		ib_destroy_flow(flow_id);
		kfree(uobj);
	}

250
	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
251
		struct ib_qp *qp = uobj->object;
252 253
		struct ib_uqp_object *uqp =
			container_of(uobj, struct ib_uqp_object, uevent.uobject);
254 255

		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
256 257
		if (qp != qp->real_qp) {
			ib_close_qp(qp);
258 259 260 261
		} else {
			ib_uverbs_detach_umcast(qp, uqp);
			ib_destroy_qp(qp);
		}
262 263
		ib_uverbs_release_uevent(file, &uqp->uevent);
		kfree(uqp);
264 265
	}

266 267 268 269 270 271 272 273 274 275 276
	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
		struct ib_srq *srq = uobj->object;
		struct ib_uevent_object *uevent =
			container_of(uobj, struct ib_uevent_object, uobject);

		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
		ib_destroy_srq(srq);
		ib_uverbs_release_uevent(file, uevent);
		kfree(uevent);
	}

277
	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
278
		struct ib_cq *cq = uobj->object;
279 280 281
		struct ib_uverbs_event_file *ev_file = cq->cq_context;
		struct ib_ucq_object *ucq =
			container_of(uobj, struct ib_ucq_object, uobject);
282 283

		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
284
		ib_destroy_cq(cq);
285 286
		ib_uverbs_release_ucq(file, ev_file, ucq);
		kfree(ucq);
287 288 289
	}

	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
290
		struct ib_mr *mr = uobj->object;
291

292
		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
293
		ib_dereg_mr(mr);
294
		kfree(uobj);
295 296
	}

297 298 299 300 301 302 303 304 305 306 307 308
	mutex_lock(&file->device->xrcd_tree_mutex);
	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
		struct ib_xrcd *xrcd = uobj->object;
		struct ib_uxrcd_object *uxrcd =
			container_of(uobj, struct ib_uxrcd_object, uobject);

		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
		ib_uverbs_dealloc_xrcd(file->device, xrcd);
		kfree(uxrcd);
	}
	mutex_unlock(&file->device->xrcd_tree_mutex);

309
	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
310 311 312
		struct ib_pd *pd = uobj->object;

		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
313 314 315 316
		ib_dealloc_pd(pd);
		kfree(uobj);
	}

317 318
	put_pid(context->tgid);

319 320 321
	return context->device->dealloc_ucontext(context);
}

/*
 * Signal dev->comp.  Callers in this file invoke it when dev->refcount
 * drops to zero.
 */
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
	complete(&dev->comp);
}

static void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);
331 332 333 334 335 336 337 338 339
	struct ib_device *ib_dev;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (ib_dev && !ib_dev->disassociate_ucontext)
		module_put(ib_dev->owner);
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
340

341 342
	if (atomic_dec_and_test(&file->device->refcount))
		ib_uverbs_comp_dev(file->device);
343

344 345 346 347 348 349 350
	kfree(file);
}

/*
 * read() handler for event files: deliver exactly one queued event.
 *
 * Blocks until an event is available unless O_NONBLOCK is set.
 *
 * Returns the descriptor size on success, or:
 *   -EAGAIN       list empty and O_NONBLOCK set
 *   -ERESTARTSYS  the wait was interrupted
 *   -EIO          list empty and the device has been disassociated
 *   -EINVAL       @count is smaller than one event descriptor
 *   -EFAULT       copy to userspace failed (the event has already been
 *                 dequeued by then and is freed)
 */
static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
				    size_t count, loff_t *pos)
{
	struct ib_uverbs_event_file *file = filp->private_data;
	struct ib_uverbs_event *event;
	int eventsz;
	int ret = 0;

	spin_lock_irq(&file->lock);

	while (list_empty(&file->event_list)) {
		spin_unlock_irq(&file->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     (!list_empty(&file->event_list) ||
			/* The barriers built into wait_event_interruptible()
			 * and wake_up() guarantee this will see the null set
			 * without using RCU
			 */
					     !file->uverbs_file->device->ib_dev)))
			return -ERESTARTSYS;

		/* If device was disassociated and no event exists set an error */
		if (list_empty(&file->event_list) &&
		    !file->uverbs_file->device->ib_dev)
			return -EIO;

		spin_lock_irq(&file->lock);
	}

	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);

	if (file->is_async)
		eventsz = sizeof (struct ib_uverbs_async_event_desc);
	else
		eventsz = sizeof (struct ib_uverbs_comp_event_desc);

	if (eventsz > count) {
		/* Leave the event queued; NULL makes the kfree() below a no-op. */
		ret   = -EINVAL;
		event = NULL;
	} else {
		list_del(file->event_list.next);
		if (event->counter) {
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&file->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	kfree(event);

	return ret;
}

static unsigned int ib_uverbs_event_poll(struct file *filp,
					 struct poll_table_struct *wait)
{
	unsigned int pollflags = 0;
	struct ib_uverbs_event_file *file = filp->private_data;

	poll_wait(filp, &file->poll_wait, wait);

	spin_lock_irq(&file->lock);
421
	if (!list_empty(&file->event_list))
422 423 424 425 426 427
		pollflags = POLLIN | POLLRDNORM;
	spin_unlock_irq(&file->lock);

	return pollflags;
}

/*
 * fasync() handler for event files: hands the request to fasync_helper()
 * using the event file's async_queue.
 */
static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_event_file *ev_file = filp->private_data;
	struct fasync_struct **queue = &ev_file->async_queue;

	return fasync_helper(fd, filp, on, queue);
}

static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_event_file *file = filp->private_data;
438
	struct ib_uverbs_event *entry, *tmp;
439
	int closed_already = 0;
440

441
	mutex_lock(&file->uverbs_file->device->lists_mutex);
442
	spin_lock_irq(&file->lock);
443
	closed_already = file->is_closed;
444
	file->is_closed = 1;
445 446 447 448 449 450
	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
		if (entry->counter)
			list_del(&entry->obj_list);
		kfree(entry);
	}
	spin_unlock_irq(&file->lock);
451 452 453 454 455 456 457
	if (!closed_already) {
		list_del(&file->list);
		if (file->is_async)
			ib_unregister_event_handler(&file->uverbs_file->
				event_handler);
	}
	mutex_unlock(&file->uverbs_file->device->lists_mutex);
458

459
	kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
460
	kref_put(&file->ref, ib_uverbs_release_event_file);
461 462 463 464

	return 0;
}

static const struct file_operations uverbs_event_fops = {
466
	.owner	 = THIS_MODULE,
A
Alexander Chiang 已提交
467
	.read	 = ib_uverbs_event_read,
468
	.poll    = ib_uverbs_event_poll,
G
Gleb Natapov 已提交
469
	.release = ib_uverbs_event_close,
470 471
	.fasync  = ib_uverbs_event_fasync,
	.llseek	 = no_llseek,
472 473 474 475
};

void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
476 477 478 479 480 481 482 483 484
	struct ib_uverbs_event_file    *file = cq_context;
	struct ib_ucq_object	       *uobj;
	struct ib_uverbs_event	       *entry;
	unsigned long			flags;

	if (!file)
		return;

	spin_lock_irqsave(&file->lock, flags);
485
	if (file->is_closed) {
486 487 488
		spin_unlock_irqrestore(&file->lock, flags);
		return;
	}
489 490

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
491 492
	if (!entry) {
		spin_unlock_irqrestore(&file->lock, flags);
493
		return;
494
	}
495

496 497 498 499
	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);

	entry->desc.comp.cq_handle = cq->uobject->user_handle;
	entry->counter		   = &uobj->comp_events_reported;
500

501
	list_add_tail(&entry->list, &file->event_list);
502
	list_add_tail(&entry->obj_list, &uobj->comp_list);
503
	spin_unlock_irqrestore(&file->lock, flags);
504

505 506
	wake_up_interruptible(&file->poll_wait);
	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
507 508 509
}

/*
 * Queue an asynchronous event on @file's async event file and wake any
 * reader.
 *
 * @element:  value stored in the descriptor's element field
 * @event:    value stored in the descriptor's event_type field
 * @obj_list: optional per-object list the entry is also linked on
 * @counter:  optional counter, incremented by ib_uverbs_event_read() when
 *            the event is delivered
 *
 * The event is dropped if the async file is closed or the allocation
 * fails.
 */
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->lock, flags);
	if (file->async_file->is_closed) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->desc.async.reserved   = 0;
	entry->counter               = counter;

	list_add_tail(&entry->list, &file->async_file->event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->lock, flags);

	wake_up_interruptible(&file->async_file->poll_wait);
	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
545 546
	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
						  struct ib_ucq_object, uobject);
547

548
	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
549 550
				event->event, &uobj->async_list,
				&uobj->async_events_reported);
551 552 553 554
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
555 556
	struct ib_uevent_object *uobj;

557 558 559 560
	/* for XRC target qp's, check that qp is live */
	if (!event->element.qp->uobject || !event->element.qp->uobject->live)
		return;

561 562 563 564 565 566
	uobj = container_of(event->element.qp->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
567 568
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
571 572 573 574 575 576 577 578
	struct ib_uevent_object *uobj;

	uobj = container_of(event->element.srq->uobject,
			    struct ib_uevent_object, uobject);

	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
				event->event, &uobj->event_list,
				&uobj->events_reported);
579 580
}

void ib_uverbs_event_handler(struct ib_event_handler *handler,
			     struct ib_event *event)
583 584 585 586
{
	struct ib_uverbs_file *file =
		container_of(handler, struct ib_uverbs_file, event_handler);

587 588
	ib_uverbs_async_handler(file, event->element.port_num, event->event,
				NULL, NULL);
589 590
}

void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
{
	kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
	file->async_file = NULL;
}

struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
598
					struct ib_device	*ib_dev,
599
					int is_async)
600
{
601
	struct ib_uverbs_event_file *ev_file;
602
	struct file *filp;
603
	int ret;
604

605
	ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
606 607 608 609 610 611 612 613
	if (!ev_file)
		return ERR_PTR(-ENOMEM);

	kref_init(&ev_file->ref);
	spin_lock_init(&ev_file->lock);
	INIT_LIST_HEAD(&ev_file->event_list);
	init_waitqueue_head(&ev_file->poll_wait);
	ev_file->uverbs_file = uverbs_file;
614
	kref_get(&ev_file->uverbs_file->ref);
615
	ev_file->async_queue = NULL;
616
	ev_file->is_closed   = 0;
617

618
	filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
619
				  ev_file, O_RDONLY);
620
	if (IS_ERR(filp))
621 622
		goto err_put_refs;

623 624 625 626 627
	mutex_lock(&uverbs_file->device->lists_mutex);
	list_add_tail(&ev_file->list,
		      &uverbs_file->device->uverbs_events_file_list);
	mutex_unlock(&uverbs_file->device->lists_mutex);

628 629 630 631 632
	if (is_async) {
		WARN_ON(uverbs_file->async_file);
		uverbs_file->async_file = ev_file;
		kref_get(&uverbs_file->async_file->ref);
		INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
633
				      ib_dev,
634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649
				      ib_uverbs_event_handler);
		ret = ib_register_event_handler(&uverbs_file->event_handler);
		if (ret)
			goto err_put_file;

		/* At that point async file stuff was fully set */
		ev_file->is_async = 1;
	}

	return filp;

err_put_file:
	fput(filp);
	kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
	uverbs_file->async_file = NULL;
	return ERR_PTR(ret);
650

651 652 653
err_put_refs:
	kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
	kref_put(&ev_file->ref, ib_uverbs_release_event_file);
654 655 656 657 658 659 660 661 662 663 664
	return filp;
}

/*
 * Look up a completion event file by FD.  If lookup is successful,
 * takes a ref to the event file struct that it returns; if
 * unsuccessful, returns NULL.
 */
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
{
	struct ib_uverbs_event_file *ev_file = NULL;
665
	struct fd f = fdget(fd);
666

667
	if (!f.file)
668 669
		return NULL;

670
	if (f.file->f_op != &uverbs_event_fops)
671 672
		goto out;

673
	ev_file = f.file->private_data;
674 675 676 677 678 679 680 681
	if (ev_file->is_async) {
		ev_file = NULL;
		goto out;
	}

	kref_get(&ev_file->ref);

out:
682
	fdput(f);
683
	return ev_file;
684 685
}

/*
 * Check whether @ib_dev advertises support for @command.
 *
 * Commands numbered up to IB_USER_VERBS_CMD_OPEN_QP are looked up in
 * uverbs_cmd_mask, higher ones in uverbs_ex_cmd_mask.
 *
 * Returns 0 if the corresponding bit is set, -1 otherwise.
 */
static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
{
	u64 mask;

	if (command <= IB_USER_VERBS_CMD_OPEN_QP)
		mask = ib_dev->uverbs_cmd_mask;
	else
		mask = ib_dev->uverbs_ex_cmd_mask;

	/*
	 * @command comes from userspace.  Shifting a 64-bit value by 64 or
	 * more is undefined behaviour, and on common hardware it wraps and
	 * aliases a low bit, so a number the mask cannot represent is
	 * reported as unsupported.
	 */
	if (command >= 8 * sizeof(mask))
		return -1;

	if (mask & ((u64)1 << command))
		return 0;

	return -1;
}

/*
 * write() handler: parse a command header from userspace and dispatch it.
 *
 * The buffer starts with a struct ib_uverbs_cmd_hdr.  With no flags set
 * the command is looked up in uverbs_cmd_table and in_words/out_words are
 * counted in 4-byte units.  With IB_USER_VERBS_CMD_FLAG_EXTENDED a
 * struct ib_uverbs_ex_cmd_hdr follows, the command is looked up in
 * uverbs_ex_cmd_table and word counts are in 8-byte units.
 *
 * The whole call runs inside an SRCU read-side section on the device's
 * disassociate_srcu.
 *
 * Returns the handler's result for legacy commands.  For extended
 * commands it returns the original @count when the handler returns 0,
 * otherwise the handler's error.  Validation failures return:
 *   -EINVAL      malformed header, length mismatch, no context yet,
 *                unknown legacy command, bad response/out_words pairing
 *   -EFAULT      header copy failed or response buffer not writable
 *   -EIO         device has been disassociated
 *   -EOPNOTSUPP  command not enabled in the device's command mask
 *   -ENOSYS      unknown extended command or unknown flags
 */
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			     size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_device *ib_dev;
	struct ib_uverbs_cmd_hdr hdr;
	__u32 command;
	__u32 flags;
	int srcu_key;
	ssize_t ret;

	if (count < sizeof hdr)
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof hdr))
		return -EFAULT;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto out;
	}

	/* Reject bits outside the flags and command fields. */
	if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
				   IB_USER_VERBS_CMD_COMMAND_MASK)) {
		ret = -EINVAL;
		goto out;
	}

	command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
	if (verify_command_mask(ib_dev, command)) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	/* Only GET_CONTEXT is allowed before a context exists. */
	if (!file->ucontext &&
	    command != IB_USER_VERBS_CMD_GET_CONTEXT) {
		ret = -EINVAL;
		goto out;
	}

	flags = (hdr.command &
		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;

	if (!flags) {
		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
		    !uverbs_cmd_table[command]) {
			ret = -EINVAL;
			goto out;
		}

		if (hdr.in_words * 4 != count) {
			ret = -EINVAL;
			goto out;
		}

		ret = uverbs_cmd_table[command](file, ib_dev,
						 buf + sizeof(hdr),
						 hdr.in_words * 4,
						 hdr.out_words * 4);

	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
		struct ib_uverbs_ex_cmd_hdr ex_hdr;
		struct ib_udata ucore;
		struct ib_udata uhw;
		/* count is adjusted below; keep the original for the return. */
		size_t written_count = count;

		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
		    !uverbs_ex_cmd_table[command]) {
			ret = -ENOSYS;
			goto out;
		}

		if (!file->ucontext) {
			ret = -EINVAL;
			goto out;
		}

		if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
			ret = -EINVAL;
			goto out;
		}

		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
			ret = -EFAULT;
			goto out;
		}

		/* From here on buf/count describe the payload only. */
		count -= sizeof(hdr) + sizeof(ex_hdr);
		buf += sizeof(hdr) + sizeof(ex_hdr);

		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
			ret = -EINVAL;
			goto out;
		}

		if (ex_hdr.cmd_hdr_reserved) {
			ret = -EINVAL;
			goto out;
		}

		/* A response pointer and non-zero out_words must go together. */
		if (ex_hdr.response) {
			if (!hdr.out_words && !ex_hdr.provider_out_words) {
				ret = -EINVAL;
				goto out;
			}

			if (!access_ok(VERIFY_WRITE,
				       (void __user *) (unsigned long) ex_hdr.response,
				       (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
				ret = -EFAULT;
				goto out;
			}
		} else {
			if (hdr.out_words || ex_hdr.provider_out_words) {
				ret = -EINVAL;
				goto out;
			}
		}

		INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
				       hdr.in_words * 8, hdr.out_words * 8);

		/* Provider data follows the core data in both directions. */
		INIT_UDATA_BUF_OR_NULL(&uhw,
				       buf + ucore.inlen,
				       (unsigned long) ex_hdr.response + ucore.outlen,
				       ex_hdr.provider_in_words * 8,
				       ex_hdr.provider_out_words * 8);

		ret = uverbs_ex_cmd_table[command](file,
						   ib_dev,
						   &ucore,
						   &uhw);
		if (!ret)
			ret = written_count;
	} else {
		ret = -ENOSYS;
	}

out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
850 851 852
	struct ib_device *ib_dev;
	int ret = 0;
	int srcu_key;
853

854 855 856 857 858 859 860 861 862 863
	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto out;
	}

	if (!file->ucontext)
		ret = -ENODEV;
864
	else
865 866 867 868
		ret = ib_dev->mmap(file->ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
869 870
}

/*
 * ib_uverbs_open() does not need the BKL:
 *
A
Alexander Chiang 已提交
874
 *  - the ib_uverbs_device structures are properly reference counted and
875 876 877
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
A
Alexander Chiang 已提交
878 879
 *  - the open method will either immediately run -ENXIO, or all
 *    required initialization will be done.
880
 */
881 882
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
883
	struct ib_uverbs_device *dev;
884
	struct ib_uverbs_file *file;
885
	struct ib_device *ib_dev;
886
	int ret;
887 888
	int module_dependent;
	int srcu_key;
889

A
Alexander Chiang 已提交
890
	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
891
	if (!atomic_inc_not_zero(&dev->refcount))
892 893
		return -ENXIO;

894 895 896 897 898 899
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	mutex_lock(&dev->lists_mutex);
	ib_dev = srcu_dereference(dev->ib_dev,
				  &dev->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
900 901
		goto err;
	}
902

903 904 905 906 907 908 909 910 911 912 913 914 915
	/* In case IB device supports disassociate ucontext, there is no hard
	 * dependency between uverbs device and its low level device.
	 */
	module_dependent = !(ib_dev->disassociate_ucontext);

	if (module_dependent) {
		if (!try_module_get(ib_dev->owner)) {
			ret = -ENODEV;
			goto err;
		}
	}

	file = kzalloc(sizeof(*file), GFP_KERNEL);
916
	if (!file) {
917
		ret = -ENOMEM;
918 919 920 921
		if (module_dependent)
			goto err_module;

		goto err;
922
	}
923

924 925 926
	file->device	 = dev;
	file->ucontext	 = NULL;
	file->async_file = NULL;
927
	kref_init(&file->ref);
928
	mutex_init(&file->mutex);
929 930

	filp->private_data = file;
931
	kobject_get(&dev->kobj);
932 933 934
	list_add_tail(&file->list, &dev->uverbs_file_list);
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
935

936
	return nonseekable_open(inode, filp);
937 938

err_module:
939
	module_put(ib_dev->owner);
940 941

err:
942 943
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
944 945 946
	if (atomic_dec_and_test(&dev->refcount))
		ib_uverbs_comp_dev(dev);

947
	return ret;
948 949 950 951 952
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;
953
	struct ib_uverbs_device *dev = file->device;
954 955 956 957 958 959 960 961 962 963 964 965
	struct ib_ucontext *ucontext = NULL;

	mutex_lock(&file->device->lists_mutex);
	ucontext = file->ucontext;
	file->ucontext = NULL;
	if (!file->is_closed) {
		list_del(&file->list);
		file->is_closed = 1;
	}
	mutex_unlock(&file->device->lists_mutex);
	if (ucontext)
		ib_uverbs_cleanup_ucontext(file, ucontext);
966 967 968

	if (file->async_file)
		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
969 970

	kref_put(&file->ref, ib_uverbs_release_file);
971
	kobject_put(&dev->kobj);
972 973 974 975

	return 0;
}

static const struct file_operations uverbs_fops = {
A
Alexander Chiang 已提交
977 978 979
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
980 981
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
982 983
};

static const struct file_operations uverbs_mmap_fops = {
A
Alexander Chiang 已提交
985 986
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
987
	.mmap    = ib_uverbs_mmap,
A
Alexander Chiang 已提交
988
	.open	 = ib_uverbs_open,
989 990
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
991 992 993 994 995 996 997 998
};

/* IB client descriptor naming this module's add and remove callbacks. */
static struct ib_client uverbs_client = {
	.name   = "uverbs",
	.add    = ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one
};

static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
			  char *buf)
1001
{
1002 1003
	int ret = -ENODEV;
	int srcu_key;
1004
	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1005
	struct ib_device *ib_dev;
1006 1007 1008

	if (!dev)
		return -ENODEV;
1009

1010 1011 1012 1013 1014 1015 1016
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%s\n", ib_dev->name);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
1017
}
1018
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
static ssize_t show_dev_abi_version(struct device *device,
				    struct device_attribute *attr, char *buf)
1022
{
1023
	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1024 1025 1026
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;
1027 1028 1029

	if (!dev)
		return -ENODEV;
1030 1031 1032 1033 1034
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
1035

1036
	return ret;
1037
}
1038
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
1039

1040 1041
/*
 * Class-wide, read-only "abi_version" attribute holding the stringified
 * IB_USER_VERBS_ABI_VERSION; it is attached to uverbs_class in
 * ib_uverbs_init().
 */
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));
1042

1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058
/*
 * Dynamically allocated chrdev region used once dev_map is full; zero until
 * find_overflow_devnum() has allocated it.
 */
static dev_t overflow_maj;
/* One bit per device number handed out from the overflow region. */
static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);

/*
 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
 * requesting a new major number and doubling the number of max devices we
 * support. It's stupid, but simple.
 */
static int find_overflow_devnum(void)
{
	int ret;

	if (!overflow_maj) {
		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
					  "infiniband_verbs");
		if (ret) {
P
Parav Pandit 已提交
1059
			pr_err("user_verbs: couldn't register dynamic device number\n");
1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070
			return ret;
		}
	}

	ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
	if (ret >= IB_UVERBS_MAX_DEVICES)
		return -1;

	return ret;
}

1071 1072
static void ib_uverbs_add_one(struct ib_device *device)
{
1073
	int devnum;
1074
	dev_t base;
1075
	struct ib_uverbs_device *uverbs_dev;
1076
	int ret;
1077 1078 1079 1080

	if (!device->alloc_ucontext)
		return;

R
Roland Dreier 已提交
1081
	uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
1082 1083 1084
	if (!uverbs_dev)
		return;

1085 1086 1087 1088 1089 1090
	ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
	if (ret) {
		kfree(uverbs_dev);
		return;
	}

1091
	atomic_set(&uverbs_dev->refcount, 1);
1092
	init_completion(&uverbs_dev->comp);
1093 1094
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);
1095
	kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
1096 1097 1098
	mutex_init(&uverbs_dev->lists_mutex);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
1099

1100
	spin_lock(&map_lock);
1101 1102
	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
	if (devnum >= IB_UVERBS_MAX_DEVICES) {
1103
		spin_unlock(&map_lock);
1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
		devnum = find_overflow_devnum();
		if (devnum < 0)
			goto err;

		spin_lock(&map_lock);
		uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
		base = devnum + overflow_maj;
		set_bit(devnum, overflow_map);
	} else {
		uverbs_dev->devnum = devnum;
		base = devnum + IB_UVERBS_BASE_DEV;
		set_bit(devnum, dev_map);
1116 1117 1118
	}
	spin_unlock(&map_lock);

1119
	rcu_assign_pointer(uverbs_dev->ib_dev, device);
1120
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
1121

1122 1123 1124
	cdev_init(&uverbs_dev->cdev, NULL);
	uverbs_dev->cdev.owner = THIS_MODULE;
	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
1125
	uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
1126
	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
1127
	if (cdev_add(&uverbs_dev->cdev, base, 1))
1128
		goto err_cdev;
1129

1130
	uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
1131
					uverbs_dev->cdev.dev, uverbs_dev,
1132
					"uverbs%d", uverbs_dev->devnum);
1133
	if (IS_ERR(uverbs_dev->dev))
1134 1135
		goto err_cdev;

1136
	if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
1137
		goto err_class;
1138
	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
1139
		goto err_class;
1140 1141 1142 1143 1144 1145

	ib_set_client_data(device, &uverbs_client, uverbs_dev);

	return;

err_class:
1146
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1147 1148

err_cdev:
1149
	cdev_del(&uverbs_dev->cdev);
1150 1151 1152 1153
	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(devnum, dev_map);
	else
		clear_bit(devnum, overflow_map);
1154 1155

err:
1156 1157
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
1158
	wait_for_completion(&uverbs_dev->comp);
1159
	kobject_put(&uverbs_dev->kobj);
1160 1161 1162
	return;
}

1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
/*
 * Detach every open uverbs file and event file of @uverbs_dev from @ib_dev.
 *
 * After waiting for SRCU readers of disassociate_srcu, each entry on
 * uverbs_file_list is marked closed, unlinked, sent an
 * IB_EVENT_DEVICE_FATAL event and, if it had a ucontext, passed to
 * ib_dev->disassociate_ucontext() and ib_uverbs_cleanup_ucontext().
 * Each entry on uverbs_events_file_list is then marked closed, unlinked and
 * its waiters are woken.
 */
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
					struct ib_device *ib_dev)
{
	struct ib_uverbs_file *file;
	struct ib_uverbs_event_file *event_file;
	struct ib_event event;

	/* Wait for pending running commands to terminate */
	synchronize_srcu(&uverbs_dev->disassociate_srcu);
	/* The same fatal-error event is delivered to every open file below. */
	event.event = IB_EVENT_DEVICE_FATAL;
	event.element.port_num = 0;
	event.device = ib_dev;

	mutex_lock(&uverbs_dev->lists_mutex);
	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
		struct ib_ucontext *ucontext;

		file = list_first_entry(&uverbs_dev->uverbs_file_list,
					struct ib_uverbs_file, list);
		file->is_closed = 1;
		ucontext = file->ucontext;
		list_del(&file->list);
		file->ucontext = NULL;
		/* Hold a reference on the file while lists_mutex is dropped. */
		kref_get(&file->ref);
		mutex_unlock(&uverbs_dev->lists_mutex);
		/* We must release the mutex before going ahead and calling
		 * disassociate_ucontext. disassociate_ucontext might end up
		 * indirectly calling uverbs_close, for example due to freeing
		 * the resources (e.g mmput).
		 */
		ib_uverbs_event_handler(&file->event_handler, &event);
		if (ucontext) {
			ib_dev->disassociate_ucontext(ucontext);
			ib_uverbs_cleanup_ucontext(file, ucontext);
		}

		mutex_lock(&uverbs_dev->lists_mutex);
		kref_put(&file->ref, ib_uverbs_release_file);
	}

	/* lists_mutex is still held here; now retire the event files. */
	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
		event_file = list_first_entry(&uverbs_dev->
					      uverbs_events_file_list,
					      struct ib_uverbs_event_file,
					      list);
		spin_lock_irq(&event_file->lock);
		event_file->is_closed = 1;
		spin_unlock_irq(&event_file->lock);

		list_del(&event_file->list);
		/* Async event files also drop their registered event handler. */
		if (event_file->is_async) {
			ib_unregister_event_handler(&event_file->uverbs_file->
						    event_handler);
			event_file->uverbs_file->event_handler.device = NULL;
		}

		/* Notify anyone sleeping in poll/read or waiting for SIGIO. */
		wake_up_interruptible(&event_file->poll_wait);
		kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
	}
	mutex_unlock(&uverbs_dev->lists_mutex);
}

1225
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
1226
{
1227
	struct ib_uverbs_device *uverbs_dev = client_data;
1228
	int wait_clients = 1;
1229 1230 1231 1232

	if (!uverbs_dev)
		return;

1233
	dev_set_drvdata(uverbs_dev->dev, NULL);
1234 1235
	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
	cdev_del(&uverbs_dev->cdev);
1236

1237 1238 1239 1240
	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
		clear_bit(uverbs_dev->devnum, dev_map);
	else
		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
1241

1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258
	if (device->disassociate_ucontext) {
		/* We disassociate HW resources and immediately return.
		 * Userspace will see a EIO errno for all future access.
		 * Upon returning, ib_device may be freed internally and is not
		 * valid any more.
		 * uverbs_device is still available until all clients close
		 * their files, then the uverbs device ref count will be zero
		 * and its resources will be freed.
		 * Note: At this point no more files can be opened since the
		 * cdev was deleted, however active clients can still issue
		 * commands and close their open files.
		 */
		rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
		ib_uverbs_free_hw_resources(uverbs_dev, device);
		wait_clients = 0;
	}

1259 1260
	if (atomic_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
1261 1262
	if (wait_clients)
		wait_for_completion(&uverbs_dev->comp);
1263
	kobject_put(&uverbs_dev->kobj);
1264 1265
}

1266
/*
 * devnode callback installed on uverbs_class in ib_uverbs_init().
 *
 * Stores mode 0666 through @mode when it is non-NULL and returns a
 * kasprintf()-allocated "infiniband/<device name>" string (NULL if that
 * allocation fails).
 */
static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode != NULL)
		*mode = 0666;

	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}

1273 1274 1275 1276 1277 1278 1279
/*
 * Module init: reserve the static chrdev region, create the
 * "infiniband_verbs" class with its abi_version attribute and devnode
 * callback, then register the uverbs IB client.
 *
 * Returns 0 on success or the negative error code of the step that failed,
 * after undoing the steps that had already succeeded.
 */
static int __init ib_uverbs_init(void)
{
	int err;

	err = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
				     "infiniband_verbs");
	if (err) {
		pr_err("user_verbs: couldn't register device number\n");
		return err;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		err = PTR_ERR(uverbs_class);
		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
		goto err_unregister_region;
	}
	uverbs_class->devnode = uverbs_devnode;

	err = class_create_file(uverbs_class, &class_attr_abi_version.attr);
	if (err) {
		pr_err("user_verbs: couldn't create abi_version attribute\n");
		goto err_destroy_class;
	}

	err = ib_register_client(&uverbs_client);
	if (err) {
		pr_err("user_verbs: couldn't register client\n");
		goto err_destroy_class;
	}

	return 0;

err_destroy_class:
	class_destroy(uverbs_class);

err_unregister_region:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
	return err;
}

/*
 * Module exit: unregister the IB client, destroy the class, release the
 * static chrdev region (and the overflow region if one was allocated), and
 * destroy the uverbs IDRs.
 */
static void __exit ib_uverbs_cleanup(void)
{
	/* Destroyed in this order at the end of cleanup. */
	struct idr *const idrs[] = {
		&ib_uverbs_pd_idr,
		&ib_uverbs_mr_idr,
		&ib_uverbs_mw_idr,
		&ib_uverbs_ah_idr,
		&ib_uverbs_cq_idr,
		&ib_uverbs_qp_idr,
		&ib_uverbs_srq_idr,
	};
	unsigned int i;

	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
	/* overflow_maj is non-zero only if the overflow region was allocated. */
	if (overflow_maj)
		unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);

	for (i = 0; i < ARRAY_SIZE(idrs); i++)
		idr_destroy(idrs[i]);
}

/* Register ib_uverbs_init/ib_uverbs_cleanup as the module entry and exit points. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);