file_ops.c 42.0 KB
Newer Older
M
Mike Marciniszyn 已提交
1
/*
2
 * Copyright(c) 2015-2017 Intel Corporation.
M
Mike Marciniszyn 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
51
#include <linux/sched/mm.h>
52
#include <linux/bitmap.h>
M
Mike Marciniszyn 已提交
53

54 55
#include <rdma/ib.h>

M
Mike Marciniszyn 已提交
56 57 58 59 60
#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
61
#include "mmu_rb.h"
M
Mike Marciniszyn 已提交
62
#include "user_sdma.h"
63
#include "user_exp_rcv.h"
64
#include "aspm.h"
M
Mike Marciniszyn 已提交
65 66 67 68 69 70 71 72 73

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
74 75 76 77 78
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);
M
Mike Marciniszyn 已提交
79

80
static u64 kvirt_to_phys(void *addr);
81
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
82 83
static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo);
84 85
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt);
86
static void user_init(struct hfi1_ctxtdata *uctxt);
87 88 89 90
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
91 92
static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt);
93
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);
94

95 96
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo);
97
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
98 99
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **cd);
100
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
101 102
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
103
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
104
			  unsigned long events);
105 106
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
107 108
		       int start_stop);
static int vma_fault(struct vm_fault *vmf);
109 110
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);
M
Mike Marciniszyn 已提交
111 112 113 114 115 116

/*
 * Character-device entry points for the hfi1 user context device.
 * Beyond open/close, user space drives the device through ioctl
 * (context assignment and control), write_iter (SDMA submission),
 * poll and mmap.
 */
static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

123
/*
 * vm_ops for the mappings that are faulted in on demand (the "vmf"
 * cases in hfi1_file_mmap()) rather than remapped up front.
 */
static const struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space.
 * The type is encoded into the mmap token (see HFI1_MMAP_TOKEN below)
 * and dispatched on in hfi1_file_mmap().
 */
enum mmap_types {
	PIO_BUFS = 1,		/* PIO send buffers (chip MMIO) */
	PIO_BUFS_SOP,		/* PIO send buffers, start-of-packet half */
	PIO_CRED,		/* credit-return page (mapped read-only) */
	RCV_HDRQ,		/* receive header queue */
	RCV_EGRBUF,		/* eager receive buffers (read-only) */
	UREGS,			/* per-context user registers page */
	EVENTS,			/* event-flags page */
	STATUS,			/* device status page (read-only) */
	RTAIL,			/* rcv header tail page (DMA_RTAIL cap only) */
	SUBCTXT_UREGS,		/* shared-ctxt user register shadow */
	SUBCTXT_RCV_HDRQ,	/* shared-ctxt receive header queues */
	SUBCTXT_EGRBUF,		/* shared-ctxt eager buffer shadow */
	SDMA_COMP		/* SDMA completion queue */
};

/*
 * Masks and offsets defining the mmap tokens.
 *
 * A token is the 64-bit mmap offset cookie handed to user space:
 *   magic[63:32] | type[27:24] | ctxt[23:16] | subctxt[15:12] | offset[11:0]
 */
#define HFI1_MMAP_OFFSET_MASK   0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT  0
#define HFI1_MMAP_SUBCTXT_MASK  0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK     0xffULL
#define HFI1_MMAP_CTXT_SHIFT    16
#define HFI1_MMAP_TYPE_MASK     0xfULL
#define HFI1_MMAP_TYPE_SHIFT    24
#define HFI1_MMAP_MAGIC_MASK    0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT   32

#define HFI1_MMAP_MAGIC         0xdabbad00

/* Insert/extract a single field of an mmap token. */
#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
/* Compose a complete token for the given mapping type/context/address. */
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

/* A mmap token is valid iff its magic field matches the driver magic. */
static inline int is_valid_mmap(u64 token)
{
	u64 magic = HFI1_MMAP_TOKEN_GET(MAGIC, token);

	return magic == HFI1_MMAP_MAGIC;
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
I
Ira Weiny 已提交
183
	struct hfi1_filedata *fd;
184 185 186 187
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

188
	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
189 190
		return -EINVAL;

191 192 193
	if (!atomic_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

194 195 196
	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

M
Mike Marciniszyn 已提交
197
	/* The real work is performed later in assign_ctxt() */
I
Ira Weiny 已提交
198 199 200

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

I
Ira Weiny 已提交
201 202 203
	if (fd) {
		fd->rec_cpu_num = -1; /* no cpu affinity by default */
		fd->mm = current->mm;
V
Vegard Nossum 已提交
204
		mmgrab(fd->mm);
205
		fd->dd = dd;
206 207 208 209 210 211
		fp->private_data = fd;
	} else {
		fp->private_data = NULL;

		if (atomic_dec_and_test(&dd->user_refcount))
			complete(&dd->user_comp);
I
Ira Weiny 已提交
212

213 214
		return -ENOMEM;
	}
I
Ira Weiny 已提交
215

216
	return 0;
M
Mike Marciniszyn 已提交
217 218
}

219 220 221 222 223 224 225 226 227 228 229 230 231
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_info uinfo;
	struct hfi1_tid_info tinfo;
	int ret = 0;
	unsigned long addr;
	int uval = 0;
	unsigned long ul_uval = 0;
	u16 uval16 = 0;

232
	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
233 234 235 236 237 238 239
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
I
Ira Weiny 已提交
240 241 242
		if (uctxt)
			return -EINVAL;

243 244 245 246 247
		if (copy_from_user(&uinfo,
				   (struct hfi1_user_info __user *)arg,
				   sizeof(uinfo)))
			return -EFAULT;

248
		ret = assign_ctxt(fd, &uinfo);
249 250
		break;
	case HFI1_IOCTL_CTXT_INFO:
251
		ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
252 253 254
				    sizeof(struct hfi1_ctxt_info));
		break;
	case HFI1_IOCTL_USER_INFO:
255
		ret = get_base_info(fd, (void __user *)(unsigned long)arg,
256 257 258
				    sizeof(struct hfi1_base_info));
		break;
	case HFI1_IOCTL_CREDIT_UPD:
259
		if (uctxt)
260 261 262 263 264 265 266 267 268
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		if (copy_from_user(&tinfo,
				   (struct hfi11_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

269
		ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
270 271 272 273 274 275 276
		if (!ret) {
			/*
			 * Copy the number of tidlist entries we used
			 * and the length of the buffer we registered.
			 */
			addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
			if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
277 278 279 280 281
					 sizeof(tinfo.tidcnt)))
				return -EFAULT;

			addr = arg + offsetof(struct hfi1_tid_info, length);
			if (copy_to_user((void __user *)addr, &tinfo.length,
282 283 284 285 286 287 288 289 290 291 292
					 sizeof(tinfo.length)))
				ret = -EFAULT;
		}
		break;

	case HFI1_IOCTL_TID_FREE:
		if (copy_from_user(&tinfo,
				   (struct hfi11_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

293
		ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
294 295 296 297 298 299 300 301 302 303 304 305 306 307
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		if (copy_from_user(&tinfo,
				   (struct hfi11_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

308
		ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = manage_rcvq(uctxt, fd->subctxt, uval);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = get_user(ul_uval, (unsigned long __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = get_user(uval16, (u16 __user *)arg);
		if (ret != 0)
			return -EFAULT;
		if (HFI1_CAP_IS_USET(PKEY_CHECK))
			ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
		else
			return -EPERM;
		break;

	case HFI1_IOCTL_CTXT_RESET: {
		struct send_context *sc;
		struct hfi1_devdata *dd;

		if (!uctxt || !uctxt->dd || !uctxt->sc)
			return -EINVAL;

		/*
		 * There is no protection here. User level has to
		 * guarantee that no one will be writing to the send
		 * context while it is being re-initialized.
		 * If user level breaks that guarantee, it will break
		 * it's own context and no one else's.
		 */
		dd = uctxt->dd;
		sc = uctxt->sc;
		/*
		 * Wait until the interrupt handler has marked the
		 * context as halted or frozen. Report error if we time
		 * out.
		 */
		wait_event_interruptible_timeout(
			sc->halt_wait, (sc->flags & SCF_HALTED),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (!(sc->flags & SCF_HALTED))
			return -ENOLCK;

		/*
		 * If the send context was halted due to a Freeze,
		 * wait until the device has been "unfrozen" before
		 * resetting the context.
		 */
		if (sc->flags & SCF_FROZEN) {
			wait_event_interruptible_timeout(
				dd->event_queue,
				!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
				msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
			if (dd->flags & HFI1_FROZEN)
				return -ENOLCK;

			if (dd->flags & HFI1_FORCED_FREEZE)
				/*
				 * Don't allow context reset if we are into
				 * forced freeze
				 */
				return -ENODEV;

			sc_disable(sc);
			ret = sc_enable(sc);
397
			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
		} else {
			ret = sc_restart(sc);
		}
		if (!ret)
			sc_return_credits(sc);
		break;
	}

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

M
Mike Marciniszyn 已提交
419 420
/*
 * hfi1_write_iter - submit user SDMA requests via write(v)
 *
 * Each call to hfi1_user_sdma_process_request() consumes some number of
 * iovec segments (returned in @count) for one request.  Keep submitting
 * until all segments are consumed or a request fails.
 *
 * Return: the number of requests queued, or the negative errno of the
 * first failing request.
 */
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;

	/* SDMA queues are only set up once a context has been assigned */
	if (!cq || !pq)
		return -EIO;

	if (!iter_is_iovec(from) || !dim)
		return -EINVAL;

	trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);

	/* Fast-fail when the request queue is already full */
	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
		return -ENOSPC;

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			fd, (struct iovec *)(from->iov + done),
			dim, &count);
		/* on error, report the errno instead of the request count */
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	return reqs;
}

/*
 * hfi1_file_mmap - map driver/chip memory into a user process
 *
 * The mmap offset is a token (see HFI1_MMAP_TOKEN) encoding a magic
 * value, the memory type, the context/sub-context and a page offset.
 * Depending on the type, the mapping is satisfied one of three ways:
 *   - mapio:   io_remap_pfn_range() of chip MMIO (PIO buffers, uregs)
 *   - memvirt/memaddr: remap_pfn_range() of driver memory up front
 *   - vmf:     deferred; pages faulted in via vm_ops/vma_fault()
 */
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	/* Only shared mappings of valid tokens on an assigned context */
	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	/* The token must target this fd's own (sub-)context */
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			(type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		memvirt = dd->cr_base[uctxt->numa_id].va;
		memaddr = virt_to_phys(memvirt) +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memlen = uctxt->rcvhdrq_size;
		memvirt = uctxt->rcvhdrq;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffer need to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		/* map each (page-aligned) eager buffer chunk in turn */
		for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			ret = remap_pfn_range(
				vma, addr,
				/*
				 * virt_to_pfn() does the same, but
				 * it's not available on x86_64
				 * when CONFIG_MMU is enabled.
				 */
				PFN_DOWN(__pa(memvirt)),
				memlen,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += memlen;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where it's own bitmap is within the page.
		 */
		memaddr = (unsigned long)(dd->events +
				  ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
				   HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
			ret = -EPERM;
			goto done;
		}
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	/* The requested VMA must exactly cover the selected region */
	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		    ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		    vma->vm_end - vma->vm_start, vma->vm_flags);
	if (vmf) {
		/* deferred: pages supplied on demand by vma_fault() */
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		/* chip MMIO */
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		/* driver memory known by kernel virtual address */
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		/* driver memory known by physical address */
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
 */
713
static int vma_fault(struct vm_fault *vmf)
M
Mike Marciniszyn 已提交
714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned pollflag;

732
	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
M
Mike Marciniszyn 已提交
733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748
	if (!uctxt)
		pollflag = POLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else  if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = POLLERR;

	return pollflag;
}

/*
 * hfi1_file_close - release a user context file
 *
 * Tears down this open's per-fd state and, when the last sub-context
 * of a shared context closes, shuts down and deallocates the receive
 * context itself.  Always drops the mm, kobject and user_refcount
 * references taken in hfi1_file_open().
 */
static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	/* No context was ever assigned; only the fd refs need dropping */
	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata, uctxt);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/* clean up rcv side */
	hfi1_user_exp_rcv_free(fdata);

	/*
	 * fdata->uctxt is used in the above cleanup.  It is not ready to be
	 * removed until here.
	 */
	fdata->uctxt = NULL;
	hfi1_rcd_put(uctxt);

	/*
	 * NOTE(review): uctxt is still dereferenced below (events pointer,
	 * in_use_ctxts bitmap, rcvctrl).  This assumes another reference
	 * keeps the context alive past the put above — confirm against
	 * hfi1_rcd_put()/context allocation.
	 */

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			   HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
	*ev = 0;

	/* Release our sub-context slot; bail early if others remain */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		goto done;
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	/* Last user of the context: full hardware teardown follows */

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt);
	/*
	 * If a send context is allocated, reset context integrity
	 * checks to default and disable the send context.
	 */
	if (uctxt->sc) {
		set_pio_integrity(uctxt->sc);
		sc_disable(uctxt->sc);
	}

	hfi1_free_ctxt_rcv_groups(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	uctxt->event_flags = 0;

	deallocate_ctxt(uctxt);
done:
	mmdrop(fdata->mm);
	kobject_put(&dd->kobj);

	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used to vmalloc'ed addresses.
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

849 850 851 852 853 854 855 856 857 858 859 860
/**
 * complete_subctxt
 * @fd: valid filedata pointer
 *
 * Sub-context info can only be set up after the base context
 * has been completed.  This is indicated by the clearing of the
 * HFI1_CTXT_BASE_UINIT bit.
 *
 * Wait for the bit to be cleared, and then complete the subcontext
 * initialization.
 *
 */
861 862 863
static int complete_subctxt(struct hfi1_filedata *fd)
{
	int ret;
864
	unsigned long flags;
865 866 867 868 869 870 871 872 873 874 875 876

	/*
	 * sub-context info can only be set up after the base context
	 * has been completed.
	 */
	ret = wait_event_interruptible(
		fd->uctxt->wait,
		!test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags));

	if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
		ret = -ENOMEM;

877
	/* Finish the sub-context init */
878 879 880 881 882 883 884 885
	if (!ret) {
		fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id);
		ret = init_user_ctxt(fd, fd->uctxt);
	}

	if (ret) {
		hfi1_rcd_put(fd->uctxt);
		fd->uctxt = NULL;
886
		spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
887
		__clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
888
		spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
889 890 891 892 893
	}

	return ret;
}

894
/*
 * assign_ctxt - attach this fd to a (possibly shared) receive context
 *
 * Validates the requested user version, then — under hfi1_mutex —
 * either joins an existing shareable base context as a sub-context or
 * allocates a new base context, and finishes the matching init path.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
	int ret;
	unsigned int swmajor, swminor;
	struct hfi1_ctxtdata *uctxt = NULL;

	swmajor = uinfo->userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR)
		return -ENODEV;

	if (uinfo->subctxt_cnt > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	/* NOTE(review): swminor is extracted but currently unused here */
	swminor = uinfo->userversion & 0xffff;

	/*
	 * Acquire the mutex to protect against multiple creations of what
	 * could be a shared base context.
	 */
	mutex_lock(&hfi1_mutex);
	/*
	 * Get a sub context if available  (fd->uctxt will be set).
	 * ret < 0 error, 0 no context, 1 sub-context found
	 */
	ret = find_sub_ctxt(fd, uinfo);

	/*
	 * Allocate a base context if context sharing is not required or a
	 * sub context wasn't found.
	 */
	if (!ret)
		ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt);

	mutex_unlock(&hfi1_mutex);

	/* Depending on the context type, finish the appropriate init */
	switch (ret) {
	case 0:
		/* new base context */
		ret = setup_base_ctxt(fd, uctxt);
		if (uctxt->subctxt_cnt) {
			/*
			 * Base context is done (successfully or not), notify
			 * anybody using a sub-context that is waiting for
			 * this completion.
			 */
			clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
			wake_up(&uctxt->wait);
		}
		break;
	case 1:
		/* joined an existing base context as a sub-context */
		ret = complete_subctxt(fd);
		break;
	default:
		break;
	}

	return ret;
}

953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014
/**
 * match_ctxt
 * @fd: valid filedata pointer
 * @uinfo: user info to compare base context with
 * @uctxt: context to compare uinfo to.
 *
 * Compare the given context with the given information to see if it
 * can be used for a sub context.
 */
static int match_ctxt(struct hfi1_filedata *fd,
		      const struct hfi1_user_info *uinfo,
		      struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = fd->dd;
	unsigned long flags;
	u16 free_sub;

	/* Dynamically allocated kernel contexts can never be shared */
	if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
		return 0;

	/* The context must match every identifying field of the request */
	if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) ||
	    uctxt->jkey != generate_jkey(current_uid()) ||
	    uctxt->subctxt_id != uinfo->subctxt_id ||
	    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
		return 0;

	/* A user-version mismatch against the base context is a hard error */
	if (uctxt->userversion != uinfo->userversion)
		return -EINVAL;

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		/* The base context is tearing down; do not attach to it */
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return 0;
	}

	free_sub = find_first_zero_bit(uctxt->in_use_ctxts,
				       HFI1_MAX_SHARED_CTXTS);
	if (free_sub >= uctxt->subctxt_cnt) {
		spin_unlock_irqrestore(&dd->uctxt_lock, flags);
		return -EBUSY;
	}

	/* Claim the free slot while still holding the lock */
	fd->subctxt = free_sub;
	__set_bit(fd->subctxt, uctxt->in_use_ctxts);
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

	return 1;
}

/**
 * find_sub_ctxt
 * @fd: valid filedata pointer
 * @uinfo: matching info to use to find a possible context to share.
 *
1015
 * The hfi1_mutex must be held when this function is called.  It is
1016
 * necessary to ensure serialized creation of shared contexts.
1017 1018 1019 1020 1021 1022
 *
 * Return:
 *    0      No sub-context found
 *    1      Subcontext found and allocated
 *    errno  EINVAL (incorrect parameters)
 *           EBUSY (all sub contexts in use)
1023
 */
1024 1025
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo)
M
Mike Marciniszyn 已提交
1026
{
1027
	struct hfi1_ctxtdata *uctxt;
1028
	struct hfi1_devdata *dd = fd->dd;
1029 1030
	u16 i;
	int ret;
M
Mike Marciniszyn 已提交
1031

1032 1033 1034
	if (!uinfo->subctxt_cnt)
		return 0;

1035
	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
1036 1037 1038 1039 1040 1041 1042 1043
		uctxt = hfi1_rcd_get_by_index(dd, i);
		if (uctxt) {
			ret = match_ctxt(fd, uinfo, uctxt);
			hfi1_rcd_put(uctxt);
			/* value of != 0 will return */
			if (ret)
				return ret;
		}
M
Mike Marciniszyn 已提交
1044 1045
	}

1046
	return 0;
M
Mike Marciniszyn 已提交
1047 1048
}

/*
 * Allocate and initialize a new base user context on device @dd,
 * returning it through @rcd.  Caller must hold hfi1_mutex.
 * Returns 0 on success or a negative errno.
 */
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo,
			 struct hfi1_ctxtdata **rcd)
{
	struct hfi1_ctxtdata *uctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen.  It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	if (!dd->freectxts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "user ctxtdata allocation failed\n");
		return ret;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Setup sub context information if the user-level has requested
	 * sub contexts.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper base context.
	 */
	if (uinfo->subctxt_cnt)
		init_subctxts(uctxt, uinfo);
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);

	*rcd = uctxt;

	return 0;

ctxdata_free:
	/* drops the context reference taken by hfi1_create_ctxtdata() */
	hfi1_free_ctxt(uctxt);
	return ret;
}

1133 1134 1135 1136 1137 1138 1139
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
	mutex_lock(&hfi1_mutex);
	hfi1_stats.sps_ctxts--;
	if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
		aspm_enable_all(uctxt->dd);
	mutex_unlock(&hfi1_mutex);
1140

1141
	hfi1_free_ctxt(uctxt);
1142 1143
}

/* Record the requested sub context layout on the base context. */
static void init_subctxts(struct hfi1_ctxtdata *uctxt,
			  const struct hfi1_user_info *uinfo)
{
	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	/* sub contexts wait on this bit until base context init completes */
	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
}

static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
1155
	u16 num_subctxts = uctxt->subctxt_cnt;
M
Mike Marciniszyn 已提交
1156 1157

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
1158 1159 1160
	if (!uctxt->subctxt_uregbase)
		return -ENOMEM;

M
Mike Marciniszyn 已提交
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174
	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}
1175 1176 1177

	return 0;

M
Mike Marciniszyn 已提交
1178 1179
bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
1180
	uctxt->subctxt_rcvhdr_base = NULL;
M
Mike Marciniszyn 已提交
1181 1182 1183
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;
1184

M
Mike Marciniszyn 已提交
1185 1186 1187
	return ret;
}

/* Program the receive side of @uctxt and enable it for user traffic. */
static void user_init(struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when they change (and when the update bit transitions from
	 * 0 to 1).  So for those chips, we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue.  We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);

	/* build the rcvctrl operation mask from the context's cap flags */
	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packet per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	/* apply all accumulated operations in one hardware update */
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}

1240 1241
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
M
Mike Marciniszyn 已提交
1242 1243
{
	struct hfi1_ctxt_info cinfo;
1244
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
M
Mike Marciniszyn 已提交
1245 1246
	int ret = 0;

1247
	memset(&cinfo, 0, sizeof(cinfo));
1248 1249 1250 1251
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
D
Dean Luick 已提交
1252 1253 1254 1255
	/* adjust flag if this fd is not able to cache */
	if (!fd->handler)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

M
Mike Marciniszyn 已提交
1256 1257 1258
	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
1259
	cinfo.subctxt = fd->subctxt;
M
Mike Marciniszyn 已提交
1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
1271
	cinfo.sdma_ring_size = fd->cq->nentries;
M
Mike Marciniszyn 已提交
1272 1273
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

1274
	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
M
Mike Marciniszyn 已提交
1275 1276
	if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
		ret = -EFAULT;
1277

M
Mike Marciniszyn 已提交
1278 1279 1280
	return ret;
}

/*
 * Set up the per-fd SDMA queues and expected-receive state.
 * On expected-receive failure the SDMA queues are torn down again,
 * so either both pieces exist afterwards or neither does.
 */
static int init_user_ctxt(struct hfi1_filedata *fd,
			  struct hfi1_ctxtdata *uctxt)
{
	int ret = hfi1_user_sdma_alloc_queues(uctxt, fd);

	if (!ret) {
		ret = hfi1_user_exp_rcv_init(fd, uctxt);
		if (ret)
			hfi1_user_sdma_free_queues(fd, uctxt);
	}

	return ret;
}

/*
 * Complete initialization of a freshly allocated base context:
 * receive header queue, eager buffers, optional sub context shared
 * memory, receive array groups, and per-fd state; finally enable
 * receive and publish the context on @fd.
 */
static int setup_base_ctxt(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	hfi1_init_ctxt(uctxt->sc);

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		/*
		 * NOTE(review): this early return skips setting
		 * HFI1_CTXT_BASE_FAILED and deallocate_ctxt(), unlike the
		 * failure paths below — confirm the caller handles this
		 * case for waiting sub contexts.
		 */
		return ret;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto setup_failed;

	/* If sub-contexts are enabled, do the appropriate setup */
	if (uctxt->subctxt_cnt)
		ret = setup_subctxt(uctxt);
	if (ret)
		goto setup_failed;

	ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
	if (ret)
		goto setup_failed;

	ret = init_user_ctxt(fd, uctxt);
	if (ret)
		goto setup_failed;

	/* enable receive on the context */
	user_init(uctxt);

	/* Now that the context is set up, the fd can get a reference. */
	fd->uctxt = uctxt;
	hfi1_rcd_get(uctxt);

	return 0;

setup_failed:
	/* Set the failed bit so sub-context init can do the right thing */
	set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
	deallocate_ctxt(uctxt);

	return ret;
}

/*
 * Fill a struct hfi1_base_info with the mmap tokens and hardware
 * parameters user space needs to map this context, then copy up to
 * @len bytes of it to @ubase.  Returns 0 or -EFAULT.
 */
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	ssize_t sz;
	unsigned offset;
	int ret = 0;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = kdeth_qp;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						 fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					    fd->subctxt, 0);
	/* byte offset of this (sub)context's event words within a page */
	offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
		    HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
		  sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					      fd->subctxt,
					      offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					      fd->subctxt,
					      dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
						       fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		/* tokens for the shared sub context regions */
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							uctxt->ctxt,
							fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							 uctxt->ctxt,
							 fd->subctxt, 0);
	}
	/* copy at most sizeof(binfo), never more than the caller asked for */
	sz = (len < sizeof(binfo)) ? len : sizeof(binfo);
	if (copy_to_user(ubase, &binfo, sz))
		ret = -EFAULT;
	return ret;
}

static unsigned int poll_urgent(struct file *fp,
				struct poll_table_struct *pt)
{
1424 1425
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
M
Mike Marciniszyn 已提交
1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = POLLIN | POLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static unsigned int poll_next(struct file *fp,
			      struct poll_table_struct *pt)
{
1447 1448
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
M
Mike Marciniszyn 已提交
1449 1450 1451 1452 1453 1454 1455 1456
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
1457
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
M
Mike Marciniszyn 已提交
1458
		pollflag = 0;
1459
	} else {
M
Mike Marciniszyn 已提交
1460
		pollflag = POLLIN | POLLRDNORM;
1461
	}
M
Mike Marciniszyn 已提交
1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
1476
	u16 ctxt;
M
Mike Marciniszyn 已提交
1477

1478 1479
	if (!dd->events)
		return -EINVAL;
M
Mike Marciniszyn 已提交
1480

1481
	for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
M
Mike Marciniszyn 已提交
1482
	     ctxt++) {
1483
		uctxt = hfi1_rcd_get_by_index(dd, ctxt);
M
Mike Marciniszyn 已提交
1484 1485
		if (uctxt) {
			unsigned long *evs = dd->events +
1486
				(uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
M
Mike Marciniszyn 已提交
1487 1488 1489 1490 1491 1492 1493 1494 1495
				HFI1_MAX_SHARED_CTXTS;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
1496
			hfi1_rcd_put(uctxt);
M
Mike Marciniszyn 已提交
1497 1498
		}
	}
1499 1500

	return 0;
M
Mike Marciniszyn 已提交
1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions.  start_stop==1 re-enables, to be used to
 * re-init the software copy of the head register
 */
1513
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
M
Mike Marciniszyn 已提交
1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533
		       int start_stop)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;

	if (subctxt)
		goto bail;
	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets it's tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (uctxt->rcvhdrtail_kvaddr)
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
1534
	} else {
M
Mike Marciniszyn 已提交
1535
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
1536
	}
1537
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
M
Mike Marciniszyn 已提交
1538 1539 1540 1541 1542 1543 1544 1545 1546 1547
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}

/*
 * clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
1548
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
M
Mike Marciniszyn 已提交
1549 1550 1551 1552 1553 1554 1555 1556 1557
			  unsigned long events)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;

	if (!dd->events)
		return 0;

1558
	evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
M
Mike Marciniszyn 已提交
1559 1560 1561 1562 1563 1564 1565 1566 1567 1568
			    HFI1_MAX_SHARED_CTXTS) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

1569
/*
 * Assign @pkey to the context, provided it is not a management pkey
 * and is present in the port's pkey table.
 * Returns the hfi1_set_ctxt_pkey() result, -EINVAL for management
 * pkeys, or -ENOENT when the pkey is not in the table.
 */
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
{
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;
	int i;

	/* user contexts may never use the management pkeys */
	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
		return -EINVAL;

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i])
			return hfi1_set_ctxt_pkey(dd, uctxt, pkey);

	return -ENOENT;
}

/* Tear down the per-unit user char device created by user_add(). */
static void user_remove(struct hfi1_devdata *dd)
{

	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

/*
 * Create the per-unit user char device ("<class>_<unit>").
 * On failure, clean up any partially initialized cdev state.
 */
static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	/* currently only the user char device exists per unit */
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 * (counterpart of hfi1_device_create)
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}