/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 *   redistributing this file, you may do so under either license.
 *
 *   GPL LICENSE SUMMARY
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   BSD LICENSE
 *
 *   Copyright(c) 2015 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *   PCIe NTB Perf Linux driver
 */
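
/*
 * Typical usage (a sketch, assuming debugfs is mounted at /sys/kernel/debug;
 * the per-device directory is named after the NTB PCI device):
 *
 *	echo 8 > /sys/kernel/debug/ntb_perf/<pci-id>/threads
 *	echo 1 > /sys/kernel/debug/ntb_perf/<pci-id>/run
 *	cat /sys/kernel/debug/ntb_perf/<pci-id>/run
 *
 * The write to "run" blocks until every thread has finished; reading "run"
 * afterwards reports bytes copied, elapsed time and throughput per thread.
 */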

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/ntb.h>
#include <linux/mutex.h>

#define DRIVER_NAME		"ntb_perf"
#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"

#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

#define PERF_LINK_DOWN_TIMEOUT	10
#define PERF_VERSION		0xffff0001
#define MAX_THREADS		32
#define MAX_TEST_SIZE		SZ_1M
#define MAX_SRCS		32
#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
#define DMA_RETRIES		20
#define SZ_4G			(1ULL << 32)
#define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */

MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);

static struct dentry *perf_debugfs_dir;

static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");

static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");

static unsigned int run_order = 32; /* 4G */
module_param(run_order, uint, 0644);
MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");

static bool use_dma; /* default to 0 */
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");

struct perf_mw {
	phys_addr_t	phys_addr;
	resource_size_t	phys_size;
	resource_size_t	xlat_align;
	resource_size_t	xlat_align_size;
	void __iomem	*vbase;
	size_t		xlat_size;
	size_t		buf_size;
	void		*virt_addr;
	dma_addr_t	dma_addr;
};

struct perf_ctx;

struct pthr_ctx {
	struct task_struct	*thread;
	struct perf_ctx		*perf;
	atomic_t		dma_sync;
	struct dma_chan		*dma_chan;
	int			dma_prep_err;
	int			src_idx;
	void			*srcs[MAX_SRCS];
	wait_queue_head_t       *wq;
	int			status;
	u64			copied;
	u64			diff_us;
};

struct perf_ctx {
	struct ntb_dev		*ntb;
	spinlock_t		db_lock;
	struct perf_mw		mw;
	bool			link_is_up;
	struct delayed_work	link_work;
	wait_queue_head_t	link_wq;
	struct dentry		*debugfs_node_dir;
	struct dentry		*debugfs_run;
	struct dentry		*debugfs_threads;
	u8			perf_threads;
	/* mutex ensures only one set of threads runs at once */
	struct mutex		run_mutex;
	struct pthr_ctx		pthr_ctx[MAX_THREADS];
	atomic_t		tsync;
	atomic_t                tdone;
};

enum {
	VERSION = 0,
	MW_SZ_HIGH,
	MW_SZ_LOW,
	MAX_SPAD
};

static void perf_link_event(void *ctx)
{
	struct perf_ctx *perf = ctx;

	if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
		schedule_delayed_work(&perf->link_work, 2*HZ);
	} else {
		dev_dbg(&perf->ntb->pdev->dev, "link down\n");

		if (!perf->link_is_up)
			cancel_delayed_work_sync(&perf->link_work);

		perf->link_is_up = false;
	}
}

static void perf_db_event(void *ctx, int vec)
{
	struct perf_ctx *perf = ctx;
	u64 db_bits, db_mask;

	db_mask = ntb_db_vector_mask(perf->ntb, vec);
	db_bits = ntb_db_read(perf->ntb);

	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
		vec, db_mask, db_bits);
}

static const struct ntb_ctx_ops perf_ops = {
	.link_event = perf_link_event,
	.db_event = perf_db_event,
};

static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
}

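/*
 * Copy one buffer segment into the peer memory window: plain memcpy_toio()
 * when use_dma is off, otherwise a memcpy descriptor is submitted to the DMA
 * engine and perf_copy_callback() accounts for its completion.
 */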
static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
			 char *src, size_t size)
{
	struct perf_ctx *perf = pctx->perf;
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	struct perf_mw *mw = &perf->mw;
	void __iomem *vbase;
	void __iomem *dst_vaddr;
	dma_addr_t dst_phys;
	int retries = 0;

	if (!use_dma) {
		memcpy_toio(dst, src, size);
		return size;
	}

	if (!chan) {
		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (uintptr_t)src & ~PAGE_MASK;
	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	vbase = mw->vbase;
	dst_vaddr = dst;
	dst_phys = mw->phys_addr + (dst_vaddr - vbase);

	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	do {
		txd = device->device_prep_dma_memcpy(chan, dst_phys,
						     unmap->addr[0],
						     size, DMA_PREP_INTERRUPT);
		if (!txd) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(DMA_OUT_RESOURCE_TO);
		}
	} while (!txd && (++retries < DMA_RETRIES));

	if (!txd) {
		pctx->dma_prep_err++;
		goto err_get_unmap;
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	dmaengine_unmap_put(unmap);

	atomic_inc(&pctx->dma_sync);
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	dmaengine_unmap_put(unmap);
err_get_unmap:
	dmaengine_unmap_put(unmap);
	return 0;
}

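/*
 * Stream 'total' bytes through the memory window in 'buf_size' chunks,
 * wrapping back to the window base after each 'win_size' worth of data,
 * then derive the throughput from the elapsed time.
 */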
static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	u64 copied = 0, result;
	char __iomem *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;
	unsigned long last_sleep = jiffies;

	chunks = div64_u64(win_size, buf_size);
	total_chunks = div64_u64(total, buf_size);
	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		result = perf_copy(pctx, tmp, src, buf_size);
		copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;

		/* Probably should schedule every 5s to prevent soft hang. */
		if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
			last_sleep = jiffies;
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(1);
		}

		if (unlikely(kthread_should_stop()))
			break;
	}

	if (use_dma) {
		pr_debug("%s: All DMA descriptors submitted\n", current->comm);
		while (atomic_read(&pctx->dma_sync) != 0) {
			if (kthread_should_stop())
				break;
			msleep(20);
		}
	}

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_debug("%s: copied %llu bytes\n", current->comm, copied);

	pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);

	perf = div64_u64(copied, diff_us);

	pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);

	pctx->copied = copied;
	pctx->diff_us = diff_us;

	return 0;
}

static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}

static int ntb_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct pci_dev *pdev = perf->ntb->pdev;
	struct perf_mw *mw = &perf->mw;
	char __iomem *dst;
	u64 win_size, buf_size, total;
	void *src;
	int rc, node, i;
	struct dma_chan *dma_chan = NULL;

	pr_debug("kthread %s starting...\n", current->comm);

	node = dev_to_node(&pdev->dev);

	if (use_dma && !pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
		if (!pctx->srcs[i]) {
			rc = -ENOMEM;
			goto err;
		}
	}

	win_size = mw->phys_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

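	/* the memory window maps the peer's buffer; all test writes target it */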
	dst = (char __iomem *)mw->vbase;

	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	src = pctx->srcs[pctx->src_idx];
	pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0) {
		pr_err("%s: failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

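	/* signal the debugfs writer that this thread has finished */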
	atomic_inc(&perf->tdone);
	wake_up(pctx->wq);
	rc = 0;
	goto done;

err:
	for (i = 0; i < MAX_SRCS; i++) {
		kfree(pctx->srcs[i]);
		pctx->srcs[i] = NULL;
	}

	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}

done:
	/* Wait until we are told to stop */
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop())
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);

	return rc;
}

static void perf_free_mw(struct perf_ctx *perf)
{
	struct perf_mw *mw = &perf->mw;
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!mw->virt_addr)
		return;

	ntb_mw_clear_trans(perf->ntb, 0);
	dma_free_coherent(&pdev->dev, mw->buf_size,
			  mw->virt_addr, mw->dma_addr);
	mw->xlat_size = 0;
	mw->buf_size = 0;
	mw->virt_addr = NULL;
}

static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
{
	struct perf_mw *mw = &perf->mw;
	size_t xlat_size, buf_size;
	int rc;

	if (!size)
		return -EINVAL;

	xlat_size = round_up(size, mw->xlat_align_size);
	buf_size = round_up(size, mw->xlat_align);

	if (mw->xlat_size == xlat_size)
		return 0;

	if (mw->buf_size)
		perf_free_mw(perf);

	mw->xlat_size = xlat_size;
	mw->buf_size = buf_size;

	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
					   &mw->dma_addr, GFP_KERNEL);
	if (!mw->virt_addr) {
		mw->xlat_size = 0;
		mw->buf_size = 0;
		return -ENOMEM;
	}

	rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
	if (rc) {
		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
		perf_free_mw(perf);
		return -EIO;
	}

	return 0;
}

static void perf_link_work(struct work_struct *work)
{
	struct perf_ctx *perf =
		container_of(work, struct perf_ctx, link_work.work);
	struct ntb_dev *ndev = perf->ntb;
	struct pci_dev *pdev = ndev->pdev;
	u32 val;
	u64 size;
	int rc;

	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);

	size = perf->mw.phys_size;

	if (max_mw_size && size > max_mw_size)
		size = max_mw_size;

	ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
	ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
	ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);

	/* now read what peer wrote */
	val = ntb_spad_read(ndev, VERSION);
	if (val != PERF_VERSION) {
		dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
		goto out;
	}

	val = ntb_spad_read(ndev, MW_SZ_HIGH);
	size = (u64)val << 32;

	val = ntb_spad_read(ndev, MW_SZ_LOW);
	size |= val;

	dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);

	rc = perf_set_mw(perf, size);
	if (rc)
		goto out1;

	perf->link_is_up = true;
	wake_up(&perf->link_wq);

	return;

out1:
	perf_free_mw(perf);

out:
	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
		schedule_delayed_work(&perf->link_work,
				      msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
}

static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
{
	struct perf_mw *mw;
	int rc;

	mw = &perf->mw;

	rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
			      &mw->xlat_align, &mw->xlat_align_size);
	if (rc)
		return rc;

	perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
	if (!mw->vbase)
		return -ENOMEM;

	return 0;
}

static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_off = 0;
	struct pthr_ctx *pctx;
	int i;
	u64 rate;

	if (!perf)
		return 0;

	buf = kmalloc(1024, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

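	/* a test is still in flight; report that instead of stale results */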
	if (mutex_is_locked(&perf->run_mutex)) {
		out_off = scnprintf(buf, 64, "running\n");
		goto read_from_buf;
	}

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];

		if (pctx->status == -ENODATA)
			break;

		if (pctx->status) {
			out_off += scnprintf(buf + out_off, 1024 - out_off,
					    "%d: error %d\n", i,
					    pctx->status);
			continue;
		}

		rate = div64_u64(pctx->copied, pctx->diff_us);
		out_off += scnprintf(buf + out_off, 1024 - out_off,
			"%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
			i, pctx->copied, pctx->diff_us, rate);
	}

read_from_buf:
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
	kfree(buf);

	return ret;
}

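/* Stop every running test thread and record its exit status. */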
static void threads_cleanup(struct perf_ctx *perf)
{
	struct pthr_ctx *pctx;
	int i;

	for (i = 0; i < MAX_THREADS; i++) {
		pctx = &perf->pthr_ctx[i];
		if (pctx->thread) {
			pctx->status = kthread_stop(pctx->thread);
			pctx->thread = NULL;
		}
	}
}

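/* Mark every thread slot as having no result to report yet. */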
static void perf_clear_thread_status(struct perf_ctx *perf)
{
	int i;

	for (i = 0; i < MAX_THREADS; i++)
		perf->pthr_ctx[i].status = -ENODATA;
}

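/*
 * Writing "run" launches the test threads and blocks until they have all
 * completed; the results are collected by a subsequent read of the file.
 */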
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;
	DECLARE_WAIT_QUEUE_HEAD(wq);

	if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
		return -ENOLINK;

	if (perf->perf_threads == 0)
		return -EINVAL;

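	/* only one test run may be in flight at a time */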
	if (!mutex_trylock(&perf->run_mutex))
		return -EBUSY;

	perf_clear_thread_status(perf);

	if (perf->perf_threads > MAX_THREADS) {
		perf->perf_threads = MAX_THREADS;
		pr_info("Reset total threads to: %u\n", MAX_THREADS);
	}

	/* no greater than 1M */
	if (seg_order > MAX_SEG_ORDER) {
		seg_order = MAX_SEG_ORDER;
		pr_info("Fix seg_order to %u\n", seg_order);
	}

	if (run_order < seg_order) {
		run_order = seg_order;
		pr_info("Fix run_order to %u\n", run_order);
	}

	node = dev_to_node(&perf->ntb->pdev->dev);
	atomic_set(&perf->tdone, 0);

	/* launch kernel threads */
	for (i = 0; i < perf->perf_threads; i++) {
		struct pthr_ctx *pctx;

		pctx = &perf->pthr_ctx[i];
		atomic_set(&pctx->dma_sync, 0);
		pctx->perf = perf;
		pctx->wq = &wq;
		pctx->thread =
			kthread_create_on_node(ntb_perf_thread,
					       (void *)pctx,
					       node, "ntb_perf %d", i);
		if (IS_ERR(pctx->thread)) {
			pctx->thread = NULL;
			goto err;
		} else {
			wake_up_process(pctx->thread);
		}
	}

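	/* wait for all threads to finish before reaping them */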
	wait_event_interruptible(wq,
		atomic_read(&perf->tdone) == perf->perf_threads);

	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return count;

err:
	threads_cleanup(perf);
	mutex_unlock(&perf->run_mutex);
	return -ENXIO;
}

static const struct file_operations ntb_perf_debugfs_run = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_run_read,
	.write = debugfs_run_write,
};

static int perf_debugfs_setup(struct perf_ctx *perf)
{
	struct pci_dev *pdev = perf->ntb->pdev;

	if (!debugfs_initialized())
		return -ENODEV;

	if (!perf_debugfs_dir) {
		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
		if (!perf_debugfs_dir)
			return -ENODEV;
	}

	perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
						    perf_debugfs_dir);
	if (!perf->debugfs_node_dir)
		return -ENODEV;

	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
						perf->debugfs_node_dir, perf,
						&ntb_perf_debugfs_run);
	if (!perf->debugfs_run)
		return -ENODEV;

	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
						  perf->debugfs_node_dir,
						  &perf->perf_threads);
	if (!perf->debugfs_threads)
		return -ENODEV;

	return 0;
}

static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct pci_dev *pdev = ntb->pdev;
	struct perf_ctx *perf;
	int node;
	int rc = 0;

	if (ntb_spad_count(ntb) < MAX_SPAD) {
		dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
			DRIVER_NAME);
		return -EIO;
	}

	node = dev_to_node(&pdev->dev);

	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
	if (!perf) {
		rc = -ENOMEM;
		goto err_perf;
	}

	perf->ntb = ntb;
	perf->perf_threads = 1;
	atomic_set(&perf->tsync, 0);
	mutex_init(&perf->run_mutex);
	spin_lock_init(&perf->db_lock);
	perf_setup_mw(ntb, perf);
	init_waitqueue_head(&perf->link_wq);
	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);

	rc = ntb_set_ctx(ntb, perf, &perf_ops);
	if (rc)
		goto err_ctx;

	perf->link_is_up = false;
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	ntb_link_event(ntb);

	rc = perf_debugfs_setup(perf);
	if (rc)
		goto err_ctx;

	perf_clear_thread_status(perf);

	return 0;

err_ctx:
	cancel_delayed_work_sync(&perf->link_work);
	kfree(perf);
err_perf:
	return rc;
}

static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
{
	struct perf_ctx *perf = ntb->ctx;
	int i;

	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);

	mutex_lock(&perf->run_mutex);

	cancel_delayed_work_sync(&perf->link_work);

	ntb_clear_ctx(ntb);
	ntb_link_disable(ntb);

	debugfs_remove_recursive(perf_debugfs_dir);
	perf_debugfs_dir = NULL;

	if (use_dma) {
		for (i = 0; i < MAX_THREADS; i++) {
			struct pthr_ctx *pctx = &perf->pthr_ctx[i];

			if (pctx->dma_chan)
				dma_release_channel(pctx->dma_chan);
		}
	}

	kfree(perf);
}

static struct ntb_client perf_client = {
	.ops = {
		.probe = perf_probe,
		.remove = perf_remove,
	},
};
module_ntb_client(perf_client);