// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  IB infrastructure:
 *  Establish SMC-R as an Infiniband Client to be notified about added and
 *  removed IB devices of type RDMA.
 *  Determine device and port characteristics for these IB devices.
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/scatterlist.h>
#include <linux/wait.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc_pnet.h"
#include "smc_ib.h"
#include "smc_core.h"
#include "smc_wr.h"
#include "smc.h"

28 29
#define SMC_MAX_CQE 32766	/* max. # of completion queue elements */

30 31 32 33 34
#define SMC_QP_MIN_RNR_TIMER		5
#define SMC_QP_TIMEOUT			15 /* 4096 * 2 ** timeout usec */
#define SMC_QP_RETRY_CNT			7 /* 7: infinite */
#define SMC_QP_RNR_RETRY			7 /* 7: infinite */

35 36 37 38 39 40 41 42 43 44 45
struct smc_ib_devices smc_ib_devices = {	/* smc-registered ib devices */
	.lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock),
	.list = LIST_HEAD_INIT(smc_ib_devices.list),
};

#define SMC_LOCAL_SYSTEMID_RESET	"%%%%%%%"

u8 local_systemid[SMC_SYSTEMID_LEN] = SMC_LOCAL_SYSTEMID_RESET;	/* unique system
								 * identifier
								 */

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
static int smc_ib_modify_qp_init(struct smc_link *lnk)
{
	struct ib_qp_attr qp_attr;

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.qp_state = IB_QPS_INIT;
	qp_attr.pkey_index = 0;
	qp_attr.port_num = lnk->ibport;
	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE
				| IB_ACCESS_REMOTE_WRITE;
	return ib_modify_qp(lnk->roce_qp, &qp_attr,
			    IB_QP_STATE | IB_QP_PKEY_INDEX |
			    IB_QP_ACCESS_FLAGS | IB_QP_PORT);
}

static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
{
	enum ib_qp_attr_mask qp_attr_mask =
		IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
		IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
	struct ib_qp_attr qp_attr;

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.qp_state = IB_QPS_RTR;
	qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
71
	qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
72
	rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
73
	rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);
74
	rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
75
	memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
	       sizeof(lnk->peer_mac));
	qp_attr.dest_qp_num = lnk->peer_qpn;
	qp_attr.rq_psn = lnk->peer_psn; /* starting receive packet seq # */
	qp_attr.max_dest_rd_atomic = 1; /* max # of resources for incoming
					 * requests
					 */
	qp_attr.min_rnr_timer = SMC_QP_MIN_RNR_TIMER;

	return ib_modify_qp(lnk->roce_qp, &qp_attr, qp_attr_mask);
}

/* move the link's QP into the RTS (ready-to-send) state */
int smc_ib_modify_qp_rts(struct smc_link *lnk)
{
	struct ib_qp_attr qp_attr;

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.timeout = SMC_QP_TIMEOUT;	/* local ack timeout */
	qp_attr.retry_cnt = SMC_QP_RETRY_CNT;	/* retry count */
	qp_attr.rnr_retry = SMC_QP_RNR_RETRY;	/* RNR retries, 7=infinite */
	qp_attr.sq_psn = lnk->psn_initial;	/* starting send packet seq # */
	qp_attr.max_rd_atomic = 1;	/* # of outstanding RDMA reads and
					 * atomic ops allowed
					 */
	return ib_modify_qp(lnk->roce_qp, &qp_attr,
			    IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
			    IB_QP_SQ_PSN | IB_QP_RNR_RETRY |
			    IB_QP_MAX_QP_RD_ATOMIC);
}

int smc_ib_modify_qp_reset(struct smc_link *lnk)
{
	struct ib_qp_attr qp_attr;

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.qp_state = IB_QPS_RESET;
	return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE);
}

int smc_ib_ready_link(struct smc_link *lnk)
{
117
	struct smc_link_group *lgr = smc_get_lgr(lnk);
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
	int rc = 0;

	rc = smc_ib_modify_qp_init(lnk);
	if (rc)
		goto out;

	rc = smc_ib_modify_qp_rtr(lnk);
	if (rc)
		goto out;
	smc_wr_remember_qp_attr(lnk);
	rc = ib_req_notify_cq(lnk->smcibdev->roce_cq_recv,
			      IB_CQ_SOLICITED_MASK);
	if (rc)
		goto out;
	rc = smc_wr_rx_post_init(lnk);
	if (rc)
		goto out;
	smc_wr_remember_qp_attr(lnk);

	if (lgr->role == SMC_SERV) {
		rc = smc_ib_modify_qp_rts(lnk);
		if (rc)
			goto out;
		smc_wr_remember_qp_attr(lnk);
	}
out:
	return rc;
}

147
static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
148
{
149
	const struct ib_gid_attr *attr;
150
	int rc;
151

152 153
	attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0);
	if (IS_ERR(attr))
154 155
		return -ENODEV;

156
	rc = rdma_read_gid_l2_fields(attr, NULL, smcibdev->mac[ibport - 1]);
157 158
	rdma_put_gid_attr(attr);
	return rc;
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
}

/* Create an identifier unique for this instance of SMC-R.
 * It consists of a random 2-byte prefix followed by the MAC address of
 * the first active registered IB device, and is delivered to the peer
 * during connection initialization.
 */
static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
						u8 ibport)
{
	get_random_bytes(&local_systemid[0], 2);
	memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
	       sizeof(smcibdev->mac[ibport - 1]));
}

/* check whether the cached state of the given IB port is ACTIVE */
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
{
	if (smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE)
		return true;
	return false;
}

179 180 181 182
/* determine the gid for an ib-device port and vlan id */
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
			 unsigned short vlan_id, u8 gid[], u8 *sgid_index)
{
183
	const struct ib_gid_attr *attr;
184
	const struct net_device *ndev;
185 186 187
	int i;

	for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
188 189
		attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i);
		if (IS_ERR(attr))
190
			continue;
191

192 193 194
		rcu_read_lock();
		ndev = rdma_read_gid_attr_ndev_rcu(attr);
		if (!IS_ERR(ndev) &&
195 196 197 198
		    ((!vlan_id && !is_vlan_dev(attr->ndev)) ||
		     (vlan_id && is_vlan_dev(attr->ndev) &&
		      vlan_dev_vlan_id(attr->ndev) == vlan_id)) &&
		    attr->gid_type == IB_GID_TYPE_ROCE) {
199
			rcu_read_unlock();
200
			if (gid)
201
				memcpy(gid, &attr->gid, SMC_GID_SIZE);
202
			if (sgid_index)
203 204
				*sgid_index = attr->index;
			rdma_put_gid_attr(attr);
205 206
			return 0;
		}
207
		rcu_read_unlock();
208
		rdma_put_gid_attr(attr);
209 210 211 212
	}
	return -ENODEV;
}

213 214 215 216 217 218 219 220 221 222 223
static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
{
	int rc;

	memset(&smcibdev->pattr[ibport - 1], 0,
	       sizeof(smcibdev->pattr[ibport - 1]));
	rc = ib_query_port(smcibdev->ibdev, ibport,
			   &smcibdev->pattr[ibport - 1]);
	if (rc)
		goto out;
	/* the SMC protocol requires specification of the RoCE MAC address */
224
	rc = smc_ib_fill_mac(smcibdev, ibport);
225 226 227 228 229 230 231 232 233 234 235
	if (rc)
		goto out;
	if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
		     sizeof(local_systemid)) &&
	    smc_ib_port_active(smcibdev, ibport))
		/* create unique system identifier */
		smc_ib_define_local_systemid(smcibdev, ibport);
out:
	return rc;
}

236 237 238 239 240 241 242 243 244 245
/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
	struct smc_ib_device *smcibdev = container_of(
		work, struct smc_ib_device, port_event_work);
	u8 port_idx;

	for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
		smc_ib_remember_port_attr(smcibdev, port_idx + 1);
		clear_bit(port_idx, &smcibdev->port_event_mask);
246 247
		if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
			set_bit(port_idx, smcibdev->ports_going_away);
248
			smc_port_terminate(smcibdev, port_idx + 1);
249 250 251
		} else {
			clear_bit(port_idx, smcibdev->ports_going_away);
		}
252 253 254 255 256 257 258 259
	}
}

/* can be called in IRQ context */
static void smc_ib_global_event_handler(struct ib_event_handler *handler,
					struct ib_event *ibevent)
{
	struct smc_ib_device *smcibdev;
260
	bool schedule = false;
261 262 263 264 265 266
	u8 port_idx;

	smcibdev = container_of(handler, struct smc_ib_device, event_handler);

	switch (ibevent->event) {
	case IB_EVENT_DEVICE_FATAL:
267
		/* terminate all ports on device */
268
		for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) {
269
			set_bit(port_idx, &smcibdev->port_event_mask);
270 271 272
			if (!test_and_set_bit(port_idx,
					      smcibdev->ports_going_away))
				schedule = true;
273
		}
274 275
		if (schedule)
			schedule_work(&smcibdev->port_event_work);
276
		break;
277 278
	case IB_EVENT_PORT_ACTIVE:
		port_idx = ibevent->element.port_num - 1;
279 280 281 282 283 284 285 286 287 288 289 290
		if (port_idx >= SMC_MAX_PORTS)
			break;
		set_bit(port_idx, &smcibdev->port_event_mask);
		if (test_and_clear_bit(port_idx, smcibdev->ports_going_away))
			schedule_work(&smcibdev->port_event_work);
		break;
	case IB_EVENT_PORT_ERR:
		port_idx = ibevent->element.port_num - 1;
		if (port_idx >= SMC_MAX_PORTS)
			break;
		set_bit(port_idx, &smcibdev->port_event_mask);
		if (!test_and_set_bit(port_idx, smcibdev->ports_going_away))
291
			schedule_work(&smcibdev->port_event_work);
292 293 294 295 296 297 298
		break;
	case IB_EVENT_GID_CHANGE:
		port_idx = ibevent->element.port_num - 1;
		if (port_idx >= SMC_MAX_PORTS)
			break;
		set_bit(port_idx, &smcibdev->port_event_mask);
		schedule_work(&smcibdev->port_event_work);
299 300 301 302 303 304
		break;
	default:
		break;
	}
}

305 306
void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
{
307 308
	if (lnk->roce_pd)
		ib_dealloc_pd(lnk->roce_pd);
309 310 311 312 313 314 315
	lnk->roce_pd = NULL;
}

/* allocate a protection domain for the link; returns 0 or a negative errno */
int smc_ib_create_protection_domain(struct smc_link *lnk)
{
	struct ib_pd *pd;

	pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
	if (IS_ERR(pd)) {
		lnk->roce_pd = NULL;
		return PTR_ERR(pd);
	}
	lnk->roce_pd = pd;
	return 0;
}

static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
{
325 326
	struct smc_link *lnk = (struct smc_link *)priv;
	struct smc_ib_device *smcibdev = lnk->smcibdev;
327 328
	u8 port_idx;

329
	switch (ibevent->event) {
330
	case IB_EVENT_QP_FATAL:
331
	case IB_EVENT_QP_ACCESS_ERR:
332
		port_idx = ibevent->element.qp->port - 1;
333 334 335 336
		if (port_idx >= SMC_MAX_PORTS)
			break;
		set_bit(port_idx, &smcibdev->port_event_mask);
		if (!test_and_set_bit(port_idx, smcibdev->ports_going_away))
337
			schedule_work(&smcibdev->port_event_work);
338 339 340 341 342 343 344 345
		break;
	default:
		break;
	}
}

void smc_ib_destroy_queue_pair(struct smc_link *lnk)
{
346 347
	if (lnk->roce_qp)
		ib_destroy_qp(lnk->roce_qp);
348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
	lnk->roce_qp = NULL;
}

/* create a queue pair within the protection domain for a link */
int smc_ib_create_queue_pair(struct smc_link *lnk)
{
	struct ib_qp_init_attr qp_attr = {
		.event_handler = smc_ib_qp_event_handler,
		.qp_context = lnk,
		.send_cq = lnk->smcibdev->roce_cq_send,
		.recv_cq = lnk->smcibdev->roce_cq_recv,
		.srq = NULL,
		.cap = {
				/* include unsolicited rdma_writes as well,
				 * there are max. 2 RDMA_WRITE per 1 WR_SEND
				 */
364
			.max_send_wr = SMC_WR_BUF_CNT * 3,
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
			.max_recv_wr = SMC_WR_BUF_CNT * 3,
			.max_send_sge = SMC_IB_MAX_SEND_SGE,
			.max_recv_sge = 1,
		},
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	int rc;

	lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr);
	rc = PTR_ERR_OR_ZERO(lnk->roce_qp);
	if (IS_ERR(lnk->roce_qp))
		lnk->roce_qp = NULL;
	else
		smc_wr_remember_qp_attr(lnk);
	return rc;
}

/* deregister (and thereby release) a memory region */
void smc_ib_put_memory_region(struct ib_mr *mr)
{
	ib_dereg_mr(mr);
}

/* map buf_slot's dma-mapped SG list into its memory region;
 * returns the number of SG entries covered, or a negative errno
 */
static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
{
	unsigned int offset = 0;
	int mapped;

	/* map the largest prefix of a dma mapped SG list */
	mapped = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
			      buf_slot->sgt[SMC_SINGLE_LINK].sgl,
			      buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
			      &offset, PAGE_SIZE);
	return mapped;
}

/* Allocate a memory region and map the dma mapped SG list of buf_slot */
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
			     struct smc_buf_desc *buf_slot)
{
	struct ib_mr *mr;

	if (buf_slot->mr_rx[SMC_SINGLE_LINK])
		return 0; /* already done */

	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
	if (IS_ERR(mr)) {
		buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
		return PTR_ERR(mr);
	}
	buf_slot->mr_rx[SMC_SINGLE_LINK] = mr;

	/* the whole buffer must be covered by a single MR mapping */
	if (smc_ib_map_mr_sg(buf_slot) != 1)
		return -EINVAL;

	return 0;
}

425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
/* synchronize buffer usage for cpu access */
void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
			    struct smc_buf_desc *buf_slot,
			    enum dma_data_direction data_direction)
{
	struct scatterlist *sg;
	unsigned int i;

	/* for now there is just one DMA address */
	for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
		    buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
		if (!sg_dma_len(sg))
			break;
		ib_dma_sync_single_for_cpu(smcibdev->ibdev,
					   sg_dma_address(sg),
					   sg_dma_len(sg),
					   data_direction);
	}
}

/* synchronize buffer usage for device access */
void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
			       struct smc_buf_desc *buf_slot,
			       enum dma_data_direction data_direction)
{
	struct scatterlist *sg;
	unsigned int i;

	/* for now there is just one DMA address */
	for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
		    buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
		if (!sg_dma_len(sg))
			break;	/* stop at the first unmapped entry */
		ib_dma_sync_single_for_device(smcibdev->ibdev,
					      sg_dma_address(sg),
					      sg_dma_len(sg),
					      data_direction);
	}
}

465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
/* Map a new TX or RX buffer SG-table to DMA */
int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
		      struct smc_buf_desc *buf_slot,
		      enum dma_data_direction data_direction)
{
	int mapped_nents;

	mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
				     buf_slot->sgt[SMC_SINGLE_LINK].sgl,
				     buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
				     data_direction);
	if (!mapped_nents)
		return -ENOMEM;

	return mapped_nents;
}

/* Unmap a TX or RX buffer SG-table from DMA; a zeroed dma_address in the
 * first entry marks the table as already unmapped
 */
void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
			 struct smc_buf_desc *buf_slot,
			 enum dma_data_direction data_direction)
{
	if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
		return; /* already unmapped */

	ib_dma_unmap_sg(smcibdev->ibdev,
			buf_slot->sgt[SMC_SINGLE_LINK].sgl,
			buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
			data_direction);
	/* mark as unmapped so a second unmap becomes a no-op */
	buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
}

496 497 498
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
{
	struct ib_cq_init_attr cqattr =	{
499 500
		.cqe = SMC_MAX_CQE, .comp_vector = 0 };
	int cqe_size_order, smc_order;
501 502
	long rc;

503 504 505 506 507
	/* the calculated number of cq entries fits to mlx5 cq allocation */
	cqe_size_order = cache_line_size() == 128 ? 7 : 6;
	smc_order = MAX_ORDER - cqe_size_order - 1;
	if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
		cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
	smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
					      smc_wr_tx_cq_handler, NULL,
					      smcibdev, &cqattr);
	rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send);
	if (IS_ERR(smcibdev->roce_cq_send)) {
		smcibdev->roce_cq_send = NULL;
		return rc;
	}
	smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev,
					      smc_wr_rx_cq_handler, NULL,
					      smcibdev, &cqattr);
	rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_recv);
	if (IS_ERR(smcibdev->roce_cq_recv)) {
		smcibdev->roce_cq_recv = NULL;
		goto err;
	}
	smc_wr_add_dev(smcibdev);
	smcibdev->initialized = 1;
	return rc;

err:
	ib_destroy_cq(smcibdev->roce_cq_send);
	return rc;
}

/* tear down the CQs and WR state created by smc_ib_setup_per_ibdev() */
static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
{
	if (!smcibdev->initialized)
		return;
	smcibdev->initialized = 0;
	ib_destroy_cq(smcibdev->roce_cq_recv);
	ib_destroy_cq(smcibdev->roce_cq_send);
	smc_wr_remove_dev(smcibdev);
}

543 544 545 546 547 548
static struct ib_client smc_ib_client;

/* callback function for ib_register_client() */
static void smc_ib_add_dev(struct ib_device *ibdev)
{
	struct smc_ib_device *smcibdev;
549 550
	u8 port_cnt;
	int i;
551 552 553 554 555 556 557 558 559

	if (ibdev->node_type != RDMA_NODE_IB_CA)
		return;

	smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL);
	if (!smcibdev)
		return;

	smcibdev->ibdev = ibdev;
560
	INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
561 562
	atomic_set(&smcibdev->lnk_cnt, 0);
	init_waitqueue_head(&smcibdev->lnks_deleted);
563 564 565 566
	spin_lock(&smc_ib_devices.lock);
	list_add_tail(&smcibdev->list, &smc_ib_devices.list);
	spin_unlock(&smc_ib_devices.lock);
	ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
567 568 569 570 571 572 573 574
	INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
			      smc_ib_global_event_handler);
	ib_register_event_handler(&smcibdev->event_handler);

	/* trigger reading of the port attributes */
	port_cnt = smcibdev->ibdev->phys_port_cnt;
	for (i = 0;
	     i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
U
Ursula Braun 已提交
575
	     i++) {
576
		set_bit(i, &smcibdev->port_event_mask);
U
Ursula Braun 已提交
577 578 579 580
		/* determine pnetids of the port */
		smc_pnetid_by_dev_port(ibdev->dev.parent, i,
				       smcibdev->pnetid[i]);
	}
581
	schedule_work(&smcibdev->port_event_work);
582 583
}

584
/* callback function for ib_unregister_client() */
585 586 587 588 589 590 591 592 593
static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
{
	struct smc_ib_device *smcibdev;

	smcibdev = ib_get_client_data(ibdev, &smc_ib_client);
	ib_set_client_data(ibdev, &smc_ib_client, NULL);
	spin_lock(&smc_ib_devices.lock);
	list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
	spin_unlock(&smc_ib_devices.lock);
594
	smc_smcr_terminate_all(smcibdev);
595
	smc_ib_cleanup_per_ibdev(smcibdev);
596
	ib_unregister_event_handler(&smcibdev->event_handler);
597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
	kfree(smcibdev);
}

/* SMC registers as an IB client to be notified about RDMA devices */
static struct ib_client smc_ib_client = {
	.name	= "smc_ib",
	.add	= smc_ib_add_dev,
	.remove = smc_ib_remove_dev,
};

/* register the SMC IB client with the RDMA core at module init */
int __init smc_ib_register_client(void)
{
	return ib_register_client(&smc_ib_client);
}

/* unregister the SMC IB client from the RDMA core */
void smc_ib_unregister_client(void)
{
	ib_unregister_client(&smc_ib_client);
}