// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <linux/mutex.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing && !lgr->freefast) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

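/* schedule the free_work with the short SMC_LGR_FREE_DELAY_FAST delay */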
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	if (!lgr->freeing && !lgr->freefast) {
		lgr->freefast = 1;
		mod_delayed_work(system_wq, &lgr->free_work,
				 SMC_LGR_FREE_DELAY_FAST);
	}
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn	connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	return 0;
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_conn_free(conn);
	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
	smc_lgr_schedule_free_work_fast(lgr);
}

static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_usable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_all(&lgr->llc_msg_waiter);
	wake_up_all(&lgr->llc_flow_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

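/* delayed free_work handler: frees the link group unless connections showed
 * up again or another context is already freeing it
 */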
static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				continue;
		}
		break;
	}
	return link_id;
}

int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freefast = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev->dev);
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
		lgr_list = &ini->ism_dev->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
		       SMC_MAX_PNETID_LEN);
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_lgr;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}

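/* compute the writable space in the peer RMB from the local view of the
 * producer and consumer cursors
 */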
static int smc_write_space(struct smc_connection *conn)
{
	int buffer_len = conn->peer_rmbe_size;
	union smc_host_cursor prod;
	union smc_host_cursor cons;
	int space;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space */
	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
	return space;
}

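/* re-sync the tx cursors of a connection after it moved to another link:
 * roll the prod cursor back to the last state confirmed by the peer and
 * send a validation CDC message on the new link
 */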
static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
			     struct smc_wr_buf *wr_buf)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set
		 * fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
		if (!rc) {
			schedule_delayed_work(&conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	} else {
		smc_wr_tx_put_slot(conn->lnk,
				   (struct smc_wr_tx_pend_priv *)pend);
	}
	return rc;
}

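/* move all connections of a link group from one link to an alternate active
 * link; returns the new link, or NULL if no usable link is left
 */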
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_cdc_tx_pend *pend;
	struct smc_connection *conn;
	struct smc_wr_buf *wr_buf;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
		    i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			conn->lnk = to_lnk;
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* pre-fetch buffer outside of send_lock, might sleep */
		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		conn->lnk = to_lnk;
		rc = smc_switch_cursor(smc, pend, wr_buf);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	return to_lnk;
}

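/* an RMB is no longer used by its connection: delete the rkey at the peer if
 * it was confirmed, then free the buffer on registration error or mark it
 * reusable
 */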
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
	}
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd) {
		if (!lgr->terminating) {
			smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
			put_device(&lgr->smcd->dev);
		}
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

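/* unregister all DMBs of an SMC-D link group with the ISM device */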
static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}

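/* wake up socket waiters: write space, data ready and state change */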
static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

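/* release the lgr's transport resources during termination: signal shutdown
 * and unregister DMBs for SMC-D, send delete-link and deactivate all links
 * for SMC-R
 */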
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	smc_conn_kill(conn, true);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

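/* trigger local add-link processing in all link groups that use the given
 * port's pnetid and are not yet symmetric
 */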
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		struct smc_link *link;

		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
			continue;

		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (link)
			smc_llc_add_link_local(link);
	}
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_timeout(lgr->llc_flow_waiter,
				(list_empty(&lgr->list) ||
				 lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		if (!list_empty(&lgr->list)) {
			smc_llc_send_delete_link(to_lnk, del_link_id,
						 SMC_LLC_REQ, true,
						 SMC_LLC_DEL_LOST_PATH);
			smcr_link_clear(lnk, true);
		}
		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		smcr_link_down(lnk);
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		schedule_work(&lnk->link_down_wrk);
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_all(&lgr->llc_msg_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}

/* Determine vlan of internal TCP socket.
 * @ini: init info; the determined vlan id is stored in ini->vlan_id
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

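/* check if an existing SMC-R link group matches the peer: same system id and
 * role, and an active link with matching gid, mac and (for clients) QP number
 */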
static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
	ini->cln_first_contact = SMC_FIRST_CONTACT;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == ini->vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->cln_first_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->srv_first_contact &&
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}

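/* allocate a new SMC-R buffer descriptor with page backing of the given size */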
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

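/* reserve a free slot in the link group's rtokens array */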
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	mutex_unlock(&smc_ib_devices.mutex);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	mutex_unlock(&smcd_dev_list.mutex);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	mutex_unlock(&smcd_dev_list.mutex);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}