// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"

#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)

static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

struct smc_ib_up_work {
	struct work_struct	work;
	struct smc_link_group	*lgr;
	struct smc_ib_device	*smcibdev;
	u8			ibport;
};

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_up_work(struct work_struct *work);
static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}

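/* schedule the delayed freeing of a link group that is no longer used */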
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing && !lgr->freefast) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	if (!lgr->freeing && !lgr->freefast) {
		lgr->freefast = 1;
		mod_delayed_work(system_wq, &lgr->free_work,
				 SMC_LGR_FREE_DELAY_FAST);
	}
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @conn	connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	return 0;
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_conn_free(conn);
	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
	smc_lgr_schedule_free_work_fast(lgr);
}

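/* set all usable links of a link group to INACTIVE and wake up llc waiters */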
static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_usable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_interruptible_all(&lgr->llc_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

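/* delayed work: free a link group once its removal delay has expired and it
 * still has no connections
 */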
static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				continue;
		}
		break;
	}
	return link_id;
}

int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->state = SMC_LNK_ACTIVATING;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freefast = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev->dev);
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
		lgr_list = &ini->ism_dev->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
		       SMC_MAX_PNETID_LEN);
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_lgr;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}

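/* compute the free space in the peer's receive buffer (RMBE) */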
static int smc_write_space(struct smc_connection *conn)
{
	int buffer_len = conn->peer_rmbe_size;
	union smc_host_cursor prod;
	union smc_host_cursor cons;
	int space;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space */
	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
	return space;
}

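/* re-synchronize the send cursors of a connection after it has been switched
 * to another link
 */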
static int smc_switch_cursor(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set
		 * fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn);
		if (!rc) {
			schedule_delayed_work(&conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	}
	return rc;
}

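/* move the connections of a link group away from a failing link; returns the
 * alternate link now in use, or NULL if no other active link was available
 */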
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
		    i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			conn->lnk = to_lnk;
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		conn->lnk = to_lnk;
		rc = smc_switch_cursor(smc);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	return to_lnk;
}

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
	}
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc)
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
		smcr_buf_unuse(conn->rmb_desc, lgr);
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	smc_lgr_free_bufs(lgr);
	if (lgr->is_smcd) {
		if (!lgr->terminating) {
			smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
			put_device(&lgr->smcd->dev);
		}
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}

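/* wake up any waiters of an SMC socket */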
static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

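/* release the device resources of a terminating link group and notify the peer */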
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	smc_conn_kill(conn, true);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

/* link is up - establish alternate link if applicable */
static void smcr_link_up(struct smc_link_group *lgr,
			 struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link *link = NULL;

	if (list_empty(&lgr->list) ||
	    lgr->type == SMC_LGR_SYMMETRIC ||
	    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
		return;

	if (lgr->role == SMC_SERV) {
		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_srv_add_link_local(link);
	} else {
		/* invite server to start add link processing */
		u8 gid[SMC_GID_SIZE];

		if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
					 NULL))
			return;
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* some other llc task is ongoing */
			wait_event_interruptible_timeout(lgr->llc_waiter,
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
		}
		if (list_empty(&lgr->list) ||
		    !smc_ib_port_active(smcibdev, ibport))
			return; /* lgr or device no longer active */
		link = smc_llc_usable_link(lgr);
		if (!link)
			return;
		smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
				      NULL, SMC_LLC_REQ);
	}
}

void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_ib_up_work *ib_work;
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
			continue;
		ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
		if (!ib_work)
			continue;
		INIT_WORK(&ib_work->work, smc_link_up_work);
		ib_work->lgr = lgr;
		ib_work->smcibdev = smcibdev;
		ib_work->ibport = ibport;
		schedule_work(&ib_work->work);
	}
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_interruptible_timeout(lgr->llc_waiter,
				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
					 SMC_LLC_DEL_LOST_PATH);
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		smcr_link_down(lnk);
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		schedule_work(&lnk->link_down_wrk);
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

static void smc_link_up_work(struct work_struct *work)
{
	struct smc_ib_up_work *ib_work = container_of(work,
						      struct smc_ib_up_work,
						      work);
	struct smc_link_group *lgr = ib_work->lgr;

	if (list_empty(&lgr->list))
		goto out;
	smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
out:
	kfree(ib_work);
}

static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_interruptible_all(&lgr->llc_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}

/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}

static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
	ini->cln_first_contact = SMC_FIRST_CONTACT;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->srv_first_contact)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    lgr->vlan_id == ini->vlan_id &&
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->cln_first_contact = SMC_REUSE_CONTACT;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->srv_first_contact &&
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
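/* examples: 16384 -> 0, 65536 -> 2, 100000 -> 3 (i.e. a 131072 byte buffer) */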
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}

static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}

#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

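/* allocate or reuse a send buffer or RMB for a connection; start from the
 * socket buffer size and fall back to smaller sizes if allocation fails
 */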
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {

		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
1707
		}
1708
		bufsize = smc_uncompress_bufsize(bufsize_short);
1709 1710 1711
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

1712
		/* check for reusable slot in the link group */
1713
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
1714 1715
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
U
		}
1718

1719 1720 1721 1722 1723
		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc)
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
	return rc;
}

static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
					  clc->rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

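/* mark all SMC-R ports and SMC-D devices as going away, so that no new link
 * groups are set up during shutdown
 */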
static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	spin_lock(&smc_ib_devices.lock);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	spin_unlock(&smc_ib_devices.lock);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	spin_unlock(&smcd_dev_list.lock);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	spin_lock(&smcd_dev_list.lock);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	spin_unlock(&smcd_dev_list.lock);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}