// SPDX-License-Identifier: GPL-2.0
2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
16
#include <linux/wait.h>
17
#include <linux/reboot.h>
18
#include <linux/mutex.h>
19 20 21
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
22
#include <rdma/ib_cache.h>
23 24 25 26 27

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
28
#include "smc_wr.h"
U
Ursula Braun 已提交
29
#include "smc_llc.h"
30
#include "smc_cdc.h"
31
#include "smc_close.h"
32
#include "smc_ism.h"
33

34 35
#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
36
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
37

38 39 40 41 42
static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};
U
Ursula Braun 已提交
43

44
static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
45 46
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

47 48
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
49
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
50

51
static void smc_link_down_work(struct work_struct *work);
52

53 54 55 56 57 58 59 60 61 62 63 64 65
/* return head of link group list and its lock for a given link group:
 * SMC-R groups live on the global smc_lgr_list, SMC-D groups on the
 * per-ISM-device list.
 */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (!lgr->is_smcd) {
		*lgr_lock = &smc_lgr_list.lock;
		return &smc_lgr_list.list;
	}

	*lgr_lock = &lgr->smcd->lgr_lock;
	return &lgr->smcd->lgr_list;
}

66 67 68 69 70 71 72 73 74 75
/* account one more link on the link's RoCE device port */
static void smc_ibdev_cnt_inc(struct smc_link *lnk)
{
	int port_idx = lnk->ibport - 1;	/* ibport is 1-based */

	atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[port_idx]);
}

/* account one link less on the link's RoCE device port */
static void smc_ibdev_cnt_dec(struct smc_link *lnk)
{
	int port_idx = lnk->ibport - 1;	/* ibport is 1-based */

	atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[port_idx]);
}

76 77 78 79 80 81
/* (Re-)arm the delayed work that frees an idle link group. */
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing) {	/* once freeing is set, never re-arm */
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != otherwise.
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

117 118 119 120 121 122 123 124 125 126 127
/* assign an SMC-R link to the connection
 * @first: the very first connection expects links still in ACTIVATING state;
 *	   later connections require fully ACTIVE links.
 * Returns 0 on success, SMC_CLC_DECL_NOACTLINK if no usable link was found.
 */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		/* skip links that are not in the expected state or are
		 * marked asymmetric
		 */
		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		/* server: alternate between links based on connection count
		 * parity to balance the load
		 */
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;	/* fall back to first match */
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	atomic_inc(&conn->lnk->conn_cnt);
	return 0;
}

156 157 158 159 160
/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 * Returns 0 on success, a CLC decline reason code otherwise.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);	/* global token source */
	int rc;

	/* SMC-R only: pick a link for the connection first */
	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;	/* in use, try again */
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection.
 * Caller must hold lgr->conns_lock for writing.
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	if (conn->lnk)
		atomic_dec(&conn->lnk->conn_cnt);
	lgr->conns_num--;
	conn->alert_token_local = 0;	/* token 0 = unregistered */
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

201
/* Unregister connection from lgr.
 * Takes conns_lock itself; safe to call with an already-unregistered
 * connection (alert_token_local == 0).
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;	/* connection no longer belongs to the lgr */
}

217 218 219
void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
220 221
	struct list_head *lgr_list;
	spinlock_t *lgr_lock;
222 223 224 225 226

	if (!lgr)
		return;

	smc_conn_free(conn);
227 228 229 230 231 232
	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(lgr_list))
		list_del_init(lgr_list);
	spin_unlock_bh(lgr_lock);
233
	__smc_lgr_terminate(lgr, true);
234 235
}

236 237 238 239 240 241 242 243 244 245
/* set every usable link of the lgr to INACTIVE and wake all LLC waiters
 * so pending LLC activity can terminate
 */
static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].state = SMC_LNK_INACTIVE;
	}

	wake_up_all(&lgr->llc_msg_waiter);
	wake_up_all(&lgr->llc_flow_waiter);
}

U
Ursula Braun 已提交
250 251
static void smc_lgr_free(struct smc_link_group *lgr);

252 253 254 255 256
/* Delayed work: free the link group if it is still idle.
 * Bails out if another context already claimed the freeing (lgr->freeing)
 * or if connections were registered in the meantime.
 */
static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {	/* someone else is already freeing this lgr */
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	/* regular (non-terminating) shutdown: tell the peer first */
	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}

288 289 290 291 292
static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

293
	__smc_lgr_terminate(lgr, true);
294 295
}

296 297 298 299 300 301 302 303 304 305 306
/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
307
			if (smc_link_usable(&lgr->lnk[i]) &&
308 309 310 311 312 313 314 315
			    lgr->lnk[i].link_id == link_id)
				continue;
		}
		break;
	}
	return link_id;
}

316 317 318 319 320 321 322 323 324
/* cache the RoCE device name and netdev ifindex in the link itself */
static void smcr_copy_dev_info_to_link(struct smc_link *link)
{
	struct smc_ib_device *ibdev = link->smcibdev;
	int port_idx = link->ibport - 1;	/* ibport is 1-based */

	snprintf(link->ibname, sizeof(link->ibname), "%s",
		 ibdev->ibdev->name);
	link->ndev_ifidx = ibdev->ndev_ifidx[port_idx];
}

325 326
/* Initialize an SMC-R link: take device references, assign a link id,
 * set up the IB resources (GID, PD, QP, work requests) and the LLC layer.
 * On failure all partially acquired resources are released in reverse
 * order and the link is reset to UNUSED.
 * Returns 0 on success, negative error code otherwise.
 */
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	u8 rndvec[3];
	int rc;

	get_device(&ini->ib_dev->ibdev->dev);	/* put_device on error/clear */
	atomic_inc(&ini->ib_dev->lnk_cnt);
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	lnk->smcibdev = ini->ib_dev;
	lnk->ibport = ini->ib_port;
	smc_ibdev_cnt_inc(lnk);
	smcr_copy_dev_info_to_link(lnk);
	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
	atomic_set(&lnk->conn_cnt, 0);
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	/* lazily set up per-device IB resources on first use */
	if (!ini->ib_dev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
		if (rc)
			goto out;
	}
	/* random initial packet sequence number (24 bit) */
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	lnk->state = SMC_LNK_ACTIVATING;
	return 0;

	/* error unwinding: release resources in reverse acquisition order */
destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	smc_ibdev_cnt_dec(lnk);
	put_device(&ini->ib_dev->ibdev->dev);
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
		wake_up(&ini->ib_dev->lnks_deleted);
	return rc;
}

392
/* create a new SMC link group
 * Allocates and initializes a link group for either SMC-D or SMC-R,
 * sets up the first link (SMC-R) or ISM device association (SMC-D),
 * and adds the group to the appropriate list.
 * Returns 0 on success, a CLC decline reason code otherwise.
 */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	/* SMC-D on a VLAN needs the VLAN activated on the ISM device */
	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
				     ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
				     SMC_LGR_ID_SIZE, &lgr->id);
	if (!lgr->tx_wq) {
		rc = -ENOMEM;
		goto free_lgr;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	/* derive a new lgr id from the global counter */
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev[ini->ism_selected]->dev);
		lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
		lgr->smcd = ini->ism_dev[ini->ism_selected];
		lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->smc_version = ini->smcd_version;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
		       SMC_MAX_PNETID_LEN);
		smc_llc_lgr_init(lgr, smc);

		/* create the initial (single) link of the group */
		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc)
			goto free_wq;
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add_tail(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_wq:
	destroy_workqueue(lgr->tx_wq);
free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
	/* map internal error codes to CLC decline reasons for the caller */
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}

491 492 493 494 495 496 497 498 499 500 501 502 503 504
/* compute how many bytes of the peer's RMB are still free for writing */
static int smc_write_space(struct smc_connection *conn)
{
	union smc_host_cursor prod, cons;
	int buf_len = conn->peer_rmbe_size;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space: size minus bytes between cons and prod */
	return buf_len - smc_curs_diff(buf_len, &cons, &prod);
}

505 506
/* Rewind the tx cursors of a connection to the last confirmed state so
 * the data in flight is re-sent over the new link, then kick off the
 * tx worker (or release the pre-fetched send slot if the socket is
 * INIT/CLOSED). Called under conn->send_lock during link failover.
 * Returns 0 on success, error code from the validation send otherwise.
 */
static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
			     struct smc_wr_buf *wr_buf)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set
		 * fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		/* barriers pair with readers of sndbuf_space */
		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
		if (!rc) {
			/* restart tx and let the receiver re-check */
			queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	} else {
		/* socket not active: return the pre-fetched send slot */
		smc_wr_tx_put_slot(conn->lnk,
				   (struct smc_wr_tx_pend_priv *)pend);
	}
	return rc;
}

555 556 557 558 559 560 561 562
static void smc_switch_link_and_count(struct smc_connection *conn,
				      struct smc_link *to_lnk)
{
	atomic_dec(&conn->lnk->conn_cnt);
	conn->lnk = to_lnk;
	atomic_inc(&conn->lnk->conn_cnt);
}

563 564 565 566
/* Move all connections currently using @from_lnk to another active link
 * of the lgr (link failover). If no alternative link exists, schedule
 * termination of the whole link group.
 * @is_dev_err: failure affects the whole device/port, so links sharing
 *		that device+port are not candidates.
 * Returns the link switched to, or NULL on failure/termination.
 */
struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_cdc_tx_pend *pend;
	struct smc_connection *conn;
	struct smc_wr_buf *wr_buf;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	/* find an alternative active link */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		/* closing states need no cursor resync - just switch links */
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			smc_switch_link_and_count(conn, to_lnk);
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* pre-fetch buffer outside of send_lock, might sleep */
		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
		if (rc) {
			/* NOTE(review): the sock_hold() above is not
			 * released on this error path - verify
			 */
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		smc_switch_link_and_count(conn, to_lnk);
		rc = smc_switch_cursor(smc, pend, wr_buf);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc) {
			smcr_link_down_cond_sched(to_lnk);
			return NULL;
		}
		/* conns_lock was dropped - restart the tree walk */
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	return to_lnk;
}

639
/* Give up an SMC-R RMB: delete its rkey at the peer (if still registered
 * and the lgr is still linked), then either free it (registration had
 * failed, reuse impossible) or mark it reusable.
 */
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;	/* keep on list for reuse */
	}
}

669 670
static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
U
Ursula Braun 已提交
671
{
672
	if (conn->sndbuf_desc)
U
Ursula Braun 已提交
673
		conn->sndbuf_desc->used = 0;
674 675 676
	if (conn->rmb_desc && lgr->is_smcd)
		conn->rmb_desc->used = 0;
	else if (conn->rmb_desc)
677
		smcr_buf_unuse(conn->rmb_desc, lgr);
U
Ursula Braun 已提交
678 679
}

680 681 682
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		/* only unset the ISM connection while the lgr is linked */
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
		/* don't cancel ourselves when running from abort_work */
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	/* last connection gone: arm the delayed lgr free */
	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757
/* unregister a link from a buf_desc: drop the memory region (RMBs only)
 * and undo the DMA mapping and scatter/gather table for this link
 */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;	/* never mapped on this link - nothing to undo */
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	/* walk every buffer size slot; each list is protected by its lock */
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}

/* invalidate all rtoken entries (rkey and DMA address) kept for a link */
static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int idx = 0;

	while (idx < SMC_RMBS_PER_LGR_MAX) {
		lgr->rtokens[idx][lnk->link_idx].rkey = 0;
		lgr->rtokens[idx][lnk->link_idx].dma_addr = 0;
		idx++;
	}
}

758
/* Tear down an SMC-R link completely: LLC layer, buffer mappings,
 * rtokens, QP/PD/WR resources, device references - then reset the
 * link struct to UNUSED.
 * must be called under lgr->llc_conf_mutex lock
 * @log: pass-through to smc_llc_link_clear() (whether to log the event)
 */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;	/* link was never initialized or already cleared */
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	smc_ibdev_cnt_dec(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	/* save the device pointer before the memset wipes the link */
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}

783 784
/* free an SMC-R buffer descriptor: unmap it from every link of the lgr,
 * release the backing pages, then the descriptor itself
 */
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int lnk_idx;

	for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[lnk_idx]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

796 797 798
/* free an SMC-D buffer descriptor: unregister the DMB from the ISM
 * device (DMBs) or free the plain allocation (send buffers)
 */
static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

/* dispatch buffer freeing to the SMC-D or SMC-R specific routine */
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (!lgr->is_smcd)
		smcr_buf_free(lgr, is_rmb, buf_desc);
	else
		smcd_buf_free(lgr, is_rmb, buf_desc);
}

818
/* free all buffers of one kind (RMBs or send buffers) in every size slot */
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		/* _safe variant: entries are deleted while iterating */
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

837 838 839 840 841 842 843 844
/* release every buffer still owned by the link group */
static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	__smc_lgr_free_bufs(lgr, false);	/* send buffers */
	__smc_lgr_free_bufs(lgr, true);		/* RMBs */
}

845
/* remove a link group
 * Clears all remaining links (SMC-R), frees all buffers, destroys the
 * tx workqueue, drops device references and counters, and finally
 * frees the lgr itself.
 */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		/* llc_conf_mutex serializes against LLC configuration */
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	smc_lgr_free_bufs(lgr);
	destroy_workqueue(lgr->tx_wq);
	if (lgr->is_smcd) {
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
		/* wake waiters once the last lgr of the device is gone */
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

874 875 876 877 878 879 880 881 882 883 884 885 886 887
/* unregister all DMBs of the lgr from the ISM device (without freeing
 * the descriptors - that happens later via smc_lgr_free_bufs())
 */
static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		struct smc_buf_desc *buf_desc;

		list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
			/* restore original buf len before unregistering */
			buf_desc->len += sizeof(struct smcd_cdc_msg);
			smc_ism_unregister_dmb(lgr->smcd, buf_desc);
		}
	}
}

888 889 890 891 892 893 894 895
static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection
 * @soft: true allows rx tasklet processing to finish (tasklet_kill),
 *	  false only waits for a running tasklet (tasklet_unlock_wait)
 */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	/* peer-initiated SMC-D shutdown: just flag the abort, no send */
	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_tx_dismiss_slots(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

920 921 922 923 924 925
/* transport-specific cleanup during link group termination:
 * signal/unregister for SMC-D, LLC delete-all + link deactivation for SMC-R
 */
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
		smcd_unregister_all_dmbs(lgr);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;	/* default reason */
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}

935 936 937 938
/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 * Kills every remaining connection, runs transport cleanup, frees the lgr.
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		/* drop the lock: smc_conn_kill() needs the socket lock and
		 * may sleep; re-take it and restart from the tree root
		 */
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}

971 972
/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);	/* no new connections use this lgr */
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

988
/* Called when peer lgr shutdown (regularly or abnormally) is received
 * @peer_gid: 0 matches all peers (local device removal)
 * @vlan: VLAN_VID_MASK matches all VLANs
 */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029
/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	/* detach all lgrs of the device at once, then terminate each */
	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);	/* hard shutdown */
	}

	/* block until the last lgr of the device is really gone */
	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

1035 1036 1037 1038 1039 1040 1041 1042
/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		/* terminate everything: detach the whole list at once */
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		/* only take down the links using this device; the lgr may
		 * survive on its remaining link(s)
		 */
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	/* wait until the respective counters drop to zero */
	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

K
Karsten Graul 已提交
1076 1077 1078
/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	/* any type change invalidates previous asymmetry tagging */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	/* map the new type to a printable name for the state-change log */
	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric.
 * smcr_lgr_set_type() first clears all asym tags, then the single
 * link at asym_lnk_idx is re-tagged.
 */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	/* soft == true: regular (non-forced) connection kill */
	smc_conn_kill(conn, true);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

1131 1132 1133 1134 1135
/* an IB port became active: give link groups on the same pnetid that
 * are not yet fully linked a chance to add another link
 */
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		struct smc_link *link;

		/* skip lgrs of other pnetids and lgrs already fully linked */
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
			continue;

		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (link)
			smc_llc_add_link_local(link);
	}
}

1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163
/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	smc_ib_modify_qp_reset(lnk);
	/* move all connections of this link to another usable link */
	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			/* wait (bounded) for the ongoing flow to finish;
			 * the mutex must be dropped so that flow can progress
			 */
			wait_event_timeout(lgr->llc_flow_waiter,
				(list_empty(&lgr->list) ||
				 lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
				SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		/* lgr may have been terminated while the mutex was dropped */
		if (!list_empty(&lgr->list)) {
			smc_llc_send_delete_link(to_lnk, del_link_id,
						 SMC_LLC_REQ, true,
						 SMC_LLC_DEL_LOST_PATH);
			smcr_link_clear(lnk, true);
		}
		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
	}
}

/* take the link down if it is not already going down;
 * must be called under lgr->llc_conf_mutex lock
 */
void smcr_link_down_cond(struct smc_link *lnk)
{
	/* smc_link_downing() atomically claims the transition */
	if (smc_link_downing(&lnk->state))
		smcr_link_down(lnk);
}

/* like smcr_link_down_cond(), but defers the work to process context;
 * the worker will get the lgr->llc_conf_mutex lock
 */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state))
		schedule_work(&lnk->link_down_wrk);
}

/* an IB port failed: schedule link-down handling for every usable link
 * of every link group bound to that port's pnetid
 */
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

/* process-context worker behind smcr_link_down_cond_sched() */
static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	/* unblock any llc message waiters before taking llc_conf_mutex */
	wake_up_all(&lgr->llc_msg_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}

1244 1245 1246
/* Determine vlan of internal TCP socket.
 * Stores the result in ini->vlan_id (0 if no VLAN is involved).
 * Returns 0 on success, -ENOTCONN without a route, -ENODEV without a device.
 */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	/* walk down the stack of lower devices looking for a vlan device;
	 * rtnl protects the adjacency lists
	 */
	rtnl_lock();
	nest_lvl = ndev->lower_level;
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
			ini->vlan_id = vlan_dev_vlan_id(ndev);
			break;
		}
	}
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

1291 1292
/* check if an existing SMC-R link group can serve a peer described by
 * the CLC proposal data (system id, role, gid/mac, and - for clients -
 * the peer QP number)
 */
static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	int i;

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		/* servers do not check the QP number of the peer */
		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
			return true;
	}
	return false;
}
1311

1312 1313 1314 1315
/* check if an existing SMC-D link group belongs to the given ISM device
 * and peer GID
 */
static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	if (lgr->smcd != smcismdev)
		return false;
	return lgr->peer_gid == peer_gid;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	/* SMC-D lgrs live per ISM device, SMC-R lgrs in the global list */
	lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
				  &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
				  &smc_lgr_list.lock;
	ini->first_contact_local = 1;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->first_contact_peer)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
				    ini->ism_peer_gid[ini->ism_selected]) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
		    !lgr->sync_err &&
		    (ini->smcd_version == SMC_V2 ||
		     lgr->vlan_id == ini->vlan_id) &&
		    (role == SMC_CLNT || ini->is_smcd ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
			/* link group found */
			ini->first_contact_local = 0;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			/* the lgr is in use again - stop a pending free */
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->first_contact_peer &&
	    ini->first_contact_local) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->first_contact_local) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc)
			goto out;
	}
	/* initialize the per-connection CDC state */
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	} else {
		conn->rx_off = 0;
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}
U
Ursula Braun 已提交
1404

1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431
/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	/* (size - 1) >> 14 plus the +1 below rounds up to the next
	 * power of two >= size, expressed relative to 16K (2^14)
	 */
	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	if (compressed >= SMC_RMBE_SIZES)
		compressed = SMC_RMBE_SIZES - 1;
	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	/* compressed value n maps to 2^(n + 14) bytes, i.e. 0 -> 16K */
	unsigned int nr_bytes = 1U << ((int)compressed + 14);

	return (int)nr_bytes;
}

1432 1433
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct mutex *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	mutex_lock(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		/* atomically claim a free slot (used: 0 -> 1) */
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			mutex_unlock(lock);
			return buf_slot;
		}
	}
	mutex_unlock(lock);
	return NULL;
}

U
Ursula Braun 已提交
1452 1453 1454 1455 1456 1457 1458 1459 1460
/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505
/* map an rmb buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc;

	/* already mapped on this link - nothing to do */
	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
	if (rc)
		return rc;
	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
		   buf_desc->cpu_addr, buf_desc->len);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		rc = -EAGAIN;
		goto free_table;
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

1506 1507 1508
/* register a new rmb on IB device,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!rmb_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
			/* remember the failure so the rmb is not reused */
			rmb_desc->is_reg_err = true;
			return -EFAULT;
		}
		rmb_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561
/* map all buffers of one size-bucket list (rmbs or sndbufs) to a link */
static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	mutex_lock(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	mutex_unlock(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	/* iterate over all compressed-size buckets */
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

1562 1563 1564
/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	/* only rmbs need IB memory-region registration */
	mutex_lock(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_rmb(lnk, buf_desc);
			if (rc)
				goto out;
		}
	}
out:
	mutex_unlock(&lgr->rmbs_lock);
	return rc;
}

1586 1587
/* allocate a new SMC-R buffer descriptor with physically contiguous pages;
 * returns the descriptor or ERR_PTR(-ENOMEM / -EAGAIN)
 */
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	buf_desc->order = get_order(bufsize);
	/* __GFP_NORETRY/__GFP_NOWARN: failure is tolerated, the caller
	 * simply retries with a smaller size
	 */
	buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				      __GFP_NOMEMALLOC | __GFP_COMP |
				      __GFP_NORETRY | __GFP_ZERO,
				      buf_desc->order);
	if (!buf_desc->pages) {
		kfree(buf_desc);
		return ERR_PTR(-EAGAIN);
	}
	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
	buf_desc->len = bufsize;
	return buf_desc;
}
1609

1610 1611 1612 1613 1614 1615 1616
/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0;

	/* protect against parallel link reconfiguration */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
	}
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	return rc;
}

1635
#define SMCD_DMBE_SIZES		6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

/* allocate a new SMC-D buffer descriptor: a DMB registered with the ISM
 * device for receive buffers, or plain kernel memory for send buffers
 */
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	/* ISM devices only support DMBs up to the SMCD_DMBE_SIZES limit */
	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			/* map device errors to the caller's retry protocol:
			 * -EAGAIN lets the caller try a smaller size
			 */
			if (rc == -ENOMEM)
				return ERR_PTR(-EAGAIN);
			if (rc == -ENOSPC)
				return ERR_PTR(-ENOSPC);
			return ERR_PTR(-EIO);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

/* get or create one buffer (sndbuf or rmb) for a connection:
 * start from the socket's requested size and fall back to smaller
 * sizes until allocation/reuse succeeds
 */
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	struct mutex *lock;	/* lock buffer list */
	int sk_buf_size;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		sk_buf_size = smc->sk.sk_sndbuf / 2;

	/* walk from the requested compressed size down to the minimum */
	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {
		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_short];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_short];
		}
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);

		/* -ENOMEM is final; other errors mean "retry smaller" */
		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		mutex_lock(lock);
		list_add(&buf_desc->list, buf_list);
		mutex_unlock(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return PTR_ERR(buf_desc);

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			/* release the just-claimed slot again */
			smcr_buf_unuse(buf_desc, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_short = bufsize_short;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

1758 1759
/* DMA-sync the sndbuf for CPU access; no-op for SMC-D or inactive links */
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
		return;
	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

/* DMA-sync the sndbuf for device access; no-op for SMC-D or inactive links */
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

/* DMA-sync the rmb for CPU access on all active links of the lgr */
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

/* DMA-sync the rmb for device access on all active links of the lgr */
void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	int i;

	if (!conn->lgr || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
					  DMA_FROM_DEVICE);
	}
}

1800 1801 1802 1803 1804 1805
/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc) {
		/* rmb creation failed: undo the sndbuf created above */
		mutex_lock(&smc->conn.lgr->sndbufs_lock);
		list_del(&smc->conn.sndbuf_desc->list);
		mutex_unlock(&smc->conn.lgr->sndbufs_lock);
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
		smc->conn.sndbuf_desc = NULL;
	}
	return rc;
}
1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836

/* atomically reserve a free rtoken slot index in the lgr,
 * or return -ENOSPC if all slots are taken
 */
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		/* re-check under the atomic op; another CPU may race */
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883
/* find the rtoken slot that holds the given rkey on link lnk_idx,
 * or -ENOENT if unknown
 */
static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	/* locate the rmb via its already-known rkey on the old link */
	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	/* translate the protocol-level link_id into the array index */
	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

1884
/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	/* reserve a fresh slot; returns -ENOSPC when the lgr is full */
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

1908
/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* wipe the slot on every link before releasing it */
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
					  clc->r0.rmb_rkey);
	/* smc_rtoken_add() returns a negative errno on failure */
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}
1940

1941 1942 1943 1944 1945
/* mark all SMC-R ports and SMC-D devices as going away so that no new
 * link groups are started during module unload / reboot
 */
static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	mutex_unlock(&smc_ib_devices.mutex);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	mutex_unlock(&smcd_dev_list.mutex);
}

1962 1963
/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	/* block creation of new link groups first */
	smc_core_going_away();

	/* NULL: terminate all SMC-R link groups */
	smc_smcr_terminate_all(NULL);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	mutex_unlock(&smcd_dev_list.mutex);
}
1976

1977 1978 1979 1980
/* reboot notifier callback: tear down all link groups and detach from
 * the IB core before the system goes down
 */
static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}

/* registered in smc_core_init(), unregistered in smc_core_exit() */
static struct notifier_block smc_reboot_notifier = {
	.notifier_call = smc_core_reboot_event,
};

1989 1990
/* module init: hook into the reboot notification chain */
int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

1994 1995 1996
/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	/* unhook first so the notifier cannot run during shutdown */
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}