// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Basic Transport Functions exploiting Infiniband API
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
19
#include <rdma/ib_cache.h>
20 21 22 23 24

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
25
#include "smc_wr.h"
U
Ursula Braun 已提交
26
#include "smc_llc.h"
27
#include "smc_cdc.h"
28
#include "smc_close.h"
29
#include "smc_ism.h"
30

31 32
/* step added to smc_lgr_list.num for every link group that gets created */
#define SMC_LGR_NUM_INCR		256
/* delay (in jiffies) before an unused link group is freed; used for every
 * link group that is not an SMC-R client link group
 */
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
33
/* free delay for SMC-R client link groups: 10 * HZ longer than the server one */
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
34
/* short free delay used by smc_lgr_schedule_free_work_fast() */
#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)
35

36 37 38 39 40
/* File-global registry of link groups. smc_lgr_create() adds new link groups
 * to .list; the list is walked and modified under .lock.
 */
static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	/* stepped by SMC_LGR_NUM_INCR per new link group; copied into lgr->id */
	.num = 0,
};
U
Ursula Braun 已提交
41

42 43
/* forward declaration: smc_buf_unuse() calls this before its definition */
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
44

45 46 47 48 49 50 51
static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	mod_delayed_work(system_wq, &lgr->free_work,
52 53
			 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
			 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
54 55
}

56 57 58
/* (Re)arm the free worker of @lgr with the short SMC_LGR_FREE_DELAY_FAST delay */
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
{
	struct delayed_work *free_work = &lgr->free_work;

	mod_delayed_work(system_wq, free_work, SMC_LGR_FREE_DELAY_FAST);
}

61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 * Returns 0 on success, != otherwise.
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static void smc_lgr_register_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);

	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
}

/* Unregister connection and reset the alert token of the given connection.
 * Also drops the socket reference taken in smc_lgr_register_conn().
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	struct smc_sock *smc;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	lgr->conns_num--;
	conn->alert_token_local = 0;

	smc = container_of(conn, struct smc_sock, conn);
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

124
/* Unregister connection from lgr; nothing to do if the connection has no
 * link group or does not hold an alert token
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (lgr) {
		write_lock_bh(&lgr->conns_lock);
		if (conn->alert_token_local != 0)
			__smc_lgr_unregister_conn(conn);
		write_unlock_bh(&lgr->conns_lock);
	}
}

139 140 141 142 143 144 145 146 147 148 149 150 151 152
/* Send delete link, either as client to request the initiation
 * of the DELETE LINK sequence from server; or as server to
 * initiate the delete processing. See smc_llc_rx_delete_link().
 * Returns 0 if the message was sent on an active link, -ENOTCONN if the
 * link is not active or sending failed.
 */
static int smc_link_send_delete(struct smc_link *lnk)
{
	if (lnk->state != SMC_LNK_ACTIVE)
		return -ENOTCONN;
	if (smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true))
		return -ENOTCONN;

	smc_llc_link_deleting(lnk);
	return 0;
}

U
Ursula Braun 已提交
153 154
/* forward declaration: smc_lgr_free_work() calls this before its definition */
static void smc_lgr_free(struct smc_link_group *lgr);

155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	bool conns;

	spin_lock_bh(&smc_lgr_list.lock);
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(&smc_lgr_list.lock);
		return;
	}
170 171
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list); /* remove from smc_lgr_list */
172
	spin_unlock_bh(&smc_lgr_list.lock);
173 174

	if (!lgr->is_smcd && !lgr->terminating)	{
175 176
		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

177
		/* try to send del link msg, on error free lgr immediately */
178 179
		if (lnk->state == SMC_LNK_ACTIVE &&
		    !smc_link_send_delete(lnk)) {
180 181 182 183 184 185
			/* reschedule in case we never receive a response */
			smc_lgr_schedule_free_work(lgr);
			return;
		}
	}

186
	if (!delayed_work_pending(&lgr->free_work)) {
187 188 189 190
		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

		if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
			smc_llc_link_inactive(lnk);
H
Hans Wippel 已提交
191 192
		if (lgr->is_smcd)
			smc_ism_signal_shutdown(lgr);
193
		smc_lgr_free(lgr);
194
	}
195 196 197
}

/* create a new SMC link group */
198
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
199 200 201 202 203
{
	struct smc_link_group *lgr;
	struct smc_link *lnk;
	u8 rndvec[3];
	int rc = 0;
U
Ursula Braun 已提交
204
	int i;
205

206
	if (ini->is_smcd && ini->vlan_id) {
207 208
		if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
209
			goto out;
210
		}
211 212
	}

213 214
	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
215
		rc = SMC_CLC_DECL_MEM;
216
		goto ism_put_vlan;
217
	}
218
	lgr->is_smcd = ini->is_smcd;
219
	lgr->sync_err = 0;
220
	lgr->vlan_id = ini->vlan_id;
U
Ursula Braun 已提交
221 222
	rwlock_init(&lgr->sndbufs_lock);
	rwlock_init(&lgr->rmbs_lock);
223
	rwlock_init(&lgr->conns_lock);
U
Ursula Braun 已提交
224 225 226 227
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
228 229
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
230 231
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	lgr->conns_all = RB_ROOT;
232
	if (ini->is_smcd) {
233
		/* SMC-D specific settings */
234 235
		lgr->peer_gid = ini->ism_gid;
		lgr->smcd = ini->ism_dev;
236 237 238
	} else {
		/* SMC-R specific settings */
		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
239 240
		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
		       SMC_SYSTEMID_LEN);
241 242 243 244 245

		lnk = &lgr->lnk[SMC_SINGLE_LINK];
		/* initialize link */
		lnk->state = SMC_LNK_ACTIVATING;
		lnk->link_id = SMC_SINGLE_LINK;
246 247 248 249 250 251
		lnk->smcibdev = ini->ib_dev;
		lnk->ibport = ini->ib_port;
		lnk->path_mtu =
			ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
		if (!ini->ib_dev->initialized)
			smc_ib_setup_per_ibdev(ini->ib_dev);
252 253 254
		get_random_bytes(rndvec, sizeof(rndvec));
		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
			(rndvec[2] << 16);
255
		rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
256 257
					  ini->vlan_id, lnk->gid,
					  &lnk->sgid_index);
258 259
		if (rc)
			goto free_lgr;
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
		rc = smc_llc_link_init(lnk);
		if (rc)
			goto free_lgr;
		rc = smc_wr_alloc_link_mem(lnk);
		if (rc)
			goto clear_llc_lnk;
		rc = smc_ib_create_protection_domain(lnk);
		if (rc)
			goto free_link_mem;
		rc = smc_ib_create_queue_pair(lnk);
		if (rc)
			goto dealloc_pd;
		rc = smc_wr_create_link(lnk);
		if (rc)
			goto destroy_qp;
	}
276 277 278 279
	smc->conn.lgr = lgr;
	spin_lock_bh(&smc_lgr_list.lock);
	list_add(&lgr->list, &smc_lgr_list.list);
	spin_unlock_bh(&smc_lgr_list.lock);
280 281
	return 0;

282 283 284 285 286 287
destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
288 289
clear_llc_lnk:
	smc_llc_link_clear(lnk);
290 291
free_lgr:
	kfree(lgr);
292 293 294
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
295
out:
296 297 298 299 300 301
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
302 303 304
	return rc;
}

305 306
static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
U
Ursula Braun 已提交
307
{
308
	if (conn->sndbuf_desc)
U
Ursula Braun 已提交
309 310
		conn->sndbuf_desc->used = 0;
	if (conn->rmb_desc) {
311
		if (!conn->rmb_desc->regerr) {
312 313 314 315 316 317
			if (!lgr->is_smcd) {
				/* unregister rmb with peer */
				smc_llc_do_delete_rkey(
						&lgr->lnk[SMC_SINGLE_LINK],
						conn->rmb_desc);
			}
318
			conn->rmb_desc->used = 0;
319 320 321 322 323 324
		} else {
			/* buf registration failed, reuse not possible */
			write_lock_bh(&lgr->rmbs_lock);
			list_del(&conn->rmb_desc->list);
			write_unlock_bh(&lgr->rmbs_lock);

325
			smc_buf_free(lgr, true, conn->rmb_desc);
326
		}
U
Ursula Braun 已提交
327 328 329
	}
}

330 331 332
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
333 334 335
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
336
		return;
337
	if (lgr->is_smcd) {
338
		smc_ism_unset_conn(conn);
339 340
		tasklet_kill(&conn->rx_tsklet);
	} else {
341
		smc_cdc_tx_dismiss_slots(conn);
342
	}
343
	smc_lgr_unregister_conn(conn);
344
	smc_buf_unuse(conn, lgr);		/* allow buffer reuse */
345
	conn->lgr = NULL;
346 347 348

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
349 350 351 352 353
}

/* Tear down the resources of the SMC-R link @lnk */
static void smc_link_clear(struct smc_link *lnk)
{
	lnk->peer_qpn = 0;

	/* LLC and work request state first */
	smc_llc_link_clear(lnk);
	smc_ib_modify_qp_reset(lnk);
	smc_wr_free_link(lnk);

	/* then queue pair, protection domain and the link memory */
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
}

362 363
/* Free an SMC-R buffer descriptor including its DMA mapping, sg table and
 * pages. For an RMB (@is_rmb) the memory region is released as well.
 */
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

	if (is_rmb && buf_desc->mr_rx[SMC_SINGLE_LINK])
		smc_ib_put_memory_region(buf_desc->mr_rx[SMC_SINGLE_LINK]);

	/* RMBs are mapped device-to-memory, send buffers memory-to-device */
	smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

383 384 385
/* Free an SMC-D buffer descriptor: a DMB (@is_dmb) is unregistered from the
 * ISM device, a send buffer is plain kernel memory and just freed.
 */
static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (!is_dmb) {
		kfree(buf_desc->cpu_addr);
	} else {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	}
	kfree(buf_desc);
}

/* Free a buffer descriptor using the SMC-D or SMC-R variant matching @lgr */
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (!lgr->is_smcd) {
		smcr_buf_free(lgr, is_rmb, buf_desc);
		return;
	}
	smcd_buf_free(lgr, is_rmb, buf_desc);
}

405
/* Free all buffers of one kind of a link group: the RMBs if @is_rmb is set,
 * the send buffers otherwise.
 */
static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct list_head *lists = is_rmb ? lgr->rmbs : lgr->sndbufs;
	struct smc_buf_desc *cur, *next;
	int size_idx;

	/* one list per compressed buffer size */
	for (size_idx = 0; size_idx < SMC_RMBE_SIZES; size_idx++) {
		list_for_each_entry_safe(cur, next, &lists[size_idx], list) {
			list_del(&cur->list);
			smc_buf_free(lgr, is_rmb, cur);
		}
	}
}

424 425 426 427 428 429 430 431
/* Free every send buffer and every RMB of the link group @lgr */
static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	__smc_lgr_free_bufs(lgr, false);	/* send buffers */
	__smc_lgr_free_bufs(lgr, true);		/* rmbs */
}

432
/* remove a link group */
U
Ursula Braun 已提交
433
static void smc_lgr_free(struct smc_link_group *lgr)
434
{
435
	smc_lgr_free_bufs(lgr);
436 437 438 439
	if (lgr->is_smcd)
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
	else
		smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
440 441 442
	kfree(lgr);
}

443 444 445 446 447 448 449 450 451
/* Take @lgr off the global link group list, if it is still on it, so that
 * it is not used for new connections
 */
void smc_lgr_forget(struct smc_link_group *lgr)
{
	struct list_head *entry = &lgr->list;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(entry))
		list_del_init(entry);
	spin_unlock_bh(&smc_lgr_list.lock);
}

452
/* terminate linkgroup abnormally */
453
static void __smc_lgr_terminate(struct smc_link_group *lgr)
454 455
{
	struct smc_connection *conn;
456
	struct smc_sock *smc;
457 458
	struct rb_node *node;

459 460 461
	if (lgr->terminating)
		return;	/* lgr already terminating */
	lgr->terminating = 1;
462 463
	if (!list_empty(&lgr->list)) /* forget lgr */
		list_del_init(&lgr->list);
464 465
	if (!lgr->is_smcd)
		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
466 467 468 469 470

	write_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		conn = rb_entry(node, struct smc_connection, alert_node);
471
		smc = container_of(conn, struct smc_sock, conn);
472
		sock_hold(&smc->sk); /* sock_put in close work */
473
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
474
		__smc_lgr_unregister_conn(conn);
475
		conn->lgr = NULL;
476
		write_unlock_bh(&lgr->conns_lock);
477 478
		if (!schedule_work(&conn->close_work))
			sock_put(&smc->sk);
479
		write_lock_bh(&lgr->conns_lock);
480 481 482
		node = rb_first(&lgr->conns_all);
	}
	write_unlock_bh(&lgr->conns_lock);
483 484
	if (!lgr->is_smcd)
		wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
485
	smc_lgr_schedule_free_work(lgr);
486 487
}

488 489 490 491 492 493 494
void smc_lgr_terminate(struct smc_link_group *lgr)
{
	spin_lock_bh(&smc_lgr_list.lock);
	__smc_lgr_terminate(lgr);
	spin_unlock_bh(&smc_lgr_list.lock);
}

495 496 497 498 499
/* Called when IB port is terminated: terminates every SMC-R link group
 * whose link uses port @ibport of device @smcibdev
 */
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *next;

	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, next, &smc_lgr_list.list, list) {
		struct smc_link *lnk;

		if (lgr->is_smcd)
			continue;
		lnk = &lgr->lnk[SMC_SINGLE_LINK];
		if (lnk->smcibdev != smcibdev || lnk->ibport != ibport)
			continue;
		__smc_lgr_terminate(lgr);
	}
	spin_unlock_bh(&smc_lgr_list.lock);
}

510
/* Called when SMC-D device is terminated or peer is lost */
H
Hans Wippel 已提交
511
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
512 513 514 515 516 517 518 519 520
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
		if (lgr->is_smcd && lgr->smcd == dev &&
		    (!peer_gid || lgr->peer_gid == peer_gid) &&
H
Hans Wippel 已提交
521
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
522 523 524 525 526 527 528 529 530 531
			__smc_lgr_terminate(lgr);
			list_move(&lgr->list, &lgr_free_list);
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		cancel_delayed_work_sync(&lgr->free_work);
H
Hans Wippel 已提交
532 533
		if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
			smc_ism_signal_shutdown(lgr);
534 535 536 537
		smc_lgr_free(lgr);
	}
}

538 539 540
/* Determine vlan of internal TCP socket.
 * @vlan_id: address to store the determined vlan id into
 */
541
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
542 543
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
544 545
	struct net_device *ndev;
	int i, nest_lvl, rc = 0;
546

547
	ini->vlan_id = 0;
548 549 550 551 552 553 554 555 556
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

557 558
	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
559
		ini->vlan_id = vlan_dev_vlan_id(ndev);
560 561 562 563
		goto out_rel;
	}

	rtnl_lock();
564
	nest_lvl = ndev->lower_level;
565 566 567 568 569 570 571 572
	for (i = 0; i < nest_lvl; i++) {
		struct list_head *lower = &ndev->adj_list.lower;

		if (list_empty(lower))
			break;
		lower = lower->next;
		ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
		if (is_vlan_dev(ndev)) {
573
			ini->vlan_id = vlan_dev_vlan_id(ndev);
574 575 576 577
			break;
		}
	}
	rtnl_unlock();
578 579 580 581 582 583 584

out_rel:
	dst_release(dst);
out:
	return rc;
}

585 586
/* Check whether the SMC-R link group @lgr belongs to the peer described by
 * @lcl and has the given @role. A client link group additionally has to
 * match the peer queue pair number @clcqpn.
 */
static bool smcr_lgr_match(struct smc_link_group *lgr,
			   struct smc_clc_msg_local *lcl,
			   enum smc_lgr_role role, u32 clcqpn)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN))
		return false;
	if (memcmp(lnk->peer_gid, &lcl->gid, SMC_GID_SIZE))
		return false;
	if (memcmp(lnk->peer_mac, lcl->mac, sizeof(lcl->mac)))
		return false;
	if (lgr->role != role)
		return false;

	return lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn;
}
599

600 601 602 603
/* Check whether the SMC-D link group @lgr uses device @smcismdev and talks
 * to the peer identified by @peer_gid
 */
static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev, u64 peer_gid)
{
	if (lgr->smcd != smcismdev)
		return false;
	return lgr->peer_gid == peer_gid;
}

/* create a new SMC connection (and a new link group if necessary) */
607
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
608 609 610 611 612 613
{
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	int rc = 0;

614
	ini->cln_first_contact = SMC_FIRST_CONTACT;
615
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
616
	if (role == SMC_CLNT && ini->srv_first_contact)
617 618 619 620 621 622 623
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
		write_lock_bh(&lgr->conns_lock);
624 625 626
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
627
		    !lgr->sync_err &&
628
		    lgr->vlan_id == ini->vlan_id &&
629 630
		    (role == SMC_CLNT ||
		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
631
			/* link group found */
632
			ini->cln_first_contact = SMC_REUSE_CONTACT;
633 634
			conn->lgr = lgr;
			smc_lgr_register_conn(conn); /* add smc conn to lgr */
635 636
			if (delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
637 638 639 640 641 642 643
			write_unlock_bh(&lgr->conns_lock);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(&smc_lgr_list.lock);

644
	if (role == SMC_CLNT && !ini->srv_first_contact &&
645
	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
646 647 648 649
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
650
		return SMC_CLC_DECL_SYNCERR;
651 652 653
	}

create:
654
	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
655
		rc = smc_lgr_create(smc, ini);
656 657
		if (rc)
			goto out;
658 659
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
660
		smc_lgr_register_conn(conn); /* add smc conn to lgr */
661
		write_unlock_bh(&lgr->conns_lock);
662
	}
663
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
664
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
S
Stefan Raspl 已提交
665
	conn->urg_state = SMC_URG_READ;
666
	if (ini->is_smcd) {
667 668 669
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	}
670 671 672
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif
673 674

out:
675
	return rc;
676
}
U
Ursula Braun 已提交
677

678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704
/* convert the RMB size into the compressed notation - minimum 16K.
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 * The result is capped at SMC_RMBE_SIZES - 1.
 */
static u8 smc_compress_bufsize(int size)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	/* number of 16K units, minus one so exact powers of 2 stay put */
	size = (size - 1) >> 14;
	compressed = ilog2(size) + 1;
	return compressed >= SMC_RMBE_SIZES ? SMC_RMBE_SIZES - 1 : compressed;
}

/* convert the RMB size from compressed notation into integer:
 * 0 -> 16K, each increment doubles the size
 */
int smc_uncompress_bufsize(u8 compressed)
{
	int shift = ((int)compressed) + 14;
	u32 size = 0x00000001 << shift;

	return (int)size;
}

705 706
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
U
Ursula Braun 已提交
707
 */
708 709 710
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     rwlock_t *lock,
					     struct list_head *buf_list)
U
Ursula Braun 已提交
711
{
712
	struct smc_buf_desc *buf_slot;
U
Ursula Braun 已提交
713

714 715 716 717 718
	read_lock_bh(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			read_unlock_bh(lock);
			return buf_slot;
U
Ursula Braun 已提交
719 720
		}
	}
721
	read_unlock_bh(lock);
U
Ursula Braun 已提交
722 723 724
	return NULL;
}

U
Ursula Braun 已提交
725 726 727 728 729 730 731 732 733
/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

734 735
/* Allocate and set up a new SMC-R buffer of @bufsize bytes for @lgr.
 * Returns the new descriptor or an ERR_PTR: -ENOMEM if the descriptor
 * itself cannot be allocated, -EAGAIN if the pages cannot be allocated or
 * do not map to exactly one DMA address, otherwise the error of the
 * failing helper.
 */
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						bool is_rmb, int bufsize)
{
	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
	struct smc_buf_desc *desc;
	struct sg_table *sgt;
	int rc;

	/* try to alloc a new buffer */
	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
		return ERR_PTR(-ENOMEM);

	desc->order = get_order(bufsize);
	desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
				  __GFP_NOMEMALLOC | __GFP_COMP |
				  __GFP_NORETRY | __GFP_ZERO,
				  desc->order);
	if (!desc->pages) {
		kfree(desc);
		return ERR_PTR(-EAGAIN);
	}
	desc->cpu_addr = (void *)page_address(desc->pages);

	/* build the sg table from the pages */
	sgt = &desc->sgt[SMC_SINGLE_LINK];
	rc = sg_alloc_table(sgt, 1, GFP_KERNEL);
	if (rc) {
		smc_buf_free(lgr, is_rmb, desc);
		return ERR_PTR(rc);
	}
	sg_set_buf(sgt->sgl, desc->cpu_addr, bufsize);

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk->smcibdev, desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != 1) {
		smc_buf_free(lgr, is_rmb, desc);
		return ERR_PTR(-EAGAIN);
	}

	/* create a new memory region for the RMB */
	if (is_rmb) {
		rc = smc_ib_get_memory_region(lnk->roce_pd,
					      IB_ACCESS_REMOTE_WRITE |
					      IB_ACCESS_LOCAL_WRITE,
					      desc);
		if (rc) {
			smc_buf_free(lgr, is_rmb, desc);
			return ERR_PTR(rc);
		}
	}

	desc->len = bufsize;
	return desc;
}

793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
/* number of supported compressed DMB sizes; valid values are 0 .. 6 */
#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */

/* Allocate a new SMC-D buffer of @bufsize bytes for @lgr: a DMB registered
 * with the ISM device if @is_dmb is set, plain kernel memory otherwise.
 * Returns the new descriptor or an ERR_PTR: -ENOMEM if the descriptor
 * itself cannot be allocated, -EAGAIN if @bufsize exceeds the largest
 * supported size or the buffer cannot be set up (the caller then retries
 * with a smaller size).
 */
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	/* SMCD_DMBE_SIZES is a count, so the largest valid compressed size
	 * is SMCD_DMBE_SIZES - 1 (1MB); reject everything beyond that
	 */
	if (smc_compress_bufsize(bufsize) >= SMCD_DMBE_SIZES)
		return ERR_PTR(-EAGAIN);

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

/* Provide the send buffer (or the RMB if @is_rmb is set) for the connection
 * of @smc. Starting with the size derived from the socket buffer size,
 * a free slot of the link group is reused or a new buffer is created;
 * on failure the next smaller size is tried.
 * Returns 0 on success, -ENOMEM if no buffer could be obtained.
 */
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_short;
	int sk_buf_size;
	rwlock_t *lock;

	/* use socket recv/send buffer size (w/o overhead) as start value */
	sk_buf_size = (is_rmb ? smc->sk.sk_rcvbuf : smc->sk.sk_sndbuf) / 2;
	lock = is_rmb ? &lgr->rmbs_lock : &lgr->sndbufs_lock;

	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
	     bufsize_short >= 0; bufsize_short--) {
		buf_list = is_rmb ? &lgr->rmbs[bufsize_short] :
				    &lgr->sndbufs[bufsize_short];
		bufsize = smc_uncompress_bufsize(bufsize_short);
		if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
			continue;

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
		if (buf_desc) {
			memset(buf_desc->cpu_addr, 0, bufsize);
			break; /* found reusable slot */
		}

		buf_desc = is_smcd ?
			smcd_new_buf_create(lgr, is_rmb, bufsize) :
			smcr_new_buf_create(lgr, is_rmb, bufsize);

		/* -ENOMEM ends the search, other errors try a smaller size */
		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc))
			continue;

		buf_desc->used = 1;
		write_lock_bh(lock);
		list_add(&buf_desc->list, buf_list);
		write_unlock_bh(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return -ENOMEM;

	if (!is_rmb) {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
		return 0;
	}

	conn->rmb_desc = buf_desc;
	conn->rmbe_size_short = bufsize_short;
	smc->sk.sk_rcvbuf = bufsize * 2;
	atomic_set(&conn->bytes_to_rcv, 0);
	conn->rmbe_update_limit = smc_rmb_wnd_update_limit(buf_desc->len);
	if (is_smcd)
		smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	return 0;
}

905 906 907 908
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

909 910
	if (!conn->lgr || conn->lgr->is_smcd)
		return;
911 912 913 914 915 916 917 918
	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

919 920
	if (!conn->lgr || conn->lgr->is_smcd)
		return;
921 922 923 924 925 926 927 928
	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

929 930
	if (!conn->lgr || conn->lgr->is_smcd)
		return;
931 932 933 934 935 936 937 938
	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
			       conn->rmb_desc, DMA_FROM_DEVICE);
}

void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

939 940
	if (!conn->lgr || conn->lgr->is_smcd)
		return;
941 942 943 944
	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
				  conn->rmb_desc, DMA_FROM_DEVICE);
}

945 946 947 948 949 950
/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group
 */
951
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
952 953 954 955
{
	int rc;

	/* create send buffer */
956
	rc = __smc_buf_create(smc, is_smcd, false);
957 958 959
	if (rc)
		return rc;
	/* create rmb */
960
	rc = __smc_buf_create(smc, is_smcd, true);
961
	if (rc)
962
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
963
	return rc;
U
Ursula Braun 已提交
964
}
965 966 967 968 969 970 971 972 973 974 975 976

/* Reserve a free rtoken index in @lgr by setting its bit in the used mask.
 * Returns the reserved index or -ENOSPC if no index could be reserved.
 */
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int idx;

	for_each_clear_bit(idx, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		/* bit was already set when we tried to claim it: try next */
		if (test_and_set_bit(idx, lgr->rtokens_used_mask))
			continue;
		return idx;
	}
	return -ENOSPC;
}

977 978
/* add a new rtoken from peer
 * @nw_vaddr: DMA address of the peer buffer, in network byte order
 * @nw_rkey: rkey of the peer buffer, in network byte order
 * Returns the index of the rtoken (an already known rtoken is not added
 * twice) or the negative error of smc_rmb_reserve_rtoken_idx().
 */
int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int idx;

	/* look for an in-use entry holding the very same rtoken */
	for (idx = 0; idx < SMC_RMBS_PER_LGR_MAX; idx++) {
		if (lgr->rtokens[idx][SMC_SINGLE_LINK].rkey != rkey)
			continue;
		if (lgr->rtokens[idx][SMC_SINGLE_LINK].dma_addr != dma_addr)
			continue;
		if (test_bit(idx, lgr->rtokens_used_mask))
			return idx;	/* already in list */
	}

	idx = smc_rmb_reserve_rtoken_idx(lgr);
	if (idx < 0)
		return idx;

	lgr->rtokens[idx][SMC_SINGLE_LINK].rkey = rkey;
	lgr->rtokens[idx][SMC_SINGLE_LINK].dma_addr = dma_addr;
	return idx;
}

/* delete an rtoken
 * @nw_rkey: rkey of the rtoken to delete, in network byte order
 * Returns 0 if an in-use rtoken with this rkey was found and released,
 * -ENOENT otherwise.
 */
int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
{
	u32 rkey = ntohl(nw_rkey);
	int idx;

	for (idx = 0; idx < SMC_RMBS_PER_LGR_MAX; idx++) {
		if (lgr->rtokens[idx][SMC_SINGLE_LINK].rkey != rkey)
			continue;
		if (!test_bit(idx, lgr->rtokens_used_mask))
			continue;

		/* wipe the entry, then release its index */
		lgr->rtokens[idx][SMC_SINGLE_LINK].rkey = 0;
		lgr->rtokens[idx][SMC_SINGLE_LINK].dma_addr = 0;
		clear_bit(idx, lgr->rtokens_used_mask);
		return 0;
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake
 * Returns 0 on success, otherwise the negative error of smc_rtoken_add().
 */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
					  clc->rmb_rkey);

	return conn->rtoken_idx < 0 ? conn->rtoken_idx : 0;
}
1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041

/* Called (from smc_exit) when module is removed: takes all link groups off
 * the global list and frees them
 */
void smc_core_exit(void)
{
	struct smc_link_group *lgr, *next;
	LIST_HEAD(lgr_freeing_list);

	/* grab the whole list under the lock, work on it without the lock */
	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, next, &lgr_freeing_list, list) {
		const bool is_smcd = lgr->is_smcd;

		list_del_init(&lgr->list);
		if (!is_smcd) {
			struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];

			/* send a delete link request on an active link */
			if (lnk->state == SMC_LNK_ACTIVE)
				smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
							 false);
			smc_llc_link_inactive(lnk);
		}
		cancel_delayed_work_sync(&lgr->free_work);
		if (is_smcd)
			smc_ism_signal_shutdown(lgr);
		smc_lgr_free(lgr); /* free link group */
	}
}