smc_cdc.c 8.2 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Connection Data Control (CDC)
 * handles flow control
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/spinlock.h>

#include "smc.h"
#include "smc_wr.h"
#include "smc_cdc.h"
18
#include "smc_tx.h"
19
#include "smc_rx.h"
20
#include "smc_close.h"
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46

/********************************** send *************************************/

struct smc_cdc_tx_pend {
	struct smc_connection	*conn;		/* socket connection */
	union smc_host_cursor	cursor;	/* tx sndbuf cursor sent */
	union smc_host_cursor	p_cursor;	/* rx RMBE cursor produced */
	u16			ctrl_seq;	/* conn. tx sequence # */
};

/* handler for send/transmission completion of a CDC msg */
static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
			       struct smc_link *link,
			       enum ib_wc_status wc_status)
{
	struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd;
	struct smc_sock *smc;
	int diff;

	if (!cdcpend->conn)
		/* already dismissed */
		return;

	smc = container_of(cdcpend->conn, struct smc_sock, conn);
	bh_lock_sock(&smc->sk);
	if (!wc_status) {
47
		diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len,
48 49 50 51 52
				     &cdcpend->conn->tx_curs_fin,
				     &cdcpend->cursor);
		/* sndbuf_space is decreased in smc_sendmsg */
		smp_mb__before_atomic();
		atomic_add(diff, &cdcpend->conn->sndbuf_space);
53
		/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
54 55 56 57 58
		smp_mb__after_atomic();
		smc_curs_write(&cdcpend->conn->tx_curs_fin,
			       smc_curs_read(&cdcpend->cursor, cdcpend->conn),
			       cdcpend->conn);
	}
59
	smc_tx_sndbuf_nonfull(smc);
60 61 62
	bh_unlock_sock(&smc->sk);
}

63
int smc_cdc_get_free_slot(struct smc_connection *conn,
64 65 66
			  struct smc_wr_buf **wr_buf,
			  struct smc_cdc_tx_pend **pend)
{
67
	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
68
	int rc;
69

70 71 72 73 74 75
	rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
				     (struct smc_wr_tx_pend_priv **)pend);
	if (!conn->alert_token_local)
		/* abnormal termination */
		rc = -EPIPE;
	return rc;
76 77 78 79 80 81 82 83 84
}

static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
					    struct smc_cdc_tx_pend *pend)
{
	BUILD_BUG_ON_MSG(
		sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
		"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
	BUILD_BUG_ON_MSG(
85
		sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
		"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
	BUILD_BUG_ON_MSG(
		sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
		"must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_cdc_tx_pend)");
	pend->conn = conn;
	pend->cursor = conn->tx_curs_sent;
	pend->p_cursor = conn->local_tx_ctrl.prod;
	pend->ctrl_seq = conn->tx_cdc_seq;
}

int smc_cdc_msg_send(struct smc_connection *conn,
		     struct smc_wr_buf *wr_buf,
		     struct smc_cdc_tx_pend *pend)
{
	struct smc_link *link;
	int rc;

	link = &conn->lgr->lnk[SMC_SINGLE_LINK];

	smc_cdc_add_pending_send(conn, pend);

	conn->tx_cdc_seq++;
	conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
	smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf,
			    &conn->local_tx_ctrl, conn);
	rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
	if (!rc)
		smc_curs_write(&conn->rx_curs_confirmed,
			       smc_curs_read(&conn->local_tx_ctrl.cons, conn),
			       conn);

	return rc;
}

int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
	struct smc_cdc_tx_pend *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

126
	rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
	if (rc)
		return rc;

	return smc_cdc_msg_send(conn, wr_buf, pend);
}

static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
			      unsigned long data)
{
	struct smc_connection *conn = (struct smc_connection *)data;
	struct smc_cdc_tx_pend *cdc_pend =
		(struct smc_cdc_tx_pend *)tx_pend;

	return cdc_pend->conn == conn;
}

static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
{
	struct smc_cdc_tx_pend *cdc_pend =
		(struct smc_cdc_tx_pend *)tx_pend;

	cdc_pend->conn = NULL;
}

void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
{
	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];

	smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
				smc_cdc_tx_filter, smc_cdc_tx_dismisser,
				(unsigned long)conn);
}

/********************************* receive ***********************************/

static inline bool smc_cdc_before(u16 seq1, u16 seq2)
{
	return (s16)(seq1 - seq2) < 0;
}

static void smc_cdc_msg_recv_action(struct smc_sock *smc,
				    struct smc_cdc_msg *cdc)
{
	union smc_host_cursor cons_old, prod_old;
	struct smc_connection *conn = &smc->conn;
	int diff_cons, diff_prod;

	smc_curs_write(&prod_old,
		       smc_curs_read(&conn->local_rx_ctrl.prod, conn),
		       conn);
	smc_curs_write(&cons_old,
		       smc_curs_read(&conn->local_rx_ctrl.cons, conn),
		       conn);
	smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn);

	diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old,
				  &conn->local_rx_ctrl.cons);
	if (diff_cons) {
		/* peer_rmbe_space is decreased during data transfer with RDMA
		 * write
		 */
		smp_mb__before_atomic();
		atomic_add(diff_cons, &conn->peer_rmbe_space);
		/* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
		smp_mb__after_atomic();
	}

194
	diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old,
195 196 197 198 199
				  &conn->local_rx_ctrl.prod);
	if (diff_prod) {
		/* bytes_to_rcv is decreased in smc_recvmsg */
		smp_mb__before_atomic();
		atomic_add(diff_prod, &conn->bytes_to_rcv);
200
		/* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
201
		smp_mb__after_atomic();
202
		smc->sk.sk_data_ready(&smc->sk);
203 204 205
	} else if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
		   (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req)) {
		smc->sk.sk_data_ready(&smc->sk);
206 207
	}

208 209 210 211 212 213 214 215
	/* piggy backed tx info */
	/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
	if (diff_cons && smc_tx_prepared_sends(conn)) {
		smc_tx_sndbuf_nonempty(conn);
		/* trigger socket release if connection closed */
		smc_close_wake_tx_prepared(smc);
	}

216
	if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
217
		smc->sk.sk_err = ECONNRESET;
218 219
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	}
220 221 222 223 224
	if (smc_cdc_rxed_any_close_or_senddone(conn)) {
		smc->sk.sk_shutdown |= RCV_SHUTDOWN;
		if (smc->clcsock && smc->clcsock->sk)
			smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
		sock_set_flag(&smc->sk, SOCK_DONE);
225 226 227
		sock_hold(&smc->sk); /* sock_put in close_work */
		if (!schedule_work(&conn->close_work))
			sock_put(&smc->sk);
228
	}
229 230 231
}

/* called under tasklet context */
232
static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
233 234 235
{
	sock_hold(&smc->sk);
	bh_lock_sock(&smc->sk);
236
	smc_cdc_msg_recv_action(smc, cdc);
237 238 239 240 241 242 243 244 245 246
	bh_unlock_sock(&smc->sk);
	sock_put(&smc->sk); /* no free sk in softirq-context */
}

/***************************** init, exit, misc ******************************/

static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_cdc_msg *cdc = buf;
247 248 249
	struct smc_connection *conn;
	struct smc_link_group *lgr;
	struct smc_sock *smc;
250 251 252

	if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved))
		return; /* short message */
253
	if (cdc->len != SMC_WR_TX_SIZE)
254
		return; /* invalid message */
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271

	/* lookup connection */
	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
	read_lock_bh(&lgr->conns_lock);
	conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
	read_unlock_bh(&lgr->conns_lock);
	if (!conn)
		return;
	smc = container_of(conn, struct smc_sock, conn);

	if (!cdc->prod_flags.failover_validation) {
		if (smc_cdc_before(ntohs(cdc->seqno),
				   conn->local_rx_ctrl.seqno))
			/* received seqno is old */
			return;
	}
	smc_cdc_msg_recv(smc, cdc);
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
}

static struct smc_wr_rx_handler smc_cdc_rx_handlers[] = {
	{
		.handler	= smc_cdc_rx_handler,
		.type		= SMC_CDC_MSG_TYPE
	},
	{
		.handler	= NULL,
	}
};

int __init smc_cdc_init(void)
{
	struct smc_wr_rx_handler *handler;
	int rc = 0;

	for (handler = smc_cdc_rx_handlers; handler->handler; handler++) {
		INIT_HLIST_NODE(&handler->list);
		rc = smc_wr_rx_register_handler(handler);
		if (rc)
			break;
	}
	return rc;
}
新手
引导
客服 返回
顶部