smc_cdc.c 9.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Connection Data Control (CDC)
 * handles flow control
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/spinlock.h>

#include "smc.h"
#include "smc_wr.h"
#include "smc_cdc.h"
18
#include "smc_tx.h"
19
#include "smc_rx.h"
20
#include "smc_close.h"
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46

/********************************** send *************************************/

struct smc_cdc_tx_pend {
	struct smc_connection	*conn;		/* socket connection */
	union smc_host_cursor	cursor;	/* tx sndbuf cursor sent */
	union smc_host_cursor	p_cursor;	/* rx RMBE cursor produced */
	u16			ctrl_seq;	/* conn. tx sequence # */
};

/* handler for send/transmission completion of a CDC msg */
static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
			       struct smc_link *link,
			       enum ib_wc_status wc_status)
{
	struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd;
	struct smc_sock *smc;
	int diff;

	if (!cdcpend->conn)
		/* already dismissed */
		return;

	smc = container_of(cdcpend->conn, struct smc_sock, conn);
	bh_lock_sock(&smc->sk);
	if (!wc_status) {
47
		diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len,
48 49 50 51 52
				     &cdcpend->conn->tx_curs_fin,
				     &cdcpend->cursor);
		/* sndbuf_space is decreased in smc_sendmsg */
		smp_mb__before_atomic();
		atomic_add(diff, &cdcpend->conn->sndbuf_space);
53
		/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
54 55 56 57 58
		smp_mb__after_atomic();
		smc_curs_write(&cdcpend->conn->tx_curs_fin,
			       smc_curs_read(&cdcpend->cursor, cdcpend->conn),
			       cdcpend->conn);
	}
59
	smc_tx_sndbuf_nonfull(smc);
60 61 62
	bh_unlock_sock(&smc->sk);
}

63
int smc_cdc_get_free_slot(struct smc_connection *conn,
64 65 66
			  struct smc_wr_buf **wr_buf,
			  struct smc_cdc_tx_pend **pend)
{
67
	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
68
	int rc;
69

70 71 72 73 74 75
	rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
				     (struct smc_wr_tx_pend_priv **)pend);
	if (!conn->alert_token_local)
		/* abnormal termination */
		rc = -EPIPE;
	return rc;
76 77 78 79 80 81 82 83 84
}

static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
					    struct smc_cdc_tx_pend *pend)
{
	BUILD_BUG_ON_MSG(
		sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
		"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
	BUILD_BUG_ON_MSG(
85
		sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
		"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
	BUILD_BUG_ON_MSG(
		sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
		"must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_cdc_tx_pend)");
	pend->conn = conn;
	pend->cursor = conn->tx_curs_sent;
	pend->p_cursor = conn->local_tx_ctrl.prod;
	pend->ctrl_seq = conn->tx_cdc_seq;
}

int smc_cdc_msg_send(struct smc_connection *conn,
		     struct smc_wr_buf *wr_buf,
		     struct smc_cdc_tx_pend *pend)
{
	struct smc_link *link;
	int rc;

	link = &conn->lgr->lnk[SMC_SINGLE_LINK];

	smc_cdc_add_pending_send(conn, pend);

	conn->tx_cdc_seq++;
	conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
	smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf,
			    &conn->local_tx_ctrl, conn);
	rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
	if (!rc)
		smc_curs_write(&conn->rx_curs_confirmed,
			       smc_curs_read(&conn->local_tx_ctrl.cons, conn),
			       conn);

	return rc;
}

int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
{
	struct smc_cdc_tx_pend *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

126
	rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
	if (rc)
		return rc;

	return smc_cdc_msg_send(conn, wr_buf, pend);
}

static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
			      unsigned long data)
{
	struct smc_connection *conn = (struct smc_connection *)data;
	struct smc_cdc_tx_pend *cdc_pend =
		(struct smc_cdc_tx_pend *)tx_pend;

	return cdc_pend->conn == conn;
}

static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
{
	struct smc_cdc_tx_pend *cdc_pend =
		(struct smc_cdc_tx_pend *)tx_pend;

	cdc_pend->conn = NULL;
}

void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
{
	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];

	smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
				smc_cdc_tx_filter, smc_cdc_tx_dismisser,
				(unsigned long)conn);
}

/********************************* receive ***********************************/

static inline bool smc_cdc_before(u16 seq1, u16 seq2)
{
	return (s16)(seq1 - seq2) < 0;
}

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
					    int *diff_prod)
{
	struct smc_connection *conn = &smc->conn;
	char *base;

	/* new data included urgent business */
	smc_curs_write(&conn->urg_curs,
		       smc_curs_read(&conn->local_rx_ctrl.prod, conn),
		       conn);
	conn->urg_state = SMC_URG_VALID;
	if (!sock_flag(&smc->sk, SOCK_URGINLINE))
		/* we'll skip the urgent byte, so don't account for it */
		(*diff_prod)--;
	base = (char *)conn->rmb_desc->cpu_addr;
	if (conn->urg_curs.count)
		conn->urg_rx_byte = *(base + conn->urg_curs.count - 1);
	else
		conn->urg_rx_byte = *(base + conn->rmb_desc->len - 1);
	sk_send_sigurg(&smc->sk);
}

189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
static void smc_cdc_msg_recv_action(struct smc_sock *smc,
				    struct smc_cdc_msg *cdc)
{
	union smc_host_cursor cons_old, prod_old;
	struct smc_connection *conn = &smc->conn;
	int diff_cons, diff_prod;

	smc_curs_write(&prod_old,
		       smc_curs_read(&conn->local_rx_ctrl.prod, conn),
		       conn);
	smc_curs_write(&cons_old,
		       smc_curs_read(&conn->local_rx_ctrl.cons, conn),
		       conn);
	smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn);

	diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old,
				  &conn->local_rx_ctrl.cons);
	if (diff_cons) {
		/* peer_rmbe_space is decreased during data transfer with RDMA
		 * write
		 */
		smp_mb__before_atomic();
		atomic_add(diff_cons, &conn->peer_rmbe_space);
		/* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
		smp_mb__after_atomic();
	}

216
	diff_prod = smc_curs_diff(conn->rmb_desc->len, &prod_old,
217 218
				  &conn->local_rx_ctrl.prod);
	if (diff_prod) {
219 220
		if (conn->local_rx_ctrl.prod_flags.urg_data_present)
			smc_cdc_handle_urg_data_arrival(smc, &diff_prod);
221 222 223
		/* bytes_to_rcv is decreased in smc_recvmsg */
		smp_mb__before_atomic();
		atomic_add(diff_prod, &conn->bytes_to_rcv);
224
		/* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
225
		smp_mb__after_atomic();
226
		smc->sk.sk_data_ready(&smc->sk);
227 228 229 230 231 232 233 234 235 236 237
	} else {
		if (conn->local_rx_ctrl.prod_flags.write_blocked ||
		    conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
		    conn->local_rx_ctrl.prod_flags.urg_data_pending) {
			if (conn->local_rx_ctrl.prod_flags.urg_data_pending)
				conn->urg_state = SMC_URG_NOTYET;
			/* force immediate tx of current consumer cursor, but
			 * under send_lock to guarantee arrival in seqno-order
			 */
			smc_tx_sndbuf_nonempty(conn);
		}
238 239
	}

240 241 242 243 244 245 246
	/* piggy backed tx info */
	/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
	if (diff_cons && smc_tx_prepared_sends(conn)) {
		smc_tx_sndbuf_nonempty(conn);
		/* trigger socket release if connection closed */
		smc_close_wake_tx_prepared(smc);
	}
247 248 249 250 251 252
	if (diff_cons && conn->urg_tx_pend &&
	    atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) {
		/* urg data confirmed by peer, indicate we're ready for more */
		conn->urg_tx_pend = false;
		smc->sk.sk_write_space(&smc->sk);
	}
253

254
	if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
255
		smc->sk.sk_err = ECONNRESET;
256 257
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	}
258 259 260 261 262
	if (smc_cdc_rxed_any_close_or_senddone(conn)) {
		smc->sk.sk_shutdown |= RCV_SHUTDOWN;
		if (smc->clcsock && smc->clcsock->sk)
			smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
		sock_set_flag(&smc->sk, SOCK_DONE);
263 264 265
		sock_hold(&smc->sk); /* sock_put in close_work */
		if (!schedule_work(&conn->close_work))
			sock_put(&smc->sk);
266
	}
267 268 269
}

/* called under tasklet context */
270
static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
271 272 273
{
	sock_hold(&smc->sk);
	bh_lock_sock(&smc->sk);
274
	smc_cdc_msg_recv_action(smc, cdc);
275 276 277 278 279 280 281 282 283 284
	bh_unlock_sock(&smc->sk);
	sock_put(&smc->sk); /* no free sk in softirq-context */
}

/***************************** init, exit, misc ******************************/

static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
{
	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
	struct smc_cdc_msg *cdc = buf;
285 286 287
	struct smc_connection *conn;
	struct smc_link_group *lgr;
	struct smc_sock *smc;
288 289 290

	if (wc->byte_len < offsetof(struct smc_cdc_msg, reserved))
		return; /* short message */
291
	if (cdc->len != SMC_WR_TX_SIZE)
292
		return; /* invalid message */
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309

	/* lookup connection */
	lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
	read_lock_bh(&lgr->conns_lock);
	conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
	read_unlock_bh(&lgr->conns_lock);
	if (!conn)
		return;
	smc = container_of(conn, struct smc_sock, conn);

	if (!cdc->prod_flags.failover_validation) {
		if (smc_cdc_before(ntohs(cdc->seqno),
				   conn->local_rx_ctrl.seqno))
			/* received seqno is old */
			return;
	}
	smc_cdc_msg_recv(smc, cdc);
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
}

static struct smc_wr_rx_handler smc_cdc_rx_handlers[] = {
	{
		.handler	= smc_cdc_rx_handler,
		.type		= SMC_CDC_MSG_TYPE
	},
	{
		.handler	= NULL,
	}
};

int __init smc_cdc_init(void)
{
	struct smc_wr_rx_handler *handler;
	int rc = 0;

	for (handler = smc_cdc_rx_handlers; handler->handler; handler++) {
		INIT_HLIST_NODE(&handler->list);
		rc = smc_wr_rx_register_handler(handler);
		if (rc)
			break;
	}
	return rc;
}
反馈
建议
客服 返回
顶部