smc_clc.c 8.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  CLC (connection layer control) handshake over initial TCP socket to
 *  prepare for RDMA traffic
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/in.h>
13
#include <linux/if_ether.h>
14 15 16 17
#include <net/sock.h>
#include <net/tcp.h>

#include "smc.h"
18
#include "smc_core.h"
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
#include "smc_clc.h"
#include "smc_ib.h"

/* Wait for a CLC message on the internal TCP socket and validate it.
 *
 * The CLC header is first read with MSG_PEEK so that its length field can
 * be inspected without consuming anything from the TCP byte stream; only
 * then is the complete message (exactly the announced length) received
 * into @buf.
 *
 * @smc:           SMC socket whose clcsock is read
 * @buf:           receive buffer for the CLC message
 * @buflen:        size of @buf in bytes
 * @expected_type: CLC message type the caller is waiting for; a
 *                 SMC_CLC_DECLINE is always accepted as well
 *
 * Returns:
 * 0 if success and it was not a decline that we received.
 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
 * On every error path the error is also stored in smc->sk.sk_err.
 */
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
		     u8 expected_type)
{
	struct sock *clc_sk = smc->clcsock->sk;
	struct smc_clc_msg_hdr *clcm = buf;
	struct msghdr msg = {NULL, 0};
	int reason_code = 0;
	struct kvec vec;
	int len, datlen;
	int krflags;

	/* peek the first few bytes to determine length of data to receive
	 * so we don't consume any subsequent CLC message or payload data
	 * in the TCP byte stream
	 */
	vec.iov_base = buf;
	vec.iov_len = buflen;
	krflags = MSG_PEEK | MSG_WAITALL;
	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
			     sizeof(struct smc_clc_msg_hdr), krflags);
	if (signal_pending(current)) {
		reason_code = -EINTR;
		clc_sk->sk_err = EINTR;
		smc->sk.sk_err = EINTR;
		goto out;
	}
	if (clc_sk->sk_err) {
		reason_code = -clc_sk->sk_err;
		smc->sk.sk_err = clc_sk->sk_err;
		goto out;
	}
	if (!len) { /* peer has performed orderly shutdown */
		smc->sk.sk_err = ECONNRESET;
		reason_code = -ECONNRESET;
		goto out;
	}
	if (len < 0) {
		smc->sk.sk_err = -len;
		reason_code = len;
		goto out;
	}

	/* len is known to be positive here, so the comparison against the
	 * unsigned sizeof value below is safe.
	 * datlen comes from the peer: besides the protocol bounds it must
	 * also fit into the caller's buffer, otherwise the second receive
	 * would be asked for more bytes than @buf can hold.
	 */
	datlen = ntohs(clcm->length);
	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
	    (datlen > buflen) ||
	    (datlen < sizeof(struct smc_clc_msg_decline)) ||
	    (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
	    memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
	    ((clcm->type != SMC_CLC_DECLINE) &&
	     (clcm->type != expected_type))) {
		smc->sk.sk_err = EPROTO;
		reason_code = -EPROTO;
		goto out;
	}

	/* receive the complete CLC message */
	vec.iov_base = buf;
	vec.iov_len = buflen;
	memset(&msg, 0, sizeof(struct msghdr));
	krflags = MSG_WAITALL;
	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
	if (len < datlen) {
		/* short read or receive error: both are reported as EPROTO */
		smc->sk.sk_err = EPROTO;
		reason_code = -EPROTO;
		goto out;
	}
	if (clcm->type == SMC_CLC_DECLINE) {
		reason_code = SMC_CLC_DECL_REPLY;
		/* peer reports a synchronization error: flag the link group */
		if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
			== SMC_CLC_DECL_SYNCERR)
			smc->conn.lgr->sync_err = true;
	}

out:
	return reason_code;
}

/* Build a CLC DECLINE message and send it across the internal TCP socket.
 *
 * @smc:            SMC socket whose clcsock is used for sending
 * @peer_diag_info: diagnosis code, transmitted in network byte order
 * @out_of_sync:    if non-zero, the header flag is set to 1
 *
 * Returns the result of kernel_sendmsg() unchanged. A negative result is
 * stored (negated) in smc->sk.sk_err; a short send sets sk_err to EPROTO.
 */
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
			 u8 out_of_sync)
{
	struct smc_clc_msg_decline dclc;
	struct msghdr msg;
	struct kvec iov;
	int sent;

	memset(&msg, 0, sizeof(msg));
	memset(&dclc, 0, sizeof(dclc));

	/* header */
	memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	dclc.hdr.type = SMC_CLC_DECLINE;
	dclc.hdr.length = htons(sizeof(dclc));
	dclc.hdr.version = SMC_CLC_V1;
	if (out_of_sync)
		dclc.hdr.flag = 1;	/* otherwise stays 0 from the memset */

	/* payload and trailer */
	memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
	dclc.peer_diagnosis = htonl(peer_diag_info);
	memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

	iov.iov_base = &dclc;
	iov.iov_len = sizeof(dclc);
	sent = kernel_sendmsg(smc->clcsock, &msg, &iov, 1, sizeof(dclc));

	/* handle the negative case first so that the length comparison only
	 * ever sees a non-negative value
	 */
	if (sent < 0)
		smc->sk.sk_err = -sent;
	else if ((size_t)sent < sizeof(dclc))
		smc->sk.sk_err = EPROTO;

	return sent;
}

/* Build a CLC PROPOSAL message and send it across the internal TCP socket.
 *
 * @smc:      SMC socket whose clcsock is used for sending
 * @smcibdev: IB device whose gid/mac are announced to the peer
 * @ibport:   port of @smcibdev; used as (ibport - 1) to index gid[]/mac[]
 *
 * Returns 0 on success, SMC_CLC_DECL_CNFERR if subnet and mask cannot be
 * determined from the TCP socket, or a negative value if sending failed
 * (in which case smc->sk.sk_err is set as well).
 */
int smc_clc_send_proposal(struct smc_sock *smc,
			  struct smc_ib_device *smcibdev,
			  u8 ibport)
{
	struct smc_clc_msg_proposal pclc;
	int reason_code = 0;
	struct msghdr msg;
	struct kvec vec;
	int len, rc;

	/* send SMC Proposal CLC message */
	memset(&pclc, 0, sizeof(pclc));
	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	pclc.hdr.type = SMC_CLC_PROPOSAL;
	pclc.hdr.length = htons(sizeof(pclc));
	pclc.hdr.version = SMC_CLC_V1;		/* SMC version */
	memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
	memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);

	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
				  &pclc.prefix_len);
	if (rc)
		return SMC_CLC_DECL_CNFERR; /* configuration error */
	memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	memset(&msg, 0, sizeof(msg));
	vec.iov_base = &pclc;
	vec.iov_len = sizeof(pclc);
	/* due to the few bytes needed for clc-handshake this cannot block */
	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));

	/* Test for a negative result before comparing against sizeof():
	 * comparing an int with a size_t converts a negative len to a huge
	 * unsigned value, which would make a failed send look complete.
	 */
	if (len < 0) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
		reason_code = -smc->sk.sk_err;
	} else if ((size_t)len < sizeof(pclc)) {
		/* message was only partially sent */
		reason_code = -ENETUNREACH;
		smc->sk.sk_err = -reason_code;
	}

	return reason_code;
}

/* send CLC CONFIRM message across internal TCP socket */
int smc_clc_send_confirm(struct smc_sock *smc)
{
184
	struct smc_connection *conn = &smc->conn;
185
	struct smc_clc_msg_accept_confirm cclc;
186
	struct smc_link *link;
187 188 189 190 191
	int reason_code = 0;
	struct msghdr msg;
	struct kvec vec;
	int len;

192
	link = &conn->lgr->lnk[SMC_SINGLE_LINK];
193 194 195 196 197 198 199
	/* send SMC Confirm CLC msg */
	memset(&cclc, 0, sizeof(cclc));
	memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	cclc.hdr.type = SMC_CLC_CONFIRM;
	cclc.hdr.length = htons(sizeof(cclc));
	cclc.hdr.version = SMC_CLC_V1;		/* SMC version */
	memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
200 201
	memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
	       SMC_GID_SIZE);
202
	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
203
	hton24(cclc.qpn, link->roce_qp->qp_num);
204 205
	cclc.rmb_rkey =
		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
206
	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
207 208
	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
209 210 211
	cclc.rmbe_size = conn->rmbe_size_short;
	cclc.rmb_dma_addr =
		cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
212
	hton24(cclc.psn, link->psn_initial);
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232

	memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

	memset(&msg, 0, sizeof(msg));
	vec.iov_base = &cclc;
	vec.iov_len = sizeof(cclc);
	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
	if (len < sizeof(cclc)) {
		if (len >= 0) {
			reason_code = -ENETUNREACH;
			smc->sk.sk_err = -reason_code;
		} else {
			smc->sk.sk_err = smc->clcsock->sk->sk_err;
			reason_code = -smc->sk.sk_err;
		}
	}
	return reason_code;
}

/* send CLC ACCEPT message across internal TCP socket */
233
int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
234
{
235
	struct smc_connection *conn = &new_smc->conn;
236
	struct smc_clc_msg_accept_confirm aclc;
237
	struct smc_link *link;
238 239 240 241 242
	struct msghdr msg;
	struct kvec vec;
	int rc = 0;
	int len;

243
	link = &conn->lgr->lnk[SMC_SINGLE_LINK];
244 245 246 247 248
	memset(&aclc, 0, sizeof(aclc));
	memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	aclc.hdr.type = SMC_CLC_ACCEPT;
	aclc.hdr.length = htons(sizeof(aclc));
	aclc.hdr.version = SMC_CLC_V1;		/* SMC version */
249 250
	if (srv_first_contact)
		aclc.hdr.flag = 1;
251
	memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
252 253
	memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
	       SMC_GID_SIZE);
254
	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
255
	hton24(aclc.qpn, link->roce_qp->qp_num);
256 257
	aclc.rmb_rkey =
		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
258
	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
259 260
	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
	aclc.qp_mtu = link->path_mtu;
U
Ursula Braun 已提交
261 262 263
	aclc.rmbe_size = conn->rmbe_size_short,
	aclc.rmb_dma_addr =
		cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
264
	hton24(aclc.psn, link->psn_initial);
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
	memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

	memset(&msg, 0, sizeof(msg));
	vec.iov_base = &aclc;
	vec.iov_len = sizeof(aclc);
	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
	if (len < sizeof(aclc)) {
		if (len >= 0)
			new_smc->sk.sk_err = EPROTO;
		else
			new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
		rc = sock_error(&new_smc->sk);
	}

	return rc;
}