// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2018 Intel Corporation.
 *
 */
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "opfn.h"

/* Mask for the bit at position IB_BTHE_E_SHIFT; tested against bth1 below. */
#define IB_BTHE_E                 BIT(IB_BTHE_E_SHIFT)

/*
 * Map a capability code to its bit in the opfn.requested/opfn.completed
 * bitmaps: code N occupies bit N - 1, so code 1 is bit 0.
 */
#define OPFN_CODE(code) BIT((code) - 1)
/* Same as OPFN_CODE(), taking the code name without its STL_VERBS_EXTD_ prefix. */
#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)

/*
 * Per-capability negotiation callbacks, looked up by capability code in
 * hfi1_opfn_handlers[]. Any callback may be NULL; callers in this file check
 * before invoking.
 */
struct hfi1_opfn_type {
	/*
	 * Called by opfn_conn_request() to fill *data with the request
	 * payload. A false return makes the caller mark the capability as
	 * completed without sending anything.
	 */
	bool (*request)(struct rvt_qp *qp, u64 *data);
	/*
	 * Called by opfn_conn_response() with the received payload in *data;
	 * the value left in *data is what gets returned to the requester.
	 * A true return marks the capability as completed.
	 */
	bool (*response)(struct rvt_qp *qp, u64 *data);
	/*
	 * Called by opfn_conn_reply() with the reply payload. A true return
	 * marks the capability as completed.
	 */
	bool (*reply)(struct rvt_qp *qp, u64 data);
	/* Called when a previously completed capability is invalidated. */
	void (*error)(struct rvt_qp *qp);
};

/*
 * Handler table indexed by capability code. Only TID RDMA is populated;
 * every other slot is zero-initialized, i.e. all of its callbacks are NULL.
 */
static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
	[STL_VERBS_EXTD_TID_RDMA] = {
		.request = tid_rdma_conn_req,
		.response = tid_rdma_conn_resp,
		.reply = tid_rdma_conn_reply,
		.error = tid_rdma_conn_error,
	},
};

/* Workqueue for deferred connection requests; set up in opfn_init(). */
static struct workqueue_struct *opfn_wq;

/* Forward declaration: used by opfn_conn_request() before its definition. */
static void opfn_schedule_conn_request(struct rvt_qp *qp);

/* Report whether the IB_BTHE_E bit is set in the given bth1 word. */
static bool hfi1_opfn_extended(u32 bth1)
{
	return (bth1 & IB_BTHE_E) != 0;
}

static void opfn_conn_request(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_atomic_wr wr;
	u16 mask, capcode;
	struct hfi1_opfn_type *extd;
	u64 data;
	unsigned long flags;
	int ret = 0;

K
Kaike Wan 已提交
51
	trace_hfi1_opfn_state_conn_request(qp);
K
Kaike Wan 已提交
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
	spin_lock_irqsave(&priv->opfn.lock, flags);
	/*
	 * Exit if the extended bit is not set, or if nothing is requested, or
	 * if we have completed all requests, or if a previous request is in
	 * progress
	 */
	if (!priv->opfn.extended || !priv->opfn.requested ||
	    priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
		goto done;

	mask = priv->opfn.requested & ~priv->opfn.completed;
	capcode = ilog2(mask & ~(mask - 1)) + 1;
	if (capcode >= STL_VERBS_EXTD_MAX) {
		priv->opfn.completed |= OPFN_CODE(capcode);
		goto done;
	}

	extd = &hfi1_opfn_handlers[capcode];
	if (!extd || !extd->request || !extd->request(qp, &data)) {
		/*
		 * Either there is no handler for this capability or the request
		 * packet could not be generated. Either way, mark it as done so
		 * we don't keep attempting to complete it.
		 */
		priv->opfn.completed |= OPFN_CODE(capcode);
		goto done;
	}

K
Kaike Wan 已提交
80
	trace_hfi1_opfn_data_conn_request(qp, capcode, data);
K
Kaike Wan 已提交
81 82 83 84 85 86 87 88 89 90 91 92 93 94
	data = (data & ~0xf) | capcode;

	memset(&wr, 0, sizeof(wr));
	wr.wr.opcode = IB_WR_OPFN;
	wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
	wr.compare_add = data;

	priv->opfn.curr = capcode;	/* A new request is now in progress */
	/* Drop opfn.lock before calling ib_post_send() */
	spin_unlock_irqrestore(&priv->opfn.lock, flags);

	ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
	if (ret)
		goto err;
K
Kaike Wan 已提交
95
	trace_hfi1_opfn_state_conn_request(qp);
K
Kaike Wan 已提交
96 97
	return;
err:
K
Kaike Wan 已提交
98 99
	trace_hfi1_msg_opfn_conn_request(qp, "ib_ost_send failed: ret = ",
					 (u64)ret);
K
Kaike Wan 已提交
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
	spin_lock_irqsave(&priv->opfn.lock, flags);
	/*
	 * In case of an unexpected error return from ib_post_send
	 * clear opfn.curr and reschedule to try again
	 */
	priv->opfn.curr = STL_VERBS_EXTD_NONE;
	opfn_schedule_conn_request(qp);
done:
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

/*
 * Work handler for opfn_work: walk back from the work item to the QP private
 * data that embeds it and issue the connection request for the owning QP.
 */
void opfn_send_conn_request(struct work_struct *work)
{
	struct hfi1_opfn_data *opfn =
		container_of(work, struct hfi1_opfn_data, opfn_work);
	struct hfi1_qp_priv *priv =
		container_of(opfn, struct hfi1_qp_priv, opfn);

	opfn_conn_request(priv->owner);
}

/*
 * When QP s_lock is held in the caller, the OPFN request must be scheduled
 * to a different workqueue to avoid double locking QP s_lock in call to
 * ib_post_send in opfn_conn_request
 */
static void opfn_schedule_conn_request(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

K
Kaike Wan 已提交
131
	trace_hfi1_opfn_state_sched_conn_request(qp);
K
Kaike Wan 已提交
132 133 134 135 136 137 138 139 140 141 142 143
	queue_work(opfn_wq, &priv->opfn.opfn_work);
}

void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
			struct ib_atomic_eth *ateth)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u64 data = be64_to_cpu(ateth->compare_data);
	struct hfi1_opfn_type *extd;
	u8 capcode;
	unsigned long flags;

K
Kaike Wan 已提交
144
	trace_hfi1_opfn_state_conn_response(qp);
K
Kaike Wan 已提交
145
	capcode = data & 0xf;
K
Kaike Wan 已提交
146
	trace_hfi1_opfn_data_conn_response(qp, capcode, data);
K
Kaike Wan 已提交
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
	if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
		return;

	extd = &hfi1_opfn_handlers[capcode];

	if (!extd || !extd->response) {
		e->atomic_data = capcode;
		return;
	}

	spin_lock_irqsave(&priv->opfn.lock, flags);
	if (priv->opfn.completed & OPFN_CODE(capcode)) {
		/*
		 * We are receiving a request for a feature that has already
		 * been negotiated. This may mean that the other side has reset
		 */
		priv->opfn.completed &= ~OPFN_CODE(capcode);
		if (extd->error)
			extd->error(qp);
	}

	if (extd->response(qp, &data))
		priv->opfn.completed |= OPFN_CODE(capcode);
	e->atomic_data = (data & ~0xf) | capcode;
K
Kaike Wan 已提交
171
	trace_hfi1_opfn_state_conn_response(qp);
K
Kaike Wan 已提交
172 173 174 175 176 177 178 179 180 181
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

/*
 * opfn_conn_reply - handle the reply to our outstanding OPFN request
 * @qp: the QP that received the reply
 * @data: reply payload; the low four bits hold the capability code
 *
 * A reply is only acted upon when it matches the request recorded in
 * opfn.curr. In that case the capability's reply handler (if any) decides
 * whether the capability is now completed, and opfn.curr is cleared.
 */
void opfn_conn_reply(struct rvt_qp *qp, u64 data)
{
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct hfi1_opfn_type *handler;
	unsigned long flags;
	u8 code;

	trace_hfi1_opfn_state_conn_reply(qp);
	code = data & 0xf;
	trace_hfi1_opfn_data_conn_reply(qp, code, data);

	if (code == 0 || code >= STL_VERBS_EXTD_MAX)
		return;

	spin_lock_irqsave(&qpriv->opfn.lock, flags);

	/* Ignore the reply unless a request is pending and it is for it */
	if (qpriv->opfn.curr && code == qpriv->opfn.curr) {
		handler = &hfi1_opfn_handlers[code];

		if (handler->reply && handler->reply(qp, data))
			qpriv->opfn.completed |= OPFN_CODE(code);

		/* The previous request is no longer in progress */
		qpriv->opfn.curr = STL_VERBS_EXTD_NONE;
		trace_hfi1_opfn_state_conn_reply(qp);
	}

	spin_unlock_irqrestore(&qpriv->opfn.lock, flags);
}

void opfn_conn_error(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_opfn_type *extd = NULL;
	unsigned long flags;
	u16 capcode;

K
Kaike Wan 已提交
221 222
	trace_hfi1_opfn_state_conn_error(qp);
	trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
K
Kaike Wan 已提交
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
	/*
	 * The QP has gone into the Error state. We have to invalidate all
	 * negotiated feature, including the one in progress (if any). The RC
	 * QP handling will clean the WQE for the connection request.
	 */
	spin_lock_irqsave(&priv->opfn.lock, flags);
	while (priv->opfn.completed) {
		capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
		extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
		if (extd->error)
			extd->error(qp);
		priv->opfn.completed &= ~OPFN_CODE(capcode);
	}
	priv->opfn.extended = 0;
	priv->opfn.requested = 0;
	priv->opfn.curr = STL_VERBS_EXTD_NONE;
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;

248 249 250
	if (attr_mask & IB_QP_RETRY_CNT)
		priv->s_retry = attr->retry_cnt;

K
Kaike Wan 已提交
251 252 253 254
	spin_lock_irqsave(&priv->opfn.lock, flags);
	if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
		struct tid_rdma_params *local = &priv->tid_rdma.local;

K
Kaike Wan 已提交
255 256
		if (attr_mask & IB_QP_TIMEOUT)
			priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
K
Kaike Wan 已提交
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
		if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
		    qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
			tid_rdma_opfn_init(qp, local);
			/*
			 * We only want to set the OPFN requested bit when the
			 * QP transitions to RTS.
			 */
			if (attr_mask & IB_QP_STATE &&
			    attr->qp_state == IB_QPS_RTS) {
				priv->opfn.requested |= OPFN_MASK(TID_RDMA);
				/*
				 * If the QP is transitioning to RTS and the
				 * opfn.completed for TID RDMA has already been
				 * set, the QP is being moved *back* into RTS.
				 * We can now renegotiate the TID RDMA
				 * parameters.
				 */
				if (priv->opfn.completed &
				    OPFN_MASK(TID_RDMA)) {
					priv->opfn.completed &=
						~OPFN_MASK(TID_RDMA);
					/*
					 * Since the opfn.completed bit was
					 * already set, it is safe to assume
					 * that the opfn.extended is also set.
					 */
					opfn_schedule_conn_request(qp);
				}
			}
		} else {
			memset(local, 0, sizeof(*local));
		}
	}
	spin_unlock_irqrestore(&priv->opfn.lock, flags);
}

/*
 * opfn_trigger_conn_request - start OPFN when the extended bit is first seen
 * @qp: the QP the packet belongs to
 * @bth1: bth1 word of the packet, checked for the extended bit
 *
 * Does nothing if opfn.extended is already set, if @bth1 lacks the extended
 * bit, or if the OPFN capability is not set. Otherwise opfn.extended is set
 * and, when the QP is in RTS, a connection request is issued right away.
 */
void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
{
	struct hfi1_qp_priv *qpriv = qp->priv;

	if (qpriv->opfn.extended || !hfi1_opfn_extended(bth1) ||
	    !HFI1_CAP_IS_KSET(OPFN))
		return;

	qpriv->opfn.extended = 1;
	if (qp->state != IB_QPS_RTS)
		return;

	opfn_conn_request(qp);
}

/*
 * opfn_init - allocate the OPFN workqueue
 *
 * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
 */
int opfn_init(void)
{
	opfn_wq = alloc_workqueue("hfi_opfn",
				  WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
				  WQ_MEM_RECLAIM,
				  HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);

	return opfn_wq ? 0 : -ENOMEM;
}

/*
 * opfn_exit - destroy the OPFN workqueue, if one was allocated
 *
 * The pointer is reset afterwards, so a repeated call is a no-op.
 */
void opfn_exit(void)
{
	if (!opfn_wq)
		return;

	destroy_workqueue(opfn_wq);
	opfn_wq = NULL;
}