/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "srq.h"
#include "qp.h"

static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device     = &dev->ib_dev;
		event.event      = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}

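/* Software ownership bit expected for the n-th CQE of an nent-entry
 * (power-of-two) ring; it toggles on every wrap of the ring.
 */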
static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

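/* Return the CQE at index n if it is valid and currently owned by software
 * (written by hardware and not yet consumed), otherwise NULL.
 */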
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_REG_MR:
		return IB_WC_REG_MR;

	default:
	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}

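/* Translate a successful requester (send-side) completion into the
 * corresponding ib_wc opcode and flags.
 */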
static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* fall through */
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode    = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* fall through */
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode    = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode    = IB_WC_RDMA_READ;
		wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode    = IB_WC_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode    = IB_WC_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode    = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode    = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}

enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE	   = 2,
};

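/* Fill in an ib_wc for a responder (receive-side) completion: consume the
 * matching RQ or SRQ WQE and decode the IB or RoCE specific fields.
 */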
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8  roce_packet_type;
	bool vlan_present;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
			srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq)
				mlx5_core_res_put(&msrq->common);
		}
	} else {
		wq	  = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (get_cqe_opcode(cqe)) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode   = IB_WC_RECV;
		wc->wc_flags = IB_WC_IP_CSUM_OK;
		if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
			       (cqe->hds_ip_ext & CQE_L4_OK))))
			wc->wc_flags = 0;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_INVALIDATE;
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
		break;
	}
	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	if (unlikely(is_qp1(qp->ibqp.qp_type))) {
		u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;

		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
				    &wc->pkey_index);
	} else {
		wc->pkey_index = 0;
	}

	if (ll != IB_LINK_LAYER_ETHERNET) {
		wc->slid = be16_to_cpu(cqe->slid);
		wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
		return;
	}

	wc->slid = 0;
	vlan_present = cqe->l4_l3_hdr_type & 0x1;
	roce_packet_type   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
	if (vlan_present) {
		wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
		wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
		wc->wc_flags |= IB_WC_WITH_VLAN;
	} else {
		wc->sl = 0;
	}

	switch (roce_packet_type) {
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
		wc->network_hdr_type = RDMA_NETWORK_IB;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
		wc->network_hdr_type = RDMA_NETWORK_IPV6;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
		wc->network_hdr_type = RDMA_NETWORK_IPV4;
		break;
	}
	wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	mlx5_ib_warn(dev, "dump error cqe\n");
	mlx5_dump_err_cqe(dev->mdev, cqe);
}

static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump)
		dump_cqe(dev, cqe);
}

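/* Walk the SQ work-request list from the last polled WQE (tail) up to the
 * one that just completed (head) and advance sq->last_poll past it.
 */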
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
}

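/* Decode a signature error CQE into the ib_sig_err descriptor, classifying
 * it as a guard, reftag or apptag mismatch.
 */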
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else
	if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else
	if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}

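/* Generate IB_WC_WR_FLUSH_ERR completions in software for every WQE still
 * outstanding on the given work queue (send or receive), used when the
 * device can no longer produce hardware completions.
 */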
static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
		    int *npolled, bool is_send)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	int np;
	int i;

	wq = (is_send) ? &qp->sq : &qp->rq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0;  i < cur && np < num_entries; i++) {
		unsigned int idx;

		idx = (is_send) ? wq->last_poll : wq->tail;
		idx &= (wq->wqe_cnt - 1);
		wc->wr_id = wq->wrid[idx];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		if (is_send)
			wq->last_poll = wq->w_list[idx].next;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
	}
	*npolled = np;
}

static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
				 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_qp *qp;

	*npolled = 0;
	/* Find uncompleted WQEs belonging to that cq and return simulated
	 * flush-error completions for them.
	 */
	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
		sw_comp(qp, num_entries, wc + *npolled, npolled, true);
		if (*npolled >= num_entries)
			return;
	}

	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
		sw_comp(qp, num_entries, wc + *npolled, npolled, false);
		if (*npolled >= num_entries)
			return;
	}
}

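/* Poll one hardware CQE: consume it, resolve the QP it belongs to and
 * translate it into an ib_wc entry. Returns -EAGAIN when no CQE is ready.
 */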
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = get_cqe_opcode(cqe64);
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = radix_tree_lookup(&dev->qp_table.tree, qpn);
		*cur_qp = to_mibqp(mqp);
	}

	wc->qp  = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR: {
		struct mlx5_sig_err_cqe *sig_err_cqe =
			(struct mlx5_sig_err_cqe *)cqe64;
		struct mlx5_core_sig_ctx *sig;

		xa_lock(&dev->sig_mrs);
		sig = xa_load(&dev->sig_mrs,
				mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		get_sig_err_item(sig_err_cqe, &sig->err_item);
		sig->sig_err_exists = true;
		sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, sig->err_item.key,
			     sig->err_item.err_type,
			     sig->err_item.sig_err_offset,
			     sig->err_item.expected,
			     sig->err_item.actual);

		xa_unlock(&dev->sig_mrs);
		goto repoll;
	}
	}

	return 0;
}

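/* Drain up to num_entries software-generated completions queued on
 * cq->wc_list; when is_fatal_err is set they are reported as flush errors.
 */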
static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
			struct ib_wc *wc, bool is_fatal_err)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_ib_wc *soft_wc, *next;
	int npolled = 0;

	list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
		if (npolled >= num_entries)
			break;

		mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
			    cq->mcq.cqn);

		if (unlikely(is_fatal_err)) {
			soft_wc->wc.status = IB_WC_WR_FLUSH_ERR;
			soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		}
		wc[npolled++] = soft_wc->wc;
		list_del(&soft_wc->list);
		kfree(soft_wc);
	}

	return npolled;
}

int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_core_dev *mdev = dev->mdev;
	unsigned long flags;
	int soft_polled = 0;
	int npolled;

	spin_lock_irqsave(&cq->lock, flags);
	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		/* make sure no soft wqe's are waiting */
		if (unlikely(!list_empty(&cq->wc_list)))
			soft_polled = poll_soft_wc(cq, num_entries, wc, true);

		mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled,
				     wc + soft_polled, &npolled);
		goto out;
	}

	if (unlikely(!list_empty(&cq->wc_list)))
		soft_polled = poll_soft_wc(cq, num_entries, wc, false);

	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
		if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);
out:
	spin_unlock_irqrestore(&cq->lock, flags);

	return soft_polled + npolled;
}

int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void __iomem *uar_page = mdev->priv.uar->map;
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, irq_flags);
	if (cq->notify_flags != IB_CQ_NEXT_COMP)
		cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
		ret = 1;
	spin_unlock_irqrestore(&cq->lock, irq_flags);

	mlx5_cq_arm(&cq->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    uar_page, to_mcq(ibcq)->mcq.cons_index);

	return ret;
}

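/* Allocate a fragmented buffer holding nent CQEs of cqe_size bytes and set
 * up its frag-buf control structure for indexed CQE access.
 */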
static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_cq_buf *buf,
			     int nent,
			     int cqe_size)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
	u8 log_wq_sz     = ilog2(cqe_size);
	int err;

	err = mlx5_frag_buf_alloc_node(dev->mdev,
				       nent * cqe_size,
				       frag_buf,
				       dev->mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}

enum {
	MLX5_CQE_RES_FORMAT_HASH = 0,
	MLX5_CQE_RES_FORMAT_CSUM = 1,
	MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3,
};

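/* Translate the mini-CQE response format requested by userspace into the
 * hardware CQE compression format value, checking device capabilities.
 */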
static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format)
{
	switch (format) {
	case MLX5_IB_CQE_RES_FORMAT_HASH:
		return MLX5_CQE_RES_FORMAT_HASH;
	case MLX5_IB_CQE_RES_FORMAT_CSUM:
		return MLX5_CQE_RES_FORMAT_CSUM;
	case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX:
		if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
			return MLX5_CQE_RES_FORMAT_CSUM_STRIDX;
		return -EOPNOTSUPP;
	default:
		return -EINVAL;
	}
}

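/* Create a CQ whose buffer and doorbell live in user memory: parse the user
 * command, pin the buffer, map the doorbell page and build the CREATE_CQ
 * command, including CQE compression and padding attributes.
 */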
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct mlx5_ib_cq *cq, int entries, u32 **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd = {};
	size_t ucmdlen;
	int page_shift;
	__be64 *pas;
	int npages;
	int ncont;
	void *cqc;
	int err;
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	ucmdlen = min(udata->inlen, sizeof(ucmd));
	if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
		return -EINVAL;

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
		return -EFAULT;

	if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD |
			    MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)))
		return -EINVAL;

	if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) ||
	    ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem =
		ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
			    entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db);
	if (err)
		goto err_umem;

	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
			   &ncont, NULL);
	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
		    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
	*cqb = kvzalloc(*inlen, GFP_KERNEL);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
		*index = ucmd.uar_page_index;
	} else if (context->bfregi.lib_uar_dyn) {
		err = -EINVAL;
		goto err_cqb;
	} else {
		*index = context->bfregi.sys_pages[0];
	}

	if (ucmd.cqe_comp_en == 1) {
		int mini_cqe_format;

		if (!((*cqe_size == 128 &&
		       MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) ||
		      (*cqe_size == 64  &&
		       MLX5_CAP_GEN(dev->mdev, cqe_compression)))) {
			err = -EOPNOTSUPP;
			mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
				     *cqe_size);
			goto err_cqb;
		}

		mini_cqe_format =
			mini_cqe_res_format_to_hw(dev,
						  ucmd.cqe_comp_res_format);
		if (mini_cqe_format < 0) {
			err = mini_cqe_format;
			mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n",
				    ucmd.cqe_comp_res_format, err);
			goto err_cqb;
		}

		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
		MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format);
	}

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) {
		if (*cqe_size != 128 ||
		    !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) {
			err = -EOPNOTSUPP;
			mlx5_ib_warn(dev,
				     "CQE padding is not supported for CQE size of %dB!\n",
				     *cqe_size);
			goto err_cqb;
		}

		cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
	}

	MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
	return 0;

err_cqb:
	kvfree(*cqb);

err_db:
	mlx5_ib_db_unmap_user(context, &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	mlx5_ib_db_unmap_user(context, &cq->db);
	ib_umem_release(cq->buf.umem);
}

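/* Initialize every CQE in the buffer to the invalid opcode so that none of
 * them is mistaken for a valid, software-owned entry before hardware has
 * written it.
 */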
static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

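/* Create a CQ whose buffer and doorbell are allocated in kernel memory and
 * build the corresponding CREATE_CQ command.
 */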
static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    u32 **cqb, int *index, int *inlen)
{
	__be64 *pas;
	void *cqc;
	int err;

	err = mlx5_db_alloc(dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db  = cq->db.db;
	cq->mcq.arm_db     = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_frag_buf(&cq->buf);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
		 cq->buf.frag_buf.npages;
	*cqb = kvzalloc(*inlen, GFP_KERNEL);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);

	*index = dev->mdev->priv.uar->index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(dev->mdev, &cq->db);
	return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(dev->mdev, &cq->db);
}

static void notify_soft_wc_handler(struct work_struct *work)
{
	struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
					     notify_work);

	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}

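/* Verbs create_cq entry point: validate the attributes, build the CQ buffer
 * in user or kernel memory and issue the CREATE_CQ command to firmware.
 */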
int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		      struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	int uninitialized_var(index);
	int uninitialized_var(inlen);
	u32 *cqb = NULL;
	void *cqc;
	int cqe_size;
	unsigned int irqn;
	int eqn;
	int err;

	if (entries < 0 ||
	    (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
		return -EINVAL;

	if (check_cq_create_flags(attr->flags))
		return -EOPNOTSUPP;

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
		return -EINVAL;

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;
	cq->create_flags = attr->flags;
	INIT_LIST_HEAD(&cq->list_send_qp);
	INIT_LIST_HEAD(&cq->list_recv_qp);

	if (udata) {
		err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
				     &index, &inlen);
		if (err)
			return err;
	} else {
		cqe_size = cache_line_size() == 128 ? 128 : 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			return err;

		INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
	}

	err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
	if (err)
		goto err_cqb;

	cq->cqe_size = cqe_size;

	cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
	MLX5_SET(cqc, cqc, cqe_sz,
		 cqe_sz_to_mlx_sz(cqe_size,
				  cq->private_flags &
				  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
	MLX5_SET(cqc, cqc, uar_page, index);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
		MLX5_SET(cqc, cqc, oi, 1);

	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	cq->mcq.irqn = irqn;
	if (udata)
		cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
	else
		cq->mcq.comp  = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	INIT_LIST_HEAD(&cq->wc_list);

	if (udata)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}


	kvfree(cqb);
	return 0;

err_cmd:
	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);

err_cqb:
	kvfree(cqb);
	if (udata)
		destroy_cq_user(cq, udata);
	else
		destroy_cq_kernel(dev, cq);
	return err;
}

void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);

	mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
	if (udata)
		destroy_cq_user(mcq, udata);
	else
		destroy_cq_kernel(dev, mcq);
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}

void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;

	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
		return -EOPNOTSUPP;

	if (cq_period > MLX5_MAX_CQ_PERIOD)
		return -EINVAL;

	err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
					     cq_period, cq_count);
	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}

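/* Validate the user resize request and pin the new user-memory buffer that
 * will back the resized CQ.
 */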
static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata, int *npas,
		       int *page_shift, int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;
	int npages;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	/* check multiplication overflow */
	if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
		return -EINVAL;

	umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
			   (size_t)ucmd.cqe_size * entries,
			   IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
			   npas, NULL);

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_frag_buf(cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	return err;
}

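/* Copy CQEs that are still owned by software from the old CQ buffer into the
 * freshly allocated resize buffer, fixing up the ownership bits, until the
 * RESIZE_CQ CQE is reached.
 */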
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
		dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
					     (i + 1) & cq->resize_buf->nent);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}

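/* Verbs resize_cq entry point: allocate the new buffer (user or kernel),
 * issue MODIFY_CQ with the resize opmod and, for kernel CQs, copy over the
 * still-valid CQEs.
 */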
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void *cqc;
	u32 *in;
	int err;
	int npas;
	__be64 *pas;
	int page_shift;
	int inlen;
	int uninitialized_var(cqe_size);
	unsigned long flags;

	if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1 ||
	    entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
		mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
			     entries,
			     1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
		return -EINVAL;
	}

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
				  &cqe_size);
	} else {
		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (!err) {
			struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf;

			npas = frag_buf->npages;
			page_shift = frag_buf->page_shift;
		}
	}

	if (err)
		goto ex;

	inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
		MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
	if (udata)
		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
				     pas, 0);
	else
		mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);

	MLX5_SET(modify_cq_in, in,
		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
		 MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
		 MLX5_MODIFY_CQ_MASK_PG_OFFSET |
		 MLX5_MODIFY_CQ_MASK_PG_SIZE);

	cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);

	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(cqc, cqc, cqe_sz,
		 cqe_sz_to_mlx_sz(cqe_size,
				  cq->private_flags &
				  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));

	MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
	MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);

	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}
	mutex_unlock(&cq->resize_mutex);

	kvfree(in);
	return 0;

ex_alloc:
	kvfree(in);

ex_resize:
	ib_umem_release(cq->resize_umem);
	if (!udata) {
		free_cq_buf(dev, cq->resize_buf);
		cq->resize_buf = NULL;
	}
ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}

int mlx5_ib_get_cqe_size(struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}

/* Called from atomic context */
int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
{
	struct mlx5_ib_wc *soft_wc;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	unsigned long flags;

	soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
	if (!soft_wc)
		return -ENOMEM;

	soft_wc->wc = *wc;
	spin_lock_irqsave(&cq->lock, flags);
	list_add_tail(&soft_wc->list, &cq->wc_list);
	if (cq->notify_flags == IB_CQ_NEXT_COMP ||
	    wc->status != IB_WC_SUCCESS) {
		cq->notify_flags = 0;
		schedule_work(&cq->notify_work);
	}
	spin_unlock_irqrestore(&cq->lock, flags);

	return 0;
}