Merge branch 'bnxt_en-next'

Michael Chan says: ==================== bnxt_en: Update for net-next. Three main changes in this series, besides the usual firmware spec update: 1. Add support for a new firmware communication channel direct to the firmware processor that handles flow offloads. This speeds up flow offload operations. 2. Use 64-bit internal flow handles to increase the number of flows that can be offloaded. 3. Add level-2 context memory paging so that we can configure more context memory for RDMA on the 57500 chips. Allocate more context memory if RDMA is enabled on the 57500 chips. ==================== Signed-off-by: N David S. Miller <davem@davemloft.net>

Merge branch 'bnxt_en-next'
Michael Chan says: ==================== bnxt_en: Update for net-next. Three main changes in this series, besides the usual firmware spec update: 1. Add support for a new firmware communication channel direct to the firmware processor that handles flow offloads. This speeds up flow offload operations. 2. Use 64-bit internal flow handles to increase the number of flows that can be offloaded. 3. Add level-2 context memory paging so that we can configure more context memory for RDMA on the 57500 chips. Allocate more context memory if RDMA is enabled on the 57500 chips. ==================== Signed-off-by: N David S. Miller <davem@davemloft.net>
44a7b3b6 · David S. Miller · ac68a3d3 · 0c2ff8d7 · 44a7b3b6 · 44a7b3b6
5 changed file
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1812,7 +1812,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp)
 	case CMPL_BASE_TYPE_HWRM_DONE:
 		seq_id = le16_to_cpu(h_cmpl->sequence_id);
 		if (seq_id == bp->hwrm_intr_seq_id)
-			bp->hwrm_intr_seq_id = HWRM_SEQ_ID_INVALID;
+			bp->hwrm_intr_seq_id = (u16)~bp->hwrm_intr_seq_id;
 		else
 			netdev_err(bp->dev, "Invalid hwrm seq id %d\n", seq_id);
 		break;
@@ -2375,7 +2375,11 @@ static void bnxt_free_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
 		rmem->pg_arr[i] = NULL;
 	}
 	if (rmem->pg_tbl) {
-		dma_free_coherent(&pdev->dev, rmem->nr_pages * 8,
+		size_t pg_tbl_size = rmem->nr_pages * 8;
+
+		if (rmem->flags & BNXT_RMEM_USE_FULL_PAGE_FLAG)
+			pg_tbl_size = rmem->page_size;
+		dma_free_coherent(&pdev->dev, pg_tbl_size,
 				  rmem->pg_tbl, rmem->pg_tbl_map);
 		rmem->pg_tbl = NULL;
 	}
@@ -2393,9 +2397,12 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)

 	if (rmem->flags & (BNXT_RMEM_VALID_PTE_FLAG | BNXT_RMEM_RING_PTE_FLAG))
 		valid_bit = PTU_PTE_VALID;
-	if (rmem->nr_pages > 1) {
-		rmem->pg_tbl = dma_alloc_coherent(&pdev->dev,
-						  rmem->nr_pages * 8,
+	if ((rmem->nr_pages > 1 || rmem->depth > 0) && !rmem->pg_tbl) {
+		size_t pg_tbl_size = rmem->nr_pages * 8;
+
+		if (rmem->flags & BNXT_RMEM_USE_FULL_PAGE_FLAG)
+			pg_tbl_size = rmem->page_size;
+		rmem->pg_tbl = dma_alloc_coherent(&pdev->dev, pg_tbl_size,
 						  &rmem->pg_tbl_map,
 						  GFP_KERNEL);
 		if (!rmem->pg_tbl)
@@ -2412,7 +2419,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
 		if (!rmem->pg_arr[i])
 			return -ENOMEM;

-		if (rmem->nr_pages > 1) {
+		if (rmem->nr_pages > 1 || rmem->depth > 0) {
 			if (i == rmem->nr_pages - 2 &&
 			    (rmem->flags & BNXT_RMEM_RING_PTE_FLAG))
 				extra_bits |= PTU_PTE_NEXT_TO_LAST;
@@ -3279,6 +3286,27 @@ static void bnxt_free_hwrm_resources(struct bnxt *bp)
 				  bp->hwrm_cmd_resp_dma_addr);
 		bp->hwrm_cmd_resp_addr = NULL;
 	}
+
+	if (bp->hwrm_cmd_kong_resp_addr) {
+		dma_free_coherent(&pdev->dev, PAGE_SIZE,
+				  bp->hwrm_cmd_kong_resp_addr,
+				  bp->hwrm_cmd_kong_resp_dma_addr);
+		bp->hwrm_cmd_kong_resp_addr = NULL;
+	}
+}
+
+static int bnxt_alloc_kong_hwrm_resources(struct bnxt *bp)
+{
+	struct pci_dev *pdev = bp->pdev;
+
+	bp->hwrm_cmd_kong_resp_addr =
+		dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+				   &bp->hwrm_cmd_kong_resp_dma_addr,
+				   GFP_KERNEL);
+	if (!bp->hwrm_cmd_kong_resp_addr)
+		return -ENOMEM;
+
+	return 0;
 }

 static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
@@ -3740,7 +3768,10 @@ void bnxt_hwrm_cmd_hdr_init(struct bnxt *bp, void *request, u16 req_type,
 	req->req_type = cpu_to_le16(req_type);
 	req->cmpl_ring = cpu_to_le16(cmpl_ring);
 	req->target_id = cpu_to_le16(target_id);
-	req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
+	if (bnxt_kong_hwrm_message(bp, req))
+		req->resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
+	else
+		req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
 }

 static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
@@ -3755,11 +3786,10 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
 	u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
 	struct hwrm_short_input short_input = {0};
-
-	req->seq_id = cpu_to_le16(bp->hwrm_cmd_seq++);
-	memset(resp, 0, PAGE_SIZE);
-	cp_ring_id = le16_to_cpu(req->cmpl_ring);
-	intr_process = (cp_ring_id == INVALID_HW_RING_ID) ? 0 : 1;
+	u32 doorbell_offset = BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER;
+	u8 *resp_addr = (u8 *)bp->hwrm_cmd_resp_addr;
+	u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
+	u16 dst = BNXT_HWRM_CHNL_CHIMP;

 	if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
 		if (msg_len > bp->hwrm_max_ext_req_len ||
@@ -3767,6 +3797,23 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			return -EINVAL;
 	}

+	if (bnxt_hwrm_kong_chnl(bp, req)) {
+		dst = BNXT_HWRM_CHNL_KONG;
+		bar_offset = BNXT_GRCPF_REG_KONG_COMM;
+		doorbell_offset = BNXT_GRCPF_REG_KONG_COMM_TRIGGER;
+		resp = bp->hwrm_cmd_kong_resp_addr;
+		resp_addr = (u8 *)bp->hwrm_cmd_kong_resp_addr;
+	}
+
+	memset(resp, 0, PAGE_SIZE);
+	cp_ring_id = le16_to_cpu(req->cmpl_ring);
+	intr_process = (cp_ring_id == INVALID_HW_RING_ID) ? 0 : 1;
+
+	req->seq_id = cpu_to_le16(bnxt_get_hwrm_seq_id(bp, dst));
+	/* currently supports only one outstanding message */
+	if (intr_process)
+		bp->hwrm_intr_seq_id = le16_to_cpu(req->seq_id);
+
 	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
 	    msg_len > BNXT_HWRM_MAX_REQ_LEN) {
 		void *short_cmd_req = bp->hwrm_short_cmd_req_addr;
@@ -3800,17 +3847,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	}

 	/* Write request msg to hwrm channel */
-	__iowrite32_copy(bp->bar0, data, msg_len / 4);
+	__iowrite32_copy(bp->bar0 + bar_offset, data, msg_len / 4);

 	for (i = msg_len; i < max_req_len; i += 4)
-		writel(0, bp->bar0 + i);
-
-	/* currently supports only one outstanding message */
-	if (intr_process)
-		bp->hwrm_intr_seq_id = le16_to_cpu(req->seq_id);
+		writel(0, bp->bar0 + bar_offset + i);

 	/* Ring channel doorbell */
-	writel(1, bp->bar0 + 0x100);
+	writel(1, bp->bar0 + doorbell_offset);

 	if (!timeout)
 		timeout = DFLT_HWRM_CMD_TIMEOUT;
@@ -3825,10 +3868,13 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
 	timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
 	tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
-	resp_len = bp->hwrm_cmd_resp_addr + HWRM_RESP_LEN_OFFSET;
+	resp_len = (__le32 *)(resp_addr + HWRM_RESP_LEN_OFFSET);
+
 	if (intr_process) {
+		u16 seq_id = bp->hwrm_intr_seq_id;
+
 		/* Wait until hwrm response cmpl interrupt is processed */
-		while (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID &&
+		while (bp->hwrm_intr_seq_id != (u16)~seq_id &&
 		       i++ < tmo_count) {
 			/* on first few passes, just barely sleep */
 			if (i < HWRM_SHORT_TIMEOUT_COUNTER)
@@ -3839,14 +3885,14 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 					     HWRM_MAX_TIMEOUT);
 		}

-		if (bp->hwrm_intr_seq_id != HWRM_SEQ_ID_INVALID) {
+		if (bp->hwrm_intr_seq_id != (u16)~seq_id) {
 			netdev_err(bp->dev, "Resp cmpl intr err msg: 0x%x\n",
 				   le16_to_cpu(req->req_type));
 			return -1;
 		}
 		len = (le32_to_cpu(*resp_len) & HWRM_RESP_LEN_MASK) >>
 		      HWRM_RESP_LEN_SFT;
-		valid = bp->hwrm_cmd_resp_addr + len - 1;
+		valid = resp_addr + len - 1;
 	} else {
 		int j;

@@ -3874,7 +3920,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 		}

 		/* Last byte of resp contains valid bit */
-		valid = bp->hwrm_cmd_resp_addr + len - 1;
+		valid = resp_addr + len - 1;
 		for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
 			/* make sure we read from updated DMA memory */
 			dma_rmb();
@@ -4009,6 +4055,10 @@ static int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp)
 			cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD);
 	}

+	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
+		req.flags |= cpu_to_le32(
+			FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE);
+
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc)
@@ -4137,12 +4187,11 @@ static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp,
 static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
 					     struct bnxt_ntuple_filter *fltr)
 {
-	int rc = 0;
+	struct bnxt_vnic_info *vnic = &bp->vnic_info[fltr->rxq + 1];
 	struct hwrm_cfa_ntuple_filter_alloc_input req = {0};
-	struct hwrm_cfa_ntuple_filter_alloc_output *resp =
-		bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_ntuple_filter_alloc_output *resp;
 	struct flow_keys *keys = &fltr->fkeys;
-	struct bnxt_vnic_info *vnic = &bp->vnic_info[fltr->rxq + 1];
+	int rc = 0;

 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_NTUPLE_FILTER_ALLOC, -1, -1);
 	req.l2_filter_id = bp->vnic_info[0].fw_l2_filter_id[fltr->l2_fltr_idx];
@@ -4188,8 +4237,10 @@ static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
 	req.dst_id = cpu_to_le16(vnic->fw_vnic_id);
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc)
+	if (!rc) {
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
 		fltr->filter_id = resp->ntuple_filter_id;
+	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
 }
@@ -6000,8 +6051,11 @@ static void bnxt_hwrm_set_pg_attr(struct bnxt_ring_mem_info *rmem, u8 *pg_attr,
 		pg_size = 2 << 4;

 	*pg_attr = pg_size;
-	if (rmem->nr_pages > 1) {
-		*pg_attr |= 1;
+	if (rmem->depth >= 1) {
+		if (rmem->depth == 2)
+			*pg_attr |= 2;
+		else
+			*pg_attr |= 1;
 		*pg_dir = cpu_to_le64(rmem->pg_tbl_map);
 	} else {
 		*pg_dir = cpu_to_le64(rmem->dma_arr[0]);
@@ -6078,6 +6132,22 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
 				      &req.stat_pg_size_stat_lvl,
 				      &req.stat_page_dir);
 	}
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV) {
+		ctx_pg = &ctx->mrav_mem;
+		req.mrav_num_entries = cpu_to_le32(ctx_pg->entries);
+		req.mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+				      &req.mrav_pg_size_mrav_lvl,
+				      &req.mrav_page_dir);
+	}
+	if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) {
+		ctx_pg = &ctx->tim_mem;
+		req.tim_num_entries = cpu_to_le32(ctx_pg->entries);
+		req.tim_entry_size = cpu_to_le16(ctx->tim_entry_size);
+		bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+				      &req.tim_pg_size_tim_lvl,
+				      &req.tim_page_dir);
+	}
 	for (i = 0, num_entries = &req.tqm_sp_num_entries,
 	     pg_attr = &req.tqm_sp_pg_size_tqm_sp_lvl,
 	     pg_dir = &req.tqm_sp_page_dir,
@@ -6098,25 +6168,104 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
 }

 static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
-				  struct bnxt_ctx_pg_info *ctx_pg, u32 mem_size)
+				  struct bnxt_ctx_pg_info *ctx_pg)
 {
 	struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;

-	if (!mem_size)
-		return 0;
-
-	rmem->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE);
-	if (rmem->nr_pages > MAX_CTX_PAGES) {
-		rmem->nr_pages = 0;
-		return -EINVAL;
-	}
 	rmem->page_size = BNXT_PAGE_SIZE;
 	rmem->pg_arr = ctx_pg->ctx_pg_arr;
 	rmem->dma_arr = ctx_pg->ctx_dma_arr;
 	rmem->flags = BNXT_RMEM_VALID_PTE_FLAG;
+	if (rmem->depth >= 1)
+		rmem->flags |= BNXT_RMEM_USE_FULL_PAGE_FLAG;
 	return bnxt_alloc_ring(bp, rmem);
 }

+static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp,
+				  struct bnxt_ctx_pg_info *ctx_pg, u32 mem_size,
+				  u8 depth)
+{
+	struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;
+	int rc;
+
+	if (!mem_size)
+		return 0;
+
+	ctx_pg->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE);
+	if (ctx_pg->nr_pages > MAX_CTX_TOTAL_PAGES) {
+		ctx_pg->nr_pages = 0;
+		return -EINVAL;
+	}
+	if (ctx_pg->nr_pages > MAX_CTX_PAGES || depth > 1) {
+		int nr_tbls, i;
+
+		rmem->depth = 2;
+		ctx_pg->ctx_pg_tbl = kcalloc(MAX_CTX_PAGES, sizeof(ctx_pg),
+					     GFP_KERNEL);
+		if (!ctx_pg->ctx_pg_tbl)
+			return -ENOMEM;
+		nr_tbls = DIV_ROUND_UP(ctx_pg->nr_pages, MAX_CTX_PAGES);
+		rmem->nr_pages = nr_tbls;
+		rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg);
+		if (rc)
+			return rc;
+		for (i = 0; i < nr_tbls; i++) {
+			struct bnxt_ctx_pg_info *pg_tbl;
+
+			pg_tbl = kzalloc(sizeof(*pg_tbl), GFP_KERNEL);
+			if (!pg_tbl)
+				return -ENOMEM;
+			ctx_pg->ctx_pg_tbl[i] = pg_tbl;
+			rmem = &pg_tbl->ring_mem;
+			rmem->pg_tbl = ctx_pg->ctx_pg_arr[i];
+			rmem->pg_tbl_map = ctx_pg->ctx_dma_arr[i];
+			rmem->depth = 1;
+			rmem->nr_pages = MAX_CTX_PAGES;
+			if (i == (nr_tbls - 1))
+				rmem->nr_pages = ctx_pg->nr_pages %
+						 MAX_CTX_PAGES;
+			rc = bnxt_alloc_ctx_mem_blk(bp, pg_tbl);
+			if (rc)
+				break;
+		}
+	} else {
+		rmem->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE);
+		if (rmem->nr_pages > 1 || depth)
+			rmem->depth = 1;
+		rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg);
+	}
+	return rc;
+}
+
+static void bnxt_free_ctx_pg_tbls(struct bnxt *bp,
+				  struct bnxt_ctx_pg_info *ctx_pg)
+{
+	struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;
+
+	if (rmem->depth > 1 || ctx_pg->nr_pages > MAX_CTX_PAGES ||
+	    ctx_pg->ctx_pg_tbl) {
+		int i, nr_tbls = rmem->nr_pages;
+
+		for (i = 0; i < nr_tbls; i++) {
+			struct bnxt_ctx_pg_info *pg_tbl;
+			struct bnxt_ring_mem_info *rmem2;
+
+			pg_tbl = ctx_pg->ctx_pg_tbl[i];
+			if (!pg_tbl)
+				continue;
+			rmem2 = &pg_tbl->ring_mem;
+			bnxt_free_ring(bp, rmem2);
+			ctx_pg->ctx_pg_arr[i] = NULL;
+			kfree(pg_tbl);
+			ctx_pg->ctx_pg_tbl[i] = NULL;
+		}
+		kfree(ctx_pg->ctx_pg_tbl);
+		ctx_pg->ctx_pg_tbl = NULL;
+	}
+	bnxt_free_ring(bp, rmem);
+	ctx_pg->nr_pages = 0;
+}
+
 static void bnxt_free_ctx_mem(struct bnxt *bp)
 {
 	struct bnxt_ctx_mem_info *ctx = bp->ctx;
@@ -6127,16 +6276,18 @@ static void bnxt_free_ctx_mem(struct bnxt *bp)

 	if (ctx->tqm_mem[0]) {
 		for (i = 0; i < bp->max_q + 1; i++)
-			bnxt_free_ring(bp, &ctx->tqm_mem[i]->ring_mem);
+			bnxt_free_ctx_pg_tbls(bp, ctx->tqm_mem[i]);
 		kfree(ctx->tqm_mem[0]);
 		ctx->tqm_mem[0] = NULL;
 	}

-	bnxt_free_ring(bp, &ctx->stat_mem.ring_mem);
-	bnxt_free_ring(bp, &ctx->vnic_mem.ring_mem);
-	bnxt_free_ring(bp, &ctx->cq_mem.ring_mem);
-	bnxt_free_ring(bp, &ctx->srq_mem.ring_mem);
-	bnxt_free_ring(bp, &ctx->qp_mem.ring_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->tim_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->mrav_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->stat_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->vnic_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->cq_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->srq_mem);
+	bnxt_free_ctx_pg_tbls(bp, &ctx->qp_mem);
 	ctx->flags &= ~BNXT_CTX_FLAG_INITED;
 }

@@ -6145,6 +6296,9 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp)
 	struct bnxt_ctx_pg_info *ctx_pg;
 	struct bnxt_ctx_mem_info *ctx;
 	u32 mem_size, ena, entries;
+	u32 extra_srqs = 0;
+	u32 extra_qps = 0;
+	u8 pg_lvl = 1;
 	int i, rc;

 	rc = bnxt_hwrm_func_backing_store_qcaps(bp);
@@ -6157,24 +6311,31 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp)
 	if (!ctx || (ctx->flags & BNXT_CTX_FLAG_INITED))
 		return 0;

+	if (bp->flags & BNXT_FLAG_ROCE_CAP) {
+		pg_lvl = 2;
+		extra_qps = 65536;
+		extra_srqs = 8192;
+	}
+
 	ctx_pg = &ctx->qp_mem;
-	ctx_pg->entries = ctx->qp_min_qp1_entries + ctx->qp_max_l2_entries;
+	ctx_pg->entries = ctx->qp_min_qp1_entries + ctx->qp_max_l2_entries +
+			  extra_qps;
 	mem_size = ctx->qp_entry_size * ctx_pg->entries;
-	rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size);
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl);
 	if (rc)
 		return rc;

 	ctx_pg = &ctx->srq_mem;
-	ctx_pg->entries = ctx->srq_max_l2_entries;
+	ctx_pg->entries = ctx->srq_max_l2_entries + extra_srqs;
 	mem_size = ctx->srq_entry_size * ctx_pg->entries;
-	rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size);
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl);
 	if (rc)
 		return rc;

 	ctx_pg = &ctx->cq_mem;
-	ctx_pg->entries = ctx->cq_max_l2_entries;
+	ctx_pg->entries = ctx->cq_max_l2_entries + extra_qps * 2;
 	mem_size = ctx->cq_entry_size * ctx_pg->entries;
-	rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size);
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl);
 	if (rc)
 		return rc;

@@ -6182,26 +6343,47 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp)
 	ctx_pg->entries = ctx->vnic_max_vnic_entries +
 			  ctx->vnic_max_ring_table_entries;
 	mem_size = ctx->vnic_entry_size * ctx_pg->entries;
-	rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size);
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1);
 	if (rc)
 		return rc;

 	ctx_pg = &ctx->stat_mem;
 	ctx_pg->entries = ctx->stat_max_entries;
 	mem_size = ctx->stat_entry_size * ctx_pg->entries;
-	rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size);
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1);
 	if (rc)
 		return rc;

-	entries = ctx->qp_max_l2_entries;
+	ena = 0;
+	if (!(bp->flags & BNXT_FLAG_ROCE_CAP))
+		goto skip_rdma;
+
+	ctx_pg = &ctx->mrav_mem;
+	ctx_pg->entries = extra_qps * 4;
+	mem_size = ctx->mrav_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 2);
+	if (rc)
+		return rc;
+	ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV;
+
+	ctx_pg = &ctx->tim_mem;
+	ctx_pg->entries = ctx->qp_mem.entries;
+	mem_size = ctx->tim_entry_size * ctx_pg->entries;
+	rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1);
+	if (rc)
+		return rc;
+	ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM;
+
+skip_rdma:
+	entries = ctx->qp_max_l2_entries + extra_qps;
 	entries = roundup(entries, ctx->tqm_entries_multiple);
 	entries = clamp_t(u32, entries, ctx->tqm_min_entries_per_ring,
 			  ctx->tqm_max_entries_per_ring);
-	for (i = 0, ena = 0; i < bp->max_q + 1; i++) {
+	for (i = 0; i < bp->max_q + 1; i++) {
 		ctx_pg = ctx->tqm_mem[i];
 		ctx_pg->entries = entries;
 		mem_size = ctx->tqm_entry_size * entries;
-		rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg, mem_size);
+		rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1);
 		if (rc)
 			return rc;
 		ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP << i;
@@ -6481,6 +6663,13 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp)
 	    (dev_caps_cfg & VER_GET_RESP_DEV_CAPS_CFG_SHORT_CMD_REQUIRED))
 		bp->fw_cap |= BNXT_FW_CAP_SHORT_CMD;

+	if (dev_caps_cfg & VER_GET_RESP_DEV_CAPS_CFG_KONG_MB_CHNL_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_KONG_MB_CHNL;
+
+	if (dev_caps_cfg &
+	    VER_GET_RESP_DEV_CAPS_CFG_FLOW_HANDLE_64BIT_SUPPORTED)
+		bp->fw_cap |= BNXT_FW_CAP_OVS_64BIT_HANDLE;
+
 hwrm_ver_get_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -9227,7 +9416,7 @@ static void bnxt_init_dflt_coal(struct bnxt *bp)
 	 * 1 coal_buf x bufs_per_record = 1 completion record.
 	 */
 	coal = &bp->rx_coal;
-	coal->coal_ticks = 14;
+	coal->coal_ticks = 10;
 	coal->coal_bufs = 30;
 	coal->coal_ticks_irq = 1;
 	coal->coal_bufs_irq = 2;
@@ -10219,6 +10408,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto init_err_pci_clean;

+	if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
+		rc = bnxt_alloc_kong_hwrm_resources(bp);
+		if (rc)
+			bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
+	}
+
 	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
 	    bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
 		rc = bnxt_alloc_hwrm_short_cmd_req(bp);

--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -567,7 +567,6 @@ struct nqe_cn {
 #define HWRM_RESP_LEN_MASK		0xffff0000
 #define HWRM_RESP_LEN_SFT		16
 #define HWRM_RESP_VALID_MASK		0xff000000
-#define HWRM_SEQ_ID_INVALID		-1
 #define BNXT_HWRM_REQ_MAX_SIZE		128
 #define BNXT_HWRM_REQS_PER_PAGE		(BNXT_PAGE_SIZE /	\
 					 BNXT_HWRM_REQ_MAX_SIZE)
@@ -585,6 +584,9 @@ struct nqe_cn {

 #define HWRM_VALID_BIT_DELAY_USEC	20

+#define BNXT_HWRM_CHNL_CHIMP	0
+#define BNXT_HWRM_CHNL_KONG	1
+
 #define BNXT_RX_EVENT	1
 #define BNXT_AGG_EVENT	2
 #define BNXT_TX_EVENT	4
@@ -615,9 +617,12 @@ struct bnxt_sw_rx_agg_bd {
 struct bnxt_ring_mem_info {
 	int			nr_pages;
 	int			page_size;
-	u32			flags;
+	u16			flags;
 #define BNXT_RMEM_VALID_PTE_FLAG	1
 #define BNXT_RMEM_RING_PTE_FLAG		2
+#define BNXT_RMEM_USE_FULL_PAGE_FLAG	4
+
+	u16			depth;

 	void			**pg_arr;
 	dma_addr_t		*dma_arr;
@@ -1113,9 +1118,14 @@ struct bnxt_test_info {
 	char string[BNXT_MAX_TEST][ETH_GSTRING_LEN];
 };

-#define BNXT_GRCPF_REG_WINDOW_BASE_OUT	0x400
-#define BNXT_CAG_REG_LEGACY_INT_STATUS	0x4014
-#define BNXT_CAG_REG_BASE		0x300000
+#define BNXT_GRCPF_REG_CHIMP_COMM		0x0
+#define BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER	0x100
+#define BNXT_GRCPF_REG_WINDOW_BASE_OUT		0x400
+#define BNXT_CAG_REG_LEGACY_INT_STATUS		0x4014
+#define BNXT_CAG_REG_BASE			0x300000
+
+#define BNXT_GRCPF_REG_KONG_COMM		0xA00
+#define BNXT_GRCPF_REG_KONG_COMM_TRIGGER	0xB00

 struct bnxt_tc_flow_stats {
 	u64		packets;
@@ -1183,12 +1193,15 @@ struct bnxt_vf_rep {
 #define PTU_PTE_NEXT_TO_LAST      0x4UL

 #define MAX_CTX_PAGES	(BNXT_PAGE_SIZE / 8)
+#define MAX_CTX_TOTAL_PAGES	(MAX_CTX_PAGES * MAX_CTX_PAGES)

 struct bnxt_ctx_pg_info {
 	u32		entries;
+	u32		nr_pages;
 	void		*ctx_pg_arr[MAX_CTX_PAGES];
 	dma_addr_t	ctx_dma_arr[MAX_CTX_PAGES];
 	struct bnxt_ring_mem_info ring_mem;
+	struct bnxt_ctx_pg_info **ctx_pg_tbl;
 };

 struct bnxt_ctx_mem_info {
@@ -1224,6 +1237,8 @@ struct bnxt_ctx_mem_info {
 	struct bnxt_ctx_pg_info cq_mem;
 	struct bnxt_ctx_pg_info vnic_mem;
 	struct bnxt_ctx_pg_info stat_mem;
+	struct bnxt_ctx_pg_info mrav_mem;
+	struct bnxt_ctx_pg_info tim_mem;
 	struct bnxt_ctx_pg_info *tqm_mem[9];
 };

@@ -1457,20 +1472,25 @@ struct bnxt {
 	u32			msg_enable;

 	u32			fw_cap;
-	#define BNXT_FW_CAP_SHORT_CMD	0x00000001
-	#define BNXT_FW_CAP_LLDP_AGENT	0x00000002
-	#define BNXT_FW_CAP_DCBX_AGENT	0x00000004
-	#define BNXT_FW_CAP_NEW_RM	0x00000008
-	#define BNXT_FW_CAP_IF_CHANGE	0x00000010
+	#define BNXT_FW_CAP_SHORT_CMD			0x00000001
+	#define BNXT_FW_CAP_LLDP_AGENT			0x00000002
+	#define BNXT_FW_CAP_DCBX_AGENT			0x00000004
+	#define BNXT_FW_CAP_NEW_RM			0x00000008
+	#define BNXT_FW_CAP_IF_CHANGE			0x00000010
+	#define BNXT_FW_CAP_KONG_MB_CHNL		0x00000080
+	#define BNXT_FW_CAP_OVS_64BIT_HANDLE		0x00000400

 #define BNXT_NEW_RM(bp)		((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
 	u32			hwrm_spec_code;
 	u16			hwrm_cmd_seq;
-	u32			hwrm_intr_seq_id;
+	u16                     hwrm_cmd_kong_seq;
+	u16			hwrm_intr_seq_id;
 	void			*hwrm_short_cmd_req_addr;
 	dma_addr_t		hwrm_short_cmd_req_dma_addr;
 	void			*hwrm_cmd_resp_addr;
 	dma_addr_t		hwrm_cmd_resp_dma_addr;
+	void			*hwrm_cmd_kong_resp_addr;
+	dma_addr_t		hwrm_cmd_kong_resp_dma_addr;

 	struct rtnl_link_stats64	net_stats_prev;
 	struct rx_port_stats	*hw_rx_port_stats;
@@ -1672,6 +1692,66 @@ static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db,
 	}
 }

+static inline bool bnxt_cfa_hwrm_message(u16 req_type)
+{
+	switch (req_type) {
+	case HWRM_CFA_ENCAP_RECORD_ALLOC:
+	case HWRM_CFA_ENCAP_RECORD_FREE:
+	case HWRM_CFA_DECAP_FILTER_ALLOC:
+	case HWRM_CFA_DECAP_FILTER_FREE:
+	case HWRM_CFA_NTUPLE_FILTER_ALLOC:
+	case HWRM_CFA_NTUPLE_FILTER_FREE:
+	case HWRM_CFA_NTUPLE_FILTER_CFG:
+	case HWRM_CFA_EM_FLOW_ALLOC:
+	case HWRM_CFA_EM_FLOW_FREE:
+	case HWRM_CFA_EM_FLOW_CFG:
+	case HWRM_CFA_FLOW_ALLOC:
+	case HWRM_CFA_FLOW_FREE:
+	case HWRM_CFA_FLOW_INFO:
+	case HWRM_CFA_FLOW_FLUSH:
+	case HWRM_CFA_FLOW_STATS:
+	case HWRM_CFA_METER_PROFILE_ALLOC:
+	case HWRM_CFA_METER_PROFILE_FREE:
+	case HWRM_CFA_METER_PROFILE_CFG:
+	case HWRM_CFA_METER_INSTANCE_ALLOC:
+	case HWRM_CFA_METER_INSTANCE_FREE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline bool bnxt_kong_hwrm_message(struct bnxt *bp, struct input *req)
+{
+	return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
+		bnxt_cfa_hwrm_message(le16_to_cpu(req->req_type)));
+}
+
+static inline bool bnxt_hwrm_kong_chnl(struct bnxt *bp, struct input *req)
+{
+	return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
+		req->resp_addr == cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr));
+}
+
+static inline void *bnxt_get_hwrm_resp_addr(struct bnxt *bp, void *req)
+{
+	if (bnxt_hwrm_kong_chnl(bp, (struct input *)req))
+		return bp->hwrm_cmd_kong_resp_addr;
+	else
+		return bp->hwrm_cmd_resp_addr;
+}
+
+static inline u16 bnxt_get_hwrm_seq_id(struct bnxt *bp, u16 dst)
+{
+	u16 seq_id;
+
+	if (dst == BNXT_HWRM_CHNL_CHIMP)
+		seq_id = bp->hwrm_cmd_seq++;
+	else
+		seq_id = bp->hwrm_cmd_kong_seq++;
+	return seq_id;
+}
+
 extern const u16 bnxt_lhint_arr[];

 int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,

--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -337,18 +337,21 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 	return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts);
 }

-static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle)
+static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
+				   struct bnxt_tc_flow_node *flow_node)
 {
 	struct hwrm_cfa_flow_free_input req = { 0 };
 	int rc;

 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
-	req.flow_handle = flow_handle;
+	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
+		req.ext_flow_handle = flow_node->ext_flow_handle;
+	else
+		req.flow_handle = flow_node->flow_handle;

 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc)
-		netdev_info(bp->dev, "Error: %s: flow_handle=0x%x rc=%d",
-			    __func__, flow_handle, rc);
+		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);

 	if (rc)
 		rc = -EIO;
@@ -418,13 +421,14 @@ static bool bits_set(void *key, int len)

 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 				    __le16 ref_flow_handle,
-				    __le32 tunnel_handle, __le16 *flow_handle)
+				    __le32 tunnel_handle,
+				    struct bnxt_tc_flow_node *flow_node)
 {
-	struct hwrm_cfa_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr;
 	struct bnxt_tc_actions *actions = &flow->actions;
 	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
 	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
 	struct hwrm_cfa_flow_alloc_input req = { 0 };
+	struct hwrm_cfa_flow_alloc_output *resp;
 	u16 flow_flags = 0, action_flags = 0;
 	int rc;

@@ -527,8 +531,23 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,

 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc)
-		*flow_handle = resp->flow_handle;
+	if (!rc) {
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
+		/* CFA_FLOW_ALLOC response interpretation:
+		 *		    fw with	     fw with
+		 *		    16-bit	     64-bit
+		 *		    flow handle      flow handle
+		 *		    ===========	     ===========
+		 * flow_handle      flow handle      flow context id
+		 * ext_flow_handle  INVALID	     flow handle
+		 * flow_id	    INVALID	     flow counter id
+		 */
+		flow_node->flow_handle = resp->flow_handle;
+		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
+			flow_node->ext_flow_handle = resp->ext_flow_handle;
+			flow_node->flow_id = resp->flow_id;
+		}
+	}
 	mutex_unlock(&bp->hwrm_cmd_lock);

 	if (rc == HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR)
@@ -544,9 +563,8 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
 				       __le32 ref_decap_handle,
 				       __le32 *decap_filter_handle)
 {
-	struct hwrm_cfa_decap_filter_alloc_output *resp =
-						bp->hwrm_cmd_resp_addr;
 	struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
+	struct hwrm_cfa_decap_filter_alloc_output *resp;
 	struct ip_tunnel_key *tun_key = &flow->tun_key;
 	u32 enables = 0;
 	int rc;
@@ -599,10 +617,12 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,

 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc)
+	if (!rc) {
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
 		*decap_filter_handle = resp->decap_filter_id;
-	else
+	} else {
 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+	}
 	mutex_unlock(&bp->hwrm_cmd_lock);

 	if (rc)
@@ -633,9 +653,8 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
 				       struct bnxt_tc_l2_key *l2_info,
 				       __le32 *encap_record_handle)
 {
-	struct hwrm_cfa_encap_record_alloc_output *resp =
-						bp->hwrm_cmd_resp_addr;
 	struct hwrm_cfa_encap_record_alloc_input req = { 0 };
+	struct hwrm_cfa_encap_record_alloc_output *resp;
 	struct hwrm_cfa_encap_data_vxlan *encap =
 			(struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
 	struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
@@ -667,10 +686,12 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,

 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (!rc)
+	if (!rc) {
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
 		*encap_record_handle = resp->encap_record_id;
-	else
+	} else {
 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+	}
 	mutex_unlock(&bp->hwrm_cmd_lock);

 	if (rc)
@@ -1224,7 +1245,7 @@ static int __bnxt_tc_del_flow(struct bnxt *bp,
 	int rc;

 	/* send HWRM cmd to free the flow-id */
-	bnxt_hwrm_cfa_flow_free(bp, flow_node->flow_handle);
+	bnxt_hwrm_cfa_flow_free(bp, flow_node);

 	mutex_lock(&tc_info->lock);

@@ -1246,6 +1267,12 @@ static int __bnxt_tc_del_flow(struct bnxt *bp,
 	return 0;
 }

+static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
+				 u16 src_fid)
+{
+	flow->dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
+}
+
 static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
 				u16 src_fid)
 {
@@ -1293,6 +1320,9 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,

 	bnxt_tc_set_src_fid(bp, flow, src_fid);

+	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
+		bnxt_tc_set_flow_dir(bp, flow, src_fid);
+
 	if (!bnxt_tc_can_offload(bp, flow)) {
 		rc = -ENOSPC;
 		goto free_node;
@@ -1320,7 +1350,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,

 	/* send HWRM cmd to alloc the flow */
 	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
-				      tunnel_handle, &new_node->flow_handle);
+				      tunnel_handle, new_node);
 	if (rc)
 		goto put_tunnel;

@@ -1336,7 +1366,7 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
 	return 0;

 hwrm_flow_free:
-	bnxt_hwrm_cfa_flow_free(bp, new_node->flow_handle);
+	bnxt_hwrm_cfa_flow_free(bp, new_node);
 put_tunnel:
 	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
 put_l2:
@@ -1397,13 +1427,40 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp,
 	return 0;
 }

+static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
+				    struct bnxt_tc_flow_node *flow_node,
+				    __le16 *flow_handle, __le32 *flow_id)
+{
+	u16 handle;
+
+	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
+		*flow_id = flow_node->flow_id;
+
+		/* If flow_id is used to fetch flow stats then:
+		 * 1. lower 12 bits of flow_handle must be set to all 1s.
+		 * 2. 15th bit of flow_handle must specify the flow
+		 *    direction (TX/RX).
+		 */
+		if (flow_node->flow.dir == BNXT_DIR_RX)
+			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
+				 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
+		else
+			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
+
+		*flow_handle = cpu_to_le16(handle);
+	} else {
+		*flow_handle = flow_node->flow_handle;
+	}
+}
+
 static int
 bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
 			     struct bnxt_tc_stats_batch stats_batch[])
 {
-	struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr;
 	struct hwrm_cfa_flow_stats_input req = { 0 };
+	struct hwrm_cfa_flow_stats_output *resp;
 	__le16 *req_flow_handles = &req.flow_handle_0;
+	__le32 *req_flow_ids = &req.flow_id_0;
 	int rc, i;

 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
@@ -1411,14 +1468,19 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
 	for (i = 0; i < num_flows; i++) {
 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;

-		req_flow_handles[i] = flow_node->flow_handle;
+		bnxt_fill_cfa_stats_req(bp, flow_node,
+					&req_flow_handles[i], &req_flow_ids[i]);
 	}

 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (!rc) {
-		__le64 *resp_packets = &resp->packet_0;
-		__le64 *resp_bytes = &resp->byte_0;
+		__le64 *resp_packets;
+		__le64 *resp_bytes;
+
+		resp = bnxt_get_hwrm_resp_addr(bp, &req);
+		resp_packets = &resp->packet_0;
+		resp_bytes = &resp->byte_0;

 		for (i = 0; i < num_flows; i++) {
 			stats_batch[i].hw_stats.packets =

--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
@@ -98,6 +98,9 @@ struct bnxt_tc_flow {

 	/* flow applicable to pkts ingressing on this fid */
 	u16				src_fid;
+	u8				dir;
+#define BNXT_DIR_RX	1
+#define BNXT_DIR_TX	0
 	struct bnxt_tc_l2_key		l2_key;
 	struct bnxt_tc_l2_key		l2_mask;
 	struct bnxt_tc_l3_key		l3_key;
@@ -170,7 +173,9 @@ struct bnxt_tc_flow_node {

 	struct bnxt_tc_flow		flow;

+	__le64				ext_flow_handle;
 	__le16				flow_handle;
+	__le32				flow_id;

 	/* L2 node in l2 hashtable that shares flow's l2 key */
 	struct bnxt_tc_l2_node		*l2_node;