Merge branch 'mlx5-next'

Saeed Mahameed says: ==================== Mellanox 100G mlx5 CQE compression Introducing ConnectX-4 CQE (Completion Queue Entry) compression feature for mlx5 etherent driver. CQE Compressing reduces PCI overhead by coalescing and compressing multiple CQEs into a single merged CQE. Successful compressing improves message rate especially for small packet traffic. CQE Compressing in details: Instead of writing full CQEs to memory, multiple almost identical CQEs are merged and compressed. Information that is shared between the CQEs is written once, regardless of the number of compressed CQEs. In addition, only the unique information (small amount of bytes compared to full CQE size) is written per CQE. CQE Compression Block: This block contains multiple compressed CQEs. CQE Compression Block contains a single copy of CQEs properties which are shared between all the compressed CQEs (called Title, see below) and multiple mini CQEs (CQEs in compressed form). Title: The Title holds information which is shared between all the compressed CQEs in the CQE Compression Block. In each Compression Block there is only a single Title regardless of the number of compressed CQEs. Mini CQE: A CQE in compressed form that holds some data needed to extract a single full CQE, for example 8 Bytes instead of 64 Bytes. The shared information between all compressed CQEs, which belong to the same CQE Compression Block called Title, is written once, and only the unique information in each compressed CQE, for example 8 bytes, is written per compressed CQE, called mini CQE. Since CQE Compression can add overhead to the software (CPU), it will be only enabled on "weak/slow" PCI slots, where it can actually help. Applied on top: c047c3b1 ('netfilter: conntrack: remove uninitialized shadow variable') ==================== Signed-off-by: N David S. Miller <davem@davemloft.net>

Merge branch 'mlx5-next'
Saeed Mahameed says: ==================== Mellanox 100G mlx5 CQE compression Introducing ConnectX-4 CQE (Completion Queue Entry) compression feature for mlx5 etherent driver. CQE Compressing reduces PCI overhead by coalescing and compressing multiple CQEs into a single merged CQE. Successful compressing improves message rate especially for small packet traffic. CQE Compressing in details: Instead of writing full CQEs to memory, multiple almost identical CQEs are merged and compressed. Information that is shared between the CQEs is written once, regardless of the number of compressed CQEs. In addition, only the unique information (small amount of bytes compared to full CQE size) is written per CQE. CQE Compression Block: This block contains multiple compressed CQEs. CQE Compression Block contains a single copy of CQEs properties which are shared between all the compressed CQEs (called Title, see below) and multiple mini CQEs (CQEs in compressed form). Title: The Title holds information which is shared between all the compressed CQEs in the CQE Compression Block. In each Compression Block there is only a single Title regardless of the number of compressed CQEs. Mini CQE: A CQE in compressed form that holds some data needed to extract a single full CQE, for example 8 Bytes instead of 64 Bytes. The shared information between all compressed CQEs, which belong to the same CQE Compression Block called Title, is written once, and only the unique information in each compressed CQE, for example 8 bytes, is written per compressed CQE, called mini CQE. Since CQE Compression can add overhead to the software (CPU), it will be only enabled on "weak/slow" PCI slots, where it can actually help. Applied on top: c047c3b1 ('netfilter: conntrack: remove uninitialized shadow variable') ==================== Signed-off-by: N David S. Miller <davem@davemloft.net>
6a47a570 · David S. Miller · c1869d58 · b797a684 · 6a47a570 · 6a47a570
7 changed file
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -64,12 +64,9 @@
 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x4
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW            0x6

-#define MLX5_MPWRQ_LOG_NUM_STRIDES		11 /* >= 9, HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE		6  /* >= 6, HW restriction */
-#define MLX5_MPWRQ_NUM_STRIDES			BIT(MLX5_MPWRQ_LOG_NUM_STRIDES)
-#define MLX5_MPWRQ_STRIDE_SIZE			BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE)
-#define MLX5_MPWRQ_LOG_WQE_SZ			(MLX5_MPWRQ_LOG_NUM_STRIDES +\
-						 MLX5_MPWRQ_LOG_STRIDE_SIZE)
+#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS	8  /* >= 6, HW restriction */
+#define MLX5_MPWRQ_LOG_WQE_SZ			17
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
 				    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE		BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
@@ -154,9 +151,13 @@ struct mlx5e_umr_wqe {
 struct mlx5e_params {
 	u8  log_sq_size;
 	u8  rq_wq_type;
+	u8  mpwqe_log_stride_sz;
+	u8  mpwqe_log_num_strides;
 	u8  log_rq_size;
 	u16 num_channels;
 	u8  num_tc;
+	bool rx_cqe_compress_admin;
+	bool rx_cqe_compress;
 	u16 rx_cq_moderation_usec;
 	u16 rx_cq_moderation_pkts;
 	u16 tx_cq_moderation_usec;
@@ -202,6 +203,13 @@ struct mlx5e_cq {
 	struct mlx5e_channel      *channel;
 	struct mlx5e_priv         *priv;

+	/* cqe decompression */
+	struct mlx5_cqe64          title;
+	struct mlx5_mini_cqe8      mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
+	u8                         mini_arr_idx;
+	u16                        decmprs_left;
+	u16                        decmprs_wqe_counter;
+
 	/* control */
 	struct mlx5_wq_ctrl        wq_ctrl;
 } ____cacheline_aligned_in_smp;
@@ -240,6 +248,8 @@ struct mlx5e_rq {
 	/* control */
 	struct mlx5_wq_ctrl    wq_ctrl;
 	u8                     wq_type;
+	u32                    mpwqe_stride_sz;
+	u32                    mpwqe_num_strides;
 	u32                    rqn;
 	struct mlx5e_channel  *channel;
 	struct mlx5e_priv     *priv;
@@ -263,7 +273,7 @@ struct mlx5e_mpw_info {
 	void (*dma_pre_sync)(struct device *pdev,
 			     struct mlx5e_mpw_info *wi,
 			     u32 wqe_offset, u32 len);
-	void (*add_skb_frag)(struct device *pdev,
+	void (*add_skb_frag)(struct mlx5e_rq *rq,
 			     struct sk_buff *skb,
 			     struct mlx5e_mpw_info *wi,
 			     u32 page_idx, u32 frag_offset, u32 len);
@@ -616,6 +626,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv);
 void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv);
 int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr);
 int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr);
+void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val);

 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
 			  u16 vid);
@@ -634,6 +645,7 @@ int mlx5e_close_locked(struct net_device *netdev);
 void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
 				   u32 *indirection_rqt, int len,
 				   int num_channels);
+int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);

 static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
 				      struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -93,6 +93,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
 	/* RX HW timestamp */
 	switch (config.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
+		/* Reset CQE compression to Admin default */
+		mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin);
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_SOME:
@@ -108,6 +110,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		/* Disable CQE compression */
+		mlx5e_modify_rx_cqe_compression(priv, false);
 		config.rx_filter = HWTSTAMP_FILTER_ALL;
 		break;
 	default:

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -613,6 +613,25 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap)
 	return 0;
 }

+int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+	u32 max_speed = 0;
+	u32 proto_cap;
+	int err;
+	int i;
+
+	err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+	if (err)
+		return err;
+
+	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
+		if (proto_cap & MLX5E_PROT_MASK(i))
+			max_speed = max(max_speed, ptys2ethtool_table[i].speed);
+
+	*speed = max_speed;
+	return 0;
+}
+
 static void get_speed_duplex(struct net_device *netdev,
 			     u32 eth_proto_oper,
 			     struct ethtool_cmd *cmd)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -114,6 +114,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 		s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
 		s->rx_mpwqe_frag   += rq_stats->mpwqe_frag;
 		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
+		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
+		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;

 		for (j = 0; j < priv->params.num_tc; j++) {
 			sq_stats = &priv->channel[i]->sq[j].stats;
@@ -305,7 +307,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 		rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
 		rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;

-		rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
+		rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
+		rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
+		rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
 		byte_count = rq->wqe_sz;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -1128,9 +1132,9 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	switch (priv->params.rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		MLX5_SET(wq, wq, log_wqe_num_of_strides,
-			 MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
+			 priv->params.mpwqe_log_num_strides - 9);
 		MLX5_SET(wq, wq, log_wqe_stride_size,
-			 MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
+			 priv->params.mpwqe_log_stride_sz - 6);
 		MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -1197,13 +1201,17 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 	switch (priv->params.rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		log_cq_size = priv->params.log_rq_size +
-			MLX5_MPWRQ_LOG_NUM_STRIDES;
+			priv->params.mpwqe_log_num_strides;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		log_cq_size = priv->params.log_rq_size;
 	}

 	MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+	if (priv->params.rx_cqe_compress) {
+		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
+		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+	}

 	mlx5e_build_common_cq_param(priv, param);
 }
@@ -2708,11 +2716,49 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 		MLX5_CAP_ETH(mdev, reg_umr_sq);
 }

+static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
+{
+	enum pcie_link_width width;
+	enum pci_bus_speed speed;
+	int err = 0;
+
+	err = pcie_get_minimum_link(mdev->pdev, &speed, &width);
+	if (err)
+		return err;
+
+	if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
+		return -EINVAL;
+
+	switch (speed) {
+	case PCIE_SPEED_2_5GT:
+		*pci_bw = 2500 * width;
+		break;
+	case PCIE_SPEED_5_0GT:
+		*pci_bw = 5000 * width;
+		break;
+	case PCIE_SPEED_8_0GT:
+		*pci_bw = 8000 * width;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
+{
+	return (link_speed && pci_bw &&
+		(pci_bw < 40000) && (pci_bw < link_speed));
+}
+
 static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 				    struct net_device *netdev,
 				    int num_channels)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	u32 link_speed = 0;
+	u32 pci_bw = 0;

 	priv->params.log_sq_size           =
 		MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
@@ -2720,15 +2766,42 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
 		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
 		MLX5_WQ_TYPE_LINKED_LIST;

+	/* set CQE compression */
+	priv->params.rx_cqe_compress_admin = false;
+	if (MLX5_CAP_GEN(mdev, cqe_compression) &&
+	    MLX5_CAP_GEN(mdev, vport_group_manager)) {
+		mlx5e_get_max_linkspeed(mdev, &link_speed);
+		mlx5e_get_pci_bw(mdev, &pci_bw);
+		mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
+			      link_speed, pci_bw);
+		priv->params.rx_cqe_compress_admin =
+			cqe_compress_heuristic(link_speed, pci_bw);
+	}
+
+	priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;
+
 	switch (priv->params.rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+		priv->params.mpwqe_log_stride_sz =
+			priv->params.rx_cqe_compress ?
+			MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
+			MLX5_MPWRQ_LOG_STRIDE_SIZE;
+		priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+			priv->params.mpwqe_log_stride_sz;
 		priv->params.lro_en = true;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
 	}

+	mlx5_core_info(mdev,
+		       "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+		       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+		       BIT(priv->params.log_rq_size),
+		       BIT(priv->params.mpwqe_log_stride_sz),
+		       priv->params.rx_cqe_compress_admin);
+
 	priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
 					    BIT(priv->params.log_rq_size));
 	priv->params.rx_cq_moderation_usec =

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -42,6 +42,143 @@ static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 	return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
 }

+static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
+				       void *data)
+{
+	u32 ci = cqcc & cq->wq.sz_m1;
+
+	memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
+}
+
+static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
+					 struct mlx5e_cq *cq, u32 cqcc)
+{
+	mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
+	cq->decmprs_left        = be32_to_cpu(cq->title.byte_cnt);
+	cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
+	rq->stats.cqe_compress_blks++;
+}
+
+static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
+{
+	mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
+	cq->mini_arr_idx = 0;
+}
+
+static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
+{
+	u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
+	u32 wq_sz = 1 << cq->wq.log_sz;
+	u32 ci = cqcc & cq->wq.sz_m1;
+	u32 ci_top = min_t(u32, wq_sz, ci + n);
+
+	for (; ci < ci_top; ci++, n--) {
+		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+		cqe->op_own = op_own;
+	}
+
+	if (unlikely(ci == wq_sz)) {
+		op_own = !op_own;
+		for (ci = 0; ci < n; ci++) {
+			struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+			cqe->op_own = op_own;
+		}
+	}
+}
+
+static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
+					struct mlx5e_cq *cq, u32 cqcc)
+{
+	u16 wqe_cnt_step;
+
+	cq->title.byte_cnt     = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
+	cq->title.check_sum    = cq->mini_arr[cq->mini_arr_idx].checksum;
+	cq->title.op_own      &= 0xf0;
+	cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.log_sz);
+	cq->title.wqe_counter  = cpu_to_be16(cq->decmprs_wqe_counter);
+
+	wqe_cnt_step =
+		rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+		mpwrq_get_cqe_consumed_strides(&cq->title) : 1;
+	cq->decmprs_wqe_counter =
+		(cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1;
+}
+
+static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
+						struct mlx5e_cq *cq, u32 cqcc)
+{
+	mlx5e_decompress_cqe(rq, cq, cqcc);
+	cq->title.rss_hash_type   = 0;
+	cq->title.rss_hash_result = 0;
+}
+
+static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
+					     struct mlx5e_cq *cq,
+					     int update_owner_only,
+					     int budget_rem)
+{
+	u32 cqcc = cq->wq.cc + update_owner_only;
+	u32 cqe_count;
+	u32 i;
+
+	cqe_count = min_t(u32, cq->decmprs_left, budget_rem);
+
+	for (i = update_owner_only; i < cqe_count;
+	     i++, cq->mini_arr_idx++, cqcc++) {
+		if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
+			mlx5e_read_mini_arr_slot(cq, cqcc);
+
+		mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
+		rq->handle_rx_cqe(rq, &cq->title);
+	}
+	mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
+	cq->wq.cc = cqcc;
+	cq->decmprs_left -= cqe_count;
+	rq->stats.cqe_compress_pkts += cqe_count;
+
+	return cqe_count;
+}
+
+static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
+					      struct mlx5e_cq *cq,
+					      int budget_rem)
+{
+	mlx5e_read_title_slot(rq, cq, cq->wq.cc);
+	mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
+	mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
+	rq->handle_rx_cqe(rq, &cq->title);
+	cq->mini_arr_idx++;
+
+	return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
+}
+
+void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val)
+{
+	bool was_opened;
+
+	if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
+		return;
+
+	mutex_lock(&priv->state_lock);
+
+	if (priv->params.rx_cqe_compress == val)
+		goto unlock;
+
+	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+	if (was_opened)
+		mlx5e_close_locked(priv->netdev);
+
+	priv->params.rx_cqe_compress = val;
+
+	if (was_opened)
+		mlx5e_open_locked(priv->netdev);
+
+unlock:
+	mutex_unlock(&priv->state_lock);
+}
+
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 {
 	struct sk_buff *skb;
@@ -75,6 +212,11 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 	return -ENOMEM;
 }

+static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
+{
+	return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
+}
+
 static inline void
 mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev,
 				struct mlx5e_mpw_info *wi,
@@ -93,13 +235,13 @@ mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev,
 }

 static inline void
-mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq,
 				struct sk_buff *skb,
 				struct mlx5e_mpw_info *wi,
 				u32 page_idx, u32 frag_offset,
 				u32 len)
 {
-	unsigned int truesize =	ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+	unsigned int truesize =	ALIGN(len, rq->mpwqe_stride_sz);

 	wi->skbs_frags[page_idx]++;
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
@@ -108,15 +250,15 @@ mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
 }

 static inline void
-mlx5e_add_skb_frag_fragmented_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
 				    struct sk_buff *skb,
 				    struct mlx5e_mpw_info *wi,
 				    u32 page_idx, u32 frag_offset,
 				    u32 len)
 {
-	unsigned int truesize =	ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+	unsigned int truesize =	ALIGN(len, rq->mpwqe_stride_sz);

-	dma_sync_single_for_cpu(pdev,
+	dma_sync_single_for_cpu(rq->pdev,
 				wi->umr.dma_info[page_idx].addr + frag_offset,
 				len, DMA_FROM_DEVICE);
 	wi->skbs_frags[page_idx]++;
@@ -156,7 +298,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
 	skb_copy_to_linear_data_offset(skb, 0,
 				       page_address(dma_info->page) + offset,
 				       len);
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
 	if (unlikely(offset + headlen > PAGE_SIZE)) {
 		dma_info++;
 		headlen_pg = len;
@@ -167,7 +308,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
 					       page_address(dma_info->page),
 					       len);
 	}
-#endif
 }

 static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix)
@@ -293,7 +433,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
 		if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
 			goto err_unmap;
-		atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+		atomic_add(mlx5e_mpwqe_strides_per_page(rq),
 			   &wi->umr.dma_info[i].page->_count);
 		wi->skbs_frags[i] = 0;
 	}
@@ -312,7 +452,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
 	while (--i >= 0) {
 		dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
 			       PCI_DMA_FROMDEVICE);
-		atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE,
+		atomic_sub(mlx5e_mpwqe_strides_per_page(rq),
 			   &wi->umr.dma_info[i].page->_count);
 		put_page(wi->umr.dma_info[i].page);
 	}
@@ -337,7 +477,7 @@ void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
 		dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
 			       PCI_DMA_FROMDEVICE);
-		atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+		atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
 			   &wi->umr.dma_info[i].page->_count);
 		put_page(wi->umr.dma_info[i].page);
 	}
@@ -387,7 +527,7 @@ static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq,
 	 */
 	split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+		atomic_add(mlx5e_mpwqe_strides_per_page(rq),
 			   &wi->dma_info.page[i]._count);
 		wi->skbs_frags[i] = 0;
 	}
@@ -411,7 +551,7 @@ void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
 	dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
 		       PCI_DMA_FROMDEVICE);
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+		atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
 			   &wi->dma_info.page[i]._count);
 		put_page(&wi->dma_info.page[i]);
 	}
@@ -656,9 +796,9 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
 					   u32 cqe_bcnt,
 					   struct sk_buff *skb)
 {
-	u32 consumed_bytes = ALIGN(cqe_bcnt, MLX5_MPWRQ_STRIDE_SIZE);
+	u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz);
 	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
-	u32 wqe_offset     = stride_ix * MLX5_MPWRQ_STRIDE_SIZE;
+	u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
 	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
 	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
 	u32 head_page_idx  = page_idx;
@@ -666,19 +806,17 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
 	u32 frag_offset    = head_offset + headlen;
 	u16 byte_cnt       = cqe_bcnt - headlen;

-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
 	if (unlikely(frag_offset >= PAGE_SIZE)) {
 		page_idx++;
 		frag_offset -= PAGE_SIZE;
 	}
-#endif
 	wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes);

 	while (byte_cnt) {
 		u32 pg_consumed_bytes =
 			min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);

-		wi->add_skb_frag(rq->pdev, skb, wi, page_idx, frag_offset,
+		wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset,
 				 pg_consumed_bytes);
 		byte_cnt -= pg_consumed_bytes;
 		frag_offset = 0;
@@ -728,7 +866,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

 mpwrq_cqe_out:
-	if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES))
+	if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
 		return;

 	wi->free_wqe(rq, wi);
@@ -738,14 +876,24 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
-	int work_done;
+	int work_done = 0;

-	for (work_done = 0; work_done < budget; work_done++) {
+	if (cq->decmprs_left)
+		work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
+
+	for (; work_done < budget; work_done++) {
 		struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);

 		if (!cqe)
 			break;

+		if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
+			work_done +=
+				mlx5e_decompress_cqes_start(rq, cq,
+							    budget - work_done);
+			continue;
+		}
+
 		mlx5_cqwq_pop(&cq->wq);

 		rq->handle_rx_cqe(rq, cqe);

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -72,6 +72,8 @@ struct mlx5e_sw_stats {
 	u64 rx_mpwqe_filler;
 	u64 rx_mpwqe_frag;
 	u64 rx_buff_alloc_err;
+	u64 rx_cqe_compress_blks;
+	u64 rx_cqe_compress_pkts;

 	/* Special handling counters */
 	u64 link_down_events;
@@ -101,6 +103,8 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) },
 };

@@ -283,6 +287,8 @@ struct mlx5e_rq_stats {
 	u64 mpwqe_filler;
 	u64 mpwqe_frag;
 	u64 buff_alloc_err;
+	u64 cqe_compress_blks;
+	u64 cqe_compress_pkts;
 };

 static const struct counter_desc rq_stats_desc[] = {
@@ -297,6 +303,8 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
 };

 struct mlx5e_sq_stats {

--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -685,6 +685,40 @@ struct mlx5_cqe64 {
 	u8		op_own;
 };

+struct mlx5_mini_cqe8 {
+	union {
+		__be32 rx_hash_result;
+		struct {
+			__be16 checksum;
+			__be16 rsvd;
+		};
+		struct {
+			__be16 wqe_counter;
+			u8  s_wqe_opcode;
+			u8  reserved;
+		} s_wqe_info;
+	};
+	__be32 byte_cnt;
+};
+
+enum {
+	MLX5_NO_INLINE_DATA,
+	MLX5_INLINE_DATA32_SEG,
+	MLX5_INLINE_DATA64_SEG,
+	MLX5_COMPRESSED,
+};
+
+enum {
+	MLX5_CQE_FORMAT_CSUM = 0x1,
+};
+
+#define MLX5_MINI_CQE_ARRAY_SIZE 8
+
+static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
+{
+	return (cqe->op_own >> 2) & 0x3;
+}
+
 static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
 {
 	return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;