diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h
index 28427f0758c7ed3ddb35870aee9e450cc451151a..31c886edc79180c38d7cce239db6cfcfc61a3b14 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/item.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/item.h
@@ -42,7 +42,7 @@
 
 struct mlxsw_item {
 	unsigned short	offset;		/* bytes in container */
-	unsigned short	step;		/* step in bytes for indexed items */
+	short		step;		/* step in bytes for indexed items */
 	unsigned short	in_step_offset; /* offset within one step */
 	unsigned char	shift;		/* shift in bits */
 	unsigned char	element_size;	/* size of element in bit array */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6c4e08b8058a0caa804f1de02cb2fee1d685fb4d..0e08be41c8e093fd4390b5c7bff6c177260c413a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -4827,6 +4827,42 @@ static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index,
 	mlxsw_reg_ratr_counter_set_type_set(payload, set_type);
 }
 
+/* RDPM - Router DSCP to Priority Mapping
+ * --------------------------------------
+ * Controls the mapping from DSCP field to switch priority on routed packets
+ */
+#define MLXSW_REG_RDPM_ID 0x8009
+#define MLXSW_REG_RDPM_BASE_LEN 0x00
+#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN 0x01
+#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT 64
+#define MLXSW_REG_RDPM_LEN 0x40
+#define MLXSW_REG_RDPM_LAST_ENTRY (MLXSW_REG_RDPM_BASE_LEN + \
+				   MLXSW_REG_RDPM_LEN - \
+				   MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN)
+
+MLXSW_REG_DEFINE(rdpm, MLXSW_REG_RDPM_ID, MLXSW_REG_RDPM_LEN);
+
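+/* The DSCP entry records are laid out from the end of the register
+ * backwards: the record for DSCP value 0 occupies the last byte of the
+ * payload, which is why the items below start at MLXSW_REG_RDPM_LAST_ENTRY
+ * and use a negative step.
+ */
+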
+/* reg_rdpm_dscp_entry_e
+ * Enable update of the specific entry
+ * Access: Index
+ */
+MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_e, MLXSW_REG_RDPM_LAST_ENTRY, 7, 1,
+		    -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
+
+/* reg_rdpm_dscp_entry_prio
+ * Switch Priority
+ * Access: RW
+ */
+MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_prio, MLXSW_REG_RDPM_LAST_ENTRY, 0, 4,
+		    -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
+
+static inline void mlxsw_reg_rdpm_pack(char *payload, unsigned short index,
+				       u8 prio)
+{
+	mlxsw_reg_rdpm_dscp_entry_e_set(payload, index, 1);
+	mlxsw_reg_rdpm_dscp_entry_prio_set(payload, index, prio);
+}
+
 /* RICNT - Router Interface Counter Register
  * -----------------------------------------
  * The RICNT register retrieves per port performance counters
@@ -7640,6 +7676,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(rtar),
 	MLXSW_REG(ratr),
 	MLXSW_REG(rtdp),
+	MLXSW_REG(rdpm),
 	MLXSW_REG(ricnt),
 	MLXSW_REG(rrcr),
 	MLXSW_REG(ralta),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 54c7d9202e814cefe36e6c914b2be8baaaa7a09d..f78bfe39496644e7354e005f7fd22162224cd16b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1830,6 +1830,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 		return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data);
 	case TC_SETUP_QDISC_RED:
 		return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data);
+	case TC_SETUP_QDISC_PRIO:
+		return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index b6f475e834740f653f0f8969e829754cce302a3c..16f8fbda0891a2fa4721060fedfd2299adcdfb39 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -565,6 +565,8 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port);
 void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
 			  struct tc_red_qopt_offload *p);
+int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct tc_prio_qopt_offload *p);
 
 /* spectrum_fid.c */
 int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 971f689dd833b3708f94b219bfce9c8a617a8f51..e11a0abfc6631c634dbed7fe8b3cc5a69547ffbe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -41,9 +41,12 @@
 #include "spectrum.h"
 #include "reg.h"
 
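+/* Map a prio qdisc band to a hardware traffic class: band 0, which is the
+ * highest-priority band, is mapped to the numerically highest traffic class.
+ */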
+#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - (band) - 1)
+
 enum mlxsw_sp_qdisc_type {
 	MLXSW_SP_QDISC_NO_QDISC,
 	MLXSW_SP_QDISC_RED,
+	MLXSW_SP_QDISC_PRIO,
 };
 
 struct mlxsw_sp_qdisc_ops {
@@ -63,6 +66,11 @@ struct mlxsw_sp_qdisc_ops {
 			  void *xstats_ptr);
 	void (*clean_stats)(struct mlxsw_sp_port *mlxsw_sp_port,
 			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc);
+	/* unoffload - to be used for a qdisc that stops being offloaded without
+	 * being destroyed.
+	 */
+	void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
 };
 
 struct mlxsw_sp_qdisc {
@@ -76,6 +84,7 @@ struct mlxsw_sp_qdisc {
 		u64 tx_packets;
 		u64 drops;
 		u64 overlimits;
+		u64 backlog;
 	} stats_base;
 
 	struct mlxsw_sp_qdisc_ops *ops;
@@ -141,6 +150,9 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 
 err_bad_param:
 err_config:
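+	/* If a qdisc that was already offloaded (same handle) failed to be
+	 * reconfigured, give it a chance to adjust the qdisc stats for going
+	 * unoffloaded (e.g. remove the HW backlog contribution) before its
+	 * offload state is destroyed below.
+	 */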
+	if (mlxsw_sp_qdisc->handle == handle && ops->unoffload)
+		ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params);
+
 	mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
 	return err;
 }
@@ -403,6 +415,165 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
+static int
+mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+	int i;
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
+					  MLXSW_SP_PORT_DEFAULT_TCLASS);
+
+	return 0;
+}
+
+static int
+mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
+				 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+				 void *params)
+{
+	struct tc_prio_qopt_offload_params *p = params;
+
+	if (p->bands > IEEE_8021QAZ_MAX_TCS)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int
+mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			    void *params)
+{
+	struct tc_prio_qopt_offload_params *p = params;
+	int tclass, i;
+	int err;
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
+		err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void
+mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
+			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			      void *params)
+{
+	struct tc_prio_qopt_offload_params *p = params;
+	u64 backlog;
+
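+	/* The qdisc stops being offloaded but is not destroyed; remove the
+	 * HW backlog contribution (converted from cells to bytes) from its
+	 * qstats so that only the SW-visible backlog remains.
+	 */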
+	backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+				       mlxsw_sp_qdisc->stats_base.backlog);
+	p->qstats->backlog -= backlog;
+}
+
+static int
+mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			      struct tc_qopt_offload_stats *stats_ptr)
+{
+	u64 tx_bytes, tx_packets, drops = 0, backlog = 0;
+	struct mlxsw_sp_qdisc_stats *stats_base;
+	struct mlxsw_sp_port_xstats *xstats;
+	struct rtnl_link_stats64 *stats;
+	int i;
+
+	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+	stats = &mlxsw_sp_port->periodic_hw_stats.stats;
+	stats_base = &mlxsw_sp_qdisc->stats_base;
+
+	tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
+	tx_packets = stats->tx_packets - stats_base->tx_packets;
+
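+	/* The HW reports tail drops and backlog per traffic class; sum them
+	 * over all classes and push only the delta relative to what is
+	 * already accounted for in stats_base. Backlog is kept in cells and
+	 * converted to bytes when reported.
+	 */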
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		drops += xstats->tail_drop[i];
+		backlog += xstats->backlog[i];
+	}
+	drops = drops - stats_base->drops;
+
+	_bstats_update(stats_ptr->bstats, tx_bytes, tx_packets);
+	stats_ptr->qstats->drops += drops;
+	stats_ptr->qstats->backlog +=
+				mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+						     backlog) -
+				mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+						     stats_base->backlog);
+	stats_base->backlog = backlog;
+	stats_base->drops += drops;
+	stats_base->tx_bytes += tx_bytes;
+	stats_base->tx_packets += tx_packets;
+	return 0;
+}
+
+static void
+mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
+					 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+	struct mlxsw_sp_qdisc_stats *stats_base;
+	struct mlxsw_sp_port_xstats *xstats;
+	struct rtnl_link_stats64 *stats;
+	int i;
+
+	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
+	stats = &mlxsw_sp_port->periodic_hw_stats.stats;
+	stats_base = &mlxsw_sp_qdisc->stats_base;
+
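+	/* Take a snapshot of the current port counters as the new base, so
+	 * that the stats of the freshly offloaded qdisc start from zero.
+	 */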
+	stats_base->tx_packets = stats->tx_packets;
+	stats_base->tx_bytes = stats->tx_bytes;
+
+	stats_base->drops = 0;
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		stats_base->drops += xstats->tail_drop[i];
+
+	stats_base->backlog = 0;
+}
+
+static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
+	.type = MLXSW_SP_QDISC_PRIO,
+	.check_params = mlxsw_sp_qdisc_prio_check_params,
+	.replace = mlxsw_sp_qdisc_prio_replace,
+	.unoffload = mlxsw_sp_qdisc_prio_unoffload,
+	.destroy = mlxsw_sp_qdisc_prio_destroy,
+	.get_stats = mlxsw_sp_qdisc_get_prio_stats,
+	.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
+};
+
+int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct tc_prio_qopt_offload *p)
+{
+	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+
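+	/* Only the qdisc attached at the port's root can be offloaded. */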
+	if (p->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
+	if (p->command == TC_PRIO_REPLACE)
+		return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
+					      mlxsw_sp_qdisc,
+					      &mlxsw_sp_qdisc_ops_prio,
+					      &p->replace_params);
+
+	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
+				    MLXSW_SP_QDISC_PRIO))
+		return -EOPNOTSUPP;
+
+	switch (p->command) {
+	case TC_PRIO_DESTROY:
+		return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+	case TC_PRIO_STATS:
+		return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
+						&p->stats);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
 	mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 7a136256b8f77ffe045267c3627ed5276287e76d..01ff5ba6796e28936dead6e5d96806118cf19535 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -7011,6 +7011,24 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
 }
 #endif
 
+static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
+{
+	char rdpm_pl[MLXSW_REG_RDPM_LEN];
+	unsigned int i;
+
+	MLXSW_REG_ZERO(rdpm, rdpm_pl);
+
+	/* The HW determines switch priority from the DSCP bits, while the
+	 * kernel still derives it from the full ToS field. Since DSCP is the
+	 * ToS field without the 2 least-significant ECN bits, map each DSCP
+	 * value to the priority the kernel would assign to the corresponding
+	 * ToS value, i.e. the DSCP value shifted left by 2.
+	 */
+	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
+		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
+}
+
 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
@@ -7023,6 +7041,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 
 	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
 	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
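+	/* Have the router update the switch priority of routed packets, so
+	 * that the DSCP to priority mapping configured via RDPM (see
+	 * mlxsw_sp_dscp_init()) takes effect.
+	 */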
+	mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
 	if (err)
 		return err;
@@ -7098,6 +7117,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		goto err_mp_hash_init;
 
+	err = mlxsw_sp_dscp_init(mlxsw_sp);
+	if (err)
+		goto err_dscp_init;
+
 	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
 	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
 				    mlxsw_sp_router_fib_dump_flush);
@@ -7107,6 +7130,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 	return 0;
 
 err_register_fib_notifier:
+err_dscp_init:
 err_mp_hash_init:
 	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
 err_register_netevent_notifier:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ef7b348e849816f827e959346839e738ae1dbf50..6d95477b962c0ecbdb6e77504b18a867b9b01f76 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -780,6 +780,7 @@ enum tc_setup_type {
 	TC_SETUP_BLOCK,
 	TC_SETUP_QDISC_CBS,
 	TC_SETUP_QDISC_RED,
+	TC_SETUP_QDISC_PRIO,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0d1343cba84cd3e735d325d8c716febb9b058161..9c341f003091edfcad5a039b27a9a3ad5b9db64a 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -761,4 +761,29 @@ struct tc_red_qopt_offload {
 	};
 };
 
+enum tc_prio_command {
+	TC_PRIO_REPLACE,
+	TC_PRIO_DESTROY,
+	TC_PRIO_STATS,
+};
+
+struct tc_prio_qopt_offload_params {
+	int bands;
+	u8 priomap[TC_PRIO_MAX + 1];
+	/* In case a prio qdisc is offloaded and now is changed to a
+	 * non-offloadable config, it needs to update the backlog & qlen
+	 * values to negate the HW backlog & qlen values (and only them).
+	 */
+	struct gnet_stats_queue *qstats;
+};
+
+struct tc_prio_qopt_offload {
+	enum tc_prio_command command;
+	u32 handle;
+	u32 parent;
+	union {
+		struct tc_prio_qopt_offload_params replace_params;
+		struct tc_qopt_offload_stats stats;
+	};
+};
 #endif
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index fe1510eb111ff506c047063cbe516e5742d7343f..a398502899a966038f78400bd940b454b2436e9d 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -142,6 +142,31 @@ prio_reset(struct Qdisc *sch)
 	sch->q.qlen = 0;
 }
 
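+/* Request that the device offload this prio qdisc (enable) or stop
+ * offloading it, via ndo_setup_tc(). Returns -EOPNOTSUPP when the device
+ * cannot offload the qdisc.
+ */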
+static int prio_offload(struct Qdisc *sch, bool enable)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_prio_qopt_offload opt = {
+		.handle = sch->handle,
+		.parent = sch->parent,
+	};
+
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	if (enable) {
+		opt.command = TC_PRIO_REPLACE;
+		opt.replace_params.bands = q->bands;
+		memcpy(&opt.replace_params.priomap, q->prio2band,
+		       TC_PRIO_MAX + 1);
+		opt.replace_params.qstats = &sch->qstats;
+	} else {
+		opt.command = TC_PRIO_DESTROY;
+	}
+
+	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt);
+}
+
 static void
 prio_destroy(struct Qdisc *sch)
 {
@@ -149,6 +174,7 @@ prio_destroy(struct Qdisc *sch)
 	struct prio_sched_data *q = qdisc_priv(sch);
 
 	tcf_block_put(q->block);
+	prio_offload(sch, false);
 	for (prio = 0; prio < q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
@@ -204,6 +230,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	sch_tree_unlock(sch);
+	prio_offload(sch, true);
 	return 0;
 }
 
@@ -223,15 +250,47 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt,
 	return prio_tune(sch, opt, extack);
 }
 
+static int prio_dump_offload(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct tc_prio_qopt_offload hw_stats = {
+		.handle = sch->handle,
+		.parent = sch->parent,
+		.command = TC_PRIO_STATS,
+		.stats.bstats = &sch->bstats,
+		.stats.qstats = &sch->qstats,
+	};
+	int err;
+
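+	/* Start from a clean slate: TCQ_F_OFFLOADED is set again only if the
+	 * driver filled in the HW stats. -EOPNOTSUPP means the qdisc is
+	 * simply not offloaded and is not propagated as an error.
+	 */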
+	sch->flags &= ~TCQ_F_OFFLOADED;
+	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+		return 0;
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
+					    &hw_stats);
+	if (err == -EOPNOTSUPP)
+		return 0;
+
+	if (!err)
+		sch->flags |= TCQ_F_OFFLOADED;
+
+	return err;
+}
+
 static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_prio_qopt opt;
+	int err;
 
 	opt.bands = q->bands;
 	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
 
+	err = prio_dump_offload(sch);
+	if (err)
+		goto nla_put_failure;
+
 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
 		goto nla_put_failure;