Commit b93293ef authored by coolsnowwolf

ipq40xx: essedma time-balanced scheduling for nat

Parent 96652f8f
@@ -18,6 +18,8 @@ DEFAULT_PACKAGES += \
kmod-usb-dwc3-qcom \
kmod-leds-gpio kmod-gpio-button-hotplug swconfig \
kmod-ath10k-ct wpad-openssl \
-	kmod-usb3 kmod-usb-dwc3 ath10k-firmware-qca4019-ct ethtool
+	kmod-usb3 kmod-usb-dwc3 ath10k-firmware-qca4019-ct \
+	automount autosamba luci-app-ipsec-vpnd luci-app-unblockmusic luci-app-cpufreq luci-app-zerotier luci-app-xlnetacc \
+	htop fdisk e2fsprogs ethtool
$(eval $(call BuildTarget))
#!/bin/sh
[ -f /lib/adjust_network.sh ] && {
	. /lib/adjust_network.sh
	adjust_eth_queue
}
#!/bin/sh /etc/rc.common
# Copyright (C) 2006-2011 OpenWrt.org
START=11
STOP=98
adjust_smp_affinity() {
	test -f /lib/adjust_network.sh && {
		. /lib/adjust_network.sh
		adjust_eth_queue
		adjust_edma_smp_affinity
		adjust_radio_smp_affinity
	}
}

boot() {
	adjust_smp_affinity
}
#!/bin/sh
# this script is used to adjust which CPU services each interrupt.
#
################################################
# Adjust smp_affinity of edma
# Globals:
# None
# Arguments:
# None
# Returns:
# None
# Remark:
# execute only once on start-up.
################################################
adjust_edma_smp_affinity() {
	grep -q edma_eth_ /proc/interrupts || return 0
	local nr=`cat /proc/cpuinfo | grep processor | wc -l`
	local cpu=0
	local tx_irq_num

	for tx_num in `seq 0 1 15` ; do
		cpu=`printf "%x" $((1<<((tx_num/4+0)%nr)))`
		tx_irq_num=`grep -m1 edma_eth_tx$tx_num /proc/interrupts | cut -d ':' -f 1 | tail -n1 | tr -d ' '`
		[ -n "$tx_irq_num" ] && echo $cpu > /proc/irq/$tx_irq_num/smp_affinity
	done

	for rx_num in `seq 0 1 7` ; do
		cpu=`printf "%x" $((1<<((rx_num/2)%nr)))`
		rx_irq_num=`grep -m1 edma_eth_rx$rx_num /proc/interrupts | cut -d ':' -f 1 | tail -n1 | tr -d ' '`
		[ -n "$rx_irq_num" ] && echo $cpu > /proc/irq/$rx_irq_num/smp_affinity
	done
}
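
The mask arithmetic is easier to verify with concrete numbers. A minimal C sketch of the same computation, assuming a quad-core ipq40xx (nr = 4); it prints the value each loop writes into smp_affinity:

	#include <stdio.h>

	int main(void)
	{
		int nr = 4;	/* assumed core count; the script derives it from /proc/cpuinfo */
		int n;

		for (n = 0; n < 16; n++)	/* edma_eth_tx0..tx15, pinned in groups of four */
			printf("edma_eth_tx%d -> smp_affinity %x\n", n, 1u << ((n / 4) % nr));
		for (n = 0; n < 8; n++)		/* edma_eth_rx0..rx7, pinned in pairs */
			printf("edma_eth_rx%d -> smp_affinity %x\n", n, 1u << ((n / 2) % nr));
		return 0;
	}

So tx0-tx3 land on CPU0 (mask 1), tx4-tx7 on CPU1 (mask 2), tx8-tx11 on CPU2 (mask 4), tx12-tx15 on CPU3 (mask 8), and each CPU likewise owns one pair of RX IRQs.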
################################################
# Adjust smp_affinity of ath10k for 2G and 5G
# Globals:
# None
# Arguments:
# None
# Returns:
# None
# Remark:
# execute only once on start-up.
################################################
adjust_radio_smp_affinity() {
	local irqs="`grep -E 'ath10k' /proc/interrupts | cut -d ':' -f 1 | tr -d ' '`"
	local nr=`cat /proc/cpuinfo | grep processor | wc -l`
	local idx=2

	for irq in $irqs; do
		cpu=`printf "%x" $((1<<((idx)%nr)))`
		echo $cpu > /proc/irq/$irq/smp_affinity
		idx=$((idx+1))
	done
}
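
With nr = 4 and idx starting at 2, the first ath10k interrupt gets mask 4 (CPU2) and the second gets mask 8 (CPU3), keeping the radios off CPU0/CPU1, which already own the first ethernet IRQ groups.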
################################################
# Adjust queue of eth
# Globals:
# None
# Arguments:
# None
# Returns:
# None
# Remark:
# Must be executed on every network restart.
################################################
adjust_eth_queue() {
	local nr=`cat /proc/cpuinfo | grep processor | wc -l`
	local idx=0

	for epath in /sys/class/net/eth[0-9]*; do
		test -e $epath || break
		echo $epath | grep -q "\." && continue
		eth=`basename $epath`
		idx=0
		for exps in /sys/class/net/$eth/queues/rx-[0-9]*/rps_cpus; do
			test -e $exps || break
			cpu=`printf "%x" $((1<<((idx+1)%nr)))`
			idx=$((idx+1))
			echo $cpu > $exps
			echo 256 > `dirname $exps`/rps_flow_cnt
		done
		which ethtool >/dev/null 2>&1 && ethtool -K $eth gro off
	done

	echo 1024 > /proc/sys/net/core/rps_sock_flow_entries
}
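
Worked through for nr = 4: rx-0 through rx-3 receive rps_cpus masks 2, 4, 8 and 1, so flows from queue N are processed on CPU (N+1) % 4; each queue's rps_flow_cnt is capped at 256 against the global 1024 rps_sock_flow_entries, and GRO is switched off on every ethN device.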
diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.c b/drivers/net/ethernet/qualcomm/essedma/edma.c
index 724f355..7a16236 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma.c
+++ b/drivers/net/ethernet/qualcomm/essedma/edma.c
@@ -22,14 +22,6 @@ extern struct net_device *edma_netdev[EDMA_MAX_PORTID_SUPPORTED];
bool edma_stp_rstp;
u16 edma_ath_eth_type;
-/* edma_skb_priority_offset()
- * get edma skb priority
- */
-static unsigned int edma_skb_priority_offset(struct sk_buff *skb)
-{
- return (skb->priority >> 2) & 1;
-}
-
/* edma_alloc_tx_ring()
* Allocate Tx descriptors ring
*/
@@ -1014,13 +1006,14 @@ static inline u16 edma_tpd_available(struct edma_common_info *edma_cinfo,
/* edma_tx_queue_get()
* Get the starting number of the queue
*/
-static inline int edma_tx_queue_get(struct edma_adapter *adapter,
+static inline int edma_tx_queue_get(struct edma_common_info *edma_cinfo, struct edma_adapter *adapter,
struct sk_buff *skb, int txq_id)
{
/* skb->priority is used as an index to skb priority table
* and based on packet priority, correspong queue is assigned.
+ * FIXME: we simply use jiffies here for time-based balancing
*/
- return adapter->tx_start_offset[txq_id] + edma_skb_priority_offset(skb);
+ return adapter->tx_start_offset[txq_id] + (smp_processor_id() % edma_cinfo->num_txq_per_core_netdev);
}
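
On the quad-core ipq40xx with two GMACs this gives num_txq_per_core_netdev = 16 / 2 / 4 = 2, so the sender now picks one of the two queues at tx_start_offset[txq_id] by the parity of smp_processor_id() instead of by the old priority-derived offset.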
/* edma_tx_update_hw_idx()
@@ -1389,8 +1382,9 @@ netdev_tx_t edma_xmit(struct sk_buff *skb,
}
/* this will be one of the 4 TX queues exposed to linux kernel */
- txq_id = skb_get_queue_mapping(skb);
- queue_id = edma_tx_queue_get(adapter, skb, txq_id);
+ /* XXX what if num_online_cpus() > EDMA_CPU_CORES_SUPPORTED */
+ txq_id = ((jiffies >> 5) % (EDMA_CPU_CORES_SUPPORTED - 1) + smp_processor_id() + 1) % EDMA_CPU_CORES_SUPPORTED;
+ queue_id = edma_tx_queue_get(edma_cinfo, adapter, skb, txq_id);
etdr = edma_cinfo->tpd_ring[queue_id];
nq = netdev_get_tx_queue(net_dev, txq_id);
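
The txq_id rotation is the actual time balancing. A minimal sketch of its arithmetic, assuming EDMA_CPU_CORES_SUPPORTED is 4 as on ipq40xx: the offset advances every 32 jiffies, and the result is never the sending CPU's own index, so TX completion work keeps migrating across the other cores:

	#include <stdio.h>

	int main(void)
	{
		unsigned long jiffies;	/* stand-in for the kernel's jiffies counter */
		int cpu;

		for (jiffies = 0; jiffies < 4 * 32; jiffies += 32)
			for (cpu = 0; cpu < 4; cpu++)
				printf("jiffies=%3lu cpu=%d -> txq_id=%lu\n", jiffies, cpu,
				       ((jiffies >> 5) % (4 - 1) + cpu + 1) % 4);
		return 0;
	}

With the offset confined to 0..2, (offset + cpu + 1) % 4 can never equal cpu; at HZ=100, for example, the mapping rotates roughly every 320 ms.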
@@ -1871,8 +1865,8 @@ void edma_free_irqs(struct edma_adapter *adapter)
int i, j;
int k = ((edma_cinfo->num_rx_queues == 4) ? 1 : 2);
- for (i = 0; i < CONFIG_NR_CPUS; i++) {
- for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + 4); j++)
+ for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
+ for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + edma_cinfo->num_txq_per_core); j++)
free_irq(edma_cinfo->tx_irq[j], &edma_cinfo->edma_percpu_info[i]);
for (j = edma_cinfo->edma_percpu_info[i].rx_start; j < (edma_cinfo->edma_percpu_info[i].rx_start + k); j++)
diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.h b/drivers/net/ethernet/qualcomm/essedma/edma.h
index 015e5f5..abb0bd5 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma.h
+++ b/drivers/net/ethernet/qualcomm/essedma/edma.h
@@ -324,6 +324,7 @@ struct edma_common_info {
u32 from_cpu; /* from CPU TPD field */
u32 num_rxq_per_core; /* Rx queues per core */
u32 num_txq_per_core; /* Tx queues per core */
+ u32 num_txq_per_core_netdev; /* Tx queues per core per netdev */
u16 tx_ring_count; /* Tx ring count */
u16 rx_ring_count; /* Rx ring*/
u16 rx_head_buffer_len; /* rx buffer length */
@@ -331,7 +332,7 @@ struct edma_common_info {
u32 page_mode; /* Jumbo frame supported flag */
u32 fraglist_mode; /* fraglist supported flag */
struct edma_hw hw; /* edma hw specific structure */
- struct edma_per_cpu_queues_info edma_percpu_info[CONFIG_NR_CPUS]; /* per cpu information */
+ struct edma_per_cpu_queues_info edma_percpu_info[EDMA_CPU_CORES_SUPPORTED]; /* per cpu information */
spinlock_t stats_lock; /* protect edma stats area for updation */
struct timer_list edma_stats_timer;
bool is_single_phy;
@@ -401,7 +402,7 @@ struct edma_adapter {
u32 link_state; /* phy link state */
u32 phy_mdio_addr; /* PHY device address on MII interface */
u32 poll_required; /* check if link polling is required */
- u32 tx_start_offset[CONFIG_NR_CPUS]; /* tx queue start */
+ u32 tx_start_offset[EDMA_CPU_CORES_SUPPORTED]; /* tx queue start */
u32 default_vlan_tag; /* vlan tag */
u32 dp_bitmap;
uint8_t phy_id[MII_BUS_ID_SIZE + 3];
diff --git a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
index d53c63b..2d4770c 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
+++ b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
@@ -719,11 +719,7 @@ static int edma_axi_probe(struct platform_device *pdev)
int i, j, k, err = 0;
int portid_bmp;
int idx = 0, idx_mac = 0;
-
- if (CONFIG_NR_CPUS != EDMA_CPU_CORES_SUPPORTED) {
- dev_err(&pdev->dev, "Invalid CPU Cores\n");
- return -EINVAL;
- }
+ int netdev_group = 2;
if ((num_rxq != 4) && (num_rxq != 8)) {
dev_err(&pdev->dev, "Invalid RX queue, edma probe failed\n");
@@ -747,7 +743,7 @@ static int edma_axi_probe(struct platform_device *pdev)
/* Initialize the netdev array before allocation
* to avoid double free
*/
- for (i = 0 ; i < edma_cinfo->num_gmac ; i++)
+ for (i = 0 ; i < EDMA_MAX_PORTID_SUPPORTED; i++)
edma_netdev[i] = NULL;
for (i = 0 ; i < edma_cinfo->num_gmac ; i++) {
@@ -768,8 +764,11 @@ static int edma_axi_probe(struct platform_device *pdev)
/* Fill ring details */
edma_cinfo->num_tx_queues = EDMA_MAX_TRANSMIT_QUEUE;
- edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / 4);
+ edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / num_online_cpus());
+ edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
edma_cinfo->tx_ring_count = EDMA_TX_RING_SIZE;
+ if (edma_cinfo->num_txq_per_core == 0)
+ edma_cinfo->num_txq_per_core = 1;
/* Update num rx queues based on module parameter */
edma_cinfo->num_rx_queues = num_rxq;
@@ -939,6 +938,13 @@ static int edma_axi_probe(struct platform_device *pdev)
idx_mac++;
}
+ if (edma_cinfo->num_gmac == 1) {
+ netdev_group = 1;
+ edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
+ }
+ if (edma_cinfo->num_txq_per_core_netdev == 0)
+ edma_cinfo->num_txq_per_core_netdev = 1;
+
/* Populate the adapter structure register the netdevice */
for (i = 0; i < edma_cinfo->num_gmac; i++) {
int k, m;
@@ -946,17 +952,16 @@ static int edma_axi_probe(struct platform_device *pdev)
adapter[i] = netdev_priv(edma_netdev[i]);
adapter[i]->netdev = edma_netdev[i];
adapter[i]->pdev = pdev;
- for (j = 0; j < CONFIG_NR_CPUS; j++) {
- m = i % 2;
- adapter[i]->tx_start_offset[j] =
- ((j << EDMA_TX_CPU_START_SHIFT) + (m << 1));
+ for (j = 0; j < num_online_cpus() && j < EDMA_CPU_CORES_SUPPORTED; j++) {
+ m = i % netdev_group;
+ adapter[i]->tx_start_offset[j] = j * edma_cinfo->num_txq_per_core + m * edma_cinfo->num_txq_per_core_netdev;
/* Share the queues with available net-devices.
* For instance , with 5 net-devices
* eth0/eth2/eth4 will share q0,q1,q4,q5,q8,q9,q12,q13
* and eth1/eth3 will get the remaining.
*/
for (k = adapter[i]->tx_start_offset[j]; k <
- (adapter[i]->tx_start_offset[j] + 2); k++) {
+ (adapter[i]->tx_start_offset[j] + edma_cinfo->num_txq_per_core_netdev); k++) {
if (edma_fill_netdev(edma_cinfo, k, i, j)) {
pr_err("Netdev overflow Error\n");
goto err_register;
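
With 4 online CPUs and netdev_group = 2, the new tx_start_offset formula (j * 4 + m * 2) matches the layout the old shift-based code produced (assuming EDMA_TX_CPU_START_SHIFT is 2), while also scaling when fewer CPUs are online. A sketch of the resulting layout under those assumptions:

	#include <stdio.h>

	int main(void)
	{
		int num_txq_per_core = 16 / 4;		/* EDMA_MAX_TRANSMIT_QUEUE / num_online_cpus() */
		int num_txq_per_core_netdev = 16 / 2 / 4;	/* ... / netdev_group / num_online_cpus() */
		int i, j;

		for (i = 0; i < 2; i++)			/* netdev: eth0, eth1 */
			for (j = 0; j < 4; j++) {	/* per-CPU slot */
				int start = j * num_txq_per_core + (i % 2) * num_txq_per_core_netdev;
				printf("eth%d cpu%d -> queues %d-%d\n", i, j,
				       start, start + num_txq_per_core_netdev - 1);
			}
		return 0;
	}

eth0 ends up with q0-q1, q4-q5, q8-q9, q12-q13 and eth1 with the remaining eight, matching the sharing described in the comment above.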
@@ -1109,9 +1114,12 @@ static int edma_axi_probe(struct platform_device *pdev)
/* populate per_core_info, do a napi_Add, request 16 TX irqs,
* 8 RX irqs, do a napi enable
*/
- for (i = 0; i < CONFIG_NR_CPUS; i++) {
+ for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
u8 rx_start;
+ tx_mask[i] = (0xFFFF >> (16 - edma_cinfo->num_txq_per_core)) << (i * edma_cinfo->num_txq_per_core);
+ tx_start[i] = i * edma_cinfo->num_txq_per_core;
+
edma_cinfo->edma_percpu_info[i].napi.state = 0;
netif_napi_add(edma_netdev[0],
@@ -1131,7 +1139,7 @@ static int edma_axi_probe(struct platform_device *pdev)
/* Request irq per core */
for (j = edma_cinfo->edma_percpu_info[i].tx_start;
- j < tx_start[i] + 4; j++) {
+ j < tx_start[i] + edma_cinfo->num_txq_per_core; j++) {
sprintf(&edma_tx_irq[j][0], "edma_eth_tx%d", j);
err = request_irq(edma_cinfo->tx_irq[j],
edma_interrupt,
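
With num_txq_per_core = 4, the tx_mask values computed at the top of this loop come out as 0x000f, 0x00f0, 0x0f00 and 0xf000, with tx_start at 0, 4, 8 and 12; each core still requests four edma_eth_tx IRQs, but the bound now follows num_txq_per_core instead of the literal 4.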
@@ -1253,7 +1261,7 @@ static int edma_axi_probe(struct platform_device *pdev)
#endif
err_rmap_add_fail:
edma_free_irqs(adapter[0]);
- for (i = 0; i < CONFIG_NR_CPUS; i++)
+ for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++)
napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
err_reset:
err_unregister_sysctl_tbl:
@@ -1301,7 +1309,7 @@ static int edma_axi_remove(struct platform_device *pdev)
unregister_netdev(edma_netdev[i]);
edma_stop_rx_tx(hw);
- for (i = 0; i < CONFIG_NR_CPUS; i++)
+ for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++)
napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
edma_irq_disable(edma_cinfo);
diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.c b/drivers/net/ethernet/qualcomm/essedma/edma.c
index fc274c8..e9d12a4 100644
--- a/drivers/net/ethernet/qualcomm/essedma/edma.c
+++ b/drivers/net/ethernet/qualcomm/essedma/edma.c
@@ -2075,15 +2075,13 @@ int edma_poll(struct napi_struct *napi, int budget)
int i, work_done = 0;
u16 rx_pending_fill;
- /* Store the Rx/Tx status by ANDing it with
- * appropriate CPU RX?TX mask
+ /* Store the Tx status by ANDing it with
+ * appropriate CPU TX mask
*/
- edma_read_reg(EDMA_REG_RX_ISR, &reg_data);
- edma_percpu_info->rx_status |= reg_data & edma_percpu_info->rx_mask;
- shadow_rx_status = edma_percpu_info->rx_status;
edma_read_reg(EDMA_REG_TX_ISR, &reg_data);
edma_percpu_info->tx_status |= reg_data & edma_percpu_info->tx_mask;
shadow_tx_status = edma_percpu_info->tx_status;
+ edma_write_reg(EDMA_REG_TX_ISR, shadow_tx_status);
/* Every core will have a start, which will be computed
* in probe and stored in edma_percpu_info->tx_start variable.
@@ -2098,6 +2096,14 @@ int edma_poll(struct napi_struct *napi, int budget)
edma_percpu_info->tx_status &= ~(1 << queue_id);
}
+ /* Store the Rx status by ANDing it with
+ * appropriate CPU RX mask
+ */
+ edma_read_reg(EDMA_REG_RX_ISR, &reg_data);
+ edma_percpu_info->rx_status |= reg_data & edma_percpu_info->rx_mask;
+ shadow_rx_status = edma_percpu_info->rx_status;
+ edma_write_reg(EDMA_REG_RX_ISR, shadow_rx_status);
+
/* Every core will have a start, which will be computed
* in probe and stored in edma_percpu_info->tx_start variable.
* We will shift the status bit by tx_start to obtain
@@ -2122,15 +2128,6 @@ int edma_poll(struct napi_struct *napi, int budget)
}
}
- /* Clear the status register, to avoid the interrupts to
- * reoccur.This clearing of interrupt status register is
- * done here as writing to status register only takes place
- * once the producer/consumer index has been updated to
- * reflect that the packet transmission/reception went fine.
- */
- edma_write_reg(EDMA_REG_RX_ISR, shadow_rx_status);
- edma_write_reg(EDMA_REG_TX_ISR, shadow_tx_status);
-
/* If budget not fully consumed, exit the polling mode */
if (likely(work_done < budget)) {
napi_complete(napi);
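
Net effect of the edma_poll() reordering: each interrupt-status register is acknowledged right after its bits are latched into the shadow copy, instead of once at the end after the producer/consumer indices are updated. Presumably this closes the window where a queue raises a fresh event mid-poll and the deferred write-one-to-clear wipes that bit before it is ever observed.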