提交 f13d1b48 编写于 作者: D David S. Miller

Merge branch 'netpoll-second-round-of-fixes'

Eric Dumazet says:

====================
netpoll: second round of fixes.

As diagnosed by Song Liu, ndo_poll_controller() can
be very dangerous on loaded hosts, since the cpu
calling ndo_poll_controller() might steal all NAPI
contexts (for all RX/TX queues of the NIC).

This capture, with one ksoftirqd eating all cycles,
can last for an unlimited amount of time, since one
CPU is generally not able to drain all the queues under load.

It seems that all networking drivers that use NAPI
for their TX completions should not provide an ndo_poll_controller():

Most NAPI drivers have netpoll support already handled
in core networking stack, since netpoll_poll_dev()
uses poll_napi(dev) to iterate through registered
NAPI contexts for a device.

First patch is a fix in poll_one_napi().

Then following patches take care of ten drivers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -2185,25 +2185,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -2185,25 +2185,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: schedule the NAPI context of every adapter queue,
 * unless the netdev is down or a reset has been triggered.
 */
static void ena_netpoll(struct net_device *netdev)
{
	struct ena_adapter *adapter = netdev_priv(netdev);
	int qid;

	/* Do not schedule NAPI while the netdev is down ... */
	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
		return;

	/* ... nor while the driver is in the middle of a reset. */
	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
		return;

	qid = 0;
	while (qid < adapter->num_queues) {
		napi_schedule(&adapter->ena_napi[qid].napi);
		qid++;
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev, struct net_device *sb_dev,
select_queue_fallback_t fallback) select_queue_fallback_t fallback)
...@@ -2369,9 +2350,6 @@ static const struct net_device_ops ena_netdev_ops = { ...@@ -2369,9 +2350,6 @@ static const struct net_device_ops ena_netdev_ops = {
.ndo_change_mtu = ena_change_mtu, .ndo_change_mtu = ena_change_mtu,
.ndo_set_mac_address = NULL, .ndo_set_mac_address = NULL,
.ndo_validate_addr = eth_validate_addr, .ndo_validate_addr = eth_validate_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = ena_netpoll,
#endif /* CONFIG_NET_POLL_CONTROLLER */
}; };
static int ena_device_validate_params(struct ena_adapter *adapter, static int ena_device_validate_params(struct ena_adapter *adapter,
......
...@@ -1503,21 +1503,6 @@ static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, ...@@ -1503,21 +1503,6 @@ static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr,
return phy_mii_ioctl(phy_dev, ifr, cmd); return phy_mii_ioctl(phy_dev, ifr, cmd);
} }
/* use only for netconsole to poll with the device without interrupt */
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Schedule the NAPI context of each of the first q_num * 2 ring_data
 * entries, with local interrupts disabled for the duration of the loop.
 */
static void hns_nic_poll_controller(struct net_device *ndev)
{
	struct hns_nic_priv *priv = netdev_priv(ndev);
	unsigned long irq_flags;
	int idx = 0;

	local_irq_save(irq_flags);
	while (idx < priv->ae_handle->q_num * 2) {
		napi_schedule(&priv->ring_data[idx].napi);
		idx++;
	}
	local_irq_restore(irq_flags);
}
#endif
static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb, static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb,
struct net_device *ndev) struct net_device *ndev)
{ {
...@@ -1970,9 +1955,6 @@ static const struct net_device_ops hns_nic_netdev_ops = { ...@@ -1970,9 +1955,6 @@ static const struct net_device_ops hns_nic_netdev_ops = {
.ndo_set_features = hns_nic_set_features, .ndo_set_features = hns_nic_set_features,
.ndo_fix_features = hns_nic_fix_features, .ndo_fix_features = hns_nic_fix_features,
.ndo_get_stats64 = hns_nic_get_stats64, .ndo_get_stats64 = hns_nic_get_stats64,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = hns_nic_poll_controller,
#endif
.ndo_set_rx_mode = hns_nic_set_rx_mode, .ndo_set_rx_mode = hns_nic_set_rx_mode,
.ndo_select_queue = hns_nic_select_queue, .ndo_select_queue = hns_nic_select_queue,
}; };
......
...@@ -789,23 +789,6 @@ static void hinic_get_stats64(struct net_device *netdev, ...@@ -789,23 +789,6 @@ static void hinic_get_stats64(struct net_device *netdev,
stats->tx_errors = nic_tx_stats->tx_dropped; stats->tx_errors = nic_tx_stats->tx_dropped;
} }
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: for every queue pair reported by hinic_hwdev_num_qps(),
 * schedule the txqs[] entry's NAPI context and then the rxqs[] entry's.
 */
static void hinic_netpoll(struct net_device *netdev)
{
	struct hinic_dev *nic_dev = netdev_priv(netdev);
	int num_qps = hinic_hwdev_num_qps(nic_dev->hwdev);
	int q;

	for (q = 0; q < num_qps; q++) {
		napi_schedule(&nic_dev->txqs[q].napi);
		napi_schedule(&nic_dev->rxqs[q].napi);
	}
}
#endif
static const struct net_device_ops hinic_netdev_ops = { static const struct net_device_ops hinic_netdev_ops = {
.ndo_open = hinic_open, .ndo_open = hinic_open,
.ndo_stop = hinic_close, .ndo_stop = hinic_close,
...@@ -818,9 +801,6 @@ static const struct net_device_ops hinic_netdev_ops = { ...@@ -818,9 +801,6 @@ static const struct net_device_ops hinic_netdev_ops = {
.ndo_start_xmit = hinic_xmit_frame, .ndo_start_xmit = hinic_xmit_frame,
.ndo_tx_timeout = hinic_tx_timeout, .ndo_tx_timeout = hinic_tx_timeout,
.ndo_get_stats64 = hinic_get_stats64, .ndo_get_stats64 = hinic_get_stats64,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = hinic_netpoll,
#endif
}; };
static void netdev_features_init(struct net_device *netdev) static void netdev_features_init(struct net_device *netdev)
......
...@@ -921,17 +921,6 @@ static int ehea_poll(struct napi_struct *napi, int budget) ...@@ -921,17 +921,6 @@ static int ehea_poll(struct napi_struct *napi, int budget)
return rx; return rx;
} }
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: schedule the NAPI context of each of the port's first
 * num_def_qps port_res[] entries.
 */
static void ehea_netpoll(struct net_device *dev)
{
	struct ehea_port *port = netdev_priv(dev);
	int qp = 0;

	while (qp < port->num_def_qps) {
		napi_schedule(&port->port_res[qp].napi);
		qp++;
	}
}
#endif
static irqreturn_t ehea_recv_irq_handler(int irq, void *param) static irqreturn_t ehea_recv_irq_handler(int irq, void *param)
{ {
struct ehea_port_res *pr = param; struct ehea_port_res *pr = param;
...@@ -2953,9 +2942,6 @@ static const struct net_device_ops ehea_netdev_ops = { ...@@ -2953,9 +2942,6 @@ static const struct net_device_ops ehea_netdev_ops = {
.ndo_open = ehea_open, .ndo_open = ehea_open,
.ndo_stop = ehea_stop, .ndo_stop = ehea_stop,
.ndo_start_xmit = ehea_start_xmit, .ndo_start_xmit = ehea_start_xmit,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = ehea_netpoll,
#endif
.ndo_get_stats64 = ehea_get_stats64, .ndo_get_stats64 = ehea_get_stats64,
.ndo_set_mac_address = ehea_set_mac_addr, .ndo_set_mac_address = ehea_set_mac_addr,
.ndo_validate_addr = eth_validate_addr, .ndo_validate_addr = eth_validate_addr,
......
...@@ -2207,19 +2207,6 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget) ...@@ -2207,19 +2207,6 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
return frames_processed; return frames_processed;
} }
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: call replenish_pools() on the adapter, then invoke
 * ibmvnic_interrupt_rx() directly for each of the first req_rx_queues
 * rx_scrq[] entries.
 */
static void ibmvnic_netpoll_controller(struct net_device *dev)
{
	struct ibmvnic_adapter *adapter = netdev_priv(dev);
	int q = 0;

	/* adapter is the same pointer netdev_priv(dev) returns. */
	replenish_pools(adapter);

	while (q < adapter->req_rx_queues) {
		ibmvnic_interrupt_rx(adapter->rx_scrq[q]->irq,
				     adapter->rx_scrq[q]);
		q++;
	}
}
#endif
static int wait_for_reset(struct ibmvnic_adapter *adapter) static int wait_for_reset(struct ibmvnic_adapter *adapter)
{ {
int rc, ret; int rc, ret;
...@@ -2292,9 +2279,6 @@ static const struct net_device_ops ibmvnic_netdev_ops = { ...@@ -2292,9 +2279,6 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
.ndo_set_mac_address = ibmvnic_set_mac, .ndo_set_mac_address = ibmvnic_set_mac,
.ndo_validate_addr = eth_validate_addr, .ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = ibmvnic_tx_timeout, .ndo_tx_timeout = ibmvnic_tx_timeout,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = ibmvnic_netpoll_controller,
#endif
.ndo_change_mtu = ibmvnic_change_mtu, .ndo_change_mtu = ibmvnic_change_mtu,
.ndo_features_check = ibmvnic_features_check, .ndo_features_check = ibmvnic_features_check,
}; };
......
...@@ -72,9 +72,6 @@ static void netxen_schedule_work(struct netxen_adapter *adapter, ...@@ -72,9 +72,6 @@ static void netxen_schedule_work(struct netxen_adapter *adapter,
work_func_t func, int delay); work_func_t func, int delay);
static void netxen_cancel_fw_work(struct netxen_adapter *adapter); static void netxen_cancel_fw_work(struct netxen_adapter *adapter);
static int netxen_nic_poll(struct napi_struct *napi, int budget); static int netxen_nic_poll(struct napi_struct *napi, int budget);
#ifdef CONFIG_NET_POLL_CONTROLLER
static void netxen_nic_poll_controller(struct net_device *netdev);
#endif
static void netxen_create_sysfs_entries(struct netxen_adapter *adapter); static void netxen_create_sysfs_entries(struct netxen_adapter *adapter);
static void netxen_remove_sysfs_entries(struct netxen_adapter *adapter); static void netxen_remove_sysfs_entries(struct netxen_adapter *adapter);
...@@ -581,9 +578,6 @@ static const struct net_device_ops netxen_netdev_ops = { ...@@ -581,9 +578,6 @@ static const struct net_device_ops netxen_netdev_ops = {
.ndo_tx_timeout = netxen_tx_timeout, .ndo_tx_timeout = netxen_tx_timeout,
.ndo_fix_features = netxen_fix_features, .ndo_fix_features = netxen_fix_features,
.ndo_set_features = netxen_set_features, .ndo_set_features = netxen_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = netxen_nic_poll_controller,
#endif
}; };
static inline bool netxen_function_zero(struct pci_dev *pdev) static inline bool netxen_function_zero(struct pci_dev *pdev)
...@@ -2402,23 +2396,6 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget) ...@@ -2402,23 +2396,6 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget)
return work_done; return work_done;
} }
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: with the adapter's IRQ disabled, call netxen_intr()
 * once for each of the first max_sds_rings sds_rings[] entries of the
 * receive context, then re-enable the IRQ.
 */
static void netxen_nic_poll_controller(struct net_device *netdev)
{
	struct netxen_adapter *adapter = netdev_priv(netdev);
	struct netxen_recv_context *recv_ctx = &adapter->recv_ctx;
	int ring;

	disable_irq(adapter->irq);

	for (ring = 0; ring < adapter->max_sds_rings; ring++)
		netxen_intr(adapter->irq, &recv_ctx->sds_rings[ring]);

	enable_irq(adapter->irq);
}
#endif
static int static int
nx_incr_dev_ref_cnt(struct netxen_adapter *adapter) nx_incr_dev_ref_cnt(struct netxen_adapter *adapter)
{ {
......
...@@ -59,9 +59,6 @@ static int qlcnic_close(struct net_device *netdev); ...@@ -59,9 +59,6 @@ static int qlcnic_close(struct net_device *netdev);
static void qlcnic_tx_timeout(struct net_device *netdev); static void qlcnic_tx_timeout(struct net_device *netdev);
static void qlcnic_attach_work(struct work_struct *work); static void qlcnic_attach_work(struct work_struct *work);
static void qlcnic_fwinit_work(struct work_struct *work); static void qlcnic_fwinit_work(struct work_struct *work);
#ifdef CONFIG_NET_POLL_CONTROLLER
static void qlcnic_poll_controller(struct net_device *netdev);
#endif
static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding); static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding);
static int qlcnic_can_start_firmware(struct qlcnic_adapter *adapter); static int qlcnic_can_start_firmware(struct qlcnic_adapter *adapter);
...@@ -545,9 +542,6 @@ static const struct net_device_ops qlcnic_netdev_ops = { ...@@ -545,9 +542,6 @@ static const struct net_device_ops qlcnic_netdev_ops = {
.ndo_udp_tunnel_add = qlcnic_add_vxlan_port, .ndo_udp_tunnel_add = qlcnic_add_vxlan_port,
.ndo_udp_tunnel_del = qlcnic_del_vxlan_port, .ndo_udp_tunnel_del = qlcnic_del_vxlan_port,
.ndo_features_check = qlcnic_features_check, .ndo_features_check = qlcnic_features_check,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = qlcnic_poll_controller,
#endif
#ifdef CONFIG_QLCNIC_SRIOV #ifdef CONFIG_QLCNIC_SRIOV
.ndo_set_vf_mac = qlcnic_sriov_set_vf_mac, .ndo_set_vf_mac = qlcnic_sriov_set_vf_mac,
.ndo_set_vf_rate = qlcnic_sriov_set_vf_tx_rate, .ndo_set_vf_rate = qlcnic_sriov_set_vf_tx_rate,
...@@ -3200,45 +3194,6 @@ static irqreturn_t qlcnic_msix_tx_intr(int irq, void *data) ...@@ -3200,45 +3194,6 @@ static irqreturn_t qlcnic_msix_tx_intr(int irq, void *data)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: if the device is up, disable the interrupt of each SDS
 * ring and schedule its NAPI context.  With MSI-X enabled, do the same
 * for each TX ring, unless the checks below show that TX rings need no
 * NAPI scheduling of their own.
 */
static void qlcnic_poll_controller(struct net_device *netdev)
{
	struct qlcnic_adapter *adapter = netdev_priv(netdev);
	struct qlcnic_recv_context *recv_ctx;
	struct qlcnic_host_sds_ring *sds_ring;
	struct qlcnic_host_tx_ring *tx_ring;
	int ring;

	if (!test_bit(__QLCNIC_DEV_UP, &adapter->state))
		return;

	recv_ctx = adapter->recv_ctx;
	ring = 0;
	while (ring < adapter->drv_sds_rings) {
		sds_ring = &recv_ctx->sds_rings[ring];
		qlcnic_disable_sds_intr(adapter, sds_ring);
		napi_schedule(&sds_ring->napi);
		ring++;
	}

	/* TX rings are only handled when MSI-X is enabled. */
	if (!(adapter->flags & QLCNIC_MSIX_ENABLED))
		return;

	/* Only Multi-Tx queue capable devices need to schedule NAPI for
	 * TX rings; bail out for the other configurations.
	 */
	if (qlcnic_83xx_check(adapter) &&
	    (adapter->flags & QLCNIC_TX_INTR_SHARED))
		return;
	if (qlcnic_82xx_check(adapter) && !qlcnic_check_multi_tx(adapter))
		return;

	ring = 0;
	while (ring < adapter->drv_tx_rings) {
		tx_ring = &adapter->tx_ring[ring];
		qlcnic_disable_tx_intr(adapter, tx_ring);
		napi_schedule(&tx_ring->napi);
		ring++;
	}
}
#endif
static void static void
qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding) qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding)
{ {
......
...@@ -2206,29 +2206,6 @@ static void efx_fini_napi(struct efx_nic *efx) ...@@ -2206,29 +2206,6 @@ static void efx_fini_napi(struct efx_nic *efx)
efx_fini_napi_channel(channel); efx_fini_napi_channel(channel);
} }
/**************************************************************************
*
* Kernel netpoll interface
*
*************************************************************************/
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Although in the common case interrupts will be disabled, this is not
* guaranteed. However, all our work happens inside the NAPI callback,
* so no locking is required.
*/
static void efx_netpoll(struct net_device *net_dev)
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_channel *channel;
efx_for_each_channel(channel, efx)
efx_schedule_channel(channel);
}
#endif
/************************************************************************** /**************************************************************************
* *
* Kernel net device interface * Kernel net device interface
...@@ -2509,9 +2486,6 @@ static const struct net_device_ops efx_netdev_ops = { ...@@ -2509,9 +2486,6 @@ static const struct net_device_ops efx_netdev_ops = {
#endif #endif
.ndo_get_phys_port_id = efx_get_phys_port_id, .ndo_get_phys_port_id = efx_get_phys_port_id,
.ndo_get_phys_port_name = efx_get_phys_port_name, .ndo_get_phys_port_name = efx_get_phys_port_name,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = efx_netpoll,
#endif
.ndo_setup_tc = efx_setup_tc, .ndo_setup_tc = efx_setup_tc,
#ifdef CONFIG_RFS_ACCEL #ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = efx_filter_rfs, .ndo_rx_flow_steer = efx_filter_rfs,
......
...@@ -2052,29 +2052,6 @@ static void ef4_fini_napi(struct ef4_nic *efx) ...@@ -2052,29 +2052,6 @@ static void ef4_fini_napi(struct ef4_nic *efx)
ef4_fini_napi_channel(channel); ef4_fini_napi_channel(channel);
} }
/**************************************************************************
*
* Kernel netpoll interface
*
*************************************************************************/
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Although in the common case interrupts will be disabled, this is not
* guaranteed. However, all our work happens inside the NAPI callback,
* so no locking is required.
*/
static void ef4_netpoll(struct net_device *net_dev)
{
struct ef4_nic *efx = netdev_priv(net_dev);
struct ef4_channel *channel;
ef4_for_each_channel(channel, efx)
ef4_schedule_channel(channel);
}
#endif
/************************************************************************** /**************************************************************************
* *
* Kernel net device interface * Kernel net device interface
...@@ -2250,9 +2227,6 @@ static const struct net_device_ops ef4_netdev_ops = { ...@@ -2250,9 +2227,6 @@ static const struct net_device_ops ef4_netdev_ops = {
.ndo_set_mac_address = ef4_set_mac_address, .ndo_set_mac_address = ef4_set_mac_address,
.ndo_set_rx_mode = ef4_set_rx_mode, .ndo_set_rx_mode = ef4_set_rx_mode,
.ndo_set_features = ef4_set_features, .ndo_set_features = ef4_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = ef4_netpoll,
#endif
.ndo_setup_tc = ef4_setup_tc, .ndo_setup_tc = ef4_setup_tc,
#ifdef CONFIG_RFS_ACCEL #ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = ef4_filter_rfs, .ndo_rx_flow_steer = ef4_filter_rfs,
......
...@@ -1699,17 +1699,6 @@ static void virtnet_stats(struct net_device *dev, ...@@ -1699,17 +1699,6 @@ static void virtnet_stats(struct net_device *dev,
tot->rx_frame_errors = dev->stats.rx_frame_errors; tot->rx_frame_errors = dev->stats.rx_frame_errors;
} }
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: schedule the NAPI context of each of the first
 * vi->curr_queue_pairs entries of vi->rq[].
 */
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int qp = 0;

	while (qp < vi->curr_queue_pairs) {
		napi_schedule(&vi->rq[qp].napi);
		qp++;
	}
}
#endif
static void virtnet_ack_link_announce(struct virtnet_info *vi) static void virtnet_ack_link_announce(struct virtnet_info *vi)
{ {
rtnl_lock(); rtnl_lock();
...@@ -2447,9 +2436,6 @@ static const struct net_device_ops virtnet_netdev = { ...@@ -2447,9 +2436,6 @@ static const struct net_device_ops virtnet_netdev = {
.ndo_get_stats64 = virtnet_stats, .ndo_get_stats64 = virtnet_stats,
.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = virtnet_netpoll,
#endif
.ndo_bpf = virtnet_xdp, .ndo_bpf = virtnet_xdp,
.ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_xdp_xmit = virtnet_xdp_xmit,
.ndo_features_check = passthru_features_check, .ndo_features_check = passthru_features_check,
......
...@@ -135,27 +135,9 @@ static void queue_process(struct work_struct *work) ...@@ -135,27 +135,9 @@ static void queue_process(struct work_struct *work)
} }
} }
/*
* Check whether delayed processing was scheduled for our NIC. If so,
* we attempt to grab the poll lock and use ->poll() to pump the card.
* If this fails, either we've recursed in ->poll() or it's already
* running on another CPU.
*
* Note: we don't mask interrupts with this lock because we're using
* trylock here and interrupts are already disabled in the softirq
* case. Further, we test the poll_owner to avoid recursion on UP
* systems where the lock doesn't exist.
*/
static void poll_one_napi(struct napi_struct *napi) static void poll_one_napi(struct napi_struct *napi)
{ {
int work = 0; int work;
/* net_rx_action's ->poll() invocations and our's are
* synchronized by this test which is only made while
* holding the napi->poll_lock.
*/
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
return;
/* If we set this bit but see that it has already been set, /* If we set this bit but see that it has already been set,
* that indicates that napi has been disabled and we need * that indicates that napi has been disabled and we need
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册