diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 90ac95900a11c65222950105b76384c050afcf48..888d5513fa4ac8a46b5247103982bf1d34e4633d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,6 +314,7 @@ struct napi_struct { struct net_device *dev; struct sk_buff *gro_list; struct sk_buff *skb; + struct hrtimer timer; struct list_head dev_list; struct hlist_node napi_hash_node; unsigned int napi_id; @@ -443,14 +444,19 @@ static inline bool napi_reschedule(struct napi_struct *napi) return false; } +void __napi_complete(struct napi_struct *n); +void napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete * @n: napi context * * Mark NAPI processing as complete. + * Consider using napi_complete_done() instead. */ -void __napi_complete(struct napi_struct *n); -void napi_complete(struct napi_struct *n); +static inline void napi_complete(struct napi_struct *n) +{ + return napi_complete_done(n, 0); +} /** * napi_by_id - lookup a NAPI by napi_id @@ -485,14 +491,7 @@ void napi_hash_del(struct napi_struct *napi); * Stop NAPI from being scheduled on this context. * Waits till any outstanding processing completes. */ -static inline void napi_disable(struct napi_struct *n) -{ - might_sleep(); - set_bit(NAPI_STATE_DISABLE, &n->state); - while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) - msleep(1); - clear_bit(NAPI_STATE_DISABLE, &n->state); -} +void napi_disable(struct napi_struct *n); /** * napi_enable - enable NAPI scheduling @@ -1603,6 +1602,7 @@ struct net_device { #endif + unsigned long gro_flush_timeout; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; diff --git a/net/core/dev.c b/net/core/dev.c index 70bb609c283d54a396553fd19faeef8c4e4ae820..bb09b0364619fd577acc45c067211a33d8f0b142 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -134,6 +134,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -4412,7 +4413,6 @@ EXPORT_SYMBOL(__napi_schedule_irqoff); void __napi_complete(struct napi_struct *n) { BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - BUG_ON(n->gro_list); list_del_init(&n->poll_list); smp_mb__before_atomic(); @@ -4420,7 +4420,7 @@ void __napi_complete(struct napi_struct *n) } EXPORT_SYMBOL(__napi_complete); -void napi_complete(struct napi_struct *n) +void napi_complete_done(struct napi_struct *n, int work_done) { unsigned long flags; @@ -4431,8 +4431,18 @@ void napi_complete(struct napi_struct *n) if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) return; - napi_gro_flush(n, false); + if (n->gro_list) { + unsigned long timeout = 0; + if (work_done) + timeout = n->dev->gro_flush_timeout; + + if (timeout) + hrtimer_start(&n->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); + else + napi_gro_flush(n, false); + } if (likely(list_empty(&n->poll_list))) { WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state)); } else { @@ -4442,7 +4452,7 @@ void napi_complete(struct napi_struct *n) local_irq_restore(flags); } } -EXPORT_SYMBOL(napi_complete); +EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ struct napi_struct *napi_by_id(unsigned int napi_id) @@ -4496,10 +4506,23 @@ void napi_hash_del(struct napi_struct *napi) } EXPORT_SYMBOL_GPL(napi_hash_del); +static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) +{ + struct napi_struct *napi; + + napi = container_of(timer, struct napi_struct, timer); + if (napi->gro_list) + napi_schedule(napi); + + return HRTIMER_NORESTART; +} + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { INIT_LIST_HEAD(&napi->poll_list); + hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + napi->timer.function = napi_watchdog; napi->gro_count = 0; napi->gro_list = NULL; napi->skb = NULL; @@ -4518,6 +4541,20 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, } EXPORT_SYMBOL(netif_napi_add); +void napi_disable(struct napi_struct *n) +{ + might_sleep(); + set_bit(NAPI_STATE_DISABLE, &n->state); + + while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) + msleep(1); + + hrtimer_cancel(&n->timer); + + clear_bit(NAPI_STATE_DISABLE, &n->state); +} +EXPORT_SYMBOL(napi_disable); + void netif_napi_del(struct napi_struct *napi) { list_del_init(&napi->dev_list); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9dd06699b09c9434e60aa40a948adf69d5505d2d..1a24602cd54e63caa38e40cdedf8009944e0824b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -325,6 +325,23 @@ static ssize_t tx_queue_len_store(struct device *dev, } NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong); +static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) +{ + dev->gro_flush_timeout = val; + return 0; +} + +static ssize_t gro_flush_timeout_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + return netdev_store(dev, attr, buf, len, change_gro_flush_timeout); +} +NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong); + static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -422,6 +439,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_mtu.attr, &dev_attr_flags.attr, &dev_attr_tx_queue_len.attr, + &dev_attr_gro_flush_timeout.attr, &dev_attr_phys_port_id.attr, NULL, };