mirror of git://git.openwrt.org/openwrt/openwrt.git
synced 2025-10-29 13:04:27 -04:00

update kernel version, refresh current patchset

Signed-off-by: Kabuli Chana <newtownBuild@gmail.com>
[rebase/refresh]
Signed-off-by: Adrian Schmutzler <freifunk@adrianschmutzler.de>
		
			
				
	
	
		
286 lines
8.0 KiB
Diff
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 26 Jul 2020 14:03:21 +0200
Subject: [PATCH] net: add support for threaded NAPI polling

For some drivers (especially 802.11 drivers), doing a lot of work in the NAPI
poll function does not perform well. Since NAPI poll is bound to the CPU it
was scheduled from, we can easily end up with a few very busy CPUs spending
most of their time in softirq/ksoftirqd and some idle ones.

Introduce threaded NAPI for such drivers based on a workqueue. The API is the
same except for using netif_threaded_napi_add instead of netif_napi_add.

In my tests with mt76 on MT7621 using threaded NAPI + a thread for tx scheduling
improves LAN->WLAN bridging throughput by 10-50%. Throughput without threaded
NAPI is wildly inconsistent, depending on the CPU that runs the tx scheduling
thread.

With threaded NAPI, throughput seems stable and consistent (and higher than
the best results I got without it).

Based on a patch by Hillf Danton

Cc: Hillf Danton <hdanton@sina.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -347,6 +347,7 @@ struct napi_struct {
 	struct list_head	dev_list;
 	struct hlist_node	napi_hash_node;
 	unsigned int		napi_id;
+	struct work_struct	work;
 };
 
 enum {
@@ -357,6 +358,7 @@ enum {
 	NAPI_STATE_LISTED,	/* NAPI added to system lists */
 	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
 	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+	NAPI_STATE_THREADED,	/* Use threaded NAPI */
 };
 
 enum {
@@ -367,6 +369,7 @@ enum {
 	NAPIF_STATE_LISTED	 = BIT(NAPI_STATE_LISTED),
 	NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
 	NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+	NAPIF_STATE_THREADED	 = BIT(NAPI_STATE_THREADED),
 };
 
 enum gro_result {
@@ -2411,6 +2414,26 @@ void netif_napi_add(struct net_device *d
 		    int (*poll)(struct napi_struct *, int), int weight);
 
 /**
+ *	netif_threaded_napi_add - initialize a NAPI context
+ *	@dev:  network device
+ *	@napi: NAPI context
+ *	@poll: polling function
+ *	@weight: default weight
+ *
+ * This variant of netif_napi_add() should be used from drivers using NAPI
+ * with CPU intensive poll functions.
+ * This will schedule polling from a high priority workqueue
+ */
+static inline void netif_threaded_napi_add(struct net_device *dev,
+					   struct napi_struct *napi,
+					   int (*poll)(struct napi_struct *, int),
+					   int weight)
+{
+	set_bit(NAPI_STATE_THREADED, &napi->state);
+	netif_napi_add(dev, napi, poll, weight);
+}
+
+/**
  *	netif_tx_napi_add - initialize a NAPI context
  *	@dev:  network device
  *	@napi: NAPI context
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -159,6 +159,7 @@ static DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
+static struct workqueue_struct *napi_workq __read_mostly;
 
 static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_info(unsigned long val,
@@ -6407,6 +6408,11 @@ void __napi_schedule(struct napi_struct
 {
 	unsigned long flags;
 
+	if (test_bit(NAPI_STATE_THREADED, &n->state)) {
+		queue_work(napi_workq, &n->work);
+		return;
+	}
+
 	local_irq_save(flags);
 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
 	local_irq_restore(flags);
@@ -6454,6 +6460,11 @@ EXPORT_SYMBOL(napi_schedule_prep);
  */
 void __napi_schedule_irqoff(struct napi_struct *n)
 {
+	if (test_bit(NAPI_STATE_THREADED, &n->state)) {
+		queue_work(napi_workq, &n->work);
+		return;
+	}
+
 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
 }
 EXPORT_SYMBOL(__napi_schedule_irqoff);
@@ -6715,6 +6726,86 @@ static void init_gro_hash(struct napi_st
 	napi->gro_bitmask = 0;
 }
 
+static int __napi_poll(struct napi_struct *n, bool *repoll)
+{
+	int work, weight;
+
+	weight = n->weight;
+
+	/* This NAPI_STATE_SCHED test is for avoiding a race
+	 * with netpoll's poll_napi().  Only the entity which
+	 * obtains the lock and sees NAPI_STATE_SCHED set will
+	 * actually make the ->poll() call.  Therefore we avoid
+	 * accidentally calling ->poll() when NAPI is not scheduled.
+	 */
+	work = 0;
+	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+		work = n->poll(n, weight);
+		trace_napi_poll(n, work, weight);
+	}
+
+	if (unlikely(work > weight))
+		pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
+			    n->poll, work, weight);
+
+	if (likely(work < weight))
+		return work;
+
+	/* Drivers must not modify the NAPI state if they
+	 * consume the entire weight.  In such cases this code
+	 * still "owns" the NAPI instance and therefore can
+	 * move the instance around on the list at-will.
+	 */
+	if (unlikely(napi_disable_pending(n))) {
+		napi_complete(n);
+		return work;
+	}
+
+	if (n->gro_bitmask) {
+		/* flush too old packets
+		 * If HZ < 1000, flush all packets.
+		 */
+		napi_gro_flush(n, HZ >= 1000);
+	}
+
+	gro_normal_list(n);
+
+	*repoll = true;
+
+	return work;
+}
+
+static void napi_workfn(struct work_struct *work)
+{
+	struct napi_struct *n = container_of(work, struct napi_struct, work);
+	void *have;
+
+	for (;;) {
+		bool repoll = false;
+
+		local_bh_disable();
+
+		have = netpoll_poll_lock(n);
+		__napi_poll(n, &repoll);
+		netpoll_poll_unlock(have);
+
+		local_bh_enable();
+
+		if (!repoll)
+			return;
+
+		if (!need_resched())
+			continue;
+
+		/*
+		 * have to pay for the latency of task switch even if
+		 * napi is scheduled
+		 */
+		queue_work(napi_workq, work);
+		return;
+	}
+}
+
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
@@ -6738,6 +6829,7 @@ void netif_napi_add(struct net_device *d
 #ifdef CONFIG_NETPOLL
 	napi->poll_owner = -1;
 #endif
+	INIT_WORK(&napi->work, napi_workfn);
 	set_bit(NAPI_STATE_SCHED, &napi->state);
 	set_bit(NAPI_STATE_NPSVC, &napi->state);
 	list_add_rcu(&napi->dev_list, &dev->napi_list);
@@ -6780,6 +6872,7 @@ void __netif_napi_del(struct napi_struct
 	if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
 		return;
 
+	cancel_work_sync(&napi->work);
 	napi_hash_del(napi);
 	list_del_rcu(&napi->dev_list);
 	napi_free_frags(napi);
@@ -6791,53 +6884,19 @@ EXPORT_SYMBOL(__netif_napi_del);
 
 static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 {
+	bool do_repoll = false;
 	void *have;
-	int work, weight;
+	int work;
 
 	list_del_init(&n->poll_list);
 
 	have = netpoll_poll_lock(n);
 
-	weight = n->weight;
+	work = __napi_poll(n, &do_repoll);
 
-	/* This NAPI_STATE_SCHED test is for avoiding a race
-	 * with netpoll's poll_napi().  Only the entity which
-	 * obtains the lock and sees NAPI_STATE_SCHED set will
-	 * actually make the ->poll() call.  Therefore we avoid
-	 * accidentally calling ->poll() when NAPI is not scheduled.
-	 */
-	work = 0;
-	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
-		work = n->poll(n, weight);
-		trace_napi_poll(n, work, weight);
-	}
-
-	if (unlikely(work > weight))
-		pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
-			    n->poll, work, weight);
-
-	if (likely(work < weight))
+	if (!do_repoll)
 		goto out_unlock;
 
-	/* Drivers must not modify the NAPI state if they
-	 * consume the entire weight.  In such cases this code
-	 * still "owns" the NAPI instance and therefore can
-	 * move the instance around on the list at-will.
-	 */
-	if (unlikely(napi_disable_pending(n))) {
-		napi_complete(n);
-		goto out_unlock;
-	}
-
-	if (n->gro_bitmask) {
-		/* flush too old packets
-		 * If HZ < 1000, flush all packets.
-		 */
-		napi_gro_flush(n, HZ >= 1000);
-	}
-
-	gro_normal_list(n);
-
 	/* Some drivers may have called napi_schedule
 	 * prior to exhausting their budget.
 	 */
@@ -11291,6 +11350,10 @@ static int __init net_dev_init(void)
 		sd->backlog.weight = weight_p;
 	}
 
+	napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI,
+				     WQ_UNBOUND_MAX_ACTIVE | WQ_SYSFS);
+	BUG_ON(!napi_workq);
+
 	dev_boot_phase = 0;
 
 	/* The loopback device is special if any other network devices