Mirror of git://git.openwrt.org/openwrt/openwrt.git (synced 2025-11-03 22:44:27 -05:00)

Changelog: https://cdn.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.6.55

Added the following default ksym to target/linux/generic/config-6.6:

CONFIG_PROC_MEM_ALWAYS_FORCE=y
# CONFIG_PROC_MEM_FORCE_PTRACE is not set
# CONFIG_PROC_MEM_NO_FORCE is not set

Removed upstreamed:
  generic/backport-6.6/780-23-v6.12-r8169-Fix-spelling-mistake-tx_underun-tx_underrun.patch[1]
  generic/backport-6.6/780-25-v6.12-r8169-add-tally-counter-fields-added-with-RTL8125.patch[2]
  generic/pending-6.6/684-gso-fix-gso-fraglist-segmentation-after-pull-from-fr.patch[3]
  lantiq/patches-6.6/0025-v6.12-net-ethernet-lantiq_etop-fix-memory-disclosure.patch[4]

Manually rebased:
  bcm27xx/patches-6.6/950-0086-Main-bcm2708-bcm2709-linux-port.patch
  bcm27xx/patches-6.6/950-0998-i2c-designware-Add-support-for-bus-clear-feature.patch

All other patches automatically rebased.

1. https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v6.6.56&id=f02fcb7283b1c25f7e3ae07d7a2c830e06eb1a62
2. https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v6.6.56&id=1c723d785adb711496bc64c24240f952f4faaabf
3. https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v6.6.56&id=af3122f5fdc0d00581d6e598a668df6bf54c9daa
4. https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v6.6.56&id=e66e38d07b31e177ca430758ed97fbc79f27d966

Build system: x86/64
Build-tested: x86/64/AMD Cezanne, flogic/xiaomi_redmi-router-ax6000-ubootmod, ramips/tplink_archer-a6-v3
Run-tested: x86/64/AMD Cezanne, flogic/xiaomi_redmi-router-ax6000-ubootmod, ramips/tplink_archer-a6-v3

Signed-off-by: John Audia <therealgraysky@proton.me>
Link: https://github.com/openwrt/openwrt/pull/16655
Signed-off-by: Nick Hainke <vincent@systemli.org>

From dad6b97702639fba27a2bd3e986982ad6f0db3a7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 25 Mar 2024 08:40:29 +0100
Subject: [PATCH 2/4] net: Allow to use SMP threads for backlog NAPI.

Backlog NAPI is a per-CPU NAPI struct only (with no device behind it),
used by drivers which don't do NAPI themselves, by RPS, and by parts of
the stack which need to avoid recursive deadlocks while processing a
packet.

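For illustration only (not part of this patch): a driver without its own
NAPI instance hands received frames to the CPU-local backlog via
netif_rx(), roughly like the hypothetical sketch below; the toydrv_*
names are made up.

  #include <linux/interrupt.h>
  #include <linux/netdevice.h>
  #include <linux/etherdevice.h>

  /* Hypothetical RX handler of a driver without its own NAPI instance. */
  static irqreturn_t toydrv_rx_interrupt(int irq, void *dev_id)
  {
  	struct net_device *dev = dev_id;
  	struct sk_buff *skb;

  	skb = toydrv_fetch_frame(dev);	/* made-up helper pulling one frame */
  	if (!skb)
  		return IRQ_NONE;

  	skb->protocol = eth_type_trans(skb, dev);
  	netif_rx(skb);			/* enqueue on this CPU's backlog NAPI */
  	return IRQ_HANDLED;
  }
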
The non-NAPI driver use the CPU local backlog NAPI. If RPS is enabled
 | 
						|
then a flow for the skb is computed and based on the flow the skb can be
 | 
						|
enqueued on a remote CPU. Scheduling/ raising the softirq (for backlog's
 | 
						|
NAPI) on the remote CPU isn't trivial because the softirq is only
 | 
						|
scheduled on the local CPU and performed after the hardirq is done.
 | 
						|
In order to schedule a softirq on the remote CPU, an IPI is sent to the
 | 
						|
remote CPU which schedules the backlog-NAPI on the then local CPU.
 | 
						|
 | 
						|
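For reference, RPS is off by default and is enabled per receive queue by
writing a CPU mask to sysfs; the device name and mask below are only an
example:

  echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus

With a non-zero mask, flows from that queue may be steered to one of the
listed CPUs and the skb is then enqueued on that CPU's backlog.
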
On PREEMPT_RT interrupts are force-threaded. The soft interrupts are
raised within the interrupt thread and processed after the interrupt
handler has completed, still within the context of the interrupt thread.
The softirq is handled in the context where it originated.

With force-threaded interrupts enabled, ksoftirqd is woken up if a
softirq is raised from hardirq context. This is the case if it is raised
from an IPI. Additionally there is a warning on PREEMPT_RT if the
softirq is raised from the idle thread.
This was done for two reasons:
- With threaded interrupts the processing should happen in thread
  context (where it originated) and ksoftirqd is the only thread for
  this context if raised from hardirq. Using the currently running task
  instead would "punish" a random task.
- Once ksoftirqd is active it consumes all further softirqs until it
  stops running. This changed recently and is no longer the case.

Instead of keeping the backlog NAPI in ksoftirqd (in force-threaded/
PREEMPT_RT setups) I am proposing NAPI threads for the backlog.
The "proper" setup with threaded NAPI is not doable because the threads
are not pinned to an individual CPU and can be modified by the user.
Additionally a dummy network device would have to be assigned. Also
CPU-hotplug has to be considered if additional CPUs show up.
All this can probably be done/solved, but the smpboot threads already
provide this infrastructure.

Sending UDP packets over loopback expects that the packet is processed
within the call. Delaying it by handing it over to the thread hurts
performance. It is not beneficial to the outcome if the context switch
happens immediately after enqueue or after a while to process a few
packets in a batch.
There is no need to always use the thread if the backlog NAPI is
requested on the local CPU. This restores the loopback throughput. The
performance drops mostly to the same value after enabling RPS on the
loopback when comparing the IPI and the thread results.

Create NAPI threads for the backlog if requested during boot. The thread
runs the inner loop from napi_threaded_poll(); the wait part is
different: it checks for NAPI_STATE_SCHED (the backlog NAPI can not be
disabled).

The NAPI threads for backlog are optional; they have to be enabled via
the boot argument "thread_backlog_napi". It is mandatory for PREEMPT_RT
to avoid the wakeup of ksoftirqd from the IPI.

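Usage note (not part of the patch): on a non-PREEMPT_RT kernel the
per-CPU backlog threads are only created when the parameter is present
on the kernel command line, e.g.

  console=ttyS0,115200 root=/dev/vda1 thread_backlog_napi

where the other parameters are placeholders. The resulting threads show
up as backlog_napi/<cpu>; on PREEMPT_RT the threaded backlog is always
used.
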
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/core/dev.c | 148 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 113 insertions(+), 35 deletions(-)

--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -78,6 +78,7 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/smpboot.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
 #include <linux/string.h>
@@ -217,6 +218,31 @@ static inline struct hlist_head *dev_ind
 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
+#ifndef CONFIG_PREEMPT_RT
+
+static DEFINE_STATIC_KEY_FALSE(use_backlog_threads_key);
+
+static int __init setup_backlog_napi_threads(char *arg)
+{
+	static_branch_enable(&use_backlog_threads_key);
+	return 0;
+}
+early_param("thread_backlog_napi", setup_backlog_napi_threads);
+
+static bool use_backlog_threads(void)
+{
+	return static_branch_unlikely(&use_backlog_threads_key);
+}
+
+#else
+
+static bool use_backlog_threads(void)
+{
+	return true;
+}
+
+#endif
+
 static inline void rps_lock_irqsave(struct softnet_data *sd,
 				    unsigned long *flags)
 {
@@ -4445,6 +4471,7 @@ EXPORT_SYMBOL(__dev_direct_xmit);
 /*************************************************************************
  *			Receiver routines
  *************************************************************************/
+static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
 
 int netdev_max_backlog __read_mostly = 1000;
 EXPORT_SYMBOL(netdev_max_backlog);
@@ -4477,12 +4504,16 @@ static inline void ____napi_schedule(str
 		 */
 		thread = READ_ONCE(napi->thread);
 		if (thread) {
+			if (use_backlog_threads() && thread == raw_cpu_read(backlog_napi))
+				goto use_local_napi;
+
 			set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
 			wake_up_process(thread);
 			return;
 		}
 	}
 
+use_local_napi:
 	list_add_tail(&napi->poll_list, &sd->poll_list);
 	WRITE_ONCE(napi->list_owner, smp_processor_id());
 	/* If not called from net_rx_action()
@@ -4728,6 +4759,11 @@ static void napi_schedule_rps(struct sof
 
 #ifdef CONFIG_RPS
 	if (sd != mysd) {
+		if (use_backlog_threads()) {
+			__napi_schedule_irqoff(&sd->backlog);
+			return;
+		}
+
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
 
@@ -5951,7 +5987,7 @@ static void net_rps_action_and_irq_enabl
 #ifdef CONFIG_RPS
 	struct softnet_data *remsd = sd->rps_ipi_list;
 
-	if (remsd) {
+	if (!use_backlog_threads() && remsd) {
 		sd->rps_ipi_list = NULL;
 
 		local_irq_enable();
@@ -5966,7 +6002,7 @@ static void net_rps_action_and_irq_enabl
 static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	return sd->rps_ipi_list != NULL;
+	return !use_backlog_threads() && sd->rps_ipi_list;
 #else
 	return false;
 #endif
@@ -6010,7 +6046,7 @@ static int process_backlog(struct napi_s
 			 * We can use a plain write instead of clear_bit(),
 			 * and we dont need an smp_mb() memory barrier.
 			 */
-			napi->state = 0;
+			napi->state &= NAPIF_STATE_THREADED;
 			again = false;
 		} else {
 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
@@ -6676,43 +6712,48 @@ static void skb_defer_free_flush(struct
 	}
 }
 
-static int napi_threaded_poll(void *data)
+static void napi_threaded_poll_loop(struct napi_struct *napi)
 {
-	struct napi_struct *napi = data;
 	struct softnet_data *sd;
-	void *have;
+	unsigned long last_qs = jiffies;
 
-	while (!napi_thread_wait(napi)) {
-		unsigned long last_qs = jiffies;
+	for (;;) {
+		bool repoll = false;
+		void *have;
 
-		for (;;) {
-			bool repoll = false;
+		local_bh_disable();
+		sd = this_cpu_ptr(&softnet_data);
+		sd->in_napi_threaded_poll = true;
 
-			local_bh_disable();
-			sd = this_cpu_ptr(&softnet_data);
-			sd->in_napi_threaded_poll = true;
-
-			have = netpoll_poll_lock(napi);
-			__napi_poll(napi, &repoll);
-			netpoll_poll_unlock(have);
-
-			sd->in_napi_threaded_poll = false;
-			barrier();
-
-			if (sd_has_rps_ipi_waiting(sd)) {
-				local_irq_disable();
-				net_rps_action_and_irq_enable(sd);
-			}
-			skb_defer_free_flush(sd);
-			local_bh_enable();
+		have = netpoll_poll_lock(napi);
+		__napi_poll(napi, &repoll);
+		netpoll_poll_unlock(have);
+
+		sd->in_napi_threaded_poll = false;
+		barrier();
+
+		if (sd_has_rps_ipi_waiting(sd)) {
+			local_irq_disable();
+			net_rps_action_and_irq_enable(sd);
+		}
+		skb_defer_free_flush(sd);
+		local_bh_enable();
 
-			if (!repoll)
-				break;
+		if (!repoll)
+			break;
 
-			rcu_softirq_qs_periodic(last_qs);
-			cond_resched();
-		}
+		rcu_softirq_qs_periodic(last_qs);
+		cond_resched();
 	}
+}
+
+static int napi_threaded_poll(void *data)
+{
+	struct napi_struct *napi = data;
+
+	while (!napi_thread_wait(napi))
+		napi_threaded_poll_loop(napi);
+
 	return 0;
 }
 
@@ -11293,7 +11334,7 @@ static int dev_cpu_dead(unsigned int old
 
 		list_del_init(&napi->poll_list);
 		if (napi->poll == process_backlog)
-			napi->state = 0;
+			napi->state &= NAPIF_STATE_THREADED;
 		else
 			____napi_schedule(sd, napi);
 	}
@@ -11301,12 +11342,14 @@ static int dev_cpu_dead(unsigned int old
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
 
+	if (!use_backlog_threads()) {
#ifdef CONFIG_RPS
-	remsd = oldsd->rps_ipi_list;
-	oldsd->rps_ipi_list = NULL;
+		remsd = oldsd->rps_ipi_list;
+		oldsd->rps_ipi_list = NULL;
 #endif
-	/* send out pending IPI's on offline CPU */
-	net_rps_send_ipi(remsd);
+		/* send out pending IPI's on offline CPU */
+		net_rps_send_ipi(remsd);
+	}
 
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
@@ -11569,6 +11612,38 @@ static struct pernet_operations __net_in
  *
  */
 
+static int backlog_napi_should_run(unsigned int cpu)
+{
+	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+	struct napi_struct *napi = &sd->backlog;
+
+	return test_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
+}
+
+static void run_backlog_napi(unsigned int cpu)
+{
+	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+
+	napi_threaded_poll_loop(&sd->backlog);
+}
+
+static void backlog_napi_setup(unsigned int cpu)
+{
+	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
+	struct napi_struct *napi = &sd->backlog;
+
+	napi->thread = this_cpu_read(backlog_napi);
+	set_bit(NAPI_STATE_THREADED, &napi->state);
+}
+
+static struct smp_hotplug_thread backlog_threads = {
+	.store			= &backlog_napi,
+	.thread_should_run	= backlog_napi_should_run,
+	.thread_fn		= run_backlog_napi,
+	.thread_comm		= "backlog_napi/%u",
+	.setup			= backlog_napi_setup,
+};
+
 /*
  *       This is called single threaded during boot, so no need
  *       to take the rtnl semaphore.
@@ -11619,7 +11694,10 @@ static int __init net_dev_init(void)
 		init_gro_hash(&sd->backlog);
 		sd->backlog.poll = process_backlog;
 		sd->backlog.weight = weight_p;
+		INIT_LIST_HEAD(&sd->backlog.poll_list);
 	}
+	if (use_backlog_threads())
+		smpboot_register_percpu_thread(&backlog_threads);
 
 	dev_boot_phase = 0;
 