mac80211: merge performance improvement patches
Fix fq_codel performance issues. Add a new rx function for batch processing. Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
		
							parent
							
								
									431b177afa
								
							
						
					
					
						commit
						3d731fc903
					
				@ -0,0 +1,186 @@
 | 
			
		||||
From: Felix Fietkau <nbd@nbd.name>
 | 
			
		||||
Date: Sat, 25 Jul 2020 20:53:23 +0200
 | 
			
		||||
Subject: [PATCH] mac80211: add a function for running rx without passing skbs
 | 
			
		||||
 to the stack
 | 
			
		||||
 | 
			
		||||
This can be used to run mac80211 rx processing on a batch of frames in NAPI
 | 
			
		||||
poll before passing them to the network stack in a large batch.
 | 
			
		||||
This can improve icache footprint, or it can be used to pass frames via
 | 
			
		||||
netif_receive_skb_list.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
--- a/include/net/mac80211.h
 | 
			
		||||
+++ b/include/net/mac80211.h
 | 
			
		||||
@@ -4358,6 +4358,31 @@ void ieee80211_free_hw(struct ieee80211_
 | 
			
		||||
 void ieee80211_restart_hw(struct ieee80211_hw *hw);
 | 
			
		||||
 
 | 
			
		||||
 /**
 | 
			
		||||
+ * ieee80211_rx_list - receive frame and store processed skbs in a list
 | 
			
		||||
+ *
 | 
			
		||||
+ * Use this function to hand received frames to mac80211. The receive
 | 
			
		||||
+ * buffer in @skb must start with an IEEE 802.11 header. In case of a
 | 
			
		||||
+ * paged @skb is used, the driver is recommended to put the ieee80211
 | 
			
		||||
+ * header of the frame on the linear part of the @skb to avoid memory
 | 
			
		||||
+ * allocation and/or memcpy by the stack.
 | 
			
		||||
+ *
 | 
			
		||||
+ * This function may not be called in IRQ context. Calls to this function
 | 
			
		||||
+ * for a single hardware must be synchronized against each other. Calls to
 | 
			
		||||
+ * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
 | 
			
		||||
+ * mixed for a single hardware. Must not run concurrently with
 | 
			
		||||
+ * ieee80211_tx_status() or ieee80211_tx_status_ni().
 | 
			
		||||
+ *
 | 
			
		||||
+ * This function must be called with BHs disabled and RCU read lock held.
 | 
			
		||||
+ *
 | 
			
		||||
+ * @hw: the hardware this frame came in on
 | 
			
		||||
+ * @sta: the station the frame was received from, or %NULL
 | 
			
		||||
+ * @skb: the buffer to receive, owned by mac80211 after this call
 | 
			
		||||
+ * @list: the destination list
 | 
			
		||||
+ */
 | 
			
		||||
+void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
 | 
			
		||||
+		       struct sk_buff *skb, struct list_head *list);
 | 
			
		||||
+
 | 
			
		||||
+/**
 | 
			
		||||
  * ieee80211_rx_napi - receive frame from NAPI context
 | 
			
		||||
  *
 | 
			
		||||
  * Use this function to hand received frames to mac80211. The receive
 | 
			
		||||
--- a/net/mac80211/ieee80211_i.h
 | 
			
		||||
+++ b/net/mac80211/ieee80211_i.h
 | 
			
		||||
@@ -218,7 +218,7 @@ enum ieee80211_rx_flags {
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
 struct ieee80211_rx_data {
 | 
			
		||||
-	struct napi_struct *napi;
 | 
			
		||||
+	struct list_head *list;
 | 
			
		||||
 	struct sk_buff *skb;
 | 
			
		||||
 	struct ieee80211_local *local;
 | 
			
		||||
 	struct ieee80211_sub_if_data *sdata;
 | 
			
		||||
--- a/net/mac80211/rx.c
 | 
			
		||||
+++ b/net/mac80211/rx.c
 | 
			
		||||
@@ -2552,8 +2552,8 @@ static void ieee80211_deliver_skb_to_loc
 | 
			
		||||
 		memset(skb->cb, 0, sizeof(skb->cb));
 | 
			
		||||
 
 | 
			
		||||
 		/* deliver to local stack */
 | 
			
		||||
-		if (rx->napi)
 | 
			
		||||
-			napi_gro_receive(rx->napi, skb);
 | 
			
		||||
+		if (rx->list)
 | 
			
		||||
+			list_add_tail(&skb->list, rx->list);
 | 
			
		||||
 		else
 | 
			
		||||
 			netif_receive_skb(skb);
 | 
			
		||||
 	}
 | 
			
		||||
@@ -3843,7 +3843,6 @@ void ieee80211_release_reorder_timeout(s
 | 
			
		||||
 		/* This is OK -- must be QoS data frame */
 | 
			
		||||
 		.security_idx = tid,
 | 
			
		||||
 		.seqno_idx = tid,
 | 
			
		||||
-		.napi = NULL, /* must be NULL to not have races */
 | 
			
		||||
 	};
 | 
			
		||||
 	struct tid_ampdu_rx *tid_agg_rx;
 | 
			
		||||
 
 | 
			
		||||
@@ -4453,8 +4452,8 @@ static bool ieee80211_invoke_fast_rx(str
 | 
			
		||||
 	/* deliver to local stack */
 | 
			
		||||
 	skb->protocol = eth_type_trans(skb, fast_rx->dev);
 | 
			
		||||
 	memset(skb->cb, 0, sizeof(skb->cb));
 | 
			
		||||
-	if (rx->napi)
 | 
			
		||||
-		napi_gro_receive(rx->napi, skb);
 | 
			
		||||
+	if (rx->list)
 | 
			
		||||
+		list_add_tail(&skb->list, rx->list);
 | 
			
		||||
 	else
 | 
			
		||||
 		netif_receive_skb(skb);
 | 
			
		||||
 
 | 
			
		||||
@@ -4521,7 +4520,7 @@ static bool ieee80211_prepare_and_rx_han
 | 
			
		||||
 static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 | 
			
		||||
 					 struct ieee80211_sta *pubsta,
 | 
			
		||||
 					 struct sk_buff *skb,
 | 
			
		||||
-					 struct napi_struct *napi)
 | 
			
		||||
+					 struct list_head *list)
 | 
			
		||||
 {
 | 
			
		||||
 	struct ieee80211_local *local = hw_to_local(hw);
 | 
			
		||||
 	struct ieee80211_sub_if_data *sdata;
 | 
			
		||||
@@ -4536,7 +4535,7 @@ static void __ieee80211_rx_handle_packet
 | 
			
		||||
 	memset(&rx, 0, sizeof(rx));
 | 
			
		||||
 	rx.skb = skb;
 | 
			
		||||
 	rx.local = local;
 | 
			
		||||
-	rx.napi = napi;
 | 
			
		||||
+	rx.list = list;
 | 
			
		||||
 
 | 
			
		||||
 	if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
 | 
			
		||||
 		I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
 | 
			
		||||
@@ -4644,8 +4643,8 @@ static void __ieee80211_rx_handle_packet
 | 
			
		||||
  * This is the receive path handler. It is called by a low level driver when an
 | 
			
		||||
  * 802.11 MPDU is received from the hardware.
 | 
			
		||||
  */
 | 
			
		||||
-void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
 | 
			
		||||
-		       struct sk_buff *skb, struct napi_struct *napi)
 | 
			
		||||
+void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
 | 
			
		||||
+		       struct sk_buff *skb, struct list_head *list)
 | 
			
		||||
 {
 | 
			
		||||
 	struct ieee80211_local *local = hw_to_local(hw);
 | 
			
		||||
 	struct ieee80211_rate *rate = NULL;
 | 
			
		||||
@@ -4737,36 +4736,53 @@ void ieee80211_rx_napi(struct ieee80211_
 | 
			
		||||
 	status->rx_flags = 0;
 | 
			
		||||
 
 | 
			
		||||
 	/*
 | 
			
		||||
-	 * key references and virtual interfaces are protected using RCU
 | 
			
		||||
-	 * and this requires that we are in a read-side RCU section during
 | 
			
		||||
-	 * receive processing
 | 
			
		||||
-	 */
 | 
			
		||||
-	rcu_read_lock();
 | 
			
		||||
-
 | 
			
		||||
-	/*
 | 
			
		||||
 	 * Frames with failed FCS/PLCP checksum are not returned,
 | 
			
		||||
 	 * all other frames are returned without radiotap header
 | 
			
		||||
 	 * if it was previously present.
 | 
			
		||||
 	 * Also, frames with less than 16 bytes are dropped.
 | 
			
		||||
 	 */
 | 
			
		||||
 	skb = ieee80211_rx_monitor(local, skb, rate);
 | 
			
		||||
-	if (!skb) {
 | 
			
		||||
-		rcu_read_unlock();
 | 
			
		||||
+	if (!skb)
 | 
			
		||||
 		return;
 | 
			
		||||
-	}
 | 
			
		||||
 
 | 
			
		||||
 	ieee80211_tpt_led_trig_rx(local,
 | 
			
		||||
 			((struct ieee80211_hdr *)skb->data)->frame_control,
 | 
			
		||||
 			skb->len);
 | 
			
		||||
 
 | 
			
		||||
-	__ieee80211_rx_handle_packet(hw, pubsta, skb, napi);
 | 
			
		||||
-
 | 
			
		||||
-	rcu_read_unlock();
 | 
			
		||||
+	__ieee80211_rx_handle_packet(hw, pubsta, skb, list);
 | 
			
		||||
 
 | 
			
		||||
 	return;
 | 
			
		||||
  drop:
 | 
			
		||||
 	kfree_skb(skb);
 | 
			
		||||
 }
 | 
			
		||||
+EXPORT_SYMBOL(ieee80211_rx_list);
 | 
			
		||||
+
 | 
			
		||||
+void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
 | 
			
		||||
+		       struct sk_buff *skb, struct napi_struct *napi)
 | 
			
		||||
+{
 | 
			
		||||
+	struct sk_buff *tmp;
 | 
			
		||||
+	LIST_HEAD(list);
 | 
			
		||||
+
 | 
			
		||||
+
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * key references and virtual interfaces are protected using RCU
 | 
			
		||||
+	 * and this requires that we are in a read-side RCU section during
 | 
			
		||||
+	 * receive processing
 | 
			
		||||
+	 */
 | 
			
		||||
+	rcu_read_lock();
 | 
			
		||||
+	ieee80211_rx_list(hw, pubsta, skb, &list);
 | 
			
		||||
+	rcu_read_unlock();
 | 
			
		||||
+
 | 
			
		||||
+	if (!napi) {
 | 
			
		||||
+		netif_receive_skb_list(&list);
 | 
			
		||||
+		return;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	list_for_each_entry_safe(skb, tmp, &list, list) {
 | 
			
		||||
+		skb_list_del_init(skb);
 | 
			
		||||
+		napi_gro_receive(napi, skb);
 | 
			
		||||
+	}
 | 
			
		||||
+}
 | 
			
		||||
 EXPORT_SYMBOL(ieee80211_rx_napi);
 | 
			
		||||
 
 | 
			
		||||
 /* This is a version of the rx handler that can be called from hard irq
 | 
			
		||||
@ -0,0 +1,55 @@
 | 
			
		||||
From: Felix Fietkau <nbd@nbd.name>
 | 
			
		||||
Date: Sun, 26 Jul 2020 14:37:02 +0200
 | 
			
		||||
Subject: [PATCH] net/fq_impl: use skb_get_hash instead of
 | 
			
		||||
 skb_get_hash_perturb
 | 
			
		||||
 | 
			
		||||
This avoids unnecessarily regenerating the skb flow hash
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
--- a/include/net/fq.h
 | 
			
		||||
+++ b/include/net/fq.h
 | 
			
		||||
@@ -69,15 +69,6 @@ struct fq {
 | 
			
		||||
 	struct list_head backlogs;
 | 
			
		||||
 	spinlock_t lock;
 | 
			
		||||
 	u32 flows_cnt;
 | 
			
		||||
-#if LINUX_VERSION_IS_GEQ(5,3,10) || \
 | 
			
		||||
-    LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \
 | 
			
		||||
-    LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \
 | 
			
		||||
-    LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \
 | 
			
		||||
-    LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0)
 | 
			
		||||
-	siphash_key_t	perturbation;
 | 
			
		||||
-#else
 | 
			
		||||
-	u32 perturbation;
 | 
			
		||||
-#endif
 | 
			
		||||
 	u32 limit;
 | 
			
		||||
 	u32 memory_limit;
 | 
			
		||||
 	u32 memory_usage;
 | 
			
		||||
--- a/include/net/fq_impl.h
 | 
			
		||||
+++ b/include/net/fq_impl.h
 | 
			
		||||
@@ -108,15 +108,7 @@ begin:
 | 
			
		||||
 
 | 
			
		||||
 static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
 | 
			
		||||
 {
 | 
			
		||||
-#if LINUX_VERSION_IS_GEQ(5,3,10) || \
 | 
			
		||||
-    LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \
 | 
			
		||||
-    LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \
 | 
			
		||||
-    LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \
 | 
			
		||||
-    LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0)
 | 
			
		||||
-	u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
 | 
			
		||||
-#else
 | 
			
		||||
-	u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
 | 
			
		||||
-#endif
 | 
			
		||||
+	u32 hash = skb_get_hash(skb);
 | 
			
		||||
 
 | 
			
		||||
 	return reciprocal_scale(hash, fq->flows_cnt);
 | 
			
		||||
 }
 | 
			
		||||
@@ -316,7 +308,6 @@ static int fq_init(struct fq *fq, int fl
 | 
			
		||||
 	INIT_LIST_HEAD(&fq->backlogs);
 | 
			
		||||
 	spin_lock_init(&fq->lock);
 | 
			
		||||
 	fq->flows_cnt = max_t(u32, flows_cnt, 1);
 | 
			
		||||
-	get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
 | 
			
		||||
 	fq->quantum = 300;
 | 
			
		||||
 	fq->limit = 8192;
 | 
			
		||||
 	fq->memory_limit = 16 << 20; /* 16 MBytes */
 | 
			
		||||
@ -0,0 +1,19 @@
 | 
			
		||||
From: Felix Fietkau <nbd@nbd.name>
 | 
			
		||||
Date: Sun, 26 Jul 2020 14:42:58 +0200
 | 
			
		||||
Subject: [PATCH] mac80211: calculate skb hash early when using itxq
 | 
			
		||||
 | 
			
		||||
This avoids flow separation issues when using software encryption
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Felix Fietkau <nbd@nbd.name>
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
--- a/net/mac80211/tx.c
 | 
			
		||||
+++ b/net/mac80211/tx.c
 | 
			
		||||
@@ -3937,6 +3937,7 @@ void __ieee80211_subif_start_xmit(struct
 | 
			
		||||
 	if (local->ops->wake_tx_queue) {
 | 
			
		||||
 		u16 queue = __ieee80211_select_queue(sdata, sta, skb);
 | 
			
		||||
 		skb_set_queue_mapping(skb, queue);
 | 
			
		||||
+		skb_get_hash(skb);
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
 	if (sta) {
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user