From 7ff35af5fef76c14c350d6327f8a6b86b3a3cbd4 Mon Sep 17 00:00:00 2001 From: Balamurugan S Date: Sat, 28 Jan 2023 10:57:05 +0530 Subject: [PATCH 1/2] ath12k: Performance optimization in Rx path Optimized dp_service_srng(). There were multiple dereferences to check the various ring masks. This has been optimized to read them in one shot to avoid cache misses. Added branch prediction hints (likely/unlikely) in Rx processing. Changed napi_gro_receive() to netif_receive_skb() in fast_rx path. Introduced a cached entry lookup to get the next descriptor in the Rx path. Signed-off-by: Balamurugan S --- drivers/net/wireless/ath/ath12k/dp.c | 48 +++++++++++++++++-------- drivers/net/wireless/ath/ath12k/dp_rx.c | 31 ++++++++-------- 2 files changed, 50 insertions(+), 29 deletions(-) Index: backports-20220822-5.4.213-ef7197996efe/drivers/net/wireless/ath/ath12k/dp.c =================================================================== --- backports-20220822-5.4.213-ef7197996efe.orig/drivers/net/wireless/ath/ath12k/dp.c +++ backports-20220822-5.4.213-ef7197996efe/drivers/net/wireless/ath/ath12k/dp.c @@ -927,15 +927,28 @@ int ath12k_dp_service_srng(struct ath12k int i = 0, j; int tot_work_done = 0; bool flag; + struct ath12k_hw_ring_mask *ring_mask = ab->hw_params->ring_mask; + u8 tx_mask = ring_mask->tx[grp_id]; + u8 rx_err_mask = ring_mask->rx_err[grp_id]; + u8 rx_wbm_rel_mask = ring_mask->rx_wbm_rel[grp_id]; + u8 rx_mask = ring_mask->rx[grp_id]; + u8 reo_status_mask = ring_mask->reo_status[grp_id]; + u8 host2rxdma_mask = ring_mask->host2rxdma[grp_id]; + u8 rx_mon_dest_mask = ring_mask->rx_mon_dest[grp_id]; + u8 tx_mon_dest_mask = ring_mask->tx_mon_dest[grp_id]; + u8 misc_intr_mask = rx_mon_dest_mask | + tx_mon_dest_mask | + reo_status_mask | + host2rxdma_mask; while (i < ab->hw_params->max_tx_ring) { - if (ab->hw_params->ring_mask->tx[grp_id] & - BIT(ab->hw_params->hal_ops->tcl_to_wbm_rbm_map[i].wbm_ring_num)) + if (tx_mask & + BIT(ab->hw_params->hal_ops->tcl_to_wbm_rbm_map[i].wbm_ring_num)) ath12k_dp_tx_completion_handler(ab, 
i); i++; } - - if (ab->hw_params->ring_mask->rx_err[grp_id]) { + + if (rx_err_mask) { work_done = ath12k_dp_rx_process_err(ab, napi, budget); budget -= work_done; tot_work_done += work_done; @@ -943,7 +956,7 @@ int ath12k_dp_service_srng(struct ath12k goto done; } - if (ab->hw_params->ring_mask->rx_wbm_rel[grp_id]) { + if (rx_wbm_rel_mask) { work_done = ath12k_dp_rx_process_wbm_err(ab, napi, budget); @@ -954,8 +967,8 @@ int ath12k_dp_service_srng(struct ath12k goto done; } - if (ab->hw_params->ring_mask->rx[grp_id]) { - i = fls(ab->hw_params->ring_mask->rx[grp_id]) - 1; + if (rx_mask) { + i = fls(rx_mask) - 1; work_done = ath12k_dp_rx_process(ab, i, napi, budget); budget -= work_done; @@ -964,14 +977,17 @@ int ath12k_dp_service_srng(struct ath12k goto done; } - if (ab->hw_params->ring_mask->rx_mon_dest[grp_id]) { + if (!misc_intr_mask) + goto done; + + if (rx_mon_dest_mask) { for (i = 0; i < ab->num_radios; i++) { for (j = 0; j < ab->hw_params->num_rxmda_per_pdev; j++) { int id = i * ab->hw_params->num_rxmda_per_pdev + j; flag = ATH12K_DP_RX_MONITOR_MODE; - if (ab->hw_params->ring_mask->rx_mon_dest[grp_id] & + if (rx_mon_dest_mask & BIT(id)) { work_done = ath12k_dp_mon_process_ring(ab, id, napi, budget, @@ -986,14 +1002,14 @@ int ath12k_dp_service_srng(struct ath12k } } - if (ab->hw_params->ring_mask->tx_mon_dest[grp_id]) { + if (tx_mon_dest_mask) { for (i = 0; i < ab->num_radios; i++) { for (j = 0; j < ab->hw_params->num_rxmda_per_pdev; j++) { int id = i * ab->hw_params->num_rxmda_per_pdev + j; flag = ATH12K_DP_TX_MONITOR_MODE; - if (ab->hw_params->ring_mask->tx_mon_dest[grp_id] & + if (tx_mon_dest_mask & BIT(id)) { work_done = ath12k_dp_mon_process_ring(ab, id, napi, budget, @@ -1008,10 +1024,10 @@ int ath12k_dp_service_srng(struct ath12k } } - if (ab->hw_params->ring_mask->reo_status[grp_id]) + if (reo_status_mask) ath12k_dp_rx_process_reo_status(ab); - if (ab->hw_params->ring_mask->host2rxdma[grp_id]) { + if (host2rxdma_mask) { struct ath12k_dp *dp = &ab->dp; 
struct dp_rxdma_ring *rx_ring = &dp->rx_refill_buf_ring; Index: backports-20220822-5.4.213-ef7197996efe/drivers/net/wireless/ath/ath12k/dp_rx.c =================================================================== --- backports-20220822-5.4.213-ef7197996efe.orig/drivers/net/wireless/ath/ath12k/dp_rx.c +++ backports-20220822-5.4.213-ef7197996efe/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -328,7 +328,7 @@ int ath12k_dp_rx_bufs_replenish(struct a while (num_remain > 0) { skb = dev_alloc_skb(DP_RX_BUFFER_SIZE + DP_RX_BUFFER_ALIGN_SIZE); - if (!skb) + if (unlikely(!skb)) break; if (!IS_ALIGNED((unsigned long)skb->data, @@ -344,7 +344,7 @@ int ath12k_dp_rx_bufs_replenish(struct a if (dma_mapping_error(ab->dev, paddr)) goto fail_free_skb; - if (hw_cc) { + if (likely(hw_cc)) { spin_lock_bh(&dp->rx_desc_lock); /* Get desc from free list and store in used list @@ -2671,7 +2671,6 @@ static void ath12k_dp_rx_h_mpdu(struct a u32 vp; #endif struct wireless_dev *wdev = NULL; - struct ath12k_sta *arsta = NULL; /* PN for multicast packets will be checked in mac80211 */ rxcb = ATH12K_SKB_RXCB(msdu); @@ -2705,11 +2704,7 @@ static void ath12k_dp_rx_h_mpdu(struct a } #endif msdu->protocol = eth_type_trans(msdu, msdu->dev); - napi_gro_receive(rxcb->napi, msdu); - if (peer->sta) - arsta = - (struct ath12k_sta *)peer->sta->drv_priv; - + netif_receive_skb(msdu); return; } } @@ -3212,6 +3207,7 @@ int ath12k_dp_rx_process(struct ath12k_b struct ath12k_peer *peer = NULL; struct ath12k *ar; u8 hw_link_id; + int valid_entries; __skb_queue_head_init(&msdu_list); @@ -3221,8 +3217,15 @@ int ath12k_dp_rx_process(struct ath12k_b try_again: ath12k_hal_srng_access_begin(ab, srng); + valid_entries = ath12k_hal_srng_dst_num_free(ab, srng, false); + if (unlikely(!valid_entries)) { + ath12k_hal_srng_access_end(ab, srng); + spin_unlock_bh(&srng->lock); + return -EINVAL; + } + ath12k_hal_srng_dst_invalidate_entry(ab, srng, valid_entries); - while ((rx_desc = ath12k_hal_srng_dst_get_next_entry(ab, srng))) { + 
while (likely((rx_desc = ath12k_hal_srng_dst_get_next_cache_entry(ab, srng)))) { struct hal_reo_dest_ring desc = *(struct hal_reo_dest_ring *)rx_desc; enum hal_reo_dest_ring_push_reason push_reason; u32 cookie; @@ -3260,7 +3263,7 @@ try_again: rcu_read_unlock(); /* retry manual desc retrieval */ - if (!desc_info) { + if (unlikely(!desc_info)) { desc_info = ath12k_dp_get_rx_desc(src_ab, cookie); if (!desc_info) { ath12k_warn(ab, "Rx with invalid buf cookie 0x%x\n", cookie); @@ -3268,7 +3271,7 @@ try_again: } } - if (desc_info->magic != ATH12K_DP_RX_DESC_MAGIC) + if (unlikely(desc_info->magic != ATH12K_DP_RX_DESC_MAGIC)) ath12k_warn(ab, "Check HW CC implementation"); msdu = desc_info->skb; @@ -3287,8 +3290,8 @@ try_again: push_reason = u32_get_bits(desc.info0, HAL_REO_DEST_RING_INFO0_PUSH_REASON); - if (push_reason != - HAL_REO_DEST_RING_PUSH_REASON_ROUTING_INSTRUCTION) { + if (unlikely(push_reason != + HAL_REO_DEST_RING_PUSH_REASON_ROUTING_INSTRUCTION)) { dev_kfree_skb_any(msdu); ab->soc_stats.hal_reo_error[dp->reo_dst_ring[ring_id].ring_id]++; continue; @@ -3346,7 +3349,7 @@ try_again: spin_unlock_bh(&srng->lock); - if (!total_msdu_reaped) + if (unlikely(!total_msdu_reaped)) goto exit; for (i = 0; i < ab->ag->num_chip; i++) {