aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/intel/iavf/iavf_main.c
diff options
context:
space:
mode:
author: Linus Torvalds <[email protected]> 2025-01-22 16:28:57 +0000
committer: Linus Torvalds <[email protected]> 2025-01-22 16:28:57 +0000
commit: 0ad9617c78acbc71373fb341a6f75d4012b01d69 (patch)
tree: 602d7c9ec86d9a4891a96a2996af6e4368a647eb /drivers/net/ethernet/intel/iavf/iavf_main.c
parent: cachestat: fix page cache statistics permission checking (diff)
parent: Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net (diff)
download: kernel-0ad9617c78acbc71373fb341a6f75d4012b01d69.tar.gz
kernel-0ad9617c78acbc71373fb341a6f75d4012b01d69.zip
Merge tag 'net-next-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "This is slightly smaller than usual, with the most interesting work being still around RTNL scope reduction. Core: - More core refactoring to reduce the RTNL lock contention, including preparatory work for the per-network namespace RTNL lock, replacing RTNL lock with a per device-one to protect NAPI-related net device data and moving synchronize_net() calls outside such lock. - Extend drop reasons usage, adding net scheduler, AF_UNIX, bridge and more specific TCP coverage. - Reduce network namespace tear-down time by removing per-subsystems synchronize_net() in tipc and sched. - Add flow label selector support for fib rules, allowing traffic redirection based on such header field. Netfilter: - Do not remove netdev basechain when last device is gone, allowing netdev basechains without devices. - Revisit the flowtable teardown strategy, dealing better with fin, reset and re-open events. - Scale-up IP-vs connection dumping by avoiding linear search on each restart. Protocols: - A significant XDP socket refactor, consolidating and optimizing several helpers into the core - Better scaling of ICMP rate-limiting, by removing false-sharing in inet peers handling. - Introduces netlink notifications for multicast IPv4 and IPv6 address changes. - Add ipsec support for IP-TFS/AggFrag encapsulation, allowing aggregation and fragmentation of the inner IP. - Add sysctl to configure TIME-WAIT reuse delay for TCP sockets, to avoid local port exhaustion issues when the average connection lifetime is very short. - Support updating keys (re-keying) for connections using kernel TLS (for TLS 1.3 only). - Support ipv4-mapped ipv6 address clients in smc-r v2. - Add support for jumbo data packet transmission in RxRPC sockets, gluing multiple data packets in a single UDP packet. - Support RxRPC RACK-TLP to manage packet loss and retransmission in conjunction with the congestion control algorithm. 
Driver API: - Introduce a unified and structured interface for reporting PHY statistics, exposing consistent data across different H/W via ethtool. - Make timestamping selectable, allow the user to select the desired hwtstamp provider (PHY or MAC) administratively. - Add support for configuring a header-data-split threshold (HDS) value via ethtool, to deal with partial or buggy H/W implementation. - Consolidate DSA drivers Energy Efficiency Ethernet support. - Add EEE management to phylink, making use of the phylib implementation. - Add phylib support for in-band capabilities negotiation. - Simplify how phylib-enabled mac drivers expose the supported interfaces. Tests and tooling: - Make the YNL tool package-friendly to make it easier to deploy it separately from the kernel. - Increase TCP selftest coverage importing several packetdrill test-cases. - Regenerate the ethtool uapi header from the YNL spec, to ease maintenance and future development. - Add YNL support for decoding the link types used in net self-tests, allowing a single build to run both net and drivers/net. Drivers: - Ethernet high-speed NICs: - nVidia/Mellanox (mlx5): - add cross E-Switch QoS support - add SW Steering support for ConnectX-8 - implement support for HW-Managed Flow Steering, improving the rule deletion/insertion rate - support for multi-host LAG - Intel (ixgbe, ice, igb): - ice: add support for devlink health events - ixgbe: add initial support for E610 chipset variant - igb: add support for AF_XDP zero-copy - Meta: - add support for basic RSS config - allow changing the number of channels - add hardware monitoring support - Broadcom (bnxt): - implement TCP data split and HDS threshold ethtool support, enabling Device Memory TCP. 
- Marvell Octeon: - implement egress ipsec offload support for the cn10k family - Hisilicon (HIBMC): - implement unicast MAC filtering - Ethernet NICs embedded and virtual: - Convert UDP tunnel drivers to NETDEV_PCPU_STAT_DSTATS, avoiding contended atomic operations for drop counters - Freescale: - quicc: phylink conversion - enetc: support Tx and Rx checksum offload and improve TSO performance - MediaTek: - airoha: introduce support for ETS and HTB Qdisc offload - Microchip: - lan78XX USB: preparation work for phylink conversion - Synopsys (stmmac): - support DWMAC IP on NXP Automotive SoCs S32G2xx/S32G3xx/S32R45 - refactor EEE support to leverage the new driver API - optimize DMA and cache access to increase raw RX performance by 40% - TI: - icssg-prueth: add multicast filtering support for VLAN interface - netkit: - add ability to configure head/tailroom - VXLAN: - accepts packets with user-defined reserved bit - Ethernet switches: - Microchip: - lan969x: add RGMII support - lan969x: improve TX and RX performance using the FDMA engine - nVidia/Mellanox: - move Tx header handling to PCI driver, to ease XDP support - Ethernet PHYs: - Texas Instruments DP83822: - add support for GPIO2 clock output - Realtek: - 8169: add support for RTL8125D rev.b - rtl822x: add hwmon support for the temperature sensor - Microchip: - add support for RDS PTP hardware - consolidate periodic output signal generation - CAN: - several DT-bindings to DT schema conversions - tcan4x5x: - add HW standby support - support nWKRQ voltage selection - kvaser: - allowing Bus Error Reporting runtime configuration - WiFi: - the on-going Multi-Link Operation (MLO) effort continues, affecting both the stack and drivers - mac80211/cfg80211: - Emergency Preparedness Communication Services (EPCS) station mode support - support for adding and removing station links for MLO - add support for WiFi 7/EHT mesh over 320 MHz channels - report Tx power info for each link - RealTek (rtw88): - enable USB Rx
aggregation and USB 3 to improve performance - LED support - RealTek (rtw89): - refactor power save to support Multi-Link Operations - add support for RTL8922AE-VS variant - MediaTek (mt76): - single wiphy multiband support (preparation for MLO) - p2p device support - add TP-Link TXE50UH USB adapter support - Qualcomm (ath10k): - support for the QCA6698AQ IP core - Qualcomm (ath12k): - enable MLO for QCN9274 - Bluetooth: - Allow sysfs to trigger hdev reset, to allow recovering devices not responsive from user-space - MediaTek: add support for MT7922, MT7925, MT7921e devices - Realtek: add support for RTL8851BE devices - Qualcomm: add support for WCN785x devices - ISO: allow BIG re-sync" * tag 'net-next-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1386 commits) net/rose: prevent integer overflows in rose_setsockopt() net: phylink: fix regression when binding a PHY net: ethernet: ti: am65-cpsw: streamline TX queue creation and cleanup net: ethernet: ti: am65-cpsw: streamline RX queue creation and cleanup net: ethernet: ti: am65-cpsw: ensure proper channel cleanup in error path ipv6: Convert inet6_rtm_deladdr() to per-netns RTNL. ipv6: Convert inet6_rtm_newaddr() to per-netns RTNL. ipv6: Move lifetime validation to inet6_rtm_newaddr(). ipv6: Set cfg.ifa_flags before device lookup in inet6_rtm_newaddr(). ipv6: Pass dev to inet6_addr_add(). ipv6: Convert inet6_ioctl() to per-netns RTNL. ipv6: Hold rtnl_net_lock() in addrconf_init() and addrconf_cleanup(). ipv6: Hold rtnl_net_lock() in addrconf_dad_work(). ipv6: Hold rtnl_net_lock() in addrconf_verify_work(). ipv6: Convert net.ipv6.conf.${DEV}.XXX sysctl to per-netns RTNL. ipv6: Add __in6_dev_get_rtnl_net(). net: stmmac: Drop redundant skb_mark_for_recycle() for SKB frags net: mii: Fix the Speed display when the network cable is not connected sysctl net: Remove macro checks for CONFIG_SYSCTL eth: bnxt: update header sizing defaults ...
Diffstat (limited to 'drivers/net/ethernet/intel/iavf/iavf_main.c')
-rw-r--r-- drivers/net/ethernet/intel/iavf/iavf_main.c 79
1 files changed, 58 insertions, 21 deletions
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index a9e54866ae6b..cbfaaa5b7d02 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1180,7 +1180,7 @@ static void iavf_napi_enable_all(struct iavf_adapter *adapter)
q_vector = &adapter->q_vectors[q_idx];
napi = &q_vector->napi;
- napi_enable(napi);
+ napi_enable_locked(napi);
}
}
@@ -1196,7 +1196,7 @@ static void iavf_napi_disable_all(struct iavf_adapter *adapter)
for (q_idx = 0; q_idx < q_vectors; q_idx++) {
q_vector = &adapter->q_vectors[q_idx];
- napi_disable(&q_vector->napi);
+ napi_disable_locked(&q_vector->napi);
}
}
@@ -1800,8 +1800,8 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
q_vector->v_idx = q_idx;
q_vector->reg_idx = q_idx;
cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
- netif_napi_add(adapter->netdev, &q_vector->napi,
- iavf_napi_poll);
+ netif_napi_add_locked(adapter->netdev, &q_vector->napi,
+ iavf_napi_poll);
}
return 0;
@@ -1827,7 +1827,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
- netif_napi_del(&q_vector->napi);
+ netif_napi_del_locked(&q_vector->napi);
}
kfree(adapter->q_vectors);
adapter->q_vectors = NULL;
@@ -1968,6 +1968,7 @@ err:
static void iavf_finish_config(struct work_struct *work)
{
struct iavf_adapter *adapter;
+ bool netdev_released = false;
int pairs, err;
adapter = container_of(work, struct iavf_adapter, finish_config);
@@ -1976,7 +1977,7 @@ static void iavf_finish_config(struct work_struct *work)
* The dev->lock is needed to update the queue number
*/
rtnl_lock();
- mutex_lock(&adapter->netdev->lock);
+ netdev_lock(adapter->netdev);
mutex_lock(&adapter->crit_lock);
if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
@@ -1988,7 +1989,16 @@ static void iavf_finish_config(struct work_struct *work)
switch (adapter->state) {
case __IAVF_DOWN:
+ /* Set the real number of queues when reset occurs while
+ * state == __IAVF_DOWN
+ */
+ pairs = adapter->num_active_queues;
+ netif_set_real_num_rx_queues(adapter->netdev, pairs);
+ netif_set_real_num_tx_queues(adapter->netdev, pairs);
+
if (adapter->netdev->reg_state != NETREG_REGISTERED) {
+ netdev_unlock(adapter->netdev);
+ netdev_released = true;
err = register_netdevice(adapter->netdev);
if (err) {
dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
@@ -2003,11 +2013,7 @@ static void iavf_finish_config(struct work_struct *work)
goto out;
}
}
-
- /* Set the real number of queues when reset occurs while
- * state == __IAVF_DOWN
- */
- fallthrough;
+ break;
case __IAVF_RUNNING:
pairs = adapter->num_active_queues;
netif_set_real_num_rx_queues(adapter->netdev, pairs);
@@ -2020,7 +2026,8 @@ static void iavf_finish_config(struct work_struct *work)
out:
mutex_unlock(&adapter->crit_lock);
- mutex_unlock(&adapter->netdev->lock);
+ if (!netdev_released)
+ netdev_unlock(adapter->netdev);
rtnl_unlock();
}
@@ -2713,12 +2720,16 @@ static void iavf_watchdog_task(struct work_struct *work)
struct iavf_adapter *adapter = container_of(work,
struct iavf_adapter,
watchdog_task.work);
+ struct net_device *netdev = adapter->netdev;
struct iavf_hw *hw = &adapter->hw;
u32 reg_val;
+ netdev_lock(netdev);
if (!mutex_trylock(&adapter->crit_lock)) {
- if (adapter->state == __IAVF_REMOVE)
+ if (adapter->state == __IAVF_REMOVE) {
+ netdev_unlock(netdev);
return;
+ }
goto restart_watchdog;
}
@@ -2730,30 +2741,35 @@ static void iavf_watchdog_task(struct work_struct *work)
case __IAVF_STARTUP:
iavf_startup(adapter);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_VERSION_CHECK:
iavf_init_version_check(adapter);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_GET_RESOURCES:
iavf_init_get_resources(adapter);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_EXTENDED_CAPS:
iavf_init_process_extended_caps(adapter);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_CONFIG_ADAPTER:
iavf_init_config_adapter(adapter);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
@@ -2765,6 +2781,7 @@ static void iavf_watchdog_task(struct work_struct *work)
* as it can loop forever
*/
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
return;
}
if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
@@ -2773,6 +2790,7 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
iavf_shutdown_adminq(hw);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task, (5 * HZ));
return;
@@ -2780,6 +2798,7 @@ static void iavf_watchdog_task(struct work_struct *work)
/* Try again from failed step*/
iavf_change_state(adapter, adapter->last_state);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ);
return;
case __IAVF_COMM_FAILED:
@@ -2792,6 +2811,7 @@ static void iavf_watchdog_task(struct work_struct *work)
iavf_change_state(adapter, __IAVF_INIT_FAILED);
adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
return;
}
reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
@@ -2811,12 +2831,14 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task,
msecs_to_jiffies(10));
return;
case __IAVF_RESETTING:
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
HZ * 2);
return;
@@ -2847,6 +2869,7 @@ static void iavf_watchdog_task(struct work_struct *work)
case __IAVF_REMOVE:
default:
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
return;
}
@@ -2858,12 +2881,14 @@ static void iavf_watchdog_task(struct work_struct *work)
dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task, HZ * 2);
return;
}
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
restart_watchdog:
if (adapter->state >= __IAVF_DOWN)
queue_work(adapter->wq, &adapter->adminq_task);
@@ -2990,12 +3015,12 @@ static void iavf_reset_task(struct work_struct *work)
/* When device is being removed it doesn't make sense to run the reset
* task, just return in such a case.
*/
- mutex_lock(&netdev->lock);
+ netdev_lock(netdev);
if (!mutex_trylock(&adapter->crit_lock)) {
if (adapter->state != __IAVF_REMOVE)
queue_work(adapter->wq, &adapter->reset_task);
- mutex_unlock(&netdev->lock);
+ netdev_unlock(netdev);
return;
}
@@ -3043,7 +3068,7 @@ static void iavf_reset_task(struct work_struct *work)
reg_val);
iavf_disable_vf(adapter);
mutex_unlock(&adapter->crit_lock);
- mutex_unlock(&netdev->lock);
+ netdev_unlock(netdev);
return; /* Do not attempt to reinit. It's dead, Jim. */
}
@@ -3184,7 +3209,7 @@ continue_reset:
wake_up(&adapter->reset_waitqueue);
mutex_unlock(&adapter->crit_lock);
- mutex_unlock(&netdev->lock);
+ netdev_unlock(netdev);
return;
reset_err:
@@ -3195,7 +3220,7 @@ reset_err:
iavf_disable_vf(adapter);
mutex_unlock(&adapter->crit_lock);
- mutex_unlock(&netdev->lock);
+ netdev_unlock(netdev);
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
}
@@ -3667,10 +3692,10 @@ exit:
if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
return 0;
- mutex_lock(&netdev->lock);
+ netdev_lock(netdev);
netif_set_real_num_rx_queues(netdev, total_qps);
netif_set_real_num_tx_queues(netdev, total_qps);
- mutex_unlock(&netdev->lock);
+ netdev_unlock(netdev);
return ret;
}
@@ -4340,14 +4365,17 @@ static int iavf_open(struct net_device *netdev)
return -EIO;
}
+ netdev_lock(netdev);
while (!mutex_trylock(&adapter->crit_lock)) {
/* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock
* is already taken and iavf_open is called from an upper
* device's notifier reacting on NETDEV_REGISTER event.
* We have to leave here to avoid dead lock.
*/
- if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER)
+ if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) {
+ netdev_unlock(netdev);
return -EBUSY;
+ }
usleep_range(500, 1000);
}
@@ -4396,6 +4424,7 @@ static int iavf_open(struct net_device *netdev)
iavf_irq_enable(adapter, true);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
return 0;
@@ -4408,6 +4437,7 @@ err_setup_tx:
iavf_free_all_tx_resources(adapter);
err_unlock:
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
return err;
}
@@ -4429,10 +4459,12 @@ static int iavf_close(struct net_device *netdev)
u64 aq_to_restore;
int status;
+ netdev_lock(netdev);
mutex_lock(&adapter->crit_lock);
if (adapter->state <= __IAVF_DOWN_PENDING) {
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
return 0;
}
@@ -4466,6 +4498,7 @@ static int iavf_close(struct net_device *netdev)
iavf_free_traffic_irqs(adapter);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
/* We explicitly don't free resources here because the hardware is
* still active and can DMA into memory. Resources are cleared in
@@ -5342,6 +5375,7 @@ static int iavf_suspend(struct device *dev_d)
netif_device_detach(netdev);
+ netdev_lock(netdev);
mutex_lock(&adapter->crit_lock);
if (netif_running(netdev)) {
@@ -5353,6 +5387,7 @@ static int iavf_suspend(struct device *dev_d)
iavf_reset_interrupt_capability(adapter);
mutex_unlock(&adapter->crit_lock);
+ netdev_unlock(netdev);
return 0;
}
@@ -5451,6 +5486,7 @@ static void iavf_remove(struct pci_dev *pdev)
if (netdev->reg_state == NETREG_REGISTERED)
unregister_netdev(netdev);
+ netdev_lock(netdev);
mutex_lock(&adapter->crit_lock);
dev_info(&adapter->pdev->dev, "Removing device\n");
iavf_change_state(adapter, __IAVF_REMOVE);
@@ -5487,6 +5523,7 @@ static void iavf_remove(struct pci_dev *pdev)
mutex_destroy(&hw->aq.asq_mutex);
mutex_unlock(&adapter->crit_lock);
mutex_destroy(&adapter->crit_lock);
+ netdev_unlock(netdev);
iounmap(hw->hw_addr);
pci_release_regions(pdev);