aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/intel/idpf/idpf_lib.c
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2025-07-30 15:58:55 +0000
committerLinus Torvalds <[email protected]>2025-07-30 15:58:55 +0000
commit8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf (patch)
treefec3039a08284cd87f4ec9c3bea5b5a439f1859f /drivers/net/ethernet/intel/idpf/idpf_lib.c
parentMerge tag 'sysctl-6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff)
parentdpll: zl3073x: Fix build failure (diff)
downloadkernel-8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf.tar.gz
kernel-8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf.zip
Merge tag 'net-next-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core & protocols: - Wrap datapath globals into net_aligned_data, to avoid false sharing - Preserve MSG_ZEROCOPY in forwarding (e.g. out of a container) - Add SO_INQ and SCM_INQ support to AF_UNIX - Add SIOCINQ support to AF_VSOCK - Add TCP_MAXSEG sockopt to MPTCP - Add IPv6 force_forwarding sysctl to enable forwarding per interface - Make TCP validation of whether packet fully fits in the receive window and the rcv_buf more strict. With increased use of HW aggregation a single "packet" can be multiple 100s of kB - Add MSG_MORE flag to optimize large TCP transmissions via sockmap, improves latency up to 33% for sockmap users - Convert TCP send queue handling from tasklet to BH workque - Improve BPF iteration over TCP sockets to see each socket exactly once - Remove obsolete and unused TCP RFC3517/RFC6675 loss recovery code - Support enabling kernel threads for NAPI processing on per-NAPI instance basis rather than a whole device. Fully stop the kernel NAPI thread when threaded NAPI gets disabled. Previously thread would stick around until ifdown due to tricky synchronization - Allow multicast routing to take effect on locally-generated packets - Add output interface argument for End.X in segment routing - MCTP: add support for gateway routing, improve bind() handling - Don't require rtnl_lock when fetching an IPv6 neighbor over Netlink - Add a new neighbor flag ("extern_valid"), which cedes refresh responsibilities to userspace. This is needed for EVPN multi-homing where a neighbor entry for a multi-homed host needs to be synced across all the VTEPs among which the host is multi-homed - Support NUD_PERMANENT for proxy neighbor entries - Add a new queuing discipline for IETF RFC9332 DualQ Coupled AQM - Add sequence numbers to netconsole messages. Unregister netconsole's console when all net targets are removed. Code refactoring. Add a number of selftests - Align IPSec inbound SA lookup to RFC 4301. Only SPI and protocol should be used for an inbound SA lookup - Support inspecting ref_tracker state via DebugFS - Don't force bonding advertisement frames tx to ~333 ms boundaries. Add broadcast_neighbor option to send ARP/ND on all bonded links - Allow providing upcall pid for the 'execute' command in openvswitch - Remove DCCP support from Netfilter's conntrack - Disallow multiple packet duplications in the queuing layer - Prevent use of deprecated iptables code on PREEMPT_RT Driver API: - Support RSS and hashing configuration over ethtool Netlink - Add dedicated ethtool callbacks for getting and setting hashing fields - Add support for power budget evaluation strategy in PSE / Power-over-Ethernet. Generate Netlink events for overcurrent etc - Support DPLL phase offset monitoring across all device inputs. Support providing clock reference and SYNC over separate DPLL inputs - Support traffic classes in devlink rate API for bandwidth management - Remove rtnl_lock dependency from UDP tunnel port configuration Device drivers: - Add a new Broadcom driver for 800G Ethernet (bnge) - Add a standalone driver for Microchip ZL3073x DPLL - Remove IBM's NETIUCV device driver - Ethernet high-speed NICs: - Broadcom (bnxt): - support zero-copy Tx of DMABUF memory - take page size into account for page pool recycling rings - Intel (100G, ice, idpf): - idpf: XDP and AF_XDP support preparations - idpf: add flow steering - add link_down_events statistic - clean up the TSPLL code - preparations for live VM migration - nVidia/Mellanox: - support zero-copy Rx/Tx interfaces (DMABUF and io_uring) - optimize context memory usage for matchers - expose serial numbers in devlink info - support PCIe congestion metrics - Meta (fbnic): - add 25G, 50G, and 100G link modes to phylink - support dumping FW logs - Marvell/Cavium: - support for CN20K generation of the Octeon chips - Amazon: - add HW clock (without timestamping, just hypervisor time access) - Ethernet virtual: - VirtIO net: - support segmentation of UDP-tunnel-encapsulated packets - Google (gve): - support packet timestamping and clock synchronization - Microsoft vNIC: - add handler for device-originated servicing events - allow dynamic MSI-X vector allocation - support Tx bandwidth clamping - Ethernet NICs consumer, and embedded: - AMD: - amd-xgbe: hardware timestamping and PTP clock support - Broadcom integrated MACs (bcmgenet, bcmasp): - use napi_complete_done() return value to support NAPI polling - add support for re-starting auto-negotiation - Broadcom switches (b53): - support BCM5325 switches - add bcm63xx EPHY power control - Synopsys (stmmac): - lots of code refactoring and cleanups - TI: - icssg-prueth: read firmware-names from device tree - icssg: PRP offload support - Microchip: - lan78xx: convert to PHYLINK for improved PHY and MAC management - ksz: add KSZ8463 switch support - Intel: - support similar queue priority scheme in multi-queue and time-sensitive networking (taprio) - support packet pre-emption in both - RealTek (r8169): - enable EEE at 5Gbps on RTL8126 - Airoha: - add PPPoE offload support - MDIO bus controller for Airoha AN7583 - Ethernet PHYs: - support for the IPQ5018 internal GE PHY - micrel KSZ9477 switch-integrated PHYs: - add MDI/MDI-X control support - add RX error counters - add cable test support - add Signal Quality Indicator (SQI) reporting - dp83tg720: improve reset handling and reduce link recovery time - support bcm54811 (and its MII-Lite interface type) - air_en8811h: support resume/suspend - support PHY counters for QCA807x and QCA808x - support WoL for QCA807x - CAN drivers: - rcar_canfd: support for Transceiver Delay Compensation - kvaser: report FW versions via devlink dev info - WiFi: - extended regulatory info support (6 GHz) - add statistics and beacon monitor for Multi-Link Operation (MLO) - support S1G aggregation, improve S1G support - add Radio Measurement action fields - support per-radio RTS threshold - some work around how FIPS affects wifi, which was wrong (RC4 is used by TKIP, not only WEP) - improvements for unsolicited probe response handling - WiFi drivers: - RealTek (rtw88): - IBSS mode for SDIO devices - RealTek (rtw89): - BT coexistence for MLO/WiFi7 - concurrent station + P2P support - support for USB devices RTL8851BU/RTL8852BU - Intel (iwlwifi): - use embedded PNVM in (to be released) FW images to fix compatibility issues - many cleanups (unused FW APIs, PCIe code, WoWLAN) - some FIPS interoperability - MediaTek (mt76): - firmware recovery improvements - more MLO work - Qualcomm/Atheros (ath12k): - fix scan on multi-radio devices - more EHT/Wi-Fi 7 features - encapsulation/decapsulation offload - Broadcom (brcm80211): - support SDIO 43751 device - Bluetooth: - hci_event: add support for handling LE BIG Sync Lost event - ISO: add socket option to report packet seqnum via CMSG - ISO: support SCM_TIMESTAMPING for ISO TS - Bluetooth drivers: - intel_pcie: support Function Level Reset - nxpuart: add support for 4M baudrate - nxpuart: implement powerup sequence, reset, FW dump, and FW loading" * tag 'net-next-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1742 commits) dpll: zl3073x: Fix build failure selftests: bpf: fix legacy netfilter options ipv6: annotate data-races around rt->fib6_nsiblings ipv6: fix possible infinite loop in fib6_info_uses_dev() ipv6: prevent infinite loop in rt6_nlmsg_size() ipv6: add a retry logic in net6_rt_notify() vrf: Drop existing dst reference in vrf_ip6_input_dst net/sched: taprio: align entry index attr validation with mqprio net: fsl_pq_mdio: use dev_err_probe selftests: rtnetlink.sh: remove esp4_offload after test vsock: remove unnecessary null check in vsock_getname() igb: xsk: solve negative overflow of nb_pkts in zerocopy mode stmmac: xsk: fix negative overflow of budget in zerocopy mode dt-bindings: ieee802154: Convert at86rf230.txt yaml format net: dsa: microchip: Disable PTP function of KSZ8463 net: dsa: microchip: Setup fiber ports for KSZ8463 net: dsa: microchip: Write switch MAC address differently for KSZ8463 net: dsa: microchip: Use different registers for KSZ8463 net: dsa: microchip: Add KSZ8463 switch support to KSZ DSA driver dt-bindings: net: dsa: microchip: Add KSZ8463 switch support ...
Diffstat (limited to 'drivers/net/ethernet/intel/idpf/idpf_lib.c')
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_lib.c127
1 files changed, 105 insertions, 22 deletions
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 80382ff4a5fa..2c2a3e85d693 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -88,6 +88,8 @@ void idpf_intr_rel(struct idpf_adapter *adapter)
idpf_deinit_vector_stack(adapter);
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
+ kfree(adapter->rdma_msix_entries);
+ adapter->rdma_msix_entries = NULL;
}
/**
@@ -299,13 +301,33 @@ rel_lock:
*/
int idpf_intr_req(struct idpf_adapter *adapter)
{
+ u16 num_lan_vecs, min_lan_vecs, num_rdma_vecs = 0, min_rdma_vecs = 0;
u16 default_vports = idpf_get_default_vports(adapter);
int num_q_vecs, total_vecs, num_vec_ids;
- int min_vectors, v_actual, err;
+ int min_vectors, actual_vecs, err;
unsigned int vector;
u16 *vecids;
+ int i;
total_vecs = idpf_get_reserved_vecs(adapter);
+ num_lan_vecs = total_vecs;
+ if (idpf_is_rdma_cap_ena(adapter)) {
+ num_rdma_vecs = idpf_get_reserved_rdma_vecs(adapter);
+ min_rdma_vecs = IDPF_MIN_RDMA_VEC;
+
+ if (!num_rdma_vecs) {
+ /* If idpf_get_reserved_rdma_vecs is 0, vectors are
+ * pulled from the LAN pool.
+ */
+ num_rdma_vecs = min_rdma_vecs;
+ } else if (num_rdma_vecs < min_rdma_vecs) {
+ dev_err(&adapter->pdev->dev,
+ "Not enough vectors reserved for RDMA (min: %u, current: %u)\n",
+ min_rdma_vecs, num_rdma_vecs);
+ return -EINVAL;
+ }
+ }
+
num_q_vecs = total_vecs - IDPF_MBX_Q_VEC;
err = idpf_send_alloc_vectors_msg(adapter, num_q_vecs);
@@ -316,52 +338,76 @@ int idpf_intr_req(struct idpf_adapter *adapter)
return -EAGAIN;
}
- min_vectors = IDPF_MBX_Q_VEC + IDPF_MIN_Q_VEC * default_vports;
- v_actual = pci_alloc_irq_vectors(adapter->pdev, min_vectors,
- total_vecs, PCI_IRQ_MSIX);
- if (v_actual < min_vectors) {
- dev_err(&adapter->pdev->dev, "Failed to allocate MSIX vectors: %d\n",
- v_actual);
- err = -EAGAIN;
+ min_lan_vecs = IDPF_MBX_Q_VEC + IDPF_MIN_Q_VEC * default_vports;
+ min_vectors = min_lan_vecs + min_rdma_vecs;
+ actual_vecs = pci_alloc_irq_vectors(adapter->pdev, min_vectors,
+ total_vecs, PCI_IRQ_MSIX);
+ if (actual_vecs < 0) {
+ dev_err(&adapter->pdev->dev, "Failed to allocate minimum MSIX vectors required: %d\n",
+ min_vectors);
+ err = actual_vecs;
goto send_dealloc_vecs;
}
- adapter->msix_entries = kcalloc(v_actual, sizeof(struct msix_entry),
- GFP_KERNEL);
+ if (idpf_is_rdma_cap_ena(adapter)) {
+ if (actual_vecs < total_vecs) {
+ dev_warn(&adapter->pdev->dev,
+ "Warning: %d vectors requested, only %d available. Defaulting to minimum (%d) for RDMA and remaining for LAN.\n",
+ total_vecs, actual_vecs, IDPF_MIN_RDMA_VEC);
+ num_rdma_vecs = IDPF_MIN_RDMA_VEC;
+ }
+ adapter->rdma_msix_entries = kcalloc(num_rdma_vecs,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!adapter->rdma_msix_entries) {
+ err = -ENOMEM;
+ goto free_irq;
+ }
+ }
+
+ num_lan_vecs = actual_vecs - num_rdma_vecs;
+ adapter->msix_entries = kcalloc(num_lan_vecs, sizeof(struct msix_entry),
+ GFP_KERNEL);
if (!adapter->msix_entries) {
err = -ENOMEM;
- goto free_irq;
+ goto free_rdma_msix;
}
adapter->mb_vector.v_idx = le16_to_cpu(adapter->caps.mailbox_vector_id);
- vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL);
+ vecids = kcalloc(actual_vecs, sizeof(u16), GFP_KERNEL);
if (!vecids) {
err = -ENOMEM;
goto free_msix;
}
- num_vec_ids = idpf_get_vec_ids(adapter, vecids, total_vecs,
+ num_vec_ids = idpf_get_vec_ids(adapter, vecids, actual_vecs,
&adapter->req_vec_chunks->vchunks);
- if (num_vec_ids < v_actual) {
+ if (num_vec_ids < actual_vecs) {
err = -EINVAL;
goto free_vecids;
}
- for (vector = 0; vector < v_actual; vector++) {
+ for (vector = 0; vector < num_lan_vecs; vector++) {
adapter->msix_entries[vector].entry = vecids[vector];
adapter->msix_entries[vector].vector =
pci_irq_vector(adapter->pdev, vector);
}
+ for (i = 0; i < num_rdma_vecs; vector++, i++) {
+ adapter->rdma_msix_entries[i].entry = vecids[vector];
+ adapter->rdma_msix_entries[i].vector =
+ pci_irq_vector(adapter->pdev, vector);
+ }
- adapter->num_req_msix = total_vecs;
- adapter->num_msix_entries = v_actual;
/* 'num_avail_msix' is used to distribute excess vectors to the vports
* after considering the minimum vectors required per each default
* vport
*/
- adapter->num_avail_msix = v_actual - min_vectors;
+ adapter->num_avail_msix = num_lan_vecs - min_lan_vecs;
+ adapter->num_msix_entries = num_lan_vecs;
+ if (idpf_is_rdma_cap_ena(adapter))
+ adapter->num_rdma_msix_entries = num_rdma_vecs;
/* Fill MSIX vector lifo stack with vector indexes */
err = idpf_init_vector_stack(adapter);
@@ -383,6 +429,9 @@ free_vecids:
free_msix:
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
+free_rdma_msix:
+ kfree(adapter->rdma_msix_entries);
+ adapter->rdma_msix_entries = NULL;
free_irq:
pci_free_irq_vectors(adapter->pdev);
send_dealloc_vecs:
@@ -755,6 +804,10 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS))
dflt_features |= NETIF_F_RXHASH;
+ if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS,
+ VIRTCHNL2_CAP_FLOW_STEER) &&
+ idpf_vport_is_cap_ena(vport, VIRTCHNL2_VPORT_SIDEBAND_FLOW_STEER))
+ dflt_features |= NETIF_F_NTUPLE;
if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V4))
csum_offloads |= NETIF_F_IP_CSUM;
if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V6))
@@ -972,6 +1025,8 @@ static void idpf_vport_dealloc(struct idpf_vport *vport)
struct idpf_adapter *adapter = vport->adapter;
unsigned int i = vport->idx;
+ idpf_idc_deinit_vport_aux_device(vport->vdev_info);
+
idpf_deinit_mac_addr(vport);
idpf_vport_stop(vport);
@@ -1079,8 +1134,10 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
if (!vport)
return vport;
+ num_max_q = max(max_q->max_txq, max_q->max_rxq);
if (!adapter->vport_config[idx]) {
struct idpf_vport_config *vport_config;
+ struct idpf_q_coalesce *q_coal;
vport_config = kzalloc(sizeof(*vport_config), GFP_KERNEL);
if (!vport_config) {
@@ -1089,6 +1146,21 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
return NULL;
}
+ q_coal = kcalloc(num_max_q, sizeof(*q_coal), GFP_KERNEL);
+ if (!q_coal) {
+ kfree(vport_config);
+ kfree(vport);
+
+ return NULL;
+ }
+ for (int i = 0; i < num_max_q; i++) {
+ q_coal[i].tx_intr_mode = IDPF_ITR_DYNAMIC;
+ q_coal[i].tx_coalesce_usecs = IDPF_ITR_TX_DEF;
+ q_coal[i].rx_intr_mode = IDPF_ITR_DYNAMIC;
+ q_coal[i].rx_coalesce_usecs = IDPF_ITR_RX_DEF;
+ }
+ vport_config->user_config.q_coalesce = q_coal;
+
adapter->vport_config[idx] = vport_config;
}
@@ -1098,7 +1170,6 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter,
vport->default_vport = adapter->num_alloc_vports <
idpf_get_default_vports(adapter);
- num_max_q = max(max_q->max_txq, max_q->max_rxq);
vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL);
if (!vport->q_vector_idxs)
goto free_vport;
@@ -1481,6 +1552,7 @@ void idpf_init_task(struct work_struct *work)
spin_lock_init(&vport_config->mac_filter_list_lock);
INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list);
+ INIT_LIST_HEAD(&vport_config->user_config.flow_steer_list);
err = idpf_check_supported_desc_ids(vport);
if (err) {
@@ -1738,6 +1810,8 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter)
} else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) {
bool is_reset = idpf_is_reset_detected(adapter);
+ idpf_idc_issue_reset_event(adapter->cdev_info);
+
idpf_set_vport_state(adapter);
idpf_vc_core_deinit(adapter);
if (!is_reset)
@@ -1785,6 +1859,10 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter)
unlock_mutex:
mutex_unlock(&adapter->vport_ctrl_lock);
+ /* Wait until all vports are created to init RDMA CORE AUX */
+ if (!err)
+ err = idpf_idc_init(adapter);
+
return err;
}
@@ -1868,6 +1946,9 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
idpf_vport_calc_num_q_desc(new_vport);
break;
case IDPF_SR_MTU_CHANGE:
+ idpf_idc_vdev_mtu_event(vport->vdev_info,
+ IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE);
+ break;
case IDPF_SR_RSC_CHANGE:
break;
default:
@@ -1912,9 +1993,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
if (current_state == __IDPF_VPORT_UP)
err = idpf_vport_open(vport);
- kfree(new_vport);
-
- return err;
+ goto free_vport;
err_reset:
idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq,
@@ -1927,6 +2006,10 @@ err_open:
free_vport:
kfree(new_vport);
+ if (reset_cause == IDPF_SR_MTU_CHANGE)
+ idpf_idc_vdev_mtu_event(vport->vdev_info,
+ IIDC_RDMA_EVENT_AFTER_MTU_CHANGE);
+
return err;
}