aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2025-07-31 02:26:49 +0000
committerLinus Torvalds <[email protected]>2025-07-31 02:26:49 +0000
commit260f6f4fda93c8485c8037865c941b42b9cba5d2 (patch)
tree587a0ea46d3351f63250d19860b01da8217ac774 /drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
parentMerge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm (diff)
parentMerge tag 'drm-misc-next-fixes-2025-07-24' of https://gitlab.freedesktop.org/... (diff)
downloadkernel-260f6f4fda93c8485c8037865c941b42b9cba5d2.tar.gz
kernel-260f6f4fda93c8485c8037865c941b42b9cba5d2.zip
Merge tag 'drm-next-2025-07-30' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie: "Highlights: - Intel xe enable Panthor Lake, started adding WildCat Lake - amdgpu has a bunch of reset improvments along with the usual IP updates - msm got VM_BIND support which is important for vulkan sparse memory - more drm_panic users - gpusvm common code to handle a bunch of core SVM work outside drivers. Detail summary: Changes outside drm subdirectory: - 'shrink_shmem_memory()' for better shmem/hibernate interaction - Rust support infrastructure: - make ETIMEDOUT available - add size constants up to SZ_2G - add DMA coherent allocation bindings - mtd driver for Intel GPU non-volatile storage - i2c designware quirk for Intel xe core: - atomic helpers: tune enable/disable sequences - add task info to wedge API - refactor EDID quirks - connector: move HDR sink to drm_display_info - fourcc: half-float and 32-bit float formats - mode_config: pass format info to simplify dma-buf: - heaps: Give CMA heap a stable name ci: - add device tree validation and kunit displayport: - change AUX DPCD access probe address - add quirk for DPCD probe - add panel replay definitions - backlight control helpers fbdev: - make CONFIG_FIRMWARE_EDID available on all arches fence: - fix UAF issues format-helper: - improve tests gpusvm: - introduce devmem only flag for allocation - add timeslicing support to GPU SVM ttm: - improve eviction sched: - tracing improvements - kunit improvements - memory leak fixes - reset handling improvements color mgmt: - add hardware gamma LUT handling helpers bridge: - add destroy hook - switch to reference counted drm_bridge allocations - tc358767: convert to devm_drm_bridge_alloc - improve CEC handling panel: - switch to reference counter drm_panel allocations - fwnode panel lookup - Huiling hl055fhv028c support - Raspberry Pi 7" 720x1280 support - edp: KDC KD116N3730A05, N160JCE-ELL CMN, N116BCJ-EAK - simple: AUO P238HAN01 - st7701: Winstar wf40eswaa6mnn0 - visionox: rm69299-shift - Renesas R61307, Renesas R69328 support - DJN HX83112B hdmi: - add CEC handling - YUV420 output support xe: - WildCat Lake support - Enable PanthorLake by default - mark BMG as SRIOV capable - update firmware recommendations - Expose media OA units - aux-bux support for non-volatile memory - MTD intel-dg driver for non-volatile memory - Expose fan control and voltage regulator in sysfs - restructure migration for multi-device - Restore GuC submit UAF fix - make GEM shrinker drm managed - SRIOV VF Post-migration recovery of GGTT nodes - W/A additions/reworks - Prefetch support for svm ranges - Don't allocate managed BO for each policy change - HWMON fixes for BMG - Create LRC BO without VM - PCI ID updates - make SLPC debugfs files optional - rework eviction rejection of bound external BOs - consolidate PAT programming logic for pre/post Xe2 - init changes for flicker-free boot - Enable GuC Dynamic Inhibit Context switch i915: - drm_panic support for i915/xe - initial flip queue off by default for LNL/PNL - Wildcat Lake Display support - Support for DSC fractional link bpp - Support for simultaneous Panel Replay and Adaptive sync - Support for PTL+ double buffer LUT - initial PIPEDMC event handling - drm_panel_follower support - DPLL interface renames - allocate struct intel_display dynamically - flip queue preperation - abstract DRAM detection better - avoid GuC scheduling stalls - remove DG1 force probe requirement - fix MEI interrupt handler on RT kernels - use backlight control helpers for eDP - more shared display code refactoring amdgpu: - add userq slot to INFO ioctl - SR-IOV hibernation support - Suspend improvements - Backlight improvements - Use scaling for non-native eDP modes - cleaner shader updates for GC 9.x - Remove fence slab - SDMA fw checks for userq support - RAS updates - DMCUB updates - DP tunneling fixes - Display idle D3 support - Per queue reset improvements - initial smartmux support amdkfd: - enable KFD on loongarch - mtype fix for ext coherent system memory radeon: - CS validation additional GL extensions - drop console lock during suspend/resume - bump driver version msm: - VM BIND support - CI: infrastructure updates - UBWC single source of truth - decouple GPU and KMS support - DP: rework I/O accessors - DPU: SM8750 support - DSI: SM8750 support - GPU: X1-45 support and speedbin support for X1-85 - MDSS: SM8750 support nova: - register! macro improvements - DMA object abstraction - VBIOS parser + fwsec lookup - sysmem flush page support - falcon: generic falcon boot code and HAL - FWSEC-FRTS: fb setup and load/execute ivpu: - Add Wildcat Lake support - Add turbo flag ast: - improve hardware generations implementation imx: - IMX8qxq Display Controller support lima: - Rockchip RK3528 GPU support nouveau: - fence handling cleanup panfrost: - MT8370 support - bo labeling - 64-bit register access qaic: - add RAS support rockchip: - convert inno_hdmi to a bridge rz-du: - add RZ/V2H(P) support - MIPI-DSI DCS support sitronix: - ST7567 support sun4i: - add H616 support tidss: - add TI AM62L support - AM65x OLDI bridge support bochs: - drm panic support vkms: - YUV and R* format support - use faux device vmwgfx: - fence improvements hyperv: - move out of simple - add drm_panic support" * tag 'drm-next-2025-07-30' of https://gitlab.freedesktop.org/drm/kernel: (1479 commits) drm/tidss: oldi: convert to devm_drm_bridge_alloc() API drm/tidss: encoder: convert to devm_drm_bridge_alloc() drm/amdgpu: move reset support type checks into the caller drm/amdgpu/sdma7: re-emit unprocessed state on ring reset drm/amdgpu/sdma6: re-emit unprocessed state on ring reset drm/amdgpu/sdma5.2: re-emit unprocessed state on ring reset drm/amdgpu/sdma5: re-emit unprocessed state on ring reset drm/amdgpu/gfx12: re-emit unprocessed state on ring reset drm/amdgpu/gfx11: re-emit unprocessed state on ring reset drm/amdgpu/gfx10: re-emit unprocessed state on ring reset drm/amdgpu/gfx9.4.3: re-emit unprocessed state on kcq reset drm/amdgpu/gfx9: re-emit unprocessed state on kcq reset drm/amdgpu: Add WARN_ON to the resource clear function drm/amd/pm: Use cached metrics data on SMUv13.0.6 drm/amd/pm: Use cached data for min/max clocks gpu: nova-core: fix bounds check in PmuLookupTableEntry::new drm/amdgpu: Replace HQD terminology with slots naming drm/amdgpu: Add user queue instance count in HW IP info drm/amd/amdgpu: Add helper functions for isp buffers drm/amd/amdgpu: Initialize swnode for ISP MFD device ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c58
1 files changed, 38 insertions, 20 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4db92e0a60da..395c6be901ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -144,6 +144,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
AMDGPU_DEBUG_SMU_POOL = BIT(7),
AMDGPU_DEBUG_VM_USERPTR = BIT(8),
+ AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -361,12 +362,12 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint
* The second one is for Compute. The third and fourth ones are
* for SDMA and Video.
*
- * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
- * jobs is 10000. The timeout for compute is 60000.
+ * By default(with no lockup_timeout settings), the timeout for all jobs is 10000.
*/
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
- "for passthrough or sriov, 10000 for all jobs. 0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
- "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
+MODULE_PARM_DESC(lockup_timeout,
+ "GPU lockup timeout in ms (default: 10000 for all jobs. "
+ "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
+ "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
/**
@@ -2278,6 +2279,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: VM mode debug for userptr is enabled\n");
adev->debug_vm_userptr = true;
}
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) {
+ pr_info("debug: disable kernel logs of correctable errors\n");
+ adev->debug_disable_ce_logs = true;
+ }
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
@@ -2321,7 +2327,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
amdgpu_aspm = 0;
if (amdgpu_virtual_display ||
- amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+ amdgpu_device_asic_has_dc_support(pdev, flags & AMD_ASIC_MASK))
supports_atomic = true;
if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
@@ -2451,10 +2457,10 @@ retry_init:
if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
/* only need to skip on ATPX */
- if (amdgpu_device_supports_px(ddev))
+ if (amdgpu_device_supports_px(adev))
dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
/* we want direct complete for BOCO */
- if (amdgpu_device_supports_boco(ddev))
+ if (amdgpu_device_supports_boco(adev))
dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE |
DPM_FLAG_SMART_SUSPEND |
DPM_FLAG_MAY_SKIP_RESUME);
@@ -2487,9 +2493,9 @@ retry_init:
* into D0 state. Then there will be a PMFW-aware D-state
* transition(D0->D3) on runpm suspend.
*/
- if (amdgpu_device_supports_baco(ddev) &&
+ if (amdgpu_device_supports_baco(adev) &&
!(adev->flags & AMD_IS_APU) &&
- (adev->asic_type >= CHIP_NAVI10))
+ adev->asic_type >= CHIP_NAVI10)
amdgpu_get_secondary_funcs(adev);
}
@@ -2506,6 +2512,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
+ amdgpu_ras_eeprom_check_and_recover(adev);
amdgpu_xcp_dev_unplug(adev);
amdgpu_gmc_prepare_nps_mode_change(adev);
drm_dev_unplug(dev);
@@ -2535,6 +2542,10 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
if (amdgpu_ras_intr_triggered())
return;
+ /* device maybe not resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return;
+
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown.
* unfortunately we can't detect certain
@@ -2551,11 +2562,14 @@ static int amdgpu_pmops_prepare(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ /* device maybe not resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
+
/* Return a positive number here so
* DPM_FLAG_SMART_SUSPEND works properly
*/
- if (amdgpu_device_supports_boco(drm_dev) &&
- pm_runtime_suspended(dev))
+ if (amdgpu_device_supports_boco(adev) && pm_runtime_suspended(dev))
return 1;
/* if we will not support s3 or s2i for the device
@@ -2570,7 +2584,7 @@ static int amdgpu_pmops_prepare(struct device *dev)
static void amdgpu_pmops_complete(struct device *dev)
{
- /* nothing to do */
+ amdgpu_device_complete(dev_get_drvdata(dev));
}
static int amdgpu_pmops_suspend(struct device *dev)
@@ -2650,12 +2664,21 @@ static int amdgpu_pmops_thaw(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ /* do not resume device if it's normal hibernation */
+ if (!pm_hibernate_is_recovering())
+ return 0;
+
return amdgpu_device_resume(drm_dev, true);
}
static int amdgpu_pmops_poweroff(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ /* device maybe not resumed here, return immediately in this case */
+ if (adev->in_s4 && adev->in_suspend)
+ return 0;
return amdgpu_device_suspend(drm_dev, true);
}
@@ -2828,7 +2851,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
/* nothing to do */
} else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
(adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
- amdgpu_device_baco_enter(drm_dev);
+ amdgpu_device_baco_enter(adev);
}
dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
@@ -2869,7 +2892,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
pci_set_master(pdev);
} else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
(adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
- amdgpu_device_baco_exit(drm_dev);
+ amdgpu_device_baco_exit(adev);
}
ret = amdgpu_device_resume(drm_dev, false);
if (ret) {
@@ -3107,10 +3130,6 @@ static int __init amdgpu_init(void)
if (r)
goto error_sync;
- r = amdgpu_fence_slab_init();
- if (r)
- goto error_fence;
-
r = amdgpu_userq_fence_slab_init();
if (r)
goto error_fence;
@@ -3145,7 +3164,6 @@ static void __exit amdgpu_exit(void)
amdgpu_unregister_atpx_handler();
amdgpu_acpi_release();
amdgpu_sync_fini();
- amdgpu_fence_slab_fini();
amdgpu_userq_fence_slab_fini();
mmu_notifier_synchronize();
amdgpu_xcp_drv_release();