aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2025-07-31 02:26:49 +0000
committerLinus Torvalds <[email protected]>2025-07-31 02:26:49 +0000
commit260f6f4fda93c8485c8037865c941b42b9cba5d2 (patch)
tree587a0ea46d3351f63250d19860b01da8217ac774 /drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
parentMerge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm (diff)
parentMerge tag 'drm-misc-next-fixes-2025-07-24' of https://gitlab.freedesktop.org/... (diff)
downloadkernel-260f6f4fda93c8485c8037865c941b42b9cba5d2.tar.gz
kernel-260f6f4fda93c8485c8037865c941b42b9cba5d2.zip
Merge tag 'drm-next-2025-07-30' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie: "Highlights: - Intel xe enable Panthor Lake, started adding WildCat Lake - amdgpu has a bunch of reset improvments along with the usual IP updates - msm got VM_BIND support which is important for vulkan sparse memory - more drm_panic users - gpusvm common code to handle a bunch of core SVM work outside drivers. Detail summary: Changes outside drm subdirectory: - 'shrink_shmem_memory()' for better shmem/hibernate interaction - Rust support infrastructure: - make ETIMEDOUT available - add size constants up to SZ_2G - add DMA coherent allocation bindings - mtd driver for Intel GPU non-volatile storage - i2c designware quirk for Intel xe core: - atomic helpers: tune enable/disable sequences - add task info to wedge API - refactor EDID quirks - connector: move HDR sink to drm_display_info - fourcc: half-float and 32-bit float formats - mode_config: pass format info to simplify dma-buf: - heaps: Give CMA heap a stable name ci: - add device tree validation and kunit displayport: - change AUX DPCD access probe address - add quirk for DPCD probe - add panel replay definitions - backlight control helpers fbdev: - make CONFIG_FIRMWARE_EDID available on all arches fence: - fix UAF issues format-helper: - improve tests gpusvm: - introduce devmem only flag for allocation - add timeslicing support to GPU SVM ttm: - improve eviction sched: - tracing improvements - kunit improvements - memory leak fixes - reset handling improvements color mgmt: - add hardware gamma LUT handling helpers bridge: - add destroy hook - switch to reference counted drm_bridge allocations - tc358767: convert to devm_drm_bridge_alloc - improve CEC handling panel: - switch to reference counter drm_panel allocations - fwnode panel lookup - Huiling hl055fhv028c support - Raspberry Pi 7" 720x1280 support - edp: KDC KD116N3730A05, N160JCE-ELL CMN, N116BCJ-EAK - simple: AUO P238HAN01 - st7701: Winstar wf40eswaa6mnn0 - visionox: rm69299-shift - Renesas R61307, Renesas R69328 support - DJN HX83112B hdmi: - add CEC handling - YUV420 output support xe: - WildCat Lake support - Enable PanthorLake by default - mark BMG as SRIOV capable - update firmware recommendations - Expose media OA units - aux-bux support for non-volatile memory - MTD intel-dg driver for non-volatile memory - Expose fan control and voltage regulator in sysfs - restructure migration for multi-device - Restore GuC submit UAF fix - make GEM shrinker drm managed - SRIOV VF Post-migration recovery of GGTT nodes - W/A additions/reworks - Prefetch support for svm ranges - Don't allocate managed BO for each policy change - HWMON fixes for BMG - Create LRC BO without VM - PCI ID updates - make SLPC debugfs files optional - rework eviction rejection of bound external BOs - consolidate PAT programming logic for pre/post Xe2 - init changes for flicker-free boot - Enable GuC Dynamic Inhibit Context switch i915: - drm_panic support for i915/xe - initial flip queue off by default for LNL/PNL - Wildcat Lake Display support - Support for DSC fractional link bpp - Support for simultaneous Panel Replay and Adaptive sync - Support for PTL+ double buffer LUT - initial PIPEDMC event handling - drm_panel_follower support - DPLL interface renames - allocate struct intel_display dynamically - flip queue preperation - abstract DRAM detection better - avoid GuC scheduling stalls - remove DG1 force probe requirement - fix MEI interrupt handler on RT kernels - use backlight control helpers for eDP - more shared display code refactoring amdgpu: - add userq slot to INFO ioctl - SR-IOV hibernation support - Suspend improvements - Backlight improvements - Use scaling for non-native eDP modes - cleaner shader updates for GC 9.x - Remove fence slab - SDMA fw checks for userq support - RAS updates - DMCUB updates - DP tunneling fixes - Display idle D3 support - Per queue reset improvements - initial smartmux support amdkfd: - enable KFD on loongarch - mtype fix for ext coherent system memory radeon: - CS validation additional GL extensions - drop console lock during suspend/resume - bump driver version msm: - VM BIND support - CI: infrastructure updates - UBWC single source of truth - decouple GPU and KMS support - DP: rework I/O accessors - DPU: SM8750 support - DSI: SM8750 support - GPU: X1-45 support and speedbin support for X1-85 - MDSS: SM8750 support nova: - register! macro improvements - DMA object abstraction - VBIOS parser + fwsec lookup - sysmem flush page support - falcon: generic falcon boot code and HAL - FWSEC-FRTS: fb setup and load/execute ivpu: - Add Wildcat Lake support - Add turbo flag ast: - improve hardware generations implementation imx: - IMX8qxq Display Controller support lima: - Rockchip RK3528 GPU support nouveau: - fence handling cleanup panfrost: - MT8370 support - bo labeling - 64-bit register access qaic: - add RAS support rockchip: - convert inno_hdmi to a bridge rz-du: - add RZ/V2H(P) support - MIPI-DSI DCS support sitronix: - ST7567 support sun4i: - add H616 support tidss: - add TI AM62L support - AM65x OLDI bridge support bochs: - drm panic support vkms: - YUV and R* format support - use faux device vmwgfx: - fence improvements hyperv: - move out of simple - add drm_panic support" * tag 'drm-next-2025-07-30' of https://gitlab.freedesktop.org/drm/kernel: (1479 commits) drm/tidss: oldi: convert to devm_drm_bridge_alloc() API drm/tidss: encoder: convert to devm_drm_bridge_alloc() drm/amdgpu: move reset support type checks into the caller drm/amdgpu/sdma7: re-emit unprocessed state on ring reset drm/amdgpu/sdma6: re-emit unprocessed state on ring reset drm/amdgpu/sdma5.2: re-emit unprocessed state on ring reset drm/amdgpu/sdma5: re-emit unprocessed state on ring reset drm/amdgpu/gfx12: re-emit unprocessed state on ring reset drm/amdgpu/gfx11: re-emit unprocessed state on ring reset drm/amdgpu/gfx10: re-emit unprocessed state on ring reset drm/amdgpu/gfx9.4.3: re-emit unprocessed state on kcq reset drm/amdgpu/gfx9: re-emit unprocessed state on kcq reset drm/amdgpu: Add WARN_ON to the resource clear function drm/amd/pm: Use cached metrics data on SMUv13.0.6 drm/amd/pm: Use cached data for min/max clocks gpu: nova-core: fix bounds check in PmuLookupTableEntry::new drm/amdgpu: Replace HQD terminology with slots naming drm/amdgpu: Add user queue instance count in HW IP info drm/amd/amdgpu: Add helper functions for isp buffers drm/amd/amdgpu: Initialize swnode for ISP MFD device ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c262
1 files changed, 255 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 6b0fbbb91e57..97b562a79ea8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -38,6 +38,13 @@
#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>
+static const u64 four_gb = 0x100000000ULL;
+
+bool amdgpu_gmc_is_pdb0_enabled(struct amdgpu_device *adev)
+{
+ return adev->gmc.xgmi.connected_to_cpu || amdgpu_virt_xgmi_migrate_enabled(adev);
+}
+
/**
* amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
*
@@ -251,10 +258,20 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
- mc->gart_start = hive_vram_end + 1;
+ /* node_segment_size may not 4GB aligned on SRIOV, align up is needed. */
+ mc->gart_start = ALIGN(hive_vram_end + 1, four_gb);
mc->gart_end = mc->gart_start + mc->gart_size - 1;
- mc->fb_start = hive_vram_start;
- mc->fb_end = hive_vram_end;
+ if (amdgpu_virt_xgmi_migrate_enabled(adev)) {
+ /* set mc->vram_start to 0 to switch the returned GPU address of
+ * amdgpu_bo_create_reserved() from FB aperture to GART aperture.
+ */
+ mc->vram_start = 0;
+ mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+ mc->visible_vram_size = min(mc->visible_vram_size, mc->real_vram_size);
+ } else {
+ mc->fb_start = hive_vram_start;
+ mc->fb_end = hive_vram_end;
+ }
dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
mc->mc_vram_size >> 20, mc->vram_start,
mc->vram_end, mc->real_vram_size >> 20);
@@ -276,7 +293,6 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
enum amdgpu_gart_placement gart_placement)
{
- const uint64_t four_gb = 0x100000000ULL;
u64 size_af, size_bf;
/*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/
u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
@@ -1041,9 +1057,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
*/
u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
- u64 vram_addr = adev->vm_manager.vram_base_offset -
- adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
- u64 vram_end = vram_addr + vram_size;
+ u64 vram_addr, vram_end;
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
int idx;
@@ -1056,6 +1070,11 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
flags |= AMDGPU_PDE_PTE_FLAG(adev);
+ vram_addr = adev->vm_manager.vram_base_offset;
+ if (!amdgpu_virt_xgmi_migrate_enabled(adev))
+ vram_addr -= adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ vram_end = vram_addr + vram_size;
+
/* The first n PDE0 entries are used as PTE,
* pointing to vram
*/
@@ -1429,3 +1448,232 @@ bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_get_vf_memory_partition(struct amdgpu_device *adev)
+{
+ switch (adev->gmc.num_mem_partitions) {
+ case 0:
+ return UNKNOWN_MEMORY_PARTITION_MODE;
+ case 1:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ case 2:
+ return AMDGPU_NPS2_PARTITION_MODE;
+ case 4:
+ return AMDGPU_NPS4_PARTITION_MODE;
+ case 8:
+ return AMDGPU_NPS8_PARTITION_MODE;
+ default:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ }
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+{
+ enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
+
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_memory_partition_mode)
+ mode = adev->nbio.funcs->get_memory_partition_mode(adev,
+ supp_modes);
+ else
+ dev_warn(adev->dev, "memory partition mode query is not supported\n");
+
+ return mode;
+}
+
+enum amdgpu_memory_partition
+amdgpu_gmc_query_memory_partition(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev))
+ return amdgpu_gmc_get_vf_memory_partition(adev);
+ else
+ return amdgpu_gmc_get_memory_partition(adev, NULL);
+}
+
+static bool amdgpu_gmc_validate_partition_info(struct amdgpu_device *adev)
+{
+ enum amdgpu_memory_partition mode;
+ u32 supp_modes;
+ bool valid;
+
+ mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware not present in supported modes */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
+ !(BIT(mode - 1) & supp_modes))
+ return false;
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ case AMDGPU_NPS1_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 1);
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 2);
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 3 ||
+ adev->gmc.num_mem_partitions == 4);
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ valid = (adev->gmc.num_mem_partitions == 8);
+ break;
+ default:
+ valid = false;
+ }
+
+ return valid;
+}
+
+static bool amdgpu_gmc_is_node_present(int *node_ids, int num_ids, int nid)
+{
+ int i;
+
+ /* Check if node with id 'nid' is present in 'node_ids' array */
+ for (i = 0; i < num_ids; ++i)
+ if (node_ids[i] == nid)
+ return true;
+
+ return false;
+}
+
+static void
+amdgpu_gmc_init_acpi_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ struct amdgpu_numa_info numa_info;
+ int node_ids[AMDGPU_MAX_MEM_RANGES];
+ int num_ranges = 0, ret;
+ int num_xcc, xcc_id;
+ uint32_t xcc_mask;
+
+ num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+ xcc_mask = (1U << num_xcc) - 1;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
+ if (ret)
+ continue;
+
+ if (numa_info.nid == NUMA_NO_NODE) {
+ mem_ranges[0].size = numa_info.size;
+ mem_ranges[0].numa.node = numa_info.nid;
+ num_ranges = 1;
+ break;
+ }
+
+ if (amdgpu_gmc_is_node_present(node_ids, num_ranges,
+ numa_info.nid))
+ continue;
+
+ node_ids[num_ranges] = numa_info.nid;
+ mem_ranges[num_ranges].numa.node = numa_info.nid;
+ mem_ranges[num_ranges].size = numa_info.size;
+ ++num_ranges;
+ }
+
+ adev->gmc.num_mem_partitions = num_ranges;
+}
+
+void amdgpu_gmc_init_sw_mem_ranges(struct amdgpu_device *adev,
+ struct amdgpu_mem_partition_info *mem_ranges)
+{
+ enum amdgpu_memory_partition mode;
+ u32 start_addr = 0, size;
+ int i, r, l;
+
+ mode = amdgpu_gmc_query_memory_partition(adev);
+
+ switch (mode) {
+ case UNKNOWN_MEMORY_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 0;
+ break;
+ case AMDGPU_NPS1_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ case AMDGPU_NPS2_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 2;
+ break;
+ case AMDGPU_NPS4_PARTITION_MODE:
+ if (adev->flags & AMD_IS_APU)
+ adev->gmc.num_mem_partitions = 3;
+ else
+ adev->gmc.num_mem_partitions = 4;
+ break;
+ case AMDGPU_NPS8_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 8;
+ break;
+ default:
+ adev->gmc.num_mem_partitions = 1;
+ break;
+ }
+
+ /* Use NPS range info, if populated */
+ r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
+ &adev->gmc.num_mem_partitions);
+ if (!r) {
+ l = 0;
+ for (i = 1; i < adev->gmc.num_mem_partitions; ++i) {
+ if (mem_ranges[i].range.lpfn >
+ mem_ranges[i - 1].range.lpfn)
+ l = i;
+ }
+
+ } else {
+ if (!adev->gmc.num_mem_partitions) {
+ dev_warn(adev->dev,
+ "Not able to detect NPS mode, fall back to NPS1\n");
+ adev->gmc.num_mem_partitions = 1;
+ }
+ /* Fallback to sw based calculation */
+ size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
+ size /= adev->gmc.num_mem_partitions;
+
+ for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
+ mem_ranges[i].range.fpfn = start_addr;
+ mem_ranges[i].size =
+ ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
+ mem_ranges[i].range.lpfn = start_addr + size - 1;
+ start_addr += size;
+ }
+
+ l = adev->gmc.num_mem_partitions - 1;
+ }
+
+ /* Adjust the last one */
+ mem_ranges[l].range.lpfn =
+ (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
+ mem_ranges[l].size =
+ adev->gmc.real_vram_size -
+ ((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT);
+}
+
+int amdgpu_gmc_init_mem_ranges(struct amdgpu_device *adev)
+{
+ bool valid;
+
+ adev->gmc.mem_partitions = kcalloc(AMDGPU_MAX_MEM_RANGES,
+ sizeof(struct amdgpu_mem_partition_info),
+ GFP_KERNEL);
+ if (!adev->gmc.mem_partitions)
+ return -ENOMEM;
+
+ if (adev->gmc.is_app_apu)
+ amdgpu_gmc_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
+ else
+ amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+
+ if (amdgpu_sriov_vf(adev))
+ valid = true;
+ else
+ valid = amdgpu_gmc_validate_partition_info(adev);
+ if (!valid) {
+ /* TODO: handle invalid case */
+ dev_warn(adev->dev,
+ "Mem ranges not matching with hardware config\n");
+ }
+
+ return 0;
+}