aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2025-03-29 00:44:52 +0000
committerLinus Torvalds <[email protected]>2025-03-29 00:44:52 +0000
commit0c86b42439b6c11d758b3392a21117934fef00c1 (patch)
tree6aa7071476067661867af33767cc0e1863397a04 /drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
parentMerge tag 'fbdev-for-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff)
parentMerge tag 'drm-intel-gt-next-2025-03-12' of https://gitlab.freedesktop.org/dr... (diff)
downloadkernel-0c86b42439b6c11d758b3392a21117934fef00c1.tar.gz
kernel-0c86b42439b6c11d758b3392a21117934fef00c1.zip
Merge tag 'drm-next-2025-03-28' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie: "Outside of drm there are some rust patches from Danilo who maintains that area in here, and some pieces for drm header check tests. The major things in here are a new driver supporting the touchbar displays on M1/M2, the nova-core stub driver which is just the vehicle for adding rust abstractions and start developing a real driver inside of. xe adds support for SVM with a non-driver specific SVM core abstraction that will hopefully be useful for other drivers, along with support for shrinking for TTM devices. I'm sure xe and AMD support new devices, but the pipeline depth on these things is hard to know what they end up being in the marketplace! uapi: - add mediatek tiled fourcc - add support for notifying userspace on device wedged new driver: - appletbdrm: support for Apple Touchbar displays on m1/m2 - nova-core: skeleton rust driver to develop nova inside off firmware: - add some rust firmware pieces rust: - add 'LocalModule' type alias component: - add helper to query bound status fbdev: - fbtft: remove access to page->index media: - cec: tda998x: import driver from drm dma-buf: - add fast path for single fence merging tests: - fix lockdep warnings atomic: - allow full modeset on connector changes - clarify semantics of allow_modeset and drm_atomic_helper_check - async-flip: support on arbitary planes - writeback: fix UAF - Document atomic-state history format-helper: - support ARGB8888 to ARGB4444 conversions buddy: - fix multi-root cleanup ci: - update IGT dp: - support extended wake timeout - mst: fix RAD to string conversion - increase DPCD eDP control CAP size to 5 bytes - add DPCD eDP v1.5 definition - add helpers for LTTPR transparent mode panic: - encode QR code according to Fido 2.2 scheduler: - add parameter struct for init - improve job peek/pop operations - optimise drm_sched_job struct layout ttm: - refactor pool allocation - add helpers for TTM shrinker panel-orientation: - add a bunch of new quirks panel: - convert panels to multi-style functions - edp: Add support for B140UAN04.4, BOE NV140FHM-NZ, CSW MNB601LS1-3, LG LP079QX1-SP0V, MNE007QS3-7, STA 116QHD024002, Starry 116KHD024006, Lenovo T14s Gen6 Snapdragon - himax-hx83102: Add support for CSOT PNA957QT1-1, Kingdisplay kd110n11-51ie, Starry 2082109qfh040022-50e - visionox-r66451: use multi-style MIPI-DSI functions - raydium-rm67200: Add driver for Raydium RM67200 - simple: Add support for BOE AV123Z7M-N17, BOE AV123Z7M-N17 - sony-td4353-jdi: Use MIPI-DSI multi-func interface - summit: Add driver for Apple Summit display panel - visionox-rm692e5: Add driver for Visionox RM692E5 bridge: - pass full atomic state to various callbacks - adv7511: Report correct capabilities - it6505: Fix HDCP V compare - snd65dsi86: fix device IDs - nwl-dsi: set bridge type - ti-sn65si83: add error recovery and set bridge type - synopsys: add HDMI audio support xe: - support device-wedged event - add mmap support for PCI memory barrier - perf pmu integration and expose per-engien activity - add EU stall sampling support - GPU SVM and Xe SVM implementation - use TTM shrinker - add survivability mode to allow the driver to do firmware updates in critical failure states - PXP HWDRM support for MTL and LNL - expose package/vram temps over hwmon - enable DP tunneling - drop mmio_ext abstraction - Reject BO evcition if BO is bound to current VM - Xe suballocator improvements - re-use display vmas when possible - add GuC Buffer Cache abstraction - PCI ID update for Panther Lake and Battlemage - Enable SRIOV for Panther Lake - Refactor VRAM manager location i915: - enable extends wake timeout - support device-wedged event - Enable DP 128b/132b SST DSC - FBC dirty rectangle support for display version 30+ - convert i915/xe to drm client setup - Compute HDMI PLLS for rates not in fixed tables - Allow DSB usage when PSR is enabled on LNL+ - Enable panel replay without full modeset - Enable async flips with compressed buffers on ICL+ - support luminance based brightness via DPCD for eDP - enable VRR enable/disable without full modeset - allow GuC SLPC default strategies on MTL+ for performance - lots of display refactoring in move to struct intel_display amdgpu: - add device wedged event - support async page flips on overlay planes - enable broadcast RGB drm property - add info ioctl for virt mode - OEM i2c support for RGB lights - GC 11.5.2 + 11.5.3 support - SDMA 6.1.3 support - NBIO 7.9.1 + 7.11.2 support - MMHUB 1.8.1 + 3.3.2 support - DCN 3.6.0 support - Add dynamic workload profile switching for GC 10-12 - support larger VBIOS sizes - Mark gttsize parameters as deprecated - Initial JPEG queue resset support amdkfd: - add KFD per process flags for setting precision - sync pasid values between KGD and KFD - improve GTT/VRAM handling for APUs - fix user queue validation on GC7/8 - SDMA queue reset support raedeon: - rs400 hyperz fix i2c: - td998x: drop platform_data, split driver into media and bridge ast: - transmitter chip detection refactoring - vbios display mode refactoring - astdp: fix connection status and filter unsupported modes - cursor handling refactoring imagination: - check job dependencies with sched helper ivpu: - improve command queue handling - use workqueue for IRQ handling - add support HW fault injection - locking fixes mgag200: - add support for G200eH5 msm: - dpu: add concurrent writeback support for DPU 10.x+ - use LTTPR helpers - GPU: - Fix obscure GMU suspend failure - Expose syncobj timeline support - Extend GPU devcoredump with pagetable info - a623 support - Fix a6xx gen1/gen2 indexed-register blocks in gpu snapshot / devcoredump - Display: - Add cpu-cfg interconnect paths on SM8560 and SM8650 - Introduce KMS OMMU fault handler, causing devcoredump snapshot - Fixed error pointer dereference in msm_kms_init_aspace() - DPU: - Fix mode_changing handling - Add writeback support on SM6150 (QCS615) - Fix DSC programming in 1:1:1 topology - Reworked hardware resource allocation, moving it to the CRTC code - Enabled support for Concurrent WriteBack (CWB) on SM8650 - Enabled CDM blocks on all relevant platforms - Reworked debugfs interface for BW/clocks debugging - Clear perf params before calculating bw - Support YUV formats on writeback - Fixed double inclusion - Fixed writeback in YUV formats when using cloned output, Dropped wb2_formats_rgb - Corrected dpu_crtc_check_mode_changed and struct dpu_encoder_virt kerneldocs - Fixed uninitialized variable in dpu_crtc_kickoff_clone_mode() - DSI: - DSC-related fixes - Rework clock programming - DSI PHY: - Fix 7nm (and lower) PHY programming - Add proper DT schema definitions for DSI PHY clocks - HDMI: - Rework the driver, enabling the use of the HDMI Connector framework - Bindings: - Added eDP PHY on SA8775P nouveau: - move drm_slave_encoder interface into driver - nvkm: refactor GSP RPC - use LTTPR helpers mediatek: - HDMI fixup and refinement - add MT8188 dsc compatible - MT8365 SoC support panthor: - Expose sizes of intenral BOs via fdinfo - Fix race between reset and suspend - Improve locking qaic: - Add support for AIC200 renesas: - Fix limits in DT bindings rockchip: - support rk3562-mali - rk3576: Add HDMI support - vop2: Add new display modes on RK3588 HDMI0 up to 4K - Don't change HDMI reference clock rate - Fix DT bindings - analogix_dp: add eDP support - fix shutodnw solomon: - Set SPI device table to silence warnings - Fix pixel and scanline encoding v3d: - handle clock vc4: - Use drm_exec - Use dma-resv for wait-BO ioctl - Remove seqno infrastructure virtgpu: - Support partial mappings of GEM objects - Reserve VGA resources during initialization - Fix UAF in virtgpu_dma_buf_free_obj() - Add panic support vkms: - Switch to a managed modesetting pipeline - Add support for ARGB8888 - fix UAf xlnx: - Set correct DMA segment size - use mutex guards - Fix error handling - Fix docs" * tag 'drm-next-2025-03-28' of https://gitlab.freedesktop.org/drm/kernel: (1762 commits) drm/amd/pm: Update feature list for smu_v13_0_6 drm/amdgpu: Add parameter documentation for amdgpu_sync_fence drm/amdgpu/discovery: optionally use fw based ip discovery drm/amdgpu/discovery: use specific ip_discovery.bin for legacy asics drm/amdgpu/discovery: check ip_discovery fw file available drm/amd/pm: Remove unnecessay UQ10 to UINT conversion drm/amd/pm: Remove unnecessay UQ10 to UINT conversion drm/amdgpu/sdma_v4_4_2: update VM flush implementation for SDMA drm/amdgpu: Optimize VM invalidation engine allocation and synchronize GPU TLB flush drm/amd/amdgpu: Increase max rings to enable SDMA page ring drm/amdgpu: Decode deferred error type in gfx aca bank parser drm/amdgpu/gfx11: Add Cleaner Shader Support for GFX11.5 GPUs drm/amdgpu/mes: clean up SDMA HQD loop drm/amdgpu/mes: enable compute pipes across all MEC drm/amdgpu/mes: drop MES 10.x leftovers drm/amdgpu/mes: optimize compute loop handling drm/amdgpu/sdma: guilty tracking is per instance drm/amdgpu/sdma: fix engine reset handling drm/amdgpu: remove invalid usage of sched.ready drm/amdgpu: add cleaner shader trace point ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c486
1 files changed, 270 insertions, 216 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 83faf6e6788a..1991dd3d1056 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -93,47 +93,53 @@ MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
-int amdgpu_vcn_early_init(struct amdgpu_device *adev)
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
{
char ucode_prefix[25];
- int r, i;
+ int r;
+ adev->vcn.inst[i].adev = adev;
+ adev->vcn.inst[i].inst = i;
amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6))
- r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
- AMDGPU_UCODE_REQUIRED,
- "amdgpu/%s_%d.bin", ucode_prefix, i);
- else
- r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+
+ if (i != 0 && adev->vcn.per_inst_fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_%d.bin", ucode_prefix, i);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ if (!adev->vcn.inst[0].fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw,
AMDGPU_UCODE_REQUIRED,
"amdgpu/%s.bin", ucode_prefix);
- if (r) {
- amdgpu_ucode_release(&adev->vcn.inst[i].fw);
- return r;
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ } else {
+ r = 0;
}
+ adev->vcn.inst[i].fw = adev->vcn.inst[0].fw;
}
+
return r;
}
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
{
unsigned long bo_size;
const struct common_firmware_header *hdr;
unsigned char fw_check;
unsigned int fw_shared_size, log_offset;
- int i, r;
-
- INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
- mutex_init(&adev->vcn.vcn_pg_lock);
- mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
- atomic_set(&adev->vcn.total_submission_cnt, 0);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++)
- atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
+ int r;
+ mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
+ mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
+ atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
+ INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
+ adev->vcn.inst[i].indirect_sram = true;
/*
* Some Steam Deck's BIOS versions are incompatible with the
@@ -146,18 +152,19 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
- !strncmp("F7A0114", bios_ver, 7))) {
- adev->vcn.indirect_sram = false;
+ !strncmp("F7A0114", bios_ver, 7))) {
+ adev->vcn.inst[i].indirect_sram = false;
dev_info(adev->dev,
- "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+ "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
}
}
/* from vcn4 and above, only unified queue is used */
- adev->vcn.using_unified_queue =
+ adev->vcn.inst[i].using_unified_queue =
amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);
- hdr = (const struct common_firmware_header *)adev->vcn.inst[0].fw->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version);
adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
/* Bit 20-23, it is encode major and non-zero for new naming convention.
@@ -175,16 +182,17 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
enc_major = fw_check;
dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
- DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
- enc_major, enc_minor, dec_ver, vep, fw_rev);
+ dev_info(adev->dev,
+ "Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
+ enc_major, enc_minor, dec_ver, vep, fw_rev);
} else {
unsigned int version_major, version_minor, family_id;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
- version_major, version_minor, family_id);
+ dev_info(adev->dev, "Found VCN firmware Version: %u.%u Family ID: %u\n",
+ version_major, version_minor, family_id);
}
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
@@ -207,80 +215,77 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
if (amdgpu_vcnfw_log)
bo_size += AMDGPU_VCNFW_LOG_SIZE;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ &adev->vcn.inst[i].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ return r;
+ }
- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
+ bo_size - fw_shared_size;
+ adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
+ bo_size - fw_shared_size;
+
+ adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
+
+ if (amdgpu_vcnfw_log) {
+ adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.log_offset = log_offset;
+ }
+
+ if (adev->vcn.inst[i].indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
- &adev->vcn.inst[i].vcpu_bo,
- &adev->vcn.inst[i].gpu_addr,
- &adev->vcn.inst[i].cpu_addr);
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ &adev->vcn.inst[i].dpg_sram_cpu_addr);
if (r) {
- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
return r;
}
-
- adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
- bo_size - fw_shared_size;
- adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
- bo_size - fw_shared_size;
-
- adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
-
- if (amdgpu_vcnfw_log) {
- adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
- adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
- adev->vcn.inst[i].fw_shared.log_offset = log_offset;
- }
-
- if (adev->vcn.indirect_sram) {
- r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->vcn.inst[i].dpg_sram_bo,
- &adev->vcn.inst[i].dpg_sram_gpu_addr,
- &adev->vcn.inst[i].dpg_sram_cpu_addr);
- if (r) {
- dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
- return r;
- }
- }
}
return 0;
}
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
{
- int i, j;
+ int j;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- amdgpu_bo_free_kernel(
- &adev->vcn.inst[j].dpg_sram_bo,
- &adev->vcn.inst[j].dpg_sram_gpu_addr,
- (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+ amdgpu_bo_free_kernel(
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr);
- kvfree(adev->vcn.inst[j].saved_bo);
+ kvfree(adev->vcn.inst[i].saved_bo);
- amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
- &adev->vcn.inst[j].gpu_addr,
- (void **)&adev->vcn.inst[j].cpu_addr);
+ amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ (void **)&adev->vcn.inst[i].cpu_addr);
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]);
- amdgpu_ucode_release(&adev->vcn.inst[j].fw);
+ if (adev->vcn.per_inst_fw) {
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ adev->vcn.inst[i].fw = NULL;
}
-
- mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
- mutex_destroy(&adev->vcn.vcn_pg_lock);
+ mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
return 0;
}
@@ -300,179 +305,208 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t
return ret;
}
-int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
+static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i)
{
unsigned int size;
void *ptr;
- int i, idx;
+ int idx;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return 0;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return 0;
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
- if (!adev->vcn.inst[i].saved_bo)
- return -ENOMEM;
+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vcn.inst[i].saved_bo)
+ return -ENOMEM;
- if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
- drm_dev_exit(idx);
- }
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+ drm_dev_exit(idx);
+ }
+
+ return 0;
+}
+
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
+{
+ int ret, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+ if (ret)
+ return ret;
}
return 0;
}
-int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)
{
bool in_ras_intr = amdgpu_ras_intr_triggered();
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
/* err_event_athub will corrupt VCPU buffer, so we need to
* restore fw data and clear buffer in amdgpu_vcn_resume() */
if (in_ras_intr)
return 0;
- return amdgpu_vcn_save_vcpu_bo(adev);
+ return amdgpu_vcn_save_vcpu_bo_inst(adev, i);
}
-int amdgpu_vcn_resume(struct amdgpu_device *adev)
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i)
{
unsigned int size;
void *ptr;
- int i, idx;
+ int idx;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return -EINVAL;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- if (adev->vcn.inst[i].saved_bo != NULL) {
+ if (adev->vcn.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->vcn.inst[i].saved_bo);
+ adev->vcn.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ memcpy_toio(adev->vcn.inst[i].cpu_addr,
+ adev->vcn.inst[i].fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
drm_dev_exit(idx);
}
- kvfree(adev->vcn.inst[i].saved_bo);
- adev->vcn.inst[i].saved_bo = NULL;
- } else {
- const struct common_firmware_header *hdr;
- unsigned int offset;
-
- hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(adev->vcn.inst[i].cpu_addr,
- adev->vcn.inst[i].fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- drm_dev_exit(idx);
- }
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
- }
- memset_io(ptr, 0, size);
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
}
+ memset_io(ptr, 0, size);
}
+
return 0;
}
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
- unsigned int i, j;
+ unsigned int i = vcn_inst->inst, j;
int r = 0;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
-
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- !adev->vcn.using_unified_queue) {
- struct dpg_pause_state new_state;
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
- if (fence[j] ||
- unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !adev->vcn.inst[i].using_unified_queue) {
+ struct dpg_pause_state new_state;
- adev->vcn.pause_dpg_mode(adev, j, &new_state);
- }
+ if (fence[i] ||
+ unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
- fences += fence[j];
+ adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state);
}
- if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_GATE);
- r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- false);
- if (r)
- dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
+ fences += fence[i];
+
+ if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ if (adev->vcn.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ false);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ adev->vcn.workload_profile_active = false;
+ }
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
}
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me];
int r = 0;
- atomic_inc(&adev->vcn.total_submission_cnt);
+ atomic_inc(&vcn_inst->total_submission_cnt);
+
+ cancel_delayed_work_sync(&vcn_inst->idle_work);
- if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
+ /* We can safely return early here because we've cancelled the
+ * the delayed work so there is no one else to set it to false
+ * and we don't care if someone else sets it to true.
+ */
+ if (adev->vcn.workload_profile_active)
+ goto pg_lock;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ if (!adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- true);
+ true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
+ adev->vcn.workload_profile_active = true;
}
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
- mutex_lock(&adev->vcn.vcn_pg_lock);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
+pg_lock:
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- !adev->vcn.using_unified_queue) {
+ !vcn_inst->using_unified_queue) {
struct dpg_pause_state new_state;
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
- atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+ atomic_inc(&vcn_inst->dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
} else {
unsigned int fences = 0;
unsigned int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+ for (i = 0; i < vcn_inst->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]);
- if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
}
- adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
+ vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
}
- mutex_unlock(&adev->vcn.vcn_pg_lock);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
@@ -482,12 +516,13 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
- !adev->vcn.using_unified_queue)
+ !adev->vcn.inst[ring->me].using_unified_queue)
atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
- atomic_dec(&ring->adev->vcn.total_submission_cnt);
+ atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
+ VCN_IDLE_TIMEOUT);
}
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
@@ -505,7 +540,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -570,14 +605,14 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
goto err;
ib = &job->ibs[0];
- ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
+ ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0);
ib->ptr[1] = addr;
- ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
+ ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0);
ib->ptr[3] = addr >> 32;
- ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
+ ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0);
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
- ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
+ ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0);
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
@@ -740,7 +775,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
uint32_t ib_pack_in_dw;
int i, r;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
@@ -753,7 +788,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
ib->length_dw = 0;
/* single queue headers */
- if (adev->vcn.using_unified_queue) {
+ if (adev->vcn.inst[ring->me].using_unified_queue) {
ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ 4 + 2; /* engine info + decoding ib in dw */
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
@@ -772,7 +807,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
r = amdgpu_job_submit_direct(job, ring, &f);
@@ -870,7 +905,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
uint64_t addr;
int i, r;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
@@ -884,7 +919,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
ib->length_dw = 0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
ib->ptr[ib->length_dw++] = 0x00000018;
@@ -906,7 +941,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
r = amdgpu_job_submit_direct(job, ring, &f);
@@ -937,7 +972,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
uint64_t addr;
int i, r;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
@@ -951,7 +986,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
ib->length_dw = 0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
ib->ptr[ib->length_dw++] = 0x00000018;
@@ -973,7 +1008,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
r = amdgpu_job_submit_direct(job, ring, &f);
@@ -1058,36 +1093,32 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
}
}
-void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i)
{
- int i;
unsigned int idx;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
- /* currently only support 2 FW instances */
- if (i >= 2) {
- dev_info(adev->dev, "More then 2 VCN FW instances!\n");
- break;
- }
- idx = AMDGPU_UCODE_ID_VCN + i;
- adev->firmware.ucode[idx].ucode_id = idx;
- adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1))
+ && (i > 0))
+ return;
- if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
- IP_VERSION(4, 0, 3) ||
- amdgpu_ip_version(adev, UVD_HWIP, 0) ==
- IP_VERSION(5, 0, 1))
- break;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ /* currently only support 2 FW instances */
+ if (i >= 2) {
+ dev_info(adev->dev, "More then 2 VCN FW instances!\n");
+ return;
}
+ idx = AMDGPU_UCODE_ID_VCN + i;
+ adev->firmware.ucode[idx].ucode_id = idx;
+ adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
}
}
@@ -1390,10 +1421,33 @@ void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev)
struct dentry *root = minor->debugfs_root;
char name[32];
- if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.using_unified_queue)
+ if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue)
return;
sprintf(name, "amdgpu_vcn_sched_mask");
debugfs_create_file(name, 0600, root, adev,
&amdgpu_debugfs_vcn_sched_mask_fops);
#endif
}
+
+/**
+ * vcn_set_powergating_state - set VCN block powergating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ ret |= vinst->set_pg_state(vinst, state);
+ }
+
+ return ret;
+}