aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
diff options
context:
space:
mode:
authorshaoyunl <[email protected]>2021-02-16 17:50:42 +0000
committerAlex Deucher <[email protected]>2021-03-24 03:10:38 +0000
commite3c1b0712fdb039d9139ac0910be9a658c9cb65e (patch)
treec36c46491fe9ace02e73059e3dbbab0fb1059fa5 /drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
parentdrm/amdgpu: Add reset_list for device list used for reset (diff)
downloadkernel-e3c1b0712fdb039d9139ac0910be9a658c9cb65e.tar.gz
kernel-e3c1b0712fdb039d9139ac0910be9a658c9cb65e.zip
drm/amdgpu: Reset the devices in the XGMI hive duirng probe
In passthrough configuration, hypervisior will trigger the SBR(Secondary bus reset) to the devices without sync to each other. This could cause device hang since for XGMI configuration, all the devices within the hive need to be reset at a limit time slot. This serial of patches try to solve this issue by co-operate with new SMU which will only do minimum house keeping to response the SBR request but don't do the real reset job and leave it to driver. Driver need to do the whole sw init and minimum HW init to bring up the SMU and trigger the reset(possibly BACO) on all the ASICs at the same time Signed-off-by: shaoyunl <[email protected]> Acked-by: Andrey Grodzovsky [email protected] Signed-off-by: Alex Deucher <[email protected]>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c8
1 files changed, 5 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 4d3a24fdeb9c..33f748e5bbfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -499,7 +499,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.supported)
return 0;
- if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ if (!adev->gmc.xgmi.pending_reset &&
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
ret = psp_xgmi_initialize(&adev->psp);
if (ret) {
dev_err(adev->dev,
@@ -545,7 +546,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
task_barrier_add_task(&hive->tb);
- if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ if (!adev->gmc.xgmi.pending_reset &&
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
/* update node list for other device in the hive */
if (tmp_adev != adev) {
@@ -574,7 +576,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
}
}
- if (!ret)
+ if (!ret && !adev->gmc.xgmi.pending_reset)
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
exit_unlock: