diff options
| author | Lijo Lazar <[email protected]> | 2024-02-22 08:46:57 +0000 |
|---|---|---|
| committer | Alex Deucher <[email protected]> | 2024-02-26 16:14:24 +0000 |
| commit | 1b6ef74b2b03b54776778476f8adf87dd4f8beb1 (patch) | |
| tree | 94795ef19a86666ce2be38638096f85f27aecb2c /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
| parent | drm/amd/display: Prevent potential buffer overflow in map_hw_resources (diff) | |
| download | kernel-1b6ef74b2b03b54776778476f8adf87dd4f8beb1.tar.gz kernel-1b6ef74b2b03b54776778476f8adf87dd4f8beb1.zip | |
drm/amdgpu: Add fatal error detected flag
For a RAS error that needs a full reset to recover, set the fatal error
status. Clear the status once the device is reset.
Signed-off-by: Lijo Lazar <[email protected]>
Reviewed-by: Asad Kamal <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 46f3d1013e8c..2c94de305c69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2439,6 +2439,18 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + /* For any RAS error that needs a full reset to + * recover, set the fatal error status + */ + if (hive) { + list_for_each_entry(remote_adev, + &hive->device_list, + gmc.xgmi.head) + amdgpu_ras_set_fed(remote_adev, + true); + } else { + amdgpu_ras_set_fed(adev, true); + } psp_fatal_error_recovery_quirk(&adev->psp); } } @@ -3440,6 +3452,26 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) return 0; } +bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (!ras) + return false; + + return atomic_read(&ras->fed); +} + +void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + atomic_set(&ras->fed, !!status); +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { |
