aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
diff options
context:
space:
mode:
authorTao Zhou <[email protected]>2024-01-18 06:25:07 +0000
committerAlex Deucher <[email protected]>2024-01-22 22:13:26 +0000
commit1757bb7dab6de79c92b1d54b999a2ee1066acc1f (patch)
tree1ab9b1111292af1132552b208f76069c44bde82c /drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
parentRevert "drm/amd/pm: smu v13_0_6 supports ecc info by default" (diff)
downloadkernel-1757bb7dab6de79c92b1d54b999a2ee1066acc1f.tar.gz
kernel-1757bb7dab6de79c92b1d54b999a2ee1066acc1f.zip
drm/amdgpu: update check condition of query for ras page retire
Support page retirement handling in debug mode. v2: revert smu_v13_0_6_get_ecc_info directly. Signed-off-by: Tao Zhou <[email protected]> Reviewed-by: Hawking Zhang <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c10
1 files changed, 8 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 83983f0e8eb5..a6cdb69897f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -91,11 +91,16 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ unsigned int error_query_mode;
int ret = 0;
unsigned long err_count;
+
+ amdgpu_ras_get_error_query_mode(adev, &error_query_mode);
+
mutex_lock(&con->page_retirement_lock);
ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
- if (ret == -EOPNOTSUPP) {
+ if (ret == -EOPNOTSUPP &&
+ error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
@@ -119,7 +124,8 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
*/
adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
}
- } else if (!ret) {
+ } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY ||
+ (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) {
if (adev->umc.ras &&
adev->umc.ras->ecc_info_query_ras_error_count)
adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);