aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
diff options
context:
space:
mode:
authorTao Zhou <[email protected]>2025-03-06 03:36:49 +0000
committerAlex Deucher <[email protected]>2025-03-07 17:53:39 +0000
commit334dc5fcc3f177823115ec4e075259997c16d4a7 (patch)
tree62801c1ae306fcebe9725ccb8ecc7d92eaa063a1 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
parentdrm/amdgpu: Fix missing drain retry fault the last entry (diff)
downloadkernel-334dc5fcc3f177823115ec4e075259997c16d4a7.tar.gz
kernel-334dc5fcc3f177823115ec4e075259997c16d4a7.zip
drm/amdgpu: increase RAS bad page threshold
For default policy, driver will issue an RMA event when the number of bad pages is greater than 8 physical rows, rather than reaches 8 physical rows, don't rely on threshold configurable parameters in default mode. Signed-off-by: Tao Zhou <[email protected]> Reviewed-by: Hawking Zhang <[email protected]> Signed-off-by: Alex Deucher <[email protected]>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c6
1 files changed, 3 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index ab27cecb5519..09a6f8bc1a5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -747,7 +747,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
/* Modify the header if it exceeds.
*/
if (amdgpu_bad_page_threshold != 0 &&
- control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) {
+ control->ras_num_bad_pages > ras->bad_page_cnt_threshold) {
dev_warn(adev->dev,
"Saved bad pages %d reaches threshold value %d\n",
control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
@@ -806,7 +806,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
*/
if (amdgpu_bad_page_threshold != 0 &&
control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
- control->ras_num_bad_pages < ras->bad_page_cnt_threshold)
+ control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
control->ras_num_bad_pages) * 100) /
ras->bad_page_cnt_threshold;
@@ -1456,7 +1456,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
res);
return -EINVAL;
}
- if (ras->bad_page_cnt_threshold > control->ras_num_bad_pages) {
+ if (ras->bad_page_cnt_threshold >= control->ras_num_bad_pages) {
/* This means that, the threshold was increased since
* the last time the system was booted, and now,
* ras->bad_page_cnt_threshold - control->num_recs > 0,