diff options
| author | Tao Zhou <[email protected]> | 2024-10-24 10:51:13 +0000 |
|---|---|---|
| committer | Alex Deucher <[email protected]> | 2024-12-10 15:26:46 +0000 |
| commit | 19d4b27aedc73d2f5785bdef7c30fe49c16606e7 (patch) | |
| tree | 35ae56538d8e81adc0f08d1c32656d631d73785c /drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | |
| parent | drm/amdgpu: store only one RAS bad page record for all pages in one row (diff) | |
| download | kernel-19d4b27aedc73d2f5785bdef7c30fe49c16606e7.tar.gz kernel-19d4b27aedc73d2f5785bdef7c30fe49c16606e7.zip | |
drm/amdgpu: retire RAS bad pages in different NPS modes
The format of the memory normalized address differs between NPS modes,
so the bit mapping needs to be adjusted according to the NPS mode.
Signed-off-by: Tao Zhou <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v12_0.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 64 |
1 file changed, 41 insertions, 23 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index ce60fd6675ce..17ef9a6743f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -179,10 +179,13 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ta_ras_query_address_output *addr_out, bool dump_addr) { - uint32_t col, row, bank, channel_index, umc_inst = 0; - uint64_t soc_pa, retired_page, column, err_addr; + uint32_t col, col_lower, row, row_lower, bank; + uint32_t channel_index, umc_inst = 0; + uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS]; + uint64_t soc_pa, column, err_addr; struct ta_ras_query_address_output addr_out_tmp; struct ta_ras_query_address_output *paddr_out; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; int ret = 0; if (!addr_out) @@ -199,7 +202,7 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", err_addr); - return ret; + goto out; } bank = paddr_out->pa.bank; @@ -208,42 +211,57 @@ static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, umc_inst = addr_in->ma.umc_inst; } - soc_pa = paddr_out->pa.pa; + loop_bits[0] = UMC_V12_0_PA_C2_BIT; + loop_bits[1] = UMC_V12_0_PA_C3_BIT; + loop_bits[2] = UMC_V12_0_PA_C4_BIT; + loop_bits[3] = UMC_V12_0_PA_R13_BIT; - if (!err_data && !dump_addr) - return ret; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + + /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS4_PARTITION_MODE) { + loop_bits[0] = UMC_V12_0_PA_CH4_BIT; + loop_bits[1] = UMC_V12_0_PA_CH5_BIT; + loop_bits[2] = UMC_V12_0_PA_B0_BIT; + loop_bits[3] = UMC_V12_0_PA_R11_BIT; + } - col = (err_addr >> 1) & 0x1fULL; - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << 
UMC_V12_0_PA_C4_BIT); - /* clear [R13] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_R13_BIT); + soc_pa = paddr_out->pa.pa; + /* clear loop bits in soc physical address */ + for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) + soc_pa &= ~BIT_ULL(loop_bits[i]); paddr_out->pa.pa = soc_pa; + /* get column bit 0 and 1 in mca address */ + col_lower = (err_addr >> 1) & 0x3ULL; + /* MA_R13_BIT will be handled later */ + row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL; + + if (!err_data && !dump_addr) + goto out; - /* loop for all possibilities of [R13 C4 C3 C2] */ + /* loop for all possibilities of retired bits */ for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - retired_page |= (((column & 0x8) >> 3) << UMC_V12_0_PA_R13_BIT); + soc_pa = paddr_out->pa.pa; + for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) + soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]); - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - row = (retired_page >> UMC_V12_0_PA_R0_BIT) & 0x3fffULL; + col = ((column & 0x7) << 2) | col_lower; + /* add row bit 13 */ + row = ((column >> 3) << 13) | row_lower; if (dump_addr) dev_info(adev->dev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); + soc_pa, row, col, bank, channel_index); if (err_data) amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + soc_pa, channel_index, umc_inst); } +out: return ret; } |
