aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2019-11-13 22:43:06 +0000
committerDave Airlie <[email protected]>2019-11-13 22:43:07 +0000
commit0990ca235d9145ec40ed002f8ff7f6e1a910db4f (patch)
treef3b622905180952f792411641c39fd433d839f9c /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parentMerge v5.4-rc7 into drm-next (diff)
parentdrm/amdgpu/powerplay: fix AVFS handling with custom powerplay table (diff)
downloadkernel-0990ca235d9145ec40ed002f8ff7f6e1a910db4f.tar.gz
kernel-0990ca235d9145ec40ed002f8ff7f6e1a910db4f.zip
Merge tag 'drm-next-5.5-2019-11-08' of git://people.freedesktop.org/~agd5f/linux into drm-next
drm-next-5.5-2019-11-08: amdgpu: - Enable VCN dynamic powergating on RV/RV2 - Fixes for Navi14 - Misc Navi fixes - Fix MSI-X tear down - Misc Arturus fixes - Fix xgmi powerstate handling - Documenation fixes scheduler: - Fix static code checker warning - Fix possible thread reactivation while thread is stopped - Avoid cleanup if thread is parked radeon: - SI dpm fix ported from amdgpu Signed-off-by: Dave Airlie <[email protected]> From: Alex Deucher <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c40
1 files changed, 33 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index dab90c280476..404483437bd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -220,7 +220,7 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* As their names indicate, inject operation will write the
* value to the address.
*
- * Second member: struct ras_debug_if::op.
+ * The second member: struct ras_debug_if::op.
* It has three kinds of operations.
*
* - 0: disable RAS on the block. Take ::head as its data.
@@ -228,14 +228,20 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* - 2: inject errors on the block. Take ::inject as its data.
*
* How to use the interface?
- * programs:
- * copy the struct ras_debug_if in your codes and initialize it.
- * write the struct to the control node.
+ *
+ * Programs
+ *
+ * Copy the struct ras_debug_if in your codes and initialize it.
+ * Write the struct to the control node.
+ *
+ * Shells
*
* .. code-block:: bash
*
* echo op block [error [sub_block address value]] > .../ras/ras_ctrl
*
+ * Parameters:
+ *
* op: disable, enable, inject
* disable: only block is needed
* enable: block and error are needed
@@ -265,8 +271,10 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
* .. note::
- * Operation is only allowed on blocks which are supported.
+ * Operations are only allowed on blocks which are supported.
* Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ * to see which blocks support RAS on a particular asic.
+ *
*/
static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
@@ -322,7 +330,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
* Some boards contain an EEPROM which is used to persistently store a list of
- * bad pages containing ECC errors detected in vram. This interface provides
+ * bad pages which experiences ECC errors in vram. This interface provides
* a way to reset the EEPROM, e.g., after testing error injection.
*
* Usage:
@@ -362,7 +370,7 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
/**
* DOC: AMDGPU RAS sysfs Error Count Interface
*
- * It allows user to read the error count for each IP block on the gpu through
+ * It allows the user to read the error count for each IP block on the gpu through
* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
*
* It outputs the multiple lines which report the uncorrected (ue) and corrected
@@ -1027,6 +1035,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
}
/* sysfs end */
+/**
+ * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
+ *
+ * Normally when there is an uncorrectable error, the driver will reset
+ * the GPU to recover. However, in the event of an unrecoverable error,
+ * the driver provides an interface to reboot the system automatically
+ * in that event.
+ *
+ * The following file in debugfs provides that interface:
+ * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo true > .../ras/auto_reboot
+ *
+ */
/* debugfs begin */
static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{