aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2024-05-15 16:43:42 +0000
committerLinus Torvalds <[email protected]>2024-05-15 16:43:42 +0000
commitdb5d28c0bfe566908719bec8e25443aabecbb802 (patch)
treec113e307ba7a5964ff174f590cd58bce07e2e4ee /drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
parentMerge tag 'asymmetric-keys-next-6.10-rc1' of git://git.kernel.org/pub/scm/lin... (diff)
parentMerge tag 'drm-xe-next-fixes-2024-05-09-1' of https://gitlab.freedesktop.org/... (diff)
downloadkernel-db5d28c0bfe566908719bec8e25443aabecbb802.tar.gz
kernel-db5d28c0bfe566908719bec8e25443aabecbb802.zip
Merge tag 'drm-next-2024-05-15' of https://gitlab.freedesktop.org/drm/kernel
Pull drm updates from Dave Airlie: "This is the main pull request for the drm subsystems for 6.10. In drivers the main thing is a new driver for ARM Mali firmware based GPUs, otherwise there are a lot of changes to amdgpu/xe/i915/msm and scattered changes to everything else. In the core a bunch of headers and Kconfig was refactored, along with the addition of a new panic handler which is meant to provide a user friendly message when a panic happens and graphical display is enabled. New drivers: - panthor: ARM Mali/Immortalis CSF-based GPU driver Core: - add a CONFIG_DRM_WERROR option - make more headers self-contained - grab resv lock in pin/unpin - fix vmap resv locking - EDID/eDP panel matching - Kconfig cleanups - DT sound bindings - Add SIZE_HINTS property for cursor planes - Add struct drm_edid_product_id and helpers. - Use drm device based logging in more drm functions. - drop seq_file.h from a bunch of places - use drm_edid driver conversions dp: - DP Tunnel documentation - MST read sideband cap - Adaptive sync SDP prep work ttm: - improve placement for TTM BOs in idle/busy handling panic: - Fixes for drm-panic, and option to test it. - Add drm panic to simpledrm, mgag200, imx, ast bridge: - improve init ordering - adv7511: allow GPIO pin sharing - tc358775: add tc358675 support panel: - AUO B120XAN01.0 - Samsung s6e3fa7 - BOE NT116WHM-N44 - CMN N116BCA-EA1, - CrystalClear CMT430B19N00 - Startek KD050HDFIA020-C020A - powertip PH128800T006-ZHC01 - Innolux G121X1-L03 - LG sw43408 - Khadas TS050 V2 - EDO RM69380 OLED - CSOT MNB601LS1-1 amdgpu: - HDCP/ODM/RAS fixes - Devcoredump improvements - Expose VCN activity via sysfs - SMY 13.0.x updates - Enable fast updates on DCN 3.1.4 - Add dclk and vclk reporting on additional devices - Add ACA RAS infrastructure - Implement TLB flush fence - EEPROM handling fixes - SMUIO 14.0.2 support - SMU 14.0.1 Updates - SMU 14.0.2 support - Sync page table freeing with TLB flushes - DML2 refactor - DC debug improvements - DCN 3.5.x Updates - GPU reset fixes - HDP fix for second GFX pipe on GC 10.x - Enable secondary GFX pipe on GC 10.3 - Refactor and clean up BACO/BOCO/BAMACO handling - Remove invalid TTM resource start check - UAF fix in VA IOCTL - GPUVM page fault redirection to secondary IH rings for IH 6.x - Initial support for mapping kernel queues via MES - Fix VRAM memory accounting amdkfd: - MQD handling cleanup - Preemption handling fixes for XCDs - TLB flush fix for GC 9.4.2 - Properly clean up workqueue during module unload - Fix memory leak process create failure - Range check CP bad op exception targets to avoid reporting invalid exceptions to userspace - Fix eviction fence handling - Fix leak in GPU memory allocation failure case - DMABuf import handling fix - Enable SQ watchpoint for gfx10 i915: - Adding new DG2 PCI ID - add context hints for GT frequency - enable only one CCS for compute workloads - new workarounds - Fix UAF on destroy against retire race and remove two earlier partial fixes - Limit the reserved VM space to only the platforms that need it - Fix gt reset with GuC submission is disable - Add and use gt_to_guc() wrapper i915/xe display: - Lunar Lake display enabling, including cdclk and other refactors - BIOS/VBT/opregion related refactor - Digital port related refactor/clean-up - Fix 2s boot time regression on DP panel replay init - Remove duplication on audio enable/disable on SDVO and g4x+ DP - Disable AuxCCS framebuffers if built for Xe - Make crtc disable more atomic - Increase DP idle pattern wait timeout to 2ms - Start using container_of_const() for some extra const safety - Fix Jasper Lake boot freeze - Enable MST mode for 128b/132b single-stream sideband - Enable Adaptive Sync SDP Support for DP - Fix MTL supported DP rates - removal of UHBR13.5 - PLL refactoring - Limit eDP MSO pipe only for display version 20 - More display refactor towards independence from i915 dev_priv - Convert i915/xe fbdev to DRM client - More initial work to make display code more independent from i915 xe: - improved error capture - clean up some uAPI leftovers - devcoredump update - Add BMG mocs table - Handle GSCCS ER interrupt - Implement xe2- and GuC workarounds - struct xe_device cleanup - Hwmon updates - Add LRC parsing for more GPU instruction - Increase VM_BIND number of per-ioctl Ops - drm/xe: Add XE_BO_GGTT_INVALIDATE flag - Initial development for SR-IOV support - Add new PCI IDs to DG2 platform - Move userptr over to start using hmm_range_fault msm: - Switched to generating register header files during build process instead of shipping pre-generated headers - Merged DPU and MDP4 format databases. - DP: - Stop using compat string to distinguish DP and eDP cases - Added support for X Elite platform (X1E80100) - Reworked DP aux/audio support - Added SM6350 DP to the bindings - GPU: - a7xx perfcntr reg fixes - MAINTAINERS updates - a750 devcoredump support radeon: - Silence UBSAN warnings related to flexible arrays nouveau: - move some uAPI objects to uapi headers omapdrm: - console fix ast: - add i2c polling qaic: - add debugfs entries exynos: - fix platform_driver .owner - drop cleanup code mediatek: - Use devm_platform_get_and_ioremap_resource() in mtk_hdmi_ddc_probe() - Add GAMMA 12-bit LUT support for MT8188 - Rename mtk_drm_* to mtk_* - Drop driver owner initialization - Correct calculation formula of PHY Timing" * tag 'drm-next-2024-05-15' of https://gitlab.freedesktop.org/drm/kernel: (1477 commits) drm/xe/ads: Use flexible-array drm/xe: Use ordered WQ for G2H handler drm/msm/gen_header: allow skipping the validation drm/msm/a6xx: Cleanup indexed regs const'ness drm/msm: Add devcoredump support for a750 drm/msm: Adjust a7xx GBIF debugbus dumping drm/msm: Update a6xx registers XML drm/msm: Fix imported a750 snapshot header for upstream drm/msm: Import a750 snapshot registers from kgsl MAINTAINERS: Add Konrad Dybcio as a reviewer for the Adreno driver MAINTAINERS: Add a separate entry for Qualcomm Adreno GPU drivers drm/msm/a6xx: Avoid a nullptr dereference when speedbin setting fails drm/msm/adreno: fix CP cycles stat retrieval on a7xx drm/msm/a7xx: allow writing to CP_BV counter selection registers drm: zynqmp_dpsub: Always register bridge Revert "drm/bridge: ti-sn65dsi83: Fix enable error path" drm/fb_dma: Add checks in drm_fb_dma_get_scanout_buffer() drm/fbdev-generic: Do not set physical framebuffer address drm/panthor: Fix the FW reset logic drm/panthor: Make sure we handle 'unknown group state' case properly ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h33
1 files changed, 21 insertions, 12 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 2da50e095883..5ef6b745f222 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -26,6 +26,9 @@
#include <linux/list.h>
+struct ras_err_data;
+struct ras_query_context;
+
#define ACA_MAX_REGS_COUNT (16)
#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
@@ -99,7 +102,14 @@ enum aca_error_type {
ACA_ERROR_TYPE_COUNT
};
+enum aca_smu_type {
+ ACA_SMU_TYPE_UE = 0,
+ ACA_SMU_TYPE_CE,
+ ACA_SMU_TYPE_COUNT,
+};
+
struct aca_bank {
+ enum aca_smu_type type;
u64 regs[ACA_MAX_REGS_COUNT];
};
@@ -115,15 +125,10 @@ struct aca_bank_info {
int mcatype;
};
-struct aca_bank_report {
- struct aca_bank_info info;
- u64 count[ACA_ERROR_TYPE_COUNT];
-};
-
struct aca_bank_error {
struct list_head node;
struct aca_bank_info info;
- u64 count[ACA_ERROR_TYPE_COUNT];
+ u64 count;
};
struct aca_error {
@@ -157,9 +162,8 @@ struct aca_handle {
};
struct aca_bank_ops {
- int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
- struct aca_bank_report *report, void *data);
- bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
+ int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+ bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
void *data);
};
@@ -167,13 +171,15 @@ struct aca_smu_funcs {
int max_ue_bank_count;
int max_ce_bank_count;
int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
- int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count);
- int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank);
+ int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
+ int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
+ int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank);
};
struct amdgpu_aca {
struct aca_handle_manager mgr;
const struct aca_smu_funcs *smu_funcs;
+ atomic_t ue_update_flag;
bool is_enabled;
};
@@ -196,7 +202,10 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
const char *name, const struct aca_info *aca_info, void *data);
void amdgpu_aca_remove_handle(struct aca_handle *handle);
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
- enum aca_error_type type, void *data);
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx);
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count);
#endif