diff options
Diffstat (limited to 'drivers/gpu/drm/xe')
139 files changed, 4023 insertions, 1682 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 30ed74ad29ab..714d5702dfd7 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,9 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe2 Graphics" - depends on DRM && PCI && (m || (y && KUNIT=y)) + depends on DRM && PCI + depends on KUNIT || !KUNIT depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) + depends on PAGE_SIZE_4KB || COMPILE_TEST || BROKEN select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -44,6 +46,7 @@ config DRM_XE select WANT_DEV_COREDUMP select AUXILIARY_BUS select HMM_MIRROR + select REGMAP if I2C help Driver for Intel Xe2 series GPUs and later. Experimental support for Xe series is also available. @@ -85,16 +88,18 @@ config DRM_XE_GPUSVM Enable this option if you want support for CPU to GPU address mirroring. - If in doubut say "Y". + If in doubt say "Y". -config DRM_XE_DEVMEM_MIRROR - bool "Enable device memory mirror" +config DRM_XE_PAGEMAP + bool "Enable device memory pool for SVM" depends on DRM_XE_GPUSVM select GET_FREE_REGION default y help - Disable this option only if you want to compile out without device - memory mirror. Will reduce KMD memory footprint when disabled. + Disable this option only if you don't want to expose local device + memory for SVM. Will reduce KMD memory footprint when disabled. + + If in doubt say "Y". 
config DRM_XE_FORCE_PROBE string "Force probe xe for selected Intel hardware IDs" diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f5f5775acdc0..07c71a29963d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -21,6 +21,13 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(src)/xe_wa_oob.rules $(call cmd,wa_oob) +generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h +quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob)) + cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob) +$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \ + $(src)/xe_device_wa_oob.rules + $(call cmd,device_wa_oob) + # Please keep these build lists sorted! # core driver code @@ -80,6 +87,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_nvm.o \ xe_oa.o \ xe_observation.o \ xe_pat.o \ @@ -124,6 +132,7 @@ xe-y += xe_bb.o \ xe_wait_user_fence.o \ xe_wopcm.o +xe-$(CONFIG_I2C) += xe_i2c.o xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o @@ -154,7 +163,8 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ xe_pci_sriov.o \ - xe_sriov_pf.o + xe_sriov_pf.o \ + xe_sriov_pf_service.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST @@ -205,7 +215,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/icl_dsi.o \ i915-display/intel_alpm.o \ i915-display/intel_atomic.o \ - i915-display/intel_atomic_plane.o \ i915-display/intel_audio.o \ i915-display/intel_backlight.o \ i915-display/intel_bios.o \ @@ -255,6 +264,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_fbc.o \ i915-display/intel_fdi.o \ i915-display/intel_fifo_underrun.o \ + i915-display/intel_flipq.o \ i915-display/intel_frontbuffer.o \ i915-display/intel_global_state.o \ i915-display/intel_gmbus.o \ @@ -271,6 +281,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ 
i915-display/intel_modeset_verify.o \ i915-display/intel_panel.o \ i915-display/intel_pfit.o \ + i915-display/intel_plane.o \ i915-display/intel_pmdemand.o \ i915-display/intel_pch.o \ i915-display/intel_pps.o \ @@ -338,4 +349,4 @@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) uses_generated_oob := $(addprefix $(obj)/, $(xe-y)) -$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h +$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h $(obj)/generated/xe_device_wa_oob.h diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index ff4f412c28d8..81eb046aeebf 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -142,6 +142,7 @@ enum xe_guc_action { XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, + XE_GUC_ACTION_OPT_IN_FEATURE_KLV = 0x550E, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, @@ -271,4 +272,7 @@ enum xe_guc_g2g_type { #define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) #define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) +/* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */ +#define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 7de8f827281f..0366a9da5977 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -16,6 +16,7 @@ * +===+=======+==============================================================+ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | + * | | | - `GuC Opt In Feature KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -125,6 +126,33 @@ enum { }; /** + * DOC: GuC Opt In Feature KLVs + * + * `GuC KLV`_ keys 
available for use with OPT_IN_FEATURE_KLV + * + * _`GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE` : 0x4001 + * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H + * containing the type of the CAT error. On HW that does not support + * reporting the CAT error type, the extra dword is set to 0xdeadbeef. + * + * _`GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH` : 0x4003 + * This KLV enables the Dynamic Inhibit Context Switch optimization, which + * consists in the GuC setting the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH bit to + * zero in the CTX_CONTEXT_CONTROL register of LRCs that are submitted + * to an oversubscribed engine. This will cause those contexts to be + * switched out immediately if they hit an unsatisfied semaphore wait + * (instead of waiting the full timeslice duration). The bit is instead set + * to one if a single context is queued on the engine, to avoid it being + * switched out if there isn't another context that can run in its place. + */ + +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001 +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u + +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003 +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u + +/** * DOC: GuC VGT Policy KLVs * * `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY. 
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h index a473aa6697d0..4fcd3bf6b76f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h @@ -6,37 +6,6 @@ #ifndef __INTEL_PCODE_H__ #define __INTEL_PCODE_H__ -#include "intel_uncore.h" #include "xe_pcode.h" -static inline int -snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val, - int fast_timeout_us, int slow_timeout_ms) -{ - return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val, - slow_timeout_ms ?: 1); -} - -static inline int -snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val) -{ - - return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val); -} - -static inline int -snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1) -{ - return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1); -} - -static inline int -skl_pcode_request(struct intel_uncore *uncore, u32 mbox, - u32 request, u32 reply_mask, u32 reply, - int timeout_base_ms) -{ - return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply, - timeout_base_ms); -} - #endif /* __INTEL_PCODE_H__ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index 797091cf1c99..d012f02bc84f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -24,13 +24,6 @@ static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncor return xe_root_tile_mmio(xe); } -static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore) -{ - struct xe_device *xe = container_of(uncore, struct xe_device, uncore); - - return xe_device_get_root_tile(xe); -} - static inline u32 intel_uncore_read(struct intel_uncore *uncore, i915_reg_t i915_reg) { diff --git 
a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 27437c22bd70..910632f57c3d 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -1,7 +1,12 @@ // SPDX-License-Identifier: MIT /* Copyright © 2024 Intel Corporation */ +#include <drm/drm_cache.h> #include <drm/drm_gem.h> +#include <drm/drm_panic.h> + +#include "intel_fb.h" +#include "intel_display_types.h" #include "xe_bo.h" #include "intel_bo.h" @@ -59,3 +64,89 @@ void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj) { /* FIXME */ } + +struct xe_panic_data { + struct page **pages; + int page; + void *vaddr; +}; + +struct xe_framebuffer { + struct intel_framebuffer base; + struct xe_panic_data panic; +}; + +static inline struct xe_panic_data *to_xe_panic_data(struct intel_framebuffer *fb) +{ + return &container_of_const(fb, struct xe_framebuffer, base)->panic; +} + +static void xe_panic_kunmap(struct xe_panic_data *panic) +{ + if (panic->vaddr) { + drm_clflush_virt_range(panic->vaddr, PAGE_SIZE); + kunmap_local(panic->vaddr); + panic->vaddr = NULL; + } +} + +/* + * The scanout buffer pages are not mapped, so for each pixel, + * use kmap_local_page_try_from_panic() to map the page, and write the pixel. + * Try to keep the map from the previous pixel, to avoid too much map/unmap. 
+ */ +static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct xe_panic_data *panic = to_xe_panic_data(fb); + struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base)); + unsigned int new_page; + unsigned int offset; + + if (fb->panic_tiling) + offset = fb->panic_tiling(sb->width, x, y); + else + offset = y * sb->pitch[0] + x * sb->format->cpp[0]; + + new_page = offset >> PAGE_SHIFT; + offset = offset % PAGE_SIZE; + if (new_page != panic->page) { + xe_panic_kunmap(panic); + panic->page = new_page; + panic->vaddr = ttm_bo_kmap_try_from_panic(&bo->ttm, + panic->page); + } + if (panic->vaddr) { + u32 *pix = panic->vaddr + offset; + *pix = color; + } +} + +struct intel_framebuffer *intel_bo_alloc_framebuffer(void) +{ + struct xe_framebuffer *xe_fb; + + xe_fb = kzalloc(sizeof(*xe_fb), GFP_KERNEL); + if (xe_fb) + return &xe_fb->base; + return NULL; +} + +int intel_bo_panic_setup(struct drm_scanout_buffer *sb) +{ + struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private; + struct xe_panic_data *panic = to_xe_panic_data(fb); + + panic->page = -1; + sb->set_pixel = xe_panic_page_set_pixel; + return 0; +} + +void intel_bo_panic_finish(struct intel_framebuffer *fb) +{ + struct xe_panic_data *panic = to_xe_panic_data(fb); + + xe_panic_kunmap(panic); + panic->page = -1; +} diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index b28a94df824f..fba9617a75a5 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -66,7 +66,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, goto err; } - fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd); + fb = intel_framebuffer_create(&obj->ttm.base, + drm_get_format_info(dev, + mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd); if 
(IS_ERR(fb)) { xe_bo_unpin_map_no_vm(obj); goto err; diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. 
*/ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 6b362695d6b6..c38fba18effe 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -163,6 +163,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -224,7 +227,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, goto out_unlock; } - ret = xe_ggtt_node_insert_locked(vma->node, bo->size, align, 0); + ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0); if (ret) { xe_ggtt_node_fini(vma->node); goto out_unlock; @@ -326,8 +329,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: @@ -457,3 +458,8 @@ u64 intel_dpt_offset(struct i915_vma *dpt_vma) { return 0; } + +void intel_fb_get_map(struct i915_vma *vma, struct iosys_map *map) +{ + *map = vma->bo->vmap; +} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index b35a6f201d4a..30f1073141fc 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -85,7 +85,7 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, cmd_in = xe_bo_ggtt_addr(bo); cmd_out = cmd_in + PAGE_SIZE; - xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); + xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo)); gsc_context->hdcp_bo = bo; gsc_context->hdcp_cmd_in = cmd_in; diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index af47ce34102c..dcbc4b2d3fd9 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ 
b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -10,7 +10,6 @@ #include "xe_ggtt.h" #include "xe_mmio.h" -#include "intel_atomic_plane.h" #include "intel_crtc.h" #include "intel_display.h" #include "intel_display_core.h" @@ -19,6 +18,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "intel_frontbuffer.h" +#include "intel_plane.h" #include "intel_plane_initial.h" #include "xe_bo.h" #include "xe_wa.h" @@ -184,7 +184,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; if (intel_framebuffer_init(to_intel_framebuffer(fb), - &bo->ttm.base, &mode_cmd)) { + &bo->ttm.base, fb->format, &mode_cmd)) { drm_dbg_kms(&xe->drm, "intel fb init failed\n"); goto err_bo; } diff --git a/drivers/gpu/drm/xe/regs/xe_bars.h b/drivers/gpu/drm/xe/regs/xe_bars.h index ce05b6ae832f..880140d6ccdc 100644 --- a/drivers/gpu/drm/xe/regs/xe_bars.h +++ b/drivers/gpu/drm/xe/regs/xe_bars.h @@ -7,5 +7,6 @@ #define GTTMMADR_BAR 0 /* MMIO + GTT */ #define LMEM_BAR 2 /* VRAM */ +#define VF_LMEM_BAR 9 /* VF VRAM */ #endif diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 7702364b65f1..9b66cc972a63 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -16,6 +16,10 @@ #define MTL_GSC_HECI1_BASE 0x00116000 #define MTL_GSC_HECI2_BASE 0x00117000 +#define DG1_GSC_HECI2_BASE 0x00259000 +#define PVC_GSC_HECI2_BASE 0x00285000 +#define DG2_GSC_HECI2_BASE 0x00374000 + #define HECI_H_CSR(base) XE_REG((base) + 0x4) #define HECI_H_CSR_IE REG_BIT(0) #define HECI_H_CSR_IS REG_BIT(1) diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h new file mode 100644 index 000000000000..af781c8e4a80 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_REGS_H_ +#define _XE_I2C_REGS_H_ + +#include <linux/pci_regs.h> + +#include "xe_reg_defs.h" +#include "xe_regs.h" + +#define I2C_BRIDGE_OFFSET 
(SOC_BASE + 0xd9000) +#define I2C_CONFIG_SPACE_OFFSET (SOC_BASE + 0xf6000) +#define I2C_MEM_SPACE_OFFSET (SOC_BASE + 0xf7400) + +#define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164) +#define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168) + +#define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND) +#define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84) + +#endif /* _XE_I2C_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index f0ecfcac4003..13635e4331d4 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -19,6 +19,7 @@ #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) #define DISPLAY_IRQ REG_BIT(16) +#define I2C_IRQ REG_BIT(12) #define GT_DW_IRQ(x) REG_BIT(x) /* diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 994af591a2e8..1b101edb838b 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -12,9 +12,13 @@ #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_PER_CTX_PTR (0x12 + 1) +#define CTX_CS_INDIRECT_CTX (0x14 + 1) +#define CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1) #define CTX_TIMESTAMP (0x22 + 1) #define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) +#define CTX_ACC_CTR_THOLD (0x2a + 1) +#define CTX_ASID (0x2e + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) @@ -36,4 +40,7 @@ #define INDIRECT_CTX_RING_START_UDW (0x08 + 1) #define INDIRECT_CTX_RING_CTL (0x0a + 1) +#define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6) +#define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd) + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index b0efd9b48d1e..2995d72c3f78 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -5,7 +5,7 @@ #ifndef _XE_PMT_H_ #define 
_XE_PMT_H_ -#define SOC_BASE 0x280000 +#include "xe_regs.h" #define BMG_PMT_BASE_OFFSET 0xDB000 #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3abb17d2ca33..1926b4044314 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,6 +7,8 @@ #include "regs/xe_reg_defs.h" +#define SOC_BASE 0x280000 + #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 77ca1ab527ec..bb469096d072 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -106,7 +106,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Check last CCS value, or at least last value in page. */ - offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); + offset = xe_device_ccs_bytes(tile_to_xe(tile), xe_bo_size(bo)); offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; if (cpu_map[offset] != get_val) { KUNIT_FAIL(test, diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c deleted file mode 100644 index b683585db852..000000000000 --- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 AND MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include <kunit/test.h> - -#include "xe_device.h" -#include "xe_kunit_helpers.h" -#include "xe_pci_test.h" - -static int pf_service_test_init(struct kunit *test) -{ - struct xe_pci_fake_data fake = { - .sriov_mode = XE_SRIOV_MODE_PF, - .platform = XE_TIGERLAKE, /* some random platform */ - .subplatform = XE_SUBPLATFORM_NONE, - }; - struct xe_device *xe; - struct xe_gt *gt; - - test->priv = &fake; - xe_kunit_helper_xe_device_test_init(test); - - xe = test->priv; - KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); - - gt = 
xe_device_get_gt(xe, 0); - pf_init_versions(gt); - - /* - * sanity check: - * - all supported platforms VF/PF ABI versions must be defined - * - base version can't be newer than latest - */ - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.latest.major); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, - gt->sriov.pf.service.version.latest.minor); - - test->priv = gt; - return 0; -} - -static void pf_negotiate_any(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, - VF2PF_HANDSHAKE_MINOR_ANY, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_base_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); -} - -static void pf_negotiate_base_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); - if (gt->sriov.pf.service.version.base.major == 
gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); - if (major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.base.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor - 1, - &major, &minor)); -} - -static void pf_negotiate_base_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major - 1, 1, - &major, &minor)); -} - -static void pf_negotiate_latest_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - 
KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.latest.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor - 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); -} - -static void pf_negotiate_latest_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - kunit_skip(test, "no prev major"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major - 1, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); -} - -static struct kunit_case pf_service_test_cases[] = { - KUNIT_CASE(pf_negotiate_any), - KUNIT_CASE(pf_negotiate_base_match), - KUNIT_CASE(pf_negotiate_base_newer), - 
KUNIT_CASE(pf_negotiate_base_next), - KUNIT_CASE(pf_negotiate_base_older), - KUNIT_CASE(pf_negotiate_base_prev), - KUNIT_CASE(pf_negotiate_latest_match), - KUNIT_CASE(pf_negotiate_latest_newer), - KUNIT_CASE(pf_negotiate_latest_next), - KUNIT_CASE(pf_negotiate_latest_older), - KUNIT_CASE(pf_negotiate_latest_prev), - {} -}; - -static struct kunit_suite pf_service_suite = { - .name = "pf_service", - .test_cases = pf_service_test_cases, - .init = pf_service_test_init, -}; - -kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c index 537766cdd882..d266882adc0e 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -32,7 +32,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * bo->tile = tile; bo->ttm.bdev = &xe->ttm; - bo->size = size; + bo->ttm.base.size = size; iosys_map_set_vaddr(&bo->vmap, buf); if (flags & XE_BO_FLAG_GGTT) { @@ -43,7 +43,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * KUNIT_ASSERT_EQ(test, 0, xe_ggtt_node_insert(bo->ggtt_node[tile->id], - bo->size, SZ_4K)); + xe_bo_size(bo), SZ_4K)); } return bo; diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4a65e3103f77..edd1e701aa1c 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -74,13 +74,13 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, { struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; - bool big = bo->size >= SZ_2M; + bool big = xe_bo_size(bo) >= SZ_2M; struct dma_fence *fence; const char *str = big ? 
"Copying big bo" : "Copying small bo"; int err; struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, - bo->size, + xe_bo_size(bo), ttm_bo_type_kernel, region | XE_BO_FLAG_NEEDS_CPU_ACCESS | @@ -105,7 +105,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, goto out_unlock; } - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); fence = xe_migrate_clear(m, remote, remote->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : @@ -113,15 +113,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "remote first offset should be cleared", test); - retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(remote) - 8, u64); check(retval, expected, "remote last offset should be cleared", test); } dma_fence_put(fence); /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */ - xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xc0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, xe_bo_size(bo)); expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, @@ -131,15 +131,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &bo->vmap, 0, u64); check(retval, expected, "remote -> vram bo first offset should be copied", test); - retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &bo->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "remote -> vram bo offset should be copied", test); } dma_fence_put(fence); /* And other way around.. slightly hacky.. 
*/ - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, xe_bo_size(bo)); fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, remote->ttm.resource, false); @@ -148,7 +148,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "vram -> remote bo first offset should be copied", test); - retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "vram -> remote bo last offset should be copied", test); } @@ -245,9 +245,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) - xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + xe_res_first(pt->ttm.resource, 0, xe_bo_size(pt), &src_it); else - xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); + xe_res_first_sg(xe_bo_sg(pt), 0, xe_bo_size(pt), &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, &src_it, XE_PAGE_SIZE, pt->ttm.resource); @@ -276,7 +276,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); - xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, xe_bo_size(tiny)); expected = 0; fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -286,7 +286,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &tiny->vmap, 0, u32); check(retval, expected, "Command clear small first value", test); - retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + retval = xe_map_rd(xe, &tiny->vmap, xe_bo_size(tiny) - 
4, u32); check(retval, expected, "Command clear small last value", test); kunit_info(test, "Copying small buffer object to system\n"); @@ -298,7 +298,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); - xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + xe_map_memset(xe, &big->vmap, 0, 0x11, xe_bo_size(big)); expected = 0; fence = xe_migrate_clear(m, big, big->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -308,7 +308,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &big->vmap, 0, u32); check(retval, expected, "Command clear big first value", test); - retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + retval = xe_map_rd(xe, &big->vmap, xe_bo_size(big) - 4, u32); check(retval, expected, "Command clear big last value", test); kunit_info(test, "Copying big buffer object to system\n"); @@ -370,7 +370,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile, struct xe_migrate *m = tile->migrate; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it; struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource; u64 src_L0_ofs, dst_L0_ofs; @@ -498,7 +498,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, long ret; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -523,7 +523,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Clear evicted vram data first value", test); - 
retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Clear evicted vram data last value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -532,7 +532,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -562,7 +562,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Restored value must be equal to initial value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Restored value must be equal to initial value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -570,7 +570,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -583,7 +583,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, u64 expected, retval; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from 
sysmem to vram", test)) { @@ -597,7 +597,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); } dma_fence_put(fence); @@ -615,7 +615,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear main buffer first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear main buffer last value", test); } dma_fence_put(fence); @@ -625,7 +625,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear ccs data last value", test); } dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index baccb657bd05..9c715e59f030 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -21,6 +21,18 @@ static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); +static void xe_pci_id_kunit_desc(const struct 
pci_device_id *param, char *desc) +{ + const struct xe_device_desc *dev_desc = + (const struct xe_device_desc *)param->driver_data; + + if (dev_desc) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "0x%X (%s)", + param->device, dev_desc->platform_name); +} + +KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); + /** * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters * @prev: the pointer to the previous parameter to iterate from or NULL @@ -55,6 +67,25 @@ const void *xe_pci_media_ip_gen_param(const void *prev, char *desc) } EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); +/** + * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct pci_device_id parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_id_gen_param(const void *prev, char *desc) +{ + const struct pci_device_id *pci = pci_id_gen_params(prev, desc); + + return pci->driver_data ? 
pci : NULL; +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param); + static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) { diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 95fed41f7ff2..37b344df2dc3 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -44,9 +44,21 @@ static void check_media_ip(struct kunit *test) KUNIT_ASSERT_EQ(test, mask, 0); } +static void check_platform_gt_count(struct kunit *test) +{ + const struct pci_device_id *pci = test->param_value; + const struct xe_device_desc *desc = + (const struct xe_device_desc *)pci->driver_data; + int max_gt = desc->max_gt_per_tile; + + KUNIT_ASSERT_GT(test, max_gt, 0); + KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE); +} + static struct kunit_case xe_pci_tests[] = { KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), + KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 3a1df7a5e291..ce4d2b86b778 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -27,6 +27,7 @@ int xe_pci_fake_device_init(struct xe_device *xe); const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc); const void *xe_pci_media_ip_gen_param(const void *prev, char *desc); +const void *xe_pci_id_gen_param(const void *prev, char *desc); const void *xe_pci_live_device_gen_param(const void *prev, char *desc); #endif diff --git a/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c new file mode 100644 index 000000000000..ba95e29b597d --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024-2025 Intel 
Corporation + */ + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + xe_sriov_pf_service_init(xe); + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than latest + */ + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.latest.major); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.minor, + xe->sriov.pf.service.version.latest.minor); + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.base.minor); +} + +static void 
pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, xe->sriov.pf.service.version.base.minor); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, xe->sriov.pf.service.version.latest.major); + if (major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_device *xe = 
test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + 
KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major - 1, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + .init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 9570672fce33..5ce0e26822f2 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -19,7 +19,7 @@ static int bb_prefetch(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt)) /* * RCS and CCS require 1K, although other engines would be * okay with 512. 
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index fafacd73dcc3..b5cc65506696 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -14,7 +14,7 @@ struct xe_gt; struct xe_exec_queue; struct xe_sched_job; -struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4e39188a021a..1be2415966df 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -19,6 +19,8 @@ #include <kunit/static_stub.h> +#include <trace/events/gpu_mem.h> + #include "xe_device.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" @@ -418,6 +420,19 @@ static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt) xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0); } +static void update_global_total_pages(struct ttm_device *ttm_dev, + long num_pages) +{ +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + struct xe_device *xe = ttm_to_xe_device(ttm_dev); + u64 global_total_pages = + atomic64_add_return(num_pages, &xe->global_total_pages); + + trace_gpu_mem_total(xe->drm.primary->index, 0, + global_total_pages << PAGE_SHIFT); +#endif +} + static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, u32 page_flags) { @@ -437,7 +452,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = 0; if (xe_bo_needs_ccs_pages(bo)) - extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), + extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)), PAGE_SIZE); /* @@ -525,6 +540,7 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, xe_tt->purgeable = false; xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt); + update_global_total_pages(ttm_dev, 
tt->num_pages); return 0; } @@ -541,6 +557,7 @@ static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) ttm_pool_free(&ttm_dev->pool, tt); xe_ttm_tt_account_subtract(xe, tt); + update_global_total_pages(ttm_dev, -(long)tt->num_pages); } static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) @@ -795,7 +812,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } if (ttm_bo->type == ttm_bo_type_sg) { - ret = xe_bo_move_notify(bo, ctx); + if (new_mem->mem_type == XE_PL_SYSTEM) + ret = xe_bo_move_notify(bo, ctx); if (!ret) ret = xe_bo_move_dmabuf(ttm_bo, new_mem); return ret; @@ -1122,7 +1140,7 @@ int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) goto out_unlock_bo; - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED); @@ -1200,7 +1218,8 @@ int xe_bo_evict_pinned(struct xe_bo *bo) goto out_unlock_bo; if (!backup) { - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, + NULL, xe_bo_size(bo), DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED); @@ -1254,7 +1273,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo) } xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, - bo->size); + xe_bo_size(bo)); } if (!bo->backup_obj) @@ -1347,7 +1366,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) } xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr, - bo->size); + xe_bo_size(bo)); } bo->backup_obj = NULL; @@ -1558,7 +1577,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, offset & 
PAGE_MASK, - bo->size - (offset & PAGE_MASK), &cursor); + xe_bo_size(bo) - (offset & PAGE_MASK), &cursor); do { unsigned long page_offset = (offset & ~PAGE_MASK); @@ -1858,7 +1877,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->ccs_cleared = false; bo->tile = tile; - bo->size = size; bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; @@ -2036,7 +2054,7 @@ __xe_bo_create_locked(struct xe_device *xe, if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, - start + bo->size, U64_MAX); + start + xe_bo_size(bo), U64_MAX); } else { err = xe_ggtt_insert_bo(t->mem.ggtt, bo); } @@ -2157,21 +2175,6 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); } -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags) -{ - struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - type, flags); - if (IS_ERR(bo)) - return bo; - - xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); - - return bo; -} - static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); @@ -2234,7 +2237,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str xe_assert(xe, !(*src)->vmap.is_iomem); bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, - (*src)->size, dst_flags); + xe_bo_size(*src), dst_flags); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -2436,7 +2439,6 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) .no_wait_gpu = false, .gfp_retry_mayfail = true, }; - struct pin_cookie cookie; int ret; if (vm) { @@ -2447,10 +2449,10 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } - cookie = xe_vm_set_validating(vm, allow_res_evict); + 
xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); - xe_vm_clear_validating(vm, allow_res_evict, cookie); + xe_vm_clear_validating(vm, allow_res_evict); return ret; } @@ -2524,7 +2526,7 @@ int xe_bo_vmap(struct xe_bo *bo) * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap * to use struct iosys_map. */ - ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 02ada1fb8a23..02e8cde4c6b2 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -118,9 +118,6 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, size_t size, u64 offset, enum ttm_bo_type type, u32 flags, u64 alignment); -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, size_t size, u32 flags); struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, @@ -238,6 +235,19 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) return xe_bo_addr(bo, 0, page_size); } +/** + * xe_bo_size() - Xe BO size + * @bo: The bo object. + * + * Simple helper to return Xe BO's size. 
+ * + * Return: Xe BO's size + */ +static inline size_t xe_bo_size(struct xe_bo *bo) +{ + return bo->ttm.base.size; +} + static inline u32 __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) { @@ -246,7 +256,7 @@ __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.size > xe_bo_size(bo)); XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); return ggtt_node->base.start; } @@ -300,7 +310,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo); static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) { - return PAGE_ALIGN(bo->ttm.base.size); + return PAGE_ALIGN(xe_bo_size(bo)); } static inline bool xe_bo_has_pages(struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index eb5e83c5f233..ff560d82496f 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -32,8 +32,6 @@ struct xe_bo { struct xe_bo *backup_obj; /** @parent_obj: Ref to parent bo if this a backup_obj */ struct xe_bo *parent_obj; - /** @size: Size of this buffer object */ - size_t size; /** @flags: flags for this buffer object */ u32 flags; /** @vm: VM this BO is attached to, for extobj this will be NULL */ @@ -86,7 +84,7 @@ struct xe_bo { u16 cpu_caching; /** @devmem_allocation: SVM device memory allocation */ - struct drm_gpusvm_devmem devmem_allocation; + struct drm_pagemap_devmem devmem_allocation; /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ struct list_head vram_userfault_link; diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 8ec1ff1e4e80..e9b46a2d0019 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -267,7 +267,8 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); if (!pdev) - return 
ERR_PTR(-EINVAL); + return ERR_PTR(-ENODEV); + pci_dev_put(pdev); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index d83cd6ed3fa8..26e9d146ccbf 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -20,7 +20,9 @@ #include "xe_pm.h" #include "xe_pxp_debugfs.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" #include "xe_step.h" +#include "xe_wa.h" #ifdef CONFIG_DRM_XE_DEBUG #include "xe_bo_evict.h" @@ -82,9 +84,28 @@ static int sriov_info(struct seq_file *m, void *data) return 0; } +static int workarounds(struct xe_device *xe, struct drm_printer *p) +{ + xe_pm_runtime_get(xe); + xe_wa_device_dump(xe, p); + xe_pm_runtime_put(xe); + + return 0; +} + +static int workaround_info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + workarounds(xe, &p); + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"info", info, 0}, { .name = "sriov_info", .show = sriov_info, }, + { .name = "workarounds", .show = workaround_info, }, }; static int forcewake_open(struct inode *inode, struct file *file) @@ -273,4 +294,7 @@ void xe_debugfs_register(struct xe_device *xe) xe_pxp_debugfs_register(xe->pxp); fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure); + + if (IS_SRIOV_PF(xe)) + xe_sriov_pf_debugfs_register(xe, root); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7a8af2311318..203e3038cc81 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump 
data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. + * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; u32 chunk_offset; ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) xe_pm_runtime_get(gt_to_xe(ss->gt)); mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; - } + if (offset >= ss->read.size) + goto unlock; new_chunk_position = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, @@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ss->read.size - offset; memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + if (pm_needed) xe_pm_runtime_put(gt_to_xe(ss->gt)); - return byte_copied; + return byte_copied ? 
byte_copied : ret; } static void xe_devcoredump_free(void *data) @@ -313,13 +331,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = exec_queue_to_guc(q); - u32 adj_logical_mask = q->logical_mask; - u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - unsigned int fw_ref; bool cookie; - int i; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -335,14 +349,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { - if (adj_logical_mask & BIT(i)) { - adj_logical_mask |= width_mask << i; - i += q->width; - } else { - ++i; - } - } /* keep going if fw fails as we still want to save the memory and SW data */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8cfcfff250ca..6ece4defa9df 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -40,12 +40,14 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" +#include "xe_i2c.h" #include "xe_irq.h" -#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_nvm.h" #include "xe_oa.h" #include "xe_observation.h" #include "xe_pat.h" @@ -66,6 +68,7 @@ #include "xe_wait_user_fence.h" #include "xe_wa.h" +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> static int xe_file_open(struct drm_device *dev, struct drm_file *file) @@ -678,6 +681,7 @@ static void sriov_update_device_info(struct xe_device *xe) /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { xe->info.probe_display = 0; + xe->info.has_heci_cscfi 
= 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1; @@ -698,6 +702,9 @@ int xe_device_probe_early(struct xe_device *xe) { int err; + xe_wa_device_init(xe); + xe_wa_process_device_oob(xe); + err = xe_mmio_probe_early(xe); if (err) return err; @@ -783,52 +790,18 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - err = xe_ttm_sys_mgr_init(xe); - if (err) - return err; - for_each_gt(gt, xe, id) { err = xe_gt_init_early(gt); if (err) return err; - - /* - * Only after this point can GT-specific MMIO operations - * (including things like communication with the GuC) - * be performed. - */ - xe_gt_mmio_init(gt); - - if (IS_SRIOV_VF(xe)) { - xe_guc_comm_init_early(>->uc.guc); - err = xe_gt_sriov_vf_bootstrap(gt); - if (err) - return err; - err = xe_gt_sriov_vf_query_config(gt); - if (err) - return err; - } } for_each_tile(tile, xe, id) { err = xe_ggtt_init_early(tile->mem.ggtt); if (err) return err; - err = xe_memirq_init(&tile->memirq); - if (err) - return err; } - for_each_gt(gt, xe, id) { - err = xe_gt_init_hwconfig(gt); - if (err) - return err; - } - - err = xe_devcoredump_init(xe); - if (err) - return err; - /* * From here on, if a step fails, make sure a Driver-FLR is triggereed */ @@ -850,6 +823,14 @@ int xe_device_probe(struct xe_device *xe) return err; } + /* + * Allow allocations only now to ensure xe_display_init_early() + * is the first to allocate, always. 
+ */ + err = xe_ttm_sys_mgr_init(xe); + if (err) + return err; + /* Allocate and map stolen after potential VRAM resize */ err = xe_ttm_stolen_mgr_init(xe); if (err) @@ -881,6 +862,16 @@ int xe_device_probe(struct xe_device *xe) return err; } + if (xe->tiles->media_gt && + XE_WA(xe->tiles->media_gt, 15015404425_disable)) + XE_DEVICE_WA_DISABLE(xe, 15015404425); + + err = xe_devcoredump_init(xe); + if (err) + return err; + + xe_nvm_init(xe); + err = xe_heci_gsc_init(xe); if (err) return err; @@ -921,6 +912,10 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + err = xe_i2c_probe(xe); + if (err) + goto err_unregister_display; + for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); @@ -938,6 +933,8 @@ void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); + xe_nvm_fini(xe); + drm_dev_unplug(&xe->drm); xe_bo_pci_dev_remove_all(xe); @@ -981,38 +978,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. 
*/ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1022,6 +996,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(>->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1052,15 +1027,52 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(>->global_invl_lock); - xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(>->mmio, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(>->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. 
+ */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_WA(root_gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); + tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index e4da797a984b..bc802e066a7d 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -60,35 +60,32 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } +/* + * Highest GT/tile count for any platform. Used only for memory allocation + * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT + * structures should use the per-platform xe->info.max_gt_per_tile instead. + */ #define XE_MAX_GT_PER_TILE 2 -static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) -{ - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) - gt_id = 0; - - return gt_id ? tile->media_gt : tile->primary_gt; -} - static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct xe_tile *tile; struct xe_gt *gt; - /* - * FIXME: This only works for now because multi-tile and standalone - * media are mutually exclusive on the platforms we have today. - * - * id => GT mapping may change once we settle on how we want to handle - * our UAPI. 
- */ - if (MEDIA_VER(xe) >= 13) { - gt = xe_tile_get_gt(root_tile, gt_id); - } else { - if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) - gt_id = 0; + if (gt_id >= xe->info.tile_count * xe->info.max_gt_per_tile) + return NULL; - gt = xe->tiles[gt_id].primary_gt; + tile = &xe->tiles[gt_id / xe->info.max_gt_per_tile]; + switch (gt_id % xe->info.max_gt_per_tile) { + default: + xe_assert(xe, false); + fallthrough; + case 0: + gt = tile->primary_gt; + break; + case 1: + gt = tile->media_gt; + break; } if (!gt) @@ -130,14 +127,14 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ for_each_if((tile__) = &(xe__)->tiles[(id__)]) -/* - * FIXME: This only works for now since multi-tile and standalone media - * happen to be mutually exclusive. Future platforms may change this... - */ #define for_each_gt(gt__, xe__, id__) \ - for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \ for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) +#define for_each_gt_on_tile(gt__, tile__, id__) \ + for_each_gt((gt__), (tile__)->xe, (id__)) \ + for_each_if((gt__)->tile == (tile__)) + static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) { return >->pm.fw; diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index b9440f8c781e..bd9015761aa0 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -24,6 +24,12 @@ * * vram_d3cold_threshold - Report/change vram used threshold(in MB) below * which vram save/restore is permissible during runtime D3cold entry/exit. + * + * lb_fan_control_version - Fan control version provisioned by late binding. + * Exposed only if supported by the device. + * + * lb_voltage_regulator_version - Voltage regulator version provisioned by late + * binding. Exposed only if supported by the device. 
*/ static ssize_t @@ -65,6 +71,140 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +static ssize_t +lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap, ver_low = FAN_TABLE, ver_high = FAN_TABLE; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version); + +static ssize_t +lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap, ver_low = VR_CONFIG, ver_high = VR_CONFIG; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, 
NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); + +static int late_bind_create_files(struct device *dev) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) { + if (ret == -ENXIO) { + drm_dbg(&xe->drm, "Late binding not supported by firmware\n"); + ret = 0; + } + goto out; + } + + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + if (ret) + goto out; + } + + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); +out: + xe_pm_runtime_put(xe); + + return ret; +} + +static void late_bind_remove_files(struct device *dev) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) + sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); +out: + xe_pm_runtime_put(xe); +} + /** * DOC: PCIe 
Gen5 Limitations * @@ -151,8 +291,10 @@ static void xe_device_sysfs_fini(void *arg) if (xe->d3cold.capable) sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - if (xe->info.platform == XE_BATTLEMAGE) + if (xe->info.platform == XE_BATTLEMAGE) { sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); + late_bind_remove_files(xe->drm.dev); + } } int xe_device_sysfs_init(struct xe_device *xe) @@ -170,6 +312,10 @@ int xe_device_sysfs_init(struct xe_device *xe) ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); if (ret) return ret; + + ret = late_bind_create_files(dev); + if (ret) + return ret; } return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 003afb279a5e..d4d2c6854790 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -21,7 +21,9 @@ #include "xe_platform_types.h" #include "xe_pmu_types.h" #include "xe_pt_types.h" +#include "xe_sriov_pf_types.h" #include "xe_sriov_types.h" +#include "xe_sriov_vf_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" #include "xe_ttm_vram_mgr_types.h" @@ -32,7 +34,9 @@ struct dram_info; struct intel_display; +struct intel_dg_nvm_dev; struct xe_ggtt; +struct xe_i2c; struct xe_pat_ops; struct xe_pxp; @@ -105,7 +109,7 @@ struct xe_vram_region { void __iomem *mapping; /** @ttm: VRAM TTM manager */ struct xe_ttm_vram_mgr ttm; -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) /** @pagemap: Used to remap device memory as ZONE_DEVICE */ struct dev_pagemap pagemap; /** @@ -293,6 +297,8 @@ struct xe_device { u8 vram_flags; /** @info.tile_count: Number of tiles */ u8 tile_count; + /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ + u8 max_gt_per_tile; /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; /** @info.vm_max_level: Max VM level */ @@ 
-316,6 +322,8 @@ struct xe_device { u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; + /** @info.has_gsc_nvm: Device has gsc non-volatile memory */ + u8 has_gsc_nvm:1; /** @info.has_heci_cscfi: device has heci cscfi */ u8 has_heci_cscfi:1; /** @info.has_heci_gscfi: device has heci gscfi */ @@ -357,6 +365,19 @@ struct xe_device { u8 skip_pcode:1; } info; + /** @wa_active: keep track of active workarounds */ + struct { + /** @wa_active.oob: bitmap with active OOB workarounds */ + unsigned long *oob; + + /** + * @wa_active.oob_initialized: Mark oob as initialized to help detecting misuse + * of XE_DEVICE_WA() - it can only be called on initialization after + * Device OOB WAs have been processed. + */ + bool oob_initialized; + } wa_active; + /** @survivability: survivability information for device */ struct xe_survivability survivability; @@ -403,10 +424,12 @@ struct xe_device { /** @sriov.__mode: SR-IOV mode (Don't access directly!) */ enum xe_sriov_mode __mode; - /** @sriov.pf: PF specific data */ - struct xe_device_pf pf; - /** @sriov.vf: VF specific data */ - struct xe_device_vf vf; + union { + /** @sriov.pf: PF specific data */ + struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; + }; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; @@ -549,6 +572,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @nvm: discrete graphics non-volatile memory */ + struct intel_dg_nvm_dev *nvm; + /** @oa: oa observation subsystem */ struct xe_oa oa; @@ -577,6 +603,9 @@ struct xe_device { /** @pmu: performance monitoring unit */ struct xe_pmu pmu; + /** @i2c: I2C host controller */ + struct xe_i2c *i2c; + /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */ u32 atomic_svm_timeslice_ms; @@ -588,6 +617,14 @@ struct xe_device { u8 vm_inject_error_position; #endif +#if 
IS_ENABLED(CONFIG_TRACE_GPU_MEM) + /** + * @global_total_pages: global GPU page usage tracked for gpu_mem + * tracepoints + */ + atomic64_t global_total_pages; +#endif + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules new file mode 100644 index 000000000000..3a0c4ccc4224 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -0,0 +1,2 @@ +15015404425 PLATFORM(LUNARLAKE) + PLATFORM(PANTHERLAKE) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 31f688e953d7..f931ff9b1ec0 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -167,7 +167,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { - u64 sz = bo->size; + u64 sz = xe_bo_size(bo); u32 mem_type = bo->ttm.resource->mem_type; xe_bo_assert_held(bo); diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 96732613b4b7..af7916315ac6 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -258,11 +258,13 @@ static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, struct eu_stall_open_properties *props) { - if (value >= xe->info.gt_count) { + struct xe_gt *gt = xe_device_get_gt(xe, value); + + if (!gt) { drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); return -EINVAL; } - props->gt = xe_device_get_gt(xe, value); + props->gt = gt; return 0; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index fee22358cc09..8991b4aed440 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -610,7 +610,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) return 
-EFAULT; - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) return -EINVAL; if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 8a5cba22b586..c59a9b330697 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -64,7 +64,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) { int i, j; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) init_domain(fw, XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, FORCEWAKE_ACK_RENDER); diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index ed9183599e31..247e41c1c48d 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -18,8 +18,8 @@ " *\n" \ " * This file was generated from rules: %s\n" \ " */\n" \ - "#ifndef _GENERATED_XE_WA_OOB_\n" \ - "#define _GENERATED_XE_WA_OOB_\n" \ + "#ifndef _GENERATED_%s_\n" \ + "#define _GENERATED_%s_\n" \ "\n" \ "enum {\n" @@ -52,7 +52,7 @@ static char *strip(char *line, size_t linelen) } #define MAX_LINE_LEN 4096 -static int parse(FILE *input, FILE *csource, FILE *cheader) +static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) { char line[MAX_LINE_LEN + 1]; char *name, *prev_name = NULL, *rules; @@ -96,7 +96,7 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) } if (name) { - fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); + fprintf(cheader, "\t%s_%s = %u,\n", prefix, name, idx); /* Close previous entry before starting a new one */ if (idx) @@ -118,7 +118,41 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) if (idx) fprintf(csource, ") },\n"); - fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); + fprintf(cheader, "\t_%s_COUNT = %u\n", prefix, idx); + + return 0; +} + +/* Avoid GNU vs POSIX basename() discrepancy, just use our own */ +static const 
char *xbasename(const char *s) +{ + const char *p = strrchr(s, '/'); + + return p ? p + 1 : s; +} + +static int fn_to_prefix(const char *fn, char *prefix, size_t size) +{ + size_t len; + + fn = xbasename(fn); + len = strlen(fn); + + if (len > size - 1) + return -ENAMETOOLONG; + + memcpy(prefix, fn, len + 1); + + for (char *p = prefix; *p; p++) { + switch (*p) { + case '.': + *p = '\0'; + return 0; + default: + *p = toupper(*p); + break; + } + } return 0; } @@ -141,6 +175,7 @@ int main(int argc, const char *argv[]) [ARGS_CHEADER] = { .fn = argv[3], .mode = "w" }, }; int ret = 1; + char prefix[128]; if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); @@ -148,6 +183,9 @@ int main(int argc, const char *argv[]) return 1; } + if (fn_to_prefix(args[ARGS_CHEADER].fn, prefix, sizeof(prefix)) < 0) + return 1; + for (int i = 0; i < _ARGS_COUNT; i++) { args[i].f = fopen(args[i].fn, args[i].mode); if (!args[i].f) { @@ -157,9 +195,10 @@ int main(int argc, const char *argv[]) } } - fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn); + fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix); + ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, - args[ARGS_CHEADER].f); + args[ARGS_CHEADER].f, prefix); if (!ret) fprintf(args[ARGS_CHEADER].f, FOOTER); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7b11fa1356f0..29d4d3f51da1 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -238,6 +238,13 @@ int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size) } EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -290,6 +297,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if 
(IS_SRIOV_VF(xe)) { err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); if (err) @@ -410,7 +421,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) goto err; } - xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch)); xe_ggtt_initial_clear(ggtt); @@ -682,13 +693,13 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, return; start = node->base.start; - end = start + bo->size; + end = start + xe_bo_size(bo); pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { xe_assert(xe_bo_device(bo), bo->ttm.ttm); - for (xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &cur); + for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur); cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, pte | xe_res_dma(&cur)); @@ -696,7 +707,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, /* Prepend GPU offset */ pte |= vram_region_gpu_offset(bo->ttm.resource); - for (xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, pte + cur.start); @@ -732,7 +743,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); return 0; } @@ -751,7 +762,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, - bo->size, alignment, 0, start, end, 0); + xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { 
xe_ggtt_node_fini(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; @@ -812,7 +823,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 0bcf97063ff6..1d84bf2f2cef 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -59,7 +59,8 @@ static int memcpy_fw(struct xe_gsc *gsc) xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); - xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, + xe_bo_size(gsc->private) - fw_size); kfree(storage); @@ -82,7 +83,8 @@ static int emit_gsc_upload(struct xe_gsc *gsc) bb->cs[bb->len++] = GSC_FW_LOAD; bb->cs[bb->len++] = lower_32_bits(offset); bb->cs[bb->len++] = upper_32_bits(offset); - bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; + bb->cs[bb->len++] = (xe_bo_size(gsc->private) / SZ_4K) | + GSC_FW_LOAD_LIMIT_VALID; job = xe_bb_create_job(gsc->q, bb); if (IS_ERR(job)) { diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index d0519cd6704a..464282a89eef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -23,6 +23,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_tile.h" /* * GSC proxy: @@ -483,7 +484,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) } /* no multi-tile devices with this feature yet */ - if (tile->id > 0) { + if (!xe_tile_is_root(tile)) { xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id); return -EINVAL; } diff --git 
a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9752a38c0162..c8eda36546d3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -112,7 +112,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) if (!fw_ref) return; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); @@ -146,30 +146,23 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) static void gt_reset_worker(struct work_struct *w); -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, + long timeout_jiffies) { struct xe_sched_job *job; - struct xe_bb *bb; struct dma_fence *fence; long timeout; - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); + if (IS_ERR(job)) return PTR_ERR(job); - } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); + timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies); dma_fence_put(fence); - xe_bb_free(bb, NULL); if (timeout < 0) return timeout; else if (!timeout) @@ -178,27 +171,30 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +{ + struct xe_bb *bb; + int ret; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + ret = emit_job_sync(q, bb, HZ); + xe_bb_free(bb, NULL); + + return ret; +} + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; + int count_rmw = 0, count = 0, ret; unsigned long idx; - struct xe_sched_job *job; struct xe_bb *bb; - struct dma_fence *fence; - long timeout; 
- int count_rmw = 0; - int count = 0; - - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, false); - - if (IS_ERR(bb)) - return PTR_ERR(bb); + size_t bb_len = 0; + u32 *cs; /* count RMW registers as those will be handled separately */ xa_for_each(&sr->xa, idx, entry) { @@ -208,13 +204,34 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) ++count_rmw; } - if (count || count_rmw) - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + if (count) + bb_len += count * 2 + 1; + + if (count_rmw) + bb_len += count_rmw * 20 + 7; + + if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * Big enough to emit all of the context's 3DSTATE via + * xe_lrc_emit_hwe_state_instructions() + */ + bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + + bb = xe_bb_new(gt, bb_len, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + cs = bb->cs; if (count) { - /* emit single LRI with all non RMW regs */ + /* + * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per + * reg + 1 + */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; @@ -229,79 +246,68 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) val |= entry->set_bits; - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; + *cs++ = reg.addr; + *cs++ = val; xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); } } if (count_rmw) { - /* emit MI_MATH for each RMW reg */ + /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) continue; - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | 
MI_LRR_DST_CS_MMIO; - bb->cs[bb->len++] = entry->reg.addr; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + *cs++ = entry->reg.addr; + *cs++ = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = entry->clr_bits; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = entry->set_bits; + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = entry->clr_bits; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = entry->set_bits; - bb->cs[bb->len++] = MI_MATH(8); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1); - bb->cs[bb->len++] = CS_ALU_INSTR_AND; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2); - bb->cs[bb->len++] = CS_ALU_INSTR_OR; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = MI_MATH(8); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1); + *cs++ = CS_ALU_INSTR_AND; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2); + *cs++ = CS_ALU_INSTR_OR; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = entry->reg.addr; + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = entry->reg.addr; xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", entry->reg.addr, entry->clr_bits, entry->set_bits); } /* reset used GPR */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = 
CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = 0; + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = 0; } - xe_lrc_emit_hwe_state_instructions(q, bb); + cs = xe_lrc_emit_hwe_state_instructions(q, cs); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } + bb->len = cs - bb->cs; - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); + ret = emit_job_sync(q, bb, HZ); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - return 0; + return ret; } int xe_gt_record_default_lrcs(struct xe_gt *gt) @@ -363,14 +369,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) goto put_nop_q; } - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - xe_map_memcpy_from(xe, default_lrc, &q->lrc[0]->bo->vmap, xe_lrc_pphwsp_offset(q->lrc[0]), @@ -390,6 +388,7 @@ put_exec_queue: int xe_gt_init_early(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -419,6 +418,25 @@ int xe_gt_init_early(struct xe_gt *gt) xe_mocs_init_early(gt); + /* + * Only after this point can GT-specific MMIO operations + * (including things like communication with the GuC) + * be performed. 
+ */ + xe_gt_mmio_init(gt); + + err = xe_uc_init_noalloc(>->uc); + if (err) + return err; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + xe_gt_mcr_init_early(gt); + xe_pat_init(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + return 0; } @@ -433,7 +451,7 @@ static void dump_pat_on_error(struct xe_gt *gt) xe_pat_dump(gt, &p); } -static int gt_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_gt_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -442,7 +460,15 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (!fw_ref) return -ETIMEDOUT; - if (!xe_gt_is_media_type(gt)) { + err = xe_uc_init(>->uc); + if (err) + goto err_force_wake; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); + + if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; @@ -457,8 +483,10 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_gt_mcr_init(gt); err = xe_hw_engines_init_early(gt); - if (err) + if (err) { + dump_pat_on_error(gt); goto err_force_wake; + } err = xe_hw_engine_class_sysfs_init(gt); if (err) @@ -479,13 +507,12 @@ static int gt_fw_domain_init(struct xe_gt *gt) return 0; err_force_wake: - dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; } -static int all_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_all_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -518,7 +545,7 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* * USM has its only SA pool to non-block behind user operations */ @@ -534,7 +561,7 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { struct xe_tile *tile = gt_to_tile(gt); tile->migrate = xe_migrate_init(tile); @@ -544,7 +571,7 @@ static int all_fw_domain_init(struct xe_gt *gt) } } - err = 
xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) goto err_force_wake; @@ -554,7 +581,7 @@ static int all_fw_domain_init(struct xe_gt *gt) xe_gt_apply_ccs_mode(gt); } - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -572,39 +599,6 @@ err_force_wake: return err; } -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. - */ -int xe_gt_init_hwconfig(struct xe_gt *gt) -{ - unsigned int fw_ref; - int err; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return -ETIMEDOUT; - - xe_gt_mcr_init_early(gt); - xe_pat_init(gt); - - err = xe_uc_init(>->uc); - if (err) - goto out_fw; - - err = xe_uc_init_hwconfig(>->uc); - if (err) - goto out_fw; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_gt_enable_host_l2_vram(gt); - -out_fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return err; -} - static void xe_gt_fini(void *arg) { struct xe_gt *gt = arg; @@ -632,15 +626,15 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); + err = xe_gt_sysfs_init(gt); if (err) return err; - err = xe_gt_sysfs_init(gt); + err = gt_init_with_gt_forcewake(gt); if (err) return err; - err = gt_fw_domain_init(gt); + err = xe_gt_pagefault_init(gt); if (err) return err; @@ -654,7 +648,7 @@ int xe_gt_init(struct xe_gt *gt) xe_force_wake_init_engines(gt, gt_to_fw(gt)); - err = all_fw_domain_init(gt); + err = gt_init_with_all_forcewake(gt); if (err) return err; @@ -742,7 +736,7 @@ static int vf_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) return err; @@ -780,11 +774,11 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(>->uc); + err = xe_uc_load_hw(>->uc); if (err) return err; - if (IS_SRIOV_PF(gt_to_xe(gt)) 
&& !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) @@ -839,6 +833,9 @@ static int gt_reset(struct xe_gt *gt) goto err_out; } + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); + xe_uc_gucrc_disable(>->uc); xe_uc_stop_prepare(>->uc); xe_gt_pagefault_reset(gt); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 187fa6490eaf..41880979f4de 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -24,11 +24,10 @@ extern struct fault_attr gt_reset_failure; static inline bool xe_fault_inject_gt_reset(void) { - return should_fail(>_reset_failure, 1); + return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(>_reset_failure, 1); } struct xe_gt *xe_gt_alloc(struct xe_tile *tile); -int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); void xe_gt_mmio_init(struct xe_gt *gt); @@ -107,6 +106,11 @@ static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) xe_device_uc_enabled(gt_to_xe(gt)); } +static inline bool xe_gt_is_main_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MAIN; +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index c11206410a4d..ffb210216aa9 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -121,7 +121,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (vcs_mask || vecs_mask) gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; if (xe->info.platform != XE_DG1) { diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index d4d9730f0d2c..64a2f0d6aaf9 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ 
b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -420,12 +420,6 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) gt->steering[SQIDI_PSMI].instance_target = select & 0x1; } -static void init_steering_inst0(struct xe_gt *gt) -{ - gt->steering[INSTANCE0].group_target = 0; /* unused */ - gt->steering[INSTANCE0].instance_target = 0; /* unused */ -} - static const struct { const char *name; void (*init)(struct xe_gt *gt); @@ -436,7 +430,7 @@ static const struct { [DSS] = { "DSS", init_steering_dss }, [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, - [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, + [INSTANCE0] = { "INSTANCE 0", NULL }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; @@ -446,25 +440,17 @@ static const struct { * * Perform early software only initialization of the MCR lock to allow * the synchronization on accessing the STEER_SEMAPHORE register and - * use the xe_gt_mcr_multicast_write() function. + * use the xe_gt_mcr_multicast_write() function, plus the minimum + * safe MCR registers required for VRAM/CCS probing. */ void xe_gt_mcr_init_early(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); spin_lock_init(>->mcr_lock); -} - -/** - * xe_gt_mcr_init - Normal initialization of the MCR support - * @gt: GT structure - * - * Perform normal initialization of the MCR for all usages. - */ -void xe_gt_mcr_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); if (IS_SRIOV_VF(xe)) return; @@ -505,10 +491,27 @@ void xe_gt_mcr_init(struct xe_gt *gt) } } + /* Mark instance 0 as initialized, we need this early for VRAM and CCS probe. */ + gt->steering[INSTANCE0].initialized = true; +} + +/** + * xe_gt_mcr_init - Normal initialization of the MCR support + * @gt: GT structure + * + * Perform normal initialization of the MCR for all usages. 
+ */ +void xe_gt_mcr_init(struct xe_gt *gt) +{ + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* Select non-terminated steering target for each type */ - for (int i = 0; i < NUM_STEERING_TYPES; i++) + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + gt->steering[i].initialized = true; if (gt->steering[i].ranges && xe_steering_types[i].init) xe_steering_types[i].init(gt); + } } /** @@ -570,6 +573,10 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { if (xe_mmio_in_range(>->mmio, >->steering[type].ranges[i], reg)) { + drm_WARN(>_to_xe(gt)->drm, !gt->steering[type].initialized, + "Uninitialized usage of MCR register %s/%#x\n", + xe_steering_types[type].name, reg.addr); + *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 3522865c67c9..5a75d56d8558 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -419,6 +419,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) #define PF_MULTIPLIER 8 pf_queue->num_dw = (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); #undef PF_MULTIPLIER pf_queue->gt = gt; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index c08efca6420e..bdbd15f3afe3 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -16,6 +16,7 @@ #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" +#include "xe_guc_submit.h" #include "xe_mmio.h" #include "xe_pm.h" @@ -47,9 +48,16 @@ static int pf_alloc_metadata(struct xe_gt *gt) static void pf_init_workers(struct xe_gt *gt) { + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); INIT_WORK(>->sriov.pf.workers.restart, pf_worker_restart_func); } 
+static void pf_fini_workers(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + disable_work_sync(>->sriov.pf.workers.restart); +} + /** * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -79,6 +87,21 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +static void pf_fini_action(void *arg) +{ + struct xe_gt *gt = arg; + + pf_fini_workers(gt); +} + +static int pf_init_late(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + return devm_add_action_or_reset(xe->drm.dev, pf_fini_action, gt); +} + /** * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF. * @gt: the &xe_gt to initialize @@ -95,7 +118,15 @@ int xe_gt_sriov_pf_init(struct xe_gt *gt) if (err) return err; - return xe_gt_sriov_pf_migration_init(gt); + err = xe_gt_sriov_pf_migration_init(gt); + if (err) + return err; + + err = pf_init_late(gt); + if (err) + return err; + + return 0; } static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) @@ -172,6 +203,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_cancel_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (cancel_work_sync(>->sriov.pf.workers.restart)) + xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); +} + +/** + * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support. + * @gt: the &xe_gt + * + * This function can only be called on the PF. 
+ */ +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ + pf_cancel_restart(gt); +} + static void pf_restart(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -211,3 +261,27 @@ void xe_gt_sriov_pf_restart(struct xe_gt *gt) { pf_queue_restart(gt); } + +static void pf_flush_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + flush_work(>->sriov.pf.workers.restart); +} + +/** + * xe_gt_sriov_pf_wait_ready() - Wait until per-GT PF SR-IOV support is ready. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt) +{ + /* don't wait if there is another ongoing reset */ + if (xe_guc_read_stopped(>->uc.guc)) + return -EBUSY; + + pf_flush_restart(gt); + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f474509411c0..e7fde3f9937a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -11,8 +11,10 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); int xe_gt_sriov_pf_init(struct xe_gt *gt); +int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) @@ -29,6 +31,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } +static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ +} + static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 3556c41c041b..494909f74eb2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -104,13 
+104,13 @@ static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs } if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); void *klvs = xe_guc_buf_cpu_ptr(buf); char name[8]; - xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", - xe_sriov_function_name(vfid, name, sizeof(name)), - num_klvs, str_plural(num_klvs)); + xe_gt_sriov_dbg(gt, "pushed %s config with %u KLV%s:\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs)); xe_guc_klv_print(klvs, num_dwords, &p); } @@ -238,26 +238,35 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i } /* Return: number of configuration dwords written */ -static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +static u32 encode_ggtt(u32 *cfg, u64 start, u64 size, bool details) { u32 n = 0; - if (xe_ggtt_node_allocated(config->ggtt_region)) { - if (details) { - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region->base.start); - cfg[n++] = upper_32_bits(config->ggtt_region->base.start); - } - - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region->base.size); - cfg[n++] = upper_32_bits(config->ggtt_region->base.size); + if (details) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); + cfg[n++] = lower_32_bits(start); + cfg[n++] = upper_32_bits(start); } + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); + cfg[n++] = lower_32_bits(size); + cfg[n++] = upper_32_bits(size); + return n; } /* Return: number of configuration dwords written */ +static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +{ + struct xe_ggtt_node *node = config->ggtt_region; + + if (!xe_ggtt_node_allocated(node)) + return 0; + + return encode_ggtt(cfg, node->base.start, node->base.size, details); +} + +/* Return: number of 
configuration dwords written */ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) { u32 n = 0; @@ -282,8 +291,8 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool if (config->lmem_obj) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); - cfg[n++] = lower_32_bits(config->lmem_obj->size); - cfg[n++] = upper_32_bits(config->lmem_obj->size); + cfg[n++] = lower_32_bits(xe_bo_size(config->lmem_obj)); + cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj)); } cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); @@ -332,6 +341,17 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) } xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + if (vfid == PFID) { + u64 ggtt_start = xe_wopcm_size(gt_to_xe(gt)); + u64 ggtt_size = gt_to_tile(gt)->mem.ggtt->size - ggtt_start; + + /* plain PF config data will never include a real GGTT region */ + xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true)); + + /* fake PF GGTT config covers full GGTT range except reserved WOPCM */ + num_dwords += encode_ggtt(cfg + num_dwords, ggtt_start, ggtt_size, true); + } + num_klvs = xe_guc_klv_count(cfg, num_dwords); err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); @@ -376,7 +396,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) { u64 spare; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -388,7 +408,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size) { - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -443,7 +463,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, 
!xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); size = round_up(size, alignment); @@ -492,7 +512,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_ggtt_node *node = config->ggtt_region; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return xe_ggtt_node_allocated(node) ? node->base.size : 0; } @@ -560,7 +580,7 @@ int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size { int err; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) @@ -622,7 +642,7 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -693,7 +713,7 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); fair = pf_estimate_fair_ggtt(gt, num_vfs); @@ -1299,7 +1319,7 @@ static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid) struct xe_bo *bo; bo = config->lmem_obj; - return bo ? bo->size : 0; + return bo ? 
xe_bo_size(bo) : 0; } static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1327,7 +1347,17 @@ static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 si static void pf_force_lmtt_invalidate(struct xe_device *xe) { - /* TODO */ + struct xe_lmtt *lmtt; + struct xe_tile *tile; + unsigned int tid; + + xe_assert(xe, xe_device_has_lmtt(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_invalidate_hw(lmtt); + } } static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) @@ -1388,7 +1418,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); if (err) goto fail; - offset += bo->size; + offset += xe_bo_size(bo); } } @@ -1406,7 +1436,7 @@ fail: static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); if (config->lmem_obj) { @@ -1425,7 +1455,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) xe_gt_assert(gt, vfid); xe_gt_assert(gt, IS_DGFX(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1469,12 +1499,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) goto release; } - err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); + err = pf_push_vf_cfg_lmem(gt, vfid, xe_bo_size(bo)); if (unlikely(err)) goto reset_lmtt; xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", - vfid, bo->size, bo->size / SZ_1M); + vfid, xe_bo_size(bo), xe_bo_size(bo) / SZ_1M); return 0; reset_lmtt: @@ -1552,7 +1582,7 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, 
vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -1629,7 +1659,7 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!xe_device_has_lmtt(gt_to_xe(gt))) return 0; @@ -1663,7 +1693,7 @@ int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs); result = result ?: err; err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs); @@ -1991,7 +2021,7 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_device *xe = gt_to_xe(gt); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_release_vf_config_ggtt(gt, config); if (IS_DGFX(xe)) { pf_release_vf_config_lmem(gt, config); @@ -2082,7 +2112,7 @@ static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 vfid, long timeout) * Only GGTT and LMEM requires to be cleared by the PF. * GuC doorbell IDs and context IDs do not need any clearing. 
*/ - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_sanitize_ggtt(config->ggtt_region, vfid); if (IS_DGFX(xe)) err = pf_sanitize_lmem(tile, config->lmem_obj, timeout); @@ -2149,7 +2179,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool is_primary = !xe_gt_is_media_type(gt); + bool is_primary = xe_gt_is_main_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -2349,7 +2379,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return -EINVAL; if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); drm_printf(&p, "restoring VF%u config:\n", vfid); xe_guc_klv_print(buf, size / sizeof(u32), &p); @@ -2366,6 +2396,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void pf_prepare_self_config(struct xe_gt *gt) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, PFID); + + /* + * We want PF to be allowed to use all of context ID, doorbells IDs + * and whole usable GGTT area. While we can store ctxs/dbs numbers + * directly in the config structure, can't do the same with the GGTT + * configuration, so let it be prepared on demand while pushing KLVs. 
+ */ + config->num_ctxs = GUC_ID_MAX; + config->num_dbs = GUC_NUM_DOORBELLS; +} + +static int pf_push_self_config(struct xe_gt *gt) +{ + int err; + + err = pf_push_full_vf_config(gt, PFID); + if (err) { + xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n", + ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n"); + return 0; +} + static void fini_config(void *arg) { struct xe_gt *gt = arg; @@ -2389,9 +2448,18 @@ static void fini_config(void *arg) int xe_gt_sriov_pf_config_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_prepare_self_config(gt); + err = pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (err) + return err; + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); } @@ -2409,6 +2477,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt) unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); unsigned int fail = 0, skip = 0; + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) { if (xe_gt_sriov_pf_config_is_empty(gt, n)) skip++; @@ -2552,10 +2624,10 @@ int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p) if (!config->lmem_obj) continue; - string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, + string_get_size(xe_bo_size(config->lmem_obj), 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "VF%u:\t%zu\t(%s)\n", - n, config->lmem_obj->size, buf); + n, xe_bo_size(config->lmem_obj), buf); } mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 1f50aec3a059..4f7fff892bc0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -15,10 +15,11 @@ #include 
"xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_monitor.h" -#include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" +#include "xe_sriov_pf_service.h" +#include "xe_tile.h" static const char *control_cmd_to_string(u32 cmd) { @@ -1064,7 +1065,9 @@ static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) return false; - xe_gt_sriov_pf_service_reset(gt, vfid); + if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt)) + xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid); + xe_gt_sriov_pf_monitor_flr(gt, vfid); pf_enter_vf_flr_reset_mmio(gt, vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 13970d5a2867..3ed245e04d0c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -22,6 +22,7 @@ #include "xe_gt_sriov_pf_policy.h" #include "xe_gt_sriov_pf_service.h" #include "xe_pm.h" +#include "xe_sriov_pf.h" /* * /sys/kernel/debug/dri/0/ @@ -78,11 +79,6 @@ static const struct drm_info_list pf_info[] = { .data = xe_gt_sriov_pf_service_print_runtime, }, { - "negotiated_versions", - .show = xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_service_print_version, - }, - { "adverse_events", .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_monitor_print_events, @@ -210,7 +206,8 @@ static int CONFIG##_set(void *data, u64 val) \ return -EOVERFLOW; \ \ xe_pm_runtime_get(xe); \ - err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ + err = xe_sriov_pf_wait_ready(xe) ?: \ + xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \ xe_pm_runtime_put(xe); \ \ return err; \ @@ -305,7 +302,7 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne xe_gt_assert(gt, gt == extract_gt(parent)); xe_gt_assert(gt, vfid == extract_vfid(parent)); - if 
(!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare", 0644, parent, parent, &ggtt_fops); if (xe_device_has_lmtt(gt_to_xe(gt))) @@ -554,7 +551,7 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pfdentry->d_inode->i_private = gt; drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { drm_debugfs_create_files(pf_ggtt_info, ARRAY_SIZE(pf_ggtt_info), pfdentry, minor); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 821cfcc34e6b..76dd9233ef9f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -19,91 +19,7 @@ #include "xe_gt_sriov_pf_service_types.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" - -static void pf_init_versions(struct xe_gt *gt) -{ - BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); - BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); - - /* base versions may differ between platforms */ - gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; - gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; - - /* latest version is same for all platforms */ - gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; - gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; -} - -/* Return: 0 on success or a negative error code on failure. 
*/ -static int pf_negotiate_version(struct xe_gt *gt, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; - struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; - - xe_gt_assert(gt, base.major); - xe_gt_assert(gt, base.major <= latest.major); - xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); - - /* VF doesn't care - return our latest */ - if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && - wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants newer than our - return our latest */ - if (wanted_major > latest.major) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants older than min required - reject */ - if (wanted_major < base.major || - (wanted_major == base.major && wanted_minor < base.minor)) { - return -EPERM; - } - - /* previous major - return wanted, as we should still support it */ - if (wanted_major < latest.major) { - /* XXX: we are not prepared for multi-versions yet */ - xe_gt_assert(gt, base.major == latest.major); - return -ENOPKG; - } - - /* same major - return common minor */ - *major = wanted_major; - *minor = min_t(u32, latest.minor, wanted_minor); - return 0; -} - -static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - xe_gt_assert(gt, major || minor); - - gt->sriov.pf.vfs[vfid].version.major = major; - gt->sriov.pf.vfs[vfid].version.minor = minor; -} - -static void pf_disconnect(struct xe_gt *gt, u32 vfid) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - gt->sriov.pf.vfs[vfid].version.major = 0; - gt->sriov.pf.vfs[vfid].version.minor = 0; -} - -static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - return major == gt->sriov.pf.vfs[vfid].version.major && - 
minor <= gt->sriov.pf.vfs[vfid].version.minor; -} +#include "xe_sriov_pf_service.h" static const struct xe_reg tgl_runtime_regs[] = { RPM_CONFIG0, /* _MMIO(0x0d00) */ @@ -266,7 +182,7 @@ static void pf_prepare_runtime_info(struct xe_gt *gt) read_many(gt, size, regs, values); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); xe_gt_sriov_pf_service_print_runtime(gt, &p); } @@ -285,8 +201,6 @@ int xe_gt_sriov_pf_service_init(struct xe_gt *gt) { int err; - pf_init_versions(gt); - err = pf_alloc_runtime_info(gt); if (unlikely(err)) goto failed; @@ -311,47 +225,6 @@ void xe_gt_sriov_pf_service_update(struct xe_gt *gt) pf_prepare_runtime_info(gt); } -/** - * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. - * @gt: the &xe_gt - * @vfid: the VF identifier - * - * Reset a VF driver negotiated VF/PF ABI version. - * After that point, the VF driver will have to perform new version handshake - * to continue use of the PF services again. - * - * This function can only be called on PF. - */ -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) -{ - pf_disconnect(gt, vfid); -} - -/* Return: 0 on success or a negative error code on failure. */ -static int pf_process_handshake(struct xe_gt *gt, u32 vfid, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - int err; - - xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", - vfid, wanted_major, wanted_minor); - - err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); - - if (err < 0) { - xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", - vfid, wanted_major, wanted_minor, ERR_PTR(err)); - pf_disconnect(gt, vfid); - } else { - xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", - vfid, *major, *minor); - pf_connect(gt, vfid, *major, *minor); - } - - return 0; -} - /* Return: length of the response message or a negative error code on failure. 
*/ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, const u32 *request, u32 len, u32 *response, u32 size) @@ -371,7 +244,8 @@ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); - err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + err = xe_sriov_pf_service_handshake_vf(gt_to_xe(gt), origin, wanted_major, wanted_minor, + &major, &minor); if (err < 0) return err; @@ -430,8 +304,10 @@ static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, u32 remaining = 0; int ret; - if (!pf_is_negotiated(gt, origin, 1, 0)) + /* this action is available from ABI 1.0 */ + if (!xe_sriov_pf_service_is_negotiated(gt_to_xe(gt), origin, 1, 0)) return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) return -EMSGSIZE; if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) @@ -528,33 +404,3 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p return 0; } - -/** - * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. - * @gt: the &xe_gt - * @p: the &drm_printer - * - * This function is for PF use only. 
- */ -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); - struct xe_gt_sriov_pf_service_version *version; - - xe_gt_assert(gt, IS_SRIOV_PF(xe)); - - for (n = 1; n <= total_vfs; n++) { - version = &gt->sriov.pf.vfs[n].version; - if (!version->major && !version->minor) - continue; - - drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); - } - - return 0; -} - -#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) -#include "tests/xe_gt_sriov_pf_service_test.c" -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h index 56aaadf0360d..10b02c9b651c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -14,9 +14,7 @@ struct xe_gt; int xe_gt_sriov_pf_service_init(struct xe_gt *gt); void xe_gt_sriov_pf_service_update(struct xe_gt *gt); -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 9b2fc9db55b8..b282838d59e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -552,7 +552,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt) if (unlikely(err)) return err; - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { err = vf_get_lmem_info(gt); if (unlikely(err)) return err; @@ -649,7 +649,7 @@ s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt) struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return 
config->ggtt_shift; } @@ -686,21 +686,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) return 0; } -static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor) +static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_assert(xe, IS_SRIOV_VF(xe)); - gt->sriov.vf.pf_version.major = major; - gt->sriov.vf.pf_version.minor = minor; + xe->sriov.vf.pf_version.major = major; + xe->sriov.vf.pf_version.minor = minor; } -static void vf_disconnect_pf(struct xe_gt *gt) +static void vf_disconnect_pf(struct xe_device *xe) { - vf_connect_pf(gt, 0, 0); + vf_connect_pf(xe, 0, 0); } static int vf_handshake_with_pf(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; u32 major = major_wanted, minor = minor_wanted; @@ -716,13 +717,13 @@ static int vf_handshake_with_pf(struct xe_gt *gt) } xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); - vf_connect_pf(gt, major, minor); + vf_connect_pf(xe, major, minor); return 0; failed: xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", major, minor, ERR_PTR(err)); - vf_disconnect_pf(gt); + vf_disconnect_pf(xe); return err; } @@ -775,10 +776,12 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + struct xe_device *xe = gt_to_xe(gt); - return major == gt->sriov.vf.pf_version.major && - minor <= gt->sriov.vf.pf_version.minor; + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + return major == xe->sriov.vf.pf_version.major && + minor <= xe->sriov.vf.pf_version.minor; } static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) @@ -966,7 +969,6 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) struct vf_runtime_reg *rr; xe_gt_assert(gt, 
IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.pf_version.major); xe_gt_assert(gt, !reg.vf); if (reg.addr == GMD_ID.addr) { @@ -1037,7 +1039,7 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift); - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); } @@ -1073,9 +1075,10 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) */ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { + struct xe_device *xe = gt_to_xe(gt); struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version; struct xe_uc_fw_version *wanted = &gt->sriov.vf.wanted_guc_version; - struct xe_gt_sriov_vf_relay_version *pf_version = &gt->sriov.vf.pf_version; + struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version; struct xe_uc_fw_version ver; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index ef041679e9d4..298dedf4b009 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -10,16 +10,6 @@ #include "xe_uc_fw_types.h" /** - * struct xe_gt_sriov_vf_relay_version - PF ABI version details. - */ -struct xe_gt_sriov_vf_relay_version { - /** @major: major version. */ - u16 major; - /** @minor: minor version. */ - u16 minor; -}; - -/** * struct xe_gt_sriov_vf_selfconfig - VF configuration data. */ struct xe_gt_sriov_vf_selfconfig { @@ -66,8 +56,6 @@ struct xe_gt_sriov_vf { struct xe_uc_fw_version guc_version; /** @self_config: resource configurations. */ struct xe_gt_sriov_vf_selfconfig self_config; - /** @pf_version: negotiated VF/PF ABI version. 
*/ - struct xe_gt_sriov_vf_relay_version pf_version; /** @runtime: runtime data retrieved from the PF. */ struct xe_gt_sriov_vf_runtime runtime; }; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 6088df8e159c..086c12ee3d9d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -330,6 +330,40 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +static int send_tlb_invalidation_all(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) +{ + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION_ALL, + 0, /* seqno, replaced in send_tlb_invalidation */ + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + return send_tlb_invalidation(&gt->uc.guc, fence, action, ARRAY_SIZE(action)); +} + +/** + * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs. + * @gt: the &xe_gt structure + * @fence: the &xe_gt_tlb_invalidation_fence to be signaled on completion + * + * Send a request to invalidate all TLBs across PF and all VFs. + * + * Return: 0 on success, negative error code on error + */ +int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence) +{ + int err; + + xe_gt_assert(gt, gt == fence->gt); + + err = send_tlb_invalidation_all(gt, fence); + if (err) + xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err)); + + return err; +} + /* * Ensure that roundup_pow_of_two(length) doesn't overflow. 
* Note that roundup_pow_of_two() operates on unsigned long, diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 31072dbcad8e..f7f0f2eaf4b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -20,6 +20,7 @@ int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); +int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence); int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 305939c69747..8c63e3263643 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -290,11 +290,6 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } -bool xe_dss_mask_empty(const xe_dss_mask_t mask) -{ - return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); -} - /** * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant * @gt: GT to check diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index a72d26ba0653..c8140704ad4c 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -41,8 +41,6 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask) unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); -bool xe_dss_mask_empty(const xe_dss_mask_t mask); - bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7def0959da35..96344c604726 100644 --- 
a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -377,6 +377,8 @@ struct xe_gt { u16 group_target; /** @steering.instance_target: instance to steer accesses to */ u16 instance_target; + /** @steering.initialized: Whether this steering range is initialized */ + bool initialized; } steering[NUM_STEERING_TYPES]; /** diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 209e5d53c290..b1d1d6da3758 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -29,6 +29,7 @@ #include "xe_guc_db_mgr.h" #include "xe_guc_engine_activity.h" #include "xe_guc_hwconfig.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" #include "xe_guc_relay.h" @@ -59,7 +60,7 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); xe_assert(xe, addr < GUC_GGTT_TOP); - xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); + xe_assert(xe, xe_bo_size(bo) <= GUC_GGTT_TOP - addr); return addr; } @@ -420,7 +421,7 @@ static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 t buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); - xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(g2g_bo)); return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, desc, buf, G2G_BUFFER_SIZE); @@ -570,6 +571,86 @@ err_deregister: return err; } +static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dwords) +{ + u32 action[] = { + XE_GUC_ACTION_OPT_IN_FEATURE_KLV, + lower_32_bits(addr), + upper_32_bits(addr), + num_dwords + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static bool supports_dynamic_ics(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = 
guc_to_gt(guc); + + /* Dynamic ICS is available for PVC and Xe2 and newer platforms. */ + if (xe->info.platform != XE_PVC && GRAPHICS_VER(xe) < 20) + return false; + + /* + * The feature is currently not compatible with multi-lrc, so the GuC + * does not support it at all on the media engines (which are the main + * users of mlrc). On the primary GT side, to avoid it being used in + * conjunction with mlrc, we only enable it if we are in single CCS + * mode. + */ + if (xe_gt_is_media_type(gt) || gt->ccs_mode > 1) + return false; + + /* + * Dynamic ICS requires GuC v70.40.1, which maps to compatibility + * version v1.18.4. + */ + return GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4); +} + +#define OPT_IN_MAX_DWORDS 16 +int xe_guc_opt_in_features_enable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + CLASS(xe_guc_buf, buf)(&guc->buf, OPT_IN_MAX_DWORDS); + u32 count = 0; + u32 *klvs; + int ret; + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + klvs = xe_guc_buf_cpu_ptr(buf); + + /* + * The extra CAT error type opt-in was added in GuC v70.17.0, which maps + * to compatibility version v1.7.0. + * Note that the GuC allows enabling this KLV even on platforms that do + * not support the extra type; in such case the returned type variable + * will be set to a known invalid value which we can check against. 
+ */ + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE); + + if (supports_dynamic_ics(guc)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH); + + if (count) { + xe_assert(xe, count <= OPT_IN_MAX_DWORDS); + + ret = __guc_opt_in_features_enable(guc, xe_guc_buf_flush(buf), count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC opt-in features: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; @@ -577,7 +658,7 @@ static void guc_fini_hw(void *arg) unsigned int fw_ref; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_fini_hw(&guc_to_gt(guc)->uc); + xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); guc_g2g_fini(guc); @@ -627,23 +708,51 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) return 0; } -static int vf_guc_init(struct xe_guc *guc) +static int vf_guc_init_noalloc(struct xe_guc *guc) { + struct xe_gt *gt = guc_to_gt(guc); int err; - xe_guc_comm_init_early(guc); - - err = xe_guc_ct_init(&guc->ct); + err = xe_gt_sriov_vf_bootstrap(gt); if (err) return err; - err = xe_guc_relay_init(&guc->relay); + err = xe_gt_sriov_vf_query_config(gt); if (err) return err; return 0; } +int xe_guc_init_noalloc(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + xe_guc_comm_init_early(guc); + + ret = xe_guc_ct_init_noalloc(&guc->ct); + if (ret) + goto out; + + ret = xe_guc_relay_init(&guc->relay); + if (ret) + goto out; + + if (IS_SRIOV_VF(xe)) { + ret = vf_guc_init_noalloc(guc); + if (ret) + goto out; + } + + return 0; + +out: + xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret)); + return ret; +} + int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -653,13 +762,13 @@ int xe_guc_init(struct xe_guc *guc) guc->fw.type = 
XE_UC_FW_TYPE_GUC; ret = xe_uc_fw_init(&guc->fw); if (ret) - goto out; + return ret; if (!xe_uc_fw_is_enabled(&guc->fw)) return 0; if (IS_SRIOV_VF(xe)) { - ret = vf_guc_init(guc); + ret = xe_guc_ct_init(&guc->ct); if (ret) goto out; return 0; @@ -681,10 +790,6 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = xe_guc_relay_init(&guc->relay); - if (ret) - goto out; - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); @@ -693,8 +798,6 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); - xe_guc_comm_init_early(guc); - return 0; out: @@ -767,6 +870,10 @@ int xe_guc_post_load_init(struct xe_guc *guc) xe_guc_ads_populate_post_load(&guc->ads); + ret = xe_guc_opt_in_features_enable(guc); + if (ret) + return ret; + if (xe_guc_g2g_wanted(guc_to_xe(guc))) { ret = guc_g2g_start(guc); if (ret) @@ -1112,13 +1219,17 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) ret = xe_gt_sriov_vf_connect(gt); if (ret) - return ret; + goto err_out; ret = xe_gt_sriov_vf_query_runtime(gt); if (ret) - return ret; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(guc); + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 58338be44558..22cf019a11bf 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -26,6 +26,7 @@ struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); +int xe_guc_init_noalloc(struct xe_guc *guc); int xe_guc_init(struct xe_guc *guc); int xe_guc_init_post_hwconfig(struct xe_guc *guc); int xe_guc_post_load_init(struct xe_guc *guc); @@ -33,6 +34,7 @@ int xe_guc_reset(struct xe_guc *guc); int xe_guc_upload(struct xe_guc *guc); int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_opt_in_features_enable(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int 
xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 07a027755627..131cfc56be00 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -890,7 +890,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); @@ -914,7 +914,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); @@ -995,16 +995,6 @@ static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_off return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); } -static int guc_ads_update_policies(struct xe_guc_ads *ads, const struct guc_policies *policies) -{ - CLASS(xe_guc_buf_from_data, buf)(&ads_to_guc(ads)->buf, policies, sizeof(*policies)); - - if (!xe_guc_buf_is_valid(buf)) - return -ENOBUFS; - - return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); -} - /** * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy * @ads: Additional data structures object @@ -1015,13 +1005,16 @@ static int guc_ads_update_policies(struct xe_guc_ads *ads, const struct guc_poli */ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); struct guc_policies *policies; - int ret; + struct xe_guc *guc = ads_to_guc(ads); + struct xe_device *xe = ads_to_xe(ads); + CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies)); - policies = kmalloc(sizeof(*policies), GFP_KERNEL); - if (!policies) - 
return -ENOMEM; + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + policies = xe_guc_buf_cpu_ptr(buf); + memset(policies, 0, sizeof(*policies)); policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); @@ -1031,7 +1024,5 @@ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) else policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET; - ret = guc_ads_update_policies(ads, policies); - kfree(policies); - return ret; + return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); } diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 859a3ba91be5..243dad3e2418 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1817,6 +1817,12 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm str_yes_no(snapshot->kernel_reserved)); for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) { + /* + * FIXME: During devcoredump print we should avoid accessing the + * driver pointers for gt or engine. Printing should be done only + * using the snapshot captured. Here we are accessing the gt + * pointer. It should be fixed. 
+ */ list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type, capture_class, false); snapshot_print_by_list_order(snapshot, p, type, list); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 37509f619503..3f4e6a46ff16 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -35,6 +35,11 @@ #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -80,6 +85,7 @@ struct g2h_fence { u16 error; u16 hint; u16 reason; + bool cancel; bool retry; bool fail; bool done; @@ -89,15 +95,18 @@ struct g2h_fence { static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { + memset(g2h_fence, 0, sizeof(*g2h_fence)); g2h_fence->response_buffer = response_buffer; - g2h_fence->response_data = 0; - g2h_fence->response_len = 0; - g2h_fence->fail = false; - g2h_fence->retry = false; - g2h_fence->done = false; g2h_fence->seqno = ~0x0; } +static void g2h_fence_cancel(struct g2h_fence *g2h_fence) +{ + g2h_fence->cancel = true; + g2h_fence->fail = true; + g2h_fence->done = true; +} + static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) { return g2h_fence->seqno == ~0x0; @@ -189,14 +198,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) @@ -207,12 +213,10 @@ static void primelockdep(struct xe_guc_ct *ct) 
fs_reclaim_release(GFP_KERNEL); } -int xe_guc_ct_init(struct xe_guc_ct *ct) +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_bo *bo; int err; xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); @@ -238,6 +242,23 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); + if (err) + return err; + + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + ct->state = XE_GUC_CT_STATE_DISABLED; + return 0; +} +ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | @@ -247,13 +268,6 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) return PTR_ERR(bo); ct->bo = bo; - - err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); - if (err) - return err; - - xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); - ct->state = XE_GUC_CT_STATE_DISABLED; return 0; } ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ @@ -374,9 +388,13 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) return ret > 0 ? 
-EPROTO : ret; } -static void xe_guc_ct_set_state(struct xe_guc_ct *ct, +static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state) { + struct xe_gt *gt = ct_to_gt(ct); + struct g2h_fence *g2h_fence; + unsigned long idx; + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ @@ -388,8 +406,20 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, ct->g2h_outstanding = 0; ct->state = state; + xe_gt_dbg(gt, "GuC CT communication channel %s\n", + state == XE_GUC_CT_STATE_STOPPED ? "stopped" : + str_enabled_disabled(state == XE_GUC_CT_STATE_ENABLED)); + spin_unlock_irq(&ct->fast_lock); + /* cancel all in-flight send-recv requests */ + xa_for_each(&ct->fence_lookup, idx, g2h_fence) + g2h_fence_cancel(g2h_fence); + + /* make sure guc_ct_send_recv() will see g2h_fence changes */ + smp_mb(); + wake_up_all(&ct->g2h_fence_wq); + /* * Lockdep doesn't like this under the fast lock and he destroy only * needs to be serialized with the send path which ct lock provides. 
@@ -443,7 +473,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); - xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size); + xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -459,11 +489,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) if (err) goto err_out; - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED); smp_mb(); wake_up_all(&ct->wq); - xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); if (ct_needs_safe_mode(ct)) ct_enter_safe_mode(ct); @@ -504,7 +533,7 @@ static void stop_g2h_handler(struct xe_guc_ct *ct) */ void xe_guc_ct_disable(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); ct_exit_safe_mode(ct); stop_g2h_handler(ct); } @@ -520,7 +549,7 @@ void xe_guc_ct_stop(struct xe_guc_ct *ct) if (!xe_guc_ct_initialized(ct)) return; - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -1083,6 +1112,11 @@ retry_same_fence: goto retry; } if (g2h_fence.fail) { + if (g2h_fence.cancel) { + xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]); + ret = -ECANCELED; + goto unlock; + } xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; @@ -1091,6 +1125,7 @@ retry_same_fence: if (ret > 0) ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data; +unlock: mutex_unlock(&ct->lock); return ret; @@ -1897,7 +1932,7 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo return NULL; if (ct->bo && want_ctb) { - snapshot->ctb_size = ct->bo->size; + snapshot->ctb_size = xe_bo_size(ct->bo); snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? 
GFP_ATOMIC : GFP_KERNEL); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 99c5dec446f2..18d4225e6502 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -11,6 +11,7 @@ struct drm_printer; struct xe_device; +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); int xe_guc_ct_init(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 38039c411387..c01ccb35dc75 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -79,7 +79,7 @@ static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log * * Also, can't use vmalloc as might be called from atomic context. So need * to break the buffer up into smaller chunks that can be allocated. */ - snapshot->size = log->bo->size; + snapshot->size = xe_bo_size(log->bo); snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE); snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy), diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 9fab5f5b10fa..68b192fe3b32 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,8 +5,11 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> @@ -52,9 +55,11 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 #define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -142,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return 
-ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -554,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -564,26 +618,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + 
return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -597,24 +653,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -627,24 +687,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -658,24 +722,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 
0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; + guard(mutex)(&pc->freq_lock); - pc->user_requested_max = freq; - -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -873,30 +927,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. 
*/ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(>->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 
+ */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..52ecdd5ddbff 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..c02053948a57 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index df7a5a4eec74..cafb47711e9b 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -908,12 +908,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job 
/ process_msg entry points */ xe_sched_submission_stop(sched); @@ -1084,7 +1085,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -1092,12 +1093,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * list so job can be freed and kick scheduler ensuring free job is not * lost. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; - } + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) + return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -1130,7 +1127,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { @@ -1265,7 +1263,7 @@ trigger_reset: /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; sched_enable: enable_scheduling(q); @@ -1275,10 +1273,8 @@ rearm: * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. 
*/ - xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_NO_HANG; } static void __guc_exec_queue_fini_async(struct work_struct *w) @@ -2090,12 +2086,16 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; u32 guc_id; + u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; - if (unlikely(len < 1)) + if (unlikely(!len || len > 2)) return -EPROTO; guc_id = msg[0]; + if (len == 2) + type = msg[1]; + if (guc_id == GUC_ID_UNKNOWN) { /* * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF @@ -2109,8 +2109,19 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, if (unlikely(!q)) return -EPROTO; - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + /* + * The type is HW-defined and changes based on platform, so we don't + * decode it in the kernel and only check if it is valid. + * See bspec 54047 and 72187 for details. 
+ */ + if (type != XE_GUC_CAT_ERR_TYPE_INVALID) + xe_gt_dbg(gt, + "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", + type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + else + xe_gt_dbg(gt, + "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); trace_xe_exec_queue_memory_cat_error(q); diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 27d11e06a82b..6d7b62724126 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -11,15 +11,12 @@ #include "xe_device_types.h" #include "xe_drv.h" #include "xe_heci_gsc.h" +#include "regs/xe_gsc_regs.h" #include "xe_platform_types.h" #include "xe_survivability_mode.h" #define GSC_BAR_LENGTH 0x00000FFC -#define DG1_GSC_HECI2_BASE 0x259000 -#define PVC_GSC_HECI2_BASE 0x285000 -#define DG2_GSC_HECI2_BASE 0x374000 - static void heci_gsc_irq_mask(struct irq_data *d) { /* generic irq handling */ diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 6a846e4cb221..7e43b2dd6a32 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -171,7 +171,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) sizeof(struct pxp43_new_huc_auth_in)); wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, xe_bo_ggtt_addr(huc->fw.bo), - huc->fw.bo->size); + xe_bo_size(huc->fw.bo)); do { err = xe_gsc_pkt_submit_kernel(>->uc.gsc, ggtt_offset, wr_offset, ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE, diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 3439c8522d01..796ba8c34a16 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1059,12 +1059,13 @@ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) { + struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id); unsigned int idx; if (eci.engine_class >= 
ARRAY_SIZE(user_to_xe_engine_class)) return NULL; - if (eci.gt_id >= xe->info.gt_count) + if (!gt) return NULL; idx = array_index_nospec(eci.engine_class, diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 2d68c5b5262a..c926f840c87b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -13,15 +13,6 @@ #include "xe_vm.h" static void -hw_engine_group_free(struct drm_device *drm, void *arg) -{ - struct xe_hw_engine_group *group = arg; - - destroy_workqueue(group->resume_wq); - kfree(group); -} - -static void hw_engine_group_resume_lr_jobs_func(struct work_struct *w) { struct xe_exec_queue *q; @@ -53,7 +44,7 @@ hw_engine_group_alloc(struct xe_device *xe) struct xe_hw_engine_group *group; int err; - group = kzalloc(sizeof(*group), GFP_KERNEL); + group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -61,14 +52,14 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group->resume_wq) return ERR_PTR(-ENOMEM); + err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq); + if (err) + return ERR_PTR(err); + init_rwsem(&group->mode_sem); INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); INIT_LIST_HEAD(&group->exec_queue_list); - err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); - if (err) - return ERR_PTR(err); - return group; } @@ -84,25 +75,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) enum xe_hw_engine_id id; struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; struct xe_device *xe = gt_to_xe(gt); - int err; group_rcs_ccs = hw_engine_group_alloc(xe); - if (IS_ERR(group_rcs_ccs)) { - err = PTR_ERR(group_rcs_ccs); - goto err_group_rcs_ccs; - } + if (IS_ERR(group_rcs_ccs)) + return PTR_ERR(group_rcs_ccs); group_bcs = hw_engine_group_alloc(xe); - if (IS_ERR(group_bcs)) { - err = PTR_ERR(group_bcs); - goto err_group_bcs; - } + if 
(IS_ERR(group_bcs)) + return PTR_ERR(group_bcs); group_vcs_vecs = hw_engine_group_alloc(xe); - if (IS_ERR(group_vcs_vecs)) { - err = PTR_ERR(group_vcs_vecs); - goto err_group_vcs_vecs; - } + if (IS_ERR(group_vcs_vecs)) + return PTR_ERR(group_vcs_vecs); for_each_hw_engine(hwe, gt, id) { switch (hwe->class) { @@ -125,15 +109,6 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) } return 0; - -err_group_vcs_vecs: - kfree(group_vcs_vecs); -err_group_bcs: - kfree(group_bcs); -err_group_rcs_ccs: - kfree(group_rcs_ccs); - - return err; } /** diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index f08fc4377d25..c17ed1ae8649 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -332,6 +332,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe int ret = 0; u32 reg_val, max; struct xe_reg rapl_limit; + u64 max_supp_power_limit = 0; mutex_lock(&hwmon->hwmon_lock); @@ -356,6 +357,20 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe goto unlock; } + /* + * If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to + * the supported maximum (U12.3 format). + * This is to avoid truncation during reg_val calculation below and ensure the valid + * power limit is sent for pcode which would clamp it to card-supported value. + */ + max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER; + if (value > max_supp_power_limit) { + value = max_supp_power_limit; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected %s exceeds channel %d limit\n", + PWR_ATTR_TO_STR(attr), channel); + } + /* Computation in 64-bits to avoid overflow. Round to nearest. 
*/ reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); @@ -739,9 +754,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, { int ret; u32 uval; + u64 max_crit_power_curr = 0; mutex_lock(&hwmon->hwmon_lock); + /* + * If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1 + * max supported value, clamp it to the command's max (U10.6 format). + * This is to avoid truncation during uval calculation below and ensure the valid power + * limit is sent for pcode which would clamp it to card-supported value. + */ + max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor; + if (value > max_crit_power_curr) { + value = max_crit_power_curr; + drm_info(&hwmon->xe->drm, + "Power limit clamped as selected exceeds channel %d limit\n", + channel); + } uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); ret = xe_hwmon_pcode_write_i1(hwmon, uval); diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c new file mode 100644 index 000000000000..bc7dc2099470 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Intel Xe I2C attached Microcontroller Units (MCU) + * + * Copyright (C) 2025 Intel Corporation. 
+ */ + +#include <linux/array_size.h> +#include <linux/container_of.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include <linux/property.h> +#include <linux/regmap.h> +#include <linux/sprintf.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include "regs/xe_i2c_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_i2c.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" + +/** + * DOC: Xe I2C devices + * + * Register a platform device for the I2C host controller (Synpsys DesignWare + * I2C) if the registers of that controller are mapped to the MMIO, and also the + * I2C client device for the Add-In Management Controller (the MCU) attached to + * the host controller. + * + * See drivers/i2c/busses/i2c-designware-* for more information on the I2C host + * controller. 
+ */ + +static const char adapter_name[] = "i2c_designware"; + +static const struct property_entry xe_i2c_adapter_properties[] = { + PROPERTY_ENTRY_STRING("compatible", "intel,xe-i2c"), + PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_FAST_MODE_PLUS_FREQ), + { } +}; + +static inline void xe_i2c_read_endpoint(struct xe_mmio *mmio, void *ep) +{ + u32 *val = ep; + + val[0] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_PREFIX); + val[1] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_POSTFIX); +} + +static void xe_i2c_client_work(struct work_struct *work) +{ + struct xe_i2c *i2c = container_of(work, struct xe_i2c, work); + struct i2c_board_info info = { + .type = "amc", + .flags = I2C_CLIENT_HOST_NOTIFY, + .addr = i2c->ep.addr[1], + }; + + i2c->client[0] = i2c_new_client_device(i2c->adapter, &info); +} + +static int xe_i2c_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct xe_i2c *i2c = container_of(nb, struct xe_i2c, bus_notifier); + struct i2c_adapter *adapter = i2c_verify_adapter(data); + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE && + adapter && dev->parent == &i2c->pdev->dev) { + i2c->adapter = adapter; + schedule_work(&i2c->work); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static int xe_i2c_register_adapter(struct xe_i2c *i2c) +{ + struct pci_dev *pci = to_pci_dev(i2c->drm_dev); + struct platform_device *pdev; + struct fwnode_handle *fwnode; + int ret; + + fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); + if (IS_ERR(fwnode)) + return PTR_ERR(fwnode); + + /* + * Not using platform_device_register_full() here because we don't have + * a handle to the platform_device before it returns. xe_i2c_notifier() + * uses that handle, but it may be called before + * platform_device_register_full() is done. 
+ */ + pdev = platform_device_alloc(adapter_name, pci_dev_id(pci)); + if (!pdev) { + ret = -ENOMEM; + goto err_fwnode_remove; + } + + if (i2c->adapter_irq) { + struct resource res; + + res = DEFINE_RES_IRQ_NAMED(i2c->adapter_irq, "xe_i2c"); + + ret = platform_device_add_resources(pdev, &res, 1); + if (ret) + goto err_pdev_put; + } + + pdev->dev.parent = i2c->drm_dev; + pdev->dev.fwnode = fwnode; + i2c->adapter_node = fwnode; + i2c->pdev = pdev; + + ret = platform_device_add(pdev); + if (ret) + goto err_pdev_put; + + return 0; + +err_pdev_put: + platform_device_put(pdev); +err_fwnode_remove: + fwnode_remove_software_node(fwnode); + + return ret; +} + +static void xe_i2c_unregister_adapter(struct xe_i2c *i2c) +{ + platform_device_unregister(i2c->pdev); + fwnode_remove_software_node(i2c->adapter_node); +} + +/** + * xe_i2c_irq_handler: Handler for I2C interrupts + * @xe: xe device instance + * @master_ctl: interrupt register + * + * Forward interrupts generated by the I2C host adapter to the I2C host adapter + * driver. 
+ */ +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) +{ + if (!xe->i2c || !xe->i2c->adapter_irq) + return; + + if (master_ctl & I2C_IRQ) + generic_handle_irq_safe(xe->i2c->adapter_irq); +} + +static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw_irq_num) +{ + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + return 0; +} + +static const struct irq_domain_ops xe_i2c_irq_ops = { + .map = xe_i2c_irq_map, +}; + +static int xe_i2c_create_irq(struct xe_i2c *i2c) +{ + struct irq_domain *domain; + + if (!(i2c->ep.capabilities & XE_I2C_EP_CAP_IRQ)) + return 0; + + domain = irq_domain_create_linear(dev_fwnode(i2c->drm_dev), 1, &xe_i2c_irq_ops, NULL); + if (!domain) + return -ENOMEM; + + i2c->adapter_irq = irq_create_mapping(domain, 0); + i2c->irqdomain = domain; + + return 0; +} + +static void xe_i2c_remove_irq(struct xe_i2c *i2c) +{ + if (!i2c->irqdomain) + return; + + irq_dispose_mapping(i2c->adapter_irq); + irq_domain_remove(i2c->irqdomain); +} + +static int xe_i2c_read(void *context, unsigned int reg, unsigned int *val) +{ + struct xe_i2c *i2c = context; + + *val = xe_mmio_read32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET)); + + return 0; +} + +static int xe_i2c_write(void *context, unsigned int reg, unsigned int val) +{ + struct xe_i2c *i2c = context; + + xe_mmio_write32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET), val); + + return 0; +} + +static const struct regmap_config i2c_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_read = xe_i2c_read, + .reg_write = xe_i2c_write, + .fast_io = true, +}; + +void xe_i2c_pm_suspend(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return; + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +void xe_i2c_pm_resume(struct xe_device *xe, 
bool d3cold) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return; + + if (d3cold) + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY); + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +static void xe_i2c_remove(void *data) +{ + struct xe_i2c *i2c = data; + unsigned int i; + + for (i = 0; i < XE_I2C_MAX_CLIENTS; i++) + i2c_unregister_device(i2c->client[i]); + + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + xe_i2c_unregister_adapter(i2c); + xe_i2c_remove_irq(i2c); +} + +/** + * xe_i2c_probe: Probe the I2C host adapter and the I2C clients attached to it + * @xe: xe device instance + * + * Register all the I2C devices described in the I2C Endpoint data structure. + * + * Return: 0 on success, error code on failure + */ +int xe_i2c_probe(struct xe_device *xe) +{ + struct device *drm_dev = xe->drm.dev; + struct xe_i2c_endpoint ep; + struct regmap *regmap; + struct xe_i2c *i2c; + int ret; + + if (xe->info.platform != XE_BATTLEMAGE) + return 0; + + if (IS_SRIOV_VF(xe)) + return 0; + + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); + if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return 0; + + i2c = devm_kzalloc(drm_dev, sizeof(*i2c), GFP_KERNEL); + if (!i2c) + return -ENOMEM; + + INIT_WORK(&i2c->work, xe_i2c_client_work); + i2c->mmio = xe_root_tile_mmio(xe); + i2c->drm_dev = drm_dev; + i2c->ep = ep; + xe->i2c = i2c; + + /* PCI PM isn't aware of this device, bring it up and match it with SGUnit state. 
*/ + xe_i2c_pm_resume(xe, true); + + regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + i2c->bus_notifier.notifier_call = xe_i2c_notifier; + ret = bus_register_notifier(&i2c_bus_type, &i2c->bus_notifier); + if (ret) + return ret; + + ret = xe_i2c_create_irq(i2c); + if (ret) + goto err_unregister_notifier; + + ret = xe_i2c_register_adapter(i2c); + if (ret) + goto err_remove_irq; + + return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c); + +err_remove_irq: + xe_i2c_remove_irq(i2c); + +err_unregister_notifier: + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h new file mode 100644 index 000000000000..b767ed8ce52b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_H_ +#define _XE_I2C_H_ + +#include <linux/bits.h> +#include <linux/notifier.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +struct device; +struct fwnode_handle; +struct i2c_adapter; +struct i2c_client; +struct irq_domain; +struct platform_device; +struct xe_device; +struct xe_mmio; + +#define XE_I2C_MAX_CLIENTS 3 + +#define XE_I2C_EP_COOKIE_DEVICE 0xde + +/* Endpoint Capabilities */ +#define XE_I2C_EP_CAP_IRQ BIT(0) + +struct xe_i2c_endpoint { + u8 cookie; + u8 capabilities; + u16 addr[XE_I2C_MAX_CLIENTS]; +}; + +struct xe_i2c { + struct fwnode_handle *adapter_node; + struct platform_device *pdev; + struct i2c_adapter *adapter; + struct i2c_client *client[XE_I2C_MAX_CLIENTS]; + + struct notifier_block bus_notifier; + struct work_struct work; + + struct irq_domain *irqdomain; + int adapter_irq; + + struct xe_i2c_endpoint ep; + struct device *drm_dev; + + struct xe_mmio *mmio; +}; + +#if IS_ENABLED(CONFIG_I2C) +int xe_i2c_probe(struct xe_device *xe); +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_i2c_pm_suspend(struct xe_device 
*xe); +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); +#else +static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } +static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } +static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } +static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5362d3174b06..5df5b8c2a3e4 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,10 +18,12 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_i2c.h" #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_pxp.h" #include "xe_sriov.h" +#include "xe_tile.h" /* * Interrupt registers for a unit are always consecutive and ordered @@ -160,7 +162,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) dmask = irqs << 16 | irqs; smask = irqs << 16; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* Enable interrupts for each engine class */ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); if (ccs_mask) @@ -260,7 +262,7 @@ gt_engine_identity(struct xe_device *xe, static void gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) { - if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + if (instance == OTHER_GUC_INSTANCE && xe_gt_is_main_type(gt)) return xe_guc_irq_handler(>->uc.guc, iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) return xe_guc_irq_handler(>->uc.guc, iir); @@ -476,6 +478,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) if (xe->info.has_heci_cscfi) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); + xe_i2c_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } } @@ -550,7 +553,7 @@ static void xelp_irq_reset(struct xe_tile *tile) static void dg1_irq_reset(struct xe_tile *tile) { - if (tile->id == 0) + if 
(xe_tile_is_root(tile)) dg1_intr_disable(tile_to_xe(tile)); gt_irq_reset(tile); diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 63db66df064b..a2000307d5bf 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -11,6 +11,7 @@ #include "xe_assert.h" #include "xe_bo.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_lmtt.h" #include "xe_map.h" #include "xe_mmio.h" @@ -78,6 +79,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, xe_bo_size(bo)); pt->level = level; pt->bo = bo; @@ -91,6 +95,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -216,6 +223,58 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt) lmtt_setup_dir_ptr(lmtt); } +static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_gt_tlb_invalidation_fence fences[XE_MAX_GT_PER_TILE]; + struct xe_gt_tlb_invalidation_fence *fence = fences; + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_gt *gt; + int result = 0; + int err; + u8 id; + + for_each_gt_on_tile(gt, tile, id) { + xe_gt_tlb_invalidation_fence_init(gt, fence, true); + err = xe_gt_tlb_invalidation_all(gt, fence); + result = result ?: err; + fence++; + } + + lmtt_debug(lmtt, "num_fences=%d err=%d\n", (int)(fence - fences), result); + + /* + * It is fine to wait for all fences, even for those which covers the + * invalidation request that failed, as such fence should be already + * marked as signaled. + */ + fence = fences; + for_each_gt_on_tile(gt, tile, id) + xe_gt_tlb_invalidation_fence_wait(fence++); + + return result; +} + +/** + * xe_lmtt_invalidate_hw - Invalidate LMTT hardware. 
+ * @lmtt: the &xe_lmtt to invalidate + * + * Send requests to all GuCs on this tile to invalidate all TLBs. + * + * This function should be called only when running as a PF driver. + */ +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_device *xe = lmtt_to_xe(lmtt); + int err; + + lmtt_assert(lmtt, IS_SRIOV_PF(xe)); + + err = lmtt_invalidate_hw(lmtt); + if (err) + xe_sriov_warn(xe, "LMTT%u invalidation failed (%pe)", + lmtt_to_tile(lmtt)->id, ERR_PTR(err)); +} + static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, u64 pte, unsigned int idx) { @@ -226,9 +285,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: @@ -265,6 +329,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) return; lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid); + lmtt_invalidate_hw(lmtt); lmtt_assert(lmtt, pd->level > 0); lmtt_assert(lmtt, pt->level == pd->level - 1); @@ -386,11 +451,11 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo u64 addr, vram_offset; lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); - lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); + lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size)); lmtt_assert(lmtt, xe_bo_is_vram(bo)); vram_offset = vram_region_gpu_offset(bo->ttm.resource); - xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); while (cur.remaining) { addr = xe_res_dma(&cur); addr += vram_offset; /* XXX */ diff --git 
a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h index cb10ef994db6..75a234fbf367 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.h +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -15,6 +15,7 @@ struct xe_lmtt_ops; #ifdef CONFIG_PCI_IOV int xe_lmtt_init(struct xe_lmtt *lmtt); void xe_lmtt_init_hw(struct xe_lmtt *lmtt); +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt); int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 37598588a54f..6d38411bdeba 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -39,15 +39,46 @@ #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) #define LRC_PPHWSP_SIZE SZ_4K +#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K #define LRC_WA_BB_SIZE SZ_4K +/* + * Layout of the LRC and associated data allocated as + * lrc->bo: + * + * Region Size + * +============================+=================================+ <- __xe_lrc_ring_offset() + * | Ring | ring_size, see | + * | | xe_lrc_init() | + * +============================+=================================+ <- __xe_lrc_pphwsp_offset() + * | PPHWSP (includes SW state) | 4K | + * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset() + * | Engine Context Image | n * 4K, see | + * | | xe_gt_lrc_size() | + * +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset() + * | Indirect Ring State Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_RING_STATE | + * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset() + * | Indirect Context Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_CTX | + * +============================+=================================+ <- 
__xe_lrc_wa_bb_offset() + * | WA BB Per Ctx | 4k | + * +============================+=================================+ <- xe_bo_size(lrc->bo) + */ + static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) { return gt_to_xe(lrc->fence_ctx.gt); } +static bool +gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) +{ + return false; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); @@ -582,8 +613,6 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) if (xe_gt_has_indirect_ring_state(hwe->gt)) regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ } static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) @@ -717,8 +746,23 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; + u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - + LRC_INDIRECT_RING_STATE_SIZE; + + if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) + offset -= LRC_INDIRECT_CTX_BO_SIZE; + + return offset; +} + +static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; +} + +static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } #define DECL_MAP_ADDR_HELPERS(elem) \ @@ -940,8 +984,10 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. 
*/ #define CONTEXT_ACTIVE 1ULL -static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - u32 *batch, size_t max_len) +static ssize_t setup_utilization_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, + size_t max_len) { u32 *cmd = batch; @@ -968,91 +1014,187 @@ static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine * return cmd - batch; } -struct wa_bb_setup { +struct bo_setup { ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *batch, size_t max_size); }; -static size_t wa_bb_offset(struct xe_lrc *lrc) -{ - return lrc->bo->size - LRC_WA_BB_SIZE; -} +struct bo_setup_state { + /* Input: */ + struct xe_lrc *lrc; + struct xe_hw_engine *hwe; + size_t max_size; + size_t reserve_dw; + unsigned int offset; + const struct bo_setup *funcs; + unsigned int num_funcs; -static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) + /* State: */ + u32 *buffer; + u32 *ptr; + unsigned int written; +}; + +static int setup_bo(struct bo_setup_state *state) { - const size_t max_size = LRC_WA_BB_SIZE; - static const struct wa_bb_setup funcs[] = { - { .setup = wa_bb_setup_utilization }, - }; ssize_t remain; - u32 *cmd, *buf = NULL; - if (lrc->bo->vmap.is_iomem) { - buf = kmalloc(max_size, GFP_KERNEL); - if (!buf) + if (state->lrc->bo->vmap.is_iomem) { + state->buffer = kmalloc(state->max_size, GFP_KERNEL); + if (!state->buffer) return -ENOMEM; - cmd = buf; + state->ptr = state->buffer; } else { - cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); + state->ptr = state->lrc->bo->vmap.vaddr + state->offset; + state->buffer = NULL; } - remain = max_size / sizeof(*cmd); + remain = state->max_size / sizeof(u32); - for (size_t i = 0; i < ARRAY_SIZE(funcs); i++) { - ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain); + for (size_t i = 0; i < state->num_funcs; i++) { + ssize_t len = state->funcs[i].setup(state->lrc, state->hwe, + state->ptr, remain); remain -= len; /* - * There should always be at least 1 
additional dword for - * the end marker + * Caller has asked for at least reserve_dw to remain unused. */ - if (len < 0 || xe_gt_WARN_ON(lrc->gt, remain < 1)) + if (len < 0 || + xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) goto fail; - cmd += len; - } - - *cmd++ = MI_BATCH_BUFFER_END; - - if (buf) { - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, - wa_bb_offset(lrc), buf, - (cmd - buf) * sizeof(*cmd)); - kfree(buf); + state->ptr += len; + state->written += len; } - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + - wa_bb_offset(lrc) + 1); - return 0; fail: - kfree(buf); + kfree(state->buffer); return -ENOSPC; } -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) +static void finish_bo(struct bo_setup_state *state) +{ + if (!state->buffer) + return; + + xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, + state->offset, state->buffer, + state->written * sizeof(u32)); + kfree(state->buffer); +} + +static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static const struct bo_setup funcs[] = { + { .setup = setup_utilization_wa }, + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = LRC_WA_BB_SIZE, + .reserve_dw = 1, + .offset = __xe_lrc_wa_bb_offset(lrc), + .funcs = funcs, + .num_funcs = ARRAY_SIZE(funcs), + }; + int ret; + + ret = setup_bo(&state); + if (ret) + return ret; + + *state.ptr++ = MI_BATCH_BUFFER_END; + state.written++; + + finish_bo(&state); + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bo) + state.offset + 1); + + return 0; +} + +static int +setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static struct bo_setup rcs_funcs[] = { + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = (63 * 64) /* max 63 cachelines */, + .offset = __xe_lrc_indirect_ctx_offset(lrc), + }; + int ret; + + if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)) + return 0; + + if 
(hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) { + state.funcs = rcs_funcs; + state.num_funcs = ARRAY_SIZE(rcs_funcs); + } + + if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) + return 0; + + ret = setup_bo(&state); + if (ret) + return ret; + + /* + * Align to 64B cacheline so there's no garbage at the end for CS to + * execute: size for indirect ctx must be a multiple of 64. + */ + while (state.written & 0xf) { + *state.ptr++ = MI_NOOP; + state.written++; + } + + finish_bo(&state); + + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX, + (xe_bo_ggtt_addr(lrc->bo) + state.offset) | + /* Size in CLs. */ + (state.written * sizeof(u32) / 64)); + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX_OFFSET, + CTX_INDIRECT_CTX_OFFSET_DEFAULT); + + return 0; +} static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, u32 ring_size, u16 msix_vec, u32 init_flags) { struct xe_gt *gt = hwe->gt; + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; - void *init_data = NULL; u32 arb_enable; - u32 lrc_size; u32 bo_flags; int err; kref_init(&lrc->refcount); lrc->gt = gt; + lrc->size = lrc_size; lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + lrc->ring.size = ring_size; + lrc->ring.tail = 0; + + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; + bo_size += LRC_INDIRECT_CTX_BO_SIZE; + } + if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; @@ -1061,45 +1203,36 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (vm && vm->xef) /* userspace */ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. 
- */ - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, - lrc_size + LRC_WA_BB_SIZE, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->size = lrc_size; - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - /* * Init Per-Process of HW status Page, LRC / context state to known - * values + * values. If there's already a primed default_lrc, just copy it, otherwise + * it's the early submission to record the lrc: build a new empty one from + * scratch. */ map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { + if (gt->default_lrc[hwe->class]) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); + lrc_size - LRC_PPHWSP_SIZE); } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); + void *init_data = empty_lrc_data(hwe); + + if (!init_data) { + err = -ENOMEM; + goto err_lrc_finish; + } + + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); kfree(init_data); } @@ -1153,7 +1286,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); @@ -1183,6 +1316,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (err) goto err_lrc_finish; + err = setup_indirect_ctx(lrc, hwe); + if (err) + goto err_lrc_finish; + return 0; err_lrc_finish: @@ -1775,7 +1912,7 @@ static const struct 
instr_state xe_hpg_svg_state[] = { { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, }; -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) { struct xe_gt *gt = q->hwe->gt; struct xe_device *xe = gt_to_xe(gt); @@ -1810,7 +1947,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; + return cs; } for (int i = 0; i < state_table_size; i++) { @@ -1833,12 +1970,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b instr == CMD_3DSTATE_DRAWING_RECTANGLE) instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - bb->cs[bb->len] = instr; + *cs = instr; if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); + *cs |= (num_dw - 2); - bb->len += num_dw; + cs += num_dw; } + + return cs; } struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) @@ -1859,8 +1998,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - - LRC_WA_BB_SIZE; + snapshot->lrc_size = lrc->size; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index eb6e8de8c939..b6c8053c581b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -112,7 +112,7 @@ void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class); -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, 
u32 *cs); struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 883e550a9423..e9883706e004 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -22,14 +22,15 @@ struct xe_lrc { */ struct xe_bo *bo; - /** @size: size of lrc including any indirect ring state page */ + /** @size: size of the lrc and optional indirect ring state */ u32 size; /** @gt: gt which this LRC belongs to */ struct xe_gt *gt; /** @flags: LRC flags */ -#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 +#define XE_LRC_FLAG_INDIRECT_CTX 0x1 +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2 u32 flags; /** @refcount: ref count of this lrc */ diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..84f412fd3c5d 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -82,7 +82,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE +#define MAX_PTE_PER_SDI 0x1FEU /** * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. 
@@ -203,7 +203,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); /* Need to be sure everything fits in the first PT, or create more */ - xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); + xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M); bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, @@ -214,7 +214,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, return PTR_ERR(bo); /* PT30 & PT31 reserved for 2M identity map */ - pt29_ofs = bo->size - 3 * XE_PAGE_SIZE; + pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); @@ -236,7 +236,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -247,13 +247,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level++; } if (xe->info.has_usm) { - xe_tile_assert(tile, batch->size == SZ_1M); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_1M); batch = tile->primary_gt->usm.bb_pool->bo; m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; - xe_tile_assert(tile, batch->size == SZ_512K); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_512K); - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? 
XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -306,7 +306,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { - u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; + u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, pat_index, pt30_ofs); @@ -321,7 +321,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; u64 vram_offset = IDENTITY_OFFSET + DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); - u64 pt31_ofs = bo->size - XE_PAGE_SIZE; + u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); @@ -408,7 +408,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) /* Special layout, prepared below.. 
*/ vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION | - XE_VM_FLAG_SET_TILE_ID(tile)); + XE_VM_FLAG_SET_TILE_ID(tile), NULL); if (IS_ERR(vm)) return ERR_CAST(vm); @@ -768,7 +768,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it, ccs_it; u64 src_L0_ofs, dst_L0_ofs; u32 src_L0_pt, dst_L0_pt; @@ -791,7 +791,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) return ERR_PTR(-EINVAL); - if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) + if (src_bo != dst_bo && XE_WARN_ON(xe_bo_size(src_bo) != xe_bo_size(dst_bo))) return ERR_PTR(-EINVAL); if (!src_is_vram) @@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) @@ -1064,7 +1064,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_device *xe = gt_to_xe(gt); bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; - u64 size = bo->size; + u64 size = xe_bo_size(bo); struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; @@ -1076,9 +1076,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_only_system_ccs = true; if (!clear_vram) - xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); + xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &src_it); else - xe_res_first(src, 0, bo->size, &src_it); + xe_res_first(src, 0, xe_bo_size(bo), &src_it); while (size) { u64 clear_L0_ofs; @@ -1407,7 +1407,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, if (idx == 
chunk) goto next_cmd; - xe_tile_assert(tile, pt_bo->size == SZ_4K); + xe_tile_assert(tile, xe_bo_size(pt_bo) == SZ_4K); /* Map a PT at most once */ if (pt_bo->update_index < 0) @@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; @@ -1815,18 +1817,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for small access and unaligned access */ - if (len & XE_CACHELINE_MASK || - ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || + !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; + void *bounce; + int err; + + BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES)); + bounce = 
kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL); + if (!bounce) + return -ENOMEM; /* * Less than ideal for large unaligned access but this should be * fairly rare, can fixup if this becomes common. */ do { - u8 bounce[XE_CACHELINE_BYTES]; - void *ptr = (void *)bounce; - int err; int copy_bytes = min_t(int, bytes_left, XE_CACHELINE_BYTES - (offset & XE_CACHELINE_MASK)); @@ -1835,22 +1841,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, 0); if (err) - return err; + break; if (write) { - memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes); + memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes); err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, - (void *)ptr, - sizeof(bounce), 0); + bounce, + XE_CACHELINE_BYTES, write); if (err) - return err; + break; } else { - memcpy(buf + buf_offset, ptr + ptr_offset, + memcpy(buf + buf_offset, bounce + ptr_offset, copy_bytes); } @@ -1859,14 +1865,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, offset += copy_bytes; } while (bytes_left); - return 0; + kfree(bounce); + return err; } dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write); if (IS_ERR(dma_addr)) return PTR_ERR(dma_addr); - xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor); + xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); do { struct dma_fence *__fence; @@ -1880,8 +1887,11 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, else current_bytes = min_t(int, bytes_left, cursor.size); - if (fence) - dma_fence_put(fence); + if (current_bytes & ~PAGE_MASK) { + int pitch = 4; + + current_bytes = min_t(int, current_bytes, S16_MAX * pitch); + } __fence = xe_migrate_vram(m, current_bytes, (unsigned long)buf & ~PAGE_MASK, @@ -1890,11 +1900,15 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct 
xe_bo *bo, XE_MIGRATE_COPY_TO_VRAM : XE_MIGRATE_COPY_TO_SRAM); if (IS_ERR(__fence)) { - if (fence) + if (fence) { dma_fence_wait(fence, false); + dma_fence_put(fence); + } fence = __fence; goto out_err; } + + dma_fence_put(fence); fence = __fence; buf += current_bytes; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7357458bc0d2..e4db8d58ea2d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -22,6 +22,9 @@ #include "xe_macros.h" #include "xe_sriov.h" #include "xe_trace.h" +#include "xe_wa.h" + +#include "generated/xe_device_wa_oob.h" static void tiles_fini(void *arg) { @@ -55,6 +58,7 @@ static void tiles_fini(void *arg) static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) { struct xe_tile *tile; + struct xe_gt *gt; u8 id; /* @@ -67,7 +71,7 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) /* Possibly override number of tile based on configuration register */ if (!xe->info.skip_mtcfg) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); - u8 tile_count; + u8 tile_count, gt_count; u32 mtcfg; /* @@ -84,12 +88,15 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) xe->info.tile_count = tile_count; /* - * FIXME: Needs some work for standalone media, but - * should be impossible with multi-tile for now: - * multi-tile platform with standalone media doesn't - * exist + * We've already setup gt_count according to the full + * tile count. Re-calculate it to only include the GTs + * that belong to the remaining tile(s). 
*/ - xe->info.gt_count = xe->info.tile_count; + gt_count = 0; + for_each_gt(gt, xe, id) + if (gt->info.id < tile_count * xe->info.max_gt_per_tile) + gt_count++; + xe->info.gt_count = gt_count; } } @@ -163,7 +170,7 @@ static void mmio_flush_pending_writes(struct xe_mmio *mmio) #define DUMMY_REG_OFFSET 0x130030 int i; - if (mmio->tile->xe->info.platform != XE_LUNARLAKE) + if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425)) return; /* 4 dummy writes */ @@ -176,7 +183,6 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readb(mmio->regs + addr); @@ -190,7 +196,6 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readw(mmio->regs + addr); @@ -217,7 +222,6 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e332f3142435..d9391bd08194 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -19,31 +19,45 @@ #include "xe_sched_job.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define DEFAULT_GUC_LOG_LEVEL 3 +#define DEFAULT_GUC_LOG_LEVEL 3 #else -#define DEFAULT_GUC_LOG_LEVEL 1 +#define DEFAULT_GUC_LOG_LEVEL 1 #endif +#define DEFAULT_PROBE_DISPLAY true +#define DEFAULT_VRAM_BAR_SIZE 0 +#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define DEFAULT_MAX_VFS ~0 +#define DEFAULT_MAX_VFS_STR "unlimited" +#define DEFAULT_WEDGED_MODE 1 +#define DEFAULT_SVM_NOTIFIER_SIZE 512 + struct xe_modparam xe_modparam = { - .probe_display = true, - .guc_log_level = DEFAULT_GUC_LOG_LEVEL, - .force_probe = CONFIG_DRM_XE_FORCE_PROBE, - .wedged_mode = 1, - 
.svm_notifier_size = 512, + .probe_display = DEFAULT_PROBE_DISPLAY, + .guc_log_level = DEFAULT_GUC_LOG_LEVEL, + .force_probe = DEFAULT_FORCE_PROBE, +#ifdef CONFIG_PCI_IOV + .max_vfs = DEFAULT_MAX_VFS, +#endif + .wedged_mode = DEFAULT_WEDGED_MODE, + .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); -MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); +MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " + "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); -MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); +MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " + "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); -MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size"); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " + "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " @@ -63,18 +77,21 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, - "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); + "Force probe options for specified devices. 
See CONFIG_DRM_XE_FORCE_PROBE for details " + "[default=" DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); MODULE_PARM_DESC(max_vfs, "Limit number of Virtual Functions (VFs) that could be managed. " - "(0 = no VFs [default]; N = allow up to N VFs)"); + "(0=no VFs; N=allow up to N VFs " + "[default=" DEFAULT_MAX_VFS_STR "])"); #endif module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, - "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang " + "[default=" __stringify(DEFAULT_WEDGED_MODE) "])"); static int xe_check_nomodeset(void) { diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c new file mode 100644 index 000000000000..61b0a1531a53 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. 
+ */ + +#include <linux/intel_dg_nvm_aux.h> +#include <linux/pci.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_mmio.h" +#include "xe_nvm.h" +#include "regs/xe_gsc_regs.h" +#include "xe_sriov.h" + +#define GEN12_GUNIT_NVM_BASE 0x00102040 +#define GEN12_DEBUG_NVM_BASE 0x00101018 + +#define GEN12_CNTL_PROTECTED_NVM_REG 0x0010100C + +#define GEN12_GUNIT_NVM_SIZE 0x80 +#define GEN12_DEBUG_NVM_SIZE 0x4 + +#define NVM_NON_POSTED_ERASE_CHICKEN_BIT BIT(13) + +#define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3) + +static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { + [0] = { .name = "DESCRIPTOR", }, + [2] = { .name = "GSC", }, + [9] = { .name = "PADDING", }, + [11] = { .name = "OptionROM", }, + [12] = { .name = "DAM", }, +}; + +static void xe_nvm_release_dev(struct device *dev) +{ +} + +static bool xe_nvm_non_posted_erase(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + + if (xe->info.platform != XE_BATTLEMAGE) + return false; + return !(xe_mmio_read32(&gt->mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & + NVM_NON_POSTED_ERASE_CHICKEN_BIT); +} + +static bool xe_nvm_writable_override(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + bool writable_override; + resource_size_t base; + + switch (xe->info.platform) { + case XE_BATTLEMAGE: + base = DG2_GSC_HECI2_BASE; + break; + case XE_PVC: + base = PVC_GSC_HECI2_BASE; + break; + case XE_DG2: + base = DG2_GSC_HECI2_BASE; + break; + case XE_DG1: + base = DG1_GSC_HECI2_BASE; + break; + default: + drm_err(&xe->drm, "Unknown platform\n"); + return true; + } + + writable_override = + !(xe_mmio_read32(&gt->mmio, HECI_FWSTS2(base)) & + HECI_FW_STATUS_2_NVM_ACCESS_MODE); + if (writable_override) + drm_info(&xe->drm, "NVM access overridden by jumper\n"); + return writable_override; +} + +int xe_nvm_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct auxiliary_device *aux_dev; + struct intel_dg_nvm_dev *nvm; + int ret; + + if
(!xe->info.has_gsc_nvm) + return 0; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return 0; + + /* Nvm pointer should be NULL here */ + if (WARN_ON(xe->nvm)) + return -EFAULT; + + xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); + if (!xe->nvm) + return -ENOMEM; + + nvm = xe->nvm; + + nvm->writable_override = xe_nvm_writable_override(xe); + nvm->non_posted_erase = xe_nvm_non_posted_erase(xe); + nvm->bar.parent = &pdev->resource[0]; + nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start; + nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1; + nvm->bar.flags = IORESOURCE_MEM; + nvm->bar.desc = IORES_DESC_NONE; + nvm->regions = regions; + + nvm->bar2.parent = &pdev->resource[0]; + nvm->bar2.start = GEN12_DEBUG_NVM_BASE + pdev->resource[0].start; + nvm->bar2.end = nvm->bar2.start + GEN12_DEBUG_NVM_SIZE - 1; + nvm->bar2.flags = IORESOURCE_MEM; + nvm->bar2.desc = IORES_DESC_NONE; + + aux_dev = &nvm->aux_dev; + + aux_dev->name = "nvm"; + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev); + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = xe_nvm_release_dev; + + ret = auxiliary_device_init(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); + goto err; + } + + ret = auxiliary_device_add(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); + auxiliary_device_uninit(aux_dev); + goto err; + } + return 0; + +err: + kfree(nvm); + xe->nvm = NULL; + return ret; +} + +void xe_nvm_fini(struct xe_device *xe) +{ + struct intel_dg_nvm_dev *nvm = xe->nvm; + + if (!xe->info.has_gsc_nvm) + return; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return; + + /* Nvm pointer should not be NULL here */ + if (WARN_ON(!nvm)) + return; + + auxiliary_device_delete(&nvm->aux_dev); + auxiliary_device_uninit(&nvm->aux_dev); + kfree(nvm); + xe->nvm = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h new file mode 100644 index 
000000000000..7f3d5f57bed0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2019-2025 Intel Corporation. All rights reserved. + */ + +#ifndef __XE_NVM_H__ +#define __XE_NVM_H__ + +struct xe_device; + +int xe_nvm_init(struct xe_device *xe); + +void xe_nvm_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4829ed46a8b4..5729e7d3e335 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -403,7 +403,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int size_exponent = __ffs(stream->oa_buffer.bo->size); + int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; @@ -435,7 +435,7 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); + memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); } static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) @@ -1065,7 +1065,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static u32 oag_buf_size_select(const struct xe_oa_stream *stream) { return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, - stream->oa_buffer.bo->size > SZ_16M ? + xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? 
OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); } @@ -1582,7 +1582,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1668,7 +1668,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { + if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1941,7 +1941,7 @@ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) /* If not provided, OA unit defaults to OA unit 0 as per uapi */ if (!param->oa_unit) - param->oa_unit = &xe_device_get_gt(oa->xe, 0)->oa.oa_unit[0]; + param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { @@ -2493,7 +2493,7 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { - if (!xe_gt_is_media_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) + if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) return 1; else if (!IS_DGFX(gt_to_xe(gt))) return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ @@ -2506,7 +2506,7 @@ static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) return XE_OA_UNIT_INVALID; - xe_gt_WARN_ON(hwe->gt, !xe_gt_is_media_type(hwe->gt)); + xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) return 0; @@ -2589,7 +2589,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) 
for (i = 0; i < num_units; i++) { struct xe_oa_unit *u = &gt->oa.oa_unit[i]; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { u->regs = __oag_regs(); u->type = DRM_XE_OA_UNIT_TYPE_OAG; } else { diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 89814b32e585..3c40ef426f0c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -38,43 +38,6 @@ enum toggle_d3cold { D3COLD_ENABLE, }; -struct xe_subplatform_desc { - enum xe_subplatform subplatform; - const char *name; - const u16 *pciidlist; -}; - -struct xe_device_desc { - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_graphics_ip; - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_media_ip; - - const char *platform_name; - const struct xe_subplatform_desc *subplatforms; - - enum xe_platform platform; - - u8 dma_mask_size; - u8 max_remote_tiles:2; - - u8 require_force_probe:1; - u8 is_dgfx:1; - - u8 has_display:1; - u8 has_fan_control:1; - u8 has_heci_gscfi:1; - u8 has_heci_cscfi:1; - u8 has_llc:1; - u8 has_mbx_power_limits:1; - u8 has_pxp:1; - u8 has_sriov:1; - u8 needs_scratch:1; - u8 skip_guc_pc:1; - u8 skip_mtcfg:1; - u8 skip_pcode:1; -}; - __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); @@ -140,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ @@ -184,6 +146,7 @@ static const struct xe_ip graphics_ips[] = { { 2004, "Xe2_LPG", &graphics_xe2 }, { 3000, "Xe3_LPG", &graphics_xe2 }, { 3001, "Xe3_LPG", &graphics_xe2 }, + { 3003, "Xe3_LPG", &graphics_xe2 }, }; /* Pre-GMDID Media IPs */ @@ -196,6 +159,7 @@ static const struct xe_ip media_ips[] = { { 1301, "Xe2_HPM", &media_xelpmp }, { 2000, "Xe2_LPM", &media_xelpmp }, { 3000, "Xe3_LPM",
&media_xelpmp }, + { 3002, "Xe3_LPM", &media_xelpmp }, }; static const struct xe_device_desc tgl_desc = { @@ -205,6 +169,7 @@ static const struct xe_device_desc tgl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -215,6 +180,7 @@ static const struct xe_device_desc rkl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -227,6 +193,7 @@ static const struct xe_device_desc adl_s_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -243,6 +210,7 @@ static const struct xe_device_desc adl_p_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -257,6 +225,7 @@ static const struct xe_device_desc adl_n_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -270,7 +239,9 @@ static const struct xe_device_desc dg1_desc = { PLATFORM(DG1), .dma_mask_size = 39, .has_display = true, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -281,6 +252,7 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; #define DG2_FEATURES \ DGFX_FEATURES, \ PLATFORM(DG2), \ + .has_gsc_nvm = 1, \ .has_heci_gscfi = 1, \ .subplatforms = (const struct xe_subplatform_desc[]) { \ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ @@ -293,6 +265,7 @@ static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, 
DG2_FEATURES, @@ -303,6 +276,7 @@ static const struct xe_device_desc dg2_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, @@ -317,7 +291,9 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { PLATFORM(PVC), .dma_mask_size = 52, .has_display = false, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .max_remote_tiles = 1, .require_force_probe = true, .has_mbx_power_limits = false, @@ -330,6 +306,7 @@ static const struct xe_device_desc mtl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, }; static const struct xe_device_desc lnl_desc = { @@ -337,6 +314,7 @@ static const struct xe_device_desc lnl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -347,7 +325,10 @@ static const struct xe_device_desc bmg_desc = { .has_display = true, .has_fan_control = true, .has_mbx_power_limits = true, + .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_sriov = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -356,7 +337,7 @@ static const struct xe_device_desc ptl_desc = { .dma_mask_size = 46, .has_display = true, .has_sriov = true, - .require_force_probe = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -590,6 +571,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; + xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; @@ -603,6 +585,10 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && desc->has_display; + + xe_assert(xe, 
desc->max_gt_per_tile > 0); + xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE); + xe->info.max_gt_per_tile = desc->max_gt_per_tile; xe->info.tile_count = 1 + desc->max_remote_tiles; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); @@ -702,10 +688,11 @@ static int xe_info_init(struct xe_device *xe, */ for_each_tile(tile, xe, id) { gt = tile->primary_gt; - gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; + gt->info.id = tile->id * xe->info.max_gt_per_tile; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.engine_mask = graphics_desc->hw_engine_mask; + xe->info.gt_count++; if (MEDIA_VER(xe) < 13 && media_desc) gt->info.engine_mask |= media_desc->hw_engine_mask; @@ -723,17 +710,10 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; gt->info.engine_mask = media_desc->hw_engine_mask; - - /* - * FIXME: At the moment multi-tile and standalone media are - * mutually exclusive on current platforms. We'll need to - * come up with a better way to number GTs if we ever wind - * up with platforms that support both together. 
- */ - drm_WARN_ON(&xe->drm, id != 0); - gt->info.id = xe->info.gt_count++; + xe->info.gt_count++; } return 0; diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index 8813efdcafbb..af05db07162e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -3,6 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/bitops.h> +#include <linux/pci.h> + +#include "regs/xe_bars.h" #include "xe_assert.h" #include "xe_device.h" #include "xe_gt_sriov_pf_config.h" @@ -12,6 +16,7 @@ #include "xe_pci_sriov.h" #include "xe_pm.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" #include "xe_sriov_pf_helpers.h" #include "xe_sriov_printk.h" @@ -127,6 +132,18 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs, } } +static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + u32 sizes; + + sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs); + if (!sizes) + return 0; + + return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes)); +} + static int pf_enable_vfs(struct xe_device *xe, int num_vfs) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -138,6 +155,10 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) xe_assert(xe, num_vfs <= total_vfs); xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs)); + err = xe_sriov_pf_wait_ready(xe); + if (err) + goto out; + /* * We must hold additional reference to the runtime PM to keep PF in D0 * during VFs lifetime, as our VFs do not implement the PM capability. 
@@ -153,6 +174,12 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) if (err < 0) goto failed; + if (IS_DGFX(xe)) { + err = resize_vf_vram_bar(xe, num_vfs); + if (err) + xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err); + } + err = pci_enable_sriov(pdev, num_vfs); if (err < 0) goto failed; @@ -169,7 +196,7 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs) failed: pf_unprovision_vfs(xe, num_vfs); xe_pm_runtime_put(xe); - +out: xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n", num_vfs, str_plural(num_vfs), ERR_PTR(err)); return err; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index ca6b10d35573..4de6f69ed975 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -8,6 +8,47 @@ #include <linux/types.h> +#include "xe_platform_types.h" + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_device_desc { + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_graphics_ip; + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_media_ip; + + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + + enum xe_platform platform; + + u8 dma_mask_size; + u8 max_remote_tiles:2; + u8 max_gt_per_tile:2; + + u8 require_force_probe:1; + u8 is_dgfx:1; + + u8 has_display:1; + u8 has_fan_control:1; + u8 has_gsc_nvm:1; + u8 has_heci_gscfi:1; + u8 has_heci_cscfi:1; + u8 has_llc:1; + u8 has_mbx_power_limits:1; + u8 has_pxp:1; + u8 has_sriov:1; + u8 needs_scratch:1; + u8 skip_guc_pc:1; + u8 skip_mtcfg:1; + u8 skip_pcode:1; +}; + struct xe_graphics_desc { u8 va_bits; u8 vm_max_level; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 9189117fe825..6a7ddb9005f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -336,3 +336,33 @@ int 
xe_pcode_probe_early(struct xe_device *xe) return xe_pcode_ready(xe, false); } ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */ + +/* Helpers with drm device. These should only be called by the display side */ +#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) + +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_read(tile, mbox, val, val1); +} + +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_write_timeout(tile, mbox, val, timeout_ms); +} + +int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms) +{ + struct xe_device *xe = to_xe_device(drm); + struct xe_tile *tile = xe_device_get_root_tile(xe); + + return xe_pcode_request(tile, mbox, request, reply_mask, reply, timeout_base_ms); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index de38f44f3201..a5584c1c75f9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -7,8 +7,10 @@ #define _XE_PCODE_H_ #include <linux/types.h> -struct xe_tile; + +struct drm_device; struct xe_device; +struct xe_tile; void xe_pcode_init(struct xe_tile *tile); int xe_pcode_probe_early(struct xe_device *xe); @@ -32,4 +34,12 @@ int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request, | FIELD_PREP(PCODE_MB_PARAM1, param1)\ | FIELD_PREP(PCODE_MB_PARAM2, param2)) +/* Helpers with drm device */ +int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1); +int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms); +#define intel_pcode_write(drm, mbox, val) \ + intel_pcode_write_timeout((drm), (mbox), (val), 1) +int intel_pcode_request(struct 
drm_device *drm, u32 mbox, u32 request, + u32 reply_mask, u32 reply, int timeout_base_ms); + #endif diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 0befdea77db1..92bfcba51e19 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -50,6 +50,21 @@ #define READ_PL_FROM_FW 0x1 #define READ_PL_FROM_PCODE 0x0 +#define PCODE_LATE_BINDING 0x5C +#define GET_CAPABILITY_STATUS 0x0 +#define V1_FAN_SUPPORTED REG_BIT(0) +#define VR_PARAMS_SUPPORTED REG_BIT(3) +#define V1_FAN_PROVISIONED REG_BIT(16) +#define VR_PARAMS_PROVISIONED REG_BIT(19) +#define GET_VERSION_LOW 0x1 +#define GET_VERSION_HIGH 0x2 +#define MAJOR_VERSION_MASK REG_GENMASK(31, 16) +#define MINOR_VERSION_MASK REG_GENMASK(15, 0) +#define HOTFIX_VERSION_MASK REG_GENMASK(31, 16) +#define BUILD_VERSION_MASK REG_GENMASK(15, 0) +#define FAN_TABLE 1 +#define VR_CONFIG 2 + #define PCODE_FREQUENCY_CONFIG 0x6e /* Frequency Config Sub Commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ff749edc005b..e279b47ba03b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -19,6 +19,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc.h" +#include "xe_i2c.h" #include "xe_irq.h" #include "xe_pcode.h" #include "xe_pxp.h" @@ -134,7 +135,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... 
*/ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -146,12 +147,13 @@ int xe_pm_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); + xe_i2c_pm_suspend(xe); + drm_dbg(&xe->drm, "Device suspended\n"); return 0; err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); @@ -191,6 +193,8 @@ int xe_pm_resume(struct xe_device *xe) if (err) goto err; + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -488,6 +492,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_runtime_suspend_late(xe); + xe_i2c_pm_suspend(xe); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return 0; @@ -535,6 +541,8 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) @@ -753,11 +761,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. 
+ * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 69df0e3520a5..cab51d826345 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ b/drivers/gpu/drm/xe/xe_pmu.c @@ -157,10 +157,13 @@ static bool event_gt_forcewake(struct perf_event *event) return true; } -static bool event_supported(struct xe_pmu *pmu, unsigned int gt, +static bool event_supported(struct xe_pmu *pmu, unsigned int gt_id, unsigned int id) { - if (gt >= XE_MAX_GT_PER_TILE) + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + struct xe_gt *gt = xe_device_get_gt(xe, gt_id); + + if (!gt) return false; return id < sizeof(pmu->supported_events) * BITS_PER_BYTE && diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c index d92ec0f515b0..ca95f2a4d4ef 100644 --- a/drivers/gpu/drm/xe/xe_pxp_submit.c +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c @@ -101,7 +101,7 @@ static int allocate_gsc_client_resources(struct xe_gt *gt, xe_assert(xe, hwe); /* PXP instructions must be issued from PPGTT */ - vm = xe_vm_create(xe, XE_VM_FLAG_GSC); + vm = xe_vm_create(xe, XE_VM_FLAG_GSC, NULL); if (IS_ERR(vm)) return PTR_ERR(vm); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e8e1743dcb1e..d517ec9ddcbf 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -141,7 +141,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id >= XE_MAX_GT_PER_TILE) + if (eci->gt_id >= xe->info.max_gt_per_tile) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); @@ -368,6 +368,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gt_list *gt_list; + int iter = 0; u8 id; if (query->size == 0) { @@ -385,12 +386,12 @@ static int 
query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; - gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; - gt_list->gt_list[id].gt_id = gt->info.id; - gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id; + gt_list->gt_list[iter].gt_id = gt->info.id; + gt_list->gt_list[iter].reference_clock = gt->info.reference_clock; /* * The mem_regions indexes in the mask below need to * directly identify the struct @@ -406,19 +407,21 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query * assumption. */ if (!IS_DGFX(xe)) - gt_list->gt_list[id].near_mem_regions = 0x1; + gt_list->gt_list[iter].near_mem_regions = 0x1; else - gt_list->gt_list[id].near_mem_regions = + gt_list->gt_list[iter].near_mem_regions = BIT(gt_to_tile(gt)->id) << 1; - gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ - gt_list->gt_list[id].near_mem_regions; + gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[iter].near_mem_regions; - gt_list->gt_list[id].ip_ver_major = + gt_list->gt_list[iter].ip_ver_major = REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_minor = + gt_list->gt_list[iter].ip_ver_minor = REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_rev = + gt_list->gt_list[iter].ip_ver_rev = REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); + + iter++; } if (copy_to_user(query_ptr, gt_list, size)) { diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bc1689db4cd7..7b50c7c1ee21 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ 
-110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i) { dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; - dw[i++] = 0; + MI_FLUSH_IMM_DW; + + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; + dw[i++] = val; return i; } @@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, static void emit_migration_job_gen12(struct xe_sched_job *job, struct xe_lrc *lrc, u32 seqno) { + u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; i = emit_copy_timestamp(lrc, dw, i); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + i = emit_store_imm_ggtt(saddr, seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { - /* XXX: Do we need this? Leaving for now. 
*/ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(dw, i); - dw[i++] = preparser_disable(false); - } + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(saddr, seqno, dw, i); + dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 29e694bb1219..95571b87aa73 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -56,37 +56,61 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 >= r->ver_start && xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start; break; case XE_RTP_MATCH_GRAPHICS_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.media >= r->step_start && xe->info.step.media 
< r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start; break; case XE_RTP_MATCH_INTEGRATED: @@ -108,6 +132,9 @@ static bool rule_matches(const struct xe_device *xe, match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = r->match_func(gt, hwe); break; default: @@ -186,6 +213,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, struct xe_device **xe) { switch (ctx->type) { + case XE_RTP_PROCESS_TYPE_DEVICE: + *hwe = NULL; + *gt = NULL; + *xe = ctx->xe; + break; case XE_RTP_PROCESS_TYPE_GT: *hwe = NULL; *gt = ctx->gt; @@ -326,21 +358,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, hwe->engine_id == __ffs(render_compute_mask); } -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe) -{ - unsigned int dss_per_gslice = 4; - unsigned int dss; - - if (drm_WARN(&gt_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask), - "Checking gslice for platform without geometry pipeline\n")) - return false; - - dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0); - - return dss >= dss_per_gslice; -} - bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, const struct xe_hw_engine *hwe) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 4fe736a11c42..5ed6c14b9ae3 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -422,7 +422,8 @@ struct xe_reg_sr; #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ - struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) + struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \ + struct
xe_device * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE }) void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, unsigned long *active_entries, @@ -466,17 +467,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, const struct xe_hw_engine *hwe); /* - * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off - * - * @gt: GT structure - * @hwe: Engine instance - * - * Returns: true if first gslice is fused off, false otherwise. - */ -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); - -/* * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device * * @gt: GT structure diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 1b76b947c706..f4cf30e298cf 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -110,12 +110,14 @@ struct xe_rtp_entry { }; enum xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_DEVICE, XE_RTP_PROCESS_TYPE_GT, XE_RTP_PROCESS_TYPE_ENGINE, }; struct xe_rtp_process_ctx { union { + struct xe_device *xe; struct xe_gt *gt; struct xe_hw_engine *hwe; }; diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 125c836e0ee4..90244fe59b59 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea write_unlock(&shrinker->lock); } -static s64 xe_shrinker_walk(struct xe_device *xe, - struct ttm_operation_ctx *ctx, - const struct xe_bo_shrink_flags flags, - unsigned long to_scan, unsigned long *scanned) +static s64 __xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) { unsigned int mem_type; s64 freed = 0, lret; @@ -66,11 +66,15 @@ static s64 xe_shrinker_walk(struct xe_device *xe, 
struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type); struct ttm_bo_lru_cursor curs; struct ttm_buffer_object *ttm_bo; + struct ttm_lru_walk_arg arg = { + .ctx = ctx, + .trylock_only = true, + }; if (!man || !man->use_tt) continue; - ttm_bo_lru_for_each_reserved_guarded(&curs, man, ctx, ttm_bo) { + ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) { if (!ttm_bo_shrink_suitable(ttm_bo, ctx)) continue; @@ -82,6 +86,50 @@ static s64 xe_shrinker_walk(struct xe_device *xe, if (*scanned >= to_scan) break; } + /* Trylocks should never error, just fail. */ + xe_assert(xe, !IS_ERR(ttm_bo)); + } + + return freed; +} + +/* + * Try shrinking idle objects without writeback first, then if not sufficient, + * try also non-idle objects and finally if that's not sufficient either, + * add writeback. This avoids stalls and explicit writebacks with light or + * moderate memory pressure. + */ +static s64 xe_shrinker_walk(struct xe_device *xe, + struct ttm_operation_ctx *ctx, + const struct xe_bo_shrink_flags flags, + unsigned long to_scan, unsigned long *scanned) +{ + bool no_wait_gpu = true; + struct xe_bo_shrink_flags save_flags = flags; + s64 lret, freed; + + swap(no_wait_gpu, ctx->no_wait_gpu); + save_flags.writeback = false; + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + swap(no_wait_gpu, ctx->no_wait_gpu); + if (lret < 0 || *scanned >= to_scan) + return lret; + + freed = lret; + if (!ctx->no_wait_gpu) { + lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; + if (*scanned >= to_scan) + return freed; + } + + if (flags.writeback) { + lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned); + if (lret < 0) + return lret; + freed += lret; } return freed; @@ -193,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup); shrink_flags.purge = false; + lret = 
xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags, nr_to_scan, &nr_scanned); if (lret >= 0) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 0f721ae17b26..27ddf3cc80e9 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -3,13 +3,18 @@ * Copyright © 2023-2024 Intel Corporation */ +#include <linux/debugfs.h> +#include <drm/drm_debugfs.h> #include <drm/drm_managed.h> #include "xe_assert.h" #include "xe_device.h" +#include "xe_gt_sriov_pf.h" #include "xe_module.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) @@ -80,9 +85,48 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) */ int xe_sriov_pf_init_early(struct xe_device *xe) { + int err; + xe_assert(xe, IS_SRIOV_PF(xe)); - return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + xe->sriov.pf.vfs = drmm_kcalloc(&xe->drm, 1 + xe_sriov_pf_get_totalvfs(xe), + sizeof(*xe->sriov.pf.vfs), GFP_KERNEL); + if (!xe->sriov.pf.vfs) + return -ENOMEM; + + err = drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + if (err) + return err; + + xe_sriov_pf_service_init(xe); + + return 0; +} + +/** + * xe_sriov_pf_wait_ready() - Wait until PF is ready to operate. + * @xe: the &xe_device to test + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_wait_ready(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err; + + if (xe_device_wedged(xe)) + return -ECANCELED; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_pf_wait_ready(gt); + if (err) + return err; + } + + return 0; } /** @@ -102,3 +146,45 @@ void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); } + +static int simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_device *xe = parent->d_inode->i_private; + void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; + + print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, + { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, +}; + +/** + * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the PF. + */ +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + struct drm_minor *minor = xe->drm.primary; + struct dentry *parent; + + /* + * /sys/kernel/debug/dri/0/ + * ├── pf + * │ ├── ... 
+ */ + parent = debugfs_create_dir("pf", root); + if (IS_ERR(parent)) + return; + parent->d_inode->i_private = xe; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index d1220e70e1c0..e3b34f8f5e04 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -8,12 +8,15 @@ #include <linux/types.h> +struct dentry; struct drm_printer; struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +int xe_sriov_pf_wait_ready(struct xe_device *xe); +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) @@ -25,6 +28,10 @@ static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } + +static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_sriov_pf_service.c new file mode 100644 index 000000000000..eee3b2a1ba41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#include "abi/guc_relay_actions_abi.h" + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +#include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_service_types.h" + +/** + * xe_sriov_pf_service_init - Early initialization of the SR-IOV PF service. + * @xe: the &xe_device to initialize + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. 
+ */ +void xe_sriov_pf_service_init(struct xe_device *xe) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* base versions may differ between platforms */ + xe->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + xe->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + xe->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + xe->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure. */ +static int pf_negotiate_version(struct xe_device *xe, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_sriov_pf_service_version base = xe->sriov.pf.service.version.base; + struct xe_sriov_pf_service_version latest = xe->sriov.pf.service.version.latest; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, base.major); + xe_assert(xe, base.major <= latest.major); + xe_assert(xe, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than our - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_assert(xe, base.major == latest.major); + return -ENOPKG; + } + + /* same major - 
return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + xe_assert(xe, major || minor); + + xe->sriov.pf.vfs[vfid].version.major = major; + xe->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_device *xe, u32 vfid) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + xe->sriov.pf.vfs[vfid].version.major = 0; + xe->sriov.pf.vfs[vfid].version.minor = 0; +} + +/** + * xe_sriov_pf_service_is_negotiated - Check if VF has negotiated given ABI version. + * @xe: the &xe_device + * @vfid: the VF identifier + * @major: the major version to check + * @minor: the minor version to check + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. + * + * Returns: true if VF can use given ABI version functionality. + */ +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + return major == xe->sriov.pf.vfs[vfid].version.major && + minor <= xe->sriov.pf.vfs[vfid].version.minor; +} + +/** + * xe_sriov_pf_service_handshake_vf - Confirm a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * @wanted_major: the major service version expected by the VF + * @wanted_minor: the minor service version expected by the VF + * @major: the major service version to be used by the VF + * @minor: the minor service version to be used by the VF + * + * Negotiate a VF/PF ABI version to allow VF use the PF services. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_sriov_dbg_verbose(xe, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(xe, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_sriov_notice(xe, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(xe, vfid); + } else { + xe_sriov_dbg(xe, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(xe, vfid, *major, *minor); + } + + return err; +} + +/** + * xe_sriov_pf_service_reset_vf - Reset a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + pf_disconnect(xe, vfid); +} + +static void print_pf_version(struct drm_printer *p, const char *name, + const struct xe_sriov_pf_service_version *version) +{ + drm_printf(p, "%s:\t%u.%u\n", name, version->major, version->minor); +} + +/** + * xe_sriov_pf_service_print_versions - Print ABI versions negotiated with VFs. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for PF use only. 
+ */ +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_sriov_pf_service_version *version; + char name[8]; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + print_pf_version(p, "base", &xe->sriov.pf.service.version.base); + print_pf_version(p, "latest", &xe->sriov.pf.service.version.latest); + + for (n = 1; n <= total_vfs; n++) { + version = &xe->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + print_pf_version(p, xe_sriov_function_name(n, name, sizeof(name)), version); + } +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_sriov_pf_service_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_sriov_pf_service.h new file mode 100644 index 000000000000..d38c18f5ed10 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_H_ +#define _XE_SRIOV_PF_SERVICE_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_device; + +void xe_sriov_pf_service_init(struct xe_device *xe); +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p); + +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor); +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor); +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h new file mode 100644 index 000000000000..0835dde358c1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef 
_XE_SRIOV_PF_SERVICE_TYPES_H_ +#define _XE_SRIOV_PF_SERVICE_TYPES_H_ + +#include <linux/types.h> + +/** + * struct xe_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + */ +struct xe_sriov_pf_service { + struct { + struct xe_sriov_pf_service_version base; + struct xe_sriov_pf_service_version latest; + } version; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h new file mode 100644 index 000000000000..956a88f9f213 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_TYPES_H_ +#define _XE_SRIOV_PF_TYPES_H_ + +#include <linux/mutex.h> +#include <linux/types.h> + +#include "xe_sriov_pf_service_types.h" + +/** + * struct xe_sriov_metadata - per-VF device level metadata + */ +struct xe_sriov_metadata { + /** @version: negotiated VF/PF ABI version */ + struct xe_sriov_pf_service_version version; +}; + +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. 
*/ + u16 driver_max_vfs; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; + + /** @service: device level service data. */ + struct xe_sriov_pf_service service; + + /** @vfs: metadata for all VFs. */ + struct xe_sriov_metadata *vfs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index ca94382a721e..1a138108d139 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,9 +7,6 @@ #define _XE_SRIOV_TYPES_H_ #include <linux/build_bug.h> -#include <linux/mutex.h> -#include <linux/types.h> -#include <linux/workqueue_types.h> /** * VFID - Virtual Function Identifier @@ -40,37 +37,4 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); -/** - * struct xe_device_pf - Xe PF related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_PF mode. - */ -struct xe_device_pf { - /** @device_total_vfs: Maximum number of VFs supported by the device. */ - u16 device_total_vfs; - - /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ - u16 driver_max_vfs; - - /** @master_lock: protects all VFs configurations across GTs */ - struct mutex master_lock; -}; - -/** - * struct xe_device_vf - Xe Virtual Function related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_VF mode. 
- */ -struct xe_device_vf { - /** @migration: VF Migration state data */ - struct { - /** @migration.worker: VF migration recovery worker */ - struct work_struct worker; - /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ - unsigned long gt_flags; - } migration; -}; - #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 6526fe450e55..26e243c28994 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -147,127 +147,113 @@ void xe_sriov_vf_init_early(struct xe_device *xe) xe_sriov_info(xe, "migration not supported by this module version\n"); } -/** - * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. +static bool gt_vf_post_migration_needed(struct xe_gt *gt) +{ + return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags); +} + +/* + * Notify GuCs marked in flags about resource fixups apply finished. * @xe: the &xe_device struct instance - * - * After migration, we need to re-query all VF configuration to make sure - * they match previous provisioning. Note that most of VF provisioning - * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. - * - * Returns: 0 if the operation completed successfully, or a negative error - * code otherwise.
+ * @gt_flags: flags marking to which GTs the notification shall be sent */ -static int vf_post_migration_requery_guc(struct xe_device *xe) +static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags) { struct xe_gt *gt; unsigned int id; - int err, ret = 0; + int err = 0; for_each_gt(gt, xe, id) { - err = xe_gt_sriov_vf_query_config(gt); - ret = ret ?: err; + if (!test_bit(id, &gt_flags)) + continue; + /* skip asking GuC for RESFIX exit if new recovery request arrived */ + if (gt_vf_post_migration_needed(gt)) + continue; + err = xe_gt_sriov_vf_notify_resfix_done(gt); + if (err) + break; + clear_bit(id, &gt_flags); } - return ret; + if (gt_flags && !err) + drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n"); + return err; } -static void vf_post_migration_fixup_ctb(struct xe_device *xe) +static int vf_get_next_migrated_gt_id(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; - xe_assert(xe, IS_SRIOV_VF(xe)); - for_each_gt(gt, xe, id) { - s32 shift = xe_gt_sriov_vf_ggtt_shift(gt); - - xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift); + if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags)) + return id; } + return -1; } -/* - * vf_post_migration_imminent - Check if post-restore recovery is coming. - * @xe: the &xe_device struct instance +/** + * Perform post-migration fixups on a single GT. * - * Return: True if migration recovery worker will soon be running. Any worker currently - * executing does not affect the result. + * After migration, GuC needs to be re-queried for VF configuration to check + * if it matches previous provisioning. Most of VF provisioning shall be the + * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT + * range has changed, we have to perform fixups - shift all GGTT references + * used anywhere within the driver. After the fixups in this function succeed, + * it is allowed to ask the GuC bound to this GT to continue normal operation.
+ * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. */ -static bool vf_post_migration_imminent(struct xe_device *xe) +static int gt_vf_post_migration_fixups(struct xe_gt *gt) { - return xe->sriov.vf.migration.gt_flags != 0 || - work_pending(&xe->sriov.vf.migration.worker); -} - -static bool vf_post_migration_fixup_ggtt_nodes(struct xe_device *xe) -{ - bool need_fixups = false; - struct xe_tile *tile; - unsigned int id; - - for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - s64 shift; - - shift = xe_gt_sriov_vf_ggtt_shift(gt); - if (shift) { - need_fixups = true; - xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift); - } - } - return need_fixups; -} + s64 shift; + int err; -/* - * Notify all GuCs about resource fixups apply finished. - */ -static void vf_post_migration_notify_resfix_done(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; + err = xe_gt_sriov_vf_query_config(gt); + if (err) + return err; - for_each_gt(gt, xe, id) { - if (vf_post_migration_imminent(xe)) - goto skip; - xe_gt_sriov_vf_notify_resfix_done(gt); + shift = xe_gt_sriov_vf_ggtt_shift(gt); + if (shift) { + xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); + /* FIXME: add the recovery steps */ + xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift); } - return; - -skip: - drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); + return 0; } static void vf_post_migration_recovery(struct xe_device *xe) { - bool need_fixups; - int err; + unsigned long fixed_gts = 0; + int id, err; drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); - err = vf_post_migration_requery_guc(xe); - if (vf_post_migration_imminent(xe)) - goto defer; - if (unlikely(err)) - goto fail; + if (!vf_migration_supported(xe)) { xe_sriov_err(xe, "migration not supported by this module version\n"); err = -ENOTRECOVERABLE; goto fail; } - need_fixups = vf_post_migration_fixup_ggtt_nodes(xe); - /* FIXME: add
the recovery steps */ - if (need_fixups) - vf_post_migration_fixup_ctb(xe); + while (id = vf_get_next_migrated_gt_id(xe), id >= 0) { + struct xe_gt *gt = xe_device_get_gt(xe, id); + + err = gt_vf_post_migration_fixups(gt); + if (err) + goto fail; + + set_bit(id, &fixed_gts); + } + + err = vf_post_migration_notify_resfix_done(xe, fixed_gts); + if (err) + goto fail; - vf_post_migration_notify_resfix_done(xe); xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); return; -defer: - xe_pm_runtime_put(xe); - drm_dbg(&xe->drm, "migration recovery deferred\n"); - return; fail: xe_pm_runtime_put(xe); drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); @@ -282,18 +268,23 @@ static void migration_worker_func(struct work_struct *w) vf_post_migration_recovery(xe); } -static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +/* + * Check if post-restore recovery is coming on any of GTs. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. Any worker currently + * executing does not affect the result. + */ +static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; for_each_gt(gt, xe, id) { - if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { - xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); - return false; - } + if (test_bit(id, &xe->sriov.vf.migration.gt_flags)) + return true; } - return true; + return false; } /** @@ -308,13 +299,9 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) xe_assert(xe, IS_SRIOV_VF(xe)); - if (!vf_ready_to_recovery_on_all_gts(xe)) + if (!vf_ready_to_recovery_on_any_gts(xe)) return; - WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); - /* Ensure other threads see that no flags are set now. */ - smp_mb(); - started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); drm_info(&xe->drm, "VF migration recovery %s\n", started ? 
"scheduled" : "already in progress"); diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h new file mode 100644 index 000000000000..8300416a6226 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_TYPES_H_ +#define _XE_SRIOV_VF_TYPES_H_ + +#include <linux/types.h> +#include <linux/workqueue_types.h> + +/** + * struct xe_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. */ + u16 minor; +}; + +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @pf_version: negotiated VF/PF ABI version. */ + struct xe_sriov_vf_relay_version pf_version; + + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + } migration; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index c77b5c317fa0..10e88f2c9615 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -5,6 +5,7 @@ #include "xe_step.h" +#include <kunit/visibility.h> #include <linux/bitfield.h> #include "xe_device.h" @@ -255,3 +256,4 @@ const char *xe_step_name(enum xe_step step) return "**"; } } +EXPORT_SYMBOL_IF_KUNIT(xe_step_name); diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 1f710b3fc599..41705f5d52e3 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include 
"xe_heci_gsc.h" +#include "xe_i2c.h" #include "xe_mmio.h" #include "xe_pcode_api.h" #include "xe_vsec.h" @@ -173,20 +174,22 @@ static int enable_survivability_mode(struct pci_dev *pdev) survivability->mode = true; ret = xe_heci_gsc_init(xe); - if (ret) { - /* - * But if it fails, device can't enter survivability - * so move it back for correct error handling - */ - survivability->mode = false; - return ret; - } + if (ret) + goto err; xe_vsec_init(xe); + ret = xe_i2c_probe(xe); + if (ret) + goto err; + dev_err(dev, "In Survivability Mode\n"); return 0; + +err: + survivability->mode = false; + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 26418e9bdff0..a7ff5975873f 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -3,13 +3,17 @@ * Copyright © 2024 Intel Corporation */ +#include <drm/drm_drv.h> + #include "xe_bo.h" #include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" +#include "xe_pm.h" #include "xe_pt.h" #include "xe_svm.h" +#include "xe_tile.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" @@ -295,7 +299,7 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *page_to_vr(struct page *page) { @@ -483,16 +487,18 @@ static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr, return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM); } -static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation) +static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) { return container_of(devmem_allocation, struct xe_bo, devmem_allocation); } -static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation) +static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) { struct xe_bo *bo = 
to_xe_bo(devmem_allocation); + struct xe_device *xe = xe_bo_device(bo); xe_bo_put_async(bo); + xe_pm_runtime_put(xe); } static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) @@ -505,7 +511,7 @@ static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) return &tile->mem.vram.ttm.mm; } -static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation, +static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, unsigned long npages, unsigned long *pfn) { struct xe_bo *bo = to_xe_bo(devmem_allocation); @@ -528,7 +534,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio return 0; } -static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = { +static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = { .devmem_release = xe_svm_devmem_release, .populate_devmem_pfn = xe_svm_populate_devmem_pfn, .copy_to_devmem = xe_svm_copy_to_devmem, @@ -676,75 +682,69 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *v min(end, xe_vma_end(vma))); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) { return &tile->mem.vram; } -/** - * xe_svm_alloc_vram()- Allocate device memory pages for range, - * migrating existing data. - * @vm: The VM. - * @tile: tile to allocate vram from - * @range: SVM range - * @ctx: DRM GPU SVM context - * - * Return: 0 on success, error code on failure. 
- */ -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) { - struct mm_struct *mm = vm->svm.gpusvm.mm; + struct xe_tile *tile = container_of(dpagemap, typeof(*tile), mem.vram.dpagemap); + struct xe_device *xe = tile_to_xe(tile); + struct device *dev = xe->drm.dev; struct xe_vram_region *vr = tile_to_vr(tile); struct drm_buddy_block *block; struct list_head *blocks; struct xe_bo *bo; - ktime_t end = 0; - int err; + ktime_t time_end = 0; + int err, idx; - range_debug(range, "ALLOCATE VRAM"); + if (!drm_dev_enter(&xe->drm, &idx)) + return -ENODEV; - if (!mmget_not_zero(mm)) - return -EFAULT; - mmap_read_lock(mm); + xe_pm_runtime_get(xe); -retry: - bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, - xe_svm_range_size(range), + retry: + bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, end - start, ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_CPU_ADDR_MIRROR); if (IS_ERR(bo)) { err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) + if (xe_vm_validate_should_retry(NULL, err, &time_end)) goto retry; - goto unlock; + goto out_pm_put; } - drm_gpusvm_devmem_init(&bo->devmem_allocation, - vm->xe->drm.dev, mm, - &gpusvm_devmem_ops, - &tile->mem.vram.dpagemap, - xe_svm_range_size(range)); + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, + &dpagemap_devmem_ops, + &tile->mem.vram.dpagemap, + end - start); blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) block->private = vr; xe_bo_get(bo); - err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, - &bo->devmem_allocation, ctx); + + /* Ensure the device has a pm ref while there are device pages active. 
*/ + xe_pm_runtime_get_noresume(xe); + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, + start, end, timeslice_ms, + xe_svm_devm_owner(xe)); if (err) xe_svm_devmem_release(&bo->devmem_allocation); xe_bo_unlock(bo); xe_bo_put(bo); -unlock: - mmap_read_unlock(mm); - mmput(mm); +out_pm_put: + xe_pm_runtime_put(xe); + drm_dev_exit(idx); return err; } @@ -810,13 +810,13 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), .devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .check_pages_threshold = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0, .devmem_only = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .timeslice_ms = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? vm->xe->atomic_svm_timeslice_ms : 0, }; struct xe_svm_range *range; @@ -852,7 +852,7 @@ retry: if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) { - err = xe_svm_alloc_vram(vm, tile, range, &ctx); + err = xe_svm_alloc_vram(tile, range, &ctx); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { if (migrate_try_count || !ctx.devmem_only) { @@ -944,7 +944,7 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) */ int xe_svm_bo_evict(struct xe_bo *bo) { - return drm_gpusvm_evict_to_ram(&bo->devmem_allocation); + return drm_pagemap_evict_to_ram(&bo->devmem_allocation); } /** @@ -997,7 +997,31 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, return err; } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) + +/** + * xe_svm_alloc_vram()- Allocate device memory pages for range, + * migrating existing data. 
+ * @tile: tile to allocate vram from + * @range: SVM range + * @ctx: DRM GPU SVM context + * + * Return: 0 on success, error code on failure. + */ +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + struct drm_pagemap *dpagemap; + + xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); + range_debug(range, "ALLOCATE VRAM"); + + dpagemap = xe_tile_local_pagemap(tile); + return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), + xe_svm_range_end(range), + range->base.gpusvm->mm, + ctx->timeslice_ms); +} static struct drm_pagemap_device_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, @@ -1023,6 +1047,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, static const struct drm_pagemap_ops xe_drm_pagemap_ops = { .device_map = xe_drm_pagemap_device_map, + .populate_mm = xe_drm_pagemap_populate_mm, }; /** @@ -1054,7 +1079,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) vr->pagemap.range.start = res->start; vr->pagemap.range.end = res->end; vr->pagemap.nr_range = 1; - vr->pagemap.ops = drm_gpusvm_pagemap_ops_get(); + vr->pagemap.ops = drm_pagemap_pagemap_ops_get(); vr->pagemap.owner = xe_svm_devm_owner(xe); addr = devm_memremap_pages(dev, &vr->pagemap); @@ -1075,7 +1100,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #else -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx) { diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 19ce4f2754a7..da9a69ea0bb1 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -70,8 +70,7 @@ int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, +int 
xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx); struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, @@ -237,10 +236,9 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } -static inline -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +static inline int +xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) { return -EOPNOTSUPP; } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index f87276df18f2..82872a51f098 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -77,6 +77,7 @@ static void user_fence_worker(struct work_struct *w) { struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); + WRITE_ONCE(ufence->signalled, 1); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value))) @@ -91,7 +92,6 @@ static void user_fence_worker(struct work_struct *w) * Wake up waiters only after updating the ufence state, allowing the UMD * to safely reuse the same ufence without encountering -EBUSY errors. 
*/ - WRITE_ONCE(ufence->signalled, 1); wake_up_all(&ufence->xe->ufence_wq); user_fence_put(ufence); } diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 672faa0b67f1..86e9811e60ba 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -10,6 +10,7 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_memirq.h" #include "xe_migrate.h" #include "xe_pcode.h" #include "xe_sa.h" @@ -174,6 +175,12 @@ int xe_tile_init_noalloc(struct xe_tile *tile) int xe_tile_init(struct xe_tile *tile) { + int err; + + err = xe_memirq_init(&tile->memirq); + if (err) + return err; + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index eb939316d55b..cc33e8733983 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -16,4 +16,21 @@ int xe_tile_init(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return &tile->mem.vram.dpagemap; +} +#else +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return NULL; +} +#endif + +static inline bool xe_tile_is_root(struct xe_tile *tile) +{ + return tile->id == 0; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index ccebd5f0878e..86323cf3be2c 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(xe_bo, TP_fast_assign( __assign_str(dev); - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __entry->flags = bo->flags; __entry->vm = bo->vm; ), @@ -73,7 +73,7 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); 
__assign_str(new_placement_name); __assign_str(old_placement_name); __assign_str(device_id); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3a8751a8b92d..465bda355443 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -33,6 +33,22 @@ uc_to_xe(struct xe_uc *uc) } /* Should be called once at driver load only */ +int xe_uc_init_noalloc(struct xe_uc *uc) +{ + int ret; + + ret = xe_guc_init_noalloc(&uc->guc); + if (ret) + goto err; + + /* HuC and GSC have no early dependencies and will be initialized during xe_uc_init(). */ + return 0; + +err: + xe_gt_err(uc_to_gt(uc), "Failed to early initialize uC (%pe)\n", ERR_PTR(ret)); + return ret; +} + int xe_uc_init(struct xe_uc *uc) { int ret; @@ -56,15 +72,17 @@ int xe_uc_init(struct xe_uc *uc) if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; - if (IS_SRIOV_VF(uc_to_xe(uc))) - return 0; + if (!IS_SRIOV_VF(uc_to_xe(uc))) { + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + } - ret = xe_wopcm_init(&uc->wopcm); + ret = xe_guc_min_load_for_hwconfig(&uc->guc); if (ret) goto err; return 0; - err: xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret)); return ret; @@ -126,28 +144,7 @@ int xe_uc_sanitize_reset(struct xe_uc *uc) return uc_reset(uc); } -/** - * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig - * @uc: The UC object - * - * Return: 0 on success, negative error code on error. 
- */ -int xe_uc_init_hwconfig(struct xe_uc *uc) -{ - int ret; - - /* GuC submission not enabled, nothing to do */ - if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; - - ret = xe_guc_min_load_for_hwconfig(&uc->guc); - if (ret) - return ret; - - return 0; -} - -static int vf_uc_init_hw(struct xe_uc *uc) +static int vf_uc_load_hw(struct xe_uc *uc) { int err; @@ -161,22 +158,30 @@ static int vf_uc_init_hw(struct xe_uc *uc) err = xe_gt_sriov_vf_connect(uc_to_gt(uc)); if (err) - return err; + goto err_out; uc->guc.submission_state.enabled = true; + err = xe_guc_opt_in_features_enable(&uc->guc); + if (err) + goto err_out; + err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) - return err; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(&uc->guc); + return err; } /* * Should be called during driver load, after every GT reset, and after every * suspend to reload / auth the firmwares. */ -int xe_uc_init_hw(struct xe_uc *uc) +int xe_uc_load_hw(struct xe_uc *uc) { int ret; @@ -185,7 +190,7 @@ int xe_uc_init_hw(struct xe_uc *uc) return 0; if (IS_SRIOV_VF(uc_to_xe(uc))) - return vf_uc_init_hw(uc); + return vf_uc_load_hw(uc); ret = xe_huc_upload(&uc->huc); if (ret) @@ -201,15 +206,15 @@ int xe_uc_init_hw(struct xe_uc *uc) ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (ret) - return ret; + goto err_out; ret = xe_guc_post_load_init(&uc->guc); if (ret) - return ret; + goto err_out; ret = xe_guc_pc_start(&uc->guc.pc); if (ret) - return ret; + goto err_out; xe_guc_engine_activity_enable_stats(&uc->guc); @@ -221,11 +226,10 @@ int xe_uc_init_hw(struct xe_uc *uc) xe_gsc_load_start(&uc->gsc); return 0; -} -int xe_uc_fini_hw(struct xe_uc *uc) -{ - return xe_uc_sanitize_reset(uc); +err_out: + xe_guc_sanitize(&uc->guc); + return ret; } int xe_uc_reset_prepare(struct xe_uc *uc) diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index c23e6f5e2514..21c9306098cf 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -8,11 +8,10 @@ 
struct xe_uc; +int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); -int xe_uc_init_hwconfig(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); -int xe_uc_init_hw(struct xe_uc *uc); -int xe_uc_fini_hw(struct xe_uc *uc); +int xe_uc_load_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 6d0869518652..9bbdde604923 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -115,10 +115,11 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 47, 0)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ @@ -127,6 +128,7 @@ struct fw_blobs_by_type { fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, no_ver(xe, huc, ptl)) \ fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \ fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \ diff --git a/drivers/gpu/drm/xe/xe_vm.c 
b/drivers/gpu/drm/xe/xe_vm.c index 04d1a43b81e3..d60c4b115304 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1610,8 +1610,12 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); + if (IS_ERR(vm->scratch_pt[id][i])) { + int err = PTR_ERR(vm->scratch_pt[id][i]); + + vm->scratch_pt[id][i] = NULL; + return err; + } xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); } @@ -1640,7 +1644,7 @@ static void xe_vm_free_scratch(struct xe_vm *vm) } } -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) { struct drm_gem_object *vm_resv_obj; struct xe_vm *vm; @@ -1661,9 +1665,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->xe = xe; vm->size = 1ull << xe->info.va_bits; - vm->flags = flags; + if (xef) + vm->xef = xe_file_get(xef); /** * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be * manipulated under the PXP mutex. 
However, the PXP mutex can be taken @@ -1794,6 +1799,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); + if (xef && xe->info.has_asid) { + u32 asid; + + down_write(&xe->usm.lock); + err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, + XA_LIMIT(1, XE_MAX_ASID - 1), + &xe->usm.next_asid, GFP_KERNEL); + up_write(&xe->usm.lock); + if (err < 0) + goto err_unlock_close; + + vm->usm.asid = asid; + } + trace_xe_vm_create(vm); return vm; @@ -1814,6 +1833,8 @@ err_no_resv: for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); + if (vm->xef) + xe_file_put(vm->xef); kfree(vm); if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_put(xe); @@ -2059,9 +2080,8 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, struct xe_device *xe = to_xe_device(dev); struct xe_file *xef = to_xe_file(file); struct drm_xe_vm_create *args = data; - struct xe_tile *tile; struct xe_vm *vm; - u32 id, asid; + u32 id; int err; u32 flags = 0; @@ -2097,29 +2117,10 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) flags |= XE_VM_FLAG_FAULT_MODE; - vm = xe_vm_create(xe, flags); + vm = xe_vm_create(xe, flags, xef); if (IS_ERR(vm)) return PTR_ERR(vm); - if (xe->info.has_asid) { - down_write(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - up_write(&xe->usm.lock); - if (err < 0) - goto err_close_and_put; - - vm->usm.asid = asid; - } - - vm->xef = xe_file_get(xef); - - /* Record BO memory for VM pagetable created against client */ - for_each_tile(tile, xe, id) - if (vm->pt_root[id]) - xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); - #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) /* Warning: Security issue - never enable by default */ args->reserved[0] = 
xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); @@ -2380,7 +2381,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); for_each_tile(tile, vm->xe, id) tile_mask |= 0x1 << id; @@ -2887,7 +2888,7 @@ static int check_ufence(struct xe_vma *vma) static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) { - bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); + bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); int err = 0; @@ -2913,7 +2914,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; - err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx); + err = xe_svm_alloc_vram(tile, svm_range, &ctx); if (err) { drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); @@ -3421,6 +3422,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, free_bind_ops: if (args->num_binds > 1) kvfree(*bind_ops); + *bind_ops = NULL; return err; } @@ -3466,9 +3468,9 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; - if (XE_IOCTL_DBG(xe, range > bo->size) || + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > - bo->size - range)) { + xe_bo_size(bo) - range)) { return -EINVAL; } @@ -3527,7 +3529,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct xe_exec_queue *q = NULL; u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; - struct drm_xe_vm_bind_op *bind_ops; + struct drm_xe_vm_bind_op *bind_ops = NULL; 
struct xe_vma_ops vops; struct dma_fence *fence; int err; @@ -3771,7 +3773,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, xe_vma_ops_init(&vops, vm, q, NULL, 0); - ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size, + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), DRM_XE_VM_BIND_OP_MAP, 0, 0, vm->xe->pat.idx[cache_lvl]); if (IS_ERR(ops)) { diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 3475a118f666..2ecb417c19a2 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -26,7 +26,7 @@ struct xe_sync_entry; struct xe_svm_range; struct drm_exec; -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags); +struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef); struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id); int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node); @@ -315,22 +315,14 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. - * - * Return: A pin cookie that should be used for xe_vm_clear_validating(). */ -static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, - bool allow_res_evict) +static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict) { - struct pin_cookie cookie = {}; - if (vm && !allow_res_evict) { xe_vm_assert_held(vm); - cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); /* Pairs with READ_ONCE in xe_vm_is_validating() */ WRITE_ONCE(vm->validating, current); } - - return cookie; } /** @@ -338,17 +330,14 @@ static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, * @vm: Pointer to the vm or NULL * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same * value as for xe_vm_set_validation(). 
- * @cookie: Cookie obtained from xe_vm_set_validating(). * * Register this task as currently making bos resident for the vm. Intended * to avoid eviction by the same task of shared bos bound to the vm. * Call with the vm's resv lock held. */ -static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, - struct pin_cookie cookie) +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict) { if (vm && !allow_res_evict) { - lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); /* Pairs with READ_ONCE in xe_vm_is_validating() */ WRITE_ONCE(vm->validating, NULL); } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index bed6088e1bb3..8a07feef503b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -266,7 +266,7 @@ struct xe_vm { * up for revalidation. Protected from access with the * @invalidated_lock. Removing items from the list * additionally requires @lock in write mode, and adding - * items to the list requires either the @userptr.notifer_lock in + * items to the list requires either the @userptr.notifier_lock in * write mode, OR @lock in write mode. 
*/ struct list_head invalidated; diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c index 3e573b0b7ebd..8f23a27871b6 100644 --- a/drivers/gpu/drm/xe/xe_vsec.c +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -24,6 +24,7 @@ #define BMG_DEVICE_ID 0xE2F8 static struct intel_vsec_header bmg_telemetry = { + .rev = 1, .length = 0x10, .id = VSEC_ID_TELEMETRY, .num_entries = 2, @@ -32,28 +33,19 @@ static struct intel_vsec_header bmg_telemetry = { .offset = BMG_DISCOVERY_OFFSET, }; -static struct intel_vsec_header bmg_punit_crashlog = { +static struct intel_vsec_header bmg_crashlog = { + .rev = 1, .length = 0x10, .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, + .num_entries = 2, + .entry_size = 6, .tbir = 0, .offset = BMG_DISCOVERY_OFFSET + 0x60, }; -static struct intel_vsec_header bmg_oobmsm_crashlog = { - .length = 0x10, - .id = VSEC_ID_CRASHLOG, - .num_entries = 1, - .entry_size = 4, - .tbir = 0, - .offset = BMG_DISCOVERY_OFFSET + 0x78, -}; - static struct intel_vsec_header *bmg_capabilities[] = { &bmg_telemetry, - &bmg_punit_crashlog, - &bmg_oobmsm_crashlog, + &bmg_crashlog, NULL }; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4a76de391abb..22a98600fd8f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -10,6 +10,7 @@ #include <linux/compiler_types.h> #include <linux/fault-inject.h> +#include <generated/xe_device_wa_oob.h> #include <generated/xe_wa_oob.h> #include "regs/xe_engine_regs.h" @@ -285,6 +286,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16021865536"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + 
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, { XE_RTP_NAME("14021486841"), XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), @@ -644,6 +657,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -860,9 +877,34 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = { static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); +static __maybe_unused const struct xe_rtp_entry device_oob_was[] = { +#include <generated/xe_device_wa_oob.c> + {} +}; + +static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT); + __diag_pop(); /** + * xe_wa_process_device_oob - process OOB workaround table + * @xe: device instance to process workarounds for + * + * process OOB workaround table for this device, marking in @xe the + * workarounds that are active. 
+ */ + +void xe_wa_process_device_oob(struct xe_device *xe) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe); + + xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)); + + xe->wa_active.oob_initialized = true; + xe_rtp_process(&ctx, device_oob_was); +} + +/** * xe_wa_process_oob - process OOB workaround table * @gt: GT instance to process workarounds for * @@ -931,6 +973,28 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) } /** + * xe_wa_device_init - initialize device with workaround oob bookkeeping + * @xe: Xe device instance to initialize + * + * Returns 0 for success, negative with error code otherwise + */ +int xe_wa_device_init(struct xe_device *xe) +{ + unsigned long *p; + + p = drmm_kzalloc(&xe->drm, + sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)), + GFP_KERNEL); + + if (!p) + return -ENOMEM; + + xe->wa_active.oob = p; + + return 0; +} + +/** * xe_wa_init - initialize gt with workaround bookkeeping * @gt: GT instance to initialize * @@ -964,6 +1028,16 @@ int xe_wa_init(struct xe_gt *gt) } ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */ +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "Device OOB Workarounds\n"); + for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)) + if (device_oob_was[idx].name) + drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name); +} + void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) { size_t idx; diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index 52337405b5bc..f3880c65cb8d 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -13,17 +13,19 @@ struct xe_gt; struct xe_hw_engine; struct xe_tile; +int xe_wa_device_init(struct xe_device *xe); int xe_wa_init(struct xe_gt *gt); +void xe_wa_process_device_oob(struct xe_device *xe); void xe_wa_process_oob(struct xe_gt *gt); void xe_wa_process_gt(struct xe_gt *gt); void 
xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_wa_apply_tile_workarounds(struct xe_tile *tile); +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p); void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); /** - * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any - * other more specific type + * XE_WA - Out-of-band workarounds, to be queried and called as needed. * @gt__: gt instance * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h */ @@ -32,4 +34,20 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \ }) +/** + * XE_DEVICE_WA - Out-of-band Device workarounds, to be queried and called + * as needed. + * @xe__: xe_device + * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h + */ +#define XE_DEVICE_WA(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + +#define XE_DEVICE_WA_DISABLE(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + clear_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + #endif diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 0ee74a5b2407..e990f20eccfe 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -34,14 +34,16 @@ 14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 22019338487 MEDIA_VERSION(2000) - GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display PLATFORM(LUNARLAKE) -16023588340 
GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) no_media_l3 MEDIA_VERSION(3000) @@ -58,9 +60,15 @@ no_media_l3 MEDIA_VERSION(3000) GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) + MEDIA_VERSION(3002) + GRAPHICS_VERSION(3003) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + MEDIA_VERSION(3002) + GRAPHICS_VERSION(3003) # SoC workaround - currently applies to all platforms with the following # primary GT GMDID 14022085890 GRAPHICS_VERSION(2001) + +15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER) |
