| field | value | date |
|---|---|---|
| author | Dave Airlie <[email protected]> | 2024-06-10 23:08:54 +0000 |
| committer | Dave Airlie <[email protected]> | 2024-06-10 23:09:07 +0000 |
| commit | 7957066ca614b63aa6687e825ccbc215fa4584ea | |
| tree | df0dc7f4f762cab6b59f84463c1a4a0949827c9d /drivers/gpu/drm/xe/xe_exec_queue.c | |
| parent | Linux 6.10-rc3 | |
| parent | drm/xe: move disable_c6 call | |
Merge tag 'drm-xe-next-2024-06-06' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next
UAPI Changes:
- Expose the L3 bank mask (Francois)
Cross-subsystem Changes:
- Update Xe driver maintainers (Oded)
Display (i915):
- Add missing include to intel_vga.c (Michal Wajdeczko)
Driver Changes:
- Fix Display (xe-only) detection for ADL-N (Lucas)
- Runtime PM fixes that enabled PC-10 and D3Cold (Francois, Rodrigo)
- Fix unexpected silent drm backmerge issues (Thomas)
- More (a lot more) preparation for SR-IOV support (Michal Wajdeczko)
- Devcoredump fixes and improvements (Jose, Tejas, Matt Brost)
- Introduce device 'wedged' state (Rodrigo)
- Improve debug and info messages (Michal Wajdeczko, Rodrigo, Nirmoy)
- Adding or fixing workarounds (Tejas, Shekhar, Lucas, Bommu)
- Check result of drmm_mutex_init (Michal Wajdeczko)
- Enlarge the critical dma fence area for preempt fences (Matt Auld)
- Prevent UAF in VM's rebind work (Matt Auld)
- GuC submit related clean-ups and fixes (Matt Brost, Himal, Jonathan, Niranjana)
- Prefer local helpers to perform dma reservation locking (Himal)
- Spelling and typo fixes (Colin, Francois)
- Prep patches for 1 job per VM bind IOCTL (no uapi change yet) (Matt Brost)
- Remove uninitialized end var from xe_gt_tlb_invalidation_range (Nirmoy)
- GSC related changes targeting LNL support (Daniele)
- Fix assert in L3 bank mask generation (Francois)
- Perform dma_map when moving system buffer objects to TT (Thomas)
- Add helpers for manipulating macro arguments (Michal Wajdeczko)
- Refactor default device atomic settings (Nirmoy)
- Add debugfs node to dump mocs (Janga)
- Use ordered WQ for G2H handler (Matt Brost)
- Clean up and fixes in header includes (Michal Wajdeczko)
- Prefer flexible-array over deprecated zero-length ones (Lucas)
- Add Indirect Ring State support (Niranjana)
- Fix UBSAN shift-out-of-bounds failure (Shuicheng)
- HWMon fixes and additions (Karthik)
- Clean-up refactor around probe init functions (Lucas, Michal Wajdeczko)
- Fix PCODE init function (Himal)
- Only use reserved BCS instances for usm migrate exec queue (Matt Brost)
- Only zap PTEs as needed (Matt Brost)
- Per client usage info (Lucas)
- Core hotunplug improvements converting stuff towards devm (Matt Auld)
- Don't emit false error if running in execlist mode (Michal Wajdeczko)
- Remove unused struct (Dr. David)
- Support/debug for slow GuC loads (John Harrison)
- Decouple job seqno and lrc seqno (Matt Brost)
- Allow migrate vm gpu submissions from reclaim context (Thomas)
- Rename drm-client running time to run_ticks and fix a UAF (Umesh)
- Check empty pinned BO list with lock held (Nirmoy)
- Drop undesired prefix from the platform name (Michal Wajdeczko)
- Remove unwanted mutex locking on xe file close (Niranjana)
- Replace format-less snprintf() with strscpy() (Arnd)
- Other general clean-ups on registers definitions and function names (Michal Wajdeczko)
- Add kernel-doc to some xe_lrc interfaces (Niranjana)
- Use missing lock in relay_needs_worker (Nirmoy)
- Drop redundant W=1 warnings from Makefile (Jani)
- Simplify if condition in preempt fences code (Thorsten)
- Flush engine buffers before signalling user fence on all engines (Andrzej)
- Don't overmap identity VRAM mapping (Matt Brost)
- Do not dereference NULL job->fence in trace points (Matt Brost)
- Add synchronous gt reset debugfs (Jonathan)
- Xe gt_idle fixes (Riana)
Signed-off-by: Dave Airlie <[email protected]>
From: Rodrigo Vivi <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec_queue.c')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue.c | 77 |

1 file changed, 47 insertions, 30 deletions
```diff
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 395de93579fa..27215075c799 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -86,7 +86,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
 	if (extensions) {
 		/*
-		 * may set q->usm, must come before xe_lrc_init(),
+		 * may set q->usm, must come before xe_lrc_create(),
 		 * may overwrite q->sched_props, must come before q->ops->init()
 		 */
 		err = exec_queue_user_extensions(xe, q, extensions, 0);
@@ -96,45 +96,30 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
 		}
 	}
 
-	if (xe_exec_queue_is_parallel(q)) {
-		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
-		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
-	}
-
 	return q;
 }
 
 static int __xe_exec_queue_init(struct xe_exec_queue *q)
 {
-	struct xe_device *xe = gt_to_xe(q->gt);
 	int i, err;
 
 	for (i = 0; i < q->width; ++i) {
-		err = xe_lrc_init(q->lrc + i, q->hwe, q, q->vm, SZ_16K);
-		if (err)
+		q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
+		if (IS_ERR(q->lrc[i])) {
+			err = PTR_ERR(q->lrc[i]);
 			goto err_lrc;
+		}
 	}
 
 	err = q->ops->init(q);
 	if (err)
 		goto err_lrc;
 
-	/*
-	 * Normally the user vm holds an rpm ref to keep the device
-	 * awake, and the context holds a ref for the vm, however for
-	 * some engines we use the kernels migrate vm underneath which offers no
-	 * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we
-	 * can perform GuC CT actions when needed. Caller is expected to have
-	 * already grabbed the rpm ref outside any sensitive locks.
-	 */
-	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
-		xe_pm_runtime_get_noresume(xe);
-
 	return 0;
 
 err_lrc:
 	for (i = i - 1; i >= 0; --i)
-		xe_lrc_finish(q->lrc + i);
+		xe_lrc_put(q->lrc[i]);
 	return err;
 }
 
@@ -215,9 +200,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
 	int i;
 
 	for (i = 0; i < q->width; ++i)
-		xe_lrc_finish(q->lrc + i);
-	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
-		xe_pm_runtime_put(gt_to_xe(q->gt));
+		xe_lrc_put(q->lrc[i]);
 
 	__xe_exec_queue_free(q);
 }
@@ -720,7 +703,7 @@ bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
 
 static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
 {
-	return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1;
+	return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1;
 }
 
 /**
@@ -731,7 +714,7 @@ static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
  */
 bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
 {
-	struct xe_lrc *lrc = q->lrc;
+	struct xe_lrc *lrc = q->lrc[0];
 	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
 
 	return xe_exec_queue_num_job_inflight(q) >= max_job;
@@ -757,16 +740,50 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
 		int i;
 
 		for (i = 0; i < q->width; ++i) {
-			if (xe_lrc_seqno(&q->lrc[i]) !=
-			    q->lrc[i].fence_ctx.next_seqno - 1)
+			if (xe_lrc_seqno(q->lrc[i]) !=
+			    q->lrc[i]->fence_ctx.next_seqno - 1)
 				return false;
 		}
 
 		return true;
 	}
 
-	return xe_lrc_seqno(&q->lrc[0]) ==
-		q->lrc[0].fence_ctx.next_seqno - 1;
+	return xe_lrc_seqno(q->lrc[0]) ==
+		q->lrc[0]->fence_ctx.next_seqno - 1;
+}
+
+/**
+ * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
+ * from hw
+ * @q: The exec queue
+ *
+ * Update the timestamp saved by HW for this exec queue and save run ticks
+ * calculated by using the delta from last update.
+ */
+void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
+{
+	struct xe_lrc *lrc;
+	u32 old_ts, new_ts;
+
+	/*
+	 * Jobs that are run during driver load may use an exec_queue, but are
+	 * not associated with a user xe file, so avoid accumulating busyness
+	 * for kernel specific work.
+	 */
+	if (!q->vm || !q->vm->xef)
+		return;
+
+	/*
+	 * Only sample the first LRC. For parallel submission, all of them are
+	 * scheduled together and we compensate that below by multiplying by
+	 * width - this may introduce errors if that premise is not true and
+	 * they don't exit 100% aligned. On the other hand, looping through
+	 * the LRCs and reading them in different time could also introduce
+	 * errors.
+	 */
+	lrc = q->lrc[0];
+	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
+	q->run_ticks += (new_ts - old_ts) * q->width;
 }
 
 void xe_exec_queue_kill(struct xe_exec_queue *q)
```
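The central refactor in this file replaces the embedded xe_lrc_init()/xe_lrc_finish() pair with xe_lrc_create()/xe_lrc_put(): the constructor now returns either a valid pointer or an errno encoded in the pointer, and the error path unwinds only the LRCs that were actually created. Below is a minimal userspace sketch of that idiom, assuming nothing beyond the C standard library; ERR_PTR/IS_ERR/PTR_ERR are re-implemented locally (in the kernel they come from <linux/err.h>), and lrc_create/lrc_put/queue_init are hypothetical stand-ins rather than the driver's real API.

```c
/* Userspace sketch of the ERR_PTR idiom used by the xe_lrc_create() path.
 * ERR_PTR/IS_ERR/PTR_ERR are local re-implementations; lrc_create(),
 * lrc_put() and queue_init() are hypothetical stand-ins. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* The top 4095 addresses are reserved to carry -errno values. */
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct lrc { int id; };

/* Stand-in for xe_lrc_create(): returns a valid pointer or ERR_PTR(-errno). */
static struct lrc *lrc_create(int id)
{
	struct lrc *l = malloc(sizeof(*l));

	if (!l)
		return ERR_PTR(-ENOMEM);
	l->id = id;
	return l;
}

/* Stand-in for xe_lrc_put(): drops the (single) reference. */
static void lrc_put(struct lrc *l)
{
	free(l);
}

/* Mirrors __xe_exec_queue_init(): on failure, unwind only the LRCs
 * created before the failing index. */
static int queue_init(struct lrc **lrc, int width)
{
	int i, err;

	for (i = 0; i < width; ++i) {
		lrc[i] = lrc_create(i);
		if (IS_ERR(lrc[i])) {
			err = PTR_ERR(lrc[i]);
			goto err_lrc;
		}
	}
	return 0;

err_lrc:
	for (i = i - 1; i >= 0; --i)
		lrc_put(lrc[i]);
	return err;
}

int main(void)
{
	struct lrc *lrc[4];
	int i;

	if (queue_init(lrc, 4))
		return 1;
	printf("created %d LRCs\n", 4);
	for (i = 0; i < 4; ++i)
		lrc_put(lrc[i]);
	return 0;
}
```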

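The new xe_exec_queue_update_run_ticks() accumulates (new_ts - old_ts) * q->width while sampling only the first LRC. Because the subtraction is done in unsigned 32-bit arithmetic, the delta remains correct even when the hardware counter wraps past 0xffffffff, as long as the true delta is below 2^32. A standalone sketch with made-up counter values:

```c
/* Sketch of the wrap-safe busyness accounting in
 * xe_exec_queue_update_run_ticks(). Counter values are invented
 * for illustration. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t run_ticks = 0;
	uint32_t old_ts = 0xfffffff0u;	/* sample taken just before wrap */
	uint32_t new_ts = 0x00000010u;	/* sample taken just after wrap */
	int width = 2;			/* parallel submission width */

	/* Unsigned 32-bit subtraction yields the true delta (32 ticks)
	 * despite the wraparound; scale by width since only q->lrc[0]
	 * is sampled. */
	run_ticks += (uint64_t)(new_ts - old_ts) * width;
	printf("delta=%u run_ticks=%llu\n", new_ts - old_ts,
	       (unsigned long long)run_ticks);	/* delta=32 run_ticks=64 */
	return 0;
}
```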