aboutsummaryrefslogtreecommitdiffstats
path: root/include/drm/gpu_scheduler.h
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2021-07-21 01:58:26 +0000
committerDave Airlie <[email protected]>2021-07-21 01:58:28 +0000
commit588b3eee528873d73bf777f329d35b2e65e24777 (patch)
tree5c93b3d1c08f208fb959bfbffa584b00175e3910 /include/drm/gpu_scheduler.h
parentMerge tag 'drm-intel-next-2021-07-08' of git://anongit.freedesktop.org/drm/dr... (diff)
parentdrm/dp: For drm_panel_dp_aux_backlight(), init backlight as disabled (diff)
downloadkernel-588b3eee528873d73bf777f329d35b2e65e24777.tar.gz
kernel-588b3eee528873d73bf777f329d35b2e65e24777.zip
Merge tag 'drm-misc-next-2021-07-16' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
drm-misc-next for v5.15: UAPI Changes: Cross-subsystem Changes: - udmabuf: Add support for mapping hugepages - Add dma-buf stats to sysfs. - Assorted fixes to fbdev/omap2. - dma-buf: Document DMA_BUF_IOCTL_SYNC - Improve dma-buf non-dynamic exporter expectations better. - Add module parameters for dma-buf size and list limit. - Add HDMI codec support to vc4, to replace vc4's own codec. - Document dma-buf implicit fencing rules. - dma_resv_test_signaled test_all handling. Core Changes: - Extract i915's eDP backlight code into DRM helpers. - Assorted docbook updates. - Rework drm_dp_aux documentation. - Add support for the DP aux bus. - Shrink dma-fence-chain slightly. - Add alloc/free helpers for dma-fence-chain. - Assorted fixes to TTM., drm/of, bridge - drm_gem_plane_helper_prepare/cleanup_fb is now the default for gem drivers. - Small fix for scheduler completion. - Remove use of drm_device.irq_enabled. - Print the driver name to dmesg when registering framebuffer. - Export drm/gem's shadow plane handling, and use it in vkms. - Assorted small fixes. Driver Changes: - Add eDP backlight to nouveau. - Assorted fixes and cleanups to nouveau, panfrost, vmwgfx, anx7625, amdgpu, gma500, radeon, mgag200, vgem, vc4, vkms, omapdrm. - Add support for Samsung DB7430, Samsung ATNA33XC20, EDT ETMV570G2DHU, EDT ETM0350G0DH6, Innolux EJ030NA panels. - Fix some simple pannels missing bus_format and connector types. - Add mks-guest-stats instrumentation support to vmwgfx. - Merge i915-ttm topic branch. - Make s6e63m0 panel use Mipi-DBI helpers. - Add detect() supoprt for AST. - Use interrupts for hotplug on vc4. - vmwgfx is now moved to drm-misc-next, as sroland is no longer a maintainer for now. - vmwgfx now uses copies of vmware's internal device headers. - Slowly convert ti-sn65dsi83 over to atomic. - Rework amdgpu dma-resv handling. - Fix virtio fencing for planes. - Ensure amdgpu can always evict to SYSTEM. - Many drivers fixed for implicit fencing rules. - Set default prepare/cleanup fb for tiny, vram and simple helpers too. - Rework panfrost gpu reset and related serialization. - Update VKMS todo list. - Make bochs a tiny gpu driver, and use vram helper. - Use linux irq interfaces instead of drm_irq in some drivers. - Add support for Raspberry Pi Pico to GUD. Signed-off-by: Dave Airlie <[email protected]> # gpg: Signature made Fri 16 Jul 2021 21:06:04 AEST # gpg: using RSA key B97BD6A80CAC4981091AE547FE558C72A67013C3 # gpg: Good signature from "Maarten Lankhorst <[email protected]>" [expired] # gpg: aka "Maarten Lankhorst <[email protected]>" [expired] # gpg: aka "Maarten Lankhorst <[email protected]>" [expired] # gpg: Note: This key has expired! # Primary key fingerprint: B97B D6A8 0CAC 4981 091A E547 FE55 8C72 A670 13C3 From: Maarten Lankhorst <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
Diffstat (limited to 'include/drm/gpu_scheduler.h')
-rw-r--r--include/drm/gpu_scheduler.h37
1 files changed, 36 insertions, 1 deletions
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index d18af49fd009..88ae7f331bb1 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -239,6 +239,38 @@ struct drm_sched_backend_ops {
* @timedout_job: Called when a job has taken too long to execute,
* to trigger GPU recovery.
*
+ * This method is called in a workqueue context.
+ *
+ * Drivers typically issue a reset to recover from GPU hangs, and this
+ * procedure usually follows the following workflow:
+ *
+ * 1. Stop the scheduler using drm_sched_stop(). This will park the
+ * scheduler thread and cancel the timeout work, guaranteeing that
+ * nothing is queued while we reset the hardware queue
+ * 2. Try to gracefully stop non-faulty jobs (optional)
+ * 3. Issue a GPU reset (driver-specific)
+ * 4. Re-submit jobs using drm_sched_resubmit_jobs()
+ * 5. Restart the scheduler using drm_sched_start(). At that point, new
+ * jobs can be queued, and the scheduler thread is unblocked
+ *
+ * Note that some GPUs have distinct hardware queues but need to reset
+ * the GPU globally, which requires extra synchronization between the
+ * timeout handler of the different &drm_gpu_scheduler. One way to
+ * achieve this synchronization is to create an ordered workqueue
+ * (using alloc_ordered_workqueue()) at the driver level, and pass this
+ * queue to drm_sched_init(), to guarantee that timeout handlers are
+ * executed sequentially. The above workflow needs to be slightly
+ * adjusted in that case:
+ *
+ * 1. Stop all schedulers impacted by the reset using drm_sched_stop()
+ * 2. Try to gracefully stop non-faulty jobs on all queues impacted by
+ * the reset (optional)
+ * 3. Issue a GPU reset on all faulty queues (driver-specific)
+ * 4. Re-submit jobs on all schedulers impacted by the reset using
+ * drm_sched_resubmit_jobs()
+ * 5. Restart all schedulers that were stopped in step #1 using
+ * drm_sched_start()
+ *
* Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal,
* and the underlying driver has started or completed recovery.
*
@@ -269,6 +301,7 @@ struct drm_sched_backend_ops {
* finished.
* @hw_rq_count: the number of jobs currently in the hardware queue.
* @job_id_count: used to assign unique id to the each job.
+ * @timeout_wq: workqueue used to queue @work_tdr
* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
* timeout interval is over.
* @thread: the kthread on which the scheduler which run.
@@ -293,6 +326,7 @@ struct drm_gpu_scheduler {
wait_queue_head_t job_scheduled;
atomic_t hw_rq_count;
atomic64_t job_id_count;
+ struct workqueue_struct *timeout_wq;
struct delayed_work work_tdr;
struct task_struct *thread;
struct list_head pending_list;
@@ -306,7 +340,8 @@ struct drm_gpu_scheduler {
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
- uint32_t hw_submission, unsigned hang_limit, long timeout,
+ uint32_t hw_submission, unsigned hang_limit,
+ long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name);
void drm_sched_fini(struct drm_gpu_scheduler *sched);