From 548b61a8ce18dec8757fcc112eac5bd125161408 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 15 Nov 2023 14:44:09 -0800 Subject: drm/msm/gpu: Move gpu devcore's to gpu device The dpu devcore's are already associated with the dpu device. So we should associate the gpu devcore's with the gpu device, for easier classification. Signed-off-by: Rob Clark Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/567738/ --- drivers/gpu/drm/msm/msm_gpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/msm/msm_gpu.c') diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 7f64c6667300..2b7c9db3ded3 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -292,8 +292,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, /* Set the active crash state to be dumped on failure */ gpu->crashstate = state; - /* FIXME: Release the crashstate if this errors out? */ - dev_coredumpm(gpu->dev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL, + dev_coredumpm(&gpu->pdev->dev, THIS_MODULE, gpu, 0, GFP_KERNEL, msm_gpu_devcoredump_read, msm_gpu_devcoredump_free); } #else -- cgit From 12578c075f89d6bd1b8af21751fbc2e1f78d2ce0 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 17 Nov 2023 07:24:28 -0800 Subject: drm/msm/gpu: Skip retired submits in recover worker If we somehow raced with submit retiring, either while waiting for worker to have a chance to run or acquiring the gpu lock, then the recover worker should just bail. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/568034/ --- drivers/gpu/drm/msm/msm_gpu.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/msm/msm_gpu.c') diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 2b7c9db3ded3..095390774f22 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -365,29 +365,31 @@ static void recover_worker(struct kthread_work *work) DRM_DEV_ERROR(dev->dev, "%s: hangcheck recover!\n", gpu->name); submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); - if (submit) { - /* Increment the fault counts */ - submit->queue->faults++; - if (submit->aspace) - submit->aspace->faults++; - get_comm_cmdline(submit, &comm, &cmd); + /* + * If the submit retired while we were waiting for the worker to run, + * or waiting to acquire the gpu lock, then nothing more to do. + */ + if (!submit) + goto out_unlock; - if (comm && cmd) { - DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", - gpu->name, comm, cmd); + /* Increment the fault counts */ + submit->queue->faults++; + if (submit->aspace) + submit->aspace->faults++; - msm_rd_dump_submit(priv->hangrd, submit, - "offending task: %s (%s)", comm, cmd); - } else { - msm_rd_dump_submit(priv->hangrd, submit, NULL); - } + get_comm_cmdline(submit, &comm, &cmd); + + if (comm && cmd) { + DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", + gpu->name, comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", comm, cmd); } else { - /* - * We couldn't attribute this fault to any particular context, - * so increment the global fault count instead. - */ - gpu->global_faults++; + DRM_DEV_ERROR(dev->dev, "%s: offending task: unknown\n", gpu->name); + + msm_rd_dump_submit(priv->hangrd, submit, NULL); } /* Record the crash state */ @@ -440,6 +442,7 @@ static void recover_worker(struct kthread_work *work) pm_runtime_put(&gpu->pdev->dev); +out_unlock: mutex_unlock(&gpu->lock); msm_gpu_retire(gpu); -- cgit