diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_submit.c')
| -rw-r--r-- | drivers/gpu/drm/xe/xe_guc_submit.c | 86 |
1 files changed, 61 insertions, 25 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 799ef9f48003..fc4f99d46763 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -48,6 +48,8 @@ #define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6 +static int guc_submit_reset_prepare(struct xe_guc *guc); + static struct xe_guc * exec_queue_to_guc(struct xe_exec_queue *q) { @@ -239,7 +241,7 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) EXEC_QUEUE_STATE_BANNED)); } -static void guc_submit_fini(struct drm_device *drm, void *arg) +static void guc_submit_sw_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; struct xe_device *xe = guc_to_xe(guc); @@ -257,6 +259,19 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) xa_destroy(&guc->submission_state.exec_queue_lookup); } +static void guc_submit_fini(void *arg) +{ + struct xe_guc *guc = arg; + + /* Forcefully kill any remaining exec queues */ + xe_guc_ct_stop(&guc->ct); + guc_submit_reset_prepare(guc); + xe_guc_softreset(guc); + xe_guc_submit_stop(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); +} + static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; @@ -326,7 +341,11 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) guc->submission_state.initialized = true; - return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); + err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc); + if (err) + return err; + + return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc); } /* @@ -1252,6 +1271,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc, */ void xe_guc_submit_wedge(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; @@ -1266,20 +1286,28 @@ void xe_guc_submit_wedge(struct xe_guc *guc) if (!guc->submission_state.initialized) return; - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; " - "Although device is wedged.\n", - xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)); - return; - } + if (xe->wedged.mode == 2) { + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, + guc_submit_wedged_fini, guc); + if (err) { + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " + "Although device is wedged.\n"); + return; + } - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (xe_exec_queue_get_unless_zero(q)) - set_exec_queue_wedged(q); - mutex_unlock(&guc->submission_state.lock); + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) + if (xe_exec_queue_get_unless_zero(q)) + set_exec_queue_wedged(q); + mutex_unlock(&guc->submission_state.lock); + } else { + /* Forcefully kill any remaining exec queues, signal fences */ + guc_submit_reset_prepare(guc); + xe_guc_submit_stop(guc); + xe_guc_softreset(guc); + xe_uc_fw_sanitize(&guc->fw); + xe_guc_submit_pause_abort(guc); + } } static bool guc_submit_hint_wedged(struct xe_guc *guc) @@ -2230,6 +2258,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = { static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched = &q->guc->sched; + bool do_destroy = false; /* Stop scheduling + flush any DRM scheduler operations */ xe_sched_submission_stop(sched); @@ -2237,7 +2266,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) /* Clean up lost G2H + reset engine state */ if (exec_queue_registered(q)) { if (exec_queue_destroyed(q)) - __guc_exec_queue_destroy(guc, q); + do_destroy = true; } if (q->guc->suspend_pending) { set_exec_queue_suspended(q); @@ -2273,18 +2302,15 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) xe_guc_exec_queue_trigger_cleanup(q); } } + + if (do_destroy) + __guc_exec_queue_destroy(guc, q); } -int xe_guc_submit_reset_prepare(struct xe_guc *guc) +static int guc_submit_reset_prepare(struct xe_guc *guc) { int ret; - if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) - return 0; - - if (!guc->submission_state.initialized) - return 0; - /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset @@ -2299,6 +2325,17 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) return ret; } +int xe_guc_submit_reset_prepare(struct xe_guc *guc) +{ + if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) + return 0; + + if (!guc->submission_state.initialized) + return 0; + + return guc_submit_reset_prepare(guc); +} + void xe_guc_submit_reset_wait(struct xe_guc *guc) { wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || @@ -2695,8 +2732,7 @@ void xe_guc_submit_pause_abort(struct xe_guc *guc) continue; xe_sched_submission_start(sched); - if (exec_queue_killed_or_banned_or_wedged(q)) - xe_guc_exec_queue_trigger_cleanup(q); + guc_exec_queue_kill(q); } mutex_unlock(&guc->submission_state.lock); } |
