summary refs log tree commit diff
path: root/drivers/gpu/drm/xe/xe_guc_submit.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_submit.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 86
1 file changed, 61 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 799ef9f48003..fc4f99d46763 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -48,6 +48,8 @@
#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6
+static int guc_submit_reset_prepare(struct xe_guc *guc);
+
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
@@ -239,7 +241,7 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
EXEC_QUEUE_STATE_BANNED));
}
-static void guc_submit_fini(struct drm_device *drm, void *arg)
+static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
struct xe_device *xe = guc_to_xe(guc);
@@ -257,6 +259,19 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
xa_destroy(&guc->submission_state.exec_queue_lookup);
}
+static void guc_submit_fini(void *arg)
+{
+ struct xe_guc *guc = arg;
+
+ /* Forcefully kill any remaining exec queues */
+ xe_guc_ct_stop(&guc->ct);
+ guc_submit_reset_prepare(guc);
+ xe_guc_softreset(guc);
+ xe_guc_submit_stop(guc);
+ xe_uc_fw_sanitize(&guc->fw);
+ xe_guc_submit_pause_abort(guc);
+}
+
static void guc_submit_wedged_fini(void *arg)
{
struct xe_guc *guc = arg;
@@ -326,7 +341,11 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
guc->submission_state.initialized = true;
- return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
+ err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
+ if (err)
+ return err;
+
+ return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
}
/*
@@ -1252,6 +1271,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
*/
void xe_guc_submit_wedge(struct xe_guc *guc)
{
+ struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
unsigned long index;
@@ -1266,20 +1286,28 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
if (!guc->submission_state.initialized)
return;
- err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
- guc_submit_wedged_fini, guc);
- if (err) {
- xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; "
- "Although device is wedged.\n",
- xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
- return;
- }
+ if (xe->wedged.mode == 2) {
+ err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
+ guc_submit_wedged_fini, guc);
+ if (err) {
+ xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
+ "Although device is wedged.\n");
+ return;
+ }
- mutex_lock(&guc->submission_state.lock);
- xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
- if (xe_exec_queue_get_unless_zero(q))
- set_exec_queue_wedged(q);
- mutex_unlock(&guc->submission_state.lock);
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ if (xe_exec_queue_get_unless_zero(q))
+ set_exec_queue_wedged(q);
+ mutex_unlock(&guc->submission_state.lock);
+ } else {
+ /* Forcefully kill any remaining exec queues, signal fences */
+ guc_submit_reset_prepare(guc);
+ xe_guc_submit_stop(guc);
+ xe_guc_softreset(guc);
+ xe_uc_fw_sanitize(&guc->fw);
+ xe_guc_submit_pause_abort(guc);
+ }
}
static bool guc_submit_hint_wedged(struct xe_guc *guc)
@@ -2230,6 +2258,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched = &q->guc->sched;
+ bool do_destroy = false;
/* Stop scheduling + flush any DRM scheduler operations */
xe_sched_submission_stop(sched);
@@ -2237,7 +2266,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
/* Clean up lost G2H + reset engine state */
if (exec_queue_registered(q)) {
if (exec_queue_destroyed(q))
- __guc_exec_queue_destroy(guc, q);
+ do_destroy = true;
}
if (q->guc->suspend_pending) {
set_exec_queue_suspended(q);
@@ -2273,18 +2302,15 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
xe_guc_exec_queue_trigger_cleanup(q);
}
}
+
+ if (do_destroy)
+ __guc_exec_queue_destroy(guc, q);
}
-int xe_guc_submit_reset_prepare(struct xe_guc *guc)
+static int guc_submit_reset_prepare(struct xe_guc *guc)
{
int ret;
- if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
- return 0;
-
- if (!guc->submission_state.initialized)
- return 0;
-
/*
* Using an atomic here rather than submission_state.lock as this
* function can be called while holding the CT lock (engine reset
@@ -2299,6 +2325,17 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
return ret;
}
+int xe_guc_submit_reset_prepare(struct xe_guc *guc)
+{
+ if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
+ return 0;
+
+ if (!guc->submission_state.initialized)
+ return 0;
+
+ return guc_submit_reset_prepare(guc);
+}
+
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
@@ -2695,8 +2732,7 @@ void xe_guc_submit_pause_abort(struct xe_guc *guc)
continue;
xe_sched_submission_start(sched);
- if (exec_queue_killed_or_banned_or_wedged(q))
- xe_guc_exec_queue_trigger_cleanup(q);
+ guc_exec_queue_kill(q);
}
mutex_unlock(&guc->submission_state.lock);
}