Skip to content

Commit b2d7ec4

Browse files
committed
drm/xe: Attach last fence to TLB invalidation job queues
Add support for attaching the last fence to TLB invalidation job queues to address serialization issues during bursts of unbind jobs. Ensure that user fence signaling for a bind job reflects both the bind job itself and the last fences of all related TLB invalidations. Maintain submission order based solely on the state of the bind and TLB invalidation queues. Introduce support functions for last fence attachment to TLB invalidation queues.

v3:
- Fix assert in xe_exec_queue_tlb_inval_last_fence_set (CI)
- Ensure migrate lock held for migrate queues (Testing)

v5:
- Style nits (Thomas)
- Rewrite commit message (Thomas)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Link: https://patch.msgid.link/20251031234050.3043507-3-matthew.brost@intel.com
1 parent adda4e8 commit b2d7ec4

6 files changed

Lines changed: 156 additions & 2 deletions

File tree

drivers/gpu/drm/xe/xe_exec_queue.c

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ void xe_exec_queue_destroy(struct kref *ref)
387387
{
388388
struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
389389
struct xe_exec_queue *eq, *next;
390+
int i;
390391

391392
xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
392393

@@ -397,6 +398,9 @@ void xe_exec_queue_destroy(struct kref *ref)
397398
xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
398399

399400
xe_exec_queue_last_fence_put_unlocked(q);
401+
for_each_tlb_inval(i)
402+
xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);
403+
400404
if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
401405
list_for_each_entry_safe(eq, next, &q->multi_gt_list,
402406
multi_gt_link)
@@ -1014,7 +1018,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
10141018
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
10151019
struct xe_vm *vm)
10161020
{
1017-
if (q->flags & EXEC_QUEUE_FLAG_VM) {
1021+
if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
1022+
xe_migrate_job_lock_assert(q);
1023+
} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
10181024
lockdep_assert_held(&vm->lock);
10191025
} else {
10201026
xe_vm_assert_held(vm);
@@ -1113,6 +1119,7 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
11131119
struct dma_fence *fence)
11141120
{
11151121
xe_exec_queue_last_fence_lockdep_assert(q, vm);
1122+
xe_assert(vm->xe, !dma_fence_is_container(fence));
11161123

11171124
xe_exec_queue_last_fence_put(q, vm);
11181125
q->last_fence = dma_fence_get(fence);
@@ -1141,6 +1148,100 @@ int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
11411148
return err;
11421149
}
11431150

1151+
/**
1152+
* xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
1153+
* @q: The exec queue
1154+
* @vm: The VM the engine does a bind for
1155+
* @type: Either primary or media GT
1156+
*/
1157+
void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
1158+
struct xe_vm *vm,
1159+
unsigned int type)
1160+
{
1161+
xe_exec_queue_last_fence_lockdep_assert(q, vm);
1162+
xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1163+
type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1164+
1165+
xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
1166+
}
1167+
1168+
/**
1169+
* xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
1170+
* invalidation fence unlocked
1171+
* @q: The exec queue
1172+
* @type: Either primary or media GT
1173+
*
1174+
* Only safe to be called from xe_exec_queue_destroy().
1175+
*/
1176+
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
1177+
unsigned int type)
1178+
{
1179+
xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1180+
type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1181+
1182+
dma_fence_put(q->tlb_inval[type].last_fence);
1183+
q->tlb_inval[type].last_fence = NULL;
1184+
}
1185+
1186+
/**
1187+
* xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
1188+
* @q: The exec queue
1189+
* @vm: The VM the engine does a bind for
1190+
* @type: Either primary or media GT
1191+
*
1192+
* Get last fence, takes a ref
1193+
*
1194+
* Returns: last fence if not signaled, dma fence stub if signaled
1195+
*/
1196+
struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
1197+
struct xe_vm *vm,
1198+
unsigned int type)
1199+
{
1200+
struct dma_fence *fence;
1201+
1202+
xe_exec_queue_last_fence_lockdep_assert(q, vm);
1203+
xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1204+
type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1205+
xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
1206+
EXEC_QUEUE_FLAG_MIGRATE));
1207+
1208+
if (q->tlb_inval[type].last_fence &&
1209+
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
1210+
&q->tlb_inval[type].last_fence->flags))
1211+
xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
1212+
1213+
fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
1214+
dma_fence_get(fence);
1215+
return fence;
1216+
}
1217+
1218+
/**
1219+
* xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
1220+
* @q: The exec queue
1221+
* @vm: The VM the engine does a bind for
1222+
* @fence: The fence
1223+
* @type: Either primary or media GT
1224+
*
1225+
* Set the last fence for the tlb invalidation type on the queue. Increases
1226+
* reference count for fence, when closing queue
1227+
* xe_exec_queue_tlb_inval_last_fence_put should be called.
1228+
*/
1229+
void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
1230+
struct xe_vm *vm,
1231+
struct dma_fence *fence,
1232+
unsigned int type)
1233+
{
1234+
xe_exec_queue_last_fence_lockdep_assert(q, vm);
1235+
xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1236+
type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1237+
xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
1238+
EXEC_QUEUE_FLAG_MIGRATE));
1239+
xe_assert(vm->xe, !dma_fence_is_container(fence));
1240+
1241+
xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
1242+
q->tlb_inval[type].last_fence = dma_fence_get(fence);
1243+
}
1244+
11441245
/**
11451246
* xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references
11461247
* within all LRCs of a queue.

drivers/gpu/drm/xe/xe_exec_queue.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ struct drm_file;
1414
struct xe_device;
1515
struct xe_file;
1616

17+
#define for_each_tlb_inval(__i) \
18+
for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \
19+
__i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i)
20+
1721
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
1822
u32 logical_mask, u16 width,
1923
struct xe_hw_engine *hw_engine, u32 flags,
@@ -86,6 +90,23 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
8690
struct dma_fence *fence);
8791
int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
8892
struct xe_vm *vm);
93+
94+
void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
95+
struct xe_vm *vm,
96+
unsigned int type);
97+
98+
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
99+
unsigned int type);
100+
101+
struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
102+
struct xe_vm *vm,
103+
unsigned int type);
104+
105+
void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
106+
struct xe_vm *vm,
107+
struct dma_fence *fence,
108+
unsigned int type);
109+
89110
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
90111

91112
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);

drivers/gpu/drm/xe/xe_exec_queue_types.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ struct xe_exec_queue {
146146
* dependency scheduler
147147
*/
148148
struct xe_dep_scheduler *dep_scheduler;
149+
/**
150+
* @last_fence: last fence for tlb invalidation, protected by
151+
* vm->lock in write mode
152+
*/
153+
struct dma_fence *last_fence;
149154
} tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT];
150155

151156
/** @pxp: PXP info tracking */

drivers/gpu/drm/xe/xe_migrate.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2333,6 +2333,20 @@ void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q)
23332333
xe_vm_assert_held(q->vm); /* User queues VM's should be locked */
23342334
}
23352335

2336+
#if IS_ENABLED(CONFIG_PROVE_LOCKING)
2337+
/**
2338+
* xe_migrate_job_lock_assert() - Assert migrate job lock held of queue
2339+
* @q: Migrate queue
2340+
*/
2341+
void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
2342+
{
2343+
struct xe_migrate *m = gt_to_tile(q->gt)->migrate;
2344+
2345+
xe_gt_assert(q->gt, q == m->q);
2346+
lockdep_assert_held(&m->job_mutex);
2347+
}
2348+
#endif
2349+
23362350
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
23372351
#include "tests/xe_migrate.c"
23382352
#endif

drivers/gpu/drm/xe/xe_migrate.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,14 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
152152

153153
void xe_migrate_wait(struct xe_migrate *m);
154154

155+
#if IS_ENABLED(CONFIG_PROVE_LOCKING)
156+
void xe_migrate_job_lock_assert(struct xe_exec_queue *q);
157+
#else
158+
static inline void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
159+
{
160+
}
161+
#endif
162+
155163
void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q);
156164
void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q);
157165

drivers/gpu/drm/xe/xe_vm.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1731,8 +1731,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
17311731

17321732
down_write(&vm->lock);
17331733
for_each_tile(tile, xe, id) {
1734-
if (vm->q[id])
1734+
if (vm->q[id]) {
1735+
int i;
1736+
17351737
xe_exec_queue_last_fence_put(vm->q[id], vm);
1738+
for_each_tlb_inval(i)
1739+
xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
1740+
}
17361741
}
17371742
up_write(&vm->lock);
17381743

0 commit comments

Comments (0)