Skip to content

Commit 9fc27cb

Browse files
committed
drm/amdgpu: don't reemit ring contents more than once
If we cancel a bad job and reemit the ring contents, and we get another timeout, cancel everything rather than reemitting. The wptr markers are only relevant for the original emit. If we reemit, the wptr markers are no longer correct.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit fb62a20)
1 parent dc8a887 commit 9fc27cb

2 files changed

Lines changed: 19 additions & 5 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,7 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
709709
struct amdgpu_ring *ring = af->ring;
710710
unsigned long flags;
711711
u32 seq, last_seq;
712+
bool reemitted = false;
712713

713714
last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
714715
seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
@@ -726,17 +727,26 @@ void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
726727
if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
727728
fence = container_of(unprocessed, struct amdgpu_fence, base);
728729

729-
if (fence == af)
730+
if (fence->reemitted > 1)
731+
reemitted = true;
732+
else if (fence == af)
730733
dma_fence_set_error(&fence->base, -ETIME);
731734
else if (fence->context == af->context)
732735
dma_fence_set_error(&fence->base, -ECANCELED);
733736
}
734737
rcu_read_unlock();
735738
} while (last_seq != seq);
736739
spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
737-
/* signal the guilty fence */
738-
amdgpu_fence_write(ring, (u32)af->base.seqno);
739-
amdgpu_fence_process(ring);
740+
741+
if (reemitted) {
742+
/* if we've already reemitted once then just cancel everything */
743+
amdgpu_fence_driver_force_completion(af->ring);
744+
af->ring->ring_backup_entries_to_copy = 0;
745+
} else {
746+
/* signal the guilty fence */
747+
amdgpu_fence_write(ring, (u32)af->base.seqno);
748+
amdgpu_fence_process(ring);
749+
}
740750
}
741751

742752
void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
@@ -784,10 +794,12 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
784794
/* save everything if the ring is not guilty, otherwise
785795
* just save the content from other contexts.
786796
*/
787-
if (!guilty_fence || (fence->context != guilty_fence->context))
797+
if (!fence->reemitted &&
798+
(!guilty_fence || (fence->context != guilty_fence->context)))
788799
amdgpu_ring_backup_unprocessed_command(ring, wptr,
789800
fence->wptr);
790801
wptr = fence->wptr;
802+
fence->reemitted++;
791803
}
792804
rcu_read_unlock();
793805
} while (last_seq != seq);

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ struct amdgpu_fence {
148148
u64 wptr;
149149
/* fence context for resets */
150150
u64 context;
151+
/* has this fence been reemitted */
152+
unsigned int reemitted;
151153
};
152154

153155
extern const struct drm_sched_backend_ops amdgpu_sched_ops;

0 commit comments

Comments
 (0)