Skip to content

Commit abc158c

Browse files
author
Peter Zijlstra
committed
sched: Prepare generic code for delayed dequeue
While most of the delayed dequeue code can be done inside the sched_class itself, there is one location where we do not have an appropriate hook, namely ttwu_runnable(). Add an ENQUEUE_DELAYED call to the on_rq path to deal with waking delayed dequeue tasks. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Valentin Schneider <vschneid@redhat.com> Tested-by: Valentin Schneider <vschneid@redhat.com> Link: https://lkml.kernel.org/r/20240727105029.200000445@infradead.org
1 parent e890106 commit abc158c

3 files changed

Lines changed: 16 additions & 1 deletion

File tree

include/linux/sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,7 @@ struct sched_entity {
544544

545545
struct list_head group_node;
546546
unsigned int on_rq;
547+
unsigned int sched_delayed;
547548

548549
u64 exec_start;
549550
u64 sum_exec_runtime;

kernel/sched/core.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2036,6 +2036,8 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)
20362036

20372037
void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
20382038
{
2039+
SCHED_WARN_ON(flags & DEQUEUE_SLEEP);
2040+
20392041
WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
20402042
ASSERT_EXCLUSIVE_WRITER(p->on_rq);
20412043

@@ -3689,12 +3691,14 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)
36893691

36903692
rq = __task_rq_lock(p, &rf);
36913693
if (task_on_rq_queued(p)) {
3694+
update_rq_clock(rq);
3695+
if (p->se.sched_delayed)
3696+
enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
36923697
if (!task_on_cpu(rq, p)) {
36933698
/*
36943699
* When on_rq && !on_cpu the task is preempted, see if
36953700
* it should preempt the task that is current now.
36963701
*/
3697-
update_rq_clock(rq);
36983702
wakeup_preempt(rq, p, wake_flags);
36993703
}
37003704
ttwu_do_wakeup(p);
@@ -4074,11 +4078,16 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
40744078
* case the whole 'p->on_rq && ttwu_runnable()' case below
40754079
* without taking any locks.
40764080
*
4081+
* Specifically, given current runs ttwu() we must be before
4082+
* schedule()'s block_task(), as such this must not observe
4083+
* sched_delayed.
4084+
*
40774085
* In particular:
40784086
* - we rely on Program-Order guarantees for all the ordering,
40794087
* - we're serialized against set_special_state() by virtue of
40804088
* it disabling IRQs (this allows not taking ->pi_lock).
40814089
*/
4090+
SCHED_WARN_ON(p->se.sched_delayed);
40824091
if (!ttwu_state_match(p, state, &success))
40834092
goto out;
40844093

@@ -4370,6 +4379,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
43704379
p->se.slice = sysctl_sched_base_slice;
43714380
INIT_LIST_HEAD(&p->se.group_node);
43724381

4382+
/* A delayed task cannot be in clone(). */
4383+
SCHED_WARN_ON(p->se.sched_delayed);
4384+
43734385
#ifdef CONFIG_FAIR_GROUP_SCHED
43744386
p->se.cfs_rq = NULL;
43754387
#endif

kernel/sched/sched.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2253,6 +2253,7 @@ extern const u32 sched_prio_to_wmult[40];
22532253
#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
22542254
#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
22552255
#define DEQUEUE_MIGRATING 0x100 /* Matches ENQUEUE_MIGRATING */
2256+
#define DEQUEUE_DELAYED 0x200 /* Matches ENQUEUE_DELAYED */
22562257

22572258
#define ENQUEUE_WAKEUP 0x01
22582259
#define ENQUEUE_RESTORE 0x02
@@ -2268,6 +2269,7 @@ extern const u32 sched_prio_to_wmult[40];
22682269
#endif
22692270
#define ENQUEUE_INITIAL 0x80
22702271
#define ENQUEUE_MIGRATING 0x100
2272+
#define ENQUEUE_DELAYED 0x200
22712273

22722274
#define RETRY_TASK ((void *)-1UL)
22732275

0 commit comments

Comments (0)