Skip to content

Commit 2e83b87

Browse files
committed
srcu: Create an srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe()
On strict load-store architectures, the use of this_cpu_inc() by srcu_read_lock() and srcu_read_unlock() is not NMI-safe in TREE SRCU. To see this, suppose that an NMI arrives in the middle of srcu_read_lock(), just after it has read ->srcu_lock_count, but before it has written the incremented value back to memory. If that NMI handler also does srcu_read_lock() and srcu_read_unlock() on that same srcu_struct structure, then upon return from that NMI handler, the interrupted srcu_read_lock() will overwrite the NMI handler's update to ->srcu_lock_count, but leave unchanged the NMI handler's update by srcu_read_unlock() to ->srcu_unlock_count. This can result in a too-short SRCU grace period, which can in turn result in arbitrary memory corruption. If the NMI handler instead interrupts the srcu_read_unlock(), this can result in eternal SRCU grace periods, which is not much better. This commit therefore creates a pair of new srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() functions, which allow SRCU readers in both NMI handlers and in process and IRQ context. It is bad practice to mix the existing and the new _nmisafe() primitives on the same srcu_struct structure. Use one set or the other, not both. Just to underline that "bad practice" point, using srcu_read_lock() at process level and srcu_read_lock_nmisafe() in your NMI handler will not, repeat NOT, work. If you do not immediately understand why this is the case, please review the earlier paragraphs in this commit log. [ paulmck: Apply kernel test robot feedback. ] [ paulmck: Apply feedback from Randy Dunlap. ] [ paulmck: Apply feedback from John Ogness. ] [ paulmck: Apply feedback from Frederic Weisbecker. ] Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/ Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested Reviewed-by: Frederic Weisbecker <frederic@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: John Ogness <john.ogness@linutronix.de> Cc: Petr Mladek <pmladek@suse.com>
1 parent 5d0f595 commit 2e83b87

5 files changed

Lines changed: 105 additions & 6 deletions

File tree

arch/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,9 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
468468
config ARCH_HAVE_NMI_SAFE_CMPXCHG
469469
bool
470470

471+
config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
472+
bool
473+
471474
config HAVE_ALIGNED_STRUCT_PAGE
472475
bool
473476
help

include/linux/srcu.h

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,20 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
6464
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
6565
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
6666

67+
#ifdef CONFIG_NEED_SRCU_NMI_SAFE
68+
int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
69+
void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
70+
#else
71+
static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
72+
{
73+
return __srcu_read_lock(ssp);
74+
}
75+
static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
76+
{
77+
__srcu_read_unlock(ssp, idx);
78+
}
79+
#endif /* CONFIG_NEED_SRCU_NMI_SAFE */
80+
6781
#ifdef CONFIG_SRCU
6882
void srcu_init(void);
6983
#else /* #ifdef CONFIG_SRCU */
@@ -166,6 +180,25 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
166180
return retval;
167181
}
168182

183+
/**
184+
* srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
185+
* @ssp: srcu_struct in which to register the new reader.
186+
*
187+
* Enter an SRCU read-side critical section, but in an NMI-safe manner.
188+
* See srcu_read_lock() for more information.
189+
*/
190+
static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
191+
{
192+
int retval;
193+
194+
if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
195+
retval = __srcu_read_lock_nmisafe(ssp);
196+
else
197+
retval = __srcu_read_lock(ssp);
198+
rcu_lock_acquire(&(ssp)->dep_map);
199+
return retval;
200+
}
201+
169202
/* Used by tracing, cannot be traced and cannot invoke lockdep. */
170203
static inline notrace int
171204
srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
@@ -191,6 +224,24 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
191224
__srcu_read_unlock(ssp, idx);
192225
}
193226

227+
/**
228+
* srcu_read_unlock_nmisafe - unregister an old reader from an SRCU-protected structure.
229+
* @ssp: srcu_struct in which to unregister the old reader.
230+
* @idx: return value from corresponding srcu_read_lock_nmisafe().
231+
*
232+
* Exit an SRCU read-side critical section, but in an NMI-safe manner.
233+
*/
234+
static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
235+
__releases(ssp)
236+
{
237+
WARN_ON_ONCE(idx & ~0x1);
238+
rcu_lock_release(&(ssp)->dep_map);
239+
if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
240+
__srcu_read_unlock_nmisafe(ssp, idx);
241+
else
242+
__srcu_read_unlock(ssp, idx);
243+
}
244+
194245
/* Used by tracing, cannot be traced and cannot call lockdep. */
195246
static inline notrace void
196247
srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)

kernel/rcu/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ config TREE_SRCU
7272
help
7373
This option selects the full-fledged version of SRCU.
7474

75+
config NEED_SRCU_NMI_SAFE
76+
def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
77+
7578
config TASKS_RCU_GENERIC
7679
def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
7780
select SRCU

kernel/rcu/rcutorture.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -615,10 +615,14 @@ static struct rcu_torture_ops rcu_busted_ops = {
615615
DEFINE_STATIC_SRCU(srcu_ctl);
616616
static struct srcu_struct srcu_ctld;
617617
static struct srcu_struct *srcu_ctlp = &srcu_ctl;
618+
static struct rcu_torture_ops srcud_ops;
618619

619620
static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
620621
{
621-
return srcu_read_lock(srcu_ctlp);
622+
if (cur_ops == &srcud_ops)
623+
return srcu_read_lock_nmisafe(srcu_ctlp);
624+
else
625+
return srcu_read_lock(srcu_ctlp);
622626
}
623627

624628
static void
@@ -642,7 +646,10 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
642646

643647
static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
644648
{
645-
srcu_read_unlock(srcu_ctlp, idx);
649+
if (cur_ops == &srcud_ops)
650+
srcu_read_unlock_nmisafe(srcu_ctlp, idx);
651+
else
652+
srcu_read_unlock(srcu_ctlp, idx);
646653
}
647654

648655
static int torture_srcu_read_lock_held(void)

kernel/rcu/srcutree.c

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,41 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
654654
}
655655
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
656656

657+
#ifdef CONFIG_NEED_SRCU_NMI_SAFE
658+
659+
/*
660+
* Counts the new reader in the appropriate per-CPU element of the
661+
* srcu_struct, but in an NMI-safe manner using RMW atomics.
662+
* Returns an index that must be passed to the matching srcu_read_unlock_nmisafe().
663+
*/
664+
int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
665+
{
666+
int idx;
667+
struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
668+
669+
idx = READ_ONCE(ssp->srcu_idx) & 0x1;
670+
atomic_long_inc(&sdp->srcu_lock_count[idx]);
671+
smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */
672+
return idx;
673+
}
674+
EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
675+
676+
/*
677+
* Removes the count for the old reader from the appropriate per-CPU
678+
* element of the srcu_struct. Note that this may well be a different
679+
* CPU than that which was incremented by the corresponding srcu_read_lock_nmisafe().
680+
*/
681+
void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
682+
{
683+
struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
684+
685+
smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */
686+
atomic_long_inc(&sdp->srcu_unlock_count[idx]);
687+
}
688+
EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
689+
690+
#endif // CONFIG_NEED_SRCU_NMI_SAFE
691+
657692
/*
658693
* Start an SRCU grace period.
659694
*/
@@ -1090,7 +1125,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
10901125
int ss_state;
10911126

10921127
check_init_srcu_struct(ssp);
1093-
idx = srcu_read_lock(ssp);
1128+
idx = __srcu_read_lock_nmisafe(ssp);
10941129
ss_state = smp_load_acquire(&ssp->srcu_size_state);
10951130
if (ss_state < SRCU_SIZE_WAIT_CALL)
10961131
sdp = per_cpu_ptr(ssp->sda, 0);
@@ -1123,7 +1158,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
11231158
srcu_funnel_gp_start(ssp, sdp, s, do_norm);
11241159
else if (needexp)
11251160
srcu_funnel_exp_start(ssp, sdp_mynode, s);
1126-
srcu_read_unlock(ssp, idx);
1161+
__srcu_read_unlock_nmisafe(ssp, idx);
11271162
return s;
11281163
}
11291164

@@ -1427,13 +1462,13 @@ void srcu_barrier(struct srcu_struct *ssp)
14271462
/* Initial count prevents reaching zero until all CBs are posted. */
14281463
atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
14291464

1430-
idx = srcu_read_lock(ssp);
1465+
idx = __srcu_read_lock_nmisafe(ssp);
14311466
if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
14321467
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
14331468
else
14341469
for_each_possible_cpu(cpu)
14351470
srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
1436-
srcu_read_unlock(ssp, idx);
1471+
__srcu_read_unlock_nmisafe(ssp, idx);
14371472

14381473
/* Remove the initial count, at which point reaching zero can happen. */
14391474
if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))

0 commit comments

Comments
 (0)