@@ -25,8 +25,9 @@
 #include <trace/events/lock.h>
 
 /*
- * Include queued spinlock statistics code
+ * Include queued spinlock definitions and statistics code
  */
+#include "qspinlock.h"
 #include "qspinlock_stat.h"
 
 /*
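
Note (annotation, not part of the diff): the new "qspinlock.h" include carries everything the hunks below delete from this file. A minimal sketch of the header's likely shape, assuming a conventional include guard; the guard name, the internal ordering, and the nested include are this sketch's assumptions, while the moved items themselves are taken from the removals below:

/* kernel/locking/qspinlock.h -- sketch inferred from this diff */
#ifndef __LINUX_QSPINLOCK_H		/* assumed guard name */
#define __LINUX_QSPINLOCK_H

#include "mcs_spinlock.h"		/* for struct mcs_spinlock; assumed */

#define _Q_MAX_NODES	4		/* renamed from MAX_NODES */

struct qnode {				/* unchanged apart from the move */
	struct mcs_spinlock mcs;
#ifdef CONFIG_PARAVIRT_SPINLOCKS
	long reserved[2];
#endif
};

#ifndef _Q_PENDING_LOOPS
#define _Q_PENDING_LOOPS	1
#endif

/*
 * encode_tail(), decode_tail(), grab_mcs_node(), clear_pending(),
 * clear_pending_set_locked(), xchg_tail(), queued_fetch_set_pending_acquire()
 * and set_locked() move here essentially verbatim; only decode_tail()
 * changes signature (see the final hunk).
 */

#endif /* __LINUX_QSPINLOCK_H */
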
@@ -67,36 +68,6 @@
  */
 
 #include "mcs_spinlock.h"
-#define MAX_NODES	4
-
-/*
- * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
- * size and four of them will fit nicely in one 64-byte cacheline. For
- * pvqspinlock, however, we need more space for extra data. To accommodate
- * that, we insert two more long words to pad it up to 32 bytes. IOW, only
- * two of them can fit in a cacheline in this case. That is OK as it is rare
- * to have more than 2 levels of slowpath nesting in actual use. We don't
- * want to penalize pvqspinlocks to optimize for a rare case in native
- * qspinlocks.
- */
-struct qnode {
-	struct mcs_spinlock mcs;
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-	long reserved[2];
-#endif
-};
-
-/*
- * The pending bit spinning loop count.
- * This heuristic is used to limit the number of lockword accesses
- * made by atomic_cond_read_relaxed when waiting for the lock to
- * transition out of the "== _Q_PENDING_VAL" state. We don't spin
- * indefinitely because there's no guarantee that we'll make forward
- * progress.
- */
-#ifndef _Q_PENDING_LOOPS
-#define _Q_PENDING_LOOPS	1
-#endif
 
 /*
  * Per-CPU queue node structures; we can never have more than 4 nested
@@ -106,161 +77,7 @@ struct qnode
  *
  * PV doubles the storage and uses the second cacheline for PV state.
  */
-static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
-
-/*
- * We must be able to distinguish between no-tail and the tail at 0:0,
- * therefore increment the cpu number by one.
- */
-
-static inline __pure u32 encode_tail(int cpu, int idx)
-{
-	u32 tail;
-
-	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
-	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
-
-	return tail;
-}
-
-static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
-{
-	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
-	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
-
-	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
-}
-
-static inline __pure
-struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
-{
-	return &((struct qnode *)base + idx)->mcs;
-}
-
-#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
-
-#if _Q_PENDING_BITS == 8
-/**
- * clear_pending - clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,* -> *,0,*
- */
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
-	WRITE_ONCE(lock->pending, 0);
-}
-
-/**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- *
- * Lock stealing is not allowed if this function is used.
- */
-static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
-{
-	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
-}
-
-/*
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail), which heads an address dependency
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
-static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
-{
-	/*
-	 * We can use relaxed semantics since the caller ensures that the
-	 * MCS node is properly initialized before updating the tail.
-	 */
-	return (u32)xchg_relaxed(&lock->tail,
-				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
-}
-
-#else /* _Q_PENDING_BITS == 8 */
-
-/**
- * clear_pending - clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,* -> *,0,*
- */
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
-	atomic_andnot(_Q_PENDING_VAL, &lock->val);
-}
-
-/**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- */
-static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
-{
-	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
-}
-
-/**
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail)
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
-static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
-{
-	u32 old, new;
-
-	old = atomic_read(&lock->val);
-	do {
-		new = (old & _Q_LOCKED_PENDING_MASK) | tail;
-		/*
-		 * We can use relaxed semantics since the caller ensures that
-		 * the MCS node is properly initialized before updating the
-		 * tail.
-		 */
-	} while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
-
-	return old;
-}
-#endif /* _Q_PENDING_BITS == 8 */
-
-/**
- * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
- * @lock : Pointer to queued spinlock structure
- * Return: The previous lock value
- *
- * *,*,* -> *,1,*
- */
-#ifndef queued_fetch_set_pending_acquire
-static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
-{
-	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
-}
-#endif
-
-/**
- * set_locked - Set the lock bit and own the lock
- * @lock: Pointer to queued spinlock structure
- *
- * *,*,0 -> *,0,1
- */
-static __always_inline void set_locked(struct qspinlock *lock)
-{
-	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
-}
-
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[_Q_MAX_NODES]);
 
 /*
  * Generate the native code for queued_spin_lock_slowpath(); provide NOPs for
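
The deleted encode_tail()/decode_tail() pair is easiest to check with concrete numbers. For the common NR_CPUS < 16K layout, the tail occupies bits 16-31 of the lock word: a 2-bit node index at bit 16 (_Q_TAIL_IDX_OFFSET) and a 14-bit CPU field at bit 18 (_Q_TAIL_CPU_OFFSET). The +1 on the CPU number is what keeps "CPU 0 queued with node 0" distinct from the all-zero no-tail value. A worked round trip under those layout assumptions (a sketch, not kernel code):

/* encode: cpu = 2, idx = 1 */
u32 tail = (2 + 1) << 18 | 1 << 16;	/* 0x000d0000 */

/* decode: recover both fields */
int cpu = (tail >> 18) - 1;		/* 3 - 1 = 2 */
int idx = (tail & (3 << 16)) >> 16;	/* 1 */

/* encode_tail(0, 0) == 1 << 18, which is nonzero, so an empty queue
 * (tail == 0) is never confused with a queued CPU 0. */
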
@@ -410,7 +227,7 @@ void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * any MCS node. This is not the most elegant solution, but is
 	 * simple enough.
 	 */
-	if (unlikely(idx >= MAX_NODES)) {
+	if (unlikely(idx >= _Q_MAX_NODES)) {
 		lockevent_inc(lock_no_node);
 		while (!queued_spin_trylock(lock))
 			cpu_relax();
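
For readers without the surrounding lines: idx comes from a per-CPU counter incremented a few lines earlier, and it can only reach _Q_MAX_NODES when more than four contexts on one CPU (task, softirq, hardirq, NMI) nest inside the slowpath at once, e.g. an NMI landing on top of an already full nesting stack. Paraphrased from the code just above this hunk, outside the diff:

	node = this_cpu_ptr(&qnodes[0].mcs);
	idx = node->count++;		/* 0..3 in normal operation */
	tail = encode_tail(smp_processor_id(), idx);

So on overflow the code simply declines to queue and spins on trylock instead, trading fairness for safety in a pathological case.
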
@@ -465,7 +282,7 @@ void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * head of the waitqueue.
 	 */
 	if (old & _Q_TAIL_MASK) {
-		prev = decode_tail(old);
+		prev = decode_tail(old, qnodes);
 
 		/* Link @node into the waitqueue. */
 		WRITE_ONCE(prev->next, node);
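
This call-site change is the visible half of a signature change: decode_tail() no longer hard-codes the file-static qnodes array but takes it as a parameter, so other users of the shared header can pass their own per-CPU node arrays. Presumably the header-side definition is the old body with one extra argument, along these lines (the __percpu annotation and exact formatting are assumptions):

static inline __pure struct mcs_spinlock *decode_tail(u32 tail,
						      struct qnode __percpu *qnodes)
{
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
}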