1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/sched/task.h>
12 #include <linux/slab.h>
13
14 #include <trace/events/lock.h>
15
16 #include "six.h"
17
18 #ifdef DEBUG
19 #define EBUG_ON(cond)                   BUG_ON(cond)
20 #else
21 #define EBUG_ON(cond)                   do {} while (0)
22 #endif
23
24 #define six_acquire(l, t, r, ip)        lock_acquire(l, 0, t, r, 1, NULL, ip)
25 #define six_release(l, ip)              lock_release(l, ip)
26
27 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
28
29 #define SIX_LOCK_HELD_read_OFFSET       0
30 #define SIX_LOCK_HELD_read              ~(~0U << 26)
31 #define SIX_LOCK_HELD_intent            (1U << 26)
32 #define SIX_LOCK_HELD_write             (1U << 27)
33 #define SIX_LOCK_WAITING_read           (1U << (28 + SIX_LOCK_read))
34 #define SIX_LOCK_WAITING_write          (1U << (28 + SIX_LOCK_write))
35 #define SIX_LOCK_NOSPIN                 (1U << 31)
36
37 struct six_lock_vals {
38         /* Value we add to the lock in order to take the lock: */
39         u32                     lock_val;
40
41         /* If the lock has this value (used as a mask), taking the lock fails: */
42         u32                     lock_fail;
43
44         /* Mask that indicates lock is held for this type: */
45         u32                     held_mask;
46
47         /* Waitlist we wakeup when releasing the lock: */
48         enum six_lock_type      unlock_wakeup;
49 };
50
51 static const struct six_lock_vals l[] = {
52         [SIX_LOCK_read] = {
53                 .lock_val       = 1U << SIX_LOCK_HELD_read_OFFSET,
54                 .lock_fail      = SIX_LOCK_HELD_write,
55                 .held_mask      = SIX_LOCK_HELD_read,
56                 .unlock_wakeup  = SIX_LOCK_write,
57         },
58         [SIX_LOCK_intent] = {
59                 .lock_val       = SIX_LOCK_HELD_intent,
60                 .lock_fail      = SIX_LOCK_HELD_intent,
61                 .held_mask      = SIX_LOCK_HELD_intent,
62                 .unlock_wakeup  = SIX_LOCK_intent,
63         },
64         [SIX_LOCK_write] = {
65                 .lock_val       = SIX_LOCK_HELD_write,
66                 .lock_fail      = SIX_LOCK_HELD_read,
67                 .held_mask      = SIX_LOCK_HELD_write,
68                 .unlock_wakeup  = SIX_LOCK_read,
69         },
70 };
71
72 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
73 {
74         if ((atomic_read(&lock->state) & mask) != mask)
75                 atomic_or(mask, &lock->state);
76 }
77
78 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
79 {
80         if (atomic_read(&lock->state) & mask)
81                 atomic_and(~mask, &lock->state);
82 }
83
84 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
85                                  u32 old, struct task_struct *owner)
86 {
87         if (type != SIX_LOCK_intent)
88                 return;
89
90         if (!(old & SIX_LOCK_HELD_intent)) {
91                 EBUG_ON(lock->owner);
92                 lock->owner = owner;
93         } else {
94                 EBUG_ON(lock->owner != current);
95         }
96 }
97
98 static inline unsigned pcpu_read_count(struct six_lock *lock)
99 {
100         unsigned read_count = 0;
101         int cpu;
102
103         for_each_possible_cpu(cpu)
104                 read_count += *per_cpu_ptr(lock->readers, cpu);
105         return read_count;
106 }
107
108 /*
109  * __do_six_trylock() - main trylock routine
110  *
111  * Returns 1 on success, 0 on failure
112  *
113  * In percpu reader mode, a failed trylock may cause a spurious trylock failure
114          * for another thread taking the competing lock type, and we may have to do a
115  * wakeup: when a wakeup is required, we return -1 - wakeup_type.
116  */
117 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
118                             struct task_struct *task, bool try)
119 {
120         int ret;
121         u32 old;
122
123         EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
124         EBUG_ON(type == SIX_LOCK_write &&
125                 (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
126
127         /*
128          * Percpu reader mode:
129          *
130          * The basic idea behind this algorithm is that you can implement a lock
131          * between two threads without any atomics, just memory barriers:
132          *
133          * For two threads you'll need two variables, one variable for "thread a
134          * has the lock" and another for "thread b has the lock".
135          *
136          * To take the lock, a thread sets its variable indicating that it holds
137          * the lock, then issues a full memory barrier, then reads from the
138          * other thread's variable to check if the other thread thinks it has
139          * the lock. If we raced, we backoff and retry/sleep.
140          *
141          * Failure to take the lock may cause a spurious trylock failure in
142          * another thread, because we temporarily set the lock to indicate that
143          * we held it. This would be a problem for a thread in six_lock() that
144          * calls trylock after adding itself to the waitlist and prior to
145          * sleeping.
146          *
147          * Therefore, if we fail to get the lock, and there were waiters of the
148          * type we conflict with, we will have to issue a wakeup.
149          *
150          * Since we may be called under wait_lock (and by the wakeup code
151          * itself), we return that the wakeup has to be done instead of doing it
152          * here.
153          */
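	/*
	 * A minimal sketch of one side of the handshake described above
	 * (illustrative only; my_held/their_held are hypothetical names, not
	 * variables in this file):
	 *
	 *	WRITE_ONCE(my_held, true);	// claim the lock
	 *	smp_mb();			// full barrier, as described above
	 *	if (READ_ONCE(their_held)) {
	 *		WRITE_ONCE(my_held, false);	// raced: back off
	 *		// retry or sleep
	 *	}
	 *
	 * Below, this_cpu_inc(*lock->readers) plays the "my_held" role for
	 * readers, and SIX_LOCK_HELD_write plays it for the writer.
	 */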
154         if (type == SIX_LOCK_read && lock->readers) {
155                 preempt_disable();
156                 this_cpu_inc(*lock->readers); /* signal that we own lock */
157
158                 smp_mb();
159
160                 old = atomic_read(&lock->state);
161                 ret = !(old & l[type].lock_fail);
162
163                 this_cpu_sub(*lock->readers, !ret);
164                 preempt_enable();
165
166                 if (!ret) {
167                         smp_mb();
168                         if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
169                                 ret = -1 - SIX_LOCK_write;
170                 }
171         } else if (type == SIX_LOCK_write && lock->readers) {
172                 if (try) {
173                         atomic_add(SIX_LOCK_HELD_write, &lock->state);
174                         smp_mb__after_atomic();
175                 }
176
177                 ret = !pcpu_read_count(lock);
178
179                 if (try && !ret) {
180                         old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
181                         if (old & SIX_LOCK_WAITING_read)
182                                 ret = -1 - SIX_LOCK_read;
183                 }
184         } else {
185                 old = atomic_read(&lock->state);
186                 do {
187                         ret = !(old & l[type].lock_fail);
188                         if (!ret || (type == SIX_LOCK_write && !try)) {
189                                 smp_mb();
190                                 break;
191                         }
192                 } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
193
194                 EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
195         }
196
197         if (ret > 0)
198                 six_set_owner(lock, type, old, task);
199
200         EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
201                 (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
202
203         return ret;
204 }
205
206 static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
207 {
208         struct six_lock_waiter *w, *next;
209         struct task_struct *task;
210         bool saw_one;
211         int ret;
212 again:
213         ret = 0;
214         saw_one = false;
215         raw_spin_lock(&lock->wait_lock);
216
217         list_for_each_entry_safe(w, next, &lock->wait_list, list) {
218                 if (w->lock_want != lock_type)
219                         continue;
220
221                 if (saw_one && lock_type != SIX_LOCK_read)
222                         goto unlock;
223                 saw_one = true;
224
225                 ret = __do_six_trylock(lock, lock_type, w->task, false);
226                 if (ret <= 0)
227                         goto unlock;
228
229                 /*
230                  * Similar to percpu_rwsem_wake_function(), we need to guard
231                  * against the wakee noticing w->lock_acquired, returning, and
232                  * then exiting before we do the wakeup:
233                  */
234                 task = get_task_struct(w->task);
235                 __list_del(w->list.prev, w->list.next);
236                 /*
237                  * The release barrier here ensures the ordering of the
238                  * __list_del before setting w->lock_acquired; @w is on the
239                  * stack of the thread doing the waiting and will be reused
240                  * after it sees w->lock_acquired with no other locking:
241                  * pairs with smp_load_acquire() in six_lock_slowpath()
242                  */
243                 smp_store_release(&w->lock_acquired, true);
244                 wake_up_process(task);
245                 put_task_struct(task);
246         }
247
248         six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
249 unlock:
250         raw_spin_unlock(&lock->wait_lock);
251
252         if (ret < 0) {
253                 lock_type = -ret - 1;
254                 goto again;
255         }
256 }
257
258 __always_inline
259 static void six_lock_wakeup(struct six_lock *lock, u32 state,
260                             enum six_lock_type lock_type)
261 {
262         if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
263                 return;
264
265         if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
266                 return;
267
268         __six_lock_wakeup(lock, lock_type);
269 }
270
271 __always_inline
272 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
273 {
274         int ret;
275
276         ret = __do_six_trylock(lock, type, current, try);
277         if (ret < 0)
278                 __six_lock_wakeup(lock, -ret - 1);
279
280         return ret > 0;
281 }
282
283 /**
284  * six_trylock_ip - attempt to take a six lock without blocking
285  * @lock:       lock to take
286  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
287  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
288  *
289  * Return: true on success, false on failure.
290  */
291 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
292 {
293         if (!do_six_trylock(lock, type, true))
294                 return false;
295
296         if (type != SIX_LOCK_write)
297                 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
298         return true;
299 }
300 EXPORT_SYMBOL_GPL(six_trylock_ip);
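/*
 * Illustrative usage of six_trylock_ip() (a sketch, not part of the original
 * file; 'foo' and update_foo() are hypothetical):
 *
 *	if (six_trylock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_)) {
 *		update_foo(foo);
 *		six_unlock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_);
 *	} else {
 *		// couldn't get the lock without blocking: defer the work or
 *		// fall back to the blocking six_lock_ip_waiter() path
 *	}
 */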
301
302 /**
303  * six_relock_ip - attempt to re-take a lock that was held previously
304  * @lock:       lock to take
305  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
306  * @seq:        lock sequence number obtained from six_lock_seq() while lock was
307  *              held previously
308  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
309  *
310  * Return: true on success, false on failure.
311  */
312 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
313                    unsigned seq, unsigned long ip)
314 {
315         if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
316                 return false;
317
318         if (six_lock_seq(lock) != seq) {
319                 six_unlock_ip(lock, type, ip);
320                 return false;
321         }
322
323         return true;
324 }
325 EXPORT_SYMBOL_GPL(six_relock_ip);
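/*
 * Illustrative relock pattern (a sketch, not from this file; 'foo' is
 * hypothetical): save the sequence number while the lock is held, drop the
 * lock, and later use six_relock_ip() to check that nothing changed.
 *
 *	unsigned seq = six_lock_seq(&foo->lock);
 *
 *	six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *	// ... work that doesn't require the lock ...
 *	if (!six_relock_ip(&foo->lock, SIX_LOCK_read, seq, _THIS_IP_)) {
 *		// lock was written to in the meantime: redo the lookup
 *	}
 */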
326
327 #ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
328
329 static inline bool six_can_spin_on_owner(struct six_lock *lock)
330 {
331         struct task_struct *owner;
332         bool ret;
333
334         if (need_resched())
335                 return false;
336
337         rcu_read_lock();
338         owner = READ_ONCE(lock->owner);
339         ret = !owner || owner_on_cpu(owner);
340         rcu_read_unlock();
341
342         return ret;
343 }
344
345 static inline bool six_spin_on_owner(struct six_lock *lock,
346                                      struct task_struct *owner,
347                                      u64 end_time)
348 {
349         bool ret = true;
350         unsigned loop = 0;
351
352         rcu_read_lock();
353         while (lock->owner == owner) {
354                 /*
355          * Ensure we emit the owner->on_cpu dereference _after_
356                  * checking lock->owner still matches owner. If that fails,
357                  * owner might point to freed memory. If it still matches,
358                  * the rcu_read_lock() ensures the memory stays valid.
359                  */
360                 barrier();
361
362                 if (!owner_on_cpu(owner) || need_resched()) {
363                         ret = false;
364                         break;
365                 }
366
367                 if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
368                         six_set_bitmask(lock, SIX_LOCK_NOSPIN);
369                         ret = false;
370                         break;
371                 }
372
373                 cpu_relax();
374         }
375         rcu_read_unlock();
376
377         return ret;
378 }
379
380 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
381 {
382         struct task_struct *task = current;
383         u64 end_time;
384
385         if (type == SIX_LOCK_write)
386                 return false;
387
388         preempt_disable();
389         if (!six_can_spin_on_owner(lock))
390                 goto fail;
391
392         if (!osq_lock(&lock->osq))
393                 goto fail;
394
395         end_time = sched_clock() + 10 * NSEC_PER_USEC;
396
397         while (1) {
398                 struct task_struct *owner;
399
400                 /*
401                  * If there's an owner, wait for it to either
402                  * release the lock or go to sleep.
403                  */
404                 owner = READ_ONCE(lock->owner);
405                 if (owner && !six_spin_on_owner(lock, owner, end_time))
406                         break;
407
408                 if (do_six_trylock(lock, type, false)) {
409                         osq_unlock(&lock->osq);
410                         preempt_enable();
411                         return true;
412                 }
413
414                 /*
415                  * When there's no owner, we might have preempted between the
416          * owner acquiring the lock and setting the owner field. If we're
417          * an RT task, that will live-lock because we won't let the owner
418          * complete.
419                  */
420                 if (!owner && (need_resched() || rt_task(task)))
421                         break;
422
423                 /*
424                  * The cpu_relax() call is a compiler barrier which forces
425                  * everything in this loop to be re-loaded. We don't need
426                  * memory barriers as we'll eventually observe the right
427                  * values at the cost of a few extra spins.
428                  */
429                 cpu_relax();
430         }
431
432         osq_unlock(&lock->osq);
433 fail:
434         preempt_enable();
435
436         /*
437          * If we fell out of the spin path because of need_resched(),
438          * reschedule now, before we try-lock again. This avoids getting
439          * scheduled out right after we obtained the lock.
440          */
441         if (need_resched())
442                 schedule();
443
444         return false;
445 }
446
447 #else /* CONFIG_SIX_LOCK_SPIN_ON_OWNER */
448
449 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
450 {
451         return false;
452 }
453
454 #endif
455
456 noinline
457 static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
458                              struct six_lock_waiter *wait,
459                              six_lock_should_sleep_fn should_sleep_fn, void *p,
460                              unsigned long ip)
461 {
462         int ret = 0;
463
464         if (type == SIX_LOCK_write) {
465                 EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
466                 atomic_add(SIX_LOCK_HELD_write, &lock->state);
467                 smp_mb__after_atomic();
468         }
469
470         trace_contention_begin(lock, 0);
471         lock_contended(&lock->dep_map, ip);
472
473         if (six_optimistic_spin(lock, type))
474                 goto out;
475
476         wait->task              = current;
477         wait->lock_want         = type;
478         wait->lock_acquired     = false;
479
480         raw_spin_lock(&lock->wait_lock);
481         six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
482         /*
483          * Retry taking the lock after taking waitlist lock, in case we raced
484          * with an unlock:
485          */
486         ret = __do_six_trylock(lock, type, current, false);
487         if (ret <= 0) {
488                 wait->start_time = local_clock();
489
490                 if (!list_empty(&lock->wait_list)) {
491                         struct six_lock_waiter *last =
492                                 list_last_entry(&lock->wait_list,
493                                         struct six_lock_waiter, list);
494
495                         if (time_before_eq64(wait->start_time, last->start_time))
496                                 wait->start_time = last->start_time + 1;
497                 }
498
499                 list_add_tail(&wait->list, &lock->wait_list);
500         }
501         raw_spin_unlock(&lock->wait_lock);
502
503         if (unlikely(ret > 0)) {
504                 ret = 0;
505                 goto out;
506         }
507
508         if (unlikely(ret < 0)) {
509                 __six_lock_wakeup(lock, -ret - 1);
510                 ret = 0;
511         }
512
513         while (1) {
514                 set_current_state(TASK_UNINTERRUPTIBLE);
515
516                 /*
517                  * Ensures that writes to the waitlist entry happen after we see
518                  * wait->lock_acquired: pairs with the smp_store_release in
519                  * __six_lock_wakeup
520                  */
521                 if (smp_load_acquire(&wait->lock_acquired))
522                         break;
523
524                 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
525                 if (unlikely(ret)) {
526                         bool acquired;
527
528                         /*
529                          * If should_sleep_fn() returns an error, we are
530                          * required to return that error even if we already
531                          * acquired the lock - should_sleep_fn() might have
532                          * modified external state (e.g. when the deadlock cycle
533                          * detector in bcachefs issued a transaction restart)
534                          */
535                         raw_spin_lock(&lock->wait_lock);
536                         acquired = wait->lock_acquired;
537                         if (!acquired)
538                                 list_del(&wait->list);
539                         raw_spin_unlock(&lock->wait_lock);
540
541                         if (unlikely(acquired))
542                                 do_six_unlock_type(lock, type);
543                         break;
544                 }
545
546                 schedule();
547         }
548
549         __set_current_state(TASK_RUNNING);
550 out:
551         if (ret && type == SIX_LOCK_write) {
552                 six_clear_bitmask(lock, SIX_LOCK_HELD_write);
553                 six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
554         }
555         trace_contention_end(lock, 0);
556
557         return ret;
558 }
559
560 /**
561  * six_lock_ip_waiter - take a lock, with full waitlist interface
562  * @lock:       lock to take
563  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
564  * @wait:       pointer to wait object, which will be added to lock's waitlist
565  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
566  *              to scheduling
567  * @p:          passed through to @should_sleep_fn
568  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
569  *
570  * This is the most general six_lock() variant, with parameters to support full
571  * cycle detection for deadlock avoidance.
572  *
573  * The code calling this function must implement tracking of held locks, and the
574  * @wait object should be embedded into the struct that tracks held locks -
575  * which must also be accessible in a thread-safe way.
576  *
577  * @should_sleep_fn should invoke the cycle detector; it should walk each
578  * lock's waiters, and for each waiter recursively walk their held locks.
579  *
580  * When this function must block, @wait will be added to @lock's waitlist before
581  * calling trylock, and before calling @should_sleep_fn, and @wait will not be
582  * removed from the lock waitlist until the lock has been successfully acquired,
583  * or we abort.
584  *
585  * @wait.start_time will be monotonically increasing for any given waitlist, and
586  * thus may be used as a loop cursor.
587  *
588  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
589  */
590 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
591                        struct six_lock_waiter *wait,
592                        six_lock_should_sleep_fn should_sleep_fn, void *p,
593                        unsigned long ip)
594 {
595         int ret;
596
597         wait->start_time = 0;
598
599         if (type != SIX_LOCK_write)
600                 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
601
602         ret = do_six_trylock(lock, type, true) ? 0
603                 : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
604
605         if (ret && type != SIX_LOCK_write)
606                 six_release(&lock->dep_map, ip);
607         if (!ret)
608                 lock_acquired(&lock->dep_map, ip);
609
610         return ret;
611 }
612 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
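/*
 * Illustrative sketch of the waiter interface (hypothetical caller code, not
 * from this file; foo_should_sleep() and foo_detect_cycle() are made up):
 *
 *	static int foo_should_sleep(struct six_lock *lock, void *p)
 *	{
 *		struct foo *foo = p;
 *
 *		// run the cycle detector; a nonzero return aborts the lock
 *		// attempt and is returned from six_lock_ip_waiter()
 *		return foo_detect_cycle(foo) ? -EDEADLK : 0;
 *	}
 *
 *	struct six_lock_waiter wait;
 *	int ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_intent, &wait,
 *				     foo_should_sleep, foo, _THIS_IP_);
 *	if (ret)
 *		return ret;	// lock not held; error from foo_should_sleep()
 */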
613
614 __always_inline
615 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
616 {
617         u32 state;
618
619         if (type == SIX_LOCK_intent)
620                 lock->owner = NULL;
621
622         if (type == SIX_LOCK_read &&
623             lock->readers) {
624                 smp_mb(); /* unlock barrier */
625                 this_cpu_dec(*lock->readers);
626                 smp_mb(); /* between unlocking and checking for waiters */
627                 state = atomic_read(&lock->state);
628         } else {
629                 u32 v = l[type].lock_val;
630
631                 if (type != SIX_LOCK_read)
632                         v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
633
634                 EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
635                 state = atomic_sub_return_release(v, &lock->state);
636         }
637
638         six_lock_wakeup(lock, state, l[type].unlock_wakeup);
639 }
640
641 /**
642  * six_unlock_ip - drop a six lock
643  * @lock:       lock to unlock
644  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
645  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
646  *
647  * When a lock is held multiple times (because six_lock_increment() was used),
648  * this decrements the 'lock held' counter by one.
649  *
650  * For example:
651  * six_lock_read(&foo->lock);                           read count 1
652  * six_lock_increment(&foo->lock, SIX_LOCK_read);       read count 2
653  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 1
654  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 0
655  */
656 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
657 {
658         EBUG_ON(type == SIX_LOCK_write &&
659                 !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
660         EBUG_ON((type == SIX_LOCK_write ||
661                  type == SIX_LOCK_intent) &&
662                 lock->owner != current);
663
664         if (type != SIX_LOCK_write)
665                 six_release(&lock->dep_map, ip);
666         else
667                 lock->seq++;
668
669         if (type == SIX_LOCK_intent &&
670             lock->intent_lock_recurse) {
671                 --lock->intent_lock_recurse;
672                 return;
673         }
674
675         do_six_unlock_type(lock, type);
676 }
677 EXPORT_SYMBOL_GPL(six_unlock_ip);
678
679 /**
680  * six_lock_downgrade - convert an intent lock to a read lock
681  * @lock:       lock to downgrade
682  *
683  * @lock will have read count incremented and intent count decremented
684  */
685 void six_lock_downgrade(struct six_lock *lock)
686 {
687         six_lock_increment(lock, SIX_LOCK_read);
688         six_unlock_intent(lock);
689 }
690 EXPORT_SYMBOL_GPL(six_lock_downgrade);
691
692 /**
693  * six_lock_tryupgrade - attempt to convert read lock to an intent lock
694  * @lock:       lock to upgrade
695  *
696  * On success, @lock will have intent count incremented and read count
697  * decremented
698  *
699  * Return: true on success, false on failure
700  */
701 bool six_lock_tryupgrade(struct six_lock *lock)
702 {
703         u32 old = atomic_read(&lock->state), new;
704
705         do {
706                 new = old;
707
708                 if (new & SIX_LOCK_HELD_intent)
709                         return false;
710
711                 if (!lock->readers) {
712                         EBUG_ON(!(new & SIX_LOCK_HELD_read));
713                         new -= l[SIX_LOCK_read].lock_val;
714                 }
715
716                 new |= SIX_LOCK_HELD_intent;
717         } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
718
719         if (lock->readers)
720                 this_cpu_dec(*lock->readers);
721
722         six_set_owner(lock, SIX_LOCK_intent, old, current);
723
724         return true;
725 }
726 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
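/*
 * Illustrative sketch (hypothetical caller, not from this file): try to
 * upgrade a read lock in place; if another thread holds the intent lock,
 * drop the read lock and take intent the blocking way.
 *
 *	if (!six_lock_tryupgrade(&foo->lock)) {
 *		six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *		ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_intent, &wait,
 *					 NULL, NULL, _THIS_IP_);
 *	}
 */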
727
728 /**
729  * six_trylock_convert - attempt to convert a held lock from one type to another
730  * @lock:       lock to upgrade
731  * @from:       SIX_LOCK_read or SIX_LOCK_intent
732  * @to:         SIX_LOCK_read or SIX_LOCK_intent
733  *
734  * On success, @lock will have intent count incremented and read count
735  * decremented
736  *
737  * Return: true on success, false on failure
738  */
739 bool six_trylock_convert(struct six_lock *lock,
740                          enum six_lock_type from,
741                          enum six_lock_type to)
742 {
743         EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
744
745         if (to == from)
746                 return true;
747
748         if (to == SIX_LOCK_read) {
749                 six_lock_downgrade(lock);
750                 return true;
751         } else {
752                 return six_lock_tryupgrade(lock);
753         }
754 }
755 EXPORT_SYMBOL_GPL(six_trylock_convert);
756
757 /**
758  * six_lock_increment - increase held lock count on a lock that is already held
759  * @lock:       lock to increment
760  * @type:       SIX_LOCK_read or SIX_LOCK_intent
761  *
762  * @lock must already be held, with a lock type that is greater than or equal to
763  * @type
764  *
765  * A corresponding six_unlock_type() call will be required for @lock to be fully
766  * unlocked.
767  */
768 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
769 {
770         six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
771
772         /* XXX: assert already locked, and that we don't overflow: */
773
774         switch (type) {
775         case SIX_LOCK_read:
776                 if (lock->readers) {
777                         this_cpu_inc(*lock->readers);
778                 } else {
779                         EBUG_ON(!(atomic_read(&lock->state) &
780                                   (SIX_LOCK_HELD_read|
781                                    SIX_LOCK_HELD_intent)));
782                         atomic_add(l[type].lock_val, &lock->state);
783                 }
784                 break;
785         case SIX_LOCK_intent:
786                 EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
787                 lock->intent_lock_recurse++;
788                 break;
789         case SIX_LOCK_write:
790                 BUG();
791                 break;
792         }
793 }
794 EXPORT_SYMBOL_GPL(six_lock_increment);
795
796 /**
797  * six_lock_wakeup_all - wake up all waiters on @lock
798  * @lock:       lock to wake up waiters for
799  *
800  * Waking up waiters will cause them to re-run should_sleep_fn, which may then
801  * abort the lock operation.
802  *
803  * This function is never needed in a bug-free program; it's only useful in
804  * debug code, e.g. to determine if a cycle detector is at fault.
805  */
806 void six_lock_wakeup_all(struct six_lock *lock)
807 {
808         u32 state = atomic_read(&lock->state);
809         struct six_lock_waiter *w;
810
811         six_lock_wakeup(lock, state, SIX_LOCK_read);
812         six_lock_wakeup(lock, state, SIX_LOCK_intent);
813         six_lock_wakeup(lock, state, SIX_LOCK_write);
814
815         raw_spin_lock(&lock->wait_lock);
816         list_for_each_entry(w, &lock->wait_list, list)
817                 wake_up_process(w->task);
818         raw_spin_unlock(&lock->wait_lock);
819 }
820 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
821
822 /**
823  * six_lock_counts - return held lock counts, for each lock type
824  * @lock:       lock to return counters for
825  *
826  * Return: the number of times a lock is held for read, intent and write.
827  */
828 struct six_lock_count six_lock_counts(struct six_lock *lock)
829 {
830         struct six_lock_count ret;
831
832         ret.n[SIX_LOCK_read]    = !lock->readers
833                 ? atomic_read(&lock->state) & SIX_LOCK_HELD_read
834                 : pcpu_read_count(lock);
835         ret.n[SIX_LOCK_intent]  = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
836                 lock->intent_lock_recurse;
837         ret.n[SIX_LOCK_write]   = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
838
839         return ret;
840 }
841 EXPORT_SYMBOL_GPL(six_lock_counts);
842
843 /**
844  * six_lock_readers_add - directly manipulate reader count of a lock
845  * @lock:       lock to add/subtract readers for
846  * @nr:         reader count to add/subtract
847  *
848  * When an upper layer is implementing lock reentrancy, we may have both read
849  * and intent locks on the same lock.
850  *
851  * When we need to take a write lock, the read locks will cause self-deadlock,
852  * because six locks themselves do not track which read locks are held by the
853  * current thread and which are held by a different thread - they do no
854  * per-thread tracking of held locks.
855  *
856  * The upper layer that is tracking held locks may, however, if trylock() has
857  * failed, count up its own read locks, subtract them, take the write lock, and
858  * then re-add them.
859  *
860  * As in any other situation when taking a write lock, @lock must be held for
861  * intent one (or more) times, so @lock will never be left unlocked.
862  */
863 void six_lock_readers_add(struct six_lock *lock, int nr)
864 {
865         if (lock->readers) {
866                 this_cpu_add(*lock->readers, nr);
867         } else {
868                 EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
869                 /* reader count starts at bit 0 */
870                 atomic_add(nr, &lock->state);
871         }
872 }
873 EXPORT_SYMBOL_GPL(six_lock_readers_add);
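/*
 * Illustrative sketch of the pattern described above (hypothetical caller,
 * not from this file): the upper layer knows it holds 'nr_read' read locks
 * itself, temporarily subtracts them, takes the write lock, then re-adds
 * them.
 *
 *	// @lock is already held for intent by this thread
 *	six_lock_readers_add(&foo->lock, -nr_read);
 *	ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_write, &wait,
 *				 NULL, NULL, _THIS_IP_);
 *	six_lock_readers_add(&foo->lock, nr_read);
 */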
874
875 /**
876  * six_lock_exit - release resources held by a lock prior to freeing
877  * @lock:       lock to exit
878  *
879  * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
880  * required to free the percpu read counts.
881  */
882 void six_lock_exit(struct six_lock *lock)
883 {
884         WARN_ON(lock->readers && pcpu_read_count(lock));
885         WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
886
887         free_percpu(lock->readers);
888         lock->readers = NULL;
889 }
890 EXPORT_SYMBOL_GPL(six_lock_exit);
891
892 void __six_lock_init(struct six_lock *lock, const char *name,
893                      struct lock_class_key *key, enum six_lock_init_flags flags)
894 {
895         atomic_set(&lock->state, 0);
896         raw_spin_lock_init(&lock->wait_lock);
897         INIT_LIST_HEAD(&lock->wait_list);
898 #ifdef CONFIG_DEBUG_LOCK_ALLOC
899         debug_check_no_locks_freed((void *) lock, sizeof(*lock));
900         lockdep_init_map(&lock->dep_map, name, key, 0);
901 #endif
902
903         /*
904          * Don't assume that we have real percpu variables available in
905          * userspace:
906          */
907 #ifdef __KERNEL__
908         if (flags & SIX_LOCK_INIT_PCPU) {
909                 /*
910                  * We don't return an error here on memory allocation failure
911                  * since percpu is an optimization, and locks will work with the
912                  * same semantics in non-percpu mode: callers can check for
913                  * failure if they wish by checking lock->readers, but generally
914                  * will not want to treat it as an error.
915                  */
916                 lock->readers = alloc_percpu(unsigned);
917         }
918 #endif
919 }
920 EXPORT_SYMBOL_GPL(__six_lock_init);
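/*
 * Illustrative init/teardown sketch (hypothetical, not from this file):
 * initialize a lock in percpu-reader mode and release its resources before
 * the containing object is freed.
 *
 *	static struct lock_class_key foo_lock_key;
 *
 *	__six_lock_init(&foo->lock, "foo->lock", &foo_lock_key,
 *			SIX_LOCK_INIT_PCPU);
 *	...
 *	six_lock_exit(&foo->lock);	// frees the percpu read counts
 *	kfree(foo);
 */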