1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/sched/task.h>
12 #include <linux/slab.h>
13
14 #include <trace/events/lock.h>
15
16 #include "six.h"
17
18 #ifdef DEBUG
19 #define EBUG_ON(cond)                   BUG_ON(cond)
20 #else
21 #define EBUG_ON(cond)                   do {} while (0)
22 #endif
23
24 #define six_acquire(l, t, r, ip)        lock_acquire(l, 0, t, r, 1, NULL, ip)
25 #define six_release(l, ip)              lock_release(l, ip)
26
27 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
28
29 #define SIX_LOCK_HELD_read_OFFSET       0
30 #define SIX_LOCK_HELD_read              ~(~0U << 26)
31 #define SIX_LOCK_HELD_intent            (1U << 26)
32 #define SIX_LOCK_HELD_write             (1U << 27)
33 #define SIX_LOCK_WAITING_read           (1U << (28 + SIX_LOCK_read))
34 #define SIX_LOCK_WAITING_write          (1U << (28 + SIX_LOCK_write))
35 #define SIX_LOCK_NOSPIN                 (1U << 31)
36
37 struct six_lock_vals {
38         /* Value we add to the lock in order to take the lock: */
39         u32                     lock_val;
40
41         /* If the lock has this value (used as a mask), taking the lock fails: */
42         u32                     lock_fail;
43
44         /* Mask that indicates lock is held for this type: */
45         u32                     held_mask;
46
47         /* Waitlist we wakeup when releasing the lock: */
48         enum six_lock_type      unlock_wakeup;
49 };
50
51 static const struct six_lock_vals l[] = {
52         [SIX_LOCK_read] = {
53                 .lock_val       = 1U << SIX_LOCK_HELD_read_OFFSET,
54                 .lock_fail      = SIX_LOCK_HELD_write,
55                 .held_mask      = SIX_LOCK_HELD_read,
56                 .unlock_wakeup  = SIX_LOCK_write,
57         },
58         [SIX_LOCK_intent] = {
59                 .lock_val       = SIX_LOCK_HELD_intent,
60                 .lock_fail      = SIX_LOCK_HELD_intent,
61                 .held_mask      = SIX_LOCK_HELD_intent,
62                 .unlock_wakeup  = SIX_LOCK_intent,
63         },
64         [SIX_LOCK_write] = {
65                 .lock_val       = SIX_LOCK_HELD_write,
66                 .lock_fail      = SIX_LOCK_HELD_read,
67                 .held_mask      = SIX_LOCK_HELD_write,
68                 .unlock_wakeup  = SIX_LOCK_read,
69         },
70 };
71
72 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
73 {
74         if ((atomic_read(&lock->state) & mask) != mask)
75                 atomic_or(mask, &lock->state);
76 }
77
78 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
79 {
80         if (atomic_read(&lock->state) & mask)
81                 atomic_and(~mask, &lock->state);
82 }
83
84 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
85                                  u32 old, struct task_struct *owner)
86 {
87         if (type != SIX_LOCK_intent)
88                 return;
89
90         if (!(old & SIX_LOCK_HELD_intent)) {
91                 EBUG_ON(lock->owner);
92                 lock->owner = owner;
93         } else {
94                 EBUG_ON(lock->owner != current);
95         }
96 }
97
98 static inline unsigned pcpu_read_count(struct six_lock *lock)
99 {
100         unsigned read_count = 0;
101         int cpu;
102
103         for_each_possible_cpu(cpu)
104                 read_count += *per_cpu_ptr(lock->readers, cpu);
105         return read_count;
106 }
107
108 /*
109  * __do_six_trylock() - main trylock routine
110  *
111  * Returns 1 on success, 0 on failure
112  *
113  * In percpu reader mode, a failed trylock may cause a spurious trylock failure
114          * for another thread taking the competing lock type, and we may have to do a
115  * wakeup: when a wakeup is required, we return -1 - wakeup_type.
116  */
117 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
118                             struct task_struct *task, bool try)
119 {
120         int ret;
121         u32 old;
122
123         EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
124         EBUG_ON(type == SIX_LOCK_write &&
125                 (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
126
127         /*
128          * Percpu reader mode:
129          *
130          * The basic idea behind this algorithm is that you can implement a lock
131          * between two threads without any atomics, just memory barriers:
132          *
133          * For two threads you'll need two variables, one variable for "thread a
134          * has the lock" and another for "thread b has the lock".
135          *
136          * To take the lock, a thread sets its variable indicating that it holds
137          * the lock, then issues a full memory barrier, then reads from the
138          * other thread's variable to check if the other thread thinks it has
139          * the lock. If we raced, we backoff and retry/sleep.
140          *
141          * Failure to take the lock may cause a spurious trylock failure in
142          * another thread, because we temporarily set the lock to indicate that
143          * we held it. This would be a problem for a thread in six_lock() that
144          * calls trylock after adding itself to the waitlist and prior to
145          * sleeping.
146          *
147          * Therefore, if we fail to get the lock, and there were waiters of the
148          * type we conflict with, we will have to issue a wakeup.
149          *
150          * Since we may be called under wait_lock (and by the wakeup code
151          * itself), we return that the wakeup has to be done instead of doing it
152          * here.
153          */
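	/*
	 * A minimal sketch of one side of the handshake described above
	 * (illustrative only; my_held/their_held are hypothetical names, not
	 * variables in this file):
	 *
	 *	WRITE_ONCE(my_held, true);	// claim the lock
	 *	smp_mb();			// full barrier, as described above
	 *	if (READ_ONCE(their_held)) {
	 *		WRITE_ONCE(my_held, false);	// raced: back off
	 *		// retry or sleep
	 *	}
	 *
	 * Below, this_cpu_inc(*lock->readers) plays the "my_held" role for
	 * readers, and SIX_LOCK_HELD_write plays it for the writer.
	 */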
154         if (type == SIX_LOCK_read && lock->readers) {
155                 preempt_disable();
156                 this_cpu_inc(*lock->readers); /* signal that we own lock */
157
158                 smp_mb();
159
160                 old = atomic_read(&lock->state);
161                 ret = !(old & l[type].lock_fail);
162
163                 this_cpu_sub(*lock->readers, !ret);
164                 preempt_enable();
165
166                 if (!ret) {
167                         smp_mb();
168                         if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
169                                 ret = -1 - SIX_LOCK_write;
170                 }
171         } else if (type == SIX_LOCK_write && lock->readers) {
172                 if (try) {
173                         atomic_add(SIX_LOCK_HELD_write, &lock->state);
174                         smp_mb__after_atomic();
175                 }
176
177                 ret = !pcpu_read_count(lock);
178
179                 if (try && !ret) {
180                         old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
181                         if (old & SIX_LOCK_WAITING_read)
182                                 ret = -1 - SIX_LOCK_read;
183                 }
184         } else {
185                 old = atomic_read(&lock->state);
186                 do {
187                         ret = !(old & l[type].lock_fail);
188                         if (!ret || (type == SIX_LOCK_write && !try)) {
189                                 smp_mb();
190                                 break;
191                         }
192                 } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
193
194                 EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
195         }
196
197         if (ret > 0)
198                 six_set_owner(lock, type, old, task);
199
200         EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
201                 (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
202
203         return ret;
204 }
205
206 static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
207 {
208         struct six_lock_waiter *w, *next;
209         struct task_struct *task;
210         bool saw_one;
211         int ret;
212 again:
213         ret = 0;
214         saw_one = false;
215         raw_spin_lock(&lock->wait_lock);
216
217         list_for_each_entry_safe(w, next, &lock->wait_list, list) {
218                 if (w->lock_want != lock_type)
219                         continue;
220
221                 if (saw_one && lock_type != SIX_LOCK_read)
222                         goto unlock;
223                 saw_one = true;
224
225                 ret = __do_six_trylock(lock, lock_type, w->task, false);
226                 if (ret <= 0)
227                         goto unlock;
228
229                 /*
230                  * Similar to percpu_rwsem_wake_function(), we need to guard
231                  * against the wakee noticing w->lock_acquired, returning, and
232                  * then exiting before we do the wakeup:
233                  */
234                 task = get_task_struct(w->task);
235                 __list_del(w->list.prev, w->list.next);
236                 /*
237                  * The release barrier here ensures the ordering of the
238                  * __list_del before setting w->lock_acquired; @w is on the
239                  * stack of the thread doing the waiting and will be reused
240                  * after it sees w->lock_acquired with no other locking:
241                  * pairs with smp_load_acquire() in six_lock_slowpath()
242                  */
243                 smp_store_release(&w->lock_acquired, true);
244                 wake_up_process(task);
245                 put_task_struct(task);
246         }
247
248         six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
249 unlock:
250         raw_spin_unlock(&lock->wait_lock);
251
252         if (ret < 0) {
253                 lock_type = -ret - 1;
254                 goto again;
255         }
256 }
257
258 __always_inline
259 static void six_lock_wakeup(struct six_lock *lock, u32 state,
260                             enum six_lock_type lock_type)
261 {
262         if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
263                 return;
264
265         if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
266                 return;
267
268         __six_lock_wakeup(lock, lock_type);
269 }
270
271 __always_inline
272 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
273 {
274         int ret;
275
276         ret = __do_six_trylock(lock, type, current, try);
277         if (ret < 0)
278                 __six_lock_wakeup(lock, -ret - 1);
279
280         return ret > 0;
281 }
282
283 /**
284  * six_trylock_ip - attempt to take a six lock without blocking
285  * @lock:       lock to take
286  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
287  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
288  *
289  * Return: true on success, false on failure.
290  */
291 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
292 {
293         if (!do_six_trylock(lock, type, true))
294                 return false;
295
296         if (type != SIX_LOCK_write)
297                 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
298         return true;
299 }
300 EXPORT_SYMBOL_GPL(six_trylock_ip);
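/*
 * Illustrative usage of six_trylock_ip() (a sketch, not part of the original
 * file; 'foo' and update_foo() are hypothetical):
 *
 *	if (six_trylock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_)) {
 *		update_foo(foo);
 *		six_unlock_ip(&foo->lock, SIX_LOCK_intent, _THIS_IP_);
 *	} else {
 *		// couldn't get the lock without blocking: defer the work or
 *		// fall back to the blocking six_lock_ip_waiter() path
 *	}
 */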
301
302 /**
303  * six_relock_ip - attempt to re-take a lock that was held previously
304  * @lock:       lock to take
305  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
306  * @seq:        lock sequence number obtained from six_lock_seq() while lock was
307  *              held previously
308  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
309  *
310  * Return: true on success, false on failure.
311  */
312 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
313                    unsigned seq, unsigned long ip)
314 {
315         if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
316                 return false;
317
318         if (six_lock_seq(lock) != seq) {
319                 six_unlock_ip(lock, type, ip);
320                 return false;
321         }
322
323         return true;
324 }
325 EXPORT_SYMBOL_GPL(six_relock_ip);
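/*
 * Illustrative relock pattern (a sketch, not from this file; 'foo' is
 * hypothetical): save the sequence number while the lock is held, drop the
 * lock, and later use six_relock_ip() to check that nothing changed.
 *
 *	unsigned seq = six_lock_seq(&foo->lock);
 *
 *	six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *	// ... work that doesn't require the lock ...
 *	if (!six_relock_ip(&foo->lock, SIX_LOCK_read, seq, _THIS_IP_)) {
 *		// lock was written to in the meantime: redo the lookup
 *	}
 */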
326
327 #ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
328
329 static inline bool six_can_spin_on_owner(struct six_lock *lock)
330 {
331         struct task_struct *owner;
332         bool ret;
333
334         if (need_resched())
335                 return false;
336
337         rcu_read_lock();
338         owner = READ_ONCE(lock->owner);
339         ret = !owner || owner_on_cpu(owner);
340         rcu_read_unlock();
341
342         return ret;
343 }
344
345 static inline bool six_spin_on_owner(struct six_lock *lock,
346                                      struct task_struct *owner,
347                                      u64 end_time)
348 {
349         bool ret = true;
350         unsigned loop = 0;
351
352         rcu_read_lock();
353         while (lock->owner == owner) {
354                 /*
355          * Ensure we emit the owner->on_cpu dereference _after_
356                  * checking lock->owner still matches owner. If that fails,
357                  * owner might point to freed memory. If it still matches,
358                  * the rcu_read_lock() ensures the memory stays valid.
359                  */
360                 barrier();
361
362                 if (!owner_on_cpu(owner) || need_resched()) {
363                         ret = false;
364                         break;
365                 }
366
367                 if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
368                         six_set_bitmask(lock, SIX_LOCK_NOSPIN);
369                         ret = false;
370                         break;
371                 }
372
373                 cpu_relax();
374         }
375         rcu_read_unlock();
376
377         return ret;
378 }
379
380 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
381 {
382         struct task_struct *task = current;
383         u64 end_time;
384
385         if (type == SIX_LOCK_write)
386                 return false;
387
388         preempt_disable();
389         if (!six_can_spin_on_owner(lock))
390                 goto fail;
391
392         if (!osq_lock(&lock->osq))
393                 goto fail;
394
395         end_time = sched_clock() + 10 * NSEC_PER_USEC;
396
397         while (1) {
398                 struct task_struct *owner;
399
400                 /*
401                  * If there's an owner, wait for it to either
402                  * release the lock or go to sleep.
403                  */
404                 owner = READ_ONCE(lock->owner);
405                 if (owner && !six_spin_on_owner(lock, owner, end_time))
406                         break;
407
408                 if (do_six_trylock(lock, type, false)) {
409                         osq_unlock(&lock->osq);
410                         preempt_enable();
411                         return true;
412                 }
413
414                 /*
415                  * When there's no owner, we might have preempted between the
416          * owner acquiring the lock and setting the owner field. If we're
417          * an RT task, that will live-lock because we won't let the owner
418          * complete.
419                  */
420                 if (!owner && (need_resched() || rt_task(task)))
421                         break;
422
423                 /*
424                  * The cpu_relax() call is a compiler barrier which forces
425                  * everything in this loop to be re-loaded. We don't need
426                  * memory barriers as we'll eventually observe the right
427                  * values at the cost of a few extra spins.
428                  */
429                 cpu_relax();
430         }
431
432         osq_unlock(&lock->osq);
433 fail:
434         preempt_enable();
435
436         /*
437          * If we fell out of the spin path because of need_resched(),
438          * reschedule now, before we try-lock again. This avoids getting
439          * scheduled out right after we obtained the lock.
440          */
441         if (need_resched())
442                 schedule();
443
444         return false;
445 }
446
447 #else /* CONFIG_SIX_LOCK_SPIN_ON_OWNER */
448
449 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
450 {
451         return false;
452 }
453
454 #endif
455
456 noinline
457 static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
458                              struct six_lock_waiter *wait,
459                              six_lock_should_sleep_fn should_sleep_fn, void *p,
460                              unsigned long ip)
461 {
462         int ret = 0;
463
464         if (type == SIX_LOCK_write) {
465                 EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
466                 atomic_add(SIX_LOCK_HELD_write, &lock->state);
467                 smp_mb__after_atomic();
468         }
469
470         trace_contention_begin(lock, 0);
471         lock_contended(&lock->dep_map, ip);
472
473         if (six_optimistic_spin(lock, type))
474                 goto out;
475
476         wait->task              = current;
477         wait->lock_want         = type;
478         wait->lock_acquired     = false;
479
480         raw_spin_lock(&lock->wait_lock);
481         six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
482         /*
483          * Retry taking the lock after taking waitlist lock, in case we raced
484          * with an unlock:
485          */
486         ret = __do_six_trylock(lock, type, current, false);
487         if (ret <= 0) {
488                 wait->start_time = local_clock();
489
490                 if (!list_empty(&lock->wait_list)) {
491                         struct six_lock_waiter *last =
492                                 list_last_entry(&lock->wait_list,
493                                         struct six_lock_waiter, list);
494
495                         if (time_before_eq64(wait->start_time, last->start_time))
496                                 wait->start_time = last->start_time + 1;
497                 }
498
499                 list_add_tail(&wait->list, &lock->wait_list);
500         }
501         raw_spin_unlock(&lock->wait_lock);
502
503         if (unlikely(ret > 0)) {
504                 ret = 0;
505                 goto out;
506         }
507
508         if (unlikely(ret < 0)) {
509                 __six_lock_wakeup(lock, -ret - 1);
510                 ret = 0;
511         }
512
513         while (1) {
514                 set_current_state(TASK_UNINTERRUPTIBLE);
515
516                 /*
517                  * Ensures that writes to the waitlist entry happen after we see
518                  * wait->lock_acquired: pairs with the smp_store_release in
519                  * __six_lock_wakeup
520                  */
521                 if (smp_load_acquire(&wait->lock_acquired))
522                         break;
523
524                 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
525                 if (unlikely(ret)) {
526                         bool acquired;
527
528                         /*
529                          * If should_sleep_fn() returns an error, we are
530                          * required to return that error even if we already
531                          * acquired the lock - should_sleep_fn() might have
532                          * modified external state (e.g. when the deadlock cycle
533                          * detector in bcachefs issued a transaction restart)
534                          */
535                         raw_spin_lock(&lock->wait_lock);
536                         acquired = wait->lock_acquired;
537                         if (!acquired)
538                                 list_del(&wait->list);
539                         raw_spin_unlock(&lock->wait_lock);
540
541                         if (unlikely(acquired))
542                                 do_six_unlock_type(lock, type);
543                         break;
544                 }
545
546                 schedule();
547         }
548
549         __set_current_state(TASK_RUNNING);
550 out:
551         if (ret && type == SIX_LOCK_write) {
552                 six_clear_bitmask(lock, SIX_LOCK_HELD_write);
553                 six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
554         }
555         trace_contention_end(lock, 0);
556
557         return ret;
558 }
559
560 /**
561  * six_lock_ip_waiter - take a lock, with full waitlist interface
562  * @lock:       lock to take
563  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
564  * @wait:       pointer to wait object, which will be added to lock's waitlist
565  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
566  *              to scheduling
567  * @p:          passed through to @should_sleep_fn
568  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
569  *
570  * This is the most general six_lock() variant, with parameters to support full
571  * cycle detection for deadlock avoidance.
572  *
573  * The code calling this function must implement tracking of held locks, and the
574  * @wait object should be embedded into the struct that tracks held locks -
575  * which must also be accessible in a thread-safe way.
576  *
577  * @should_sleep_fn should invoke the cycle detector; it should walk each
578  * lock's waiters, and for each waiter recursively walk their held locks.
579  *
580  * When this function must block, @wait will be added to @lock's waitlist before
581  * calling trylock, and before calling @should_sleep_fn, and @wait will not be
582  * removed from the lock waitlist until the lock has been successfully acquired,
583  * or we abort.
584  *
585  * @wait.start_time will be monotonically increasing for any given waitlist, and
586  * thus may be used as a loop cursor.
587  *
588  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
589  */
590 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
591                        struct six_lock_waiter *wait,
592                        six_lock_should_sleep_fn should_sleep_fn, void *p,
593                        unsigned long ip)
594 {
595         int ret;
596
597         wait->start_time = 0;
598
599         if (type != SIX_LOCK_write)
600                 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
601
602         ret = do_six_trylock(lock, type, true) ? 0
603                 : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
604
605         if (ret && type != SIX_LOCK_write)
606                 six_release(&lock->dep_map, ip);
607         if (!ret)
608                 lock_acquired(&lock->dep_map, ip);
609
610         return ret;
611 }
612 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
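/*
 * Illustrative sketch of the waiter interface (hypothetical caller code, not
 * from this file; foo_should_sleep() and foo_detect_cycle() are made up):
 *
 *	static int foo_should_sleep(struct six_lock *lock, void *p)
 *	{
 *		struct foo *foo = p;
 *
 *		// run the cycle detector; a nonzero return aborts the lock
 *		// attempt and is returned from six_lock_ip_waiter()
 *		return foo_detect_cycle(foo) ? -EDEADLK : 0;
 *	}
 *
 *	struct six_lock_waiter wait;
 *	int ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_intent, &wait,
 *				     foo_should_sleep, foo, _THIS_IP_);
 *	if (ret)
 *		return ret;	// lock not held; error from foo_should_sleep()
 */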
613
614 __always_inline
615 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
616 {
617         u32 state;
618
619         if (type == SIX_LOCK_intent)
620                 lock->owner = NULL;
621
622         if (type == SIX_LOCK_read &&
623             lock->readers) {
624                 smp_mb(); /* unlock barrier */
625                 this_cpu_dec(*lock->readers);
626                 smp_mb(); /* between unlocking and checking for waiters */
627                 state = atomic_read(&lock->state);
628         } else {
629                 u32 v = l[type].lock_val;
630
631                 if (type != SIX_LOCK_read)
632                         v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
633
634                 EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
635                 state = atomic_sub_return_release(v, &lock->state);
636         }
637
638         six_lock_wakeup(lock, state, l[type].unlock_wakeup);
639 }
640
641 /**
642  * six_unlock_ip - drop a six lock
643  * @lock:       lock to unlock
644  * @type:       SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
645  * @ip:         ip parameter for lockdep/lockstat, i.e. _THIS_IP_
646  *
647  * When a lock is held multiple times (because six_lock_increment() was used),
648  * this decrements the 'lock held' counter by one.
649  *
650  * For example:
651  * six_lock_read(&foo->lock);                           read count 1
652  * six_lock_increment(&foo->lock, SIX_LOCK_read);       read count 2
653  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 1
654  * six_lock_unlock(&foo->lock, SIX_LOCK_read);          read count 0
655  */
656 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
657 {
658         EBUG_ON(type == SIX_LOCK_write &&
659                 !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
660         EBUG_ON((type == SIX_LOCK_write ||
661                  type == SIX_LOCK_intent) &&
662                 lock->owner != current);
663
664         if (type != SIX_LOCK_write)
665                 six_release(&lock->dep_map, ip);
666         else
667                 lock->seq++;
668
669         if (type == SIX_LOCK_intent &&
670             lock->intent_lock_recurse) {
671                 --lock->intent_lock_recurse;
672                 return;
673         }
674
675         do_six_unlock_type(lock, type);
676 }
677 EXPORT_SYMBOL_GPL(six_unlock_ip);
678
679 /**
680  * six_lock_downgrade - convert an intent lock to a read lock
681  * @lock:       lock to downgrade
682  *
683  * @lock will have read count incremented and intent count decremented
684  */
685 void six_lock_downgrade(struct six_lock *lock)
686 {
687         six_lock_increment(lock, SIX_LOCK_read);
688         six_unlock_intent(lock);
689 }
690 EXPORT_SYMBOL_GPL(six_lock_downgrade);
691
692 /**
693  * six_lock_tryupgrade - attempt to convert read lock to an intent lock
694  * @lock:       lock to upgrade
695  *
696  * On success, @lock will have intent count incremented and read count
697  * decremented
698  *
699  * Return: true on success, false on failure
700  */
701 bool six_lock_tryupgrade(struct six_lock *lock)
702 {
703         u32 old = atomic_read(&lock->state), new;
704
705         do {
706                 new = old;
707
708                 if (new & SIX_LOCK_HELD_intent)
709                         return false;
710
711                 if (!lock->readers) {
712                         EBUG_ON(!(new & SIX_LOCK_HELD_read));
713                         new -= l[SIX_LOCK_read].lock_val;
714                 }
715
716                 new |= SIX_LOCK_HELD_intent;
717         } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
718
719         if (lock->readers)
720                 this_cpu_dec(*lock->readers);
721
722         six_set_owner(lock, SIX_LOCK_intent, old, current);
723
724         return true;
725 }
726 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
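/*
 * Illustrative sketch (hypothetical caller, not from this file): try to
 * upgrade a read lock in place; if another thread holds the intent lock,
 * drop the read lock and take intent the blocking way.
 *
 *	if (!six_lock_tryupgrade(&foo->lock)) {
 *		six_unlock_ip(&foo->lock, SIX_LOCK_read, _THIS_IP_);
 *		ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_intent, &wait,
 *					 NULL, NULL, _THIS_IP_);
 *	}
 */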
727
728 /**
729  * six_trylock_convert - attempt to convert a held lock from one type to another
730  * @lock:       lock to upgrade
731  * @from:       SIX_LOCK_read or SIX_LOCK_intent
732  * @to:         SIX_LOCK_read or SIX_LOCK_intent
733  *
734  * On success, @lock will have intent count incremented and read count
735  * decremented
736  *
737  * Return: true on success, false on failure
738  */
739 bool six_trylock_convert(struct six_lock *lock,
740                          enum six_lock_type from,
741                          enum six_lock_type to)
742 {
743         EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
744
745         if (to == from)
746                 return true;
747
748         if (to == SIX_LOCK_read) {
749                 six_lock_downgrade(lock);
750                 return true;
751         } else {
752                 return six_lock_tryupgrade(lock);
753         }
754 }
755 EXPORT_SYMBOL_GPL(six_trylock_convert);
756
757 /**
758  * six_lock_increment - increase held lock count on a lock that is already held
759  * @lock:       lock to increment
760  * @type:       SIX_LOCK_read or SIX_LOCK_intent
761  *
762  * @lock must already be held, with a lock type that is greater than or equal to
763  * @type
764  *
765  * A corresponding six_unlock_type() call will be required for @lock to be fully
766  * unlocked.
767  */
768 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
769 {
770         six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
771
772         /* XXX: assert already locked, and that we don't overflow: */
773
774         switch (type) {
775         case SIX_LOCK_read:
776                 if (lock->readers) {
777                         this_cpu_inc(*lock->readers);
778                 } else {
779                         EBUG_ON(!(atomic_read(&lock->state) &
780                                   (SIX_LOCK_HELD_read|
781                                    SIX_LOCK_HELD_intent)));
782                         atomic_add(l[type].lock_val, &lock->state);
783                 }
784                 break;
785         case SIX_LOCK_intent:
786                 EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
787                 lock->intent_lock_recurse++;
788                 break;
789         case SIX_LOCK_write:
790                 BUG();
791                 break;
792         }
793 }
794 EXPORT_SYMBOL_GPL(six_lock_increment);
795
796 /**
797  * six_lock_wakeup_all - wake up all waiters on @lock
798  * @lock:       lock to wake up waiters for
799  *
800  * Waking up waiters will cause them to re-run should_sleep_fn, which may then
801  * abort the lock operation.
802  *
803  * This function is never needed in a bug-free program; it's only useful in
804  * debug code, e.g. to determine if a cycle detector is at fault.
805  */
806 void six_lock_wakeup_all(struct six_lock *lock)
807 {
808         u32 state = atomic_read(&lock->state);
809         struct six_lock_waiter *w;
810
811         six_lock_wakeup(lock, state, SIX_LOCK_read);
812         six_lock_wakeup(lock, state, SIX_LOCK_intent);
813         six_lock_wakeup(lock, state, SIX_LOCK_write);
814
815         raw_spin_lock(&lock->wait_lock);
816         list_for_each_entry(w, &lock->wait_list, list)
817                 wake_up_process(w->task);
818         raw_spin_unlock(&lock->wait_lock);
819 }
820 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
821
822 /**
823  * six_lock_counts - return held lock counts, for each lock type
824  * @lock:       lock to return counters for
825  *
826  * Return: the number of times a lock is held for read, intent and write.
827  */
828 struct six_lock_count six_lock_counts(struct six_lock *lock)
829 {
830         struct six_lock_count ret;
831
832         ret.n[SIX_LOCK_read]    = !lock->readers
833                 ? atomic_read(&lock->state) & SIX_LOCK_HELD_read
834                 : pcpu_read_count(lock);
835         ret.n[SIX_LOCK_intent]  = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
836                 lock->intent_lock_recurse;
837         ret.n[SIX_LOCK_write]   = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
838
839         return ret;
840 }
841 EXPORT_SYMBOL_GPL(six_lock_counts);
842
843 /**
844  * six_lock_readers_add - directly manipulate reader count of a lock
845  * @lock:       lock to add/subtract readers for
846  * @nr:         reader count to add/subtract
847  *
848  * When an upper layer is implementing lock reentrancy, we may have both read
849  * and intent locks on the same lock.
850  *
851  * When we need to take a write lock, the read locks will cause self-deadlock,
852  * because six locks themselves do not track which read locks are held by the
853  * current thread and which are held by a different thread - they do no
854  * per-thread tracking of held locks.
855  *
856  * The upper layer that is tracking held locks may, however, if trylock() has
857  * failed, count up its own read locks, subtract them, take the write lock, and
858  * then re-add them.
859  *
860  * As in any other situation when taking a write lock, @lock must be held for
861  * intent one (or more) times, so @lock will never be left unlocked.
862  */
863 void six_lock_readers_add(struct six_lock *lock, int nr)
864 {
865         if (lock->readers) {
866                 this_cpu_add(*lock->readers, nr);
867         } else {
868                 EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
869                 /* reader count starts at bit 0 */
870                 atomic_add(nr, &lock->state);
871         }
872 }
873 EXPORT_SYMBOL_GPL(six_lock_readers_add);
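/*
 * Illustrative sketch of the pattern described above (hypothetical caller,
 * not from this file): the upper layer knows it holds 'nr_read' read locks
 * itself, temporarily subtracts them, takes the write lock, then re-adds
 * them.
 *
 *	// @lock is already held for intent by this thread
 *	six_lock_readers_add(&foo->lock, -nr_read);
 *	ret = six_lock_ip_waiter(&foo->lock, SIX_LOCK_write, &wait,
 *				 NULL, NULL, _THIS_IP_);
 *	six_lock_readers_add(&foo->lock, nr_read);
 */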
874
875 /**
876  * six_lock_exit - release resources held by a lock prior to freeing
877  * @lock:       lock to exit
878  *
879  * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
880  * required to free the percpu read counts.
881  */
882 void six_lock_exit(struct six_lock *lock)
883 {
884         WARN_ON(lock->readers && pcpu_read_count(lock));
885         WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
886
887         free_percpu(lock->readers);
888         lock->readers = NULL;
889 }
890 EXPORT_SYMBOL_GPL(six_lock_exit);
891
892 void __six_lock_init(struct six_lock *lock, const char *name,
893                      struct lock_class_key *key, enum six_lock_init_flags flags)
894 {
895         atomic_set(&lock->state, 0);
896         raw_spin_lock_init(&lock->wait_lock);
897         INIT_LIST_HEAD(&lock->wait_list);
898 #ifdef CONFIG_DEBUG_LOCK_ALLOC
899         debug_check_no_locks_freed((void *) lock, sizeof(*lock));
900         lockdep_init_map(&lock->dep_map, name, key, 0);
901 #endif
902
903         /*
904          * Don't assume that we have real percpu variables available in
905          * userspace:
906          */
907 #ifdef __KERNEL__
908         if (flags & SIX_LOCK_INIT_PCPU) {
909                 /*
910                  * We don't return an error here on memory allocation failure
911                  * since percpu is an optimization, and locks will work with the
912                  * same semantics in non-percpu mode: callers can check for
913                  * failure if they wish by checking lock->readers, but generally
914                  * will not want to treat it as an error.
915                  */
916                 lock->readers = alloc_percpu(unsigned);
917         }
918 #endif
919 }
920 EXPORT_SYMBOL_GPL(__six_lock_init);
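/*
 * Illustrative init/teardown sketch (hypothetical, not from this file):
 * initialize a lock in percpu-reader mode and release its resources before
 * the containing object is freed.
 *
 *	static struct lock_class_key foo_lock_key;
 *
 *	__six_lock_init(&foo->lock, "foo->lock", &foo_lock_key,
 *			SIX_LOCK_INIT_PCPU);
 *	...
 *	six_lock_exit(&foo->lock);	// frees the percpu read counts
 *	kfree(foo);
 */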