btrfs: Implement DREW lock
author Nikolay Borisov <nborisov@suse.com>
Thu, 30 Jan 2020 12:59:44 +0000 (14:59 +0200)
committer David Sterba <dsterba@suse.com>
Mon, 23 Mar 2020 16:01:43 +0000 (17:01 +0100)
A (D)ouble (R)eader (W)riter (E)xclusion lock is a locking primitive
that allows multiple readers or multiple writers to hold it concurrently,
but never readers and writers at the same time.

The code is factored out from the existing open-coded locking scheme
used to exclude pending snapshots from nocow writers and vice versa.
The current implementation actually favors readers (that is, snapshot
creators) over writers (nocow writers of the filesystem).

The API provides lock/unlock/trylock for reads and writes.
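
For illustration, a minimal usage sketch of the API (the lock variable,
error handling and surrounding callers are hypothetical, not part of this
patch):

    struct btrfs_drew_lock lock;
    int ret;

    ret = btrfs_drew_lock_init(&lock);  /* allocates the per-cpu counter */
    if (ret)
            return ret;

    /* Writer side (e.g. a nocow writer): backs off if readers are active */
    if (btrfs_drew_try_write_lock(&lock)) {
            /* ... do the write ... */
            btrfs_drew_write_unlock(&lock);
    }

    /* Reader side (e.g. snapshot creation): waits out active writers */
    btrfs_drew_read_lock(&lock);
    /* ... do the read-side work ... */
    btrfs_drew_read_unlock(&lock);

    btrfs_drew_lock_destroy(&lock);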

A formal specification in TLA+, provided by Valentin Schneider, is at
https://lore.kernel.org/linux-btrfs/2dcaf81c-f0d3-409e-cb29-733d8b3b4cc9@arm.com/

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/locking.c
fs/btrfs/locking.h

index 2ee7d8b..ab81512 100644
@@ -33,6 +33,7 @@
 #include "extent_map.h"
 #include "async-thread.h"
 #include "block-rsv.h"
+#include "locking.h"
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
index e713900..fb647d8 100644
@@ -565,3 +565,96 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
        }
        return eb;
 }
+
+/*
+ * DREW locks
+ * ==========
+ *
+ * DREW stands for double-reader-writer-exclusion lock. It's used in situations
+ * where you want to provide A-B exclusion but not A-A or B-B exclusion.
+ *
+ * The current implementation gives priority to readers. If a reader and a
+ * writer both race to acquire their respective sides of the lock, the writer
+ * backs off as soon as it detects a concurrent reader. Additionally, if
+ * there are pending readers, no new writers are allowed to come in and
+ * acquire the lock.
+ */
+
+int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
+{
+       int ret;
+
+       ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
+       if (ret)
+               return ret;
+
+       atomic_set(&lock->readers, 0);
+       init_waitqueue_head(&lock->pending_readers);
+       init_waitqueue_head(&lock->pending_writers);
+
+       return 0;
+}
+
+void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
+{
+       percpu_counter_destroy(&lock->writers);
+}
+
+/* Return true if acquisition is successful, false otherwise */
+bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
+{
+       if (atomic_read(&lock->readers))
+               return false;
+
+       percpu_counter_inc(&lock->writers);
+
+       /* Ensure writers count is updated before we check for pending readers */
+       smp_mb();
+       if (atomic_read(&lock->readers)) {
+               btrfs_drew_write_unlock(lock);
+               return false;
+       }
+
+       return true;
+}
+
+void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
+{
+       while (true) {
+               if (btrfs_drew_try_write_lock(lock))
+                       return;
+               wait_event(lock->pending_writers, !atomic_read(&lock->readers));
+       }
+}
+
+void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
+{
+       percpu_counter_dec(&lock->writers);
+       cond_wake_up(&lock->pending_readers);
+}
+
+void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
+{
+       atomic_inc(&lock->readers);
+
+       /*
+        * Ensure the pending reader count is perceived BEFORE this reader
+        * goes to sleep in case of active writers. This guarantees new writers
+        * won't be allowed and that the current reader will be woken up when
+        * the last active writer finishes its job.
+        */
+       smp_mb__after_atomic();
+
+       wait_event(lock->pending_readers,
+                  percpu_counter_sum(&lock->writers) == 0);
+}
+
+void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
+{
+       /*
+        * atomic_dec_and_test implies a full barrier, so woken up writers
+        * are guaranteed to see the decrement
+        */
+       if (atomic_dec_and_test(&lock->readers))
+               wake_up(&lock->pending_writers);
+}
index 21a2858..d715846 100644
@@ -6,6 +6,9 @@
 #ifndef BTRFS_LOCKING_H
 #define BTRFS_LOCKING_H
 
+#include <linux/atomic.h>
+#include <linux/wait.h>
+#include <linux/percpu_counter.h>
 #include "extent_io.h"
 
 #define BTRFS_WRITE_LOCK 1
@@ -13,6 +16,8 @@
 #define BTRFS_WRITE_LOCK_BLOCKING 3
 #define BTRFS_READ_LOCK_BLOCKING 4
 
+struct btrfs_path;
+
 void btrfs_tree_lock(struct extent_buffer *eb);
 void btrfs_tree_unlock(struct extent_buffer *eb);
 
@@ -48,4 +53,19 @@ static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
                BUG();
 }
 
+struct btrfs_drew_lock {
+       atomic_t readers;
+       struct percpu_counter writers;
+       wait_queue_head_t pending_writers;
+       wait_queue_head_t pending_readers;
+};
+
+int btrfs_drew_lock_init(struct btrfs_drew_lock *lock);
+void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock);
+void btrfs_drew_write_lock(struct btrfs_drew_lock *lock);
+bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock);
+void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock);
+void btrfs_drew_read_lock(struct btrfs_drew_lock *lock);
+void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock);
+
 #endif
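
As a follow-up illustration of the factoring described in the commit message,
a hedged sketch of how the snapshot vs nocow-writer exclusion could map onto
this API (the root->snapshot_lock field is an assumption made for this sketch,
not something introduced by this patch):

    /* Snapshot creation takes the favored reader side and waits for any
       in-flight nocow writers to finish */
    btrfs_drew_read_lock(&root->snapshot_lock);
    /* ... queue the snapshot for creation ... */
    btrfs_drew_read_unlock(&root->snapshot_lock);

    /* A nocow writer takes the writer side; it backs off as soon as a
       pending snapshot (reader) is detected */
    if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
            return 0;  /* cannot nocow right now, fall back to cow */
    /* ... do the nocow write ... */
    btrfs_drew_write_unlock(&root->snapshot_lock);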