afs: Make vnode->cb_interest RCU safe
authorDavid Howells <dhowells@redhat.com>
Mon, 13 May 2019 15:14:32 +0000 (16:14 +0100)
committerDavid Howells <dhowells@redhat.com>
Thu, 16 May 2019 21:23:21 +0000 (22:23 +0100)
Use RCU-based freeing for afs_cb_interest struct objects and use RCU on
vnode->cb_interest.  Use that change to allow afs_check_validity() to use
read_seqbegin_or_lock() instead of read_seqlock_excl().

This also requires the caller of afs_check_validity() to hold the RCU read
lock across the call.

Signed-off-by: David Howells <dhowells@redhat.com>
fs/afs/callback.c
fs/afs/dir.c
fs/afs/inode.c
fs/afs/internal.h
fs/afs/rotate.c
fs/afs/security.c
fs/afs/super.c

index 4876079..d441bef 100644 (file)
@@ -94,15 +94,15 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode,
        struct afs_server *server = entry->server;
 
 again:
-       if (vnode->cb_interest &&
-           likely(vnode->cb_interest == entry->cb_interest))
+       vcbi = rcu_dereference_protected(vnode->cb_interest,
+                                        lockdep_is_held(&vnode->io_lock));
+       if (vcbi && likely(vcbi == entry->cb_interest))
                return 0;
 
        read_lock(&slist->lock);
        cbi = afs_get_cb_interest(entry->cb_interest);
        read_unlock(&slist->lock);
 
-       vcbi = vnode->cb_interest;
        if (vcbi) {
                if (vcbi == cbi) {
                        afs_put_cb_interest(afs_v2net(vnode), cbi);
@@ -114,8 +114,9 @@ again:
                 */
                if (cbi && vcbi->server == cbi->server) {
                        write_seqlock(&vnode->cb_lock);
-                       old = vnode->cb_interest;
-                       vnode->cb_interest = cbi;
+                       old = rcu_dereference_protected(vnode->cb_interest,
+                                                       lockdep_is_held(&vnode->cb_lock.lock));
+                       rcu_assign_pointer(vnode->cb_interest, cbi);
                        write_sequnlock(&vnode->cb_lock);
                        afs_put_cb_interest(afs_v2net(vnode), old);
                        return 0;
@@ -160,8 +161,9 @@ again:
         */
        write_seqlock(&vnode->cb_lock);
 
-       old = vnode->cb_interest;
-       vnode->cb_interest = cbi;
+       old = rcu_dereference_protected(vnode->cb_interest,
+                                       lockdep_is_held(&vnode->cb_lock.lock));
+       rcu_assign_pointer(vnode->cb_interest, cbi);
        vnode->cb_s_break = cbi->server->cb_s_break;
        vnode->cb_v_break = vnode->volume->cb_v_break;
        clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
@@ -191,10 +193,11 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
                                vi = NULL;
 
                        write_unlock(&cbi->server->cb_break_lock);
-                       kfree(vi);
+                       if (vi)
+                               kfree_rcu(vi, rcu);
                        afs_put_server(net, cbi->server);
                }
-               kfree(cbi);
+               kfree_rcu(cbi, rcu);
        }
 }
 
index f7344b0..338c226 100644 (file)
@@ -638,11 +638,12 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
                                   struct key *key)
 {
        struct afs_lookup_cookie *cookie;
-       struct afs_cb_interest *cbi = NULL;
+       struct afs_cb_interest *dcbi, *cbi = NULL;
        struct afs_super_info *as = dir->i_sb->s_fs_info;
        struct afs_status_cb *scb;
        struct afs_iget_data data;
        struct afs_fs_cursor fc;
+       struct afs_server *server;
        struct afs_vnode *dvnode = AFS_FS_I(dir);
        struct inode *inode = NULL;
        int ret, i;
@@ -658,10 +659,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
        cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */
 
        read_seqlock_excl(&dvnode->cb_lock);
-       if (dvnode->cb_interest &&
-           dvnode->cb_interest->server &&
-           test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags))
-               cookie->one_only = true;
+       dcbi = rcu_dereference_protected(dvnode->cb_interest,
+                                        lockdep_is_held(&dvnode->cb_lock.lock));
+       if (dcbi) {
+               server = dcbi->server;
+               if (server &&
+                   test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags))
+                       cookie->one_only = true;
+       }
        read_sequnlock_excl(&dvnode->cb_lock);
 
        for (i = 0; i < 50; i++)
index 5c95da7..ba35b48 100644 (file)
@@ -139,9 +139,10 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
                vnode->cb_expires_at = ktime_get_real_seconds();
        } else {
                vnode->cb_expires_at = scb->callback.expires_at;
-               old_cbi = vnode->cb_interest;
+               old_cbi = rcu_dereference_protected(vnode->cb_interest,
+                                                   lockdep_is_held(&vnode->cb_lock.lock));
                if (cbi != old_cbi)
-                       vnode->cb_interest = afs_get_cb_interest(cbi);
+                       rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(cbi));
                else
                        old_cbi = NULL;
                set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
@@ -245,9 +246,10 @@ static void afs_apply_callback(struct afs_fs_cursor *fc,
 
        if (!afs_cb_is_broken(cb_break, vnode, fc->cbi)) {
                vnode->cb_expires_at    = cb->expires_at;
-               old = vnode->cb_interest;
+               old = rcu_dereference_protected(vnode->cb_interest,
+                                               lockdep_is_held(&vnode->cb_lock.lock));
                if (old != fc->cbi) {
-                       vnode->cb_interest = afs_get_cb_interest(fc->cbi);
+                       rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(fc->cbi));
                        afs_put_cb_interest(afs_v2net(vnode), old);
                }
                set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
@@ -565,36 +567,46 @@ void afs_zap_data(struct afs_vnode *vnode)
  */
 bool afs_check_validity(struct afs_vnode *vnode)
 {
+       struct afs_cb_interest *cbi;
+       struct afs_server *server;
+       struct afs_volume *volume = vnode->volume;
        time64_t now = ktime_get_real_seconds();
        bool valid;
+       unsigned int cb_break, cb_s_break, cb_v_break;
+       int seq = 0;
 
-       /* Quickly check the callback state.  Ideally, we'd use read_seqbegin
-        * here, but we have no way to pass the net namespace to the RCU
-        * cleanup for the server record.
-        */
-       read_seqlock_excl(&vnode->cb_lock);
-
-       if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
-               if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
-                   vnode->cb_v_break != vnode->volume->cb_v_break) {
-                       vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
-                       vnode->cb_v_break = vnode->volume->cb_v_break;
-                       valid = false;
-               } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-                       valid = false;
-               } else if (vnode->cb_expires_at - 10 <= now) {
-                       valid = false;
-               } else {
+       do {
+               read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+               cb_v_break = READ_ONCE(volume->cb_v_break);
+               cb_break = vnode->cb_break;
+
+               if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+                       cbi = rcu_dereference(vnode->cb_interest);
+                       server = rcu_dereference(cbi->server);
+                       cb_s_break = READ_ONCE(server->cb_s_break);
+
+                       if (vnode->cb_s_break != cb_s_break ||
+                           vnode->cb_v_break != cb_v_break) {
+                               vnode->cb_s_break = cb_s_break;
+                               vnode->cb_v_break = cb_v_break;
+                               valid = false;
+                       } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+                               valid = false;
+                       } else if (vnode->cb_expires_at - 10 <= now) {
+                               valid = false;
+                       } else {
+                               valid = true;
+                       }
+               } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
                        valid = true;
+               } else {
+                       vnode->cb_v_break = cb_v_break;
+                       valid = false;
                }
-       } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-               valid = true;
-       } else {
-               vnode->cb_v_break = vnode->volume->cb_v_break;
-               valid = false;
-       }
 
-       read_sequnlock_excl(&vnode->cb_lock);
+       } while (need_seqretry(&vnode->cb_lock, seq));
+
+       done_seqretry(&vnode->cb_lock, seq);
        return valid;
 }
 
@@ -616,7 +628,9 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
               vnode->fid.vid, vnode->fid.vnode, vnode->flags,
               key_serial(key));
 
+       rcu_read_lock();
        valid = afs_check_validity(vnode);
+       rcu_read_unlock();
 
        if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
                clear_nlink(&vnode->vfs_inode);
@@ -703,6 +717,7 @@ int afs_drop_inode(struct inode *inode)
  */
 void afs_evict_inode(struct inode *inode)
 {
+       struct afs_cb_interest *cbi;
        struct afs_vnode *vnode;
 
        vnode = AFS_FS_I(inode);
@@ -719,10 +734,14 @@ void afs_evict_inode(struct inode *inode)
        truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
 
-       if (vnode->cb_interest) {
-               afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
-               vnode->cb_interest = NULL;
+       write_seqlock(&vnode->cb_lock);
+       cbi = rcu_dereference_protected(vnode->cb_interest,
+                                       lockdep_is_held(&vnode->cb_lock.lock));
+       if (cbi) {
+               afs_put_cb_interest(afs_i2net(inode), cbi);
+               rcu_assign_pointer(vnode->cb_interest, NULL);
        }
+       write_sequnlock(&vnode->cb_lock);
 
        while (!list_empty(&vnode->wb_keys)) {
                struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
index 54688f6..3dbb1e8 100644 (file)
@@ -554,7 +554,10 @@ struct afs_server {
 struct afs_vol_interest {
        struct hlist_node       srv_link;       /* Link in server->cb_volumes */
        struct hlist_head       cb_interests;   /* List of callback interests on the server */
-       afs_volid_t             vid;            /* Volume ID to match */
+       union {
+               struct rcu_head rcu;
+               afs_volid_t     vid;            /* Volume ID to match */
+       };
        unsigned int            usage;
 };
 
@@ -566,7 +569,10 @@ struct afs_cb_interest {
        struct afs_vol_interest *vol_interest;
        struct afs_server       *server;        /* Server on which this interest resides */
        struct super_block      *sb;            /* Superblock on which inodes reside */
-       afs_volid_t             vid;            /* Volume ID to match */
+       union {
+               struct rcu_head rcu;
+               afs_volid_t     vid;            /* Volume ID to match */
+       };
        refcount_t              usage;
 };
 
@@ -676,7 +682,7 @@ struct afs_vnode {
        afs_lock_type_t         lock_type : 8;
 
        /* outstanding callback notification on this file */
-       struct afs_cb_interest  *cb_interest;   /* Server on which this resides */
+       struct afs_cb_interest __rcu *cb_interest; /* Server on which this resides */
        unsigned int            cb_s_break;     /* Mass break counter on ->server */
        unsigned int            cb_v_break;     /* Mass break counter on ->volume */
        unsigned int            cb_break;       /* Break counter on vnode */
index 52f3a99..b00c739 100644 (file)
@@ -66,7 +66,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
        fc->untried = (1UL << fc->server_list->nr_servers) - 1;
        fc->index = READ_ONCE(fc->server_list->preferred);
 
-       cbi = vnode->cb_interest;
+       cbi = rcu_dereference_protected(vnode->cb_interest,
+                                       lockdep_is_held(&vnode->io_lock));
        if (cbi) {
                /* See if the vnode's preferred record is still available */
                for (i = 0; i < fc->server_list->nr_servers; i++) {
@@ -87,8 +88,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
 
                /* Note that the callback promise is effectively broken */
                write_seqlock(&vnode->cb_lock);
-               ASSERTCMP(cbi, ==, vnode->cb_interest);
-               vnode->cb_interest = NULL;
+               ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest));
+               rcu_assign_pointer(vnode->cb_interest, NULL);
                if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
                        vnode->cb_break++;
                write_sequnlock(&vnode->cb_lock);
@@ -417,7 +418,9 @@ selected_server:
        if (error < 0)
                goto failed_set_error;
 
-       fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+       fc->cbi = afs_get_cb_interest(
+               rcu_dereference_protected(vnode->cb_interest,
+                                         lockdep_is_held(&vnode->io_lock)));
 
        read_lock(&server->fs_lock);
        alist = rcu_dereference_protected(server->addresses,
@@ -487,12 +490,15 @@ failed:
 bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 {
        struct afs_vnode *vnode = fc->vnode;
-       struct afs_cb_interest *cbi = vnode->cb_interest;
+       struct afs_cb_interest *cbi;
        struct afs_addr_list *alist;
        int error = fc->ac.error;
 
        _enter("");
 
+       cbi = rcu_dereference_protected(vnode->cb_interest,
+                                       lockdep_is_held(&vnode->io_lock));
+
        switch (error) {
        case SHRT_MAX:
                if (!cbi) {
@@ -501,7 +507,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
                        return false;
                }
 
-               fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+               fc->cbi = afs_get_cb_interest(cbi);
 
                read_lock(&cbi->server->fs_lock);
                alist = rcu_dereference_protected(cbi->server->addresses,
index 857f09d..5d8ece9 100644 (file)
@@ -146,7 +146,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                                }
 
                                if (afs_cb_is_broken(cb_break, vnode,
-                                                    vnode->cb_interest)) {
+                                                    rcu_dereference(vnode->cb_interest))) {
                                        changed = true;
                                        break;
                                }
@@ -176,7 +176,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                }
        }
 
-       if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest))
+       if (afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)))
                goto someone_else_changed_it;
 
        /* We need a ref on any permits list we want to copy as we'll have to
@@ -253,14 +253,16 @@ found:
 
        kfree(new);
 
+       rcu_read_lock();
        spin_lock(&vnode->lock);
        zap = rcu_access_pointer(vnode->permit_cache);
-       if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) &&
+       if (!afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)) &&
            zap == permits)
                rcu_assign_pointer(vnode->permit_cache, replacement);
        else
                zap = replacement;
        spin_unlock(&vnode->lock);
+       rcu_read_unlock();
        afs_put_permits(zap);
 out_put:
        afs_put_permits(permits);
index a81c235..f76473a 100644 (file)
@@ -677,7 +677,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
        vnode->volume           = NULL;
        vnode->lock_key         = NULL;
        vnode->permit_cache     = NULL;
-       vnode->cb_interest      = NULL;
+       RCU_INIT_POINTER(vnode->cb_interest, NULL);
 #ifdef CONFIG_AFS_FSCACHE
        vnode->cache            = NULL;
 #endif
@@ -707,7 +707,7 @@ static void afs_destroy_inode(struct inode *inode)
 
        _debug("DESTROY INODE %p", inode);
 
-       ASSERTCMP(vnode->cb_interest, ==, NULL);
+       ASSERTCMP(rcu_access_pointer(vnode->cb_interest), ==, NULL);
 
        atomic_dec(&afs_count_active_inodes);
 }