smb: During unmount, ensure all cached dir instances drop their dentry
authorPaul Aurich <paul@darkrain42.org>
Mon, 18 Nov 2024 21:50:28 +0000 (13:50 -0800)
committerSteve French <stfrench@microsoft.com>
Wed, 27 Nov 2024 00:47:08 +0000 (18:47 -0600)
The unmount process (cifs_kill_sb() calling close_all_cached_dirs()) can
race with various cached directory operations, which ultimately results
in dentries not being dropped and these kernel BUGs:

BUG: Dentry ffff88814f37e358{i=1000000000080,n=/}  still in use (2) [unmount of cifs cifs]
VFS: Busy inodes after unmount of cifs (cifs)
------------[ cut here ]------------
kernel BUG at fs/super.c:661!

This happens when a cfid is in the process of being cleaned up when, and
has been removed from the cfids->entries list, including:

- Receiving a lease break from the server
- Server reconnection triggers invalidate_all_cached_dirs(), which
  removes all the cfids from the list
- The laundromat thread decides to expire an old cfid.

To solve these problems, dropping the dentry is done in queued work done
in a newly-added cfid_put_wq workqueue, and close_all_cached_dirs()
flushes that workqueue after it drops all the dentries of which it's
aware. This is a global workqueue (rather than scoped to a mount), but
the queued work is minimal.

The final cleanup work for cleaning up a cfid is performed via work
queued in the serverclose_wq workqueue; this is done separate from
dropping the dentries so that close_all_cached_dirs() doesn't block on
any server operations.

Both of these queued works expect to invoked with a cfid reference and
a tcon reference to avoid those objects from being freed while the work
is ongoing.

While we're here, add proper locking to close_all_cached_dirs(), and
locking around the freeing of cfid->dentry.

Fixes: ebe98f1447bb ("cifs: enable caching of directories for which a lease is held")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Aurich <paul@darkrain42.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
fs/smb/client/cached_dir.c
fs/smb/client/cached_dir.h
fs/smb/client/cifsfs.c
fs/smb/client/cifsglob.h
fs/smb/client/inode.c
fs/smb/client/trace.h

index 81b92d2..d9e1d1d 100644 (file)
@@ -17,6 +17,11 @@ static void free_cached_dir(struct cached_fid *cfid);
 static void smb2_close_cached_fid(struct kref *ref);
 static void cfids_laundromat_worker(struct work_struct *work);
 
+struct cached_dir_dentry {
+       struct list_head entry;
+       struct dentry *dentry;
+};
+
 static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
                                                    const char *path,
                                                    bool lookup_only,
@@ -472,7 +477,10 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb)
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cached_fids *cfids;
+       struct cached_dir_dentry *tmp_list, *q;
+       LIST_HEAD(entry);
 
+       spin_lock(&cifs_sb->tlink_tree_lock);
        for (node = rb_first(root); node; node = rb_next(node)) {
                tlink = rb_entry(node, struct tcon_link, tl_rbnode);
                tcon = tlink_tcon(tlink);
@@ -481,11 +489,30 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb)
                cfids = tcon->cfids;
                if (cfids == NULL)
                        continue;
+               spin_lock(&cfids->cfid_list_lock);
                list_for_each_entry(cfid, &cfids->entries, entry) {
-                       dput(cfid->dentry);
+                       tmp_list = kmalloc(sizeof(*tmp_list), GFP_ATOMIC);
+                       if (tmp_list == NULL)
+                               break;
+                       spin_lock(&cfid->fid_lock);
+                       tmp_list->dentry = cfid->dentry;
                        cfid->dentry = NULL;
+                       spin_unlock(&cfid->fid_lock);
+
+                       list_add_tail(&tmp_list->entry, &entry);
                }
+               spin_unlock(&cfids->cfid_list_lock);
+       }
+       spin_unlock(&cifs_sb->tlink_tree_lock);
+
+       list_for_each_entry_safe(tmp_list, q, &entry, entry) {
+               list_del(&tmp_list->entry);
+               dput(tmp_list->dentry);
+               kfree(tmp_list);
        }
+
+       /* Flush any pending work that will drop dentries */
+       flush_workqueue(cfid_put_wq);
 }
 
 /*
@@ -496,14 +523,18 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
 {
        struct cached_fids *cfids = tcon->cfids;
        struct cached_fid *cfid, *q;
-       LIST_HEAD(entry);
 
        if (cfids == NULL)
                return;
 
+       /*
+        * Mark all the cfids as closed, and move them to the cfids->dying list.
+        * They'll be cleaned up later by cfids_invalidation_worker. Take
+        * a reference to each cfid during this process.
+        */
        spin_lock(&cfids->cfid_list_lock);
        list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
-               list_move(&cfid->entry, &entry);
+               list_move(&cfid->entry, &cfids->dying);
                cfids->num_entries--;
                cfid->is_open = false;
                cfid->on_list = false;
@@ -516,26 +547,47 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
                } else
                        kref_get(&cfid->refcount);
        }
+       /*
+        * Queue dropping of the dentries once locks have been dropped
+        */
+       if (!list_empty(&cfids->dying))
+               queue_work(cfid_put_wq, &cfids->invalidation_work);
        spin_unlock(&cfids->cfid_list_lock);
-
-       list_for_each_entry_safe(cfid, q, &entry, entry) {
-               list_del(&cfid->entry);
-               cancel_work_sync(&cfid->lease_break);
-               /*
-                * Drop the ref-count from above, either the lease-ref (if there
-                * was one) or the extra one acquired.
-                */
-               kref_put(&cfid->refcount, smb2_close_cached_fid);
-       }
 }
 
 static void
-smb2_cached_lease_break(struct work_struct *work)
+cached_dir_offload_close(struct work_struct *work)
 {
        struct cached_fid *cfid = container_of(work,
-                               struct cached_fid, lease_break);
+                               struct cached_fid, close_work);
+       struct cifs_tcon *tcon = cfid->tcon;
+
+       WARN_ON(cfid->on_list);
 
        kref_put(&cfid->refcount, smb2_close_cached_fid);
+       cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close);
+}
+
+/*
+ * Release the cached directory's dentry, and then queue work to drop cached
+ * directory itself (closing on server if needed).
+ *
+ * Must be called with a reference to the cached_fid and a reference to the
+ * tcon.
+ */
+static void cached_dir_put_work(struct work_struct *work)
+{
+       struct cached_fid *cfid = container_of(work, struct cached_fid,
+                                              put_work);
+       struct dentry *dentry;
+
+       spin_lock(&cfid->fid_lock);
+       dentry = cfid->dentry;
+       cfid->dentry = NULL;
+       spin_unlock(&cfid->fid_lock);
+
+       dput(dentry);
+       queue_work(serverclose_wq, &cfid->close_work);
 }
 
 int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
@@ -562,8 +614,10 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
                        cfid->on_list = false;
                        cfids->num_entries--;
 
-                       queue_work(cifsiod_wq,
-                                  &cfid->lease_break);
+                       ++tcon->tc_count;
+                       trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+                                           netfs_trace_tcon_ref_get_cached_lease_break);
+                       queue_work(cfid_put_wq, &cfid->put_work);
                        spin_unlock(&cfids->cfid_list_lock);
                        return true;
                }
@@ -585,7 +639,8 @@ static struct cached_fid *init_cached_dir(const char *path)
                return NULL;
        }
 
-       INIT_WORK(&cfid->lease_break, smb2_cached_lease_break);
+       INIT_WORK(&cfid->close_work, cached_dir_offload_close);
+       INIT_WORK(&cfid->put_work, cached_dir_put_work);
        INIT_LIST_HEAD(&cfid->entry);
        INIT_LIST_HEAD(&cfid->dirents.entries);
        mutex_init(&cfid->dirents.de_mutex);
@@ -598,6 +653,9 @@ static void free_cached_dir(struct cached_fid *cfid)
 {
        struct cached_dirent *dirent, *q;
 
+       WARN_ON(work_pending(&cfid->close_work));
+       WARN_ON(work_pending(&cfid->put_work));
+
        dput(cfid->dentry);
        cfid->dentry = NULL;
 
@@ -615,10 +673,30 @@ static void free_cached_dir(struct cached_fid *cfid)
        kfree(cfid);
 }
 
+static void cfids_invalidation_worker(struct work_struct *work)
+{
+       struct cached_fids *cfids = container_of(work, struct cached_fids,
+                                                invalidation_work);
+       struct cached_fid *cfid, *q;
+       LIST_HEAD(entry);
+
+       spin_lock(&cfids->cfid_list_lock);
+       /* move cfids->dying to the local list */
+       list_cut_before(&entry, &cfids->dying, &cfids->dying);
+       spin_unlock(&cfids->cfid_list_lock);
+
+       list_for_each_entry_safe(cfid, q, &entry, entry) {
+               list_del(&cfid->entry);
+               /* Drop the ref-count acquired in invalidate_all_cached_dirs */
+               kref_put(&cfid->refcount, smb2_close_cached_fid);
+       }
+}
+
 static void cfids_laundromat_worker(struct work_struct *work)
 {
        struct cached_fids *cfids;
        struct cached_fid *cfid, *q;
+       struct dentry *dentry;
        LIST_HEAD(entry);
 
        cfids = container_of(work, struct cached_fids, laundromat_work.work);
@@ -644,18 +722,28 @@ static void cfids_laundromat_worker(struct work_struct *work)
 
        list_for_each_entry_safe(cfid, q, &entry, entry) {
                list_del(&cfid->entry);
-               /*
-                * Cancel and wait for the work to finish in case we are racing
-                * with it.
-                */
-               cancel_work_sync(&cfid->lease_break);
-               /*
-                * Drop the ref-count from above, either the lease-ref (if there
-                * was one) or the extra one acquired.
-                */
-               kref_put(&cfid->refcount, smb2_close_cached_fid);
+
+               spin_lock(&cfid->fid_lock);
+               dentry = cfid->dentry;
+               cfid->dentry = NULL;
+               spin_unlock(&cfid->fid_lock);
+
+               dput(dentry);
+               if (cfid->is_open) {
+                       spin_lock(&cifs_tcp_ses_lock);
+                       ++cfid->tcon->tc_count;
+                       trace_smb3_tcon_ref(cfid->tcon->debug_id, cfid->tcon->tc_count,
+                                           netfs_trace_tcon_ref_get_cached_laundromat);
+                       spin_unlock(&cifs_tcp_ses_lock);
+                       queue_work(serverclose_wq, &cfid->close_work);
+               } else
+                       /*
+                        * Drop the ref-count from above, either the lease-ref (if there
+                        * was one) or the extra one acquired.
+                        */
+                       kref_put(&cfid->refcount, smb2_close_cached_fid);
        }
-       queue_delayed_work(cifsiod_wq, &cfids->laundromat_work,
+       queue_delayed_work(cfid_put_wq, &cfids->laundromat_work,
                           dir_cache_timeout * HZ);
 }
 
@@ -668,9 +756,11 @@ struct cached_fids *init_cached_dirs(void)
                return NULL;
        spin_lock_init(&cfids->cfid_list_lock);
        INIT_LIST_HEAD(&cfids->entries);
+       INIT_LIST_HEAD(&cfids->dying);
 
+       INIT_WORK(&cfids->invalidation_work, cfids_invalidation_worker);
        INIT_DELAYED_WORK(&cfids->laundromat_work, cfids_laundromat_worker);
-       queue_delayed_work(cifsiod_wq, &cfids->laundromat_work,
+       queue_delayed_work(cfid_put_wq, &cfids->laundromat_work,
                           dir_cache_timeout * HZ);
 
        return cfids;
@@ -689,6 +779,7 @@ void free_cached_dirs(struct cached_fids *cfids)
                return;
 
        cancel_delayed_work_sync(&cfids->laundromat_work);
+       cancel_work_sync(&cfids->invalidation_work);
 
        spin_lock(&cfids->cfid_list_lock);
        list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
@@ -696,6 +787,11 @@ void free_cached_dirs(struct cached_fids *cfids)
                cfid->is_open = false;
                list_move(&cfid->entry, &entry);
        }
+       list_for_each_entry_safe(cfid, q, &cfids->dying, entry) {
+               cfid->on_list = false;
+               cfid->is_open = false;
+               list_move(&cfid->entry, &entry);
+       }
        spin_unlock(&cfids->cfid_list_lock);
 
        list_for_each_entry_safe(cfid, q, &entry, entry) {
index 81ba0fd..1dfe79d 100644 (file)
@@ -44,7 +44,8 @@ struct cached_fid {
        spinlock_t fid_lock;
        struct cifs_tcon *tcon;
        struct dentry *dentry;
-       struct work_struct lease_break;
+       struct work_struct put_work;
+       struct work_struct close_work;
        struct smb2_file_all_info file_all_info;
        struct cached_dirents dirents;
 };
@@ -53,10 +54,13 @@ struct cached_fid {
 struct cached_fids {
        /* Must be held when:
         * - accessing the cfids->entries list
+        * - accessing the cfids->dying list
         */
        spinlock_t cfid_list_lock;
        int num_entries;
        struct list_head entries;
+       struct list_head dying;
+       struct work_struct invalidation_work;
        struct delayed_work laundromat_work;
 };
 
index 9798534..c9f9b6e 100644 (file)
@@ -157,6 +157,7 @@ struct workqueue_struct     *fileinfo_put_wq;
 struct workqueue_struct        *cifsoplockd_wq;
 struct workqueue_struct        *deferredclose_wq;
 struct workqueue_struct        *serverclose_wq;
+struct workqueue_struct        *cfid_put_wq;
 __u32 cifs_lock_secret;
 
 /*
@@ -1920,9 +1921,16 @@ init_cifs(void)
                goto out_destroy_deferredclose_wq;
        }
 
+       cfid_put_wq = alloc_workqueue("cfid_put_wq",
+                                     WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+       if (!cfid_put_wq) {
+               rc = -ENOMEM;
+               goto out_destroy_serverclose_wq;
+       }
+
        rc = cifs_init_inodecache();
        if (rc)
-               goto out_destroy_serverclose_wq;
+               goto out_destroy_cfid_put_wq;
 
        rc = cifs_init_netfs();
        if (rc)
@@ -1990,6 +1998,8 @@ out_destroy_netfs:
        cifs_destroy_netfs();
 out_destroy_inodecache:
        cifs_destroy_inodecache();
+out_destroy_cfid_put_wq:
+       destroy_workqueue(cfid_put_wq);
 out_destroy_serverclose_wq:
        destroy_workqueue(serverclose_wq);
 out_destroy_deferredclose_wq:
index e97e6df..6e63abe 100644 (file)
@@ -1991,7 +1991,7 @@ require use of the stronger protocol */
  * cifsInodeInfo->lock_sem     cifsInodeInfo->llist            cifs_init_once
  *                             ->can_cache_brlcks
  * cifsInodeInfo->deferred_lock        cifsInodeInfo->deferred_closes  cifsInodeInfo_alloc
- * cached_fid->fid_mutex               cifs_tcon->crfid                tcon_info_alloc
+ * cached_fids->cfid_list_lock cifs_tcon->cfids->entries        init_cached_dirs
  * cifsFileInfo->fh_mutex              cifsFileInfo                    cifs_new_fileinfo
  * cifsFileInfo->file_info_lock        cifsFileInfo->count             cifs_new_fileinfo
  *                             ->invalidHandle                 initiate_cifs_search
@@ -2079,6 +2079,7 @@ extern struct workqueue_struct *fileinfo_put_wq;
 extern struct workqueue_struct *cifsoplockd_wq;
 extern struct workqueue_struct *deferredclose_wq;
 extern struct workqueue_struct *serverclose_wq;
+extern struct workqueue_struct *cfid_put_wq;
 extern __u32 cifs_lock_secret;
 
 extern mempool_t *cifs_sm_req_poolp;
index befe434..42c0306 100644 (file)
@@ -2496,13 +2496,10 @@ cifs_dentry_needs_reval(struct dentry *dentry)
                return true;
 
        if (!open_cached_dir_by_dentry(tcon, dentry->d_parent, &cfid)) {
-               spin_lock(&cfid->fid_lock);
                if (cfid->time && cifs_i->time > cfid->time) {
-                       spin_unlock(&cfid->fid_lock);
                        close_cached_dir(cfid);
                        return false;
                }
-               spin_unlock(&cfid->fid_lock);
                close_cached_dir(cfid);
        }
        /*
index 0b52d22..12cbd34 100644 (file)
@@ -44,6 +44,8 @@
        EM(netfs_trace_tcon_ref_free_ipc,               "FRE Ipc   ") \
        EM(netfs_trace_tcon_ref_free_ipc_fail,          "FRE Ipc-F ") \
        EM(netfs_trace_tcon_ref_free_reconnect_server,  "FRE Reconn") \
+       EM(netfs_trace_tcon_ref_get_cached_laundromat,  "GET Ch-Lau") \
+       EM(netfs_trace_tcon_ref_get_cached_lease_break, "GET Ch-Lea") \
        EM(netfs_trace_tcon_ref_get_cancelled_close,    "GET Cn-Cls") \
        EM(netfs_trace_tcon_ref_get_dfs_refer,          "GET DfsRef") \
        EM(netfs_trace_tcon_ref_get_find,               "GET Find  ") \
@@ -52,6 +54,7 @@
        EM(netfs_trace_tcon_ref_new,                    "NEW       ") \
        EM(netfs_trace_tcon_ref_new_ipc,                "NEW Ipc   ") \
        EM(netfs_trace_tcon_ref_new_reconnect_server,   "NEW Reconn") \
+       EM(netfs_trace_tcon_ref_put_cached_close,       "PUT Ch-Cls") \
        EM(netfs_trace_tcon_ref_put_cancelled_close,    "PUT Cn-Cls") \
        EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \
        EM(netfs_trace_tcon_ref_put_cancelled_mid,      "PUT Cn-Mid") \