NFSv4/pNFS: Do layout state recovery upon reboot
author Trond Myklebust <trond.myklebust@hammerspace.com>
Thu, 13 Jun 2024 05:00:55 +0000 (01:00 -0400)
committer Anna Schumaker <Anna.Schumaker@Netapp.com>
Mon, 8 Jul 2024 17:47:26 +0000 (13:47 -0400)
Some pNFS implementations, such as flexible files, want the client to
send the layout stats and layout errors that may have occurred while the
metadata server was booting. To do so, the client sends a layoutreturn
with an all-zero stateid while the server is in grace during reboot
recovery.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/nfs4state.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
include/linux/nfs_fs_sb.h

index 24188af..39ba9f4 100644 (file)
@@ -2548,7 +2548,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server,
                const struct nfs_fh *dummy)
 {
 #if IS_ENABLED(CONFIG_NFS_V4_2)
-       server->caps |= NFS_CAP_LAYOUTSTATS;
+       server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN;
 #endif
        return 0;
 }
index 5b45241..877f682 100644 (file)
@@ -1863,6 +1863,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
 
        if (!nfs4_state_clear_reclaim_reboot(clp))
                return;
+       pnfs_destroy_all_layouts(clp);
        ops = clp->cl_mvops->reboot_recovery_ops;
        cred = nfs4_get_clid_cred(clp);
        err = nfs4_reclaim_complete(clp, ops, cred);
@@ -2068,7 +2069,6 @@ static int nfs4_establish_lease(struct nfs_client *clp)
        put_cred(cred);
        if (status != 0)
                return status;
-       pnfs_destroy_all_layouts(clp);
        return 0;
 }
 
@@ -2680,6 +2680,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
                        section = "reclaim reboot";
                        status = nfs4_do_reclaim(clp,
                                clp->cl_mvops->reboot_recovery_ops);
+                       if (status == 0)
+                               status = pnfs_layout_handle_reboot(clp);
                        if (status == -EAGAIN)
                                continue;
                        if (status < 0)
index 31df5fa..aa69848 100644 (file)
@@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
                u32 seq);
 static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
                                struct list_head *tmp_list);
+static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo);
 
 /* Return the registered pnfs layout driver module matching given id */
 static struct pnfs_layoutdriver_type *
@@ -937,25 +938,37 @@ restart:
        return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
 }
 
-int pnfs_layout_destroy_byclid(struct nfs_client *clp,
-                              enum pnfs_layout_destroy_mode mode)
+static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp,
+                                                   struct list_head *list)
 {
        struct nfs_server *server;
-       LIST_HEAD(layout_list);
 
        spin_lock(&clp->cl_lock);
        rcu_read_lock();
 restart:
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
-               if (pnfs_layout_bulk_destroy_byserver_locked(clp,
-                                       server,
-                                       &layout_list) != 0)
+               if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
+                                                            list) != 0)
                        goto restart;
        }
        rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
+}
 
-       return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
+static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp,
+                                        struct list_head *list,
+                                        enum pnfs_layout_destroy_mode mode)
+{
+       pnfs_layout_build_destroy_list_byclient(clp, list);
+       return pnfs_layout_free_bulk_destroy_list(list, mode);
+}
+
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+                              enum pnfs_layout_destroy_mode mode)
+{
+       LIST_HEAD(layout_list);
+
+       return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode);
 }
 
 /*
@@ -971,6 +984,67 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
        pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE);
 }
 
+static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp,
+                                                   struct list_head *list)
+{
+       struct nfs_server *server;
+
+       spin_lock(&clp->cl_lock);
+       rcu_read_lock();
+restart:
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+               if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN))
+                       continue;
+               if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
+                                                            list) != 0)
+                       goto restart;
+       }
+       rcu_read_unlock();
+       spin_unlock(&clp->cl_lock);
+}
+
+static int pnfs_layout_bulk_list_reboot(struct list_head *list)
+{
+       struct pnfs_layout_hdr *lo;
+       struct nfs_server *server;
+       int ret;
+
+       list_for_each_entry(lo, list, plh_bulk_destroy) {
+               server = NFS_SERVER(lo->plh_inode);
+               ret = pnfs_layout_return_on_reboot(lo);
+               switch (ret) {
+               case 0:
+                       continue;
+               case -NFS4ERR_BAD_STATEID:
+                       server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN;
+                       break;
+               case -NFS4ERR_NO_GRACE:
+                       break;
+               default:
+                       goto err;
+               }
+               break;
+       }
+       return 0;
+err:
+       return ret;
+}
+
+int pnfs_layout_handle_reboot(struct nfs_client *clp)
+{
+       LIST_HEAD(list);
+       int ret = 0, ret2;
+
+       pnfs_layout_build_recover_list_byclient(clp, &list);
+       if (!list_empty(&list))
+               ret = pnfs_layout_bulk_list_reboot(&list);
+       ret2 = pnfs_layout_do_destroy_byclid(clp, &list,
+                                            PNFS_LAYOUT_INVALIDATE);
+       if (!ret)
+               ret = ret2;
+       return (ret == 0) ?  0 : -EAGAIN;
+}
+
 static void
 pnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred)
 {
@@ -1445,6 +1519,24 @@ pnfs_commit_and_return_layout(struct inode *inode)
        return ret;
 }
 
+static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo)
+{
+       struct inode *inode = lo->plh_inode;
+       const struct cred *cred;
+
+       spin_lock(&inode->i_lock);
+       if (!pnfs_layout_is_valid(lo)) {
+               spin_unlock(&inode->i_lock);
+               return 0;
+       }
+       cred = get_cred(lo->plh_lc_cred);
+       pnfs_get_layout_hdr(lo);
+       spin_unlock(&inode->i_lock);
+
+       return pnfs_send_layoutreturn(lo, &zero_stateid, &cred, IOMODE_ANY,
+                                     PNFS_FL_LAYOUTRETURN_PRIVILEGED);
+}
+
 bool pnfs_roc(struct inode *ino,
                struct nfs4_layoutreturn_args *args,
                struct nfs4_layoutreturn_res *res,
index d192feb..bb5142b 100644 (file)
@@ -356,6 +356,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
                                       struct pnfs_layout_segment *lseg);
 void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
                                      enum pnfs_iomode iomode);
+int pnfs_layout_handle_reboot(struct nfs_client *clp);
 
 /* nfs4_deviceid_flags */
 enum {
@@ -737,6 +738,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
 {
 }
 
+static inline int pnfs_layout_handle_reboot(struct nfs_client *clp)
+{
+       return 0;
+}
+
 static inline struct pnfs_layout_segment *
 pnfs_get_lseg(struct pnfs_layout_segment *lseg)
 {
index fe5b1a8..ba9df18 100644 (file)
@@ -278,6 +278,7 @@ struct nfs_server {
 #define NFS_CAP_LGOPEN         (1U << 5)
 #define NFS_CAP_CASE_INSENSITIVE       (1U << 6)
 #define NFS_CAP_CASE_PRESERVING        (1U << 7)
+#define NFS_CAP_REBOOT_LAYOUTRETURN    (1U << 8)
 #define NFS_CAP_OPEN_XOR       (1U << 12)
 #define NFS_CAP_DELEGTIME      (1U << 13)
 #define NFS_CAP_POSIX_LOCK     (1U << 14)