Merge tag 'nfs-for-5.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jul 2021 16:43:57 +0000 (09:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jul 2021 16:43:57 +0000 (09:43 -0700)
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Features:

   - Multiple patches to add support for fcntl() leases over NFSv4.

   - A sysfs interface to display more information about the various
     transport connections used by the RPC client

   - A sysfs interface to allow a suitably privileged user to offline a
     transport that may no longer point to a valid server

   - A sysfs interface to allow a suitably privileged user to change the
     server IP address used by the RPC client

  Stable fixes:

   - Two sunrpc fixes for deadlocks involving privileged rpc_wait_queues

  Bugfixes:

   - SUNRPC: Avoid a KASAN slab-out-of-bounds bug in xdr_set_page_base()

   - SUNRPC: prevent port reuse on transports which don't request it.

   - NFSv3: Fix memory leak in posix_acl_create()

   - NFS: Various fixes to attribute revalidation timeouts

   - NFSv4: Fix handling of non-atomic change attribute updates

   - NFSv4: If a server is down, don't cause mounts to other servers to
     hang as well

   - pNFS: Fix an Oops in pnfs_mark_request_commit() when doing O_DIRECT

   - NFS: Fix mount failures due to incorrect setting of the
     has_sec_mnt_opts filesystem flag

   - NFS: Ensure nfs_readpage returns promptly when an internal error
     occurs

   - NFS: Fix fscache read from NFS after cache error

   - pNFS: Various bugfixes around the LAYOUTGET operation"

* tag 'nfs-for-5.14-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (46 commits)
  NFSv4/pNFS: Return an error if _nfs4_pnfs_v3_ds_connect can't load NFSv3
  NFSv4/pNFS: Don't call _nfs4_pnfs_v3_ds_connect multiple times
  NFSv4/pnfs: Clean up layout get on open
  NFSv4/pnfs: Fix layoutget behaviour after invalidation
  NFSv4/pnfs: Fix the layout barrier update
  NFS: Fix fscache read from NFS after cache error
  NFS: Ensure nfs_readpage returns promptly when internal error occurs
  sunrpc: remove an offlined xprt using sysfs
  sunrpc: provide showing transport's state info in the sysfs directory
  sunrpc: display xprt's queuelen of assigned tasks via sysfs
  sunrpc: provide multipath info in the sysfs directory
  NFSv4.1 identify and mark RPC tasks that can move between transports
  sunrpc: provide transport info in the sysfs directory
  SUNRPC: take a xprt offline using sysfs
  sunrpc: add dst_attr attributes to the sysfs xprt directory
  SUNRPC for TCP display xprt's source port in sysfs xprt_info
  SUNRPC query transport's source port
  SUNRPC display xprt's main value in sysfs's xprt_info
  SUNRPC mark the first transport
  sunrpc: add add sysfs directory per xprt under each xprt_switch
  ...

34 files changed:
fs/nfs/delegation.c
fs/nfs/delegation.h
fs/nfs/direct.c
fs/nfs/fscache.c
fs/nfs/getroot.c
fs/nfs/inode.c
fs/nfs/nfs3proc.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4client.c
fs/nfs/nfs4file.c
fs/nfs/nfs4proc.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs_nfs.c
fs/nfs/read.c
fs/nfs/write.c
include/linux/nfs_fs.h
include/linux/nfs_xdr.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/xprt.h
include/linux/sunrpc/xprtmultipath.h
include/linux/sunrpc/xprtsock.h
net/sunrpc/Makefile
net/sunrpc/clnt.c
net/sunrpc/sched.c
net/sunrpc/sunrpc_syms.c
net/sunrpc/sysfs.c [new file with mode: 0644]
net/sunrpc/sysfs.h [new file with mode: 0644]
net/sunrpc/xdr.c
net/sunrpc/xprt.c
net/sunrpc/xprtmultipath.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtsock.c

index e6ec6f0..1111839 100644 (file)
@@ -75,6 +75,13 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
        set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
 }
 
+static void nfs_mark_return_delegation(struct nfs_server *server,
+                                      struct nfs_delegation *delegation)
+{
+       set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+       set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
+}
+
 static bool
 nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
                fmode_t flags)
@@ -293,6 +300,7 @@ nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
                goto out;
        spin_lock(&delegation->lock);
        if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+               clear_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
                /* Refcount matched in nfs_end_delegation_return() */
                ret = nfs_get_delegation(delegation);
        }
@@ -314,16 +322,17 @@ nfs_start_delegation_return(struct nfs_inode *nfsi)
        return delegation;
 }
 
-static void
-nfs_abort_delegation_return(struct nfs_delegation *delegation,
-               struct nfs_client *clp)
+static void nfs_abort_delegation_return(struct nfs_delegation *delegation,
+                                       struct nfs_client *clp, int err)
 {
 
        spin_lock(&delegation->lock);
        clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
-       set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
+       if (err == -EAGAIN) {
+               set_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags);
+               set_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state);
+       }
        spin_unlock(&delegation->lock);
-       set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
 }
 
 static struct nfs_delegation *
@@ -521,11 +530,18 @@ out:
 static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
 {
        struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+       unsigned int mode = O_WRONLY | O_RDWR;
        int err = 0;
 
        if (delegation == NULL)
                return 0;
-       do {
+
+       if (!issync)
+               mode |= O_NONBLOCK;
+       /* Recall of any remaining application leases */
+       err = break_lease(inode, mode);
+
+       while (err == 0) {
                if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
                        break;
                err = nfs_delegation_claim_opens(inode, &delegation->stateid,
@@ -536,10 +552,10 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
                 * Guard against state recovery
                 */
                err = nfs4_wait_clnt_recover(clp);
-       } while (err == 0);
+       }
 
        if (err) {
-               nfs_abort_delegation_return(delegation, clp);
+               nfs_abort_delegation_return(delegation, clp, err);
                goto out;
        }
 
@@ -568,6 +584,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
        if (ret)
                clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
        if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
+           test_bit(NFS_DELEGATION_RETURN_DELAYED, &delegation->flags) ||
            test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
                ret = false;
 
@@ -647,6 +664,38 @@ out:
        return err;
 }
 
+static bool nfs_server_clear_delayed_delegations(struct nfs_server *server)
+{
+       struct nfs_delegation *d;
+       bool ret = false;
+
+       list_for_each_entry_rcu (d, &server->delegations, super_list) {
+               if (!test_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags))
+                       continue;
+               nfs_mark_return_delegation(server, d);
+               clear_bit(NFS_DELEGATION_RETURN_DELAYED, &d->flags);
+               ret = true;
+       }
+       return ret;
+}
+
+static bool nfs_client_clear_delayed_delegations(struct nfs_client *clp)
+{
+       struct nfs_server *server;
+       bool ret = false;
+
+       if (!test_and_clear_bit(NFS4CLNT_DELEGRETURN_DELAYED, &clp->cl_state))
+               goto out;
+       rcu_read_lock();
+       list_for_each_entry_rcu (server, &clp->cl_superblocks, client_link) {
+               if (nfs_server_clear_delayed_delegations(server))
+                       ret = true;
+       }
+       rcu_read_unlock();
+out:
+       return ret;
+}
+
 /**
  * nfs_client_return_marked_delegations - return previously marked delegations
  * @clp: nfs_client to process
@@ -659,8 +708,14 @@ out:
  */
 int nfs_client_return_marked_delegations(struct nfs_client *clp)
 {
-       return nfs_client_for_each_server(clp,
-                       nfs_server_return_marked_delegations, NULL);
+       int err = nfs_client_for_each_server(
+               clp, nfs_server_return_marked_delegations, NULL);
+       if (err)
+               return err;
+       /* If a return was delayed, sleep to prevent hard looping */
+       if (nfs_client_clear_delayed_delegations(clp))
+               ssleep(1);
+       return 0;
 }
 
 /**
@@ -698,13 +753,14 @@ int nfs4_inode_return_delegation(struct inode *inode)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_delegation *delegation;
-       int err = 0;
 
-       nfs_wb_all(inode);
        delegation = nfs_start_delegation_return(nfsi);
+       /* Synchronous recall of any application leases */
+       break_lease(inode, O_WRONLY | O_RDWR);
+       nfs_wb_all(inode);
        if (delegation != NULL)
-               err = nfs_end_delegation_return(inode, delegation, 1);
-       return err;
+               return nfs_end_delegation_return(inode, delegation, 1);
+       return 0;
 }
 
 /**
@@ -775,13 +831,6 @@ static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
        set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
 }
 
-static void nfs_mark_return_delegation(struct nfs_server *server,
-               struct nfs_delegation *delegation)
-{
-       set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
-       set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
-}
-
 static bool nfs_server_mark_return_all_delegations(struct nfs_server *server)
 {
        struct nfs_delegation *delegation;
@@ -1010,6 +1059,9 @@ int nfs_async_inode_return_delegation(struct inode *inode,
        nfs_mark_return_delegation(server, delegation);
        rcu_read_unlock();
 
+       /* If there are any application leases or delegations, recall them */
+       break_lease(inode, O_WRONLY | O_RDWR | O_NONBLOCK);
+
        nfs_delegation_run_state_manager(clp);
        return 0;
 out_enoent:
index c19b4fd..1c37899 100644 (file)
@@ -36,6 +36,7 @@ enum {
        NFS_DELEGATION_REVOKED,
        NFS_DELEGATION_TEST_EXPIRED,
        NFS_DELEGATION_INODE_FREEING,
+       NFS_DELEGATION_RETURN_DELAYED,
 };
 
 int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
index 2d30a4d..2e894fe 100644 (file)
@@ -700,8 +700,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
        struct nfs_direct_req *dreq = hdr->dreq;
        struct nfs_commit_info cinfo;
-       bool request_commit = false;
        struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+       int flags = NFS_ODIRECT_DONE;
 
        nfs_init_cinfo_from_dreq(&cinfo, dreq);
 
@@ -713,15 +713,9 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 
        nfs_direct_count_bytes(dreq, hdr);
        if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) {
-               switch (dreq->flags) {
-               case 0:
+               if (!dreq->flags)
                        dreq->flags = NFS_ODIRECT_DO_COMMIT;
-                       request_commit = true;
-                       break;
-               case NFS_ODIRECT_RESCHED_WRITES:
-               case NFS_ODIRECT_DO_COMMIT:
-                       request_commit = true;
-               }
+               flags = dreq->flags;
        }
        spin_unlock(&dreq->lock);
 
@@ -729,12 +723,15 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 
                req = nfs_list_entry(hdr->pages.next);
                nfs_list_remove_request(req);
-               if (request_commit) {
+               if (flags == NFS_ODIRECT_DO_COMMIT) {
                        kref_get(&req->wb_kref);
                        memcpy(&req->wb_verf, &hdr->verf.verifier,
                               sizeof(req->wb_verf));
                        nfs_mark_request_commit(req, hdr->lseg, &cinfo,
                                hdr->ds_commit_idx);
+               } else if (flags == NFS_ODIRECT_RESCHED_WRITES) {
+                       kref_get(&req->wb_kref);
+                       nfs_mark_request_commit(req, NULL, &cinfo, 0);
                }
                nfs_unlock_and_release_request(req);
        }
index c4c021c..d743629 100644 (file)
@@ -385,12 +385,15 @@ static void nfs_readpage_from_fscache_complete(struct page *page,
                 "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n",
                 page, context, error);
 
-       /* if the read completes with an error, we just unlock the page and let
-        * the VM reissue the readpage */
-       if (!error) {
+       /*
+        * If the read completes with an error, mark the page with PG_checked,
+        * unlock the page, and let the VM reissue the readpage.
+        */
+       if (!error)
                SetPageUptodate(page);
-               unlock_page(page);
-       }
+       else
+               SetPageChecked(page);
+       unlock_page(page);
 }
 
 /*
@@ -405,6 +408,11 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
                 "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
                 nfs_i_fscache(inode), page, page->index, page->flags, inode);
 
+       if (PageChecked(page)) {
+               ClearPageChecked(page);
+               return 1;
+       }
+
        ret = fscache_read_or_alloc_page(nfs_i_fscache(inode),
                                         page,
                                         nfs_readpage_from_fscache_complete,
index aaeeb46..59355c1 100644 (file)
@@ -67,7 +67,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 int nfs_get_root(struct super_block *s, struct fs_context *fc)
 {
        struct nfs_fs_context *ctx = nfs_fc2context(fc);
-       struct nfs_server *server = NFS_SB(s);
+       struct nfs_server *server = NFS_SB(s), *clone_server;
        struct nfs_fsinfo fsinfo;
        struct dentry *root;
        struct inode *inode;
@@ -127,7 +127,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
        }
        spin_unlock(&root->d_lock);
        fc->root = root;
-       if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
+       if (server->caps & NFS_CAP_SECURITY_LABEL)
                kflags |= SECURITY_LSM_NATIVE_LABELS;
        if (ctx->clone_data.sb) {
                if (d_inode(fc->root)->i_fop != &nfs_dir_operations) {
@@ -137,15 +137,19 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
                /* clone lsm security options from the parent to the new sb */
                error = security_sb_clone_mnt_opts(ctx->clone_data.sb,
                                                   s, kflags, &kflags_out);
+               if (error)
+                       goto error_splat_root;
+               clone_server = NFS_SB(ctx->clone_data.sb);
+               server->has_sec_mnt_opts = clone_server->has_sec_mnt_opts;
        } else {
                error = security_sb_set_mnt_opts(s, fc->security,
                                                        kflags, &kflags_out);
        }
        if (error)
                goto error_splat_root;
-       if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
+       if (server->caps & NFS_CAP_SECURITY_LABEL &&
                !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
-               NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
+               server->caps &= ~NFS_CAP_SECURITY_LABEL;
 
        nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label);
        error = 0;
index 529c409..853213b 100644 (file)
@@ -1101,6 +1101,7 @@ EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
 void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
 {
        filp->private_data = get_nfs_open_context(ctx);
+       set_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags);
        if (list_empty(&ctx->list))
                nfs_inode_attach_open_context(ctx);
 }
@@ -1120,6 +1121,8 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct
                        continue;
                if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
                        continue;
+               if (!test_bit(NFS_CONTEXT_FILE_OPEN, &pos->flags))
+                       continue;
                ctx = get_nfs_open_context(pos);
                if (ctx)
                        break;
@@ -1135,6 +1138,7 @@ void nfs_file_clear_open_context(struct file *filp)
        if (ctx) {
                struct inode *inode = d_inode(ctx->dentry);
 
+               clear_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags);
                /*
                 * We fatal error on write before. Try to writeback
                 * every page again.
@@ -2055,35 +2059,33 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                                        | NFS_INO_INVALID_OTHER;
                                if (S_ISDIR(inode->i_mode))
                                        nfs_force_lookup_revalidate(inode);
+                               attr_changed = true;
                                dprintk("NFS: change_attr change on server for file %s/%ld\n",
                                                inode->i_sb->s_id,
                                                inode->i_ino);
                        } else if (!have_delegation)
                                nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER;
                        inode_set_iversion_raw(inode, fattr->change_attr);
-                       attr_changed = true;
                }
        } else {
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_CHANGE;
-               cache_revalidated = false;
+               if (!have_delegation ||
+                   (nfsi->cache_validity & NFS_INO_INVALID_CHANGE) != 0)
+                       cache_revalidated = false;
        }
 
-       if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
+       if (fattr->valid & NFS_ATTR_FATTR_MTIME)
                inode->i_mtime = fattr->mtime;
-       } else if (fattr_supported & NFS_ATTR_FATTR_MTIME) {
+       else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_MTIME;
-               cache_revalidated = false;
-       }
 
-       if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
+       if (fattr->valid & NFS_ATTR_FATTR_CTIME)
                inode->i_ctime = fattr->ctime;
-       } else if (fattr_supported & NFS_ATTR_FATTR_CTIME) {
+       else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_CTIME;
-               cache_revalidated = false;
-       }
 
        /* Check if our cached file size is stale */
        if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -2096,7 +2098,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                                i_size_write(inode, new_isize);
                                if (!have_writers)
                                        invalid |= NFS_INO_INVALID_DATA;
-                               attr_changed = true;
                        }
                        dprintk("NFS: isize change on server for file %s/%ld "
                                        "(%Ld to %Ld)\n",
@@ -2111,19 +2112,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        fattr->du.nfs3.used = 0;
                        fattr->valid |= NFS_ATTR_FATTR_SPACE_USED;
                }
-       } else {
+       } else
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_SIZE;
-               cache_revalidated = false;
-       }
 
        if (fattr->valid & NFS_ATTR_FATTR_ATIME)
                inode->i_atime = fattr->atime;
-       else if (fattr_supported & NFS_ATTR_FATTR_ATIME) {
+       else if (fattr_supported & NFS_ATTR_FATTR_ATIME)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_ATIME;
-               cache_revalidated = false;
-       }
 
        if (fattr->valid & NFS_ATTR_FATTR_MODE) {
                if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
@@ -2132,71 +2129,55 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        inode->i_mode = newmode;
                        invalid |= NFS_INO_INVALID_ACCESS
                                | NFS_INO_INVALID_ACL;
-                       attr_changed = true;
                }
-       } else if (fattr_supported & NFS_ATTR_FATTR_MODE) {
+       } else if (fattr_supported & NFS_ATTR_FATTR_MODE)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_MODE;
-               cache_revalidated = false;
-       }
 
        if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
                if (!uid_eq(inode->i_uid, fattr->uid)) {
                        invalid |= NFS_INO_INVALID_ACCESS
                                | NFS_INO_INVALID_ACL;
                        inode->i_uid = fattr->uid;
-                       attr_changed = true;
                }
-       } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) {
+       } else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_OTHER;
-               cache_revalidated = false;
-       }
 
        if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
                if (!gid_eq(inode->i_gid, fattr->gid)) {
                        invalid |= NFS_INO_INVALID_ACCESS
                                | NFS_INO_INVALID_ACL;
                        inode->i_gid = fattr->gid;
-                       attr_changed = true;
                }
-       } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) {
+       } else if (fattr_supported & NFS_ATTR_FATTR_GROUP)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_OTHER;
-               cache_revalidated = false;
-       }
 
        if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
                if (inode->i_nlink != fattr->nlink) {
                        if (S_ISDIR(inode->i_mode))
                                invalid |= NFS_INO_INVALID_DATA;
                        set_nlink(inode, fattr->nlink);
-                       attr_changed = true;
                }
-       } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) {
+       } else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_NLINK;
-               cache_revalidated = false;
-       }
 
        if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
                /*
                 * report the blocks in 512byte units
                 */
                inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-       } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) {
+       } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_BLOCKS;
-               cache_revalidated = false;
-       }
 
-       if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) {
+       if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
                inode->i_blocks = fattr->du.nfs2.blocks;
-       } else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) {
+       else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_BLOCKS;
-               cache_revalidated = false;
-       }
 
        /* Update attrtimeo value if we're out of the unstable period */
        if (attr_changed) {
index 5c4e23a..2299446 100644 (file)
@@ -385,7 +385,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                                break;
 
                        case NFS3_CREATE_UNCHECKED:
-                               goto out;
+                               goto out_release_acls;
                }
                nfs_fattr_init(data->res.dir_attr);
                nfs_fattr_init(data->res.fattr);
@@ -751,7 +751,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                break;
        default:
                status = -EINVAL;
-               goto out;
+               goto out_release_acls;
        }
 
        d_alias = nfs3_do_create(dir, dentry, data);
index 543d916..ba78df4 100644 (file)
@@ -45,6 +45,7 @@ enum nfs4_client_state {
        NFS4CLNT_RECALL_RUNNING,
        NFS4CLNT_RECALL_ANY_LAYOUT_READ,
        NFS4CLNT_RECALL_ANY_LAYOUT_RW,
+       NFS4CLNT_DELEGRETURN_DELAYED,
 };
 
 #define NFS4_RENEW_TIMEOUT             0x01
@@ -322,7 +323,8 @@ extern int update_open_stateid(struct nfs4_state *state,
                                const nfs4_stateid *open_stateid,
                                const nfs4_stateid *deleg_stateid,
                                fmode_t fmode);
-
+extern int nfs4_proc_setlease(struct file *file, long arg,
+                             struct file_lock **lease, void **priv);
 extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
                struct nfs_fsinfo *fsinfo);
 extern void nfs4_update_changeattr(struct inode *dir,
index 4271938..28431ac 100644 (file)
@@ -197,8 +197,11 @@ void nfs40_shutdown_client(struct nfs_client *clp)
 
 struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
 {
-       int err;
+       char buf[INET6_ADDRSTRLEN + 1];
+       const char *ip_addr = cl_init->ip_addr;
        struct nfs_client *clp = nfs_alloc_client(cl_init);
+       int err;
+
        if (IS_ERR(clp))
                return clp;
 
@@ -222,6 +225,44 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
        init_waitqueue_head(&clp->cl_lock_waitq);
 #endif
        INIT_LIST_HEAD(&clp->pending_cb_stateids);
+
+       if (cl_init->minorversion != 0)
+               __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
+       __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+       __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
+
+       /*
+        * Set up the connection to the server before we add the client to the
+        * global list.
+        */
+       err = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_GSS_KRB5I);
+       if (err == -EINVAL)
+               err = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
+       if (err < 0)
+               goto error;
+
+       /* If no clientaddr= option was specified, find a usable cb address */
+       if (ip_addr == NULL) {
+               struct sockaddr_storage cb_addr;
+               struct sockaddr *sap = (struct sockaddr *)&cb_addr;
+
+               err = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
+               if (err < 0)
+                       goto error;
+               err = rpc_ntop(sap, buf, sizeof(buf));
+               if (err < 0)
+                       goto error;
+               ip_addr = (const char *)buf;
+       }
+       strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
+
+       err = nfs_idmap_new(clp);
+       if (err < 0) {
+               dprintk("%s: failed to create idmapper. Error = %d\n",
+                       __func__, err);
+               goto error;
+       }
+       __set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
        return clp;
 
 error:
@@ -372,8 +413,6 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
 struct nfs_client *nfs4_init_client(struct nfs_client *clp,
                                    const struct nfs_client_initdata *cl_init)
 {
-       char buf[INET6_ADDRSTRLEN + 1];
-       const char *ip_addr = cl_init->ip_addr;
        struct nfs_client *old;
        int error;
 
@@ -381,43 +420,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
                /* the client is initialised already */
                return clp;
 
-       /* Check NFS protocol revision and initialize RPC op vector */
-       clp->rpc_ops = &nfs_v4_clientops;
-
-       if (clp->cl_minorversion != 0)
-               __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
-       __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
-       __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
-
-       error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_GSS_KRB5I);
-       if (error == -EINVAL)
-               error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
-       if (error < 0)
-               goto error;
-
-       /* If no clientaddr= option was specified, find a usable cb address */
-       if (ip_addr == NULL) {
-               struct sockaddr_storage cb_addr;
-               struct sockaddr *sap = (struct sockaddr *)&cb_addr;
-
-               error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
-               if (error < 0)
-                       goto error;
-               error = rpc_ntop(sap, buf, sizeof(buf));
-               if (error < 0)
-                       goto error;
-               ip_addr = (const char *)buf;
-       }
-       strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
-
-       error = nfs_idmap_new(clp);
-       if (error < 0) {
-               dprintk("%s: failed to create idmapper. Error = %d\n",
-                       __func__, error);
-               goto error;
-       }
-       __set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
-
        error = nfs4_init_client_minor_version(clp);
        if (error < 0)
                goto error;
index a1e5c6b..c820de5 100644 (file)
@@ -435,6 +435,12 @@ void nfs42_ssc_unregister_ops(void)
 }
 #endif /* CONFIG_NFS_V4_2 */
 
+static int nfs4_setlease(struct file *file, long arg, struct file_lock **lease,
+                        void **priv)
+{
+       return nfs4_proc_setlease(file, arg, lease, priv);
+}
+
 const struct file_operations nfs4_file_operations = {
        .read_iter      = nfs_file_read,
        .write_iter     = nfs_file_write,
@@ -448,7 +454,7 @@ const struct file_operations nfs4_file_operations = {
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .check_flags    = nfs_check_flags,
-       .setlease       = simple_nosetlease,
+       .setlease       = nfs4_setlease,
 #ifdef CONFIG_NFS_V4_2
        .copy_file_range = nfs4_copy_file_range,
        .llseek         = nfs4_file_llseek,
index e653654..e1214bb 100644 (file)
@@ -1155,7 +1155,11 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
                                   struct nfs4_sequence_args *args,
                                   struct nfs4_sequence_res *res)
 {
-       return nfs4_do_call_sync(clnt, server, msg, args, res, 0);
+       unsigned short task_flags = 0;
+
+       if (server->nfs_client->cl_minorversion)
+               task_flags = RPC_TASK_MOVEABLE;
+       return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags);
 }
 
 
@@ -1205,12 +1209,12 @@ nfs4_update_changeattr_locked(struct inode *inode,
        u64 change_attr = inode_peek_iversion_raw(inode);
 
        cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+       if (S_ISDIR(inode->i_mode))
+               cache_validity |= NFS_INO_INVALID_DATA;
 
        switch (NFS_SERVER(inode)->change_attr_type) {
        case NFS4_CHANGE_TYPE_IS_UNDEFINED:
-               break;
-       case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
-               if ((s64)(change_attr - cinfo->after) > 0)
+               if (cinfo->after == change_attr)
                        goto out;
                break;
        default:
@@ -1218,24 +1222,21 @@ nfs4_update_changeattr_locked(struct inode *inode,
                        goto out;
        }
 
-       if (cinfo->atomic && cinfo->before == change_attr) {
-               nfsi->attrtimeo_timestamp = jiffies;
-       } else {
-               if (S_ISDIR(inode->i_mode)) {
-                       cache_validity |= NFS_INO_INVALID_DATA;
+       inode_set_iversion_raw(inode, cinfo->after);
+       if (!cinfo->atomic || cinfo->before != change_attr) {
+               if (S_ISDIR(inode->i_mode))
                        nfs_force_lookup_revalidate(inode);
-               } else {
-                       if (!NFS_PROTO(inode)->have_delegation(inode,
-                                                              FMODE_READ))
-                               cache_validity |= NFS_INO_REVAL_PAGECACHE;
-               }
 
-               if (cinfo->before != change_attr)
-                       cache_validity |= NFS_INO_INVALID_ACCESS |
-                                         NFS_INO_INVALID_ACL |
-                                         NFS_INO_INVALID_XATTR;
+               if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+                       cache_validity |=
+                               NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
+                               NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
+                               NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
+                               NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR |
+                               NFS_INO_REVAL_PAGECACHE;
+               nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
        }
-       inode_set_iversion_raw(inode, cinfo->after);
+       nfsi->attrtimeo_timestamp = jiffies;
        nfsi->read_cache_jiffies = timestamp;
        nfsi->attr_gencount = nfs_inc_attr_generation_counter();
        nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
@@ -2569,6 +2570,9 @@ static int nfs4_run_open_task(struct nfs4_opendata *data,
        };
        int status;
 
+       if (server->nfs_client->cl_minorversion)
+               task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
        kref_get(&data->kref);
        data->rpc_done = false;
        data->rpc_status = 0;
@@ -3749,6 +3753,9 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
        };
        int status = -ENOMEM;
 
+       if (server->nfs_client->cl_minorversion)
+               task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
        nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP,
                &task_setup_data.rpc_client, &msg);
 
@@ -4188,6 +4195,9 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
        };
        unsigned short task_flags = 0;
 
+       if (nfs4_has_session(server->nfs_client))
+               task_flags = RPC_TASK_MOVEABLE;
+
        /* Is this is an attribute revalidation, subject to softreval? */
        if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
                task_flags |= RPC_TASK_TIMEOUT;
@@ -4307,6 +4317,9 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
        };
        unsigned short task_flags = 0;
 
+       if (server->nfs_client->cl_minorversion)
+               task_flags = RPC_TASK_MOVEABLE;
+
        /* Is this is an attribute revalidation, subject to softreval? */
        if (nfs_lookup_is_soft_revalidate(dentry))
                task_flags |= RPC_TASK_TIMEOUT;
@@ -6538,7 +6551,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
                .rpc_client = server->client,
                .rpc_message = &msg,
                .callback_ops = &nfs4_delegreturn_ops,
-               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE,
        };
        int status = 0;
 
@@ -6856,6 +6869,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
                .workqueue = nfsiod_workqueue,
                .flags = RPC_TASK_ASYNC,
        };
+       struct nfs_client *client =
+               NFS_SERVER(lsp->ls_state->inode)->nfs_client;
+
+       if (client->cl_minorversion)
+               task_setup_data.flags |= RPC_TASK_MOVEABLE;
 
        nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client,
                NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg);
@@ -7130,6 +7148,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
                .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
        };
        int ret;
+       struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client;
+
+       if (client->cl_minorversion)
+               task_setup_data.flags |= RPC_TASK_MOVEABLE;
 
        dprintk("%s: begin!\n", __func__);
        data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
@@ -7438,6 +7460,43 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
        return nfs4_retry_setlk(state, cmd, request);
 }
 
+static int nfs4_delete_lease(struct file *file, void **priv)
+{ /* Issue an F_UNLCK request for @file through generic_setlease(), passing no file_lock */
+       return generic_setlease(file, F_UNLCK, NULL, priv); /* result is returned to the caller as-is */
+}
+
+static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
+                         void **priv)
+{ /* Returns generic_setlease()'s result, or -EAGAIN when no matching delegation is held */
+       struct inode *inode = file_inode(file);
+       fmode_t type = arg == F_RDLCK ? FMODE_READ : FMODE_WRITE; /* delegation mode to look for: read for F_RDLCK, write otherwise */
+       int ret;
+
+       /* No delegation, no lease: refuse before touching the generic lease code */
+       if (!nfs4_have_delegation(inode, type))
+               return -EAGAIN;
+       ret = generic_setlease(file, arg, lease, priv);
+       if (ret || nfs4_have_delegation(inode, type)) /* error from generic_setlease(), or success with the delegation still held */
+               return ret;
+       /* We raced with a delegation return: undo the lease just set (result not checked) and report -EAGAIN */
+       nfs4_delete_lease(file, priv);
+       return -EAGAIN;
+}
+
+int nfs4_proc_setlease(struct file *file, long arg, struct file_lock **lease,
+                      void **priv)
+{ /* Dispatch a setlease request on @arg: add for F_RDLCK/F_WRLCK, delete for F_UNLCK */
+       switch (arg) {
+       case F_RDLCK:
+       case F_WRLCK: /* both lease types share the add path; @arg tells them apart there */
+               return nfs4_add_lease(file, arg, lease, priv);
+       case F_UNLCK: /* @lease is not forwarded on the delete path */
+               return nfs4_delete_lease(file, priv);
+       default: /* any other @arg value is rejected */
+               return -EINVAL;
+       }
+}
+
 int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
        struct nfs_server *server = NFS_SERVER(state->inode);
@@ -9186,7 +9245,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
                .rpc_client = clp->cl_rpcclient,
                .rpc_message = &msg,
                .callback_ops = &nfs41_sequence_ops,
-               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE,
        };
        struct rpc_task *ret;
 
@@ -9385,7 +9444,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 {
        struct inode *inode = lgp->args.inode;
        struct nfs_server *server = NFS_SERVER(inode);
-       struct pnfs_layout_hdr *lo;
+       struct pnfs_layout_hdr *lo = lgp->lo;
        int nfs4err = task->tk_status;
        int err, status = 0;
        LIST_HEAD(head);
@@ -9437,7 +9496,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
        case -NFS4ERR_BAD_STATEID:
                exception->timeout = 0;
                spin_lock(&inode->i_lock);
-               lo = NFS_I(inode)->layout;
                /* If the open stateid was bad, then recover it. */
                if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
                    !nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
@@ -9509,7 +9567,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
                .rpc_message = &msg,
                .callback_ops = &nfs4_layoutget_call_ops,
                .callback_data = lgp,
-               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF |
+                        RPC_TASK_MOVEABLE,
        };
        struct pnfs_layout_segment *lseg = NULL;
        struct nfs4_exception exception = {
@@ -9520,9 +9579,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
 
        dprintk("--> %s\n", __func__);
 
-       /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
-       pnfs_get_layout_hdr(NFS_I(inode)->layout);
-
        nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
 
        task = rpc_run_task(&task_setup_data);
@@ -9650,6 +9706,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
                .rpc_message = &msg,
                .callback_ops = &nfs4_layoutreturn_call_ops,
                .callback_data = lrp,
+               .flags = RPC_TASK_MOVEABLE,
        };
        int status = 0;
 
@@ -9804,6 +9861,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
                .rpc_message = &msg,
                .callback_ops = &nfs4_layoutcommit_ops,
                .callback_data = data,
+               .flags = RPC_TASK_MOVEABLE,
        };
        struct rpc_task *task;
        int status = 0;
@@ -10131,7 +10189,7 @@ static int nfs41_free_stateid(struct nfs_server *server,
                .rpc_client = server->client,
                .rpc_message = &msg,
                .callback_ops = &nfs41_free_stateid_ops,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
        };
        struct nfs_free_stateid_data *data;
        struct rpc_task *task;
index cf9cc62..cc232d1 100644 (file)
@@ -954,6 +954,7 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
 {
        struct nfs_pgio_header *hdr;
        int ret;
+       unsigned short task_flags = 0;
 
        hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
        if (!hdr) {
@@ -962,14 +963,17 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
        }
        nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
        ret = nfs_generic_pgio(desc, hdr);
-       if (ret == 0)
+       if (ret == 0) {
+               if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion)
+                       task_flags = RPC_TASK_MOVEABLE;
                ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
                                        hdr,
                                        hdr->cred,
                                        NFS_PROTO(hdr->inode),
                                        desc->pg_rpc_callops,
                                        desc->pg_ioflags,
-                                       RPC_TASK_CRED_NOREF);
+                                       RPC_TASK_CRED_NOREF | task_flags);
+       }
        return ret;
 }
 
index 2c01ee8..ef14ea0 100644 (file)
@@ -966,10 +966,8 @@ void
 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
                        const struct cred *cred, bool update_barrier)
 {
-       u32 oldseq, newseq, new_barrier = 0;
-
-       oldseq = be32_to_cpu(lo->plh_stateid.seqid);
-       newseq = be32_to_cpu(new->seqid);
+       u32 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+       u32 newseq = be32_to_cpu(new->seqid);
 
        if (!pnfs_layout_is_valid(lo)) {
                pnfs_set_layout_cred(lo, cred);
@@ -979,19 +977,21 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
                clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
                return;
        }
-       if (pnfs_seqid_is_newer(newseq, oldseq)) {
+
+       if (pnfs_seqid_is_newer(newseq, oldseq))
                nfs4_stateid_copy(&lo->plh_stateid, new);
-               /*
-                * Because of wraparound, we want to keep the barrier
-                * "close" to the current seqids.
-                */
-               new_barrier = newseq - atomic_read(&lo->plh_outstanding);
-       }
-       if (update_barrier)
-               new_barrier = be32_to_cpu(new->seqid);
-       else if (new_barrier == 0)
+
+       if (update_barrier) {
+               pnfs_barrier_update(lo, newseq);
                return;
-       pnfs_barrier_update(lo, new_barrier);
+       }
+       /*
+        * Because of wraparound, we want to keep the barrier
+        * "close" to the current seqids. We really only want to
+        * get here from a layoutget call.
+        */
+       if (atomic_read(&lo->plh_outstanding) == 1)
+                pnfs_barrier_update(lo, be32_to_cpu(lo->plh_stateid.seqid));
 }
 
 static bool
@@ -1128,8 +1128,7 @@ void pnfs_layoutget_free(struct nfs4_layoutget *lgp)
        size_t max_pages = lgp->args.layout.pglen / PAGE_SIZE;
 
        nfs4_free_pages(lgp->args.layout.pages, max_pages);
-       if (lgp->args.inode)
-               pnfs_put_layout_hdr(NFS_I(lgp->args.inode)->layout);
+       pnfs_put_layout_hdr(lgp->lo);
        put_nfs_open_context(lgp->args.ctx);
        kfree(lgp);
 }
@@ -2014,7 +2013,7 @@ lookup_again:
         * If the layout segment list is empty, but there are outstanding
         * layoutget calls, then they might be subject to a layoutrecall.
         */
-       if (list_empty(&lo->plh_segs) &&
+       if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) &&
            atomic_read(&lo->plh_outstanding) != 0) {
                spin_unlock(&ino->i_lock);
                lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
@@ -2124,6 +2123,9 @@ lookup_again:
                goto out_put_layout_hdr;
        }
 
+       lgp->lo = lo;
+       pnfs_get_layout_hdr(lo);
+
        lseg = nfs4_proc_layoutget(lgp, &timeout);
        trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
                                 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
@@ -2255,6 +2257,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data,
                pnfs_put_layout_hdr(lo);
                return;
        }
+       lgp->lo = lo;
        data->lgp = lgp;
        data->o_arg.lg_args = &lgp->args;
        data->o_res.lg_res = &lgp->res;
@@ -2263,6 +2266,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data,
 static void _lgopen_prepare_floating(struct nfs4_opendata *data,
                                     struct nfs_open_context *ctx)
 {
+       struct inode *ino = data->dentry->d_inode;
        struct pnfs_layout_range rng = {
                .iomode = (data->o_arg.fmode & FMODE_WRITE) ?
                          IOMODE_RW: IOMODE_READ,
@@ -2271,7 +2275,7 @@ static void _lgopen_prepare_floating(struct nfs4_opendata *data,
        };
        struct nfs4_layoutget *lgp;
 
-       lgp = pnfs_alloc_init_layoutget_args(NULL, ctx, &current_stateid,
+       lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid,
                                             &rng, GFP_KERNEL);
        if (!lgp)
                return;
@@ -2291,6 +2295,8 @@ void pnfs_lgopen_prepare(struct nfs4_opendata *data,
        /* Could check on max_ops, but currently hardcoded high enough */
        if (!nfs_server_capable(data->dir->d_inode, NFS_CAP_LGOPEN))
                return;
+       if (data->lgp)
+               return;
        if (data->state)
                _lgopen_prepare_attached(data, ctx);
        else
@@ -2330,13 +2336,13 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
                }
                return;
        }
-       if (!lgp->args.inode) {
+       if (!lgp->lo) {
                lo = _pnfs_grab_empty_layout(ino, ctx);
                if (!lo)
                        return;
-               lgp->args.inode = ino;
+               lgp->lo = lo;
        } else
-               lo = NFS_I(lgp->args.inode)->layout;
+               lo = lgp->lo;
 
        lseg = pnfs_layout_process(lgp);
        if (!IS_ERR(lseg)) {
@@ -2349,11 +2355,9 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
 void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
 {
        if (lgp != NULL) {
-               struct inode *inode = lgp->args.inode;
-               if (inode) {
-                       struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
-                       pnfs_clear_first_layoutget(lo);
-                       nfs_layoutget_end(lo);
+               if (lgp->lo) {
+                       pnfs_clear_first_layoutget(lgp->lo);
+                       nfs_layoutget_end(lgp->lo);
                }
                pnfs_layoutget_free(lgp);
        }
@@ -2362,7 +2366,7 @@ void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
 struct pnfs_layout_segment *
 pnfs_layout_process(struct nfs4_layoutget *lgp)
 {
-       struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
+       struct pnfs_layout_hdr *lo = lgp->lo;
        struct nfs4_layoutget_res *res = &lgp->res;
        struct pnfs_layout_segment *lseg;
        struct inode *ino = lo->plh_inode;
@@ -2390,11 +2394,13 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                goto out_forget;
        }
 
+       if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo))
+               goto out_forget;
+
        if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
                /* existing state ID, make sure the sequence number matches. */
                if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
-                       if (!pnfs_layout_is_valid(lo) &&
-                           pnfs_is_first_layoutget(lo))
+                       if (!pnfs_layout_is_valid(lo))
                                lo->plh_barrier = 0;
                        dprintk("%s forget reply due to sequence\n", __func__);
                        goto out_forget;
@@ -2413,8 +2419,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                goto out_forget;
        } else {
                /* We have a completely new layout */
-               if (!pnfs_is_first_layoutget(lo))
-                       goto out_forget;
                pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true);
        }
 
index 49d3389..cf19914 100644 (file)
@@ -805,19 +805,16 @@ out:
 }
 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
 
-static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
+static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) /* return type changes from void to int */
 {
        might_sleep();
-       wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
-                       TASK_KILLABLE);
+       return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE); /* wait_on_bit()'s result, previously discarded, now reaches the caller */
 }
 
 static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
 {
        smp_mb__before_atomic();
-       clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
-       smp_mb__after_atomic();
-       wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
+       clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state); /* single helper call replacing the removed clear_bit() / smp_mb__after_atomic() / wake_up_bit() sequence */
 }
 
 static struct nfs_client *(*get_v3_ds_connect)(
@@ -858,7 +855,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
        dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
 
        if (!load_v3_ds_connect())
-               goto out;
+               return -EPROTONOSUPPORT;
 
        list_for_each_entry(da, &ds->ds_addrs, da_node) {
                dprintk("%s: DS %s: trying address %s\n",
@@ -993,30 +990,33 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
 {
        int err;
 
-again:
-       err = 0;
-       if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
-               if (version == 3) {
-                       err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
-                                                      retrans);
-               } else if (version == 4) {
-                       err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo,
-                                                      retrans, minor_version);
-               } else {
-                       dprintk("%s: unsupported DS version %d\n", __func__,
-                               version);
-                       err = -EPROTONOSUPPORT;
-               }
+       do {
+               err = nfs4_wait_ds_connect(ds);
+               if (err || ds->ds_clp)
+                       goto out;
+               if (nfs4_test_deviceid_unavailable(devid))
+                       return -ENODEV;
+       } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
 
-               nfs4_clear_ds_conn_bit(ds);
-       } else {
-               nfs4_wait_ds_connect(ds);
+       if (ds->ds_clp)
+               goto connect_done;
 
-               /* what was waited on didn't connect AND didn't mark unavail */
-               if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
-                       goto again;
+       switch (version) {
+       case 3:
+               err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans);
+               break;
+       case 4:
+               err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans,
+                                              minor_version);
+               break;
+       default:
+               dprintk("%s: unsupported DS version %d\n", __func__, version);
+               err = -EPROTONOSUPPORT;
        }
 
+connect_done:
+       nfs4_clear_ds_conn_bit(ds);
+out:
        /*
         * At this point the ds->ds_clp should be ready, but it might have
         * hit an error.
index d2b6dce..9f39e0a 100644 (file)
@@ -74,8 +74,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
-static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio,
-                                    struct inode *inode)
+static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio)
 {
        struct nfs_pgio_mirror *pgm;
        unsigned long npages;
@@ -86,9 +85,9 @@ static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio,
        WARN_ON_ONCE(pgio->pg_mirror_count != 1);
 
        pgm = &pgio->pg_mirrors[0];
-       NFS_I(inode)->read_io += pgm->pg_bytes_written;
+       NFS_I(pgio->pg_inode)->read_io += pgm->pg_bytes_written;
        npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+       nfs_add_stats(pgio->pg_inode, NFSIOS_READPAGES, npages);
 }
 
 
@@ -363,22 +362,23 @@ int nfs_readpage(struct file *file, struct page *page)
        } else
                desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
 
+       xchg(&desc.ctx->error, 0);
        if (!IS_SYNC(inode)) {
                ret = nfs_readpage_from_fscache(desc.ctx, inode, page);
                if (ret == 0)
-                       goto out;
+                       goto out_wait;
        }
 
-       xchg(&desc.ctx->error, 0);
        nfs_pageio_init_read(&desc.pgio, inode, false,
                             &nfs_async_read_completion_ops);
 
        ret = readpage_async_filler(&desc, page);
+       if (ret)
+               goto out;
 
-       if (!ret)
-               nfs_pageio_complete_read(&desc.pgio, inode);
-
+       nfs_pageio_complete_read(&desc.pgio);
        ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
+out_wait:
        if (!ret) {
                ret = wait_on_page_locked_killable(page);
                if (!PageUptodate(page) && !ret)
@@ -430,7 +430,7 @@ int nfs_readpages(struct file *file, struct address_space *mapping,
 
        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
-       nfs_pageio_complete_read(&desc.pgio, inode);
+       nfs_pageio_complete_read(&desc.pgio);
 
 read_complete:
        put_nfs_open_context(desc.ctx);
index 3bf8217..eae9bf1 100644 (file)
@@ -1810,6 +1810,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
                struct nfs_commit_info *cinfo)
 {
        struct nfs_commit_data  *data;
+       unsigned short task_flags = 0;
 
        /* another commit raced with us */
        if (list_empty(head))
@@ -1820,8 +1821,11 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
        /* Set up the argument struct */
        nfs_init_commit(data, head, NULL, cinfo);
        atomic_inc(&cinfo->mds->rpcs_out);
+       if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
+               task_flags = RPC_TASK_MOVEABLE;
        return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
-                                  data->mds_ops, how, RPC_TASK_CRED_NOREF);
+                                  data->mds_ops, how,
+                                  RPC_TASK_CRED_NOREF | task_flags);
 }
 
 /*
index ffba254..ce64745 100644 (file)
@@ -84,6 +84,7 @@ struct nfs_open_context {
 #define NFS_CONTEXT_RESEND_WRITES      (1)
 #define NFS_CONTEXT_BAD                        (2)
 #define NFS_CONTEXT_UNLOCK     (3)
+#define NFS_CONTEXT_FILE_OPEN          (4)
        int error;
 
        struct list_head list;
index 717ecc8..e9698b6 100644 (file)
@@ -277,6 +277,7 @@ struct nfs4_layoutget {
        struct nfs4_layoutget_args args;
        struct nfs4_layoutget_res res;
        const struct cred *cred;
+       struct pnfs_layout_hdr *lo;
        gfp_t gfp_flags;
 };
 
index 02e7a58..8b5d5c9 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/sunrpc/xprtmultipath.h>
 
 struct rpc_inode;
+struct rpc_sysfs_client;
 
 /*
  * The high-level client handle
@@ -71,6 +72,7 @@ struct rpc_clnt {
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        struct dentry           *cl_debugfs;    /* debugfs directory */
 #endif
+       struct rpc_sysfs_client *cl_sysfs;      /* sysfs directory */
        /* cl_work is only needed after cl_xpi is no longer used,
         * and that are of similar size
         */
index df696ef..a237b8d 100644 (file)
@@ -121,6 +121,7 @@ struct rpc_task_setup {
  */
 #define RPC_TASK_ASYNC         0x0001          /* is an async task */
 #define RPC_TASK_SWAPPER       0x0002          /* is swapping in/out */
+#define RPC_TASK_MOVEABLE      0x0004          /* nfs4.1+ rpc tasks */
 #define RPC_TASK_NULLCREDS     0x0010          /* Use AUTH_NULL credential */
 #define RPC_CALL_MAJORSEEN     0x0020          /* major timeout seen */
 #define RPC_TASK_ROOTCREDS     0x0040          /* force root creds */
@@ -139,6 +140,7 @@ struct rpc_task_setup {
 #define RPC_IS_SOFT(t)         ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
 #define RPC_IS_SOFTCONN(t)     ((t)->tk_flags & RPC_TASK_SOFTCONN)
 #define RPC_WAS_SENT(t)                ((t)->tk_flags & RPC_TASK_SENT)
+#define RPC_IS_MOVEABLE(t)     ((t)->tk_flags & RPC_TASK_MOVEABLE)
 
 #define RPC_TASK_RUNNING       0
 #define RPC_TASK_QUEUED                1
index 61b622e..c8c39f2 100644 (file)
@@ -53,6 +53,7 @@ enum rpc_display_format_t {
 
 struct rpc_task;
 struct rpc_xprt;
+struct xprt_class;
 struct seq_file;
 struct svc_serv;
 struct net;
@@ -182,9 +183,11 @@ enum xprt_transports {
        XPRT_TRANSPORT_LOCAL    = 257,
 };
 
+struct rpc_sysfs_xprt;
 struct rpc_xprt {
        struct kref             kref;           /* Reference count */
        const struct rpc_xprt_ops *ops;         /* transport methods */
+       unsigned int            id;             /* transport id */
 
        const struct rpc_timeout *timeout;      /* timeout parms */
        struct sockaddr_storage addr;           /* server address */
@@ -288,6 +291,9 @@ struct rpc_xprt {
        atomic_t                inject_disconnect;
 #endif
        struct rcu_head         rcu;
+       const struct xprt_class *xprt_class;
+       struct rpc_sysfs_xprt   *xprt_sysfs;
+       bool                    main; /*mark if this is the 1st transport */
 };
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
@@ -370,6 +376,7 @@ struct rpc_xprt *   xprt_alloc(struct net *net, size_t size,
 void                   xprt_free(struct rpc_xprt *);
 void                   xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task);
 bool                   xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req);
+void                   xprt_cleanup_ids(void);
 
 static inline int
 xprt_enable_swap(struct rpc_xprt *xprt)
@@ -408,6 +415,7 @@ void                        xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
 
 bool                   xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *);
 void                   xprt_unlock_connect(struct rpc_xprt *, void *);
+void                   xprt_release_write(struct rpc_xprt *, struct rpc_task *);
 
 /*
  * Reserved bit positions in xprt->state
@@ -419,6 +427,8 @@ void                        xprt_unlock_connect(struct rpc_xprt *, void *);
 #define XPRT_BOUND             (4)
 #define XPRT_BINDING           (5)
 #define XPRT_CLOSING           (6)
+#define XPRT_OFFLINE           (7)
+#define XPRT_REMOVE            (8)
 #define XPRT_CONGESTED         (9)
 #define XPRT_CWND_WAIT         (10)
 #define XPRT_WRITE_SPACE       (11)
index c6cce3f..b19addc 100644 (file)
 #define _NET_SUNRPC_XPRTMULTIPATH_H
 
 struct rpc_xprt_iter_ops;
+struct rpc_sysfs_xprt_switch;
 struct rpc_xprt_switch {
        spinlock_t              xps_lock;
        struct kref             xps_kref;
 
+       unsigned int            xps_id;
        unsigned int            xps_nxprts;
        unsigned int            xps_nactive;
        atomic_long_t           xps_queuelen;
@@ -23,6 +25,7 @@ struct rpc_xprt_switch {
 
        const struct rpc_xprt_iter_ops *xps_iter_ops;
 
+       struct rpc_sysfs_xprt_switch *xps_sysfs;
        struct rcu_head         xps_rcu;
 };
 
@@ -71,4 +74,7 @@ extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi);
 
 extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
                const struct sockaddr *sap);
+
+extern void xprt_multipath_cleanup_ids(void);
+
 #endif
index 3c1423e..8c2a712 100644 (file)
@@ -10,6 +10,7 @@
 
 int            init_socket_xprt(void);
 void           cleanup_socket_xprt(void);
+unsigned short get_srcport(struct rpc_xprt *);
 
 #define RPC_MIN_RESVPORT       (1U)
 #define RPC_MAX_RESVPORT       (65535U)
index 9488600..1c8de39 100644 (file)
@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
            auth.o auth_null.o auth_unix.o \
            svc.o svcsock.o svcauth.o svcauth_unix.o \
            addr.o rpcb_clnt.o timer.o xdr.o \
-           sunrpc_syms.o cache.o rpc_pipe.o \
+           sunrpc_syms.o cache.o rpc_pipe.o sysfs.o \
            svc_xprt.o \
            xprtmultipath.o
 sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
index 42623d6..8b4de70 100644 (file)
@@ -41,6 +41,7 @@
 #include <trace/events/sunrpc.h>
 
 #include "sunrpc.h"
+#include "sysfs.h"
 #include "netns.h"
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -327,6 +328,7 @@ err_auth:
 out:
        if (pipefs_sb)
                rpc_put_sb_net(net);
+       rpc_sysfs_client_destroy(clnt);
        rpc_clnt_debugfs_unregister(clnt);
        return err;
 }
@@ -410,6 +412,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
        }
 
        rpc_clnt_set_transport(clnt, xprt, timeout);
+       xprt->main = true;
        xprt_iter_init(&clnt->cl_xpi, xps);
        xprt_switch_put(xps);
 
@@ -423,6 +426,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
        /* save the nodename */
        rpc_clnt_set_nodename(clnt, nodename);
 
+       rpc_sysfs_client_setup(clnt, xps, rpc_net_ns(clnt));
        err = rpc_client_register(clnt, args->authflavor, args->client_name);
        if (err)
                goto out_no_path;
@@ -733,6 +737,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
 
        rpc_unregister_client(clnt);
        __rpc_clnt_remove_pipedir(clnt);
+       rpc_sysfs_client_destroy(clnt);
        rpc_clnt_debugfs_unregister(clnt);
 
        /*
@@ -879,6 +884,7 @@ static void rpc_free_client_work(struct work_struct *work)
         * so they cannot be called in rpciod, so they are handled separately
         * here.
         */
+       rpc_sysfs_client_destroy(clnt);
        rpc_clnt_debugfs_unregister(clnt);
        rpc_free_clid(clnt);
        rpc_clnt_remove_pipedir(clnt);
@@ -2100,6 +2106,30 @@ call_connect_status(struct rpc_task *task)
        case -ENOTCONN:
        case -EAGAIN:
        case -ETIMEDOUT:
+               if (!(task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) &&
+                   (task->tk_flags & RPC_TASK_MOVEABLE) &&
+                   test_bit(XPRT_REMOVE, &xprt->state)) {
+                       struct rpc_xprt *saved = task->tk_xprt;
+                       struct rpc_xprt_switch *xps;
+
+                       rcu_read_lock();
+                       xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+                       rcu_read_unlock();
+                       if (xps->xps_nxprts > 1) {
+                               long value;
+
+                               xprt_release(task);
+                               value = atomic_long_dec_return(&xprt->queuelen);
+                               if (value == 0)
+                                       rpc_xprt_switch_remove_xprt(xps, saved);
+                               xprt_put(saved);
+                               task->tk_xprt = NULL;
+                               task->tk_action = call_start;
+                       }
+                       xprt_switch_put(xps);
+                       if (!task->tk_xprt)
+                               return;
+               }
                goto out_retry;
        case -ENOBUFS:
                rpc_delay(task, HZ >> 2);
index 39ed0e0..c045f63 100644 (file)
@@ -591,11 +591,21 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
        struct list_head *q;
        struct rpc_task *task;
 
+       /*
+        * Service the privileged queue.
+        */
+       q = &queue->tasks[RPC_NR_PRIORITY - 1];
+       if (queue->maxpriority > RPC_PRIORITY_PRIVILEGED && !list_empty(q)) {
+               task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+               goto out;
+       }
+
        /*
         * Service a batch of tasks from a single owner.
         */
        q = &queue->tasks[queue->priority];
-       if (!list_empty(q) && --queue->nr) {
+       if (!list_empty(q) && queue->nr) {
+               queue->nr--;
                task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
                goto out;
        }
index 236fadc..691c000 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/sunrpc/xprtsock.h>
 
 #include "sunrpc.h"
+#include "sysfs.h"
 #include "netns.h"
 
 unsigned int sunrpc_net_id;
@@ -103,6 +104,10 @@ init_sunrpc(void)
        if (err)
                goto out4;
 
+       err = rpc_sysfs_init();
+       if (err)
+               goto out5;
+
        sunrpc_debugfs_init();
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        rpc_register_sysctl();
@@ -111,6 +116,8 @@ init_sunrpc(void)
        init_socket_xprt();     /* clnt sock transport */
        return 0;
 
+out5:
+       unregister_rpc_pipefs();
 out4:
        unregister_pernet_subsys(&sunrpc_net_ops);
 out3:
@@ -124,7 +131,10 @@ out:
 static void __exit
 cleanup_sunrpc(void)
 {
+       rpc_sysfs_exit();
        rpc_cleanup_clids();
+       xprt_cleanup_ids();
+       xprt_multipath_cleanup_ids();
        rpcauth_remove_module();
        cleanup_socket_xprt();
        svc_cleanup_xprt_sock();
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
new file mode 100644 (file)
index 0000000..64da3bf
--- /dev/null
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Anna Schumaker <Anna.Schumaker@Netapp.com>
+ */
+#include <linux/sunrpc/clnt.h>
+#include <linux/kobject.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/xprtsock.h>
+
+#include "sysfs.h"
+
+/*
+ * Carrier for a superseded address string: the old string is wrapped in
+ * one of these, queued with call_rcu() and freed by free_xprt_addr().
+ */
+struct xprt_addr {
+       const char *addr;
+       struct rcu_head rcu;
+};
+
+/* RCU callback: free the retired address string and then its wrapper. */
+static void free_xprt_addr(struct rcu_head *head)
+{
+       struct xprt_addr *old;
+
+       old = container_of(head, struct xprt_addr, rcu);
+       kfree(old->addr);
+       kfree(old);
+}
+
+/* Top-level sysfs objects; assigned in rpc_sysfs_init(). */
+static struct kset *rpc_sunrpc_kset;
+static struct kobject *rpc_sunrpc_client_kobj, *rpc_sunrpc_xprt_switch_kobj;
+
+/* Release hook for the bare kobjects made by rpc_sysfs_object_alloc(). */
+static void rpc_sysfs_object_release(struct kobject *kobj)
+{
+       kfree(kobj);
+}
+
+/* Children of these objects use the network-namespace type operations. */
+static const struct kobj_ns_type_operations *
+rpc_sysfs_object_child_ns_type(struct kobject *kobj)
+{
+       return &net_ns_type_operations;
+}
+
+/* kobj_type for the container directories (no attributes of their own). */
+static struct kobj_type rpc_sysfs_object_type = {
+       .release = rpc_sysfs_object_release,
+       .sysfs_ops = &kobj_sysfs_ops,
+       .child_ns_type = rpc_sysfs_object_child_ns_type,
+};
+
+/*
+ * Allocate a bare kobject called @name, attach it to @kset and register
+ * it under @parent.
+ *
+ * Returns the new kobject, or NULL if allocation or registration fails.
+ */
+static struct kobject *rpc_sysfs_object_alloc(const char *name,
+                                             struct kset *kset,
+                                             struct kobject *parent)
+{
+       struct kobject *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+
+       if (!obj)
+               return NULL;
+
+       obj->kset = kset;
+       if (kobject_init_and_add(obj, &rpc_sysfs_object_type,
+                                parent, "%s", name) != 0) {
+               /* Give up the initial reference taken by the init step. */
+               kobject_put(obj);
+               return NULL;
+       }
+       return obj;
+}
+
+/* Map an xprt kobject to its rpc_xprt and hand back xprt_get()'s result. */
+static inline struct rpc_xprt *
+rpc_sysfs_xprt_kobj_get_xprt(struct kobject *kobj)
+{
+       struct rpc_sysfs_xprt *sysfs_xprt;
+
+       sysfs_xprt = container_of(kobj, struct rpc_sysfs_xprt, kobject);
+       return xprt_get(sysfs_xprt->xprt);
+}
+
+/* Map an xprt kobject to its switch and hand back xprt_switch_get()'s result. */
+static inline struct rpc_xprt_switch *
+rpc_sysfs_xprt_kobj_get_xprt_switch(struct kobject *kobj)
+{
+       struct rpc_sysfs_xprt *sysfs_xprt;
+
+       sysfs_xprt = container_of(kobj, struct rpc_sysfs_xprt, kobject);
+       return xprt_switch_get(sysfs_xprt->xprt_switch);
+}
+
+/*
+ * Map an xprt-switch kobject to its rpc_xprt_switch and hand back
+ * xprt_switch_get()'s result.
+ */
+static inline struct rpc_xprt_switch *
+rpc_sysfs_xprt_switch_kobj_get_xprt(struct kobject *kobj)
+{
+       struct rpc_sysfs_xprt_switch *sysfs_switch;
+
+       sysfs_switch = container_of(kobj, struct rpc_sysfs_xprt_switch,
+                                   kobject);
+       return xprt_switch_get(sysfs_switch->xprt_switch);
+}
+
+/*
+ * "dstaddr" read handler: print the transport's RPC_DISPLAY_ADDR string
+ * followed by a newline.
+ *
+ * Returns the number of bytes placed in @buf, or 0 when no transport
+ * reference could be obtained.
+ */
+static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj,
+                                          struct kobj_attribute *attr,
+                                          char *buf)
+{
+       struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+       ssize_t ret;
+
+       if (!xprt)
+               return 0;
+       ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]);
+       xprt_put(xprt);
+       /*
+        * sprintf() reports the length without the terminating NUL, which
+        * is exactly what a show() handler must return; adding one would
+        * expose a stray '\0' byte to readers.
+        */
+       return ret;
+}
+
+/*
+ * "xprt_info" read handler: dump one "key=value" line per transport
+ * counter (slot limits, queue lengths, congestion state, ...).  src_port
+ * is only read for the TCP transport class and printed as 0 otherwise.
+ *
+ * Returns the number of bytes placed in @buf, or 0 when no transport
+ * reference could be obtained.
+ */
+static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
+                                       struct kobj_attribute *attr,
+                                       char *buf)
+{
+       struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+       ssize_t ret;
+
+       if (!xprt)
+               return 0;
+
+       ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n"
+                      "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n"
+                      "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n"
+                      "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n"
+                      "tasks_queuelen=%ld\n",
+                      xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs,
+                      xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen,
+                      xprt->sending.qlen, xprt->pending.qlen,
+                      xprt->backlog.qlen, xprt->main,
+                      (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
+                      get_srcport(xprt) : 0,
+                      atomic_long_read(&xprt->queuelen));
+       xprt_put(xprt);
+       /* Length without the NUL terminator; see sprintf() semantics. */
+       return ret;
+}
+
+/*
+ * "xprt_state" read handler: print "state=CLOSED" when no state bit is
+ * set, otherwise one space-separated slot per known state bit (empty when
+ * the bit is clear).
+ *
+ * Returns the number of bytes placed in @buf, or 0 when no transport
+ * reference could be obtained.
+ */
+static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
+                                        struct kobj_attribute *attr,
+                                        char *buf)
+{
+       struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+       ssize_t ret;
+       int locked, connected, connecting, close_wait, bound, binding,
+           closing, congested, cwnd_wait, write_space, offline, remove;
+
+       if (!xprt)
+               return 0;
+
+       if (!xprt->state) {
+               ret = sprintf(buf, "state=CLOSED\n");
+       } else {
+               locked = test_bit(XPRT_LOCKED, &xprt->state);
+               connected = test_bit(XPRT_CONNECTED, &xprt->state);
+               connecting = test_bit(XPRT_CONNECTING, &xprt->state);
+               close_wait = test_bit(XPRT_CLOSE_WAIT, &xprt->state);
+               bound = test_bit(XPRT_BOUND, &xprt->state);
+               binding = test_bit(XPRT_BINDING, &xprt->state);
+               closing = test_bit(XPRT_CLOSING, &xprt->state);
+               congested = test_bit(XPRT_CONGESTED, &xprt->state);
+               cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
+               write_space = test_bit(XPRT_WRITE_SPACE, &xprt->state);
+               offline = test_bit(XPRT_OFFLINE, &xprt->state);
+               remove = test_bit(XPRT_REMOVE, &xprt->state);
+
+               /*
+                * NOTE(review): "BOUNDING" looks like a typo for "BINDING";
+                * left untouched because the text is visible to userspace.
+                */
+               ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s %s\n",
+                             locked ? "LOCKED" : "",
+                             connected ? "CONNECTED" : "",
+                             connecting ? "CONNECTING" : "",
+                             close_wait ? "CLOSE_WAIT" : "",
+                             bound ? "BOUND" : "",
+                             binding ? "BOUNDING" : "",
+                             closing ? "CLOSING" : "",
+                             congested ? "CONGESTED" : "",
+                             cwnd_wait ? "CWND_WAIT" : "",
+                             write_space ? "WRITE_SPACE" : "",
+                             offline ? "OFFLINE" : "",
+                             remove ? "REMOVE" : "");
+       }
+
+       xprt_put(xprt);
+       /* Report only the bytes written, not the terminating NUL. */
+       return ret;
+}
+
+/*
+ * "xprt_switch_info" read handler: print the switch's transport count,
+ * active count and queue length, one "key=value" per line.
+ *
+ * Returns the number of bytes placed in @buf, or 0 when no switch
+ * reference could be obtained.
+ */
+static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
+                                              struct kobj_attribute *attr,
+                                              char *buf)
+{
+       struct rpc_xprt_switch *xprt_switch =
+               rpc_sysfs_xprt_switch_kobj_get_xprt(kobj);
+       ssize_t ret;
+
+       if (!xprt_switch)
+               return 0;
+       ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\nqueue_len=%ld\n",
+                     xprt_switch->xps_nxprts, xprt_switch->xps_nactive,
+                     atomic_long_read(&xprt_switch->xps_queuelen));
+       xprt_switch_put(xprt_switch);
+       /* Report only the bytes written, not the terminating NUL. */
+       return ret;
+}
+
+/*
+ * "dstaddr" write handler: replace the transport's destination address
+ * with the presentation-format address in @buf (up to the first newline),
+ * keep the existing port, and force a disconnect.
+ *
+ * Only TCP and RDMA transport classes are accepted.  The update is made
+ * while holding XPRT_LOCKED; the previous display string is freed through
+ * call_rcu().
+ *
+ * Returns @count on success, 0 when no transport reference could be
+ * obtained, -EOPNOTSUPP for other transport classes, -EINTR if waiting
+ * for the lock is interrupted, or -ENOMEM on allocation failure.
+ */
+static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
+                                           struct kobj_attribute *attr,
+                                           const char *buf, size_t count)
+{
+       struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+       struct sockaddr *saddr;
+       char *dst_addr;
+       int port;
+       struct xprt_addr *saved_addr;
+       size_t buf_len;
+
+       if (!xprt)
+               return 0;
+       if (!(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP ||
+             xprt->xprt_class->ident == XPRT_TRANSPORT_RDMA)) {
+               xprt_put(xprt);
+               return -EOPNOTSUPP;
+       }
+
+       if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+               count = -EINTR;
+               goto out_put;
+       }
+       saddr = (struct sockaddr *)&xprt->addr;
+       port = rpc_get_port(saddr);
+
+       /* buf_len is the length up to the first occurrence of either
+        * '\n' or '\0'
+        */
+       buf_len = strcspn(buf, "\n");
+
+       dst_addr = kstrndup(buf, buf_len, GFP_KERNEL);
+       if (!dst_addr)
+               goto out_err;
+       saved_addr = kzalloc(sizeof(*saved_addr), GFP_KERNEL);
+       if (!saved_addr)
+               goto out_err_free;
+       saved_addr->addr =
+               rcu_dereference_raw(xprt->address_strings[RPC_DISPLAY_ADDR]);
+       rcu_assign_pointer(xprt->address_strings[RPC_DISPLAY_ADDR], dst_addr);
+       call_rcu(&saved_addr->rcu, free_xprt_addr);
+       /*
+        * The destination buffer is xprt->addr itself, so pass its real
+        * capacity.  sizeof(*saddr) is only the size of a generic
+        * struct sockaddr, which is too small for an IPv6 address.
+        */
+       xprt->addrlen = rpc_pton(xprt->xprt_net, buf, buf_len, saddr,
+                                sizeof(xprt->addr));
+       rpc_set_port(saddr, port);
+
+       xprt_force_disconnect(xprt);
+out:
+       xprt_release_write(xprt, NULL);
+out_put:
+       xprt_put(xprt);
+       return count;
+out_err_free:
+       kfree(dst_addr);
+out_err:
+       count = -ENOMEM;
+       goto out;
+}
+
+/*
+ * "xprt_state" write handler.  Accepts "offline", "online" or "remove"
+ * (matched by prefix) and applies the change while holding XPRT_LOCKED.
+ *
+ * Returns @count on success; 0 when the transport or switch reference
+ * could not be obtained; -EINVAL for an unknown command, for the main
+ * transport, or for "remove" on a transport that is not offline; -EINTR
+ * if waiting for the lock is interrupted.
+ */
+static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
+                                          struct kobj_attribute *attr,
+                                          const char *buf, size_t count)
+{
+       struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+       int offline = 0, online = 0, remove = 0;
+       struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj);
+
+       /*
+        * Both references were requested above, so every exit from here on
+        * goes through out_put; a bare return would leak whichever one was
+        * obtained.  xprt_put() and xprt_switch_put() accept NULL.
+        */
+       if (!xprt || !xps) {
+               count = 0;
+               goto out_put;
+       }
+
+       if (!strncmp(buf, "offline", 7))
+               offline = 1;
+       else if (!strncmp(buf, "online", 6))
+               online = 1;
+       else if (!strncmp(buf, "remove", 6))
+               remove = 1;
+       else {
+               count = -EINVAL;
+               goto out_put;
+       }
+
+       if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+               count = -EINTR;
+               goto out_put;
+       }
+       /* The main transport of a client may not be taken out of service. */
+       if (xprt->main) {
+               count = -EINVAL;
+               goto release_tasks;
+       }
+       if (offline) {
+               /*
+                * Adjust the active count only on a real transition so that
+                * repeating the same command cannot skew xps_nactive.
+                */
+               if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) {
+                       spin_lock(&xps->xps_lock);
+                       xps->xps_nactive--;
+                       spin_unlock(&xps->xps_lock);
+               }
+       } else if (online) {
+               if (test_and_clear_bit(XPRT_OFFLINE, &xprt->state)) {
+                       spin_lock(&xps->xps_lock);
+                       xps->xps_nactive++;
+                       spin_unlock(&xps->xps_lock);
+               }
+       } else if (remove) {
+               /* Removal is only permitted once the transport is offline. */
+               if (test_bit(XPRT_OFFLINE, &xprt->state)) {
+                       set_bit(XPRT_REMOVE, &xprt->state);
+                       xprt_force_disconnect(xprt);
+                       if (test_bit(XPRT_CONNECTED, &xprt->state)) {
+                               if (!xprt->sending.qlen &&
+                                   !xprt->pending.qlen &&
+                                   !xprt->backlog.qlen &&
+                                   !atomic_long_read(&xprt->queuelen))
+                                       rpc_xprt_switch_remove_xprt(xps, xprt);
+                       }
+               } else {
+                       count = -EINVAL;
+               }
+       }
+
+release_tasks:
+       xprt_release_write(xprt, NULL);
+out_put:
+       xprt_put(xprt);
+       xprt_switch_put(xps);
+       return count;
+}
+
+/*
+ * Create the "sunrpc" kset under kernel_kobj together with its
+ * "rpc-clients" and "xprt-switches" child objects.
+ *
+ * Returns 0 on success or -ENOMEM if any object cannot be created; on
+ * failure whatever was already created is released again.
+ */
+int rpc_sysfs_init(void)
+{
+       rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, kernel_kobj);
+       if (!rpc_sunrpc_kset)
+               return -ENOMEM;
+       rpc_sunrpc_client_kobj =
+               rpc_sysfs_object_alloc("rpc-clients", rpc_sunrpc_kset, NULL);
+       if (!rpc_sunrpc_client_kobj)
+               goto err_client;
+       rpc_sunrpc_xprt_switch_kobj =
+               rpc_sysfs_object_alloc("xprt-switches", rpc_sunrpc_kset, NULL);
+       if (!rpc_sunrpc_xprt_switch_kobj)
+               goto err_switch;
+       return 0;
+       /* Unwind in reverse order of creation. */
+err_switch:
+       kobject_put(rpc_sunrpc_client_kobj);
+       rpc_sunrpc_client_kobj = NULL;
+err_client:
+       kset_unregister(rpc_sunrpc_kset);
+       rpc_sunrpc_kset = NULL;
+       return -ENOMEM;
+}
+
+/* kobject release hook: free the enclosing rpc_sysfs_client. */
+static void rpc_sysfs_client_release(struct kobject *kobj)
+{
+       kfree(container_of(kobj, struct rpc_sysfs_client, kobject));
+}
+
+/* kobject release hook: free the enclosing rpc_sysfs_xprt_switch. */
+static void rpc_sysfs_xprt_switch_release(struct kobject *kobj)
+{
+       kfree(container_of(kobj, struct rpc_sysfs_xprt_switch, kobject));
+}
+
+/* kobject release hook: free the enclosing rpc_sysfs_xprt. */
+static void rpc_sysfs_xprt_release(struct kobject *kobj)
+{
+       kfree(container_of(kobj, struct rpc_sysfs_xprt, kobject));
+}
+
+/* Namespace tag of a client object: the net pointer recorded at allocation. */
+static const void *rpc_sysfs_client_namespace(struct kobject *kobj)
+{
+       struct rpc_sysfs_client *client;
+
+       client = container_of(kobj, struct rpc_sysfs_client, kobject);
+       return client->net;
+}
+
+/* Namespace tag of a switch object: the net pointer recorded at allocation. */
+static const void *rpc_sysfs_xprt_switch_namespace(struct kobject *kobj)
+{
+       struct rpc_sysfs_xprt_switch *sysfs_switch;
+
+       sysfs_switch = container_of(kobj, struct rpc_sysfs_xprt_switch,
+                                   kobject);
+       return sysfs_switch->net;
+}
+
+/* Namespace tag of an xprt object: taken from the transport's xprt_net. */
+static const void *rpc_sysfs_xprt_namespace(struct kobject *kobj)
+{
+       struct rpc_sysfs_xprt *sysfs_xprt;
+
+       sysfs_xprt = container_of(kobj, struct rpc_sysfs_xprt, kobject);
+       return sysfs_xprt->xprt->xprt_net;
+}
+
+/* dstaddr: readable by all, writable by owner; show/store handlers above. */
+static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr,
+       0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store);
+
+/* xprt_info: read-only statistics dump. */
+static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info,
+       0444, rpc_sysfs_xprt_info_show, NULL);
+
+/* xprt_state: read shows state bits, write takes offline/online/remove. */
+static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state,
+       0644, rpc_sysfs_xprt_state_show, rpc_sysfs_xprt_state_change);
+
+/* Default attribute set for every xprt object. */
+static struct attribute *rpc_sysfs_xprt_attrs[] = {
+       &rpc_sysfs_xprt_dstaddr.attr,
+       &rpc_sysfs_xprt_info.attr,
+       &rpc_sysfs_xprt_change_state.attr,
+       NULL,
+};
+
+/* xprt_switch_info: read-only summary of the switch counters. */
+static struct kobj_attribute rpc_sysfs_xprt_switch_info =
+       __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL);
+
+/* Default attribute set for every xprt-switch object. */
+static struct attribute *rpc_sysfs_xprt_switch_attrs[] = {
+       &rpc_sysfs_xprt_switch_info.attr,
+       NULL,
+};
+
+/* kobj_type for per-client objects; no default attributes. */
+static struct kobj_type rpc_sysfs_client_type = {
+       .release = rpc_sysfs_client_release,
+       .sysfs_ops = &kobj_sysfs_ops,
+       .namespace = rpc_sysfs_client_namespace,
+};
+
+/* kobj_type for per-switch objects; exposes rpc_sysfs_xprt_switch_attrs. */
+static struct kobj_type rpc_sysfs_xprt_switch_type = {
+       .release = rpc_sysfs_xprt_switch_release,
+       .default_attrs = rpc_sysfs_xprt_switch_attrs,
+       .sysfs_ops = &kobj_sysfs_ops,
+       .namespace = rpc_sysfs_xprt_switch_namespace,
+};
+
+/* kobj_type for per-transport objects; exposes rpc_sysfs_xprt_attrs. */
+static struct kobj_type rpc_sysfs_xprt_type = {
+       .release = rpc_sysfs_xprt_release,
+       .default_attrs = rpc_sysfs_xprt_attrs,
+       .sysfs_ops = &kobj_sysfs_ops,
+       .namespace = rpc_sysfs_xprt_namespace,
+};
+
+/* Drop the two top-level objects, then unregister the kset they live in. */
+void rpc_sysfs_exit(void)
+{
+       kobject_put(rpc_sunrpc_client_kobj);
+       kobject_put(rpc_sunrpc_xprt_switch_kobj);
+       kset_unregister(rpc_sunrpc_kset);
+}
+
+/*
+ * Allocate and register a "clnt-<clid>" object under @parent, tagged with
+ * @net.  Returns the new object, or NULL on allocation or registration
+ * failure.
+ */
+static struct rpc_sysfs_client *rpc_sysfs_client_alloc(struct kobject *parent,
+                                                      struct net *net,
+                                                      int clid)
+{
+       struct rpc_sysfs_client *client = kzalloc(sizeof(*client), GFP_KERNEL);
+
+       if (!client)
+               return NULL;
+
+       client->net = net;
+       client->kobject.kset = rpc_sunrpc_kset;
+       if (kobject_init_and_add(&client->kobject, &rpc_sysfs_client_type,
+                                parent, "clnt-%d", clid) != 0) {
+               kobject_put(&client->kobject);
+               return NULL;
+       }
+       return client;
+}
+
+/*
+ * Allocate and register a "switch-<xps_id>" object under @parent, tagged
+ * with @net.  Returns the new object, or NULL on allocation or
+ * registration failure.
+ */
+static struct rpc_sysfs_xprt_switch *
+rpc_sysfs_xprt_switch_alloc(struct kobject *parent,
+                           struct rpc_xprt_switch *xprt_switch,
+                           struct net *net,
+                           gfp_t gfp_flags)
+{
+       struct rpc_sysfs_xprt_switch *sysfs_switch;
+
+       sysfs_switch = kzalloc(sizeof(*sysfs_switch), gfp_flags);
+       if (!sysfs_switch)
+               return NULL;
+
+       sysfs_switch->net = net;
+       sysfs_switch->kobject.kset = rpc_sunrpc_kset;
+       if (kobject_init_and_add(&sysfs_switch->kobject,
+                                &rpc_sysfs_xprt_switch_type,
+                                parent, "switch-%d",
+                                xprt_switch->xps_id) != 0) {
+               kobject_put(&sysfs_switch->kobject);
+               return NULL;
+       }
+       return sysfs_switch;
+}
+
+/*
+ * Allocate and register an "xprt-<id>-<proto>" object under @parent.
+ * Returns the new object, or NULL on allocation or registration failure.
+ */
+static struct rpc_sysfs_xprt *rpc_sysfs_xprt_alloc(struct kobject *parent,
+                                                  struct rpc_xprt *xprt,
+                                                  gfp_t gfp_flags)
+{
+       struct rpc_sysfs_xprt *sysfs_xprt = kzalloc(sizeof(*sysfs_xprt),
+                                                   gfp_flags);
+
+       if (!sysfs_xprt)
+               return NULL;
+
+       sysfs_xprt->kobject.kset = rpc_sunrpc_kset;
+       if (kobject_init_and_add(&sysfs_xprt->kobject, &rpc_sysfs_xprt_type,
+                                parent, "xprt-%d-%s", xprt->id,
+                                xprt->address_strings[RPC_DISPLAY_PROTO]) != 0) {
+               kobject_put(&sysfs_xprt->kobject);
+               return NULL;
+       }
+       return sysfs_xprt;
+}
+
+/*
+ * Create the sysfs object for @clnt and link it to its transport switch
+ * through a "switch" symlink.
+ *
+ * Best effort: nothing is created if the switch has no sysfs object or if
+ * the client object cannot be allocated; a failed symlink only warns.
+ */
+void rpc_sysfs_client_setup(struct rpc_clnt *clnt,
+                           struct rpc_xprt_switch *xprt_switch,
+                           struct net *net)
+{
+       struct rpc_sysfs_client *rpc_client;
+       struct rpc_sysfs_xprt_switch *xswitch =
+               (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+
+       /*
+        * xps_sysfs stays NULL when the switch object could not be
+        * allocated; there is then no valid link target.
+        */
+       if (!xswitch)
+               return;
+
+       rpc_client = rpc_sysfs_client_alloc(rpc_sunrpc_client_kobj,
+                                           net, clnt->cl_clid);
+       if (rpc_client) {
+               char name[] = "switch";
+               int ret;
+
+               clnt->cl_sysfs = rpc_client;
+               rpc_client->clnt = clnt;
+               rpc_client->xprt_switch = xprt_switch;
+               kobject_uevent(&rpc_client->kobject, KOBJ_ADD);
+               ret = sysfs_create_link_nowarn(&rpc_client->kobject,
+                                              &xswitch->kobject, name);
+               if (ret)
+                       pr_warn("can't create link to %s in sysfs (%d)\n",
+                               name, ret);
+       }
+}
+
+/*
+ * Create the sysfs object for @xprt_switch.  The namespace tag is the
+ * switch's own xps_net when set, otherwise @xprt's xprt_net.  Best effort:
+ * on allocation failure xps_sysfs is simply left untouched.
+ */
+void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch,
+                                struct rpc_xprt *xprt,
+                                gfp_t gfp_flags)
+{
+       struct net *net = xprt_switch->xps_net ? xprt_switch->xps_net :
+                                                xprt->xprt_net;
+       struct rpc_sysfs_xprt_switch *sysfs_switch;
+
+       sysfs_switch = rpc_sysfs_xprt_switch_alloc(rpc_sunrpc_xprt_switch_kobj,
+                                                  xprt_switch, net, gfp_flags);
+       if (!sysfs_switch)
+               return;
+
+       xprt_switch->xps_sysfs = sysfs_switch;
+       sysfs_switch->xprt_switch = xprt_switch;
+       sysfs_switch->xprt = xprt;
+       kobject_uevent(&sysfs_switch->kobject, KOBJ_ADD);
+}
+
+/*
+ * Create the sysfs object for @xprt as a child of its switch's object.
+ *
+ * Best effort: nothing is created if the switch has no sysfs object or if
+ * the allocation fails.
+ */
+void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch,
+                         struct rpc_xprt *xprt,
+                         gfp_t gfp_flags)
+{
+       struct rpc_sysfs_xprt *rpc_xprt;
+       struct rpc_sysfs_xprt_switch *switch_obj =
+               (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+
+       /*
+        * xps_sysfs stays NULL when the switch object could not be
+        * allocated; without it there is no parent to attach to.
+        */
+       if (!switch_obj)
+               return;
+
+       rpc_xprt = rpc_sysfs_xprt_alloc(&switch_obj->kobject, xprt, gfp_flags);
+       if (rpc_xprt) {
+               xprt->xprt_sysfs = rpc_xprt;
+               rpc_xprt->xprt = xprt;
+               rpc_xprt->xprt_switch = xprt_switch;
+               kobject_uevent(&rpc_xprt->kobject, KOBJ_ADD);
+       }
+}
+
+/*
+ * Tear down @clnt's sysfs object, if it has one: remove the "switch"
+ * symlink, announce the removal, unregister and drop the object, then
+ * clear cl_sysfs.
+ */
+void rpc_sysfs_client_destroy(struct rpc_clnt *clnt)
+{
+       struct rpc_sysfs_client *sysfs_client = clnt->cl_sysfs;
+       char name[] = "switch";
+
+       if (!sysfs_client)
+               return;
+
+       sysfs_remove_link(&sysfs_client->kobject, name);
+       kobject_uevent(&sysfs_client->kobject, KOBJ_REMOVE);
+       kobject_del(&sysfs_client->kobject);
+       kobject_put(&sysfs_client->kobject);
+       clnt->cl_sysfs = NULL;
+}
+
+/*
+ * Tear down @xprt_switch's sysfs object, if it has one: announce the
+ * removal, unregister and drop the object, then clear xps_sysfs.
+ */
+void rpc_sysfs_xprt_switch_destroy(struct rpc_xprt_switch *xprt_switch)
+{
+       struct rpc_sysfs_xprt_switch *sysfs_switch = xprt_switch->xps_sysfs;
+
+       if (!sysfs_switch)
+               return;
+
+       kobject_uevent(&sysfs_switch->kobject, KOBJ_REMOVE);
+       kobject_del(&sysfs_switch->kobject);
+       kobject_put(&sysfs_switch->kobject);
+       xprt_switch->xps_sysfs = NULL;
+}
+
+/*
+ * Tear down @xprt's sysfs object, if it has one: announce the removal,
+ * unregister and drop the object, then clear xprt_sysfs.
+ */
+void rpc_sysfs_xprt_destroy(struct rpc_xprt *xprt)
+{
+       struct rpc_sysfs_xprt *sysfs_xprt = xprt->xprt_sysfs;
+
+       if (!sysfs_xprt)
+               return;
+
+       kobject_uevent(&sysfs_xprt->kobject, KOBJ_REMOVE);
+       kobject_del(&sysfs_xprt->kobject);
+       kobject_put(&sysfs_xprt->kobject);
+       xprt->xprt_sysfs = NULL;
+}
diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h
new file mode 100644 (file)
index 0000000..6620ceb
--- /dev/null
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020 Anna Schumaker <Anna.Schumaker@Netapp.com>
+ */
+#ifndef __SUNRPC_SYSFS_H
+#define __SUNRPC_SYSFS_H
+
+/* sysfs representation of one RPC client ("clnt-<id>" directory). */
+struct rpc_sysfs_client {
+       struct kobject kobject;
+       struct net *net;                        /* namespace tag */
+       struct rpc_clnt *clnt;                  /* client this object describes */
+       struct rpc_xprt_switch *xprt_switch;    /* switch passed at setup time */
+};
+
+/* sysfs representation of one transport switch ("switch-<id>" directory). */
+struct rpc_sysfs_xprt_switch {
+       struct kobject kobject;
+       struct net *net;                        /* namespace tag */
+       struct rpc_xprt_switch *xprt_switch;    /* switch this object describes */
+       struct rpc_xprt *xprt;                  /* transport passed at setup time */
+};
+
+/* sysfs representation of one transport ("xprt-<id>-<proto>" directory). */
+struct rpc_sysfs_xprt {
+       struct kobject kobject;
+       struct rpc_xprt *xprt;                  /* transport this object describes */
+       struct rpc_xprt_switch *xprt_switch;    /* switch the transport was added to */
+};
+
+int rpc_sysfs_init(void);
+void rpc_sysfs_exit(void);
+
+void rpc_sysfs_client_setup(struct rpc_clnt *clnt,
+                           struct rpc_xprt_switch *xprt_switch,
+                           struct net *net);
+void rpc_sysfs_client_destroy(struct rpc_clnt *clnt);
+void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch,
+                                struct rpc_xprt *xprt, gfp_t gfp_flags);
+void rpc_sysfs_xprt_switch_destroy(struct rpc_xprt_switch *xprt);
+void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch,
+                         struct rpc_xprt *xprt, gfp_t gfp_flags);
+void rpc_sysfs_xprt_destroy(struct rpc_xprt *xprt);
+
+#endif
index 3964ff7..ca10ba2 100644 (file)
@@ -1230,10 +1230,9 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
        void *kaddr;
 
        maxlen = xdr->buf->page_len;
-       if (base >= maxlen) {
-               base = maxlen;
-               maxlen = 0;
-       } else
+       if (base >= maxlen)
+               return 0;
+       else
                maxlen -= base;
        if (len > maxlen)
                len = maxlen;
index 3509a7f..fb6db09 100644 (file)
@@ -55,6 +55,7 @@
 #include <trace/events/sunrpc.h>
 
 #include "sunrpc.h"
+#include "sysfs.h"
 
 /*
  * Local variables
@@ -443,7 +444,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 }
 EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
 
-static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
+void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
 {
        if (xprt->snd_task != task)
                return;
@@ -1746,6 +1747,30 @@ static void xprt_free_all_slots(struct rpc_xprt *xprt)
        }
 }
 
+/* Allocator for the numeric IDs stored in rpc_xprt::id. */
+static DEFINE_IDA(rpc_xprt_ids);
+
+/* Release all memory held by the transport ID allocator. */
+void xprt_cleanup_ids(void)
+{
+       ida_destroy(&rpc_xprt_ids);
+}
+
+/*
+ * Reserve a fresh ID for @xprt and record it in xprt->id.
+ * Returns 0 on success or the negative value from the allocator.
+ */
+static int xprt_alloc_id(struct rpc_xprt *xprt)
+{
+       int new_id = ida_simple_get(&rpc_xprt_ids, 0, 0, GFP_KERNEL);
+
+       if (new_id >= 0) {
+               xprt->id = new_id;
+               return 0;
+       }
+       return new_id;
+}
+
+/* Return @xprt's ID to the allocator. */
+static void xprt_free_id(struct rpc_xprt *xprt)
+{
+       ida_simple_remove(&rpc_xprt_ids, xprt->id);
+}
+
 struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
                unsigned int num_prealloc,
                unsigned int max_alloc)
@@ -1758,6 +1783,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
        if (xprt == NULL)
                goto out;
 
+       xprt_alloc_id(xprt);
        xprt_init(xprt, net);
 
        for (i = 0; i < num_prealloc; i++) {
@@ -1786,6 +1812,8 @@ void xprt_free(struct rpc_xprt *xprt)
 {
        put_net(xprt->xprt_net);
        xprt_free_all_slots(xprt);
+       xprt_free_id(xprt);
+       rpc_sysfs_xprt_destroy(xprt);
        kfree_rcu(xprt, rcu);
 }
 EXPORT_SYMBOL_GPL(xprt_free);
index 1b40731..c60820e 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/xprtmultipath.h>
 
+#include "sysfs.h"
+
 typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
                const struct rpc_xprt *cur);
 
@@ -55,6 +57,7 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
        if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
                xprt_switch_add_xprt_locked(xps, xprt);
        spin_unlock(&xps->xps_lock);
+       rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL);
 }
 
 static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
@@ -62,7 +65,8 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
 {
        if (unlikely(xprt == NULL))
                return;
-       xps->xps_nactive--;
+       if (!test_bit(XPRT_OFFLINE, &xprt->state))
+               xps->xps_nactive--;
        xps->xps_nxprts--;
        if (xps->xps_nxprts == 0)
                xps->xps_net = NULL;
@@ -86,6 +90,30 @@ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
        xprt_put(xprt);
 }
 
+/* Allocator for the numeric IDs stored in rpc_xprt_switch::xps_id. */
+static DEFINE_IDA(rpc_xprtswitch_ids);
+
+/* Release all memory held by the switch ID allocator. */
+void xprt_multipath_cleanup_ids(void)
+{
+       ida_destroy(&rpc_xprtswitch_ids);
+}
+
+/*
+ * Reserve a fresh ID for @xps using @gfp_flags and record it in
+ * xps->xps_id.  Returns 0 on success or the negative value from the
+ * allocator.
+ */
+static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags)
+{
+       int new_id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags);
+
+       if (new_id >= 0) {
+               xps->xps_id = new_id;
+               return 0;
+       }
+       return new_id;
+}
+
+/* Return @xps's ID to the allocator. */
+static void xprt_switch_free_id(struct rpc_xprt_switch *xps)
+{
+       ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id);
+}
+
 /**
  * xprt_switch_alloc - Allocate a new struct rpc_xprt_switch
  * @xprt: pointer to struct rpc_xprt
@@ -103,12 +131,15 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
        if (xps != NULL) {
                spin_lock_init(&xps->xps_lock);
                kref_init(&xps->xps_kref);
+               xprt_switch_alloc_id(xps, gfp_flags);
                xps->xps_nxprts = xps->xps_nactive = 0;
                atomic_long_set(&xps->xps_queuelen, 0);
                xps->xps_net = NULL;
                INIT_LIST_HEAD(&xps->xps_xprt_list);
                xps->xps_iter_ops = &rpc_xprt_iter_singular;
+               rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags);
                xprt_switch_add_xprt_locked(xps, xprt);
+               rpc_sysfs_xprt_setup(xps, xprt, gfp_flags);
        }
 
        return xps;
@@ -136,6 +167,8 @@ static void xprt_switch_free(struct kref *kref)
                        struct rpc_xprt_switch, xps_kref);
 
        xprt_switch_free_entries(xps);
+       rpc_sysfs_xprt_switch_destroy(xps);
+       xprt_switch_free_id(xps);
        kfree_rcu(xps, xps_rcu);
 }
 
@@ -198,7 +231,8 @@ void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
 static
 bool xprt_is_active(const struct rpc_xprt *xprt)
 {
-       return kref_read(&xprt->kref) != 0;
+       return (kref_read(&xprt->kref) != 0 &&
+               !test_bit(XPRT_OFFLINE, &xprt->state));
 }
 
 static
index 19a49d2..9c2ffc6 100644 (file)
@@ -73,6 +73,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 unsigned int xprt_rdma_memreg_strategy         = RPCRDMA_FRWR;
 int xprt_rdma_pad_optimize;
+static struct xprt_class xprt_rdma;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 
@@ -349,6 +350,7 @@ xprt_setup_rdma(struct xprt_create *args)
        /* Ensure xprt->addr holds valid server TCP (not RDMA)
         * address, for any side protocols which peek at it */
        xprt->prot = IPPROTO_TCP;
+       xprt->xprt_class = &xprt_rdma;
        xprt->addrlen = args->addrlen;
        memcpy(&xprt->addr, sap, xprt->addrlen);
 
index 316d049..e573dce 100644 (file)
@@ -91,6 +91,11 @@ static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
 
 static struct ctl_table_header *sunrpc_table_header;
 
+static struct xprt_class xs_local_transport;
+static struct xprt_class xs_udp_transport;
+static struct xprt_class xs_tcp_transport;
+static struct xprt_class xs_bc_tcp_transport;
+
 /*
  * FIXME: changing the UDP slot table size should also resize the UDP
  *        socket buffers for existing UDP transports
@@ -1648,6 +1653,13 @@ static int xs_get_srcport(struct sock_xprt *transport)
        return port;
 }
 
+unsigned short get_srcport(struct rpc_xprt *xprt)
+{
+       struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
+       return sock->srcport;
+}
+EXPORT_SYMBOL(get_srcport);
+
 static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
 {
        if (transport->srcport != 0)
@@ -1689,7 +1701,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
                err = kernel_bind(sock, (struct sockaddr *)&myaddr,
                                transport->xprt.addrlen);
                if (err == 0) {
-                       transport->srcport = port;
+                       if (transport->xprt.reuseport)
+                               transport->srcport = port;
                        break;
                }
                last = port;
@@ -2779,6 +2792,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
        transport = container_of(xprt, struct sock_xprt, xprt);
 
        xprt->prot = 0;
+       xprt->xprt_class = &xs_local_transport;
        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
        xprt->bind_timeout = XS_BIND_TO;
@@ -2848,6 +2862,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
        transport = container_of(xprt, struct sock_xprt, xprt);
 
        xprt->prot = IPPROTO_UDP;
+       xprt->xprt_class = &xs_udp_transport;
        /* XXX: header size can vary due to auth type, IPv6, etc. */
        xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
@@ -2928,6 +2943,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        transport = container_of(xprt, struct sock_xprt, xprt);
 
        xprt->prot = IPPROTO_TCP;
+       xprt->xprt_class = &xs_tcp_transport;
        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
        xprt->bind_timeout = XS_BIND_TO;
@@ -3001,6 +3017,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
        transport = container_of(xprt, struct sock_xprt, xprt);
 
        xprt->prot = IPPROTO_TCP;
+       xprt->xprt_class = &xs_bc_tcp_transport;
        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
        xprt->timeout = &xs_tcp_default_timeout;