Merge tag 'nfs-for-5.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 9 May 2019 21:33:15 +0000 (14:33 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 9 May 2019 21:33:15 +0000 (14:33 -0700)
Pull NFS client updates from Anna Schumaker:
 "Highlights include:

  Stable bugfixes:
   - Fall back to MDS if no deviceid is found rather than aborting   # v4.11+
   - NFS4: Fix v4.0 client state corruption when mount

  Features:
   - Much improved handling of soft mounts with NFS v4.0:
       - Reduce risk of false positive timeouts
       - Faster failover of reads and writes after a timeout
       - Added a "softerr" mount option to return ETIMEDOUT instead of
         EIO to the application after a timeout
   - Increase number of xprtrdma backchannel requests
   - Add additional xprtrdma tracepoints
   - Improved send completion batching for xprtrdma

  Other bugfixes and cleanups:
   - Return -EINVAL when NFS v4.2 is passed an invalid dedup mode
   - Reduce usage of GFP_ATOMIC pages in SUNRPC
   - Various minor NFS over RDMA cleanups and bugfixes
   - Use the correct container namespace for upcalls
   - Don't share superblocks between user namespaces
   - Various other container fixes
   - Make nfs_match_client() killable to prevent soft lockups
   - Don't mark all open state for recovery when handling recallable
     state revoked flag"

* tag 'nfs-for-5.2-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (69 commits)
  SUNRPC: Rebalance a kref in auth_gss.c
  NFS: Fix a double unlock from nfs_match,get_client
  nfs: pass the correct prototype to read_cache_page
  NFSv4: don't mark all open state for recovery when handling recallable state revoked flag
  SUNRPC: Fix an error code in gss_alloc_msg()
  SUNRPC: task should be exit if encode return EKEYEXPIRED more times
  NFS4: Fix v4.0 client state corruption when mount
  PNFS fallback to MDS if no deviceid found
  NFS: make nfs_match_client killable
  lockd: Store the lockd client credential in struct nlm_host
  NFS: When mounting, don't share filesystems between different user namespaces
  NFS: Convert NFSv2 to use the container user namespace
  NFSv4: Convert the NFS client idmapper to use the container user namespace
  NFS: Convert NFSv3 to use the container user namespace
  SUNRPC: Use namespace of listening daemon in the client AUTH_GSS upcall
  SUNRPC: Use the client user namespace when encoding creds
  NFS: Store the credential of the mount process in the nfs_server
  SUNRPC: Cache cred of process creating the rpc_client
  xprtrdma: Remove stale comment
  xprtrdma: Update comments that reference ib_drain_qp
  ...

59 files changed:
fs/lockd/clntlock.c
fs/lockd/clntproc.c
fs/lockd/host.c
fs/lockd/mon.c
fs/nfs/client.c
fs/nfs/delegation.c
fs/nfs/delegation.h
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/filelayout/filelayout.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/mount_clnt.c
fs/nfs/nfs2xdr.c
fs/nfs/nfs3client.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4client.c
fs/nfs/nfs4file.c
fs/nfs/nfs4idmap.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/read.c
fs/nfs/super.c
fs/nfs/symlink.c
fs/nfs/write.c
fs/nfsd/nfs4callback.c
include/linux/lockd/bind.h
include/linux/lockd/lockd.h
include/linux/nfs_fs.h
include/linux/nfs_fs_sb.h
include/linux/nfs_page.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/xprt.h
include/trace/events/rpcrdma.h
include/trace/events/sunrpc.h
include/uapi/linux/nfs_mount.h
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_unix.c
net/sunrpc/clnt.c
net/sunrpc/debugfs.c
net/sunrpc/rpcb_clnt.c
net/sunrpc/sched.c
net/sunrpc/socklib.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/backchannel.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c

index c2a1286..70f520b 100644 (file)
@@ -63,7 +63,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
        host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
                                   nlm_init->protocol, nlm_version,
                                   nlm_init->hostname, nlm_init->noresvport,
-                                  nlm_init->net);
+                                  nlm_init->net, nlm_init->cred);
        if (host == NULL)
                goto out_nohost;
        if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
index e8a0040..d9c32d1 100644 (file)
@@ -715,7 +715,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
        struct nlm_rqst *req = data;
        u32 status = ntohl(req->a_res.status);
 
-       if (RPC_ASSASSINATED(task))
+       if (RPC_SIGNALLED(task))
                goto die;
 
        if (task->tk_status < 0) {
@@ -783,7 +783,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
        struct nlm_rqst *req = data;
        u32 status = ntohl(req->a_res.status);
 
-       if (RPC_ASSASSINATED(task))
+       if (RPC_SIGNALLED(task))
                goto die;
 
        if (task->tk_status < 0) {
index f0b5c98..7d46faf 100644 (file)
@@ -60,6 +60,7 @@ struct nlm_lookup_host_info {
        const size_t            hostname_len;   /* it's length */
        const int               noresvport;     /* use non-priv port */
        struct net              *net;           /* network namespace to bind */
+       const struct cred       *cred;
 };
 
 /*
@@ -162,6 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
        host->h_nsmhandle  = nsm;
        host->h_addrbuf    = nsm->sm_addrbuf;
        host->net          = ni->net;
+       host->h_cred       = get_cred(ni->cred),
        strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
 
 out:
@@ -188,6 +190,7 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
        clnt = host->h_rpcclnt;
        if (clnt != NULL)
                rpc_shutdown_client(clnt);
+       put_cred(host->h_cred);
        kfree(host);
 
        ln->nrhosts--;
@@ -202,6 +205,8 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
  * @version: NLM protocol version
  * @hostname: '\0'-terminated hostname of server
  * @noresvport: 1 if non-privileged port should be used
+ * @net: pointer to net namespace
+ * @cred: pointer to cred
  *
  * Returns an nlm_host structure that matches the passed-in
  * [server address, transport protocol, NLM version, server hostname].
@@ -214,7 +219,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
                                     const u32 version,
                                     const char *hostname,
                                     int noresvport,
-                                    struct net *net)
+                                    struct net *net,
+                                    const struct cred *cred)
 {
        struct nlm_lookup_host_info ni = {
                .server         = 0,
@@ -226,6 +232,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
                .hostname_len   = strlen(hostname),
                .noresvport     = noresvport,
                .net            = net,
+               .cred           = cred,
        };
        struct hlist_head *chain;
        struct nlm_host *host;
@@ -458,6 +465,7 @@ nlm_bind_host(struct nlm_host *host)
                        .authflavor     = RPC_AUTH_UNIX,
                        .flags          = (RPC_CLNT_CREATE_NOPING |
                                           RPC_CLNT_CREATE_AUTOBIND),
+                       .cred           = host->h_cred,
                };
 
                /*
index 654594e..1eabd91 100644 (file)
@@ -82,6 +82,7 @@ static struct rpc_clnt *nsm_create(struct net *net, const char *nodename)
                .version                = NSM_VERSION,
                .authflavor             = RPC_AUTH_NULL,
                .flags                  = RPC_CLNT_CREATE_NOPING,
+               .cred                   = current_cred(),
        };
 
        return rpc_create(&args);
index 90d71fd..da74c4c 100644 (file)
@@ -284,6 +284,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
        struct nfs_client *clp;
        const struct sockaddr *sap = data->addr;
        struct nfs_net *nn = net_generic(data->net, nfs_net_id);
+       int error;
 
 again:
        list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
@@ -296,9 +297,11 @@ again:
                if (clp->cl_cons_state > NFS_CS_READY) {
                        refcount_inc(&clp->cl_count);
                        spin_unlock(&nn->nfs_client_lock);
-                       nfs_wait_client_init_complete(clp);
+                       error = nfs_wait_client_init_complete(clp);
                        nfs_put_client(clp);
                        spin_lock(&nn->nfs_client_lock);
+                       if (error < 0)
+                               return ERR_PTR(error);
                        goto again;
                }
 
@@ -407,6 +410,8 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
                clp = nfs_match_client(cl_init);
                if (clp) {
                        spin_unlock(&nn->nfs_client_lock);
+                       if (IS_ERR(clp))
+                               return clp;
                        if (new)
                                new->rpc_ops->free_client(new);
                        return nfs_found_client(cl_init, clp);
@@ -500,6 +505,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
                .program        = &nfs_program,
                .version        = clp->rpc_ops->version,
                .authflavor     = flavor,
+               .cred           = cl_init->cred,
        };
 
        if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
@@ -598,6 +604,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server,
                        sizeof(server->client->cl_timeout_default));
        server->client->cl_timeout = &server->client->cl_timeout_default;
        server->client->cl_softrtry = 0;
+       if (server->flags & NFS_MOUNT_SOFTERR)
+               server->client->cl_softerr = 1;
        if (server->flags & NFS_MOUNT_SOFT)
                server->client->cl_softrtry = 1;
 
@@ -652,6 +660,7 @@ static int nfs_init_server(struct nfs_server *server,
                .proto = data->nfs_server.protocol,
                .net = data->net,
                .timeparms = &timeparms,
+               .cred = server->cred,
        };
        struct nfs_client *clp;
        int error;
@@ -920,6 +929,7 @@ void nfs_free_server(struct nfs_server *server)
        ida_destroy(&server->lockowner_id);
        ida_destroy(&server->openowner_id);
        nfs_free_iostats(server->io_stats);
+       put_cred(server->cred);
        kfree(server);
        nfs_release_automount_timer();
 }
@@ -940,6 +950,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
        if (!server)
                return ERR_PTR(-ENOMEM);
 
+       server->cred = get_cred(current_cred());
+
        error = -ENOMEM;
        fattr = nfs_alloc_fattr();
        if (fattr == NULL)
@@ -1006,6 +1018,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
        if (!server)
                return ERR_PTR(-ENOMEM);
 
+       server->cred = get_cred(source->cred);
+
        error = -ENOMEM;
        fattr_fsinfo = nfs_alloc_fattr();
        if (fattr_fsinfo == NULL)
index 2f6b447..8b78274 100644 (file)
@@ -1033,6 +1033,18 @@ void nfs_mark_test_expired_all_delegations(struct nfs_client *clp)
        rcu_read_unlock();
 }
 
+/**
+ * nfs_test_expired_all_delegations - test all delegations for a client
+ * @clp: nfs_client to process
+ *
+ * Helper for handling "recallable state revoked" status from server.
+ */
+void nfs_test_expired_all_delegations(struct nfs_client *clp)
+{
+       nfs_mark_test_expired_all_delegations(clp);
+       nfs4_schedule_state_manager(clp);
+}
+
 /**
  * nfs_reap_expired_delegations - reap expired delegations
  * @clp: nfs_client to process
index 35b4b02..5799777 100644 (file)
@@ -58,6 +58,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp);
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 void nfs_mark_test_expired_all_delegations(struct nfs_client *clp);
+void nfs_test_expired_all_delegations(struct nfs_client *clp);
 void nfs_reap_expired_delegations(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
index a71d0b4..47d445b 100644 (file)
@@ -714,8 +714,9 @@ out:
  * We only need to convert from xdr once so future lookups are much simpler
  */
 static
-int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
+int nfs_readdir_filler(void *data, struct page* page)
 {
+       nfs_readdir_descriptor_t *desc = data;
        struct inode    *inode = file_inode(desc->file);
        int ret;
 
@@ -762,8 +763,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-       return read_cache_page(desc->file->f_mapping,
-                       desc->page_index, (filler_t *)nfs_readdir_filler, desc);
+       return read_cache_page(desc->file->f_mapping, desc->page_index,
+                       nfs_readdir_filler, desc);
 }
 
 /*
index 0fd811a..2436bd9 100644 (file)
@@ -492,7 +492,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
                        struct nfs_page *req;
                        unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
                        /* XXX do we need to do the eof zeroing found in async_filler? */
-                       req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+                       req = nfs_create_request(dreq->ctx, pagevec[i],
                                                 pgbase, req_len);
                        if (IS_ERR(req)) {
                                result = PTR_ERR(req);
@@ -663,6 +663,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
        }
 
        list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+               /* Bump the transmission count */
+               req->wb_nio++;
                if (!nfs_pageio_add_request(&desc, req)) {
                        nfs_list_move_request(req, &failed);
                        spin_lock(&cinfo.inode->i_lock);
@@ -703,6 +705,11 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
                req = nfs_list_entry(data->pages.next);
                nfs_list_remove_request(req);
                if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+                       /*
+                        * Despite the reboot, the write was successful,
+                        * so reset wb_nio.
+                        */
+                       req->wb_nio = 0;
                        /* Note the rewrite will go through mds */
                        nfs_mark_request_commit(req, NULL, &cinfo, 0);
                } else
@@ -899,7 +906,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
                        struct nfs_page *req;
                        unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-                       req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+                       req = nfs_create_request(dreq->ctx, pagevec[i],
                                                 pgbase, req_len);
                        if (IS_ERR(req)) {
                                result = PTR_ERR(req);
index 4899b85..144e183 100644 (file)
@@ -147,7 +147,7 @@ nfs_file_flush(struct file *file, fl_owner_t id)
                return 0;
 
        /* Flush writes to the server and return any errors */
-       return vfs_fsync(file, 0);
+       return nfs_wb_all(inode);
 }
 
 ssize_t
@@ -199,13 +199,6 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
  * Flush any dirty pages for this process, and check for write errors.
  * The return status from this call provides a reliable indication of
  * whether any write errors occurred for this process.
- *
- * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
- * disk, but it retrieves and clears ctx->error after synching, despite
- * the two being set at the same time in nfs_context_set_write_error().
- * This is because the former is used to notify the _next_ call to
- * nfs_file_write() that a write error occurred, and hence cause it to
- * fall back to doing a synchronous write.
  */
 static int
 nfs_file_fsync_commit(struct file *file, int datasync)
@@ -220,11 +213,8 @@ nfs_file_fsync_commit(struct file *file, int datasync)
        nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
        do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
        status = nfs_commit_inode(inode, FLUSH_SYNC);
-       if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) {
-               ret = xchg(&ctx->error, 0);
-               if (ret)
-                       goto out;
-       }
+       if (status == 0)
+               status = file_check_and_advance_wb_err(file);
        if (status < 0) {
                ret = status;
                goto out;
@@ -245,13 +235,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        trace_nfs_fsync_enter(inode);
 
        do {
-               struct nfs_open_context *ctx = nfs_file_open_context(file);
-               ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-               if (test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) {
-                       int ret2 = xchg(&ctx->error, 0);
-                       if (ret2)
-                               ret = ret2;
-               }
+               ret = file_write_and_wait_range(file, start, end);
                if (ret != 0)
                        break;
                ret = nfs_file_fsync_commit(file, datasync);
@@ -600,8 +584,7 @@ static int nfs_need_check_write(struct file *filp, struct inode *inode)
        struct nfs_open_context *ctx;
 
        ctx = nfs_file_open_context(filp);
-       if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
-           nfs_ctx_key_to_expire(ctx, inode))
+       if (nfs_ctx_key_to_expire(ctx, inode))
                return 1;
        return 0;
 }
@@ -655,7 +638,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 
        /* Return error values */
        if (nfs_need_check_write(file, inode)) {
-               int err = vfs_fsync(file, 0);
+               int err = nfs_wb_all(inode);
                if (err < 0)
                        result = err;
        }
@@ -709,7 +692,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
         * Flush all pending writes before doing anything
         * with locks..
         */
-       vfs_fsync(filp, 0);
+       nfs_wb_all(inode);
 
        l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
        if (!IS_ERR(l_ctx)) {
index 61f46fa..3cb073c 100644 (file)
@@ -904,7 +904,7 @@ fl_pnfs_update_layout(struct inode *ino,
        status = filelayout_check_deviceid(lo, fl, gfp_flags);
        if (status) {
                pnfs_put_lseg(lseg);
-               lseg = ERR_PTR(status);
+               lseg = NULL;
        }
 out:
        return lseg;
@@ -917,7 +917,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
        pnfs_generic_pg_check_layout(pgio);
        if (!pgio->pg_lseg) {
                pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
-                                                     req->wb_context,
+                                                     nfs_req_openctx(req),
                                                      0,
                                                      NFS4_MAX_UINT64,
                                                      IOMODE_READ,
@@ -944,7 +944,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
        pnfs_generic_pg_check_layout(pgio);
        if (!pgio->pg_lseg) {
                pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
-                                                     req->wb_context,
+                                                     nfs_req_openctx(req),
                                                      0,
                                                      NFS4_MAX_UINT64,
                                                      IOMODE_RW,
index 6673d4f..9920c52 100644 (file)
@@ -28,6 +28,8 @@
 #define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ)
 #define FF_LAYOUTRETURN_MAXERR 20
 
+static unsigned short io_maxretrans;
+
 static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
                struct nfs_pgio_header *hdr);
 static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
@@ -871,7 +873,7 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
 {
        pnfs_put_lseg(pgio->pg_lseg);
        pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                          req->wb_context,
+                                          nfs_req_openctx(req),
                                           0,
                                           NFS4_MAX_UINT64,
                                           IOMODE_READ,
@@ -925,6 +927,7 @@ retry:
        pgm = &pgio->pg_mirrors[0];
        pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
 
+       pgio->pg_maxretrans = io_maxretrans;
        return;
 out_nolseg:
        if (pgio->pg_error < 0)
@@ -950,7 +953,7 @@ retry:
        pnfs_generic_pg_check_layout(pgio);
        if (!pgio->pg_lseg) {
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  req->wb_context,
+                                                  nfs_req_openctx(req),
                                                   0,
                                                   NFS4_MAX_UINT64,
                                                   IOMODE_RW,
@@ -992,6 +995,7 @@ retry:
                pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize;
        }
 
+       pgio->pg_maxretrans = io_maxretrans;
        return;
 
 out_mds:
@@ -1006,7 +1010,7 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
 {
        if (!pgio->pg_lseg) {
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  req->wb_context,
+                                                  nfs_req_openctx(req),
                                                   0,
                                                   NFS4_MAX_UINT64,
                                                   IOMODE_RW,
@@ -2515,3 +2519,7 @@ MODULE_DESCRIPTION("The NFSv4 flexfile layout driver");
 
 module_init(nfs4flexfilelayout_init);
 module_exit(nfs4flexfilelayout_exit);
+
+module_param(io_maxretrans, ushort, 0644);
+MODULE_PARM_DESC(io_maxretrans, "The  number of times the NFSv4.1 client "
+                       "retries an I/O request before returning an error. ");
index f61af83..3bc2550 100644 (file)
@@ -885,10 +885,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
                spin_lock(&inode->i_lock);
                res = __nfs_find_lock_context(ctx);
                if (res == NULL) {
-                       list_add_tail_rcu(&new->list, &ctx->lock_context.list);
-                       new->open_context = ctx;
-                       res = new;
-                       new = NULL;
+                       new->open_context = get_nfs_open_context(ctx);
+                       if (new->open_context) {
+                               list_add_tail_rcu(&new->list,
+                                               &ctx->lock_context.list);
+                               res = new;
+                               new = NULL;
+                       } else
+                               res = ERR_PTR(-EBADF);
                }
                spin_unlock(&inode->i_lock);
                kfree(new);
@@ -906,6 +910,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
                return;
        list_del_rcu(&l_ctx->list);
        spin_unlock(&inode->i_lock);
+       put_nfs_open_context(ctx);
        kfree_rcu(l_ctx, rcu_head);
 }
 EXPORT_SYMBOL_GPL(nfs_put_lock_context);
index 331a050..498fab7 100644 (file)
@@ -84,6 +84,7 @@ struct nfs_client_initdata {
        u32 minorversion;
        struct net *net;
        const struct rpc_timeout *timeparms;
+       const struct cred *cred;
 };
 
 /*
@@ -766,15 +767,10 @@ static inline bool nfs_error_is_fatal(int err)
        case -ESTALE:
        case -E2BIG:
        case -ENOMEM:
+       case -ETIMEDOUT:
                return true;
        default:
                return false;
        }
 }
 
-static inline void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
-{
-       ctx->error = error;
-       smp_wmb();
-       set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
-}
index d979ff4..cb7c10e 100644 (file)
@@ -163,6 +163,7 @@ int nfs_mount(struct nfs_mount_request *info)
                .program        = &mnt_program,
                .version        = info->version,
                .authflavor     = RPC_AUTH_UNIX,
+               .cred           = current_cred(),
        };
        struct rpc_clnt         *mnt_clnt;
        int                     status;
@@ -249,6 +250,7 @@ void nfs_umount(const struct nfs_mount_request *info)
                .version        = info->version,
                .authflavor     = RPC_AUTH_UNIX,
                .flags          = RPC_CLNT_CREATE_NOPING,
+               .cred           = current_cred(),
        };
        struct rpc_message msg  = {
                .rpc_argp       = info->dirpath,
index a7ed29d..572794d 100644 (file)
@@ -76,6 +76,20 @@ static int nfs_stat_to_errno(enum nfs_stat);
  * or decoded inline.
  */
 
+static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt)
+{
+       if (clnt && clnt->cl_cred)
+               return clnt->cl_cred->user_ns;
+       return &init_user_ns;
+}
+
+static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp)
+{
+       if (rqstp->rq_task)
+               return rpc_userns(rqstp->rq_task->tk_client);
+       return &init_user_ns;
+}
+
 /*
  *     typedef opaque  nfsdata<>;
  */
@@ -248,7 +262,8 @@ static __be32 *xdr_decode_time(__be32 *p, struct timespec *timep)
  *     };
  *
  */
-static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+               struct user_namespace *userns)
 {
        u32 rdev, type;
        __be32 *p;
@@ -263,10 +278,10 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 
        fattr->mode = be32_to_cpup(p++);
        fattr->nlink = be32_to_cpup(p++);
-       fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+       fattr->uid = make_kuid(userns, be32_to_cpup(p++));
        if (!uid_valid(fattr->uid))
                goto out_uid;
-       fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+       fattr->gid = make_kgid(userns, be32_to_cpup(p++));
        if (!gid_valid(fattr->gid))
                goto out_gid;
                
@@ -321,7 +336,8 @@ static __be32 *xdr_time_not_set(__be32 *p)
        return p;
 }
 
-static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
+static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
+               struct user_namespace *userns)
 {
        struct timespec ts;
        __be32 *p;
@@ -333,11 +349,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
        else
                *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
        if (attr->ia_valid & ATTR_UID)
-               *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
+               *p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
        else
                *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
        if (attr->ia_valid & ATTR_GID)
-               *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
+               *p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
        else
                *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
        if (attr->ia_valid & ATTR_SIZE)
@@ -451,7 +467,8 @@ out_cheating:
  *     };
  */
 static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
-                          __u32 *op_status)
+                          __u32 *op_status,
+                          struct user_namespace *userns)
 {
        enum nfs_stat status;
        int error;
@@ -463,7 +480,7 @@ static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result,
                *op_status = status;
        if (status != NFS_OK)
                goto out_default;
-       error = decode_fattr(xdr, result);
+       error = decode_fattr(xdr, result, userns);
 out:
        return error;
 out_default:
@@ -498,19 +515,21 @@ static void encode_diropargs(struct xdr_stream *xdr, const struct nfs_fh *fh,
  *             void;
  *     };
  */
-static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result)
+static int decode_diropok(struct xdr_stream *xdr, struct nfs_diropok *result,
+               struct user_namespace *userns)
 {
        int error;
 
        error = decode_fhandle(xdr, result->fh);
        if (unlikely(error))
                goto out;
-       error = decode_fattr(xdr, result->fattr);
+       error = decode_fattr(xdr, result->fattr, userns);
 out:
        return error;
 }
 
-static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
+static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result,
+               struct user_namespace *userns)
 {
        enum nfs_stat status;
        int error;
@@ -520,7 +539,7 @@ static int decode_diropres(struct xdr_stream *xdr, struct nfs_diropok *result)
                goto out;
        if (status != NFS_OK)
                goto out_default;
-       error = decode_diropok(xdr, result);
+       error = decode_diropok(xdr, result, userns);
 out:
        return error;
 out_default:
@@ -559,7 +578,7 @@ static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
        const struct nfs_sattrargs *args = data;
 
        encode_fhandle(xdr, args->fh);
-       encode_sattr(xdr, args->sattr);
+       encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
@@ -674,7 +693,7 @@ static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
        const struct nfs_createargs *args = data;
 
        encode_diropargs(xdr, args->fh, args->name, args->len);
-       encode_sattr(xdr, args->sattr);
+       encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
@@ -741,7 +760,7 @@ static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
 
        encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
        encode_path(xdr, args->pages, args->pathlen);
-       encode_sattr(xdr, args->sattr);
+       encode_sattr(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 /*
@@ -803,13 +822,13 @@ out_default:
 static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
                                 void *result)
 {
-       return decode_attrstat(xdr, result, NULL);
+       return decode_attrstat(xdr, result, NULL, rpc_rqst_userns(req));
 }
 
 static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
                                 void *result)
 {
-       return decode_diropres(xdr, result);
+       return decode_diropres(xdr, result, rpc_rqst_userns(req));
 }
 
 /*
@@ -864,7 +883,7 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
        result->op_status = status;
        if (status != NFS_OK)
                goto out_default;
-       error = decode_fattr(xdr, result->fattr);
+       error = decode_fattr(xdr, result->fattr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        error = decode_nfsdata(xdr, result);
@@ -881,7 +900,8 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
 
        /* All NFSv2 writes are "file sync" writes */
        result->verf->committed = NFS_FILE_SYNC;
-       return decode_attrstat(xdr, result->fattr, &result->op_status);
+       return decode_attrstat(xdr, result->fattr, &result->op_status,
+                       rpc_rqst_userns(req));
 }
 
 /**
index 7879f2a..1afdb0f 100644 (file)
@@ -91,6 +91,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
                .proto = ds_proto,
                .net = mds_clp->cl_net,
                .timeparms = &ds_timeout,
+               .cred = mds_srv->cred,
        };
        struct nfs_client *clp;
        char buf[INET6_ADDRSTRLEN + 1];
index 110358f..abbbdde 100644 (file)
@@ -104,6 +104,20 @@ static const umode_t nfs_type2fmt[] = {
        [NF3FIFO] = S_IFIFO,
 };
 
+static struct user_namespace *rpc_userns(const struct rpc_clnt *clnt) /* pick the user namespace to use for @clnt */
+{
+       if (clnt && clnt->cl_cred) /* need both a client and a credential attached to it */
+               return clnt->cl_cred->user_ns; /* namespace recorded in the client's credential */
+       return &init_user_ns; /* fallback when clnt or clnt->cl_cred is NULL */
+}
+
+static struct user_namespace *rpc_rqst_userns(const struct rpc_rqst *rqstp) /* user namespace for an RPC request; rqstp itself is dereferenced unchecked */
+{
+       if (rqstp->rq_task) /* request is attached to a task: resolve through that task's client */
+               return rpc_userns(rqstp->rq_task->tk_client); /* rpc_userns() copes with a NULL tk_client */
+       return &init_user_ns; /* no task on the request: use the initial namespace */
+}
+
 /*
  * Encode/decode NFSv3 basic data types
  *
@@ -516,7 +530,8 @@ static __be32 *xdr_decode_nfstime3(__be32 *p, struct timespec *timep)
  *             set_mtime       mtime;
  *     };
  */
-static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
+static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr,
+               struct user_namespace *userns)
 {
        struct timespec ts;
        u32 nbytes;
@@ -551,13 +566,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
 
        if (attr->ia_valid & ATTR_UID) {
                *p++ = xdr_one;
-               *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
+               *p++ = cpu_to_be32(from_kuid_munged(userns, attr->ia_uid));
        } else
                *p++ = xdr_zero;
 
        if (attr->ia_valid & ATTR_GID) {
                *p++ = xdr_one;
-               *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
+               *p++ = cpu_to_be32(from_kgid_munged(userns, attr->ia_gid));
        } else
                *p++ = xdr_zero;
 
@@ -606,7 +621,8 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
  *             nfstime3        ctime;
  *     };
  */
-static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+               struct user_namespace *userns)
 {
        umode_t fmode;
        __be32 *p;
@@ -619,10 +635,10 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
 
        fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
        fattr->nlink = be32_to_cpup(p++);
-       fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
+       fattr->uid = make_kuid(userns, be32_to_cpup(p++));
        if (!uid_valid(fattr->uid))
                goto out_uid;
-       fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
+       fattr->gid = make_kgid(userns, be32_to_cpup(p++));
        if (!gid_valid(fattr->gid))
                goto out_gid;
 
@@ -659,7 +675,8 @@ out_gid:
  *             void;
  *     };
  */
-static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+               struct user_namespace *userns)
 {
        __be32 *p;
 
@@ -667,7 +684,7 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        if (unlikely(!p))
                return -EIO;
        if (*p != xdr_zero)
-               return decode_fattr3(xdr, fattr);
+               return decode_fattr3(xdr, fattr, userns);
        return 0;
 }
 
@@ -728,14 +745,15 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
        return 0;
 }
 
-static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+               struct user_namespace *userns) /* userns is used only for the post-op attributes below */
 {
         int error;
 
         error = decode_pre_op_attr(xdr, fattr);
         if (unlikely(error))
                 goto out;
-       error = decode_post_op_attr(xdr, fattr);
+       error = decode_post_op_attr(xdr, fattr, userns); /* decode_pre_op_attr() above takes no userns; only this call does */
 out:
         return error;
 }
@@ -837,7 +855,7 @@ static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
 {
        const struct nfs3_sattrargs *args = data;
        encode_nfs_fh3(xdr, args->fh);
-       encode_sattr3(xdr, args->sattr);
+       encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
        encode_sattrguard3(xdr, args);
 }
 
@@ -998,13 +1016,14 @@ static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
  *     };
  */
 static void encode_createhow3(struct xdr_stream *xdr,
-                             const struct nfs3_createargs *args)
+                             const struct nfs3_createargs *args,
+                             struct user_namespace *userns)
 {
        encode_uint32(xdr, args->createmode);
        switch (args->createmode) {
        case NFS3_CREATE_UNCHECKED:
        case NFS3_CREATE_GUARDED:
-               encode_sattr3(xdr, args->sattr);
+               encode_sattr3(xdr, args->sattr, userns);
                break;
        case NFS3_CREATE_EXCLUSIVE:
                encode_createverf3(xdr, args->verifier);
@@ -1021,7 +1040,7 @@ static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
        const struct nfs3_createargs *args = data;
 
        encode_diropargs3(xdr, args->fh, args->name, args->len);
-       encode_createhow3(xdr, args);
+       encode_createhow3(xdr, args, rpc_rqst_userns(req));
 }
 
 /*
@@ -1039,7 +1058,7 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
        const struct nfs3_mkdirargs *args = data;
 
        encode_diropargs3(xdr, args->fh, args->name, args->len);
-       encode_sattr3(xdr, args->sattr);
+       encode_sattr3(xdr, args->sattr, rpc_rqst_userns(req));
 }
 
 /*
@@ -1056,11 +1075,12 @@ static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
  *     };
  */
 static void encode_symlinkdata3(struct xdr_stream *xdr,
-                               const void *data)
+                               const void *data,
+                               struct user_namespace *userns) /* userns is handed straight to encode_sattr3() */
 {
         const struct nfs3_symlinkargs *args = data;
 
-       encode_sattr3(xdr, args->sattr);
+       encode_sattr3(xdr, args->sattr, userns); /* attributes are encoded before the symlink path */
         encode_nfspath3(xdr, args->pages, args->pathlen);
 }
 
@@ -1071,7 +1091,7 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
        const struct nfs3_symlinkargs *args = data;
 
        encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
-       encode_symlinkdata3(xdr, args);
+       encode_symlinkdata3(xdr, args, rpc_rqst_userns(req));
        xdr->buf->flags |= XDRBUF_WRITE;
 }
 
@@ -1100,24 +1120,26 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
  *     };
  */
 static void encode_devicedata3(struct xdr_stream *xdr,
-                              const struct nfs3_mknodargs *args)
+                              const struct nfs3_mknodargs *args,
+                              struct user_namespace *userns) /* userns is handed straight to encode_sattr3() */
 {
-       encode_sattr3(xdr, args->sattr);
+       encode_sattr3(xdr, args->sattr, userns); /* attributes are encoded before the device numbers (args->rdev) */
         encode_specdata3(xdr, args->rdev);
 }
 
 static void encode_mknoddata3(struct xdr_stream *xdr,
-                             const struct nfs3_mknodargs *args)
+                             const struct nfs3_mknodargs *args,
+                             struct user_namespace *userns)
 {
        encode_ftype3(xdr, args->type);
        switch (args->type) {
        case NF3CHR:
        case NF3BLK:
-               encode_devicedata3(xdr, args);
+               encode_devicedata3(xdr, args, userns);
                break;
        case NF3SOCK:
        case NF3FIFO:
-               encode_sattr3(xdr, args->sattr);
+               encode_sattr3(xdr, args->sattr, userns);
                break;
        case NF3REG:
        case NF3DIR:
@@ -1134,7 +1156,7 @@ static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
        const struct nfs3_mknodargs *args = data;
 
        encode_diropargs3(xdr, args->fh, args->name, args->len);
-       encode_mknoddata3(xdr, args);
+       encode_mknoddata3(xdr, args, rpc_rqst_userns(req));
 }
 
 /*
@@ -1379,7 +1401,7 @@ static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
                goto out;
        if (status != NFS3_OK)
                goto out_default;
-       error = decode_fattr3(xdr, result);
+       error = decode_fattr3(xdr, result, rpc_rqst_userns(req));
 out:
        return error;
 out_default:
@@ -1414,7 +1436,7 @@ static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_wcc_data(xdr, result);
+       error = decode_wcc_data(xdr, result, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -1449,6 +1471,7 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
                                   struct xdr_stream *xdr,
                                   void *data)
 {
+       struct user_namespace *userns = rpc_rqst_userns(req);
        struct nfs3_diropres *result = data;
        enum nfs_stat status;
        int error;
@@ -1461,14 +1484,14 @@ static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
        error = decode_nfs_fh3(xdr, result->fh);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, userns);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->dir_attr);
+       error = decode_post_op_attr(xdr, result->dir_attr, userns);
 out:
        return error;
 out_default:
-       error = decode_post_op_attr(xdr, result->dir_attr);
+       error = decode_post_op_attr(xdr, result->dir_attr, userns);
        if (unlikely(error))
                goto out;
        return nfs3_stat_to_errno(status);
@@ -1504,7 +1527,7 @@ static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -1545,7 +1568,7 @@ static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result);
+       error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -1623,7 +1646,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        result->op_status = status;
@@ -1694,7 +1717,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_wcc_data(xdr, result->fattr);
+       error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        result->op_status = status;
@@ -1728,14 +1751,15 @@ out_status:
  *     };
  */
 static int decode_create3resok(struct xdr_stream *xdr,
-                              struct nfs3_diropres *result)
+                              struct nfs3_diropres *result,
+                              struct user_namespace *userns)
 {
        int error;
 
        error = decode_post_op_fh3(xdr, result->fh);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, userns);
        if (unlikely(error))
                goto out;
        /* The server isn't required to return a file handle.
@@ -1744,7 +1768,7 @@ static int decode_create3resok(struct xdr_stream *xdr,
         * values for the new object. */
        if (result->fh->size == 0)
                result->fattr->valid = 0;
-       error = decode_wcc_data(xdr, result->dir_attr);
+       error = decode_wcc_data(xdr, result->dir_attr, userns);
 out:
        return error;
 }
@@ -1753,6 +1777,7 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
                                   struct xdr_stream *xdr,
                                   void *data)
 {
+       struct user_namespace *userns = rpc_rqst_userns(req);
        struct nfs3_diropres *result = data;
        enum nfs_stat status;
        int error;
@@ -1762,11 +1787,11 @@ static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
                goto out;
        if (status != NFS3_OK)
                goto out_default;
-       error = decode_create3resok(xdr, result);
+       error = decode_create3resok(xdr, result, userns);
 out:
        return error;
 out_default:
-       error = decode_wcc_data(xdr, result->dir_attr);
+       error = decode_wcc_data(xdr, result->dir_attr, userns);
        if (unlikely(error))
                goto out;
        return nfs3_stat_to_errno(status);
@@ -1801,7 +1826,7 @@ static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_wcc_data(xdr, result->dir_attr);
+       error = decode_wcc_data(xdr, result->dir_attr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -1836,6 +1861,7 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
                                   struct xdr_stream *xdr,
                                   void *data)
 {
+       struct user_namespace *userns = rpc_rqst_userns(req);
        struct nfs_renameres *result = data;
        enum nfs_stat status;
        int error;
@@ -1843,10 +1869,10 @@ static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_wcc_data(xdr, result->old_fattr);
+       error = decode_wcc_data(xdr, result->old_fattr, userns);
        if (unlikely(error))
                goto out;
-       error = decode_wcc_data(xdr, result->new_fattr);
+       error = decode_wcc_data(xdr, result->new_fattr, userns);
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -1880,6 +1906,7 @@ out_status:
 static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
                                 void *data)
 {
+       struct user_namespace *userns = rpc_rqst_userns(req);
        struct nfs3_linkres *result = data;
        enum nfs_stat status;
        int error;
@@ -1887,10 +1914,10 @@ static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, userns);
        if (unlikely(error))
                goto out;
-       error = decode_wcc_data(xdr, result->dir_attr);
+       error = decode_wcc_data(xdr, result->dir_attr, userns);
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -1939,6 +1966,7 @@ out_status:
 int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
                       bool plus)
 {
+       struct user_namespace *userns = rpc_userns(entry->server->client);
        struct nfs_entry old = *entry;
        __be32 *p;
        int error;
@@ -1973,7 +2001,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 
        if (plus) {
                entry->fattr->valid = 0;
-               error = decode_post_op_attr(xdr, entry->fattr);
+               error = decode_post_op_attr(xdr, entry->fattr, userns);
                if (unlikely(error))
                        return error;
                if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
@@ -2045,11 +2073,12 @@ static int decode_dirlist3(struct xdr_stream *xdr)
 }
 
 static int decode_readdir3resok(struct xdr_stream *xdr,
-                               struct nfs3_readdirres *result)
+                               struct nfs3_readdirres *result,
+                               struct user_namespace *userns)
 {
        int error;
 
-       error = decode_post_op_attr(xdr, result->dir_attr);
+       error = decode_post_op_attr(xdr, result->dir_attr, userns);
        if (unlikely(error))
                goto out;
        /* XXX: do we need to check if result->verf != NULL ? */
@@ -2074,11 +2103,11 @@ static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
                goto out;
        if (status != NFS3_OK)
                goto out_default;
-       error = decode_readdir3resok(xdr, result);
+       error = decode_readdir3resok(xdr, result, rpc_rqst_userns(req));
 out:
        return error;
 out_default:
-       error = decode_post_op_attr(xdr, result->dir_attr);
+       error = decode_post_op_attr(xdr, result->dir_attr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        return nfs3_stat_to_errno(status);
@@ -2138,7 +2167,7 @@ static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -2212,7 +2241,7 @@ static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -2273,7 +2302,7 @@ static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        if (status != NFS3_OK)
@@ -2315,7 +2344,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
        error = decode_nfsstat3(xdr, &status);
        if (unlikely(error))
                goto out;
-       error = decode_wcc_data(xdr, result->fattr);
+       error = decode_wcc_data(xdr, result->fattr, rpc_rqst_userns(req));
        if (unlikely(error))
                goto out;
        result->op_status = status;
@@ -2331,14 +2360,15 @@ out_status:
 #ifdef CONFIG_NFS_V3_ACL
 
 static inline int decode_getacl3resok(struct xdr_stream *xdr,
-                                     struct nfs3_getaclres *result)
+                                     struct nfs3_getaclres *result,
+                                     struct user_namespace *userns)
 {
        struct posix_acl **acl;
        unsigned int *aclcnt;
        size_t hdrlen;
        int error;
 
-       error = decode_post_op_attr(xdr, result->fattr);
+       error = decode_post_op_attr(xdr, result->fattr, userns);
        if (unlikely(error))
                goto out;
        error = decode_uint32(xdr, &result->mask);
@@ -2386,7 +2416,7 @@ static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
                goto out;
        if (status != NFS3_OK)
                goto out_default;
-       error = decode_getacl3resok(xdr, result);
+       error = decode_getacl3resok(xdr, result, rpc_rqst_userns(req));
 out:
        return error;
 out_default:
@@ -2405,7 +2435,7 @@ static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
                goto out;
        if (status != NFS3_OK)
                goto out_default;
-       error = decode_post_op_attr(xdr, result);
+       error = decode_post_op_attr(xdr, result, rpc_rqst_userns(req));
 out:
        return error;
 out_default:
index 06ac3d9..8a38a25 100644 (file)
@@ -206,6 +206,7 @@ struct nfs4_exception {
        unsigned char delay : 1,
                      recovering : 1,
                      retry : 1;
+       bool interruptible;
 };
 
 struct nfs4_state_recovery_ops {
index 1339ede..3ce2463 100644 (file)
@@ -870,6 +870,7 @@ static int nfs4_set_client(struct nfs_server *server,
                .minorversion = minorversion,
                .net = net,
                .timeparms = timeparms,
+               .cred = server->cred,
        };
        struct nfs_client *clp;
 
@@ -931,6 +932,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
                .minorversion = minor_version,
                .net = mds_clp->cl_net,
                .timeparms = &ds_timeout,
+               .cred = mds_srv->cred,
        };
        char buf[INET6_ADDRSTRLEN + 1];
 
@@ -1107,6 +1109,8 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
        if (!server)
                return ERR_PTR(-ENOMEM);
 
+       server->cred = get_cred(current_cred());
+
        auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
 
        /* set up the general RPC client */
@@ -1143,6 +1147,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
        parent_server = NFS_SB(data->sb);
        parent_client = parent_server->nfs_client;
 
+       server->cred = get_cred(parent_server->cred);
+
        /* Initialise the client representation from the parent server */
        nfs_server_copy_userdata(server, parent_server);
 
index 00d1719..cf42a8b 100644 (file)
@@ -125,7 +125,7 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
                return filemap_fdatawrite(file->f_mapping);
 
        /* Flush writes to the server and return any errors */
-       return vfs_fsync(file, 0);
+       return nfs_wb_all(inode);
 }
 
 #ifdef CONFIG_NFS_V4_2
@@ -187,7 +187,7 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
        bool same_inode = false;
        int ret;
 
-       if (remap_flags & ~REMAP_FILE_ADVISORY)
+       if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
                return -EINVAL;
 
        /* check alignment w.r.t. clone_blksize */
index bf34dda..4884fda 100644 (file)
@@ -69,8 +69,16 @@ struct idmap {
        struct rpc_pipe         *idmap_pipe;
        struct idmap_legacy_upcalldata *idmap_upcall_data;
        struct mutex            idmap_mutex;
+       const struct cred       *cred;
 };
 
+static struct user_namespace *idmap_userns(const struct idmap *idmap) /* pick the user namespace used for this idmap's uid/gid conversions */
+{
+       if (idmap && idmap->cred) /* need both an idmap and a credential stored in it */
+               return idmap->cred->user_ns; /* namespace recorded in the idmap's credential */
+       return &init_user_ns; /* fallback when idmap or idmap->cred is NULL */
+}
+
 /**
  * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
  * @fattr: fully initialised struct nfs_fattr
@@ -271,14 +279,15 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
                                         const char *type, struct idmap *idmap)
 {
        char *desc;
-       struct key *rkey;
+       struct key *rkey = ERR_PTR(-EAGAIN);
        ssize_t ret;
 
        ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
        if (ret < 0)
                return ERR_PTR(ret);
 
-       rkey = request_key(&key_type_id_resolver, desc, "");
+       if (!idmap->cred || idmap->cred->user_ns == &init_user_ns)
+               rkey = request_key(&key_type_id_resolver, desc, "");
        if (IS_ERR(rkey)) {
                mutex_lock(&idmap->idmap_mutex);
                rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
@@ -452,6 +461,9 @@ nfs_idmap_new(struct nfs_client *clp)
        if (idmap == NULL)
                return -ENOMEM;
 
+       mutex_init(&idmap->idmap_mutex);
+       idmap->cred = get_cred(clp->cl_rpcclient->cl_cred);
+
        rpc_init_pipe_dir_object(&idmap->idmap_pdo,
                        &nfs_idmap_pipe_dir_object_ops,
                        idmap);
@@ -462,7 +474,6 @@ nfs_idmap_new(struct nfs_client *clp)
                goto err;
        }
        idmap->idmap_pipe = pipe;
-       mutex_init(&idmap->idmap_mutex);
 
        error = rpc_add_pipe_dir_object(clp->cl_net,
                        &clp->cl_rpcclient->cl_pipedir_objects,
@@ -475,6 +486,7 @@ nfs_idmap_new(struct nfs_client *clp)
 err_destroy_pipe:
        rpc_destroy_pipe_data(idmap->idmap_pipe);
 err:
+       put_cred(idmap->cred);
        kfree(idmap);
        return error;
 }
@@ -491,6 +503,7 @@ nfs_idmap_delete(struct nfs_client *clp)
                        &clp->cl_rpcclient->cl_pipedir_objects,
                        &idmap->idmap_pdo);
        rpc_destroy_pipe_data(idmap->idmap_pipe);
+       put_cred(idmap->cred);
        kfree(idmap);
 }
 
@@ -735,7 +748,7 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_
        if (!nfs_map_string_to_numeric(name, namelen, &id))
                ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
        if (ret == 0) {
-               *uid = make_kuid(&init_user_ns, id);
+               *uid = make_kuid(idmap_userns(idmap), id);
                if (!uid_valid(*uid))
                        ret = -ERANGE;
        }
@@ -752,7 +765,7 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size
        if (!nfs_map_string_to_numeric(name, namelen, &id))
                ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
        if (ret == 0) {
-               *gid = make_kgid(&init_user_ns, id);
+               *gid = make_kgid(idmap_userns(idmap), id);
                if (!gid_valid(*gid))
                        ret = -ERANGE;
        }
@@ -766,7 +779,7 @@ int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf,
        int ret = -EINVAL;
        __u32 id;
 
-       id = from_kuid(&init_user_ns, uid);
+       id = from_kuid_munged(idmap_userns(idmap), uid);
        if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
                ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
        if (ret < 0)
@@ -780,7 +793,7 @@ int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf,
        int ret = -EINVAL;
        __u32 id;
 
-       id = from_kgid(&init_user_ns, gid);
+       id = from_kgid_munged(idmap_userns(idmap), gid);
        if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
                ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
        if (ret < 0)
index 741ff8c..c29cbef 100644 (file)
@@ -400,17 +400,32 @@ static long nfs4_update_delay(long *timeout)
        return ret;
 }
 
-static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
+static int nfs4_delay_killable(long *timeout) /* sleep for the interval from nfs4_update_delay(timeout); calls the *_killable sleep helper */
 {
-       int res = 0;
-
         might_sleep();
 
         freezable_schedule_timeout_killable_unsafe(
                 nfs4_update_delay(timeout));
-       if (fatal_signal_pending(current))
-               res = -ERESTARTSYS;
-       return res;
+       if (!__fatal_signal_pending(current)) /* no fatal signal pending after the sleep */
+               return 0; /* success: no fatal signal was pending */
+       return -EINTR; /* fatal signal pending; the removed code returned -ERESTARTSYS here */
+}
+
+static int nfs4_delay_interruptible(long *timeout) /* sleep for the interval from nfs4_update_delay(timeout); calls the *_interruptible sleep helper */
+{
+       might_sleep();
+
+       freezable_schedule_timeout_interruptible(nfs4_update_delay(timeout)); /* unlike the killable variant, this calls the *_interruptible helper */
+       if (!signal_pending(current)) /* no signal of any kind is pending */
+               return 0; /* success: nothing pending */
+       return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS; /* fatal signal -> -EINTR; any other pending signal -> -ERESTARTSYS */
+}
+
+static int nfs4_delay(long *timeout, bool interruptible) /* dispatch to one of the two sleep variants; the old clnt parameter is gone */
+{
+       if (interruptible) /* caller opted in; nfs4_handle_exception() passes exception->interruptible */
+               return nfs4_delay_interruptible(timeout); /* returns 0, -EINTR or -ERESTARTSYS */
+       return nfs4_delay_killable(timeout); /* default path: returns 0 or -EINTR */
+}
 
 /* This is the error handling routine for processes that are allowed
@@ -546,7 +561,8 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
 
        ret = nfs4_do_handle_exception(server, errorcode, exception);
        if (exception->delay) {
-               ret = nfs4_delay(server->client, &exception->timeout);
+               ret = nfs4_delay(&exception->timeout,
+                               exception->interruptible);
                goto out_retry;
        }
        if (exception->recovering) {
@@ -978,10 +994,8 @@ int nfs4_setup_sequence(struct nfs_client *client,
        if (res->sr_slot != NULL)
                goto out_start;
 
-       if (session) {
+       if (session)
                tbl = &session->fc_slot_table;
-               task->tk_timeout = 0;
-       }
 
        spin_lock(&tbl->slot_tbl_lock);
        /* The state manager will wait until the slot table is empty */
@@ -990,9 +1004,8 @@ int nfs4_setup_sequence(struct nfs_client *client,
 
        slot = nfs4_alloc_slot(tbl);
        if (IS_ERR(slot)) {
-               /* Try again in 1/4 second */
                if (slot == ERR_PTR(-ENOMEM))
-                       task->tk_timeout = HZ >> 2;
+                       goto out_sleep_timeout;
                goto out_sleep;
        }
        spin_unlock(&tbl->slot_tbl_lock);
@@ -1004,11 +1017,20 @@ out_start:
        nfs41_sequence_res_init(res);
        rpc_call_start(task);
        return 0;
-
+out_sleep_timeout:
+       /* Try again in 1/4 second */
+       if (args->sa_privileged)
+               rpc_sleep_on_priority_timeout(&tbl->slot_tbl_waitq, task,
+                               jiffies + (HZ >> 2), RPC_PRIORITY_PRIVILEGED);
+       else
+               rpc_sleep_on_timeout(&tbl->slot_tbl_waitq, task,
+                               NULL, jiffies + (HZ >> 2));
+       spin_unlock(&tbl->slot_tbl_lock);
+       return -EAGAIN;
 out_sleep:
        if (args->sa_privileged)
                rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
-                               NULL, RPC_PRIORITY_PRIVILEGED);
+                               RPC_PRIORITY_PRIVILEGED);
        else
                rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
        spin_unlock(&tbl->slot_tbl_lock);
@@ -3060,7 +3082,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
                                        int *opened)
 {
        struct nfs_server *server = NFS_SERVER(dir);
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        struct nfs4_state *res;
        struct nfs4_open_createattrs c = {
                .label = label,
@@ -3673,7 +3697,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 
 int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = nfs4_handle_exception(server,
@@ -3715,7 +3741,9 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
                struct nfs_fsinfo *info)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = _nfs4_lookup_root(server, fhandle, info);
@@ -3942,7 +3970,9 @@ static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
                                struct nfs_fattr *fattr, struct nfs4_label *label,
                                struct inode *inode)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode);
@@ -4065,7 +4095,9 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
                                   const struct qstr *name, struct nfs_fh *fhandle,
                                   struct nfs_fattr *fattr, struct nfs4_label *label)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        struct rpc_clnt *client = *clnt;
        int err;
        do {
@@ -4169,7 +4201,9 @@ static int _nfs4_proc_lookupp(struct inode *inode,
 static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
                             struct nfs_fattr *fattr, struct nfs4_label *label)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
@@ -4216,7 +4250,9 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
 
 static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = _nfs4_proc_access(inode, entry);
@@ -4271,7 +4307,9 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
 static int nfs4_proc_readlink(struct inode *inode, struct page *page,
                unsigned int pgbase, unsigned int pglen)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = _nfs4_proc_readlink(inode, page, pgbase, pglen);
@@ -4347,7 +4385,9 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
 
 static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        struct inode *inode = d_inode(dentry);
        int err;
 
@@ -4368,7 +4408,9 @@ static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
 
 static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
 
        do {
@@ -4527,7 +4569,9 @@ out:
 
 static int nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = nfs4_handle_exception(NFS_SERVER(inode),
@@ -4634,7 +4678,9 @@ out:
 static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
                struct page *page, unsigned int len, struct iattr *sattr)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        struct nfs4_label l, *label = NULL;
        int err;
 
@@ -4673,7 +4719,9 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
                struct iattr *sattr)
 {
        struct nfs_server *server = NFS_SERVER(dir);
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        struct nfs4_label l, *label = NULL;
        int err;
 
@@ -4733,7 +4781,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
 static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
                u64 cookie, struct page **pages, unsigned int count, bool plus)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = _nfs4_proc_readdir(dentry, cred, cookie,
@@ -4784,7 +4834,9 @@ static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
                struct iattr *sattr, dev_t rdev)
 {
        struct nfs_server *server = NFS_SERVER(dir);
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        struct nfs4_label l, *label = NULL;
        int err;
 
@@ -4826,7 +4878,9 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 
 static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = nfs4_handle_exception(server,
@@ -4857,7 +4911,9 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 
 static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        unsigned long now = jiffies;
        int err;
 
@@ -4919,7 +4975,9 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
 static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
                struct nfs_pathconf *pathconf)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
 
        do {
@@ -5488,7 +5546,9 @@ out_free:
 
 static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        ssize_t ret;
        do {
                ret = __nfs4_get_acl_uncached(inode, buf, buflen);
@@ -5622,7 +5682,9 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
 static int nfs4_get_security_label(struct inode *inode, void *buf,
                                        size_t buflen)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
 
        if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
@@ -6263,7 +6325,9 @@ out:
 
 static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
 
        do {
@@ -6827,6 +6891,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
        struct nfs4_exception exception = {
                .state = state,
                .inode = state->inode,
+               .interruptible = true,
        };
        int err;
 
@@ -7240,7 +7305,9 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
                           struct nfs4_fs_locations *fs_locations,
                           struct page *page)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = _nfs4_proc_fs_locations(client, dir, name,
@@ -7383,7 +7450,9 @@ int nfs4_proc_get_locations(struct inode *inode,
        struct nfs_client *clp = server->nfs_client;
        const struct nfs4_mig_recovery_ops *ops =
                                        clp->cl_mvops->mig_recovery_ops;
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int status;
 
        dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
@@ -7507,7 +7576,9 @@ int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred)
        struct nfs_client *clp = server->nfs_client;
        const struct nfs4_mig_recovery_ops *ops =
                                        clp->cl_mvops->mig_recovery_ops;
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int status;
 
        dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__,
@@ -7573,7 +7644,9 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
 int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
                      struct nfs4_secinfo_flavors *flavors)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = -NFS4ERR_WRONGSEC;
@@ -9263,7 +9336,9 @@ static int
 nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
                           struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                /* first try using integrity protection */
@@ -9430,7 +9505,9 @@ static int nfs41_test_stateid(struct nfs_server *server,
                nfs4_stateid *stateid,
                const struct cred *cred)
 {
-       struct nfs4_exception exception = { };
+       struct nfs4_exception exception = {
+               .interruptible = true,
+       };
        int err;
        do {
                err = _nfs41_test_stateid(server, stateid, cred);
index 3de3647..e2e3c4f 100644 (file)
@@ -159,6 +159,10 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
                /* Sustain the lease, even if it's empty.  If the clientid4
                 * goes stale it's of no use for trunking discovery. */
                nfs4_schedule_state_renewal(*result);
+
+               /* If the client state needs to recover, do it. */
+               if (clp->cl_state)
+                       nfs4_schedule_state_manager(clp);
        }
 out:
        return status;
@@ -2346,8 +2350,7 @@ static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
 {
        /* FIXME: For now, we destroy all layouts. */
        pnfs_destroy_all_layouts(clp);
-       /* FIXME: For now, we test all delegations+open state+locks. */
-       nfs41_handle_some_state_revoked(clp);
+       nfs_test_expired_all_delegations(clp);
        dprintk("%s: Recallable state revoked on server %s!\n", __func__,
                        clp->cl_hostname);
 }
index e9f39fa..6ec3001 100644 (file)
@@ -16,8 +16,8 @@
 #include <linux/nfs.h>
 #include <linux/nfs3.h>
 #include <linux/nfs4.h>
-#include <linux/nfs_page.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 #include <linux/nfs_mount.h>
 #include <linux/export.h>
 
@@ -47,7 +47,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 
        hdr->req = nfs_list_entry(mirror->pg_list.next);
        hdr->inode = desc->pg_inode;
-       hdr->cred = hdr->req->wb_context->cred;
+       hdr->cred = nfs_req_openctx(hdr->req)->cred;
        hdr->io_start = req_offset(hdr->req);
        hdr->good_bytes = mirror->pg_count;
        hdr->io_completion = desc->pg_io_completion;
@@ -295,25 +295,13 @@ out:
                nfs_release_request(head);
 }
 
-/**
- * nfs_create_request - Create an NFS read/write request.
- * @ctx: open context to use
- * @page: page to write
- * @last: last nfs request created for this page group or NULL if head
- * @offset: starting offset within the page for the write
- * @count: number of bytes to read/write
- *
- * The page must be locked by the caller. This makes sure we never
- * create two different requests for the same page.
- * User should ensure it is safe to sleep in this function.
- */
-struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct page *page,
-                  struct nfs_page *last, unsigned int offset,
+static struct nfs_page *
+__nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
+                  unsigned int pgbase, unsigned int offset,
                   unsigned int count)
 {
        struct nfs_page         *req;
-       struct nfs_lock_context *l_ctx;
+       struct nfs_open_context *ctx = l_ctx->open_context;
 
        if (test_bit(NFS_CONTEXT_BAD, &ctx->flags))
                return ERR_PTR(-EBADF);
@@ -322,13 +310,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
        if (req == NULL)
                return ERR_PTR(-ENOMEM);
 
-       /* get lock context early so we can deal with alloc failures */
-       l_ctx = nfs_get_lock_context(ctx);
-       if (IS_ERR(l_ctx)) {
-               nfs_page_free(req);
-               return ERR_CAST(l_ctx);
-       }
        req->wb_lock_context = l_ctx;
+       refcount_inc(&l_ctx->count);
        atomic_inc(&l_ctx->io_count);
 
        /* Initialize the request struct. Initially, we assume a
@@ -340,14 +323,58 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
                get_page(page);
        }
        req->wb_offset  = offset;
-       req->wb_pgbase  = offset;
+       req->wb_pgbase  = pgbase;
        req->wb_bytes   = count;
-       req->wb_context = get_nfs_open_context(ctx);
        kref_init(&req->wb_kref);
-       nfs_page_group_init(req, last);
+       req->wb_nio = 0;
        return req;
 }
 
+/**
+ * nfs_create_request - Create an NFS read/write request.
+ * @ctx: open context to use
+ * @page: page to write
+ * @offset: starting offset within the page for the write
+ * @count: number of bytes to read/write
+ *
+ * The page must be locked by the caller. This makes sure we never
+ * create two different requests for the same page.
+ * User should ensure it is safe to sleep in this function.
+ */
+struct nfs_page *
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
+                  unsigned int offset, unsigned int count)
+{
+       struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx);
+       struct nfs_page *ret;
+
+       if (IS_ERR(l_ctx))
+               return ERR_CAST(l_ctx);
+       ret = __nfs_create_request(l_ctx, page, offset, offset, count);
+       if (!IS_ERR(ret))
+               nfs_page_group_init(ret, NULL);
+       nfs_put_lock_context(l_ctx);
+       return ret;
+}
+
+static struct nfs_page *
+nfs_create_subreq(struct nfs_page *req, struct nfs_page *last,
+                 unsigned int pgbase, unsigned int offset,
+                 unsigned int count)
+{
+       struct nfs_page *ret;
+
+       ret = __nfs_create_request(req->wb_lock_context, req->wb_page,
+                       pgbase, offset, count);
+       if (!IS_ERR(ret)) {
+               nfs_lock_request(ret);
+               ret->wb_index = req->wb_index;
+               nfs_page_group_init(ret, last);
+               ret->wb_nio = req->wb_nio;
+       }
+       return ret;
+}
+
 /**
  * nfs_unlock_request - Unlock request and wake up sleepers.
  * @req: pointer to request
@@ -386,8 +413,8 @@ void nfs_unlock_and_release_request(struct nfs_page *req)
 static void nfs_clear_request(struct nfs_page *req)
 {
        struct page *page = req->wb_page;
-       struct nfs_open_context *ctx = req->wb_context;
        struct nfs_lock_context *l_ctx = req->wb_lock_context;
+       struct nfs_open_context *ctx;
 
        if (page != NULL) {
                put_page(page);
@@ -396,16 +423,13 @@ static void nfs_clear_request(struct nfs_page *req)
        if (l_ctx != NULL) {
                if (atomic_dec_and_test(&l_ctx->io_count)) {
                        wake_up_var(&l_ctx->io_count);
+                       ctx = l_ctx->open_context;
                        if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
                                rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
                }
                nfs_put_lock_context(l_ctx);
                req->wb_lock_context = NULL;
        }
-       if (ctx != NULL) {
-               put_nfs_open_context(ctx);
-               req->wb_context = NULL;
-       }
 }
 
 /**
@@ -550,7 +574,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
        hdr->args.pgbase = req->wb_pgbase;
        hdr->args.pages  = hdr->page_array.pagevec;
        hdr->args.count  = count;
-       hdr->args.context = get_nfs_open_context(req->wb_context);
+       hdr->args.context = get_nfs_open_context(nfs_req_openctx(req));
        hdr->args.lock_context = req->wb_lock_context;
        hdr->args.stable  = NFS_UNSTABLE;
        switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
@@ -698,6 +722,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
        desc->pg_mirrors_dynamic = NULL;
        desc->pg_mirrors = desc->pg_mirrors_static;
        nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize);
+       desc->pg_maxretrans = 0;
 }
 
 /**
@@ -906,9 +931,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
        struct file_lock_context *flctx;
 
        if (prev) {
-               if (!nfs_match_open_context(req->wb_context, prev->wb_context))
+               if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev)))
                        return false;
-               flctx = d_inode(req->wb_context->dentry)->i_flctx;
+               flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx;
                if (flctx != NULL &&
                    !(list_empty_careful(&flctx->flc_posix) &&
                      list_empty_careful(&flctx->flc_flock)) &&
@@ -957,6 +982,15 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
                        return 0;
                mirror->pg_base = req->wb_pgbase;
        }
+
+       if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) {
+               if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR)
+                       desc->pg_error = -ETIMEDOUT;
+               else
+                       desc->pg_error = -EIO;
+               return 0;
+       }
+
        if (!nfs_can_coalesce_requests(prev, req, desc))
                return 0;
        nfs_list_move_request(req, &mirror->pg_list);
@@ -1049,14 +1083,10 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                pgbase += subreq->wb_bytes;
 
                if (bytes_left) {
-                       subreq = nfs_create_request(req->wb_context,
-                                       req->wb_page,
-                                       subreq, pgbase, bytes_left);
+                       subreq = nfs_create_subreq(req, subreq, pgbase,
+                                       offset, bytes_left);
                        if (IS_ERR(subreq))
                                goto err_ptr;
-                       nfs_lock_request(subreq);
-                       subreq->wb_offset  = offset;
-                       subreq->wb_index = req->wb_index;
                }
        } while (bytes_left > 0);
 
@@ -1158,19 +1188,14 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                             lastreq = lastreq->wb_this_page)
                                ;
 
-                       dupreq = nfs_create_request(req->wb_context,
-                                       req->wb_page, lastreq, pgbase, bytes);
+                       dupreq = nfs_create_subreq(req, lastreq,
+                                       pgbase, offset, bytes);
 
+                       nfs_page_group_unlock(req);
                        if (IS_ERR(dupreq)) {
-                               nfs_page_group_unlock(req);
                                desc->pg_error = PTR_ERR(dupreq);
                                goto out_failed;
                        }
-
-                       nfs_lock_request(dupreq);
-                       nfs_page_group_unlock(req);
-                       dupreq->wb_offset = offset;
-                       dupreq->wb_index = req->wb_index;
                } else
                        dupreq = req;
 
index 7066cd7..83722e9 100644 (file)
@@ -2436,7 +2436,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
                        rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
 
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  req->wb_context,
+                                                  nfs_req_openctx(req),
                                                   req_offset(req),
                                                   rd_size,
                                                   IOMODE_READ,
@@ -2463,7 +2463,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
        pnfs_generic_pg_check_range(pgio, req);
        if (pgio->pg_lseg == NULL) {
                pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  req->wb_context,
+                                                  nfs_req_openctx(req),
                                                   req_offset(req),
                                                   wb_size,
                                                   IOMODE_RW,
index c0420b9..f15609c 100644 (file)
@@ -459,7 +459,7 @@ static inline bool
 pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
                         struct nfs_commit_info *cinfo, u32 ds_commit_idx)
 {
-       struct inode *inode = d_inode(req->wb_context->dentry);
+       struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
        if (lseg == NULL || ld->mark_request_commit == NULL)
@@ -471,7 +471,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 static inline bool
 pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
-       struct inode *inode = d_inode(req->wb_context->dentry);
+       struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
        if (ld == NULL || ld->clear_request_commit == NULL)
index 1d95a60..c799e54 100644 (file)
@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
 static void nfs_readpage_release(struct nfs_page *req)
 {
-       struct inode *inode = d_inode(req->wb_context->dentry);
+       struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
 
        dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
                (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
@@ -118,7 +118,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
        len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);
-       new = nfs_create_request(ctx, page, NULL, 0, len);
+       new = nfs_create_request(ctx, page, 0, len);
        if (IS_ERR(new)) {
                unlock_page(page);
                return PTR_ERR(new);
@@ -363,7 +363,7 @@ readpage_async_filler(void *data, struct page *page)
        if (len == 0)
                return nfs_return_empty_page(page);
 
-       new = nfs_create_request(desc->ctx, page, NULL, 0, len);
+       new = nfs_create_request(desc->ctx, page, 0, len);
        if (IS_ERR(new))
                goto out_error;
 
index 450ae77..d6c6874 100644 (file)
@@ -78,7 +78,7 @@
 
 enum {
        /* Mount options that take no arguments */
-       Opt_soft, Opt_hard,
+       Opt_soft, Opt_softerr, Opt_hard,
        Opt_posix, Opt_noposix,
        Opt_cto, Opt_nocto,
        Opt_ac, Opt_noac,
@@ -125,6 +125,7 @@ static const match_table_t nfs_mount_option_tokens = {
        { Opt_sloppy, "sloppy" },
 
        { Opt_soft, "soft" },
+       { Opt_softerr, "softerr" },
        { Opt_hard, "hard" },
        { Opt_deprecated, "intr" },
        { Opt_deprecated, "nointr" },
@@ -628,7 +629,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
                const char *str;
                const char *nostr;
        } nfs_info[] = {
-               { NFS_MOUNT_SOFT, ",soft", ",hard" },
+               { NFS_MOUNT_SOFT, ",soft", "" },
+               { NFS_MOUNT_SOFTERR, ",softerr", "" },
                { NFS_MOUNT_POSIX, ",posix", "" },
                { NFS_MOUNT_NOCTO, ",nocto", "" },
                { NFS_MOUNT_NOAC, ",noac", "" },
@@ -658,6 +660,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
                seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
        if (nfss->acdirmax != NFS_DEF_ACDIRMAX*HZ || showdefaults)
                seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
+       if (!(nfss->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)))
+                       seq_puts(m, ",hard");
        for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
                if (nfss->flags & nfs_infop->flag)
                        seq_puts(m, nfs_infop->str);
@@ -1239,10 +1243,15 @@ static int nfs_parse_mount_options(char *raw,
                 */
                case Opt_soft:
                        mnt->flags |= NFS_MOUNT_SOFT;
+                       mnt->flags &= ~NFS_MOUNT_SOFTERR;
                        break;
-               case Opt_hard:
+               case Opt_softerr:
+                       mnt->flags |= NFS_MOUNT_SOFTERR;
                        mnt->flags &= ~NFS_MOUNT_SOFT;
                        break;
+               case Opt_hard:
+                       mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR);
+                       break;
                case Opt_posix:
                        mnt->flags |= NFS_MOUNT_POSIX;
                        break;
@@ -2476,6 +2485,21 @@ static int nfs_compare_super_address(struct nfs_server *server1,
        return 1;
 }
 
+static int nfs_compare_userns(const struct nfs_server *old,
+               const struct nfs_server *new)
+{
+       const struct user_namespace *oldns = &init_user_ns;
+       const struct user_namespace *newns = &init_user_ns;
+
+       if (old->client && old->client->cl_cred)
+               oldns = old->client->cl_cred->user_ns;
+       if (new->client && new->client->cl_cred)
+               newns = new->client->cl_cred->user_ns;
+       if (oldns != newns)
+               return 0;
+       return 1;
+}
+
 static int nfs_compare_super(struct super_block *sb, void *data)
 {
        struct nfs_sb_mountdata *sb_mntdata = data;
@@ -2489,6 +2513,8 @@ static int nfs_compare_super(struct super_block *sb, void *data)
                return 0;
        if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
                return 0;
+       if (!nfs_compare_userns(old, server))
+               return 0;
        return nfs_compare_mount_options(sb, server, mntflags);
 }
 
index 06eb44b..25ba299 100644 (file)
@@ -26,8 +26,9 @@
  * and straight-forward than readdir caching.
  */
 
-static int nfs_symlink_filler(struct inode *inode, struct page *page)
+static int nfs_symlink_filler(void *data, struct page *page)
 {
+       struct inode *inode = data;
        int error;
 
        error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
@@ -65,8 +66,8 @@ static const char *nfs_get_link(struct dentry *dentry,
                err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
                if (err)
                        return err;
-               page = read_cache_page(&inode->i_data, 0,
-                                       (filler_t *)nfs_symlink_filler, inode);
+               page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
+                               inode);
                if (IS_ERR(page))
                        return ERR_CAST(page);
        }
index f3ebaba..bc5bb93 100644 (file)
@@ -244,6 +244,12 @@ static void nfs_set_pageerror(struct address_space *mapping)
        nfs_zap_mapping(mapping->host, mapping);
 }
 
+static void nfs_mapping_set_error(struct page *page, int error)
+{
+       SetPageError(page);
+       mapping_set_error(page_file_mapping(page), error);
+}
+
 /*
  * nfs_page_group_search_locked
  * @head - head request of page group
@@ -582,11 +588,10 @@ release_request:
        return ERR_PTR(ret);
 }
 
-static void nfs_write_error_remove_page(struct nfs_page *req)
+static void nfs_write_error(struct nfs_page *req, int error)
 {
+       nfs_mapping_set_error(req->wb_page, error);
        nfs_end_page_writeback(req);
-       generic_error_remove_page(page_file_mapping(req->wb_page),
-                                 req->wb_page);
        nfs_release_request(req);
 }
 
@@ -609,6 +614,7 @@ nfs_error_is_fatal_on_server(int err)
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
                                struct page *page)
 {
+       struct address_space *mapping;
        struct nfs_page *req;
        int ret = 0;
 
@@ -622,19 +628,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
        nfs_set_page_writeback(page);
        WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
 
-       ret = req->wb_context->error;
        /* If there is a fatal error that covers this write, just exit */
-       if (nfs_error_is_fatal_on_server(ret))
+       ret = 0;
+       mapping = page_file_mapping(page);
+       if (test_bit(AS_ENOSPC, &mapping->flags) ||
+           test_bit(AS_EIO, &mapping->flags))
                goto out_launder;
 
-       ret = 0;
        if (!nfs_pageio_add_request(pgio, req)) {
                ret = pgio->pg_error;
                /*
                 * Remove the problematic req upon fatal errors on the server
                 */
                if (nfs_error_is_fatal(ret)) {
-                       nfs_context_set_write_error(req->wb_context, ret);
                        if (nfs_error_is_fatal_on_server(ret))
                                goto out_launder;
                } else
@@ -646,8 +652,8 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 out:
        return ret;
 out_launder:
-       nfs_write_error_remove_page(req);
-       return ret;
+       nfs_write_error(req, ret);
+       return 0;
 }
 
 static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
@@ -958,7 +964,8 @@ static void
 nfs_clear_request_commit(struct nfs_page *req)
 {
        if (test_bit(PG_CLEAN, &req->wb_flags)) {
-               struct inode *inode = d_inode(req->wb_context->dentry);
+               struct nfs_open_context *ctx = nfs_req_openctx(req);
+               struct inode *inode = d_inode(ctx->dentry);
                struct nfs_commit_info cinfo;
 
                nfs_init_cinfo_from_inode(&cinfo, inode);
@@ -999,10 +1006,12 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
                if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
                    (hdr->good_bytes < bytes)) {
                        nfs_set_pageerror(page_file_mapping(req->wb_page));
-                       nfs_context_set_write_error(req->wb_context, hdr->error);
+                       nfs_mapping_set_error(req->wb_page, hdr->error);
                        goto remove_req;
                }
                if (nfs_write_need_commit(hdr)) {
+                       /* Reset wb_nio, since the write was successful. */
+                       req->wb_nio = 0;
                        memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
                        nfs_mark_request_commit(req, hdr->lseg, &cinfo,
                                hdr->pgio_mirror_idx);
@@ -1136,6 +1145,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
                req->wb_bytes = end - req->wb_offset;
        else
                req->wb_bytes = rqend - req->wb_offset;
+       req->wb_nio = 0;
        return req;
 out_flushme:
        /*
@@ -1165,7 +1175,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
        req = nfs_try_to_update_request(inode, page, offset, bytes);
        if (req != NULL)
                goto out;
-       req = nfs_create_request(ctx, page, NULL, offset, bytes);
+       req = nfs_create_request(ctx, page, offset, bytes);
        if (IS_ERR(req))
                goto out;
        nfs_inode_add_request(inode, req);
@@ -1210,7 +1220,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
                        return 0;
                l_ctx = req->wb_lock_context;
                do_flush = req->wb_page != page ||
-                       !nfs_match_open_context(req->wb_context, ctx);
+                       !nfs_match_open_context(nfs_req_openctx(req), ctx);
                if (l_ctx && flctx &&
                    !(list_empty_careful(&flctx->flc_posix) &&
                      list_empty_careful(&flctx->flc_flock))) {
@@ -1410,8 +1420,10 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
  */
 static void nfs_redirty_request(struct nfs_page *req)
 {
+       /* Bump the transmission count */
+       req->wb_nio++;
        nfs_mark_request_dirty(req);
-       set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
+       set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
        nfs_end_page_writeback(req);
        nfs_release_request(req);
 }
@@ -1423,14 +1435,10 @@ static void nfs_async_write_error(struct list_head *head, int error)
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
-               if (nfs_error_is_fatal(error)) {
-                       nfs_context_set_write_error(req->wb_context, error);
-                       if (nfs_error_is_fatal_on_server(error)) {
-                               nfs_write_error_remove_page(req);
-                               continue;
-                       }
-               }
-               nfs_redirty_request(req);
+               if (nfs_error_is_fatal(error))
+                       nfs_write_error(req, error);
+               else
+                       nfs_redirty_request(req);
        }
 }
 
@@ -1735,7 +1743,8 @@ void nfs_init_commit(struct nfs_commit_data *data,
                     struct nfs_commit_info *cinfo)
 {
        struct nfs_page *first = nfs_list_entry(head->next);
-       struct inode *inode = d_inode(first->wb_context->dentry);
+       struct nfs_open_context *ctx = nfs_req_openctx(first);
+       struct inode *inode = d_inode(ctx->dentry);
 
        /* Set up the RPC argument and reply structs
         * NB: take care not to mess about with data->commit et al. */
@@ -1743,7 +1752,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
        list_splice_init(head, &data->pages);
 
        data->inode       = inode;
-       data->cred        = first->wb_context->cred;
+       data->cred        = ctx->cred;
        data->lseg        = lseg; /* reference transferred */
        /* only set lwb for pnfs commit */
        if (lseg)
@@ -1756,7 +1765,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
        /* Note: we always request a commit of the entire inode */
        data->args.offset = 0;
        data->args.count  = 0;
-       data->context     = get_nfs_open_context(first->wb_context);
+       data->context     = get_nfs_open_context(ctx);
        data->res.fattr   = &data->fattr;
        data->res.verf    = &data->verf;
        nfs_fattr_init(&data->fattr);
@@ -1839,14 +1848,15 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
                        nfs_clear_page_commit(req->wb_page);
 
                dprintk("NFS:       commit (%s/%llu %d@%lld)",
-                       req->wb_context->dentry->d_sb->s_id,
-                       (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
+                       nfs_req_openctx(req)->dentry->d_sb->s_id,
+                       (unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)),
                        req->wb_bytes,
                        (long long)req_offset(req));
                if (status < 0) {
-                       nfs_context_set_write_error(req->wb_context, status);
-                       if (req->wb_page)
+                       if (req->wb_page) {
+                               nfs_mapping_set_error(req->wb_page, status);
                                nfs_inode_remove_request(req);
+                       }
                        dprintk_cont(", error = %d\n", status);
                        goto next;
                }
@@ -1863,7 +1873,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
                /* We have a mismatch. Write the page again */
                dprintk_cont(" mismatch\n");
                nfs_mark_request_dirty(req);
-               set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
+               set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
        next:
                nfs_unlock_and_release_request(req);
                /* Latency breaker */
index 7caa380..9b93e7a 100644 (file)
@@ -868,6 +868,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
                .program        = &cb_program,
                .version        = 1,
                .flags          = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+               .cred           = current_cred(),
        };
        struct rpc_clnt *client;
        const struct cred *cred;
@@ -1033,7 +1034,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
                 * the submission code will error out, so we don't need to
                 * handle that case here.
                 */
-               if (task->tk_flags & RPC_TASK_KILLED)
+               if (RPC_SIGNALLED(task))
                        goto need_restart;
 
                return true;
@@ -1086,7 +1087,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
        dprintk("%s: freed slot, new seqid=%d\n", __func__,
                clp->cl_cb_session->se_cb_seq_nr);
 
-       if (task->tk_flags & RPC_TASK_KILLED)
+       if (RPC_SIGNALLED(task))
                goto need_restart;
 out:
        return ret;
index 053a4ef..8c0cf10 100644 (file)
@@ -46,6 +46,7 @@ struct nlmclnt_initdata {
        int                     noresvport;
        struct net              *net;
        const struct nlmclnt_operations *nlmclnt_ops;
+       const struct cred       *cred;
 };
 
 /*
index b065ef4..c9b422d 100644 (file)
@@ -70,6 +70,7 @@ struct nlm_host {
        struct nsm_handle       *h_nsmhandle;   /* NSM status handle */
        char                    *h_addrbuf;     /* address eyecatcher */
        struct net              *net;           /* host net */
+       const struct cred       *h_cred;
        char                    nodename[UNX_MAXNODENAME + 1];
        const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */
 };
@@ -229,7 +230,8 @@ struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
                                        const u32 version,
                                        const char *hostname,
                                        int noresvport,
-                                       struct net *net);
+                                       struct net *net,
+                                       const struct cred *cred);
 void             nlmclnt_release_host(struct nlm_host *);
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
                                        const char *hostname,
index 40e3037..d363d57 100644 (file)
@@ -76,7 +76,6 @@ struct nfs_open_context {
        fmode_t mode;
 
        unsigned long flags;
-#define NFS_CONTEXT_ERROR_WRITE                (0)
 #define NFS_CONTEXT_RESEND_WRITES      (1)
 #define NFS_CONTEXT_BAD                        (2)
 #define NFS_CONTEXT_UNLOCK     (3)
index c827d31..1e78032 100644 (file)
@@ -139,6 +139,16 @@ struct nfs_server {
        struct nfs_iostats __percpu *io_stats;  /* I/O statistics */
        atomic_long_t           writeback;      /* number of writeback pages */
        int                     flags;          /* various flags */
+
+/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
+#define NFS_MOUNT_LOOKUP_CACHE_NONEG   0x10000
+#define NFS_MOUNT_LOOKUP_CACHE_NONE    0x20000
+#define NFS_MOUNT_NORESVPORT           0x40000
+#define NFS_MOUNT_LEGACY_INTERFACE     0x80000
+#define NFS_MOUNT_LOCAL_FLOCK          0x100000
+#define NFS_MOUNT_LOCAL_FCNTL          0x200000
+#define NFS_MOUNT_SOFTERR              0x400000
+
        unsigned int            caps;           /* server capabilities */
        unsigned int            rsize;          /* read size */
        unsigned int            rpages;         /* read size (in pages) */
@@ -231,6 +241,9 @@ struct nfs_server {
 
        /* XDR related information */
        unsigned int            read_hdrsize;
+
+       /* User namespace info */
+       const struct cred       *cred;
 };
 
 /* Server capabilities */
index ad69430..0bbd587 100644 (file)
@@ -42,7 +42,6 @@ struct nfs_inode;
 struct nfs_page {
        struct list_head        wb_list;        /* Defines state of page: */
        struct page             *wb_page;       /* page to read in/write out */
-       struct nfs_open_context *wb_context;    /* File state context info */
        struct nfs_lock_context *wb_lock_context;       /* lock context info */
        pgoff_t                 wb_index;       /* Offset >> PAGE_SHIFT */
        unsigned int            wb_offset,      /* Offset & ~PAGE_MASK */
@@ -53,6 +52,7 @@ struct nfs_page {
        struct nfs_write_verifier       wb_verf;        /* Commit cookie */
        struct nfs_page         *wb_this_page;  /* list of reqs for this page */
        struct nfs_page         *wb_head;       /* head pointer for req list */
+       unsigned short          wb_nio;         /* Number of I/O attempts */
 };
 
 struct nfs_pageio_descriptor;
@@ -87,7 +87,6 @@ struct nfs_pgio_mirror {
 };
 
 struct nfs_pageio_descriptor {
-       unsigned char           pg_moreio : 1;
        struct inode            *pg_inode;
        const struct nfs_pageio_ops *pg_ops;
        const struct nfs_rw_ops *pg_rw_ops;
@@ -105,6 +104,8 @@ struct nfs_pageio_descriptor {
        struct nfs_pgio_mirror  pg_mirrors_static[1];
        struct nfs_pgio_mirror  *pg_mirrors_dynamic;
        u32                     pg_mirror_idx;  /* current mirror */
+       unsigned short          pg_maxretrans;
+       unsigned char           pg_moreio : 1;
 };
 
 /* arbitrarily selected limit to number of mirrors */
@@ -114,7 +115,6 @@ struct nfs_pageio_descriptor {
 
 extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
                                            struct page *page,
-                                           struct nfs_page *last,
                                            unsigned int offset,
                                            unsigned int count);
 extern void nfs_release_request(struct nfs_page *);
@@ -199,4 +199,10 @@ loff_t req_offset(struct nfs_page *req)
        return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset;
 }
 
+static inline struct nfs_open_context *
+nfs_req_openctx(struct nfs_page *req)
+{
+       return req->wb_lock_context->open_context;
+}
+
 #endif /* _LINUX_NFS_PAGE_H */
index 98bc988..6e80731 100644 (file)
@@ -50,6 +50,7 @@ struct rpc_clnt {
        struct rpc_iostats *    cl_metrics;     /* per-client statistics */
 
        unsigned int            cl_softrtry : 1,/* soft timeouts */
+                               cl_softerr  : 1,/* Timeouts return errors */
                                cl_discrtry : 1,/* disconnect before retry */
                                cl_noretranstimeo: 1,/* No retransmit timeouts */
                                cl_autobind : 1,/* use getport() */
@@ -71,6 +72,7 @@ struct rpc_clnt {
        struct dentry           *cl_debugfs;    /* debugfs directory */
 #endif
        struct rpc_xprt_iter    cl_xpi;
+       const struct cred       *cl_cred;
 };
 
 /*
@@ -125,6 +127,7 @@ struct rpc_create_args {
        unsigned long           flags;
        char                    *client_name;
        struct svc_xprt         *bc_xprt;       /* NFSv4.1 backchannel */
+       const struct cred       *cred;
 };
 
 struct rpc_add_xprt_test {
@@ -144,6 +147,7 @@ struct rpc_add_xprt_test {
 #define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7)
 #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT        (1UL << 8)
 #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT     (1UL << 9)
+#define RPC_CLNT_CREATE_SOFTERR                (1UL << 10)
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
index 52d41d0..d0e4518 100644 (file)
@@ -35,7 +35,6 @@ struct rpc_wait {
        struct list_head        list;           /* wait queue links */
        struct list_head        links;          /* Links to related tasks */
        struct list_head        timer_list;     /* Timer list */
-       unsigned long           expires;
 };
 
 /*
@@ -62,6 +61,8 @@ struct rpc_task {
                struct rpc_wait         tk_wait;        /* RPC wait */
        } u;
 
+       int                     tk_rpc_status;  /* Result of last RPC operation */
+
        /*
         * RPC call state
         */
@@ -125,7 +126,6 @@ struct rpc_task_setup {
 #define RPC_CALL_MAJORSEEN     0x0020          /* major timeout seen */
 #define RPC_TASK_ROOTCREDS     0x0040          /* force root creds */
 #define RPC_TASK_DYNAMIC       0x0080          /* task was kmalloc'ed */
-#define RPC_TASK_KILLED                0x0100          /* task was killed */
 #define RPC_TASK_SOFT          0x0200          /* Use soft timeouts */
 #define RPC_TASK_SOFTCONN      0x0400          /* Fail if can't connect */
 #define RPC_TASK_SENT          0x0800          /* message was sent */
@@ -135,7 +135,6 @@ struct rpc_task_setup {
 
 #define RPC_IS_ASYNC(t)                ((t)->tk_flags & RPC_TASK_ASYNC)
 #define RPC_IS_SWAPPER(t)      ((t)->tk_flags & RPC_TASK_SWAPPER)
-#define RPC_ASSASSINATED(t)    ((t)->tk_flags & RPC_TASK_KILLED)
 #define RPC_IS_SOFT(t)         ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
 #define RPC_IS_SOFTCONN(t)     ((t)->tk_flags & RPC_TASK_SOFTCONN)
 #define RPC_WAS_SENT(t)                ((t)->tk_flags & RPC_TASK_SENT)
@@ -146,6 +145,7 @@ struct rpc_task_setup {
 #define RPC_TASK_NEED_XMIT     3
 #define RPC_TASK_NEED_RECV     4
 #define RPC_TASK_MSG_PIN_WAIT  5
+#define RPC_TASK_SIGNALLED     6
 
 #define RPC_IS_RUNNING(t)      test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
 #define rpc_set_running(t)     set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
@@ -169,6 +169,8 @@ struct rpc_task_setup {
 
 #define RPC_IS_ACTIVATED(t)    test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
 
+#define RPC_SIGNALLED(t)       test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
+
 /*
  * Task priorities.
  * Note: if you change these, you must also change
@@ -183,7 +185,6 @@ struct rpc_task_setup {
 struct rpc_timer {
        struct timer_list timer;
        struct list_head list;
-       unsigned long expires;
 };
 
 /*
@@ -217,6 +218,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
 struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
 void           rpc_put_task(struct rpc_task *);
 void           rpc_put_task_async(struct rpc_task *);
+void           rpc_signal_task(struct rpc_task *);
 void           rpc_exit_task(struct rpc_task *);
 void           rpc_exit(struct rpc_task *, int);
 void           rpc_release_calldata(const struct rpc_call_ops *, void *);
@@ -225,11 +227,19 @@ void              rpc_execute(struct rpc_task *);
 void           rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
 void           rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
 void           rpc_destroy_wait_queue(struct rpc_wait_queue *);
+unsigned long  rpc_task_timeout(const struct rpc_task *task);
+void           rpc_sleep_on_timeout(struct rpc_wait_queue *queue,
+                                       struct rpc_task *task,
+                                       rpc_action action,
+                                       unsigned long timeout);
 void           rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
                                        rpc_action action);
+void           rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
+                                       struct rpc_task *task,
+                                       unsigned long timeout,
+                                       int priority);
 void           rpc_sleep_on_priority(struct rpc_wait_queue *,
                                        struct rpc_task *,
-                                       rpc_action action,
                                        int priority);
 void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
                struct rpc_wait_queue *queue,
index 3a39154..a6d9fce 100644 (file)
@@ -143,7 +143,7 @@ struct rpc_xprt_ops {
        void            (*buf_free)(struct rpc_task *task);
        void            (*prepare_request)(struct rpc_rqst *req);
        int             (*send_request)(struct rpc_rqst *req);
-       void            (*set_retrans_timeout)(struct rpc_task *task);
+       void            (*wait_for_reply_request)(struct rpc_task *task);
        void            (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
        void            (*release_request)(struct rpc_task *task);
        void            (*close)(struct rpc_xprt *xprt);
@@ -378,8 +378,8 @@ xprt_disable_swap(struct rpc_xprt *xprt)
 int                    xprt_register_transport(struct xprt_class *type);
 int                    xprt_unregister_transport(struct xprt_class *type);
 int                    xprt_load_transport(const char *);
-void                   xprt_set_retrans_timeout_def(struct rpc_task *task);
-void                   xprt_set_retrans_timeout_rtt(struct rpc_task *task);
+void                   xprt_wait_for_reply_request_def(struct rpc_task *task);
+void                   xprt_wait_for_reply_request_rtt(struct rpc_task *task);
 void                   xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
 void                   xprt_wait_for_buffer_space(struct rpc_xprt *xprt);
 bool                   xprt_write_space(struct rpc_xprt *xprt);
index 962975b..df9851c 100644 (file)
@@ -511,6 +511,33 @@ TRACE_EVENT(xprtrdma_marshal,
        )
 );
 
+TRACE_EVENT(xprtrdma_marshal_failed,
+       TP_PROTO(const struct rpc_rqst *rqst,
+                int ret
+       ),
+
+       TP_ARGS(rqst, ret),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+               __field(int, ret)
+       ),
+
+       TP_fast_assign(
+               __entry->task_id = rqst->rq_task->tk_pid;
+               __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+               __entry->ret = ret;
+       ),
+
+       TP_printk("task:%u@%u xid=0x%08x: ret=%d",
+               __entry->task_id, __entry->client_id, __entry->xid,
+               __entry->ret
+       )
+);
+
 TRACE_EVENT(xprtrdma_post_send,
        TP_PROTO(
                const struct rpcrdma_req *req,
index f0a6f0c..ffa3c51 100644 (file)
@@ -82,7 +82,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
 TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
 TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
 TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
-TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
 TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
 TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
 TRACE_DEFINE_ENUM(RPC_TASK_SENT);
@@ -97,7 +96,6 @@ TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
                { RPC_CALL_MAJORSEEN, "MAJORSEEN" },                    \
                { RPC_TASK_ROOTCREDS, "ROOTCREDS" },                    \
                { RPC_TASK_DYNAMIC, "DYNAMIC" },                        \
-               { RPC_TASK_KILLED, "KILLED" },                          \
                { RPC_TASK_SOFT, "SOFT" },                              \
                { RPC_TASK_SOFTCONN, "SOFTCONN" },                      \
                { RPC_TASK_SENT, "SENT" },                              \
@@ -111,6 +109,7 @@ TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
 TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
 TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
 TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
+TRACE_DEFINE_ENUM(RPC_TASK_SIGNALLED);
 
 #define rpc_show_runstate(flags)                                       \
        __print_flags(flags, "|",                                       \
@@ -119,7 +118,8 @@ TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
                { (1UL << RPC_TASK_ACTIVE), "ACTIVE" },                 \
                { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" },           \
                { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" },           \
-               { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
+               { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" },     \
+               { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" })
 
 DECLARE_EVENT_CLASS(rpc_task_running,
 
@@ -186,7 +186,7 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
                __entry->client_id = task->tk_client ?
                                     task->tk_client->cl_clid : -1;
                __entry->task_id = task->tk_pid;
-               __entry->timeout = task->tk_timeout;
+               __entry->timeout = rpc_task_timeout(task);
                __entry->runstate = task->tk_runstate;
                __entry->status = task->tk_status;
                __entry->flags = task->tk_flags;
index e44e006..e3bcfc6 100644 (file)
@@ -66,13 +66,4 @@ struct nfs_mount_data {
 #define NFS_MOUNT_UNSHARED     0x8000  /* 5 */
 #define NFS_MOUNT_FLAGMASK     0xFFFF
 
-/* The following are for internal use only */
-#define NFS_MOUNT_LOOKUP_CACHE_NONEG   0x10000
-#define NFS_MOUNT_LOOKUP_CACHE_NONE    0x20000
-#define NFS_MOUNT_NORESVPORT           0x40000
-#define NFS_MOUNT_LEGACY_INTERFACE     0x80000
-
-#define NFS_MOUNT_LOCAL_FLOCK  0x100000
-#define NFS_MOUNT_LOCAL_FCNTL  0x200000
-
 #endif
index 3fd56c0..4ce42c6 100644 (file)
@@ -269,6 +269,7 @@ err:
 struct gss_upcall_msg {
        refcount_t count;
        kuid_t  uid;
+       const char *service_name;
        struct rpc_pipe_msg msg;
        struct list_head list;
        struct gss_auth *auth;
@@ -316,6 +317,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
                gss_put_ctx(gss_msg->ctx);
        rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
        gss_put_auth(gss_msg->auth);
+       kfree_const(gss_msg->service_name);
        kfree(gss_msg);
 }
 
@@ -410,9 +412,12 @@ gss_upcall_callback(struct rpc_task *task)
        gss_release_msg(gss_msg);
 }
 
-static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
+static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg,
+                             const struct cred *cred)
 {
-       uid_t uid = from_kuid(&init_user_ns, gss_msg->uid);
+       struct user_namespace *userns = cred->user_ns;
+
+       uid_t uid = from_kuid_munged(userns, gss_msg->uid);
        memcpy(gss_msg->databuf, &uid, sizeof(uid));
        gss_msg->msg.data = gss_msg->databuf;
        gss_msg->msg.len = sizeof(uid);
@@ -420,17 +425,31 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
        BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf));
 }
 
+static ssize_t
+gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg,
+               char __user *buf, size_t buflen)
+{
+       struct gss_upcall_msg *gss_msg = container_of(msg,
+                                                     struct gss_upcall_msg,
+                                                     msg);
+       if (msg->copied == 0)
+               gss_encode_v0_msg(gss_msg, file->f_cred);
+       return rpc_pipe_generic_upcall(file, msg, buf, buflen);
+}
+
 static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
                                const char *service_name,
-                               const char *target_name)
+                               const char *target_name,
+                               const struct cred *cred)
 {
+       struct user_namespace *userns = cred->user_ns;
        struct gss_api_mech *mech = gss_msg->auth->mech;
        char *p = gss_msg->databuf;
        size_t buflen = sizeof(gss_msg->databuf);
        int len;
 
        len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
-                       from_kuid(&init_user_ns, gss_msg->uid));
+                       from_kuid_munged(userns, gss_msg->uid));
        buflen -= len;
        p += len;
        gss_msg->msg.len = len;
@@ -491,6 +510,25 @@ out_overflow:
        return -ENOMEM;
 }
 
+static ssize_t
+gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg,
+               char __user *buf, size_t buflen)
+{
+       struct gss_upcall_msg *gss_msg = container_of(msg,
+                                                     struct gss_upcall_msg,
+                                                     msg);
+       int err;
+       if (msg->copied == 0) {
+               err = gss_encode_v1_msg(gss_msg,
+                                       gss_msg->service_name,
+                                       gss_msg->auth->target_name,
+                                       file->f_cred);
+               if (err)
+                       return err;
+       }
+       return rpc_pipe_generic_upcall(file, msg, buf, buflen);
+}
+
 static struct gss_upcall_msg *
 gss_alloc_msg(struct gss_auth *gss_auth,
                kuid_t uid, const char *service_name)
@@ -513,16 +551,14 @@ gss_alloc_msg(struct gss_auth *gss_auth,
        refcount_set(&gss_msg->count, 1);
        gss_msg->uid = uid;
        gss_msg->auth = gss_auth;
-       switch (vers) {
-       case 0:
-               gss_encode_v0_msg(gss_msg);
-               break;
-       default:
-               err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
-               if (err)
+       kref_get(&gss_auth->kref);
+       if (service_name) {
+               gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS);
+               if (!gss_msg->service_name) {
+                       err = -ENOMEM;
                        goto err_put_pipe_version;
+               }
        }
-       kref_get(&gss_auth->kref);
        return gss_msg;
 err_put_pipe_version:
        put_pipe_version(gss_auth->net);
@@ -581,8 +617,8 @@ gss_refresh_upcall(struct rpc_task *task)
                /* XXX: warning on the first, under the assumption we
                 * shouldn't normally hit this case on a refresh. */
                warn_gssd();
-               task->tk_timeout = 15*HZ;
-               rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
+               rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue,
+                               task, NULL, jiffies + (15 * HZ));
                err = -EAGAIN;
                goto out;
        }
@@ -595,7 +631,6 @@ gss_refresh_upcall(struct rpc_task *task)
        if (gss_cred->gc_upcall != NULL)
                rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
        else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
-               task->tk_timeout = 0;
                gss_cred->gc_upcall = gss_msg;
                /* gss_upcall_callback will release the reference to gss_upcall_msg */
                refcount_inc(&gss_msg->count);
@@ -707,7 +742,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
                goto err;
        }
 
-       uid = make_kuid(&init_user_ns, id);
+       uid = make_kuid(current_user_ns(), id);
        if (!uid_valid(uid)) {
                err = -EINVAL;
                goto err;
@@ -2116,7 +2151,7 @@ static const struct rpc_credops gss_nullops = {
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
-       .upcall         = rpc_pipe_generic_upcall,
+       .upcall         = gss_v0_upcall,
        .downcall       = gss_pipe_downcall,
        .destroy_msg    = gss_pipe_destroy_msg,
        .open_pipe      = gss_pipe_open_v0,
@@ -2124,7 +2159,7 @@ static const struct rpc_pipe_ops gss_upcall_ops_v0 = {
 };
 
 static const struct rpc_pipe_ops gss_upcall_ops_v1 = {
-       .upcall         = rpc_pipe_generic_upcall,
+       .upcall         = gss_v1_upcall,
        .downcall       = gss_pipe_downcall,
        .destroy_msg    = gss_pipe_destroy_msg,
        .open_pipe      = gss_pipe_open_v1,
index d4018e5..e7df1f7 100644 (file)
@@ -107,6 +107,8 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
        __be32          *p, *cred_len, *gidarr_len;
        int             i;
        struct group_info *gi = cred->cr_cred->group_info;
+       struct user_namespace *userns = clnt->cl_cred ?
+               clnt->cl_cred->user_ns : &init_user_ns;
 
        /* Credential */
 
@@ -122,14 +124,13 @@ unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
        p = xdr_reserve_space(xdr, 3 * sizeof(*p));
        if (!p)
                goto marshal_failed;
-       *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
-       *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
+       *p++ = cpu_to_be32(from_kuid_munged(userns, cred->cr_cred->fsuid));
+       *p++ = cpu_to_be32(from_kgid_munged(userns, cred->cr_cred->fsgid));
 
        gidarr_len = p++;
        if (gi)
                for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
-                       *p++ = cpu_to_be32(from_kgid(&init_user_ns,
-                                                    gi->gid[i]));
+                       *p++ = cpu_to_be32(from_kgid_munged(userns, gi->gid[i]));
        *gidarr_len = cpu_to_be32(p - gidarr_len - 1);
        *cred_len = cpu_to_be32((p - cred_len - 1) << 2);
        p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
index 8ff11dc..c1f1afa 100644 (file)
@@ -394,6 +394,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
        if (err)
                goto out_no_clid;
 
+       clnt->cl_cred     = get_cred(args->cred);
        clnt->cl_procinfo = version->procs;
        clnt->cl_maxproc  = version->nrprocs;
        clnt->cl_prog     = args->prognumber ? : program->number;
@@ -439,6 +440,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 out_no_path:
        rpc_free_iostats(clnt->cl_metrics);
 out_no_stats:
+       put_cred(clnt->cl_cred);
        rpc_free_clid(clnt);
 out_no_clid:
        kfree(clnt);
@@ -484,8 +486,11 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
        }
 
        clnt->cl_softrtry = 1;
-       if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
+       if (args->flags & (RPC_CLNT_CREATE_HARDRTRY|RPC_CLNT_CREATE_SOFTERR)) {
                clnt->cl_softrtry = 0;
+               if (args->flags & RPC_CLNT_CREATE_SOFTERR)
+                       clnt->cl_softerr = 1;
+       }
 
        if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
                clnt->cl_autobind = 1;
@@ -623,10 +628,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
        /* Turn off autobind on clones */
        new->cl_autobind = 0;
        new->cl_softrtry = clnt->cl_softrtry;
+       new->cl_softerr = clnt->cl_softerr;
        new->cl_noretranstimeo = clnt->cl_noretranstimeo;
        new->cl_discrtry = clnt->cl_discrtry;
        new->cl_chatty = clnt->cl_chatty;
        new->cl_principal = clnt->cl_principal;
+       new->cl_cred = get_cred(clnt->cl_cred);
        return new;
 
 out_err:
@@ -648,6 +655,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
                .prognumber     = clnt->cl_prog,
                .version        = clnt->cl_vers,
                .authflavor     = clnt->cl_auth->au_flavor,
+               .cred           = clnt->cl_cred,
        };
        return __rpc_clone_client(&args, clnt);
 }
@@ -669,6 +677,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
                .prognumber     = clnt->cl_prog,
                .version        = clnt->cl_vers,
                .authflavor     = flavor,
+               .cred           = clnt->cl_cred,
        };
        return __rpc_clone_client(&args, clnt);
 }
@@ -827,14 +836,8 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
         * Spin lock all_tasks to prevent changes...
         */
        spin_lock(&clnt->cl_lock);
-       list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
-               if (!RPC_IS_ACTIVATED(rovr))
-                       continue;
-               if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
-                       rovr->tk_flags |= RPC_TASK_KILLED;
-                       rpc_exit(rovr, -EIO);
-               }
-       }
+       list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
+               rpc_signal_task(rovr);
        spin_unlock(&clnt->cl_lock);
 }
 EXPORT_SYMBOL_GPL(rpc_killall_tasks);
@@ -882,6 +885,7 @@ rpc_free_client(struct rpc_clnt *clnt)
        xprt_put(rcu_dereference_raw(clnt->cl_xprt));
        xprt_iter_destroy(&clnt->cl_xpi);
        rpciod_down();
+       put_cred(clnt->cl_cred);
        rpc_free_clid(clnt);
        kfree(clnt);
        return parent;
@@ -946,6 +950,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
                .prognumber     = program->number,
                .version        = vers,
                .authflavor     = old->cl_auth->au_flavor,
+               .cred           = old->cl_cred,
        };
        struct rpc_clnt *clnt;
        int err;
@@ -1007,6 +1012,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
                atomic_inc(&clnt->cl_count);
                if (clnt->cl_softrtry)
                        task->tk_flags |= RPC_TASK_SOFT;
+               if (clnt->cl_softerr)
+                       task->tk_flags |= RPC_TASK_TIMEOUT;
                if (clnt->cl_noretranstimeo)
                        task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
                if (atomic_read(&clnt->cl_swapper))
@@ -1470,22 +1477,14 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
 }
 EXPORT_SYMBOL_GPL(rpc_force_rebind);
 
-/*
- * Restart an (async) RPC call from the call_prepare state.
- * Usually called from within the exit handler.
- */
-int
-rpc_restart_call_prepare(struct rpc_task *task)
+static int
+__rpc_restart_call(struct rpc_task *task, void (*action)(struct rpc_task *))
 {
-       if (RPC_ASSASSINATED(task))
-               return 0;
-       task->tk_action = call_start;
        task->tk_status = 0;
-       if (task->tk_ops->rpc_call_prepare != NULL)
-               task->tk_action = rpc_prepare_task;
+       task->tk_rpc_status = 0;
+       task->tk_action = action;
        return 1;
 }
-EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
 
 /*
  * Restart an (async) RPC call. Usually called from within the
@@ -1494,14 +1493,23 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
 int
 rpc_restart_call(struct rpc_task *task)
 {
-       if (RPC_ASSASSINATED(task))
-               return 0;
-       task->tk_action = call_start;
-       task->tk_status = 0;
-       return 1;
+       return __rpc_restart_call(task, call_start);
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call);
 
+/*
+ * Restart an (async) RPC call from the call_prepare state.
+ * Usually called from within the exit handler.
+ */
+int
+rpc_restart_call_prepare(struct rpc_task *task)
+{
+       if (task->tk_ops->rpc_call_prepare != NULL)
+               return __rpc_restart_call(task, rpc_prepare_task);
+       return rpc_restart_call(task);
+}
+EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
+
 const char
 *rpc_proc_name(const struct rpc_task *task)
 {
@@ -1516,6 +1524,19 @@ const char
                return "no proc";
 }
 
+static void
+__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
+{
+       task->tk_rpc_status = rpc_status;
+       rpc_exit(task, tk_status);
+}
+
+static void
+rpc_call_rpcerror(struct rpc_task *task, int status)
+{
+       __rpc_call_rpcerror(task, status, status);
+}
+
 /*
  * 0.  Initial state
  *
@@ -1580,7 +1601,7 @@ call_reserveresult(struct rpc_task *task)
 
                printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
                                __func__, status);
-               rpc_exit(task, -EIO);
+               rpc_call_rpcerror(task, -EIO);
                return;
        }
 
@@ -1608,7 +1629,7 @@ call_reserveresult(struct rpc_task *task)
                                __func__, status);
                break;
        }
-       rpc_exit(task, status);
+       rpc_call_rpcerror(task, status);
 }
 
 /*
@@ -1676,7 +1697,7 @@ call_refreshresult(struct rpc_task *task)
        }
        dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
                                task->tk_pid, __func__, status);
-       rpc_exit(task, status);
+       rpc_call_rpcerror(task, status);
 }
 
 /*
@@ -1727,7 +1748,7 @@ call_allocate(struct rpc_task *task)
        if (status == 0)
                return;
        if (status != -ENOMEM) {
-               rpc_exit(task, status);
+               rpc_call_rpcerror(task, status);
                return;
        }
 
@@ -1793,10 +1814,17 @@ call_encode(struct rpc_task *task)
                        rpc_delay(task, HZ >> 4);
                        break;
                case -EKEYEXPIRED:
-                       task->tk_action = call_refresh;
+                       if (!task->tk_cred_retry) {
+                               rpc_exit(task, task->tk_status);
+                       } else {
+                               task->tk_action = call_refresh;
+                               task->tk_cred_retry--;
+                               dprintk("RPC: %5u %s: retry refresh creds\n",
+                                       task->tk_pid, __func__);
+                       }
                        break;
                default:
-                       rpc_exit(task, task->tk_status);
+                       rpc_call_rpcerror(task, task->tk_status);
                }
                return;
        } else {
@@ -1857,7 +1885,6 @@ call_bind(struct rpc_task *task)
        if (!xprt_prepare_transmit(task))
                return;
 
-       task->tk_timeout = xprt->bind_timeout;
        xprt->ops->rpcbind(task);
 }
 
@@ -1938,7 +1965,7 @@ call_bind_status(struct rpc_task *task)
                                task->tk_pid, -task->tk_status);
        }
 
-       rpc_exit(task, status);
+       rpc_call_rpcerror(task, status);
        return;
 
 retry_timeout:
@@ -1973,7 +2000,7 @@ call_connect(struct rpc_task *task)
        if (task->tk_status < 0)
                return;
        if (task->tk_flags & RPC_TASK_NOCONNECT) {
-               rpc_exit(task, -ENOTCONN);
+               rpc_call_rpcerror(task, -ENOTCONN);
                return;
        }
        if (!xprt_prepare_transmit(task))
@@ -2033,7 +2060,7 @@ call_connect_status(struct rpc_task *task)
                task->tk_action = call_transmit;
                return;
        }
-       rpc_exit(task, status);
+       rpc_call_rpcerror(task, status);
        return;
 out_retry:
        /* Check for timeouts before looping back to call_bind */
@@ -2118,7 +2145,7 @@ call_transmit_status(struct rpc_task *task)
                        if (!task->tk_msg.rpc_proc->p_proc)
                                trace_xprt_ping(task->tk_xprt,
                                                task->tk_status);
-                       rpc_exit(task, task->tk_status);
+                       rpc_call_rpcerror(task, task->tk_status);
                        return;
                }
                /* fall through */
@@ -2282,7 +2309,7 @@ call_status(struct rpc_task *task)
        rpc_check_timeout(task);
        return;
 out_exit:
-       rpc_exit(task, status);
+       rpc_call_rpcerror(task, status);
 }
 
 static bool
@@ -2306,29 +2333,40 @@ rpc_check_timeout(struct rpc_task *task)
        task->tk_timeouts++;
 
        if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) {
-               rpc_exit(task, -ETIMEDOUT);
+               rpc_call_rpcerror(task, -ETIMEDOUT);
                return;
        }
 
        if (RPC_IS_SOFT(task)) {
+               /*
+                * Once a "no retrans timeout" soft task (a.k.a. NFSv4) has
+                * been sent, it should time out only if the transport
+                * connection gets terminally broken.
+                */
+               if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) &&
+                   rpc_check_connected(task->tk_rqstp))
+                       return;
+
                if (clnt->cl_chatty) {
-                       printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
+                       pr_notice_ratelimited(
+                               "%s: server %s not responding, timed out\n",
                                clnt->cl_program->name,
                                task->tk_xprt->servername);
                }
                if (task->tk_flags & RPC_TASK_TIMEOUT)
-                       rpc_exit(task, -ETIMEDOUT);
+                       rpc_call_rpcerror(task, -ETIMEDOUT);
                else
-                       rpc_exit(task, -EIO);
+                       __rpc_call_rpcerror(task, -EIO, -ETIMEDOUT);
                return;
        }
 
        if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
                task->tk_flags |= RPC_CALL_MAJORSEEN;
                if (clnt->cl_chatty) {
-                       printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
-                       clnt->cl_program->name,
-                       task->tk_xprt->servername);
+                       pr_notice_ratelimited(
+                               "%s: server %s not responding, still trying\n",
+                               clnt->cl_program->name,
+                               task->tk_xprt->servername);
                }
        }
        rpc_force_rebind(clnt);
@@ -2358,7 +2396,7 @@ call_decode(struct rpc_task *task)
 
        if (task->tk_flags & RPC_CALL_MAJORSEEN) {
                if (clnt->cl_chatty) {
-                       printk(KERN_NOTICE "%s: server %s OK\n",
+                       pr_notice_ratelimited("%s: server %s OK\n",
                                clnt->cl_program->name,
                                task->tk_xprt->servername);
                }
@@ -2881,7 +2919,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
 
        printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
                task->tk_pid, task->tk_flags, task->tk_status,
-               clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
+               clnt, task->tk_rqstp, rpc_task_timeout(task), task->tk_ops,
                clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
                task->tk_action, rpc_waitq);
 }
index 19bb356..95ebd76 100644 (file)
@@ -33,7 +33,7 @@ tasks_show(struct seq_file *f, void *v)
 
        seq_printf(f, "%5u %04x %6d 0x%x 0x%x %8ld %ps %sv%u %s a:%ps q:%s\n",
                task->tk_pid, task->tk_flags, task->tk_status,
-               clnt->cl_clid, xid, task->tk_timeout, task->tk_ops,
+               clnt->cl_clid, xid, rpc_task_timeout(task), task->tk_ops,
                clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
                task->tk_action, rpc_waitq);
        return 0;
index 41a971a..2277b7c 100644 (file)
@@ -240,6 +240,7 @@ static int rpcb_create_local_unix(struct net *net)
                .program        = &rpcb_program,
                .version        = RPCBVERS_2,
                .authflavor     = RPC_AUTH_NULL,
+               .cred           = current_cred(),
                /*
                 * We turn off the idle timeout to prevent the kernel
                 * from automatically disconnecting the socket.
@@ -299,6 +300,7 @@ static int rpcb_create_local_net(struct net *net)
                .program        = &rpcb_program,
                .version        = RPCBVERS_2,
                .authflavor     = RPC_AUTH_UNIX,
+               .cred           = current_cred(),
                .flags          = RPC_CLNT_CREATE_NOPING,
        };
        struct rpc_clnt *clnt, *clnt4;
@@ -358,7 +360,8 @@ out:
 static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
                                    const char *hostname,
                                    struct sockaddr *srvaddr, size_t salen,
-                                   int proto, u32 version)
+                                   int proto, u32 version,
+                                   const struct cred *cred)
 {
        struct rpc_create_args args = {
                .net            = net,
@@ -370,6 +373,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
                .program        = &rpcb_program,
                .version        = version,
                .authflavor     = RPC_AUTH_UNIX,
+               .cred           = cred,
                .flags          = (RPC_CLNT_CREATE_NOPING |
                                        RPC_CLNT_CREATE_NONPRIVPORT),
        };
@@ -694,7 +698,8 @@ void rpcb_getport_async(struct rpc_task *task)
 
        /* Put self on the wait queue to ensure we get notified if
         * some other task is already attempting to bind the port */
-       rpc_sleep_on(&xprt->binding, task, NULL);
+       rpc_sleep_on_timeout(&xprt->binding, task,
+                       NULL, jiffies + xprt->bind_timeout);
 
        if (xprt_test_and_set_binding(xprt)) {
                dprintk("RPC: %5u %s: waiting for another binder\n",
@@ -744,7 +749,8 @@ void rpcb_getport_async(struct rpc_task *task)
        rpcb_clnt = rpcb_create(xprt->xprt_net,
                                clnt->cl_nodename,
                                xprt->servername, sap, salen,
-                               xprt->prot, bind_version);
+                               xprt->prot, bind_version,
+                               clnt->cl_cred);
        if (IS_ERR(rpcb_clnt)) {
                status = PTR_ERR(rpcb_clnt);
                dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
index 28956c7..1a12fb0 100644 (file)
@@ -58,6 +58,20 @@ static struct rpc_wait_queue delay_queue;
 struct workqueue_struct *rpciod_workqueue __read_mostly;
 struct workqueue_struct *xprtiod_workqueue __read_mostly;
 
+unsigned long
+rpc_task_timeout(const struct rpc_task *task)
+{
+       unsigned long timeout = READ_ONCE(task->tk_timeout);
+
+       if (timeout != 0) {
+               unsigned long now = jiffies;
+               if (time_before(now, timeout))
+                       return timeout - now;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(rpc_task_timeout);
+
 /*
  * Disable the timer for a given RPC task. Should be called with
  * queue->lock and bh_disabled in order to avoid races within
@@ -66,7 +80,7 @@ struct workqueue_struct *xprtiod_workqueue __read_mostly;
 static void
 __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 {
-       if (task->tk_timeout == 0)
+       if (list_empty(&task->u.tk_wait.timer_list))
                return;
        dprintk("RPC: %5u disabling timer\n", task->tk_pid);
        task->tk_timeout = 0;
@@ -78,25 +92,21 @@ __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 static void
 rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
 {
-       queue->timer_list.expires = expires;
-       mod_timer(&queue->timer_list.timer, expires);
+       timer_reduce(&queue->timer_list.timer, expires);
 }
 
 /*
  * Set up a timer for the current task.
  */
 static void
-__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
+__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
+               unsigned long timeout)
 {
-       if (!task->tk_timeout)
-               return;
-
        dprintk("RPC: %5u setting alarm for %u ms\n",
-               task->tk_pid, jiffies_to_msecs(task->tk_timeout));
+               task->tk_pid, jiffies_to_msecs(timeout - jiffies));
 
-       task->u.tk_wait.expires = jiffies + task->tk_timeout;
-       if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires))
-               rpc_set_queue_timer(queue, task->u.tk_wait.expires);
+       task->tk_timeout = timeout;
+       rpc_set_queue_timer(queue, timeout);
        list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
@@ -188,6 +198,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
        if (RPC_IS_QUEUED(task))
                return;
 
+       INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
        if (RPC_IS_PRIORITY(queue))
                __rpc_add_wait_queue_priority(queue, task, queue_priority);
        else if (RPC_IS_SWAPPER(task))
@@ -238,7 +249,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
        queue->maxpriority = nr_queues - 1;
        rpc_reset_waitqueue_priority(queue);
        queue->qlen = 0;
-       timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
+       timer_setup(&queue->timer_list.timer,
+                       __rpc_queue_timer_fn,
+                       TIMER_DEFERRABLE);
        INIT_LIST_HEAD(&queue->timer_list.list);
        rpc_assign_waitqueue_name(queue, qname);
 }
@@ -362,7 +375,6 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
  */
 static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
                struct rpc_task *task,
-               rpc_action action,
                unsigned char queue_priority)
 {
        dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
@@ -372,47 +384,100 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
 
        __rpc_add_wait_queue(q, task, queue_priority);
 
-       WARN_ON_ONCE(task->tk_callback != NULL);
-       task->tk_callback = action;
-       __rpc_add_timer(q, task);
 }
 
-void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
-                               rpc_action action)
+static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
+               struct rpc_task *task, unsigned long timeout,
+               unsigned char queue_priority)
+{
+       if (time_is_after_jiffies(timeout)) {
+               __rpc_sleep_on_priority(q, task, queue_priority);
+               __rpc_add_timer(q, task, timeout);
+       } else
+               task->tk_status = -ETIMEDOUT;
+}
+
+static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action)
+{
+       if (action && !WARN_ON_ONCE(task->tk_callback != NULL))
+               task->tk_callback = action;
+}
+
+static bool rpc_sleep_check_activated(struct rpc_task *task)
 {
        /* We shouldn't ever put an inactive task to sleep */
-       WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
-       if (!RPC_IS_ACTIVATED(task)) {
+       if (WARN_ON_ONCE(!RPC_IS_ACTIVATED(task))) {
                task->tk_status = -EIO;
                rpc_put_task_async(task);
-               return;
+               return false;
        }
+       return true;
+}
+
+void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
+                               rpc_action action, unsigned long timeout)
+{
+       if (!rpc_sleep_check_activated(task))
+               return;
+
+       rpc_set_tk_callback(task, action);
+
+       /*
+        * Protect the queue operations.
+        */
+       spin_lock_bh(&q->lock);
+       __rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
+       spin_unlock_bh(&q->lock);
+}
+EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
 
+void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
+                               rpc_action action)
+{
+       if (!rpc_sleep_check_activated(task))
+               return;
+
+       rpc_set_tk_callback(task, action);
+
+       WARN_ON_ONCE(task->tk_timeout != 0);
        /*
         * Protect the queue operations.
         */
        spin_lock_bh(&q->lock);
-       __rpc_sleep_on_priority(q, task, action, task->tk_priority);
+       __rpc_sleep_on_priority(q, task, task->tk_priority);
        spin_unlock_bh(&q->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_sleep_on);
 
+void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
+               struct rpc_task *task, unsigned long timeout, int priority)
+{
+       if (!rpc_sleep_check_activated(task))
+               return;
+
+       priority -= RPC_PRIORITY_LOW;
+       /*
+        * Protect the queue operations.
+        */
+       spin_lock_bh(&q->lock);
+       __rpc_sleep_on_priority_timeout(q, task, timeout, priority);
+       spin_unlock_bh(&q->lock);
+}
+EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
+
 void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
-               rpc_action action, int priority)
+               int priority)
 {
-       /* We shouldn't ever put an inactive task to sleep */
-       WARN_ON_ONCE(!RPC_IS_ACTIVATED(task));
-       if (!RPC_IS_ACTIVATED(task)) {
-               task->tk_status = -EIO;
-               rpc_put_task_async(task);
+       if (!rpc_sleep_check_activated(task))
                return;
-       }
 
+       WARN_ON_ONCE(task->tk_timeout != 0);
+       priority -= RPC_PRIORITY_LOW;
        /*
         * Protect the queue operations.
         */
        spin_lock_bh(&q->lock);
-       __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
+       __rpc_sleep_on_priority(q, task, priority);
        spin_unlock_bh(&q->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
@@ -704,7 +769,7 @@ static void __rpc_queue_timer_fn(struct timer_list *t)
        spin_lock(&queue->lock);
        expires = now = jiffies;
        list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
-               timeo = task->u.tk_wait.expires;
+               timeo = task->tk_timeout;
                if (time_after_eq(now, timeo)) {
                        dprintk("RPC: %5u timeout\n", task->tk_pid);
                        task->tk_status = -ETIMEDOUT;
@@ -730,8 +795,7 @@ static void __rpc_atrun(struct rpc_task *task)
  */
 void rpc_delay(struct rpc_task *task, unsigned long delay)
 {
-       task->tk_timeout = delay;
-       rpc_sleep_on(&delay_queue, task, __rpc_atrun);
+       rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay);
 }
 EXPORT_SYMBOL_GPL(rpc_delay);
 
@@ -759,8 +823,7 @@ static void
 rpc_reset_task_statistics(struct rpc_task *task)
 {
        task->tk_timeouts = 0;
-       task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_KILLED|RPC_TASK_SENT);
-
+       task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT);
        rpc_init_task_statistics(task);
 }
 
@@ -773,7 +836,6 @@ void rpc_exit_task(struct rpc_task *task)
        if (task->tk_ops->rpc_call_done != NULL) {
                task->tk_ops->rpc_call_done(task, task->tk_calldata);
                if (task->tk_action != NULL) {
-                       WARN_ON(RPC_ASSASSINATED(task));
                        /* Always release the RPC slot and buffer memory */
                        xprt_release(task);
                        rpc_reset_task_statistics(task);
@@ -781,6 +843,19 @@ void rpc_exit_task(struct rpc_task *task)
        }
 }
 
+void rpc_signal_task(struct rpc_task *task)
+{
+       struct rpc_wait_queue *queue;
+
+       if (!RPC_IS_ACTIVATED(task))
+               return;
+       set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
+       smp_mb__after_atomic();
+       queue = READ_ONCE(task->tk_waitqueue);
+       if (queue)
+               rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
+}
+
 void rpc_exit(struct rpc_task *task, int status)
 {
        task->tk_status = status;
@@ -836,6 +911,13 @@ static void __rpc_execute(struct rpc_task *task)
                 */
                if (!RPC_IS_QUEUED(task))
                        continue;
+
+               /*
+                * Signalled tasks should exit rather than sleep.
+                */
+               if (RPC_SIGNALLED(task))
+                       rpc_exit(task, -ERESTARTSYS);
+
                /*
                 * The queue->lock protects against races with
                 * rpc_make_runnable().
@@ -861,7 +943,7 @@ static void __rpc_execute(struct rpc_task *task)
                status = out_of_line_wait_on_bit(&task->tk_runstate,
                                RPC_TASK_QUEUED, rpc_wait_bit_killable,
                                TASK_KILLABLE);
-               if (status == -ERESTARTSYS) {
+               if (status < 0) {
                        /*
                         * When a sync task receives a signal, it exits with
                         * -ERESTARTSYS. In order to catch any callbacks that
@@ -869,7 +951,7 @@ static void __rpc_execute(struct rpc_task *task)
                         * break the loop here, but go around once more.
                         */
                        dprintk("RPC: %5u got signal\n", task->tk_pid);
-                       task->tk_flags |= RPC_TASK_KILLED;
+                       set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
                        rpc_exit(task, -ERESTARTSYS);
                }
                dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
index 7e55cfc..9faea12 100644 (file)
@@ -106,7 +106,7 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb
                /* ACL likes to be lazy in allocating pages - ACLs
                 * are small by default but can get huge. */
                if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
-                       *ppage = alloc_page(GFP_ATOMIC);
+                       *ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
                        if (unlikely(*ppage == NULL)) {
                                if (copied == 0)
                                        copied = -ENOMEM;
index d7117d2..a9d40bc 100644 (file)
@@ -73,6 +73,15 @@ static void   xprt_destroy(struct rpc_xprt *xprt);
 static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
 
+static unsigned long xprt_request_timeout(const struct rpc_rqst *req)
+{
+       unsigned long timeout = jiffies + req->rq_timeout;
+
+       if (time_before(timeout, req->rq_majortimeo))
+               return timeout;
+       return req->rq_majortimeo;
+}
+
 /**
  * xprt_register_transport - register a transport implementation
  * @transport: transport to register
@@ -209,9 +218,12 @@ out_unlock:
 out_sleep:
        dprintk("RPC: %5u failed to lock transport %p\n",
                        task->tk_pid, xprt);
-       task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
        task->tk_status = -EAGAIN;
-       rpc_sleep_on(&xprt->sending, task, NULL);
+       if  (RPC_IS_SOFT(task))
+               rpc_sleep_on_timeout(&xprt->sending, task, NULL,
+                               xprt_request_timeout(req));
+       else
+               rpc_sleep_on(&xprt->sending, task, NULL);
        return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -273,9 +285,12 @@ out_unlock:
        xprt_clear_locked(xprt);
 out_sleep:
        dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
-       task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
        task->tk_status = -EAGAIN;
-       rpc_sleep_on(&xprt->sending, task, NULL);
+       if (RPC_IS_SOFT(task))
+               rpc_sleep_on_timeout(&xprt->sending, task, NULL,
+                               xprt_request_timeout(req));
+       else
+               rpc_sleep_on(&xprt->sending, task, NULL);
        return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -554,53 +569,44 @@ bool xprt_write_space(struct rpc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(xprt_write_space);
 
-/**
- * xprt_set_retrans_timeout_def - set a request's retransmit timeout
- * @task: task whose timeout is to be set
- *
- * Set a request's retransmit timeout based on the transport's
- * default timeout parameters.  Used by transports that don't adjust
- * the retransmit timeout based on round-trip time estimation.
- */
-void xprt_set_retrans_timeout_def(struct rpc_task *task)
+static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
 {
-       task->tk_timeout = task->tk_rqstp->rq_timeout;
+       s64 delta = ktime_to_ns(ktime_get() - abstime);
+       return likely(delta >= 0) ?
+               jiffies - nsecs_to_jiffies(delta) :
+               jiffies + nsecs_to_jiffies(-delta);
 }
-EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
 
-/**
- * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
- * @task: task whose timeout is to be set
- *
- * Set a request's retransmit timeout using the RTT estimator.
- */
-void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
+static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
 {
-       int timer = task->tk_msg.rpc_proc->p_timer;
-       struct rpc_clnt *clnt = task->tk_client;
-       struct rpc_rtt *rtt = clnt->cl_rtt;
-       struct rpc_rqst *req = task->tk_rqstp;
-       unsigned long max_timeout = clnt->cl_timeout->to_maxval;
+       const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
+       unsigned long majortimeo = req->rq_timeout;
 
-       task->tk_timeout = rpc_calc_rto(rtt, timer);
-       task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
-       if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
-               task->tk_timeout = max_timeout;
+       if (to->to_exponential)
+               majortimeo <<= to->to_retries;
+       else
+               majortimeo += to->to_increment * to->to_retries;
+       if (majortimeo > to->to_maxval || majortimeo == 0)
+               majortimeo = to->to_maxval;
+       return majortimeo;
 }
-EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
 
 static void xprt_reset_majortimeo(struct rpc_rqst *req)
 {
-       const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
+       req->rq_majortimeo += xprt_calc_majortimeo(req);
+}
 
-       req->rq_majortimeo = req->rq_timeout;
-       if (to->to_exponential)
-               req->rq_majortimeo <<= to->to_retries;
+static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
+{
+       unsigned long time_init;
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       if (likely(xprt && xprt_connected(xprt)))
+               time_init = jiffies;
        else
-               req->rq_majortimeo += to->to_increment * to->to_retries;
-       if (req->rq_majortimeo > to->to_maxval || req->rq_majortimeo == 0)
-               req->rq_majortimeo = to->to_maxval;
-       req->rq_majortimeo += jiffies;
+               time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
+       req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+       req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
 }
 
 /**
@@ -822,9 +828,9 @@ void xprt_connect(struct rpc_task *task)
                xprt->ops->close(xprt);
 
        if (!xprt_connected(xprt)) {
-               task->tk_timeout = task->tk_rqstp->rq_timeout;
                task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
-               rpc_sleep_on(&xprt->pending, task, NULL);
+               rpc_sleep_on_timeout(&xprt->pending, task, NULL,
+                               xprt_request_timeout(task->tk_rqstp));
 
                if (test_bit(XPRT_CLOSING, &xprt->state))
                        return;
@@ -949,7 +955,7 @@ xprt_is_pinned_rqst(struct rpc_rqst *req)
  * @req: Request to pin
  *
  * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
- * so should be holding the xprt receive lock.
+ * so should be holding xprt->queue_lock.
  */
 void xprt_pin_rqst(struct rpc_rqst *req)
 {
@@ -961,7 +967,7 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst);
  * xprt_unpin_rqst - Unpin a request on the transport receive list
  * @req: Request to pin
  *
- * Caller should be holding the xprt receive lock.
+ * Caller should be holding xprt->queue_lock.
  */
 void xprt_unpin_rqst(struct rpc_rqst *req)
 {
@@ -1017,7 +1023,6 @@ xprt_request_enqueue_receive(struct rpc_task *task)
        set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
        spin_unlock(&xprt->queue_lock);
 
-       xprt_reset_majortimeo(req);
        /* Turn off autodisconnect */
        del_singleshot_timer_sync(&xprt->timer);
 }
@@ -1102,6 +1107,49 @@ static void xprt_timer(struct rpc_task *task)
                task->tk_status = 0;
 }
 
+/**
+ * xprt_wait_for_reply_request_def - wait for reply
+ * @task: pointer to rpc_task
+ *
+ * Set a request's retransmit timeout based on the transport's
+ * default timeout parameters, and put the task to sleep on the
+ * pending queue.  Used by transports that don't adjust the
+ * retransmit timeout based on round-trip time estimation.
+ */
+void xprt_wait_for_reply_request_def(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+
+       rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
+                       xprt_request_timeout(req));
+}
+EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_def);
+
+/**
+ * xprt_wait_for_reply_request_rtt - wait for reply using RTT estimator
+ * @task: pointer to rpc_task
+ *
+ * Set a request's retransmit timeout using the RTT estimator,
+ * and put the task to sleep on the pending queue.
+ */
+void xprt_wait_for_reply_request_rtt(struct rpc_task *task)
+{
+       int timer = task->tk_msg.rpc_proc->p_timer;
+       struct rpc_clnt *clnt = task->tk_client;
+       struct rpc_rtt *rtt = clnt->cl_rtt;
+       struct rpc_rqst *req = task->tk_rqstp;
+       unsigned long max_timeout = clnt->cl_timeout->to_maxval;
+       unsigned long timeout;
+
+       timeout = rpc_calc_rto(rtt, timer);
+       timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
+       if (timeout > max_timeout || timeout == 0)
+               timeout = max_timeout;
+       rpc_sleep_on_timeout(&req->rq_xprt->pending, task, xprt_timer,
+                       jiffies + timeout);
+}
+EXPORT_SYMBOL_GPL(xprt_wait_for_reply_request_rtt);
+
 /**
  * xprt_request_wait_receive - wait for the reply to an RPC request
  * @task: RPC task about to send a request
@@ -1121,8 +1169,7 @@ void xprt_request_wait_receive(struct rpc_task *task)
         */
        spin_lock(&xprt->queue_lock);
        if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
-               xprt->ops->set_retrans_timeout(task);
-               rpc_sleep_on(&xprt->pending, task, xprt_timer);
+               xprt->ops->wait_for_reply_request(task);
                /*
                 * Send an extra queue wakeup call if the
                 * connection was dropped in case the call to
@@ -1337,6 +1384,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
                        if (status < 0)
                                goto out_dequeue;
                }
+               if (RPC_SIGNALLED(task)) {
+                       status = -ERESTARTSYS;
+                       goto out_dequeue;
+               }
        }
 
        /*
@@ -1605,7 +1656,6 @@ xprt_request_init(struct rpc_task *task)
        struct rpc_xprt *xprt = task->tk_xprt;
        struct rpc_rqst *req = task->tk_rqstp;
 
-       req->rq_timeout = task->tk_client->cl_timeout->to_initval;
        req->rq_task    = task;
        req->rq_xprt    = xprt;
        req->rq_buffer  = NULL;
@@ -1618,7 +1668,7 @@ xprt_request_init(struct rpc_task *task)
        req->rq_snd_buf.bvec = NULL;
        req->rq_rcv_buf.bvec = NULL;
        req->rq_release_snd_buf = NULL;
-       xprt_reset_majortimeo(req);
+       xprt_init_majortimeo(task, req);
        dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
                        req, ntohl(req->rq_xid));
 }
@@ -1647,7 +1697,6 @@ void xprt_reserve(struct rpc_task *task)
        if (task->tk_rqstp != NULL)
                return;
 
-       task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
        if (!xprt_throttle_congested(xprt, task))
                xprt_do_reserve(xprt, task);
@@ -1670,7 +1719,6 @@ void xprt_retry_reserve(struct rpc_task *task)
        if (task->tk_rqstp != NULL)
                return;
 
-       task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
        xprt_do_reserve(xprt, task);
 }
@@ -1827,7 +1875,9 @@ found:
                xprt->idle_timeout = 0;
        INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
        if (xprt_has_timer(xprt))
-               timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
+               timer_setup(&xprt->timer,
+                               xprt_init_autodisconnect,
+                               TIMER_DEFERRABLE);
        else
                timer_setup(&xprt->timer, NULL, 0);
 
index d79b18c..ce98659 100644 (file)
 
 #undef RPCRDMA_BACKCHANNEL_DEBUG
 
-static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
-                                unsigned int count)
-{
-       struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-       struct rpcrdma_req *req;
-       struct rpc_rqst *rqst;
-       unsigned int i;
-
-       for (i = 0; i < (count << 1); i++) {
-               struct rpcrdma_regbuf *rb;
-               size_t size;
-
-               req = rpcrdma_create_req(r_xprt);
-               if (IS_ERR(req))
-                       return PTR_ERR(req);
-               rqst = &req->rl_slot;
-
-               rqst->rq_xprt = xprt;
-               INIT_LIST_HEAD(&rqst->rq_bc_list);
-               __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-               spin_lock(&xprt->bc_pa_lock);
-               list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
-               spin_unlock(&xprt->bc_pa_lock);
-
-               size = r_xprt->rx_data.inline_rsize;
-               rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
-               if (IS_ERR(rb))
-                       goto out_fail;
-               req->rl_sendbuf = rb;
-               xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base,
-                            min_t(size_t, size, PAGE_SIZE));
-       }
-       return 0;
-
-out_fail:
-       rpcrdma_req_destroy(req);
-       return -ENOMEM;
-}
-
 /**
  * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests
  * @xprt: transport associated with these backchannel resources
@@ -68,34 +29,10 @@ out_fail:
 int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
 {
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-       int rc;
 
-       /* The backchannel reply path returns each rpc_rqst to the
-        * bc_pa_list _after_ the reply is sent. If the server is
-        * faster than the client, it can send another backward
-        * direction request before the rpc_rqst is returned to the
-        * list. The client rejects the request in this case.
-        *
-        * Twice as many rpc_rqsts are prepared to ensure there is
-        * always an rpc_rqst available as soon as a reply is sent.
-        */
-       if (reqs > RPCRDMA_BACKWARD_WRS >> 1)
-               goto out_err;
-
-       rc = rpcrdma_bc_setup_reqs(r_xprt, reqs);
-       if (rc)
-               goto out_free;
-
-       r_xprt->rx_buf.rb_bc_srv_max_requests = reqs;
+       r_xprt->rx_buf.rb_bc_srv_max_requests = RPCRDMA_BACKWARD_WRS >> 1;
        trace_xprtrdma_cb_setup(r_xprt, reqs);
        return 0;
-
-out_free:
-       xprt_rdma_bc_destroy(xprt, reqs);
-
-out_err:
-       pr_err("RPC:       %s: setup backchannel transport failed\n", __func__);
-       return -ENOMEM;
 }
 
 /**
@@ -107,10 +44,10 @@ out_err:
 size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
 {
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-       struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
+       struct rpcrdma_ep *ep = &r_xprt->rx_ep;
        size_t maxmsg;
 
-       maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize);
+       maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv);
        maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
        return maxmsg - RPCRDMA_HDRLEN_MIN;
 }
@@ -123,7 +60,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 
        rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
        xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
-                       req->rl_rdmabuf->rg_base, rqst);
+                       rdmab_data(req->rl_rdmabuf), rqst);
 
        p = xdr_reserve_space(&req->rl_stream, 28);
        if (unlikely(!p))
@@ -223,6 +160,43 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
        spin_unlock(&xprt->bc_pa_lock);
 }
 
+static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+       struct rpcrdma_req *req;
+       struct rpc_rqst *rqst;
+       size_t size;
+
+       spin_lock(&xprt->bc_pa_lock);
+       rqst = list_first_entry_or_null(&xprt->bc_pa_list, struct rpc_rqst,
+                                       rq_bc_pa_list);
+       if (!rqst)
+               goto create_req;
+       list_del(&rqst->rq_bc_pa_list);
+       spin_unlock(&xprt->bc_pa_lock);
+       return rqst;
+
+create_req:
+       spin_unlock(&xprt->bc_pa_lock);
+
+       /* Set a limit to prevent a remote from overrunning our resources.
+        */
+       if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
+               return NULL;
+
+       size = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE);
+       req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
+       if (!req)
+               return NULL;
+
+       xprt->bc_alloc_count++;
+       rqst = &req->rl_slot;
+       rqst->rq_xprt = xprt;
+       __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
+       xdr_buf_init(&rqst->rq_snd_buf, rdmab_data(req->rl_sendbuf), size);
+       return rqst;
+}
+
 /**
  * rpcrdma_bc_receive_call - Handle a backward direction call
  * @r_xprt: transport receiving the call
@@ -254,18 +228,10 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
        pr_info("RPC:       %s: %*ph\n", __func__, size, p);
 #endif
 
-       /* Grab a free bc rqst */
-       spin_lock(&xprt->bc_pa_lock);
-       if (list_empty(&xprt->bc_pa_list)) {
-               spin_unlock(&xprt->bc_pa_lock);
+       rqst = rpcrdma_bc_rqst_get(r_xprt);
+       if (!rqst)
                goto out_overflow;
-       }
-       rqst = list_first_entry(&xprt->bc_pa_list,
-                               struct rpc_rqst, rq_bc_pa_list);
-       list_del(&rqst->rq_bc_pa_list);
-       spin_unlock(&xprt->bc_pa_lock);
 
-       /* Prepare rqst */
        rqst->rq_reply_bytes_recvd = 0;
        rqst->rq_xid = *p;
 
index 52cb6c1..794ba4c 100644 (file)
 
 /**
  * frwr_is_supported - Check if device supports FRWR
- * @ia: interface adapter to check
+ * @device: RDMA device to check
  *
  * Returns true if device supports FRWR, otherwise false
  */
-bool frwr_is_supported(struct rpcrdma_ia *ia)
+bool frwr_is_supported(struct ib_device *device)
 {
-       struct ib_device_attr *attrs = &ia->ri_device->attrs;
+       struct ib_device_attr *attrs = &device->attrs;
 
        if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
                goto out_not_supported;
@@ -98,7 +98,7 @@ bool frwr_is_supported(struct rpcrdma_ia *ia)
 
 out_not_supported:
        pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
-               ia->ri_device->name);
+               device->name);
        return false;
 }
 
@@ -131,7 +131,7 @@ frwr_mr_recycle_worker(struct work_struct *work)
 
        if (mr->mr_dir != DMA_NONE) {
                trace_xprtrdma_mr_unmap(mr);
-               ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+               ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
                                mr->mr_sg, mr->mr_nents, mr->mr_dir);
                mr->mr_dir = DMA_NONE;
        }
@@ -194,12 +194,11 @@ out_list_err:
  * frwr_open - Prepare an endpoint for use with FRWR
  * @ia: interface adapter this endpoint will use
  * @ep: endpoint to prepare
- * @cdata: transport parameters
  *
  * On success, sets:
  *     ep->rep_attr.cap.max_send_wr
  *     ep->rep_attr.cap.max_recv_wr
- *     cdata->max_requests
+ *     ep->rep_max_requests
  *     ia->ri_max_segs
  *
  * And these FRWR-related fields:
@@ -208,10 +207,9 @@ out_list_err:
  *
  * On failure, a negative errno is returned.
  */
-int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
-             struct rpcrdma_create_data_internal *cdata)
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep)
 {
-       struct ib_device_attr *attrs = &ia->ri_device->attrs;
+       struct ib_device_attr *attrs = &ia->ri_id->device->attrs;
        int max_qp_wr, depth, delta;
 
        ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
@@ -253,24 +251,23 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
                } while (delta > 0);
        }
 
-       max_qp_wr = ia->ri_device->attrs.max_qp_wr;
+       max_qp_wr = ia->ri_id->device->attrs.max_qp_wr;
        max_qp_wr -= RPCRDMA_BACKWARD_WRS;
        max_qp_wr -= 1;
        if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
                return -ENOMEM;
-       if (cdata->max_requests > max_qp_wr)
-               cdata->max_requests = max_qp_wr;
-       ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
+       if (ep->rep_max_requests > max_qp_wr)
+               ep->rep_max_requests = max_qp_wr;
+       ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
        if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
-               cdata->max_requests = max_qp_wr / depth;
-               if (!cdata->max_requests)
+               ep->rep_max_requests = max_qp_wr / depth;
+               if (!ep->rep_max_requests)
                        return -EINVAL;
-               ep->rep_attr.cap.max_send_wr = cdata->max_requests *
-                                              depth;
+               ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
        }
        ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
        ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
-       ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+       ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests;
        ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
        ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
 
@@ -300,15 +297,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
                     (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth);
 }
 
-static void
-__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
-{
-       if (wc->status != IB_WC_WR_FLUSH_ERR)
-               pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
-                      wr, ib_wc_status_msg(wc->status),
-                      wc->status, wc->vendor_err);
-}
-
 /**
  * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
  * @cq:        completion queue (ignored)
@@ -323,10 +311,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
                        container_of(cqe, struct rpcrdma_frwr, fr_cqe);
 
        /* WARNING: Only wr_cqe and status are reliable at this point */
-       if (wc->status != IB_WC_SUCCESS) {
+       if (wc->status != IB_WC_SUCCESS)
                frwr->fr_state = FRWR_FLUSHED_FR;
-               __frwr_sendcompletion_flush(wc, "fastreg");
-       }
        trace_xprtrdma_wc_fastreg(wc, frwr);
 }
 
@@ -344,10 +330,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
                                                 fr_cqe);
 
        /* WARNING: Only wr_cqe and status are reliable at this point */
-       if (wc->status != IB_WC_SUCCESS) {
+       if (wc->status != IB_WC_SUCCESS)
                frwr->fr_state = FRWR_FLUSHED_LI;
-               __frwr_sendcompletion_flush(wc, "localinv");
-       }
        trace_xprtrdma_wc_li(wc, frwr);
 }
 
@@ -366,12 +350,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
                                                 fr_cqe);
 
        /* WARNING: Only wr_cqe and status are reliable at this point */
-       if (wc->status != IB_WC_SUCCESS) {
+       if (wc->status != IB_WC_SUCCESS)
                frwr->fr_state = FRWR_FLUSHED_LI;
-               __frwr_sendcompletion_flush(wc, "localinv");
-       }
-       complete(&frwr->fr_linv_done);
        trace_xprtrdma_wc_li_wake(wc, frwr);
+       complete(&frwr->fr_linv_done);
 }
 
 /**
@@ -436,7 +418,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
        }
        mr->mr_dir = rpcrdma_data_dir(writing);
 
-       mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+       mr->mr_nents =
+               ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir);
        if (!mr->mr_nents)
                goto out_dmamap_err;
 
@@ -466,7 +449,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
        return seg;
 
 out_dmamap_err:
-       frwr->fr_state = FRWR_IS_INVALID;
+       mr->mr_dir = DMA_NONE;
        trace_xprtrdma_frwr_sgerr(mr, i);
        rpcrdma_mr_put(mr);
        return ERR_PTR(-EIO);
index 6c1fb27..85115a2 100644 (file)
@@ -105,16 +105,23 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
        return size;
 }
 
+/**
+ * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
+ * @r_xprt: transport instance to initialize
+ *
+ * The max_inline fields contain the maximum size of an RPC message
+ * so the marshaling code doesn't have to repeat this calculation
+ * for every RPC.
+ */
 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
 {
-       struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       unsigned int maxsegs = ia->ri_max_segs;
-
-       ia->ri_max_inline_write = cdata->inline_wsize -
-                                 rpcrdma_max_call_header_size(maxsegs);
-       ia->ri_max_inline_read = cdata->inline_rsize -
-                                rpcrdma_max_reply_header_size(maxsegs);
+       unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs;
+       struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+
+       ep->rep_max_inline_send =
+               ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
+       ep->rep_max_inline_recv =
+               ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
 }
 
 /* The client can send a request inline as long as the RPCRDMA header
@@ -131,7 +138,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
        struct xdr_buf *xdr = &rqst->rq_snd_buf;
        unsigned int count, remaining, offset;
 
-       if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+       if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
                return false;
 
        if (xdr->page_len) {
@@ -159,9 +166,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
 static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
                                   struct rpc_rqst *rqst)
 {
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-
-       return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
+       return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
 }
 
 /* The client is required to provide a Reply chunk if the maximum
@@ -173,10 +178,9 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
                          const struct rpc_rqst *rqst)
 {
        const struct xdr_buf *buf = &rqst->rq_rcv_buf;
-       const struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 
-       return buf->head[0].iov_len + buf->tail[0].iov_len <
-               ia->ri_max_inline_read;
+       return (buf->head[0].iov_len + buf->tail[0].iov_len) <
+               r_xprt->rx_ep.rep_max_inline_recv;
 }
 
 /* Split @vec on page boundaries into SGEs. FMR registers pages, not
@@ -238,7 +242,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
                 */
                if (unlikely(xdrbuf->flags & XDRBUF_SPARSE_PAGES)) {
                        if (!*ppages)
-                               *ppages = alloc_page(GFP_ATOMIC);
+                               *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
                        if (!*ppages)
                                return -ENOBUFS;
                }
@@ -508,50 +512,45 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 }
 
 /**
- * rpcrdma_unmap_sendctx - DMA-unmap Send buffers
+ * rpcrdma_sendctx_unmap - DMA-unmap Send buffer
  * @sc: sendctx containing SGEs to unmap
  *
  */
-void
-rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
+void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc)
 {
-       struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia;
        struct ib_sge *sge;
-       unsigned int count;
 
        /* The first two SGEs contain the transport header and
         * the inline buffer. These are always left mapped so
         * they can be cheaply re-used.
         */
-       sge = &sc->sc_sges[2];
-       for (count = sc->sc_unmap_count; count; ++sge, --count)
-               ib_dma_unmap_page(ia->ri_device,
-                                 sge->addr, sge->length, DMA_TO_DEVICE);
+       for (sge = &sc->sc_sges[2]; sc->sc_unmap_count;
+            ++sge, --sc->sc_unmap_count)
+               ib_dma_unmap_page(sc->sc_device, sge->addr, sge->length,
+                                 DMA_TO_DEVICE);
 
-       if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) {
-               smp_mb__after_atomic();
+       if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES,
+                              &sc->sc_req->rl_flags))
                wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES);
-       }
 }
 
 /* Prepare an SGE for the RPC-over-RDMA transport header.
  */
-static bool
-rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
-                       u32 len)
+static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt,
+                                   struct rpcrdma_req *req, u32 len)
 {
        struct rpcrdma_sendctx *sc = req->rl_sendctx;
        struct rpcrdma_regbuf *rb = req->rl_rdmabuf;
        struct ib_sge *sge = sc->sc_sges;
 
-       if (!rpcrdma_dma_map_regbuf(ia, rb))
+       if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
                goto out_regbuf;
        sge->addr = rdmab_addr(rb);
        sge->length = len;
        sge->lkey = rdmab_lkey(rb);
 
-       ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
-                                     sge->length, DMA_TO_DEVICE);
+       ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length,
+                                     DMA_TO_DEVICE);
        sc->sc_wr.num_sge++;
        return true;
 
@@ -563,23 +562,23 @@ out_regbuf:
 /* Prepare the Send SGEs. The head and tail iovec, and each entry
  * in the page list, gets its own SGE.
  */
-static bool
-rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
-                        struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
+static bool rpcrdma_prepare_msg_sges(struct rpcrdma_xprt *r_xprt,
+                                    struct rpcrdma_req *req,
+                                    struct xdr_buf *xdr,
+                                    enum rpcrdma_chunktype rtype)
 {
        struct rpcrdma_sendctx *sc = req->rl_sendctx;
        unsigned int sge_no, page_base, len, remaining;
        struct rpcrdma_regbuf *rb = req->rl_sendbuf;
-       struct ib_device *device = ia->ri_device;
        struct ib_sge *sge = sc->sc_sges;
-       u32 lkey = ia->ri_pd->local_dma_lkey;
        struct page *page, **ppages;
 
        /* The head iovec is straightforward, as it is already
         * DMA-mapped. Sync the content that has changed.
         */
-       if (!rpcrdma_dma_map_regbuf(ia, rb))
+       if (!rpcrdma_regbuf_dma_map(r_xprt, rb))
                goto out_regbuf;
+       sc->sc_device = rdmab_device(rb);
        sge_no = 1;
        sge[sge_no].addr = rdmab_addr(rb);
        sge[sge_no].length = xdr->head[0].iov_len;
@@ -626,13 +625,14 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
                                goto out_mapping_overflow;
 
                        len = min_t(u32, PAGE_SIZE - page_base, remaining);
-                       sge[sge_no].addr = ib_dma_map_page(device, *ppages,
-                                                          page_base, len,
-                                                          DMA_TO_DEVICE);
-                       if (ib_dma_mapping_error(device, sge[sge_no].addr))
+                       sge[sge_no].addr =
+                               ib_dma_map_page(rdmab_device(rb), *ppages,
+                                               page_base, len, DMA_TO_DEVICE);
+                       if (ib_dma_mapping_error(rdmab_device(rb),
+                                                sge[sge_no].addr))
                                goto out_mapping_err;
                        sge[sge_no].length = len;
-                       sge[sge_no].lkey = lkey;
+                       sge[sge_no].lkey = rdmab_lkey(rb);
 
                        sc->sc_unmap_count++;
                        ppages++;
@@ -653,13 +653,13 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
 
 map_tail:
                sge_no++;
-               sge[sge_no].addr = ib_dma_map_page(device, page,
-                                                  page_base, len,
-                                                  DMA_TO_DEVICE);
-               if (ib_dma_mapping_error(device, sge[sge_no].addr))
+               sge[sge_no].addr =
+                       ib_dma_map_page(rdmab_device(rb), page, page_base, len,
+                                       DMA_TO_DEVICE);
+               if (ib_dma_mapping_error(rdmab_device(rb), sge[sge_no].addr))
                        goto out_mapping_err;
                sge[sge_no].length = len;
-               sge[sge_no].lkey = lkey;
+               sge[sge_no].lkey = rdmab_lkey(rb);
                sc->sc_unmap_count++;
        }
 
@@ -674,12 +674,12 @@ out_regbuf:
        return false;
 
 out_mapping_overflow:
-       rpcrdma_unmap_sendctx(sc);
+       rpcrdma_sendctx_unmap(sc);
        pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no);
        return false;
 
 out_mapping_err:
-       rpcrdma_unmap_sendctx(sc);
+       rpcrdma_sendctx_unmap(sc);
        trace_xprtrdma_dma_maperr(sge[sge_no].addr);
        return false;
 }
@@ -699,7 +699,7 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
                          struct rpcrdma_req *req, u32 hdrlen,
                          struct xdr_buf *xdr, enum rpcrdma_chunktype rtype)
 {
-       req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf);
+       req->rl_sendctx = rpcrdma_sendctx_get_locked(r_xprt);
        if (!req->rl_sendctx)
                return -EAGAIN;
        req->rl_sendctx->sc_wr.num_sge = 0;
@@ -707,11 +707,11 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
        req->rl_sendctx->sc_req = req;
        __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags);
 
-       if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen))
+       if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen))
                return -EIO;
 
        if (rtype != rpcrdma_areadch)
-               if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype))
+               if (!rpcrdma_prepare_msg_sges(r_xprt, req, xdr, rtype))
                        return -EIO;
 
        return 0;
@@ -747,8 +747,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
        int ret;
 
        rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
-       xdr_init_encode(xdr, &req->rl_hdrbuf,
-                       req->rl_rdmabuf->rg_base, rqst);
+       xdr_init_encode(xdr, &req->rl_hdrbuf, rdmab_data(req->rl_rdmabuf),
+                       rqst);
 
        /* Fixed header fields */
        ret = -EMSGSIZE;
@@ -876,6 +876,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
        return 0;
 
 out_err:
+       trace_xprtrdma_marshal_failed(rqst, ret);
        switch (ret) {
        case -EAGAIN:
                xprt_wait_for_buffer_space(rqst->rq_xprt);
index 907464c..bed57d8 100644 (file)
@@ -261,7 +261,7 @@ static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
        .buf_alloc              = xprt_rdma_bc_allocate,
        .buf_free               = xprt_rdma_bc_free,
        .send_request           = xprt_rdma_bc_send_request,
-       .set_retrans_timeout    = xprt_set_retrans_timeout_def,
+       .wait_for_reply_request = xprt_wait_for_reply_request_def,
        .close                  = xprt_rdma_bc_close,
        .destroy                = xprt_rdma_bc_put,
        .print_stats            = xprt_rdma_print_stats
index 5d26135..1f73a6a 100644 (file)
@@ -68,9 +68,9 @@
  * tunables
  */
 
-static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
+unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
 unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
-static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 unsigned int xprt_rdma_memreg_strategy         = RPCRDMA_FRWR;
 int xprt_rdma_pad_optimize;
 
@@ -288,7 +288,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
 
        cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
 
-       rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
+       rpcrdma_ep_destroy(r_xprt);
        rpcrdma_buffer_destroy(&r_xprt->rx_buf);
        rpcrdma_ia_close(&r_xprt->rx_ia);
 
@@ -311,10 +311,8 @@ static const struct rpc_timeout xprt_rdma_default_timeout = {
 static struct rpc_xprt *
 xprt_setup_rdma(struct xprt_create *args)
 {
-       struct rpcrdma_create_data_internal cdata;
        struct rpc_xprt *xprt;
        struct rpcrdma_xprt *new_xprt;
-       struct rpcrdma_ep *new_ep;
        struct sockaddr *sap;
        int rc;
 
@@ -349,40 +347,12 @@ xprt_setup_rdma(struct xprt_create *args)
                xprt_set_bound(xprt);
        xprt_rdma_format_addresses(xprt, sap);
 
-       cdata.max_requests = xprt_rdma_slot_table_entries;
-
-       cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
-       cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
-
-       cdata.inline_wsize = xprt_rdma_max_inline_write;
-       if (cdata.inline_wsize > cdata.wsize)
-               cdata.inline_wsize = cdata.wsize;
-
-       cdata.inline_rsize = xprt_rdma_max_inline_read;
-       if (cdata.inline_rsize > cdata.rsize)
-               cdata.inline_rsize = cdata.rsize;
-
-       /*
-        * Create new transport instance, which includes initialized
-        *  o ia
-        *  o endpoint
-        *  o buffers
-        */
-
        new_xprt = rpcx_to_rdmax(xprt);
-
        rc = rpcrdma_ia_open(new_xprt);
        if (rc)
                goto out1;
 
-       /*
-        * initialize and create ep
-        */
-       new_xprt->rx_data = cdata;
-       new_ep = &new_xprt->rx_ep;
-
-       rc = rpcrdma_ep_create(&new_xprt->rx_ep,
-                               &new_xprt->rx_ia, &new_xprt->rx_data);
+       rc = rpcrdma_ep_create(new_xprt);
        if (rc)
                goto out2;
 
@@ -413,7 +383,7 @@ out4:
        rpcrdma_buffer_destroy(&new_xprt->rx_buf);
        rc = -ENODEV;
 out3:
-       rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+       rpcrdma_ep_destroy(new_xprt);
 out2:
        rpcrdma_ia_close(&new_xprt->rx_ia);
 out1:
@@ -585,52 +555,15 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
        rpc_wake_up_next(&xprt->backlog);
 }
 
-static bool
-rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                   size_t size, gfp_t flags)
+static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
+                                struct rpcrdma_regbuf *rb, size_t size,
+                                gfp_t flags)
 {
-       struct rpcrdma_regbuf *rb;
-
-       if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size)
-               return true;
-
-       rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
-       if (IS_ERR(rb))
-               return false;
-
-       rpcrdma_free_regbuf(req->rl_sendbuf);
-       r_xprt->rx_stats.hardway_register_count += size;
-       req->rl_sendbuf = rb;
-       return true;
-}
-
-/* The rq_rcv_buf is used only if a Reply chunk is necessary.
- * The decision to use a Reply chunk is made later in
- * rpcrdma_marshal_req. This buffer is registered at that time.
- *
- * Otherwise, the associated RPC Reply arrives in a separate
- * Receive buffer, arbitrarily chosen by the HCA. The buffer
- * allocated here for the RPC Reply is not utilized in that
- * case. See rpcrdma_inline_fixup.
- *
- * A regbuf is used here to remember the buffer size.
- */
-static bool
-rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-                   size_t size, gfp_t flags)
-{
-       struct rpcrdma_regbuf *rb;
-
-       if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size)
-               return true;
-
-       rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags);
-       if (IS_ERR(rb))
-               return false;
-
-       rpcrdma_free_regbuf(req->rl_recvbuf);
-       r_xprt->rx_stats.hardway_register_count += size;
-       req->rl_recvbuf = rb;
+       if (unlikely(rdmab_length(rb) < size)) {
+               if (!rpcrdma_regbuf_realloc(rb, size, flags))
+                       return false;
+               r_xprt->rx_stats.hardway_register_count += size;
+       }
        return true;
 }
 
@@ -655,13 +588,15 @@ xprt_rdma_allocate(struct rpc_task *task)
        if (RPC_IS_SWAPPER(task))
                flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
 
-       if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
+       if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
+                                 flags))
                goto out_fail;
-       if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
+       if (!rpcrdma_check_regbuf(r_xprt, req->rl_recvbuf, rqst->rq_rcvsize,
+                                 flags))
                goto out_fail;
 
-       rqst->rq_buffer = req->rl_sendbuf->rg_base;
-       rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
+       rqst->rq_buffer = rdmab_data(req->rl_sendbuf);
+       rqst->rq_rbuffer = rdmab_data(req->rl_recvbuf);
        trace_xprtrdma_op_allocate(task, req);
        return 0;
 
@@ -815,7 +750,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
        .alloc_slot             = xprt_rdma_alloc_slot,
        .free_slot              = xprt_rdma_free_slot,
        .release_request        = xprt_release_rqst_cong,       /* ditto */
-       .set_retrans_timeout    = xprt_set_retrans_timeout_def, /* ditto */
+       .wait_for_reply_request = xprt_wait_for_reply_request_def, /* ditto */
        .timer                  = xprt_rdma_timer,
        .rpcbind                = rpcb_getport_async,   /* sunrpc/rpcb_clnt.c */
        .set_port               = xprt_rdma_set_port,
index 30cfc0e..bef5eac 100644 (file)
 static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
-static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
-static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
+                    gfp_t flags);
+static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
+static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
 static void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
 
-/* Wait for outstanding transport work to finish.
+/* Wait for outstanding transport work to finish. ib_drain_qp
+ * handles the drains in the wrong order for us, so open code
+ * them here.
  */
 static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
 {
@@ -132,11 +137,6 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
 
        /* WARNING: Only wr_cqe and status are reliable at this point */
        trace_xprtrdma_wc_send(sc, wc);
-       if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
-               pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
-                      ib_wc_status_msg(wc->status),
-                      wc->status, wc->vendor_err);
-
        rpcrdma_sendctx_put_locked(sc);
 }
 
@@ -174,10 +174,6 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
        return;
 
 out_flushed:
-       if (wc->status != IB_WC_WR_FLUSH_ERR)
-               pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
-                      ib_wc_status_msg(wc->status),
-                      wc->status, wc->vendor_err);
        rpcrdma_recv_buffer_put(rep);
 }
 
@@ -185,7 +181,6 @@ static void
 rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
                               struct rdma_conn_param *param)
 {
-       struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        const struct rpcrdma_connect_private *pmsg = param->private_data;
        unsigned int rsize, wsize;
 
@@ -202,12 +197,13 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
                wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
        }
 
-       if (rsize < cdata->inline_rsize)
-               cdata->inline_rsize = rsize;
-       if (wsize < cdata->inline_wsize)
-               cdata->inline_wsize = wsize;
-       dprintk("RPC:       %s: max send %u, max recv %u\n",
-               __func__, cdata->inline_wsize, cdata->inline_rsize);
+       if (rsize < r_xprt->rx_ep.rep_inline_recv)
+               r_xprt->rx_ep.rep_inline_recv = rsize;
+       if (wsize < r_xprt->rx_ep.rep_inline_send)
+               r_xprt->rx_ep.rep_inline_send = wsize;
+       dprintk("RPC:       %s: max send %u, max recv %u\n", __func__,
+               r_xprt->rx_ep.rep_inline_send,
+               r_xprt->rx_ep.rep_inline_recv);
        rpcrdma_set_max_header_sizes(r_xprt);
 }
 
@@ -247,7 +243,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
                pr_info("rpcrdma: removing device %s for %s:%s\n",
-                       ia->ri_device->name,
+                       ia->ri_id->device->name,
                        rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
 #endif
                set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
@@ -256,7 +252,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
                wait_for_completion(&ia->ri_remove_done);
 
                ia->ri_id = NULL;
-               ia->ri_device = NULL;
                /* Return 1 to ensure the core destroys the id. */
                return 1;
        case RDMA_CM_EVENT_ESTABLISHED:
@@ -291,7 +286,7 @@ disconnected:
 
        dprintk("RPC:       %s: %s:%s on %s/frwr: %s\n", __func__,
                rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
-               ia->ri_device->name, rdma_event_msg(event->event));
+               ia->ri_id->device->name, rdma_event_msg(event->event));
        return 0;
 }
 
@@ -370,9 +365,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
                rc = PTR_ERR(ia->ri_id);
                goto out_err;
        }
-       ia->ri_device = ia->ri_id->device;
 
-       ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
+       ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0);
        if (IS_ERR(ia->ri_pd)) {
                rc = PTR_ERR(ia->ri_pd);
                pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
@@ -381,12 +375,12 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
 
        switch (xprt_rdma_memreg_strategy) {
        case RPCRDMA_FRWR:
-               if (frwr_is_supported(ia))
+               if (frwr_is_supported(ia->ri_id->device))
                        break;
                /*FALLTHROUGH*/
        default:
                pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
-                      ia->ri_device->name, xprt_rdma_memreg_strategy);
+                      ia->ri_id->device->name, xprt_rdma_memreg_strategy);
                rc = -EINVAL;
                goto out_err;
        }
@@ -438,11 +432,11 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
         * mappings and MRs are gone.
         */
        list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
-               rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
+               rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
        list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
-               rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
-               rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
-               rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
+               rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf);
+               rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
+               rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
        }
        rpcrdma_mrs_destroy(buf);
        ib_dealloc_pd(ia->ri_pd);
@@ -468,7 +462,6 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
                rdma_destroy_id(ia->ri_id);
        }
        ia->ri_id = NULL;
-       ia->ri_device = NULL;
 
        /* If the pd is still busy, xprtrdma missed freeing a resource */
        if (ia->ri_pd && !IS_ERR(ia->ri_pd))
@@ -476,19 +469,26 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
        ia->ri_pd = NULL;
 }
 
-/*
- * Create unconnected endpoint.
+/**
+ * rpcrdma_ep_create - Create unconnected endpoint
+ * @r_xprt: transport to instantiate
+ *
+ * Returns zero on success, or a negative errno.
  */
-int
-rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
-                 struct rpcrdma_create_data_internal *cdata)
+int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
 {
+       struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
        struct ib_cq *sendcq, *recvcq;
        unsigned int max_sge;
        int rc;
 
-       max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
+       ep->rep_max_requests = xprt_rdma_slot_table_entries;
+       ep->rep_inline_send = xprt_rdma_max_inline_write;
+       ep->rep_inline_recv = xprt_rdma_max_inline_read;
+
+       max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge,
                        RPCRDMA_MAX_SEND_SGES);
        if (max_sge < RPCRDMA_MIN_SEND_SGES) {
                pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
@@ -496,7 +496,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        }
        ia->ri_max_send_sges = max_sge;
 
-       rc = frwr_open(ia, ep, cdata);
+       rc = frwr_open(ia, ep);
        if (rc)
                return rc;
 
@@ -518,23 +518,21 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                ep->rep_attr.cap.max_send_sge,
                ep->rep_attr.cap.max_recv_sge);
 
-       /* set trigger for requesting send completion */
-       ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
-                                  cdata->max_requests >> 2);
+       ep->rep_send_batch = ep->rep_max_requests >> 3;
        ep->rep_send_count = ep->rep_send_batch;
        init_waitqueue_head(&ep->rep_connect_wait);
        ep->rep_receive_count = 0;
 
-       sendcq = ib_alloc_cq(ia->ri_device, NULL,
+       sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
                             ep->rep_attr.cap.max_send_wr + 1,
-                            ia->ri_device->num_comp_vectors > 1 ? 1 : 0,
+                            ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
                             IB_POLL_WORKQUEUE);
        if (IS_ERR(sendcq)) {
                rc = PTR_ERR(sendcq);
                goto out1;
        }
 
-       recvcq = ib_alloc_cq(ia->ri_device, NULL,
+       recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
                             ep->rep_attr.cap.max_recv_wr + 1,
                             0, IB_POLL_WORKQUEUE);
        if (IS_ERR(recvcq)) {
@@ -552,15 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        pmsg->cp_magic = rpcrdma_cmp_magic;
        pmsg->cp_version = RPCRDMA_CMP_VERSION;
        pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
-       pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
-       pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
+       pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->rep_inline_send);
+       pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->rep_inline_recv);
        ep->rep_remote_cma.private_data = pmsg;
        ep->rep_remote_cma.private_data_len = sizeof(*pmsg);
 
        /* Client offers RDMA Read but does not initiate */
        ep->rep_remote_cma.initiator_depth = 0;
        ep->rep_remote_cma.responder_resources =
-               min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
+               min_t(int, U8_MAX, ia->ri_id->device->attrs.max_qp_rd_atom);
 
        /* Limit transport retries so client can detect server
         * GID changes quickly. RPC layer handles re-establishing
@@ -583,16 +581,16 @@ out1:
        return rc;
 }
 
-/*
- * rpcrdma_ep_destroy
+/**
+ * rpcrdma_ep_destroy - Disconnect and destroy endpoint.
+ * @r_xprt: transport instance to shut down
  *
- * Disconnect and destroy endpoint. After this, the only
- * valid operations on the ep are to free it (if dynamically
- * allocated) or re-create it.
  */
-void
-rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt)
 {
+       struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
        if (ia->ri_id && ia->ri_id->qp) {
                rpcrdma_ep_disconnect(ep, ia);
                rdma_destroy_qp(ia->ri_id);
@@ -622,7 +620,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
                goto out1;
 
        rc = -ENOMEM;
-       err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
+       err = rpcrdma_ep_create(r_xprt);
        if (err) {
                pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
                goto out2;
@@ -639,7 +637,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
        return 0;
 
 out3:
-       rpcrdma_ep_destroy(ep, ia);
+       rpcrdma_ep_destroy(r_xprt);
 out2:
        rpcrdma_ia_close(ia);
 out1:
@@ -672,7 +670,7 @@ rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
         */
        old = id;
        rc = -ENETUNREACH;
-       if (ia->ri_device != id->device) {
+       if (ia->ri_id->device != id->device) {
                pr_err("rpcrdma: can't reconnect on different device!\n");
                goto out_destroy;
        }
@@ -796,8 +794,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
  */
 
 /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
- * queue activity, and ib_drain_qp has flushed all remaining Send
- * requests.
+ * queue activity, and rpcrdma_xprt_drain has flushed all remaining
+ * Send requests.
  */
 static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
 {
@@ -867,20 +865,20 @@ static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
 
 /**
  * rpcrdma_sendctx_get_locked - Acquire a send context
- * @buf: transport buffers from which to acquire an unused context
+ * @r_xprt: controlling transport instance
  *
  * Returns pointer to a free send completion context; or NULL if
  * the queue is empty.
  *
  * Usage: Called to acquire an SGE array before preparing a Send WR.
  *
- * The caller serializes calls to this function (per rpcrdma_buffer),
- * and provides an effective memory barrier that flushes the new value
+ * The caller serializes calls to this function (per transport), and
+ * provides an effective memory barrier that flushes the new value
  * of rb_sc_head.
  */
-struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt)
 {
-       struct rpcrdma_xprt *r_xprt;
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_sendctx *sc;
        unsigned long next_head;
 
@@ -905,7 +903,6 @@ out_emptyq:
         * backing up. Cause the caller to pause and try again.
         */
        set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
-       r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
        r_xprt->rx_stats.empty_sendctx_q++;
        return NULL;
 }
@@ -917,7 +914,7 @@ out_emptyq:
  * Usage: Called from Send completion to return a sendctxt
  * to the queue.
  *
- * The caller serializes calls to this function (per rpcrdma_buffer).
+ * The caller serializes calls to this function (per transport).
  */
 static void
 rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
@@ -925,7 +922,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
        struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
        unsigned long next_tail;
 
-       /* Unmap SGEs of previously completed by unsignaled
+       /* Unmap SGEs of previously completed but unsignaled
         * Sends by walking up the queue until @sc is found.
         */
        next_tail = buf->rb_sc_tail;
@@ -933,7 +930,7 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
                next_tail = rpcrdma_sendctx_next(buf, next_tail);
 
                /* ORDER: item must be accessed _before_ tail is updated */
-               rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);
+               rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]);
 
        } while (buf->rb_sc_ctxs[next_tail] != sc);
 
@@ -996,54 +993,70 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
        rpcrdma_mrs_create(r_xprt);
 }
 
-struct rpcrdma_req *
-rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_req_create - Allocate an rpcrdma_req object
+ * @r_xprt: controlling r_xprt
+ * @size: initial size, in bytes, of send and receive buffers
+ * @flags: GFP flags passed to memory allocators
+ *
+ * Returns an allocated and fully initialized rpcrdma_req, or NULL on failure.
+ */
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
+                                      gfp_t flags)
 {
        struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
        struct rpcrdma_regbuf *rb;
        struct rpcrdma_req *req;
 
-       req = kzalloc(sizeof(*req), GFP_KERNEL);
+       req = kzalloc(sizeof(*req), flags);
        if (req == NULL)
-               return ERR_PTR(-ENOMEM);
+               goto out1;
 
-       rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
-                                 DMA_TO_DEVICE, GFP_KERNEL);
-       if (IS_ERR(rb)) {
-               kfree(req);
-               return ERR_PTR(-ENOMEM);
-       }
+       rb = rpcrdma_regbuf_alloc(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, flags);
+       if (!rb)
+               goto out2;
        req->rl_rdmabuf = rb;
-       xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
+       xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));
+
+       req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
+       if (!req->rl_sendbuf)
+               goto out3;
+
+       req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
+       if (!req->rl_recvbuf)
+               goto out4;
+
        req->rl_buffer = buffer;
        INIT_LIST_HEAD(&req->rl_registered);
-
        spin_lock(&buffer->rb_lock);
        list_add(&req->rl_all, &buffer->rb_allreqs);
        spin_unlock(&buffer->rb_lock);
        return req;
+
+out4:
+       rpcrdma_regbuf_free(req->rl_sendbuf);   /* regbuf owns rg_data too */
+out3:
+       rpcrdma_regbuf_free(req->rl_rdmabuf);
+out2:
+       kfree(req);
+out1:
+       return NULL;
 }
 
-static int
-rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
+static bool rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, bool temp)
 {
-       struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_rep *rep;
-       int rc;
 
-       rc = -ENOMEM;
        rep = kzalloc(sizeof(*rep), GFP_KERNEL);
        if (rep == NULL)
                goto out;
 
-       rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
+       rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.rep_inline_recv,
                                               DMA_FROM_DEVICE, GFP_KERNEL);
-       if (IS_ERR(rep->rr_rdmabuf)) {
-               rc = PTR_ERR(rep->rr_rdmabuf);
+       if (!rep->rr_rdmabuf)
                goto out_free;
-       }
-       xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
+       xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
                     rdmab_length(rep->rr_rdmabuf));
 
        rep->rr_cqe.done = rpcrdma_wc_receive;
@@ -1058,22 +1071,27 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
        spin_lock(&buf->rb_lock);
        list_add(&rep->rr_list, &buf->rb_recv_bufs);
        spin_unlock(&buf->rb_lock);
-       return 0;
+       return true;
 
 out_free:
        kfree(rep);
 out:
-       return rc;
+       return false;
 }
 
-int
-rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
+/**
+ * rpcrdma_buffer_create - Create initial set of req/rep objects
+ * @r_xprt: transport instance to (re)initialize
+ *
+ * Returns zero on success, otherwise a negative errno.
+ */
+int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 {
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        int i, rc;
 
        buf->rb_flags = 0;
-       buf->rb_max_requests = r_xprt->rx_data.max_requests;
+       buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests;
        buf->rb_bc_srv_max_requests = 0;
        spin_lock_init(&buf->rb_mrlock);
        spin_lock_init(&buf->rb_lock);
@@ -1086,16 +1104,15 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 
        INIT_LIST_HEAD(&buf->rb_send_bufs);
        INIT_LIST_HEAD(&buf->rb_allreqs);
+
+       rc = -ENOMEM;
        for (i = 0; i < buf->rb_max_requests; i++) {
                struct rpcrdma_req *req;
 
-               req = rpcrdma_create_req(r_xprt);
-               if (IS_ERR(req)) {
-                       dprintk("RPC:       %s: request buffer %d alloc"
-                               " failed\n", __func__, i);
-                       rc = PTR_ERR(req);
+               req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE,
+                                        GFP_KERNEL);
+               if (!req)
                        goto out;
-               }
                list_add(&req->rl_list, &buf->rb_send_bufs);
        }
 
@@ -1121,10 +1138,9 @@ out:
        return rc;
 }
 
-static void
-rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
 {
-       rpcrdma_free_regbuf(rep->rr_rdmabuf);
+       rpcrdma_regbuf_free(rep->rr_rdmabuf);
        kfree(rep);
 }
 
@@ -1140,9 +1156,9 @@ rpcrdma_req_destroy(struct rpcrdma_req *req)
 {
        list_del(&req->rl_all);
 
-       rpcrdma_free_regbuf(req->rl_recvbuf);
-       rpcrdma_free_regbuf(req->rl_sendbuf);
-       rpcrdma_free_regbuf(req->rl_rdmabuf);
+       rpcrdma_regbuf_free(req->rl_recvbuf);
+       rpcrdma_regbuf_free(req->rl_sendbuf);
+       rpcrdma_regbuf_free(req->rl_rdmabuf);
        kfree(req);
 }
 
@@ -1180,7 +1196,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
  * rpcrdma_buffer_destroy - Release all hw resources
  * @buf: root control block for resources
  *
- * ORDERING: relies on a prior ib_drain_qp :
+ * ORDERING: relies on a prior rpcrdma_xprt_drain :
  * - No more Send or Receive completions can occur
  * - All MRs, reps, and reqs are returned to their free lists
  */
@@ -1202,7 +1218,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
                rep = list_first_entry(&buf->rb_recv_bufs,
                                       struct rpcrdma_rep, rr_list);
                list_del(&rep->rr_list);
-               rpcrdma_destroy_rep(rep);
+               rpcrdma_rep_destroy(rep);
        }
 
        while (!list_empty(&buf->rb_send_bufs)) {
@@ -1281,7 +1297,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
 
        if (mr->mr_dir != DMA_NONE) {
                trace_xprtrdma_mr_unmap(mr);
-               ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+               ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
                                mr->mr_sg, mr->mr_nents, mr->mr_dir);
                mr->mr_dir = DMA_NONE;
        }
@@ -1331,7 +1347,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
        }
        spin_unlock(&buffers->rb_lock);
        if (rep)
-               rpcrdma_destroy_rep(rep);
+               rpcrdma_rep_destroy(rep);
 }
 
 /*
@@ -1348,69 +1364,90 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
                list_add(&rep->rr_list, &buffers->rb_recv_bufs);
                spin_unlock(&buffers->rb_lock);
        } else {
-               rpcrdma_destroy_rep(rep);
+               rpcrdma_rep_destroy(rep);
        }
 }
 
-/**
- * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
- * @size: size of buffer to be allocated, in bytes
- * @direction: direction of data movement
- * @flags: GFP flags
- *
- * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
- * can be persistently DMA-mapped for I/O.
+/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
  *
  * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
  * receiving the payload of RDMA RECV operations. During Long Calls
  * or Replies they may be registered externally via frwr_map.
  */
-struct rpcrdma_regbuf *
-rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
                     gfp_t flags)
 {
        struct rpcrdma_regbuf *rb;
 
-       rb = kmalloc(sizeof(*rb) + size, flags);
-       if (rb == NULL)
-               return ERR_PTR(-ENOMEM);
+       rb = kmalloc(sizeof(*rb), flags);
+       if (!rb)
+               return NULL;
+       rb->rg_data = kmalloc(size, flags);
+       if (!rb->rg_data) {
+               kfree(rb);
+               return NULL;
+       }
 
        rb->rg_device = NULL;
        rb->rg_direction = direction;
        rb->rg_iov.length = size;
-
        return rb;
 }
 
 /**
- * __rpcrdma_map_regbuf - DMA-map a regbuf
- * @ia: controlling rpcrdma_ia
+ * rpcrdma_regbuf_realloc - replace a regbuf's data buffer
+ * @rb: regbuf to reallocate; any DMA mapping is dropped on success
+ * @size: size of new buffer to be allocated, in bytes
+ * @flags: GFP flags passed to kmalloc()
+ *
+ * Old buffer contents are not copied. Returns true on success;
+ * if false is returned, @rb is left untouched.
+ */
+bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
+{
+       void *buf;
+
+       buf = kmalloc(size, flags);
+       if (!buf)
+               return false;
+
+       rpcrdma_regbuf_dma_unmap(rb);
+       kfree(rb->rg_data);
+
+       rb->rg_data = buf;
+       rb->rg_iov.length = size;
+       return true;
+}
+
+/**
+ * __rpcrdma_regbuf_dma_map - DMA-map a regbuf
+ * @r_xprt: controlling transport instance
  * @rb: regbuf to be mapped
+ *
+ * Returns true if the buffer is now DMA mapped to @r_xprt's device
  */
-bool
-__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+                             struct rpcrdma_regbuf *rb)
 {
-       struct ib_device *device = ia->ri_device;
+       struct ib_device *device = r_xprt->rx_ia.ri_id->device;
 
        if (rb->rg_direction == DMA_NONE)
                return false;
 
-       rb->rg_iov.addr = ib_dma_map_single(device,
-                                           (void *)rb->rg_base,
-                                           rdmab_length(rb),
-                                           rb->rg_direction);
+       rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb),
+                                           rdmab_length(rb), rb->rg_direction);
        if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
                trace_xprtrdma_dma_maperr(rdmab_addr(rb));
                return false;
        }
 
        rb->rg_device = device;
-       rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
+       rb->rg_iov.lkey = r_xprt->rx_ia.ri_pd->local_dma_lkey;
        return true;
 }
 
-static void
-rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
+static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb)
 {
        if (!rb)
                return;
@@ -1418,19 +1455,16 @@ rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
        if (!rpcrdma_regbuf_is_mapped(rb))
                return;
 
-       ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
-                           rdmab_length(rb), rb->rg_direction);
+       ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb),
+                           rb->rg_direction);
        rb->rg_device = NULL;
 }
 
-/**
- * rpcrdma_free_regbuf - deregister and free registered buffer
- * @rb: regbuf to be deregistered and freed
- */
-void
-rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
+static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
 {
-       rpcrdma_dma_unmap_regbuf(rb);
+       rpcrdma_regbuf_dma_unmap(rb);
+       if (rb)
+               kfree(rb->rg_data);
        kfree(rb);
 }
 
@@ -1497,17 +1531,15 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
                        list_del(&rep->rr_list);
                spin_unlock(&buf->rb_lock);
                if (!rep) {
-                       if (rpcrdma_create_rep(r_xprt, temp))
+                       if (!rpcrdma_rep_create(r_xprt, temp))
                                break;
                        continue;
                }
 
                rb = rep->rr_rdmabuf;
-               if (!rpcrdma_regbuf_is_mapped(rb)) {
-                       if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
-                               rpcrdma_recv_buffer_put(rep);
-                               break;
-                       }
+               if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) {
+                       rpcrdma_recv_buffer_put(rep);
+                       break;
                }
 
                trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
index 10f6593..d1e0749 100644 (file)
  * Interface Adapter -- one per transport instance
  */
 struct rpcrdma_ia {
-       struct ib_device        *ri_device;
        struct rdma_cm_id       *ri_id;
        struct ib_pd            *ri_pd;
-       struct completion       ri_done;
-       struct completion       ri_remove_done;
        int                     ri_async_rc;
        unsigned int            ri_max_segs;
        unsigned int            ri_max_frwr_depth;
-       unsigned int            ri_max_inline_write;
-       unsigned int            ri_max_inline_read;
        unsigned int            ri_max_send_sges;
        bool                    ri_implicit_roundup;
        enum ib_mr_type         ri_mrtype;
        unsigned long           ri_flags;
+       struct completion       ri_done;
+       struct completion       ri_remove_done;
 };
 
 enum {
@@ -93,22 +90,29 @@ enum {
 struct rpcrdma_ep {
        unsigned int            rep_send_count;
        unsigned int            rep_send_batch;
+       unsigned int            rep_max_inline_send;
+       unsigned int            rep_max_inline_recv;
        int                     rep_connected;
        struct ib_qp_init_attr  rep_attr;
        wait_queue_head_t       rep_connect_wait;
        struct rpcrdma_connect_private  rep_cm_private;
        struct rdma_conn_param  rep_remote_cma;
+       unsigned int            rep_max_requests;       /* set by /proc */
+       unsigned int            rep_inline_send;        /* negotiated */
+       unsigned int            rep_inline_recv;        /* negotiated */
        int                     rep_receive_count;
 };
 
 /* Pre-allocate extra Work Requests for handling backward receives
  * and sends. This is a fixed value because the Work Queues are
- * allocated when the forward channel is set up.
+ * allocated when the forward channel is set up, long before the
+ * backchannel is provisioned. This value is two times
+ * NFS4_DEF_CB_SLOT_TABLE_SIZE.
  */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
-#define RPCRDMA_BACKWARD_WRS           (8)
+#define RPCRDMA_BACKWARD_WRS (32)
 #else
-#define RPCRDMA_BACKWARD_WRS           (0)
+#define RPCRDMA_BACKWARD_WRS (0)
 #endif
 
 /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
@@ -121,33 +125,34 @@ struct rpcrdma_regbuf {
        struct ib_sge           rg_iov;
        struct ib_device        *rg_device;
        enum dma_data_direction rg_direction;
-       __be32                  rg_base[0] __attribute__ ((aligned(256)));
+       void                    *rg_data;
 };
 
-static inline u64
-rdmab_addr(struct rpcrdma_regbuf *rb)
+static inline u64 rdmab_addr(struct rpcrdma_regbuf *rb)
 {
        return rb->rg_iov.addr;
 }
 
-static inline u32
-rdmab_length(struct rpcrdma_regbuf *rb)
+static inline u32 rdmab_length(struct rpcrdma_regbuf *rb)
 {
        return rb->rg_iov.length;
 }
 
-static inline u32
-rdmab_lkey(struct rpcrdma_regbuf *rb)
+static inline u32 rdmab_lkey(struct rpcrdma_regbuf *rb)
 {
        return rb->rg_iov.lkey;
 }
 
-static inline struct ib_device *
-rdmab_device(struct rpcrdma_regbuf *rb)
+static inline struct ib_device *rdmab_device(struct rpcrdma_regbuf *rb)
 {
        return rb->rg_device;
 }
 
+static inline void *rdmab_data(const struct rpcrdma_regbuf *rb)
+{
+       return rb->rg_data;
+}
+
 #define RPCRDMA_DEF_GFP                (GFP_NOIO | __GFP_NOWARN)
 
 /* To ensure a transport can always make forward progress,
@@ -222,34 +227,18 @@ struct rpcrdma_xprt;
 struct rpcrdma_sendctx {
        struct ib_send_wr       sc_wr;
        struct ib_cqe           sc_cqe;
+       struct ib_device        *sc_device;
        struct rpcrdma_xprt     *sc_xprt;
        struct rpcrdma_req      *sc_req;
        unsigned int            sc_unmap_count;
        struct ib_sge           sc_sges[];
 };
 
-/* Limit the number of SGEs that can be unmapped during one
- * Send completion. This caps the amount of work a single
- * completion can do before returning to the provider.
- *
- * Setting this to zero disables Send completion batching.
- */
-enum {
-       RPCRDMA_MAX_SEND_BATCH = 7,
-};
-
 /*
  * struct rpcrdma_mr - external memory region metadata
  *
  * An external memory region is any buffer or page that is registered
  * on the fly (ie, not pre-registered).
- *
- * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
- * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
- * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
- * track of registration metadata while each RPC is pending.
- * rpcrdma_deregister_external() uses this metadata to unmap and
- * release these resources when an RPC is complete.
  */
 enum rpcrdma_frwr_state {
        FRWR_IS_INVALID,        /* ready to be used */
@@ -418,20 +407,6 @@ enum {
        RPCRDMA_BUF_F_EMPTY_SCQ = 0,
 };
 
-/*
- * Internal structure for transport instance creation. This
- * exists primarily for modularity.
- *
- * This data should be set with mount options
- */
-struct rpcrdma_create_data_internal {
-       unsigned int    max_requests;   /* max requests (slots) in flight */
-       unsigned int    rsize;          /* mount rsize - max read hdr+data */
-       unsigned int    wsize;          /* mount wsize - max write hdr+data */
-       unsigned int    inline_rsize;   /* max non-rdma read data payload */
-       unsigned int    inline_wsize;   /* max non-rdma write data payload */
-};
-
 /*
  * Statistics for RPCRDMA
  */
@@ -476,13 +451,11 @@ struct rpcrdma_xprt {
        struct rpcrdma_ia       rx_ia;
        struct rpcrdma_ep       rx_ep;
        struct rpcrdma_buffer   rx_buf;
-       struct rpcrdma_create_data_internal rx_data;
        struct delayed_work     rx_connect_worker;
        struct rpcrdma_stats    rx_stats;
 };
 
 #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
-#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
 
 static inline const char *
 rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
@@ -516,9 +489,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
 /*
  * Endpoint calls - xprtrdma/verbs.c
  */
-int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
-                               struct rpcrdma_create_data_internal *);
-void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt);
+void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt);
 int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
 void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
 
@@ -528,11 +500,12 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
 /*
  * Buffer calls - xprtrdma/verbs.c
  */
-struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
+struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
+                                      gfp_t flags);
 void rpcrdma_req_destroy(struct rpcrdma_req *req);
 int rpcrdma_buffer_create(struct rpcrdma_xprt *);
 void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
-struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
+struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt);
 
 struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
 void rpcrdma_mr_put(struct rpcrdma_mr *mr);
@@ -548,23 +521,34 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
 void rpcrdma_buffer_put(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
 
-struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
-                                           gfp_t);
-bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
-void rpcrdma_free_regbuf(struct rpcrdma_regbuf *);
+bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
+                           gfp_t flags);
+bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+                             struct rpcrdma_regbuf *rb);
 
-static inline bool
-rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
+/**
+ * rpcrdma_regbuf_is_mapped - check if buffer is DMA mapped
+ *
+ * Returns true if @rb is currently DMA mapped (rb->rg_device is non-NULL).
+ */
+static inline bool rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
 {
        return rb->rg_device != NULL;
 }
 
-static inline bool
-rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
+/**
+ * rpcrdma_regbuf_dma_map - DMA-map a regbuf
+ * @r_xprt: controlling transport instance
+ * @rb: regbuf to be mapped
+ *
+ * Returns true if @rb is DMA mapped when this function returns.
+ */
+static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
+                                         struct rpcrdma_regbuf *rb)
 {
        if (likely(rpcrdma_regbuf_is_mapped(rb)))
                return true;
-       return __rpcrdma_dma_map_regbuf(ia, rb);
+       return __rpcrdma_regbuf_dma_map(r_xprt, rb);
 }
 
 /*
@@ -579,9 +563,8 @@ rpcrdma_data_dir(bool writing)
 
 /* Memory registration calls xprtrdma/frwr_ops.c
  */
-bool frwr_is_supported(struct rpcrdma_ia *);
-int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
-             struct rpcrdma_create_data_internal *cdata);
+bool frwr_is_supported(struct ib_device *device);
+int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep);
 int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr);
 void frwr_release_mr(struct rpcrdma_mr *mr);
 size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
@@ -610,7 +593,7 @@ int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
                              struct rpcrdma_req *req, u32 hdrlen,
                              struct xdr_buf *xdr,
                              enum rpcrdma_chunktype rtype);
-void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc);
+void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc);
 int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
 void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *);
 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
@@ -627,7 +610,9 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
 
 /* RPC/RDMA module init - xprtrdma/transport.c
  */
+extern unsigned int xprt_rdma_slot_table_entries;
 extern unsigned int xprt_rdma_max_inline_read;
+extern unsigned int xprt_rdma_max_inline_write;
 void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
 void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
 void xprt_rdma_close(struct rpc_xprt *xprt);
index 732d4b5..c69951e 100644 (file)
@@ -2017,6 +2017,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
                 * we'll need to figure out how to pass a namespace to
                 * connect.
                 */
+               task->tk_rpc_status = -ENOTCONN;
                rpc_exit(task, -ENOTCONN);
                return;
        }
@@ -2690,7 +2691,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
        .buf_free               = rpc_free,
        .prepare_request        = xs_stream_prepare_request,
        .send_request           = xs_local_send_request,
-       .set_retrans_timeout    = xprt_set_retrans_timeout_def,
+       .wait_for_reply_request = xprt_wait_for_reply_request_def,
        .close                  = xs_close,
        .destroy                = xs_destroy,
        .print_stats            = xs_local_print_stats,
@@ -2710,7 +2711,7 @@ static const struct rpc_xprt_ops xs_udp_ops = {
        .buf_alloc              = rpc_malloc,
        .buf_free               = rpc_free,
        .send_request           = xs_udp_send_request,
-       .set_retrans_timeout    = xprt_set_retrans_timeout_rtt,
+       .wait_for_reply_request = xprt_wait_for_reply_request_rtt,
        .timer                  = xs_udp_timer,
        .release_request        = xprt_release_rqst_cong,
        .close                  = xs_close,
@@ -2733,7 +2734,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
        .buf_free               = rpc_free,
        .prepare_request        = xs_stream_prepare_request,
        .send_request           = xs_tcp_send_request,
-       .set_retrans_timeout    = xprt_set_retrans_timeout_def,
+       .wait_for_reply_request = xprt_wait_for_reply_request_def,
        .close                  = xs_tcp_shutdown,
        .destroy                = xs_destroy,
        .set_connect_timeout    = xs_tcp_set_connect_timeout,
@@ -2761,7 +2762,7 @@ static const struct rpc_xprt_ops bc_tcp_ops = {
        .buf_alloc              = bc_malloc,
        .buf_free               = bc_free,
        .send_request           = bc_send_request,
-       .set_retrans_timeout    = xprt_set_retrans_timeout_def,
+       .wait_for_reply_request = xprt_wait_for_reply_request_def,
        .close                  = bc_close,
        .destroy                = bc_destroy,
        .print_stats            = xs_tcp_print_stats,