Merge tag 'nfs-for-5.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 7 Apr 2020 20:51:39 +0000 (13:51 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 7 Apr 2020 20:51:39 +0000 (13:51 -0700)
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - Fix a page leak in nfs_destroy_unlinked_subrequests()

   - Fix use-after-free issues in nfs_pageio_add_request()

   - Fix constant_table array definitions in the new mount code

   - finish_automount() requires us to hold 2 refs to the mount record

  Features:
   - Improve the accuracy of telldir/seekdir by using 64-bit cookies
     when possible (see the telldir/seekdir sketch after this summary)

   - Allow one active RDMA connection and several zombie connections to
     prevent blocking if the remote server is unresponsive

   - Limit the size of the NFS access cache by default

   - Reduce the number of references to credentials that are taken by
     NFS

   - The pNFS files and flexfiles drivers now support per-layout-segment
     COMMIT lists

   - Enable partial-file layout segments in the pNFS/flexfiles driver

   - Add support for CB_RECALL_ANY to the pNFS flexfiles layout type

   - pNFS/flexfiles: Report NFS4ERR_DELAY and NFS4ERR_GRACE errors from
     the DS using the layouterror mechanism

  Bugfixes and cleanups:
   - SUNRPC: Fix krb5p regressions

   - Don't specify NFS version in "UDP not supported" error

   - nfsroot: Set TCP as the default transport protocol

   - pnfs: Return valid stateids in nfs_layout_find_inode_by_stateid()

   - alloc_nfs_open_context() must use the file cred when available

   - Fix locking when dereferencing the delegation cred

   - Fix memory leaks in O_DIRECT when nfs_get_lock_context() fails

   - Various clean ups of the NFS O_DIRECT commit code

   - Clean up RDMA connect/disconnect

   - Replace zero-length arrays with C99-style flexible arrays (see the
     flexible-array sketch after this summary)"
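
The telldir/seekdir item in the summary above refers to the offsets that
applications save with telldir() and later replay with seekdir(); with
this series those offsets can be backed by the server's 64-bit readdir
cookies (see nfs_readdir_use_cookie() in the fs/nfs/dir.c hunks below).
A minimal userspace sketch of that contract; nothing in it is
NFS-specific and all names are illustrative:

#include <dirent.h>
#include <stdio.h>

int main(int argc, char **argv)
{
        DIR *dir = opendir(argc > 1 ? argv[1] : ".");
        struct dirent *ent;
        long mark = -1;

        if (!dir)
                return 1;
        while ((ent = readdir(dir)) != NULL) {
                /* Remember the position just after the first entry. */
                if (mark < 0)
                        mark = telldir(dir);
        }
        /*
         * Replaying the saved offset must yield the entries that
         * followed it; with this series that offset is backed by the
         * server's 64-bit cookie when nfs_readdir_use_cookie() allows.
         */
        seekdir(dir, mark);
        while ((ent = readdir(dir)) != NULL)
                printf("%s\n", ent->d_name);
        closedir(dir);
        return 0;
}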
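
The flexible-array item in the summary above is the mechanical
conversion visible below in struct nfs_cache_array and struct
nfs4_cached_acl: "type name[0];" becomes the C99 flexible array member
"type name[];". A small standalone sketch of the idiom; the struct and
helper here are illustrative, not kernel code:

#include <stdlib.h>
#include <string.h>

struct blob {
        size_t len;
        char data[];            /* was the GNU extension: char data[0]; */
};

static struct blob *blob_alloc(const void *src, size_t len)
{
        /* One allocation covers the header plus the trailing payload. */
        struct blob *b = malloc(sizeof(*b) + len);

        if (!b)
                return NULL;
        b->len = len;
        memcpy(b->data, src, len);
        return b;
}

int main(void)
{
        struct blob *b = blob_alloc("r--r--r--", 9);

        free(b);                /* free(NULL) is a no-op if the alloc failed */
        return 0;
}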

* tag 'nfs-for-5.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (86 commits)
  NFS: Clean up process of marking inode stale.
  SUNRPC: Don't start a timer on an already queued rpc task
  NFS/pnfs: Reference the layout cred in pnfs_prepare_layoutreturn()
  NFS/pnfs: Fix dereference of layout cred in pnfs_layoutcommit_inode()
  NFS: Beware when dereferencing the delegation cred
  NFS: Add a module parameter to set nfs_mountpoint_expiry_timeout
  NFS: finish_automount() requires us to hold 2 refs to the mount record
  NFS: Fix a few constant_table array definitions
  NFS: Try to join page groups before an O_DIRECT retransmission
  NFS: Refactor nfs_lock_and_join_requests()
  NFS: Reverse the submission order of requests in __nfs_pageio_add_request()
  NFS: Clean up nfs_lock_and_join_requests()
  NFS: Remove the redundant function nfs_pgio_has_mirroring()
  NFS: Fix memory leaks in nfs_pageio_stop_mirroring()
  NFS: Fix a request reference leak in nfs_direct_write_clear_reqs()
  NFS: Fix use-after-free issues in nfs_pageio_add_request()
  NFS: Fix races nfs_page_group_destroy() vs nfs_destroy_unlinked_subrequests()
  NFS: Fix a page leak in nfs_destroy_unlinked_subrequests()
  NFS: Remove unused FLUSH_SYNC support in nfs_initiate_pgio()
  pNFS/flexfiles: Specify the layout segment range in LAYOUTGET
  ...

fs/nfs/dir.c
fs/nfs/nfs4proc.c
fs/nfs/super.c
include/linux/nfs_xdr.h
include/linux/sunrpc/xdr.h
include/trace/events/rpcrdma.h
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/clnt.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtsock.c

diff --combined fs/nfs/dir.c
@@@ -141,10 -141,9 +141,9 @@@ struct nfs_cache_array 
        int size;
        int eof_index;
        u64 last_cookie;
-       struct nfs_cache_array_entry array[0];
+       struct nfs_cache_array_entry array[];
  };
  
- typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
  typedef struct {
        struct file     *file;
        struct page     *page;
        u64             *dir_cookie;
        u64             last_cookie;
        loff_t          current_index;
-       decode_dirent_t decode;
+       loff_t          prev_index;
  
        unsigned long   dir_verifier;
        unsigned long   timestamp;
@@@ -240,6 -239,25 +239,25 @@@ out
        return ret;
  }
  
+ static inline
+ int is_32bit_api(void)
+ {
+ #ifdef CONFIG_COMPAT
+       return in_compat_syscall();
+ #else
+       return (BITS_PER_LONG == 32);
+ #endif
+ }
+ static
+ bool nfs_readdir_use_cookie(const struct file *filp)
+ {
+       if ((filp->f_mode & FMODE_32BITHASH) ||
+           (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+               return false;
+       return true;
+ }
  static
  int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
  {
@@@ -289,7 -307,7 +307,7 @@@ int nfs_readdir_search_for_cookie(struc
                            !nfs_readdir_inode_mapping_valid(nfsi)) {
                                ctx->duped = 0;
                                ctx->attr_gencount = nfsi->attr_gencount;
-                       } else if (new_pos < desc->ctx->pos) {
+                       } else if (new_pos < desc->prev_index) {
                                if (ctx->duped > 0
                                    && ctx->dup_cookie == *desc->dir_cookie) {
                                        if (printk_ratelimit()) {
                                ctx->dup_cookie = *desc->dir_cookie;
                                ctx->duped = -1;
                        }
-                       desc->ctx->pos = new_pos;
+                       if (nfs_readdir_use_cookie(desc->file))
+                               desc->ctx->pos = *desc->dir_cookie;
+                       else
+                               desc->ctx->pos = new_pos;
+                       desc->prev_index = new_pos;
                        desc->cache_entry_index = i;
                        return 0;
                }
@@@ -376,9 -398,10 +398,10 @@@ error
  static int xdr_decode(nfs_readdir_descriptor_t *desc,
                      struct nfs_entry *entry, struct xdr_stream *xdr)
  {
+       struct inode *inode = file_inode(desc->file);
        int error;
  
-       error = desc->decode(xdr, entry, desc->plus);
+       error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus);
        if (error)
                return error;
        entry->fattr->time_start = desc->timestamp;
@@@ -756,6 -779,7 +779,7 @@@ int readdir_search_pagecache(nfs_readdi
  
        if (desc->page_index == 0) {
                desc->current_index = 0;
+               desc->prev_index = 0;
                desc->last_cookie = 0;
        }
        do {
@@@ -786,11 -810,14 +810,14 @@@ int nfs_do_filldir(nfs_readdir_descript
                        desc->eof = true;
                        break;
                }
-               desc->ctx->pos++;
                if (i < (array->size-1))
                        *desc->dir_cookie = array->array[i+1].cookie;
                else
                        *desc->dir_cookie = array->last_cookie;
+               if (nfs_readdir_use_cookie(file))
+                       desc->ctx->pos = *desc->dir_cookie;
+               else
+                       desc->ctx->pos++;
                if (ctx->duped != 0)
                        ctx->duped = 1;
        }
@@@ -860,9 -887,14 +887,14 @@@ static int nfs_readdir(struct file *fil
  {
        struct dentry   *dentry = file_dentry(file);
        struct inode    *inode = d_inode(dentry);
-       nfs_readdir_descriptor_t my_desc,
-                       *desc = &my_desc;
        struct nfs_open_dir_context *dir_ctx = file->private_data;
+       nfs_readdir_descriptor_t my_desc = {
+               .file = file,
+               .ctx = ctx,
+               .dir_cookie = &dir_ctx->dir_cookie,
+               .plus = nfs_use_readdirplus(inode, ctx),
+       },
+                       *desc = &my_desc;
        int res = 0;
  
        dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
         * to either find the entry with the appropriate number or
         * revalidate the cookie.
         */
-       memset(desc, 0, sizeof(*desc));
-       desc->file = file;
-       desc->ctx = ctx;
-       desc->dir_cookie = &dir_ctx->dir_cookie;
-       desc->decode = NFS_PROTO(inode)->decode_dirent;
-       desc->plus = nfs_use_readdirplus(inode, ctx);
        if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
                res = nfs_revalidate_mapping(inode, file->f_mapping);
        if (res < 0)
@@@ -954,7 -978,10 +978,10 @@@ static loff_t nfs_llseek_dir(struct fil
        }
        if (offset != filp->f_pos) {
                filp->f_pos = offset;
-               dir_ctx->dir_cookie = 0;
+               if (nfs_readdir_use_cookie(filp))
+                       dir_ctx->dir_cookie = offset;
+               else
+                       dir_ctx->dir_cookie = 0;
                dir_ctx->duped = 0;
        }
        inode_unlock(inode);
@@@ -2282,7 -2309,7 +2309,7 @@@ static DEFINE_SPINLOCK(nfs_access_lru_l
  static LIST_HEAD(nfs_access_lru_list);
  static atomic_long_t nfs_access_nr_entries;
  
- static unsigned long nfs_access_max_cachesize = ULONG_MAX;
+ static unsigned long nfs_access_max_cachesize = 4*1024*1024;
  module_param(nfs_access_max_cachesize, ulong, 0644);
  MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
  
@@@ -2489,7 -2516,7 +2516,7 @@@ static int nfs_access_get_cached_rcu(st
        rcu_read_lock();
        if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
                goto out;
 -      lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
 +      lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
        cache = list_entry(lh, struct nfs_access_entry, lru);
        if (lh == &nfsi->access_cache_entry_lru ||
            cred_fscmp(cred, cache->cred) != 0)
@@@ -2642,9 -2669,10 +2669,10 @@@ static int nfs_do_access(struct inode *
        status = NFS_PROTO(inode)->access(inode, &cache);
        if (status != 0) {
                if (status == -ESTALE) {
-                       nfs_zap_caches(inode);
                        if (!S_ISDIR(inode->i_mode))
-                               set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+                               nfs_set_inode_stale(inode);
+                       else
+                               nfs_zap_caches(inode);
                }
                goto out;
        }
@@@ -2732,14 -2760,7 +2760,7 @@@ force_lookup
        if (!NFS_PROTO(inode)->access)
                goto out_notsup;
  
-       /* Always try fast lookups first */
-       rcu_read_lock();
-       res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
-       rcu_read_unlock();
-       if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
-               /* Fast lookup failed, try the slow way */
-               res = nfs_do_access(inode, cred, mask);
-       }
+       res = nfs_do_access(inode, cred, mask);
  out:
        if (!res && (mask & MAY_EXEC))
                res = nfs_execute_ok(inode, mask);
diff --combined fs/nfs/nfs4proc.c
@@@ -2346,7 -2346,7 +2346,7 @@@ static int _nfs4_proc_open_confirm(stru
                .callback_ops = &nfs4_open_confirm_ops,
                .callback_data = data,
                .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
        };
        int status;
  
@@@ -2511,7 -2511,7 +2511,7 @@@ static int nfs4_run_open_task(struct nf
                .callback_ops = &nfs4_open_ops,
                .callback_data = data,
                .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
        };
        int status;
  
@@@ -2790,16 -2790,19 +2790,19 @@@ static int nfs41_check_delegation_state
                return NFS_OK;
        }
  
+       spin_lock(&delegation->lock);
        nfs4_stateid_copy(&stateid, &delegation->stateid);
  
        if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
                                &delegation->flags)) {
+               spin_unlock(&delegation->lock);
                rcu_read_unlock();
                return NFS_OK;
        }
  
        if (delegation->cred)
                cred = get_cred(delegation->cred);
+       spin_unlock(&delegation->lock);
        rcu_read_unlock();
        status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
        trace_nfs4_test_delegation_stateid(state, NULL, status);
@@@ -3651,7 -3654,7 +3654,7 @@@ int nfs4_do_close(struct nfs4_state *st
                .rpc_message = &msg,
                .callback_ops = &nfs4_close_ops,
                .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
        };
        int status = -ENOMEM;
  
@@@ -4002,7 -4005,7 +4005,7 @@@ static int nfs4_proc_get_root(struct nf
  {
        int error;
        struct nfs_fattr *fattr = info->fattr;
 -      struct nfs4_label *label = NULL;
 +      struct nfs4_label *label = fattr->label;
  
        error = nfs4_server_capabilities(server, mntfh);
        if (error < 0) {
                return error;
        }
  
 -      label = nfs4_label_alloc(server, GFP_KERNEL);
 -      if (IS_ERR(label))
 -              return PTR_ERR(label);
 -
        error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL);
        if (error < 0) {
                dprintk("nfs4_get_root: getattr error = %d\n", -error);
 -              goto err_free_label;
 +              goto out;
        }
  
        if (fattr->valid & NFS_ATTR_FATTR_FSID &&
            !nfs_fsid_equal(&server->fsid, &fattr->fsid))
                memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
  
 -err_free_label:
 -      nfs4_label_free(label);
 -
 +out:
        return error;
  }
  
@@@ -5544,7 -5553,7 +5547,7 @@@ unwind
  struct nfs4_cached_acl {
        int cached;
        size_t len;
-       char data[0];
+       char data[];
  };
  
  static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl)
@@@ -6253,6 -6262,7 +6256,7 @@@ static void nfs4_delegreturn_done(struc
                /* Fallthrough */
        case -NFS4ERR_BAD_STATEID:
        case -NFS4ERR_STALE_STATEID:
+       case -ETIMEDOUT:
                task->tk_status = 0;
                break;
        case -NFS4ERR_OLD_STATEID:
@@@ -6343,7 -6353,7 +6347,7 @@@ static int _nfs4_proc_delegreturn(struc
                .rpc_client = server->client,
                .rpc_message = &msg,
                .callback_ops = &nfs4_delegreturn_ops,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | RPC_TASK_TIMEOUT,
        };
        int status = 0;
  
@@@ -6926,7 -6936,7 +6930,7 @@@ static int _nfs4_do_setlk(struct nfs4_s
                .rpc_message = &msg,
                .callback_ops = &nfs4_lock_ops,
                .workqueue = nfsiod_workqueue,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
        };
        int ret;
  
@@@ -9170,7 -9180,7 +9174,7 @@@ nfs4_proc_layoutget(struct nfs4_layoutg
                .rpc_message = &msg,
                .callback_ops = &nfs4_layoutget_call_ops,
                .callback_data = lgp,
-               .flags = RPC_TASK_ASYNC,
+               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
        };
        struct pnfs_layout_segment *lseg = NULL;
        struct nfs4_exception exception = {
@@@ -9287,6 -9297,7 +9291,7 @@@ static void nfs4_layoutreturn_release(v
                lrp->ld_private.ops->free(&lrp->ld_private);
        pnfs_put_layout_hdr(lrp->args.layout);
        nfs_iput_and_deactive(lrp->inode);
+       put_cred(lrp->cred);
        kfree(calldata);
        dprintk("<-- %s\n", __func__);
  }
diff --combined fs/nfs/super.c
@@@ -176,6 -176,41 +176,41 @@@ void nfs_sb_deactive(struct super_bloc
  }
  EXPORT_SYMBOL_GPL(nfs_sb_deactive);
  
+ static int __nfs_list_for_each_server(struct list_head *head,
+               int (*fn)(struct nfs_server *, void *),
+               void *data)
+ {
+       struct nfs_server *server, *last = NULL;
+       int ret = 0;
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, head, client_link) {
+               if (!nfs_sb_active(server->super))
+                       continue;
+               rcu_read_unlock();
+               if (last)
+                       nfs_sb_deactive(last->super);
+               last = server;
+               ret = fn(server, data);
+               if (ret)
+                       goto out;
+               rcu_read_lock();
+       }
+       rcu_read_unlock();
+ out:
+       if (last)
+               nfs_sb_deactive(last->super);
+       return ret;
+ }
+ int nfs_client_for_each_server(struct nfs_client *clp,
+               int (*fn)(struct nfs_server *, void *),
+               void *data)
+ {
+       return __nfs_list_for_each_server(&clp->cl_superblocks, fn, data);
+ }
+ EXPORT_SYMBOL_GPL(nfs_client_for_each_server);
  /*
   * Deliver file system statistics to userspace
   */
@@@ -1179,6 -1214,7 +1214,6 @@@ int nfs_get_tree_common(struct fs_conte
        struct super_block *s;
        int (*compare_super)(struct super_block *, struct fs_context *) = nfs_compare_super;
        struct nfs_server *server = ctx->server;
 -      unsigned long kflags = 0, kflags_out = 0;
        int error;
  
        ctx->server = NULL;
                goto error_splat_super;
        }
  
 -      if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
 -              kflags |= SECURITY_LSM_NATIVE_LABELS;
 -      if (ctx->clone_data.sb) {
 -              if (d_inode(fc->root)->i_fop != &nfs_dir_operations) {
 -                      error = -ESTALE;
 -                      goto error_splat_root;
 -              }
 -              /* clone any lsm security options from the parent to the new sb */
 -              error = security_sb_clone_mnt_opts(ctx->clone_data.sb, s, kflags,
 -                              &kflags_out);
 -      } else {
 -              error = security_sb_set_mnt_opts(s, fc->security,
 -                                                      kflags, &kflags_out);
 -      }
 -      if (error)
 -              goto error_splat_root;
 -      if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
 -              !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
 -              NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
 -
        s->s_flags |= SB_ACTIVE;
        error = 0;
  
@@@ -1247,6 -1303,10 +1282,6 @@@ out
  out_err_nosb:
        nfs_free_server(server);
        goto out;
 -
 -error_splat_root:
 -      dput(fc->root);
 -      fc->root = NULL;
  error_splat_super:
        deactivate_locked_super(s);
        goto out;
diff --combined include/linux/nfs_xdr.h
@@@ -75,7 -75,6 +75,7 @@@ struct nfs_fattr 
        struct nfs4_string      *owner_name;
        struct nfs4_string      *group_name;
        struct nfs4_threshold   *mdsthreshold;  /* pNFS threshold hints */
 +      struct nfs4_label       *label;
  };
  
  #define NFS_ATTR_FATTR_TYPE           (1U << 0)
@@@ -1266,16 -1265,25 +1266,25 @@@ struct nfstime4 
  struct pnfs_commit_bucket {
        struct list_head written;
        struct list_head committing;
-       struct pnfs_layout_segment *wlseg;
-       struct pnfs_layout_segment *clseg;
+       struct pnfs_layout_segment *lseg;
        struct nfs_writeverf direct_verf;
  };
  
+ struct pnfs_commit_array {
+       struct list_head cinfo_list;
+       struct list_head lseg_list;
+       struct pnfs_layout_segment *lseg;
+       struct rcu_head rcu;
+       refcount_t refcount;
+       unsigned int nbuckets;
+       struct pnfs_commit_bucket buckets[];
+ };
  struct pnfs_ds_commit_info {
-       int nwritten;
-       int ncommitting;
-       int nbuckets;
-       struct pnfs_commit_bucket *buckets;
+       struct list_head commits;
+       unsigned int nwritten;
+       unsigned int ncommitting;
+       const struct pnfs_commit_ops *ops;
  };
  
  struct nfs41_state_protection {
@@@ -1386,22 -1394,11 +1395,11 @@@ struct nfs41_free_stateid_res 
        unsigned int                    status;
  };
  
- static inline void
- nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo)
- {
-       kfree(cinfo->buckets);
- }
  #else
  
  struct pnfs_ds_commit_info {
  };
  
- static inline void
- nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo)
- {
- }
  #endif /* CONFIG_NFS_V4_1 */
  
  #ifdef CONFIG_NFS_V4_2
diff --combined include/linux/sunrpc/xdr.h
@@@ -184,10 -184,23 +184,9 @@@ xdr_adjust_iovec(struct kvec *iov, __be
  extern void xdr_shift_buf(struct xdr_buf *, size_t);
  extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
  extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
- extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int);
  extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
  extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
  
 -/*
 - * Helper structure for copying from an sk_buff.
 - */
 -struct xdr_skb_reader {
 -      struct sk_buff  *skb;
 -      unsigned int    offset;
 -      size_t          count;
 -      __wsum          csum;
 -};
 -
 -typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
 -
 -extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
 -
  extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
  extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
  
@@@ -286,59 -299,6 +285,59 @@@ xdr_align_size(size_t n
        return (n + mask) & ~mask;
  }
  
 +/**
 + * xdr_pad_size - Calculate size of an object's pad
 + * @n: Size of an object being XDR encoded (in bytes)
 + *
 + * This implementation avoids the need for conditional
 + * branches or modulo division.
 + *
 + * Return value:
 + *   Size (in bytes) of the needed XDR pad
 + */
 +static inline size_t xdr_pad_size(size_t n)
 +{
 +      return xdr_align_size(n) - n;
 +}
 +
 +/**
 + * xdr_stream_encode_item_present - Encode a "present" list item
 + * @xdr: pointer to xdr_stream
 + *
 + * Return values:
 + *   On success, returns length in bytes of XDR buffer consumed
 + *   %-EMSGSIZE on XDR buffer overflow
 + */
 +static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr)
 +{
 +      const size_t len = sizeof(__be32);
 +      __be32 *p = xdr_reserve_space(xdr, len);
 +
 +      if (unlikely(!p))
 +              return -EMSGSIZE;
 +      *p = xdr_one;
 +      return len;
 +}
 +
 +/**
 + * xdr_stream_encode_item_absent - Encode a "not present" list item
 + * @xdr: pointer to xdr_stream
 + *
 + * Return values:
 + *   On success, returns length in bytes of XDR buffer consumed
 + *   %-EMSGSIZE on XDR buffer overflow
 + */
 +static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
 +{
 +      const size_t len = sizeof(__be32);
 +      __be32 *p = xdr_reserve_space(xdr, len);
 +
 +      if (unlikely(!p))
 +              return -EMSGSIZE;
 +      *p = xdr_zero;
 +      return len;
 +}
 +
  /**
   * xdr_stream_encode_u32 - Encode a 32-bit integer
   * @xdr: pointer to xdr_stream
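
The xdr_pad_size()/xdr_align_size() helpers added above compute XDR
padding with mask arithmetic rather than a branch or a modulo. A tiny
standalone check of the same formula, using local names rather than the
kernel helpers:

#include <assert.h>
#include <stddef.h>

/* Same arithmetic as the xdr_align_size()/xdr_pad_size() pair above. */
static size_t align4(size_t n) { return (n + 3) & ~(size_t)3; }
static size_t pad4(size_t n)   { return align4(n) - n; }

int main(void)
{
        assert(pad4(0) == 0);
        assert(pad4(5) == 3);   /* a 5-byte opaque needs 3 bytes of pad */
        assert(pad4(8) == 0);   /* already a multiple of four */
        return 0;
}
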
diff --combined include/trace/events/rpcrdma.h
@@@ -104,12 -104,12 +104,12 @@@ DECLARE_EVENT_CLASS(xprtrdma_connect_cl
        TP_fast_assign(
                __entry->r_xprt = r_xprt;
                __entry->rc = rc;
-               __entry->connect_status = r_xprt->rx_ep.rep_connected;
+               __entry->connect_status = r_xprt->rx_ep->re_connect_status;
                __assign_str(addr, rpcrdma_addrstr(r_xprt));
                __assign_str(port, rpcrdma_portstr(r_xprt));
        ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d",
+       TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d",
                __get_str(addr), __get_str(port), __entry->r_xprt,
                __entry->rc, __entry->connect_status
        )
@@@ -228,20 -228,20 +228,20 @@@ DECLARE_EVENT_CLASS(xprtrdma_frwr_done
        TP_ARGS(wc, frwr),
  
        TP_STRUCT__entry(
-               __field(const void *, mr)
+               __field(u32, mr_id)
                __field(unsigned int, status)
                __field(unsigned int, vendor_err)
        ),
  
        TP_fast_assign(
-               __entry->mr = container_of(frwr, struct rpcrdma_mr, frwr);
+               __entry->mr_id = frwr->fr_mr->res.id;
                __entry->status = wc->status;
                __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
        ),
  
        TP_printk(
-               "mr=%p: %s (%u/0x%x)",
-               __entry->mr, rdma_show_wc_status(__entry->status),
+               "mr.id=%u: %s (%u/0x%x)",
+               __entry->mr_id, rdma_show_wc_status(__entry->status),
                __entry->status, __entry->vendor_err
        )
  );
@@@ -274,7 -274,8 +274,8 @@@ DECLARE_EVENT_CLASS(xprtrdma_mr
        TP_ARGS(mr),
  
        TP_STRUCT__entry(
-               __field(const void *, mr)
+               __field(u32, mr_id)
+               __field(int, nents)
                __field(u32, handle)
                __field(u32, length)
                __field(u64, offset)
        ),
  
        TP_fast_assign(
-               __entry->mr = mr;
+               __entry->mr_id  = mr->frwr.fr_mr->res.id;
+               __entry->nents  = mr->mr_nents;
                __entry->handle = mr->mr_handle;
                __entry->length = mr->mr_length;
                __entry->offset = mr->mr_offset;
                __entry->dir    = mr->mr_dir;
        ),
  
-       TP_printk("mr=%p %u@0x%016llx:0x%08x (%s)",
-               __entry->mr, __entry->length,
+       TP_printk("mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)",
+               __entry->mr_id, __entry->nents, __entry->length,
                (unsigned long long)__entry->offset, __entry->handle,
                xprtrdma_show_direction(__entry->dir)
        )
@@@ -340,68 -342,37 +342,37 @@@ DECLARE_EVENT_CLASS(xprtrdma_cb_event
   ** Connection events
   **/
  
- TRACE_EVENT(xprtrdma_cm_event,
-       TP_PROTO(
-               const struct rpcrdma_xprt *r_xprt,
-               struct rdma_cm_event *event
-       ),
-       TP_ARGS(r_xprt, event),
-       TP_STRUCT__entry(
-               __field(const void *, r_xprt)
-               __field(unsigned int, event)
-               __field(int, status)
-               __string(addr, rpcrdma_addrstr(r_xprt))
-               __string(port, rpcrdma_portstr(r_xprt))
-       ),
-       TP_fast_assign(
-               __entry->r_xprt = r_xprt;
-               __entry->event = event->event;
-               __entry->status = event->status;
-               __assign_str(addr, rpcrdma_addrstr(r_xprt));
-               __assign_str(port, rpcrdma_portstr(r_xprt));
-       ),
-       TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)",
-               __get_str(addr), __get_str(port),
-               __entry->r_xprt, rdma_show_cm_event(__entry->event),
-               __entry->event, __entry->status
-       )
- );
  TRACE_EVENT(xprtrdma_inline_thresh,
        TP_PROTO(
-               const struct rpcrdma_xprt *r_xprt
+               const struct rpcrdma_ep *ep
        ),
  
-       TP_ARGS(r_xprt),
+       TP_ARGS(ep),
  
        TP_STRUCT__entry(
-               __field(const void *, r_xprt)
                __field(unsigned int, inline_send)
                __field(unsigned int, inline_recv)
                __field(unsigned int, max_send)
                __field(unsigned int, max_recv)
-               __string(addr, rpcrdma_addrstr(r_xprt))
-               __string(port, rpcrdma_portstr(r_xprt))
+               __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+               __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
        ),
  
        TP_fast_assign(
-               const struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+               const struct rdma_cm_id *id = ep->re_id;
  
-               __entry->r_xprt = r_xprt;
-               __entry->inline_send = ep->rep_inline_send;
-               __entry->inline_recv = ep->rep_inline_recv;
-               __entry->max_send = ep->rep_max_inline_send;
-               __entry->max_recv = ep->rep_max_inline_recv;
-               __assign_str(addr, rpcrdma_addrstr(r_xprt));
-               __assign_str(port, rpcrdma_portstr(r_xprt));
+               __entry->inline_send = ep->re_inline_send;
+               __entry->inline_recv = ep->re_inline_recv;
+               __entry->max_send = ep->re_max_inline_send;
+               __entry->max_recv = ep->re_max_inline_recv;
+               memcpy(__entry->srcaddr, &id->route.addr.src_addr,
+                      sizeof(struct sockaddr_in6));
+               memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
+                      sizeof(struct sockaddr_in6));
        ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u",
-               __get_str(addr), __get_str(port), __entry->r_xprt,
+       TP_printk("%pISpc -> %pISpc neg send/recv=%u/%u, calc send/recv=%u/%u",
+               __entry->srcaddr, __entry->dstaddr,
                __entry->inline_send, __entry->inline_recv,
                __entry->max_send, __entry->max_recv
        )
  
  DEFINE_CONN_EVENT(connect);
  DEFINE_CONN_EVENT(disconnect);
+ DEFINE_CONN_EVENT(flush_dct);
  
  DEFINE_RXPRT_EVENT(xprtrdma_create);
  DEFINE_RXPRT_EVENT(xprtrdma_op_destroy);
- DEFINE_RXPRT_EVENT(xprtrdma_remove);
- DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
  DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
  DEFINE_RXPRT_EVENT(xprtrdma_op_close);
  DEFINE_RXPRT_EVENT(xprtrdma_op_setport);
@@@ -480,32 -450,33 +450,33 @@@ TRACE_EVENT(xprtrdma_op_set_cto
  
  TRACE_EVENT(xprtrdma_qp_event,
        TP_PROTO(
-               const struct rpcrdma_xprt *r_xprt,
+               const struct rpcrdma_ep *ep,
                const struct ib_event *event
        ),
  
-       TP_ARGS(r_xprt, event),
+       TP_ARGS(ep, event),
  
        TP_STRUCT__entry(
-               __field(const void *, r_xprt)
-               __field(unsigned int, event)
+               __field(unsigned long, event)
                __string(name, event->device->name)
-               __string(addr, rpcrdma_addrstr(r_xprt))
-               __string(port, rpcrdma_portstr(r_xprt))
+               __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+               __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
        ),
  
        TP_fast_assign(
-               __entry->r_xprt = r_xprt;
+               const struct rdma_cm_id *id = ep->re_id;
                __entry->event = event->event;
                __assign_str(name, event->device->name);
-               __assign_str(addr, rpcrdma_addrstr(r_xprt));
-               __assign_str(port, rpcrdma_portstr(r_xprt));
+               memcpy(__entry->srcaddr, &id->route.addr.src_addr,
+                      sizeof(struct sockaddr_in6));
+               memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
+                      sizeof(struct sockaddr_in6));
        ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p: dev %s: %s (%u)",
-               __get_str(addr), __get_str(port), __entry->r_xprt,
-               __get_str(name), rdma_show_ib_event(__entry->event),
-               __entry->event
+       TP_printk("%pISpc -> %pISpc device=%s %s (%lu)",
+               __entry->srcaddr, __entry->dstaddr, __get_str(name),
+               rdma_show_ib_event(__entry->event), __entry->event
        )
  );
  
@@@ -801,7 -772,7 +772,7 @@@ TRACE_EVENT(xprtrdma_post_recvs
                __entry->r_xprt = r_xprt;
                __entry->count = count;
                __entry->status = status;
-               __entry->posted = r_xprt->rx_ep.rep_receive_count;
+               __entry->posted = r_xprt->rx_ep->re_receive_count;
                __assign_str(addr, rpcrdma_addrstr(r_xprt));
                __assign_str(port, rpcrdma_portstr(r_xprt));
        ),
@@@ -920,17 -891,17 +891,17 @@@ TRACE_EVENT(xprtrdma_frwr_alloc
        TP_ARGS(mr, rc),
  
        TP_STRUCT__entry(
-               __field(const void *, mr)
+               __field(u32, mr_id)
                __field(int, rc)
        ),
  
        TP_fast_assign(
-               __entry->mr = mr;
-               __entry->rc     = rc;
+               __entry->mr_id = mr->frwr.fr_mr->res.id;
+               __entry->rc = rc;
        ),
  
-       TP_printk("mr=%p: rc=%d",
-               __entry->mr, __entry->rc
+       TP_printk("mr.id=%u: rc=%d",
+               __entry->mr_id, __entry->rc
        )
  );
  
@@@ -943,7 -914,8 +914,8 @@@ TRACE_EVENT(xprtrdma_frwr_dereg
        TP_ARGS(mr, rc),
  
        TP_STRUCT__entry(
-               __field(const void *, mr)
+               __field(u32, mr_id)
+               __field(int, nents)
                __field(u32, handle)
                __field(u32, length)
                __field(u64, offset)
        ),
  
        TP_fast_assign(
-               __entry->mr = mr;
+               __entry->mr_id  = mr->frwr.fr_mr->res.id;
+               __entry->nents  = mr->mr_nents;
                __entry->handle = mr->mr_handle;
                __entry->length = mr->mr_length;
                __entry->offset = mr->mr_offset;
                __entry->rc     = rc;
        ),
  
-       TP_printk("mr=%p %u@0x%016llx:0x%08x (%s): rc=%d",
-               __entry->mr, __entry->length,
+       TP_printk("mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s): rc=%d",
+               __entry->mr_id, __entry->nents, __entry->length,
                (unsigned long long)__entry->offset, __entry->handle,
                xprtrdma_show_direction(__entry->dir),
                __entry->rc
@@@ -977,21 -950,21 +950,21 @@@ TRACE_EVENT(xprtrdma_frwr_sgerr
        TP_ARGS(mr, sg_nents),
  
        TP_STRUCT__entry(
-               __field(const void *, mr)
+               __field(u32, mr_id)
                __field(u64, addr)
                __field(u32, dir)
                __field(int, nents)
        ),
  
        TP_fast_assign(
-               __entry->mr = mr;
+               __entry->mr_id = mr->frwr.fr_mr->res.id;
                __entry->addr = mr->mr_sg->dma_address;
                __entry->dir = mr->mr_dir;
                __entry->nents = sg_nents;
        ),
  
-       TP_printk("mr=%p dma addr=0x%llx (%s) sg_nents=%d",
-               __entry->mr, __entry->addr,
+       TP_printk("mr.id=%u DMA addr=0x%llx (%s) sg_nents=%d",
+               __entry->mr_id, __entry->addr,
                xprtrdma_show_direction(__entry->dir),
                __entry->nents
        )
@@@ -1006,7 -979,7 +979,7 @@@ TRACE_EVENT(xprtrdma_frwr_maperr
        TP_ARGS(mr, num_mapped),
  
        TP_STRUCT__entry(
-               __field(const void *, mr)
+               __field(u32, mr_id)
                __field(u64, addr)
                __field(u32, dir)
                __field(int, num_mapped)
        ),
  
        TP_fast_assign(
-               __entry->mr = mr;
+               __entry->mr_id = mr->frwr.fr_mr->res.id;
                __entry->addr = mr->mr_sg->dma_address;
                __entry->dir = mr->mr_dir;
                __entry->num_mapped = num_mapped;
                __entry->nents = mr->mr_nents;
        ),
  
-       TP_printk("mr=%p dma addr=0x%llx (%s) nents=%d of %d",
-               __entry->mr, __entry->addr,
+       TP_printk("mr.id=%u DMA addr=0x%llx (%s) nents=%d of %d",
+               __entry->mr_id, __entry->addr,
                xprtrdma_show_direction(__entry->dir),
                __entry->num_mapped, __entry->nents
        )
  DEFINE_MR_EVENT(localinv);
  DEFINE_MR_EVENT(map);
  DEFINE_MR_EVENT(unmap);
- DEFINE_MR_EVENT(remoteinv);
+ DEFINE_MR_EVENT(reminv);
  DEFINE_MR_EVENT(recycle);
  
  TRACE_EVENT(xprtrdma_dma_maperr,
@@@ -1469,7 -1442,7 +1442,7 @@@ DECLARE_EVENT_CLASS(svcrdma_segment_eve
  );
  
  #define DEFINE_SEGMENT_EVENT(name)                                    \
 -              DEFINE_EVENT(svcrdma_segment_event, svcrdma_encode_##name,\
 +              DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\
                                TP_PROTO(                               \
                                        u32 handle,                     \
                                        u32 length,                     \
                                ),                                      \
                                TP_ARGS(handle, length, offset))
  
 -DEFINE_SEGMENT_EVENT(rseg);
 -DEFINE_SEGMENT_EVENT(wseg);
 +DEFINE_SEGMENT_EVENT(decode_wseg);
 +DEFINE_SEGMENT_EVENT(encode_rseg);
 +DEFINE_SEGMENT_EVENT(send_rseg);
 +DEFINE_SEGMENT_EVENT(encode_wseg);
 +DEFINE_SEGMENT_EVENT(send_wseg);
  
  DECLARE_EVENT_CLASS(svcrdma_chunk_event,
        TP_PROTO(
  );
  
  #define DEFINE_CHUNK_EVENT(name)                                      \
 -              DEFINE_EVENT(svcrdma_chunk_event, svcrdma_encode_##name,\
 +              DEFINE_EVENT(svcrdma_chunk_event, svcrdma_##name,       \
                                TP_PROTO(                               \
                                        u32 length                      \
                                ),                                      \
                                TP_ARGS(length))
  
 -DEFINE_CHUNK_EVENT(pzr);
 -DEFINE_CHUNK_EVENT(write);
 -DEFINE_CHUNK_EVENT(reply);
 +DEFINE_CHUNK_EVENT(send_pzr);
 +DEFINE_CHUNK_EVENT(encode_write_chunk);
 +DEFINE_CHUNK_EVENT(send_write_chunk);
 +DEFINE_CHUNK_EVENT(encode_read_chunk);
 +DEFINE_CHUNK_EVENT(send_reply_chunk);
  
 -TRACE_EVENT(svcrdma_encode_read,
 +TRACE_EVENT(svcrdma_send_read_chunk,
        TP_PROTO(
                u32 length,
                u32 position
@@@ -1639,24 -1607,6 +1612,24 @@@ TRACE_EVENT(svcrdma_dma_map_rwctx
        )
  );
  
 +TRACE_EVENT(svcrdma_send_pullup,
 +      TP_PROTO(
 +              unsigned int len
 +      ),
 +
 +      TP_ARGS(len),
 +
 +      TP_STRUCT__entry(
 +              __field(unsigned int, len)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->len = len;
 +      ),
 +
 +      TP_printk("len=%u", __entry->len)
 +);
 +
  TRACE_EVENT(svcrdma_send_failed,
        TP_PROTO(
                const struct svc_rqst *rqst,
@@@ -1836,6 -1786,34 +1809,6 @@@ TRACE_EVENT(svcrdma_post_rw
  DEFINE_SENDCOMP_EVENT(read);
  DEFINE_SENDCOMP_EVENT(write);
  
 -TRACE_EVENT(svcrdma_cm_event,
 -      TP_PROTO(
 -              const struct rdma_cm_event *event,
 -              const struct sockaddr *sap
 -      ),
 -
 -      TP_ARGS(event, sap),
 -
 -      TP_STRUCT__entry(
 -              __field(unsigned int, event)
 -              __field(int, status)
 -              __array(__u8, addr, INET6_ADDRSTRLEN + 10)
 -      ),
 -
 -      TP_fast_assign(
 -              __entry->event = event->event;
 -              __entry->status = event->status;
 -              snprintf(__entry->addr, sizeof(__entry->addr) - 1,
 -                       "%pISpc", sap);
 -      ),
 -
 -      TP_printk("addr=%s event=%s (%u/%d)",
 -              __entry->addr,
 -              rdma_show_cm_event(__entry->event),
 -              __entry->event, __entry->status
 -      )
 -);
 -
  TRACE_EVENT(svcrdma_qp_error,
        TP_PROTO(
                const struct ib_event *event,
diff --combined net/sunrpc/auth_gss/auth_gss.c
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/sunrpc/clnt.h>
  #include <linux/sunrpc/auth.h>
  #include <linux/sunrpc/auth_gss.h>
+ #include <linux/sunrpc/gss_krb5.h>
  #include <linux/sunrpc/svcauth_gss.h>
  #include <linux/sunrpc/gss_err.h>
  #include <linux/workqueue.h>
@@@ -1050,7 -1051,7 +1051,7 @@@ gss_create_new(const struct rpc_auth_cr
                goto err_put_mech;
        auth = &gss_auth->rpc_auth;
        auth->au_cslack = GSS_CRED_SLACK >> 2;
-       auth->au_rslack = GSS_VERF_SLACK >> 2;
+       auth->au_rslack = GSS_KRB5_MAX_SLACK_NEEDED >> 2;
        auth->au_verfsize = GSS_VERF_SLACK >> 2;
        auth->au_ralign = GSS_VERF_SLACK >> 2;
        auth->au_flags = 0;
@@@ -1724,8 -1725,9 +1725,9 @@@ bad_mic
        goto out;
  }
  
- static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-                             struct rpc_task *task, struct xdr_stream *xdr)
+ static noinline_for_stack int
+ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+                  struct rpc_task *task, struct xdr_stream *xdr)
  {
        struct rpc_rqst *rqstp = task->tk_rqstp;
        struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf;
@@@ -1816,8 -1818,9 +1818,9 @@@ out
        return -EAGAIN;
  }
  
- static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-                            struct rpc_task *task, struct xdr_stream *xdr)
+ static noinline_for_stack int
+ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+                 struct rpc_task *task, struct xdr_stream *xdr)
  {
        struct rpc_rqst *rqstp = task->tk_rqstp;
        struct xdr_buf  *snd_buf = &rqstp->rq_snd_buf;
        else
                iov = snd_buf->head;
        p = iov->iov_base + iov->iov_len;
 -      pad = 3 - ((snd_buf->len - offset - 1) & 3);
 +      pad = xdr_pad_size(snd_buf->len - offset);
        memset(p, 0, pad);
        iov->iov_len += pad;
        snd_buf->len += pad;
@@@ -1934,35 -1937,69 +1937,69 @@@ gss_unwrap_resp_auth(struct rpc_cred *c
        return 0;
  }
  
- static int
+ /*
+  * RFC 2203, Section 5.3.2.2
+  *
+  *    struct rpc_gss_integ_data {
+  *            opaque databody_integ<>;
+  *            opaque checksum<>;
+  *    };
+  *
+  *    struct rpc_gss_data_t {
+  *            unsigned int seq_num;
+  *            proc_req_arg_t arg;
+  *    };
+  */
+ static noinline_for_stack int
  gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
                      struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
                      struct xdr_stream *xdr)
  {
-       struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
-       u32 data_offset, mic_offset, integ_len, maj_stat;
+       struct xdr_buf gss_data, *rcv_buf = &rqstp->rq_rcv_buf;
        struct rpc_auth *auth = cred->cr_auth;
+       u32 len, offset, seqno, maj_stat;
        struct xdr_netobj mic;
-       __be32 *p;
+       int ret;
  
-       p = xdr_inline_decode(xdr, 2 * sizeof(*p));
-       if (unlikely(!p))
+       ret = -EIO;
+       mic.data = NULL;
+       /* opaque databody_integ<>; */
+       if (xdr_stream_decode_u32(xdr, &len))
                goto unwrap_failed;
-       integ_len = be32_to_cpup(p++);
-       if (integ_len & 3)
+       if (len & 3)
                goto unwrap_failed;
-       data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base;
-       mic_offset = integ_len + data_offset;
-       if (mic_offset > rcv_buf->len)
+       offset = rcv_buf->len - xdr_stream_remaining(xdr);
+       if (xdr_stream_decode_u32(xdr, &seqno))
                goto unwrap_failed;
-       if (be32_to_cpup(p) != rqstp->rq_seqno)
+       if (seqno != rqstp->rq_seqno)
                goto bad_seqno;
+       if (xdr_buf_subsegment(rcv_buf, &gss_data, offset, len))
+               goto unwrap_failed;
  
-       if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
+       /*
+        * The xdr_stream now points to the beginning of the
+        * upper layer payload, to be passed below to
+        * rpcauth_unwrap_resp_decode(). The checksum, which
+        * follows the upper layer payload in @rcv_buf, is
+        * located and parsed without updating the xdr_stream.
+        */
+       /* opaque checksum<>; */
+       offset += len;
+       if (xdr_decode_word(rcv_buf, offset, &len))
+               goto unwrap_failed;
+       offset += sizeof(__be32);
+       if (offset + len > rcv_buf->len)
+               goto unwrap_failed;
+       mic.len = len;
+       mic.data = kmalloc(len, GFP_NOFS);
+       if (!mic.data)
                goto unwrap_failed;
-       if (xdr_buf_read_mic(rcv_buf, &mic, mic_offset))
+       if (read_bytes_from_xdr_buf(rcv_buf, offset, mic.data, mic.len))
                goto unwrap_failed;
-       maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
+       maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &gss_data, &mic);
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
        if (maj_stat != GSS_S_COMPLETE)
  
        auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len);
        auth->au_ralign = auth->au_verfsize + 2;
-       return 0;
+       ret = 0;
+ out:
+       kfree(mic.data);
+       return ret;
  unwrap_failed:
        trace_rpcgss_unwrap_failed(task);
-       return -EIO;
+       goto out;
  bad_seqno:
-       trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p));
-       return -EIO;
+       trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, seqno);
+       goto out;
  bad_mic:
        trace_rpcgss_verify_mic(task, maj_stat);
-       return -EIO;
+       goto out;
  }
  
- static int
+ static noinline_for_stack int
  gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
                     struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
                     struct xdr_stream *xdr)
diff --combined net/sunrpc/clnt.c
@@@ -1099,8 -1099,9 +1099,9 @@@ rpc_task_set_rpc_message(struct rpc_tas
                task->tk_msg.rpc_proc = msg->rpc_proc;
                task->tk_msg.rpc_argp = msg->rpc_argp;
                task->tk_msg.rpc_resp = msg->rpc_resp;
-               if (msg->rpc_cred != NULL)
-                       task->tk_msg.rpc_cred = get_cred(msg->rpc_cred);
+               task->tk_msg.rpc_cred = msg->rpc_cred;
+               if (!(task->tk_flags & RPC_TASK_CRED_NOREF))
+                       get_cred(task->tk_msg.rpc_cred);
        }
  }
  
@@@ -1126,6 -1127,9 +1127,9 @@@ struct rpc_task *rpc_run_task(const str
  
        task = rpc_new_task(task_setup_data);
  
+       if (!RPC_IS_ASYNC(task))
+               task->tk_flags |= RPC_TASK_CRED_NOREF;
        rpc_task_set_client(task, task_setup_data->rpc_client);
        rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
  
@@@ -2509,7 -2513,6 +2513,7 @@@ call_decode(struct rpc_task *task
                goto out;
  
        req->rq_rcv_buf.len = req->rq_private_buf.len;
 +      trace_xprt_recvfrom(&req->rq_rcv_buf);
  
        /* Check that the softirq receive buffer is valid */
        WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
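
The rpc_task_set_rpc_message() and rpc_run_task() hunks above implement
the "reduce credential references" item from the summary: when the
caller can guarantee that the cred outlives the task (synchronous calls,
and the NFSv4 operations updated earlier in this diff), the task is
flagged RPC_TASK_CRED_NOREF and borrows the pointer instead of taking
and dropping an extra reference. A standalone sketch of that
borrow-versus-take pattern; all names here are illustrative, not the
kernel's:

#include <stdio.h>

struct cred { int refcount; };

#define TASK_CRED_NOREF 0x1     /* caller guarantees the cred outlives the task */

struct task {
        unsigned int flags;
        struct cred *cred;
};

static void cred_get(struct cred *c) { c->refcount++; }
static void cred_put(struct cred *c) { c->refcount--; }

static void task_set_cred(struct task *t, struct cred *c)
{
        t->cred = c;
        /* Only pin the cred when the caller cannot vouch for its lifetime. */
        if (!(t->flags & TASK_CRED_NOREF))
                cred_get(c);
}

static void task_release(struct task *t)
{
        if (!(t->flags & TASK_CRED_NOREF))
                cred_put(t->cred);
}

int main(void)
{
        struct cred c = { .refcount = 1 };
        struct task sync_task  = { .flags = TASK_CRED_NOREF };
        struct task async_task = { .flags = 0 };

        task_set_cred(&sync_task, &c);  /* borrows: refcount stays at 1 */
        task_set_cred(&async_task, &c); /* pins: refcount goes to 2 */
        printf("after setup: refcount=%d\n", c.refcount);

        task_release(&async_task);
        task_release(&sync_task);
        printf("after release: refcount=%d\n", c.refcount);
        return 0;
}
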
diff --combined net/sunrpc/xprtrdma/rpc_rdma.c
@@@ -103,21 -103,20 +103,20 @@@ static unsigned int rpcrdma_max_reply_h
  
  /**
   * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
-  * @r_xprt: transport instance to initialize
+  * @ep: endpoint to initialize
   *
   * The max_inline fields contain the maximum size of an RPC message
   * so the marshaling code doesn't have to repeat this calculation
   * for every RPC.
   */
- void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
+ void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep)
  {
-       unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs;
-       struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+       unsigned int maxsegs = ep->re_max_rdma_segs;
  
-       ep->rep_max_inline_send =
-               ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
-       ep->rep_max_inline_recv =
-               ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
+       ep->re_max_inline_send =
+               ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs);
+       ep->re_max_inline_recv =
+               ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
  }
  
  /* The client can send a request inline as long as the RPCRDMA header
@@@ -132,9 -131,10 +131,10 @@@ static bool rpcrdma_args_inline(struct 
                                struct rpc_rqst *rqst)
  {
        struct xdr_buf *xdr = &rqst->rq_snd_buf;
+       struct rpcrdma_ep *ep = r_xprt->rx_ep;
        unsigned int count, remaining, offset;
  
-       if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
+       if (xdr->len > ep->re_max_inline_send)
                return false;
  
        if (xdr->page_len) {
                        remaining -= min_t(unsigned int,
                                           PAGE_SIZE - offset, remaining);
                        offset = 0;
-                       if (++count > r_xprt->rx_ep.rep_attr.cap.max_send_sge)
+                       if (++count > ep->re_attr.cap.max_send_sge)
                                return false;
                }
        }
  static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
                                   struct rpc_rqst *rqst)
  {
-       return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
+       return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
  }
  
  /* The client is required to provide a Reply chunk if the maximum
@@@ -176,7 -176,7 +176,7 @@@ rpcrdma_nonpayload_inline(const struct 
        const struct xdr_buf *buf = &rqst->rq_rcv_buf;
  
        return (buf->head[0].iov_len + buf->tail[0].iov_len) <
-               r_xprt->rx_ep.rep_max_inline_recv;
+               r_xprt->rx_ep->re_max_inline_recv;
  }
  
  /* Split @vec on page boundaries into SGEs. FMR registers pages, not
@@@ -255,7 -255,7 +255,7 @@@ rpcrdma_convert_iovs(struct rpcrdma_xpr
        /* When encoding a Read chunk, the tail iovec contains an
         * XDR pad and may be omitted.
         */
-       if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
+       if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
                goto out;
  
        /* When encoding a Write chunk, some servers need to see an
         * layer provides space in the tail iovec that may be used
         * for this purpose.
         */
-       if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
+       if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup)
                goto out;
  
        if (xdrbuf->tail[0].iov_len)
@@@ -275,6 -275,32 +275,6 @@@ out
        return n;
  }
  
 -static inline int
 -encode_item_present(struct xdr_stream *xdr)
 -{
 -      __be32 *p;
 -
 -      p = xdr_reserve_space(xdr, sizeof(*p));
 -      if (unlikely(!p))
 -              return -EMSGSIZE;
 -
 -      *p = xdr_one;
 -      return 0;
 -}
 -
 -static inline int
 -encode_item_not_present(struct xdr_stream *xdr)
 -{
 -      __be32 *p;
 -
 -      p = xdr_reserve_space(xdr, sizeof(*p));
 -      if (unlikely(!p))
 -              return -EMSGSIZE;
 -
 -      *p = xdr_zero;
 -      return 0;
 -}
 -
  static void
  xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
  {
@@@ -388,7 -414,7 +388,7 @@@ static int rpcrdma_encode_read_list(str
        } while (nsegs);
  
  done:
 -      return encode_item_not_present(xdr);
 +      return xdr_stream_encode_item_absent(xdr);
  }
  
  /* Register and XDR encode the Write list. Supports encoding a list
@@@ -427,7 -453,7 +427,7 @@@ static int rpcrdma_encode_write_list(st
        if (nsegs < 0)
                return nsegs;
  
 -      if (encode_item_present(xdr) < 0)
 +      if (xdr_stream_encode_item_present(xdr) < 0)
                return -EMSGSIZE;
        segcount = xdr_reserve_space(xdr, sizeof(*segcount));
        if (unlikely(!segcount))
        *segcount = cpu_to_be32(nchunks);
  
  done:
 -      return encode_item_not_present(xdr);
 +      return xdr_stream_encode_item_absent(xdr);
  }
  
  /* Register and XDR encode the Reply chunk. Supports encoding an array
@@@ -481,14 -507,14 +481,14 @@@ static int rpcrdma_encode_reply_chunk(s
        __be32 *segcount;
  
        if (wtype != rpcrdma_replych)
 -              return encode_item_not_present(xdr);
 +              return xdr_stream_encode_item_absent(xdr);
  
        seg = req->rl_segments;
        nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
        if (nsegs < 0)
                return nsegs;
  
 -      if (encode_item_present(xdr) < 0)
 +      if (xdr_stream_encode_item_present(xdr) < 0)
                return -EMSGSIZE;
        segcount = xdr_reserve_space(xdr, sizeof(*segcount));
        if (unlikely(!segcount))
@@@ -1450,8 -1476,8 +1450,8 @@@ void rpcrdma_reply_handler(struct rpcrd
  
        if (credits == 0)
                credits = 1;    /* don't deadlock */
-       else if (credits > r_xprt->rx_ep.rep_max_requests)
-               credits = r_xprt->rx_ep.rep_max_requests;
+       else if (credits > r_xprt->rx_ep->re_max_requests)
+               credits = r_xprt->rx_ep->re_max_requests;
        if (buf->rb_credits != credits)
                rpcrdma_update_cwnd(r_xprt, credits);
        rpcrdma_post_recvs(r_xprt, false);
diff --combined net/sunrpc/xprtsock.c
@@@ -54,7 -54,6 +54,7 @@@
  
  #include <trace/events/sunrpc.h>
  
 +#include "socklib.h"
  #include "sunrpc.h"
  
  static void xs_close(struct rpc_xprt *xprt);
@@@ -750,6 -749,125 +750,6 @@@ xs_stream_start_connect(struct sock_xpr
  
  #define XS_SENDMSG_FLAGS      (MSG_DONTWAIT | MSG_NOSIGNAL)
  
 -static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
 -{
 -      if (seek)
 -              iov_iter_advance(&msg->msg_iter, seek);
 -      return sock_sendmsg(sock, msg);
 -}
 -
 -static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
 -{
 -      iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
 -      return xs_sendmsg(sock, msg, seek);
 -}
 -
 -static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
 -{
 -      int err;
 -
 -      err = xdr_alloc_bvec(xdr, GFP_KERNEL);
 -      if (err < 0)
 -              return err;
 -
 -      iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
 -                      xdr_buf_pagecount(xdr),
 -                      xdr->page_len + xdr->page_base);
 -      return xs_sendmsg(sock, msg, base + xdr->page_base);
 -}
 -
 -#define xs_record_marker_len() sizeof(rpc_fraghdr)
 -
 -/* Common case:
 - *  - stream transport
 - *  - sending from byte 0 of the message
 - *  - the message is wholly contained in @xdr's head iovec
 - */
 -static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
 -              rpc_fraghdr marker, struct kvec *vec, size_t base)
 -{
 -      struct kvec iov[2] = {
 -              [0] = {
 -                      .iov_base       = &marker,
 -                      .iov_len        = sizeof(marker)
 -              },
 -              [1] = *vec,
 -      };
 -      size_t len = iov[0].iov_len + iov[1].iov_len;
 -
 -      iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
 -      return xs_sendmsg(sock, msg, base);
 -}
 -
 -/**
 - * xs_sendpages - write pages directly to a socket
 - * @sock: socket to send on
 - * @addr: UDP only -- address of destination
 - * @addrlen: UDP only -- length of destination address
 - * @xdr: buffer containing this request
 - * @base: starting position in the buffer
 - * @rm: stream record marker field
 - * @sent_p: return the total number of bytes successfully queued for sending
 - *
 - */
 -static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
 -{
 -      struct msghdr msg = {
 -              .msg_name = addr,
 -              .msg_namelen = addrlen,
 -              .msg_flags = XS_SENDMSG_FLAGS | MSG_MORE,
 -      };
 -      unsigned int rmsize = rm ? sizeof(rm) : 0;
 -      unsigned int remainder = rmsize + xdr->len - base;
 -      unsigned int want;
 -      int err = 0;
 -
 -      if (unlikely(!sock))
 -              return -ENOTSOCK;
 -
 -      want = xdr->head[0].iov_len + rmsize;
 -      if (base < want) {
 -              unsigned int len = want - base;
 -              remainder -= len;
 -              if (remainder == 0)
 -                      msg.msg_flags &= ~MSG_MORE;
 -              if (rmsize)
 -                      err = xs_send_rm_and_kvec(sock, &msg, rm,
 -                                      &xdr->head[0], base);
 -              else
 -                      err = xs_send_kvec(sock, &msg, &xdr->head[0], base);
 -              if (remainder == 0 || err != len)
 -                      goto out;
 -              *sent_p += err;
 -              base = 0;
 -      } else
 -              base -= want;
 -
 -      if (base < xdr->page_len) {
 -              unsigned int len = xdr->page_len - base;
 -              remainder -= len;
 -              if (remainder == 0)
 -                      msg.msg_flags &= ~MSG_MORE;
 -              err = xs_send_pagedata(sock, &msg, xdr, base);
 -              if (remainder == 0 || err != len)
 -                      goto out;
 -              *sent_p += err;
 -              base = 0;
 -      } else
 -              base -= xdr->page_len;
 -
 -      if (base >= xdr->tail[0].iov_len)
 -              return 0;
 -      msg.msg_flags &= ~MSG_MORE;
 -      err = xs_send_kvec(sock, &msg, &xdr->tail[0], base);
 -out:
 -      if (err > 0) {
 -              *sent_p += err;
 -              err = 0;
 -      }
 -      return err;
 -}
 -
  /**
   * xs_nospace - handle transmit was incomplete
   * @req: pointer to RPC request
@@@ -841,11 -959,8 +841,11 @@@ static int xs_local_send_request(struc
        struct xdr_buf *xdr = &req->rq_snd_buf;
        rpc_fraghdr rm = xs_stream_record_marker(xdr);
        unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
 +      struct msghdr msg = {
 +              .msg_flags      = XS_SENDMSG_FLAGS,
 +      };
 +      unsigned int uninitialized_var(sent);
        int status;
 -      int sent = 0;
  
        /* Close the stream if the previous transmission was incomplete */
        if (xs_send_request_was_aborted(transport, req)) {
                        req->rq_svec->iov_base, req->rq_svec->iov_len);
  
        req->rq_xtime = ktime_get();
 -      status = xs_sendpages(transport->sock, NULL, 0, xdr,
 -                            transport->xmit.offset, rm, &sent);
 +      status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
 +                                 transport->xmit.offset, rm, &sent);
        dprintk("RPC:       %s(%u) = %d\n",
                        __func__, xdr->len - transport->xmit.offset, status);
  
@@@ -910,12 -1025,7 +910,12 @@@ static int xs_udp_send_request(struct r
        struct rpc_xprt *xprt = req->rq_xprt;
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct xdr_buf *xdr = &req->rq_snd_buf;
 -      int sent = 0;
 +      struct msghdr msg = {
 +              .msg_name       = xs_addr(xprt),
 +              .msg_namelen    = xprt->addrlen,
 +              .msg_flags      = XS_SENDMSG_FLAGS,
 +      };
 +      unsigned int uninitialized_var(sent);
        int status;
  
        xs_pktdump("packet data:",
                return -EBADSLT;
  
        req->rq_xtime = ktime_get();
 -      status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
 -                            xdr, 0, 0, &sent);
 +      status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);
  
        dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
                        xdr->len, status);
@@@ -995,12 -1106,9 +995,12 @@@ static int xs_tcp_send_request(struct r
        struct xdr_buf *xdr = &req->rq_snd_buf;
        rpc_fraghdr rm = xs_stream_record_marker(xdr);
        unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
 +      struct msghdr msg = {
 +              .msg_flags      = XS_SENDMSG_FLAGS,
 +      };
        bool vm_wait = false;
 +      unsigned int uninitialized_var(sent);
        int status;
 -      int sent;
  
        /* Close the stream if the previous transmission was incomplete */
        if (xs_send_request_was_aborted(transport, req)) {
         * called sendmsg(). */
        req->rq_xtime = ktime_get();
        while (1) {
 -              sent = 0;
 -              status = xs_sendpages(transport->sock, NULL, 0, xdr,
 -                                    transport->xmit.offset, rm, &sent);
 +              status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
 +                                         transport->xmit.offset, rm, &sent);
  
                dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
                                xdr->len - transport->xmit.offset, status);
@@@ -1861,7 -1970,7 +1861,7 @@@ static int xs_local_setup_socket(struc
        struct rpc_xprt *xprt = &transport->xprt;
        struct file *filp;
        struct socket *sock;
-       int status = -EIO;
+       int status;
  
        status = __sock_create(xprt->xprt_net, AF_LOCAL,
                                        SOCK_STREAM, 0, &sock, 1);
@@@ -2527,25 -2636,46 +2527,25 @@@ static void bc_free(struct rpc_task *ta
        free_page((unsigned long)buf);
  }
  
 -/*
 - * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
 - * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
 - */
  static int bc_sendto(struct rpc_rqst *req)
  {
 -      int len;
 -      struct xdr_buf *xbufp = &req->rq_snd_buf;
 +      struct xdr_buf *xdr = &req->rq_snd_buf;
        struct sock_xprt *transport =
                        container_of(req->rq_xprt, struct sock_xprt, xprt);
 -      unsigned long headoff;
 -      unsigned long tailoff;
 -      struct page *tailpage;
        struct msghdr msg = {
 -              .msg_flags      = MSG_MORE
 +              .msg_flags      = 0,
        };
        rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
 -                                       (u32)xbufp->len);
 -      struct kvec iov = {
 -              .iov_base       = &marker,
 -              .iov_len        = sizeof(marker),
 -      };
 +                                       (u32)xdr->len);
 +      unsigned int sent = 0;
 +      int err;
  
        req->rq_xtime = ktime_get();
 -
 -      len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len);
 -      if (len != iov.iov_len)
 +      err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent);
 +      xdr_free_bvec(xdr);
 +      if (err < 0 || sent != (xdr->len + sizeof(marker)))
                return -EAGAIN;
 -
 -      tailpage = NULL;
 -      if (xbufp->tail[0].iov_len)
 -              tailpage = virt_to_page(xbufp->tail[0].iov_base);
 -      tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
 -      headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
 -      len = svc_send_common(transport->sock, xbufp,
 -                            virt_to_page(xbufp->head[0].iov_base), headoff,
 -                            tailpage, tailoff);
 -      if (len != xbufp->len)
 -              return -EAGAIN;
 -      return len;
 +      return sent;
  }
  
  /*