Merge tag 'nfs-for-5.13-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 7 May 2021 18:23:41 +0000 (11:23 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 7 May 2021 18:23:41 +0000 (11:23 -0700)
diff --git a/fs/Kconfig b/fs/Kconfig

index 89a750d..141a856 100644 (file)
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -338,8 +338,8 @@ config NFS_COMMON
         default y
  
  config NFS_V4_2_SSC_HELPER
-       tristate
-       default y if NFS_V4=y || NFS_FS=y
+       bool
+       default y if NFS_V4_2
  
  source "net/sunrpc/Kconfig"
  source "fs/ceph/Kconfig"
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c

index f7786e0..ed9d580 100644 (file)
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -137,12 +137,12 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp,
                 list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
                         if (!pnfs_layout_is_valid(lo))
                                 continue;
-                       if (stateid != NULL &&
-                           !nfs4_stateid_match_other(stateid, &lo->plh_stateid))
+                       if (!nfs4_stateid_match_other(stateid, &lo->plh_stateid))
                                 continue;
-                       if (!nfs_sb_active(server->super))
-                               continue;
-                       inode = igrab(lo->plh_inode);
+                       if (nfs_sb_active(server->super))
+                               inode = igrab(lo->plh_inode);
+                       else
+                               inode = ERR_PTR(-EAGAIN);
                         rcu_read_unlock();
                         if (inode)
                                 return inode;
@@ -176,9 +176,10 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp,
                                 continue;
                         if (nfsi->layout != lo)
                                 continue;
-                       if (!nfs_sb_active(server->super))
-                               continue;
-                       inode = igrab(lo->plh_inode);
+                       if (nfs_sb_active(server->super))
+                               inode = igrab(lo->plh_inode);
+                       else
+                               inode = ERR_PTR(-EAGAIN);
                         rcu_read_unlock();
                         if (inode)
                                 return inode;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c

index ff5c4d0..cfeaadf 100644 (file)
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -476,7 +476,6 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
                         to->to_maxval = to->to_initval;
                 to->to_exponential = 0;
                 break;
-#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
         case XPRT_TRANSPORT_UDP:
                 if (retrans == NFS_UNSPEC_RETRANS)
                         to->to_retries = NFS_DEF_UDP_RETRANS;
@@ -487,7 +486,6 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
                 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
                 to->to_exponential = 1;
                 break;
-#endif
         default:
                 BUG();
         }
@@ -698,9 +696,18 @@ static int nfs_init_server(struct nfs_server *server,
         /* Initialise the client representation from the mount data */
         server->flags = ctx->flags;
         server->options = ctx->options;
-       server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
-               NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
-               NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+       server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+
+       switch (clp->rpc_ops->version) {
+       case 2:
+               server->fattr_valid = NFS_ATTR_FATTR_V2;
+               break;
+       case 3:
+               server->fattr_valid = NFS_ATTR_FATTR_V3;
+               break;
+       default:
+               server->fattr_valid = NFS_ATTR_FATTR_V4;
+       }
  
         if (ctx->rsize)
                 server->rsize = nfs_block_size(ctx->rsize, NULL);
@@ -794,6 +801,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
         server->maxfilesize = fsinfo->maxfilesize;
  
         server->time_delta = fsinfo->time_delta;
+       server->change_attr_type = fsinfo->change_attr_type;
  
         server->clone_blksize = fsinfo->clone_blksize;
         /* We're airborne Set socket buffersize */
@@ -935,6 +943,8 @@ struct nfs_server *nfs_alloc_server(void)
                 return NULL;
         }
  
+       server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
         ida_init(&server->openowner_id);
         ida_init(&server->lockowner_id);
         pnfs_init_server(server);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c

index 04bf806..e6ec6f0 100644 (file)
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -114,7 +114,7 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
         return ret;
  }
  /**
- * nfs_have_delegation - check if inode has a delegation, mark it
+ * nfs4_have_delegation - check if inode has a delegation, mark it
   * NFS_DELEGATION_REFERENCED if there is one.
   * @inode: inode to check
   * @flags: delegation types to check for
@@ -481,6 +481,22 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
         if (freeme == NULL)
                 goto out;
  add_new:
+       /*
+        * If we didn't revalidate the change attribute before setting
+        * the delegation, then pre-emptively ask for a full attribute
+        * cache revalidation.
+        */
+       spin_lock(&inode->i_lock);
+       if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_CHANGE)
+               nfs_set_cache_invalid(inode,
+                       NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+                       NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+                       NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
+                       NFS_INO_INVALID_OTHER | NFS_INO_INVALID_DATA |
+                       NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
+                       NFS_INO_INVALID_XATTR);
+       spin_unlock(&inode->i_lock);
+
         list_add_tail_rcu(&delegation->super_list, &server->delegations);
         rcu_assign_pointer(nfsi->delegation, delegation);
         delegation = NULL;
@@ -488,11 +504,6 @@ add_new:
         atomic_long_inc(&nfs_active_delegations);
  
         trace_nfs4_set_delegation(inode, type);
-
-       spin_lock(&inode->i_lock);
-       if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME))
-               NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED;
-       spin_unlock(&inode->i_lock);
  out:
         spin_unlock(&clp->cl_lock);
         if (delegation != NULL)
@@ -674,7 +685,7 @@ void nfs_inode_evict_delegation(struct inode *inode)
  }
  
  /**
- * nfs_inode_return_delegation - synchronously return a delegation
+ * nfs4_inode_return_delegation - synchronously return a delegation
   * @inode: inode to process
   *
   * This routine will always flush any dirty data to disk on the
@@ -697,7 +708,7 @@ int nfs4_inode_return_delegation(struct inode *inode)
  }
  
  /**
- * nfs_inode_return_delegation_on_close - asynchronously return a delegation
+ * nfs4_inode_return_delegation_on_close - asynchronously return a delegation
   * @inode: inode to process
   *
   * This routine is called on file close in order to determine if the
@@ -811,7 +822,7 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
  }
  
  /**
- * nfs_super_return_all_delegations - return delegations for one superblock
+ * nfs_server_return_all_delegations - return delegations for one superblock
   * @server: pointer to nfs_server to process
   *
   */
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h

index 9b00a0b..c19b4fd 100644 (file)
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -84,8 +84,7 @@ int nfs4_inode_make_writeable(struct inode *inode);
  
  static inline int nfs_have_delegated_attributes(struct inode *inode)
  {
-       return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
-               !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED);
+       return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
  }
  
  #endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index 3d8e369..1a6d286 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -866,6 +866,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
                         break;
                 }
  
+               verf_arg = verf_res;
+
                 status = nfs_readdir_page_filler(desc, entry, pages, pglen,
                                                  arrays, narrays);
         } while (!status && nfs_readdir_page_needs_filling(page));
@@ -927,7 +929,12 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
                         }
                         return res;
                 }
-               memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
+               /*
+                * Set the cookie verifier if the page cache was empty
+                */
+               if (desc->page_index == 0)
+                       memcpy(nfsi->cookieverf, verf,
+                              sizeof(nfsi->cookieverf));
         }
         res = nfs_readdir_search_array(desc);
         if (res == 0) {
@@ -974,10 +981,10 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
  /*
   * Once we've found the start of the dirent within a page: fill 'er up...
   */
-static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
+static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
+                          const __be32 *verf)
  {
         struct file     *file = desc->file;
-       struct nfs_inode *nfsi = NFS_I(file_inode(file));
         struct nfs_cache_array *array;
         unsigned int i = 0;
  
@@ -991,7 +998,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
                         desc->eof = true;
                         break;
                 }
-               memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
+               memcpy(desc->verf, verf, sizeof(desc->verf));
                 if (i < (array->size-1))
                         desc->dir_cookie = array->array[i+1].cookie;
                 else
@@ -1048,7 +1055,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
  
         for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
                 desc->page = arrays[i];
-               nfs_do_filldir(desc);
+               nfs_do_filldir(desc, verf);
         }
         desc->page = NULL;
  
@@ -1069,6 +1076,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
  {
         struct dentry   *dentry = file_dentry(file);
         struct inode    *inode = d_inode(dentry);
+       struct nfs_inode *nfsi = NFS_I(inode);
         struct nfs_open_dir_context *dir_ctx = file->private_data;
         struct nfs_readdir_descriptor *desc;
         int res;
@@ -1122,7 +1130,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
                         break;
                 }
                 if (res == -ETOOSMALL && desc->plus) {
-                       clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
+                       clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
                         nfs_zap_caches(inode);
                         desc->page_index = 0;
                         desc->plus = false;
@@ -1132,7 +1140,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
                 if (res < 0)
                         break;
  
-               nfs_do_filldir(desc);
+               nfs_do_filldir(desc, nfsi->cookieverf);
                 nfs_readdir_page_unlock_and_put_cached(desc);
         } while (!desc->eof);
  
@@ -1703,7 +1711,7 @@ static void nfs_drop_nlink(struct inode *inode)
         NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
         nfs_set_cache_invalid(
                 inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
-                              NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED);
+                              NFS_INO_INVALID_NLINK);
         spin_unlock(&inode->i_lock);
  }
  
@@ -2940,7 +2948,7 @@ static int nfs_execute_ok(struct inode *inode, int mask)
  
         if (S_ISDIR(inode->i_mode))
                 return 0;
-       if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
+       if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
                 if (mask & MAY_NOT_BLOCK)
                         return -ECHILD;
                 ret = __nfs_revalidate_inode(server, inode);
@@ -2998,7 +3006,8 @@ out_notsup:
         if (mask & MAY_NOT_BLOCK)
                 return -ECHILD;
  
-       res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+       res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
+                                                 NFS_INO_INVALID_OTHER);
         if (res == 0)
                 res = generic_permission(&init_user_ns, inode, mask);
         goto out;
diff --git a/fs/nfs/export.c b/fs/nfs/export.c

index f2b34cf..37a1a88 100644 (file)
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -169,19 +169,8 @@ out:
  
  static u64 nfs_fetch_iversion(struct inode *inode)
  {
-       struct nfs_server *server = NFS_SERVER(inode);
-
-       /* Is this the right call?: */
-       nfs_revalidate_inode(server, inode);
-       /*
-        * Also, note we're ignoring any returned error.  That seems to be
-        * the practice for cache consistency information elsewhere in
-        * the server, but I'm not sure why.
-        */
-       if (server->nfs_client->rpc_ops->version >= 4)
-               return inode_peek_iversion_raw(inode);
-       else
-               return time_to_chattr(&inode->i_ctime);
+       nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
+       return inode_peek_iversion_raw(inode);
  }
  
  const struct export_operations nfs_export_ops = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c

index 16ad505..1fef107 100644 (file)
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -105,7 +105,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
  
         if (filp->f_flags & O_DIRECT)
                 goto force_reval;
-       if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE))
+       if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
                 goto force_reval;
         return 0;
  force_reval:
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c

index 872112b..d383de0 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -106,7 +106,7 @@ static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
         if (unlikely(!p))
                 return -ENOBUFS;
         fh->size = be32_to_cpup(p++);
-       if (fh->size > sizeof(struct nfs_fh)) {
+       if (fh->size > NFS_MAXFHSIZE) {
                 printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
                        fh->size);
                 return -EOVERFLOW;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c

index a06d213..d95c9a3 100644 (file)
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -283,20 +283,40 @@ static int nfs_verify_server_address(struct sockaddr *addr)
         return 0;
  }
  
+#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+       return true;
+}
+#else
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+       if (ctx->version == 4)
+               return true;
+       return false;
+}
+#endif
+
  /*
   * Sanity check the NFS transport protocol.
- *
   */
-static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
+static int nfs_validate_transport_protocol(struct fs_context *fc,
+                                          struct nfs_fs_context *ctx)
  {
         switch (ctx->nfs_server.protocol) {
         case XPRT_TRANSPORT_UDP:
+               if (nfs_server_transport_udp_invalid(ctx))
+                       goto out_invalid_transport_udp;
+               break;
         case XPRT_TRANSPORT_TCP:
         case XPRT_TRANSPORT_RDMA:
                 break;
         default:
                 ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
         }
+       return 0;
+out_invalid_transport_udp:
+       return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
  }
  
  /*
@@ -305,8 +325,6 @@ static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
   */
  static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx)
  {
-       nfs_validate_transport_protocol(ctx);
-
         if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP ||
             ctx->mount_server.protocol == XPRT_TRANSPORT_TCP)
                         return;
@@ -932,6 +950,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
         struct nfs_fh *mntfh = ctx->mntfh;
         struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
         int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
+       int ret;
  
         if (data == NULL)
                 goto out_no_data;
@@ -976,6 +995,15 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
                         memset(mntfh->data + mntfh->size, 0,
                                sizeof(mntfh->data) - mntfh->size);
  
+               /*
+                * for proto == XPRT_TRANSPORT_UDP, which is what uses
+                * to_exponential, implying shift: limit the shift value
+                * to BITS_PER_LONG (majortimeo is unsigned long)
+                */
+               if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */
+                       if (data->retrans >= 64) /* shift value is too large */
+                               goto out_invalid_data;
+
                 /*
                  * Translate to nfs_fs_context, which nfs_fill_super
                  * can deal with.
@@ -1048,6 +1076,10 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
                 goto generic;
         }
  
+       ret = nfs_validate_transport_protocol(fc, ctx);
+       if (ret)
+               return ret;
+
         ctx->skip_reconfig_option_check = true;
         return 0;
  
@@ -1076,6 +1108,9 @@ out_no_address:
  
  out_invalid_fh:
         return nfs_invalf(fc, "NFS: invalid root filehandle");
+
+out_invalid_data:
+       return nfs_invalf(fc, "NFS: invalid binary mount data");
  }
  
  #if IS_ENABLED(CONFIG_NFS_V4)
@@ -1146,6 +1181,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
  {
         struct nfs_fs_context *ctx = nfs_fc2context(fc);
         struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+       int ret;
         char *c;
  
         if (!data) {
@@ -1218,9 +1254,9 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
         ctx->acdirmin   = data->acdirmin;
         ctx->acdirmax   = data->acdirmax;
         ctx->nfs_server.protocol = data->proto;
-       nfs_validate_transport_protocol(ctx);
-       if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
-               goto out_invalid_transport_udp;
+       ret = nfs_validate_transport_protocol(fc, ctx);
+       if (ret)
+               return ret;
  done:
         ctx->skip_reconfig_option_check = true;
         return 0;
@@ -1231,9 +1267,6 @@ out_inval_auth:
  
  out_no_address:
         return nfs_invalf(fc, "NFS4: mount program didn't pass remote address");
-
-out_invalid_transport_udp:
-       return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
  }
  #endif
  
@@ -1298,6 +1331,10 @@ static int nfs_fs_context_validate(struct fs_context *fc)
         if (!nfs_verify_server_address(sap))
                 goto out_no_address;
  
+       ret = nfs_validate_transport_protocol(fc, ctx);
+       if (ret)
+               return ret;
+
         if (ctx->version == 4) {
                 if (IS_ENABLED(CONFIG_NFS_V4)) {
                         if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
@@ -1306,9 +1343,6 @@ static int nfs_fs_context_validate(struct fs_context *fc)
                                 port = NFS_PORT;
                         max_namelen = NFS4_MAXNAMLEN;
                         max_pathlen = NFS4_MAXPATHLEN;
-                       nfs_validate_transport_protocol(ctx);
-                       if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
-                               goto out_invalid_transport_udp;
                         ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL |
                                         NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK |
                                         NFS_MOUNT_LOCAL_FCNTL);
@@ -1317,10 +1351,6 @@ static int nfs_fs_context_validate(struct fs_context *fc)
                 }
         } else {
                 nfs_set_mount_transport_protocol(ctx);
-#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
-              if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
-                      goto out_invalid_transport_udp;
-#endif
                 if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
                         port = NFS_RDMA_PORT;
         }
@@ -1354,8 +1384,6 @@ out_no_device_name:
  out_v4_not_compiled:
         nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel");
         return -EPROTONOSUPPORT;
-out_invalid_transport_udp:
-       return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
  out_no_address:
         return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
  out_mountproto_mismatch:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c

index 5a8854d..529c409 100644 (file)
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -164,34 +164,19 @@ static int nfs_attribute_timeout(struct inode *inode)
         return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
  }
  
-static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags)
+static bool nfs_check_cache_flags_invalid(struct inode *inode,
+                                         unsigned long flags)
  {
         unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
  
-       /* Special case for the pagecache or access cache */
-       if (flags == NFS_INO_REVAL_PAGECACHE &&
-           !(cache_validity & NFS_INO_REVAL_FORCED))
-               return false;
         return (cache_validity & flags) != 0;
  }
  
-static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags)
-{
-       unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-
-       if ((cache_validity & flags) != 0)
-               return true;
-       if (nfs_attribute_timeout(inode))
-               return true;
-       return false;
-}
-
  bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
  {
-       if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
-               return nfs_check_cache_invalid_delegated(inode, flags);
-
-       return nfs_check_cache_invalid_not_delegated(inode, flags);
+       if (nfs_check_cache_flags_invalid(inode, flags))
+               return true;
+       return nfs_attribute_cache_expired(inode);
  }
  EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
  
@@ -214,20 +199,21 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
  
         if (have_delegation) {
                 if (!(flags & NFS_INO_REVAL_FORCED))
-                       flags &= ~NFS_INO_INVALID_OTHER;
-               flags &= ~(NFS_INO_INVALID_CHANGE
-                               | NFS_INO_INVALID_SIZE
-                               | NFS_INO_REVAL_PAGECACHE
-                               | NFS_INO_INVALID_XATTR);
-       }
+                       flags &= ~(NFS_INO_INVALID_MODE |
+                                  NFS_INO_INVALID_OTHER |
+                                  NFS_INO_INVALID_XATTR);
+               flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
+       } else if (flags & NFS_INO_REVAL_PAGECACHE)
+               flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE;
  
         if (!nfs_has_xattr_cache(nfsi))
                 flags &= ~NFS_INO_INVALID_XATTR;
+       if (flags & NFS_INO_INVALID_DATA)
+               nfs_fscache_invalidate(inode);
         if (inode->i_mapping->nrpages == 0)
                 flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
+       flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED);
         nfsi->cache_validity |= flags;
-       if (flags & NFS_INO_INVALID_DATA)
-               nfs_fscache_invalidate(inode);
  }
  EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
  
@@ -452,6 +438,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
                 .fattr  = fattr
         };
         struct inode *inode = ERR_PTR(-ENOENT);
+       u64 fattr_supported = NFS_SB(sb)->fattr_valid;
         unsigned long hash;
  
         nfs_attr_check_mountpoint(sb, fattr);
@@ -484,8 +471,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
                 inode->i_mode = fattr->mode;
                 nfsi->cache_validity = 0;
                 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
-                               && nfs_server_capable(inode, NFS_CAP_MODE))
-                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+                               && (fattr_supported & NFS_ATTR_FATTR_MODE))
+                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
                 /* Why so? Because we want revalidate for devices/FIFOs, and
                  * that's precisely what we have in nfs_file_inode_operations.
                  */
@@ -530,15 +517,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
                 nfsi->attr_gencount = fattr->gencount;
                 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
                         inode->i_atime = fattr->atime;
-               else if (nfs_server_capable(inode, NFS_CAP_ATIME))
+               else if (fattr_supported & NFS_ATTR_FATTR_ATIME)
                         nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
                 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
                         inode->i_mtime = fattr->mtime;
-               else if (nfs_server_capable(inode, NFS_CAP_MTIME))
+               else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
                         nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
                 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
                         inode->i_ctime = fattr->ctime;
-               else if (nfs_server_capable(inode, NFS_CAP_CTIME))
+               else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
                         nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
                 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
                         inode_set_iversion_raw(inode, fattr->change_attr);
@@ -550,29 +537,31 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
                         nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE);
                 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
                         set_nlink(inode, fattr->nlink);
-               else if (nfs_server_capable(inode, NFS_CAP_NLINK))
-                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+               else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
+                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK);
                 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
                         inode->i_uid = fattr->uid;
-               else if (nfs_server_capable(inode, NFS_CAP_OWNER))
+               else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
                         nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
                 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
                         inode->i_gid = fattr->gid;
-               else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
+               else if (fattr_supported & NFS_ATTR_FATTR_GROUP)
                         nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
                 if (nfs_server_capable(inode, NFS_CAP_XATTR))
                         nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
                 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
                         inode->i_blocks = fattr->du.nfs2.blocks;
+               else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED &&
+                        fattr->size != 0)
+                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
                 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
                         /*
                          * report the blocks in 512byte units
                          */
                         inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-               }
-
-               if (nfsi->cache_validity != 0)
-                       nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+               } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED &&
+                          fattr->size != 0)
+                       nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
  
                 nfs_setsecurity(inode, fattr, label);
  
@@ -634,8 +623,7 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
         }
  
         /* Optimization: if the end result is no change, don't RPC */
-       attr->ia_valid &= NFS_VALID_ATTRS;
-       if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
+       if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
                 return 0;
  
         trace_nfs_setattr_enter(inode);
@@ -710,12 +698,20 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
         spin_lock(&inode->i_lock);
         NFS_I(inode)->attr_gencount = fattr->gencount;
         if ((attr->ia_valid & ATTR_SIZE) != 0) {
-               nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
+               nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
+                                                    NFS_INO_INVALID_BLOCKS);
                 nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
                 nfs_vmtruncate(inode, attr->ia_size);
         }
         if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
                 NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME;
+               if ((attr->ia_valid & ATTR_KILL_SUID) != 0 &&
+                   inode->i_mode & S_ISUID)
+                       inode->i_mode &= ~S_ISUID;
+               if ((attr->ia_valid & ATTR_KILL_SGID) != 0 &&
+                   (inode->i_mode & (S_ISGID | S_IXGRP)) ==
+                    (S_ISGID | S_IXGRP))
+                       inode->i_mode &= ~S_ISGID;
                 if ((attr->ia_valid & ATTR_MODE) != 0) {
                         int mode = attr->ia_mode & S_IALLUGO;
                         mode |= inode->i_mode & ~S_IALLUGO;
@@ -793,14 +789,28 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
         dput(parent);
  }
  
-static bool nfs_need_revalidate_inode(struct inode *inode)
+static u32 nfs_get_valid_attrmask(struct inode *inode)
  {
-       if (NFS_I(inode)->cache_validity &
-                       (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
-               return true;
-       if (nfs_attribute_cache_expired(inode))
-               return true;
-       return false;
+       /*
+        * Build the STATX_* mask of attributes whose cached copy is not
+        * flagged invalid. The flags are sampled once with READ_ONCE() so
+        * that every test below sees the same snapshot.
+        */
+       const unsigned long invalid = READ_ONCE(NFS_I(inode)->cache_validity);
+       /* STATX_INO and STATX_TYPE are reported unconditionally. */
+       u32 mask = STATX_INO | STATX_TYPE;
+
+       mask |= (invalid & NFS_INO_INVALID_ATIME) ? 0 : STATX_ATIME;
+       mask |= (invalid & NFS_INO_INVALID_CTIME) ? 0 : STATX_CTIME;
+       mask |= (invalid & NFS_INO_INVALID_MTIME) ? 0 : STATX_MTIME;
+       mask |= (invalid & NFS_INO_INVALID_SIZE) ? 0 : STATX_SIZE;
+       mask |= (invalid & NFS_INO_INVALID_NLINK) ? 0 : STATX_NLINK;
+       mask |= (invalid & NFS_INO_INVALID_MODE) ? 0 : STATX_MODE;
+       /* Ownership: uid and gid share the single OTHER bit. */
+       mask |= (invalid & NFS_INO_INVALID_OTHER) ? 0 : (STATX_UID | STATX_GID);
+       mask |= (invalid & NFS_INO_INVALID_BLOCKS) ? 0 : STATX_BLOCKS;
+
+       return mask;
  }
  
  int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
@@ -815,9 +825,13 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
  
         trace_nfs_getattr_enter(inode);
  
+       request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
+                       STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
+                       STATX_INO | STATX_SIZE | STATX_BLOCKS;
+
         if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
                 nfs_readdirplus_parent_cache_hit(path->dentry);
-               goto out_no_update;
+               goto out_no_revalidate;
         }
  
         /* Flush out writes to the server in order to update c/mtime.  */
@@ -850,14 +864,24 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
         /* Check whether the cached attributes are stale */
         do_update |= force_sync || nfs_attribute_cache_expired(inode);
         cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-       do_update |= cache_validity &
-               (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL);
+       do_update |= cache_validity & NFS_INO_INVALID_CHANGE;
         if (request_mask & STATX_ATIME)
                 do_update |= cache_validity & NFS_INO_INVALID_ATIME;
-       if (request_mask & (STATX_CTIME|STATX_MTIME))
-               do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
+       if (request_mask & STATX_CTIME)
+               do_update |= cache_validity & NFS_INO_INVALID_CTIME;
+       if (request_mask & STATX_MTIME)
+               do_update |= cache_validity & NFS_INO_INVALID_MTIME;
+       if (request_mask & STATX_SIZE)
+               do_update |= cache_validity & NFS_INO_INVALID_SIZE;
+       if (request_mask & STATX_NLINK)
+               do_update |= cache_validity & NFS_INO_INVALID_NLINK;
+       if (request_mask & STATX_MODE)
+               do_update |= cache_validity & NFS_INO_INVALID_MODE;
+       if (request_mask & (STATX_UID | STATX_GID))
+               do_update |= cache_validity & NFS_INO_INVALID_OTHER;
         if (request_mask & STATX_BLOCKS)
                 do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
+
         if (do_update) {
                 /* Update the attribute cache */
                 if (!(server->flags & NFS_MOUNT_NOAC))
@@ -871,8 +895,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
                 nfs_readdirplus_parent_cache_hit(path->dentry);
  out_no_revalidate:
         /* Only return attributes that were revalidated. */
-       stat->result_mask &= request_mask;
-out_no_update:
+       stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
+
         generic_fillattr(&init_user_ns, inode, stat);
         stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
         if (S_ISDIR(inode->i_mode))
@@ -963,7 +987,6 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
  {
         struct nfs_inode *nfsi;
         struct inode *inode;
-       struct nfs_server *server;
  
         if (!(ctx->mode & FMODE_WRITE))
                 return;
@@ -979,10 +1002,10 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
                 return;
         if (!list_empty(&nfsi->open_files))
                 return;
-       server = NFS_SERVER(inode);
-       if (server->flags & NFS_MOUNT_NOCTO)
+       if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)
                 return;
-       nfs_revalidate_inode(server, inode);
+       nfs_revalidate_inode(inode,
+                            NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
  }
  EXPORT_SYMBOL_GPL(nfs_close_context);
  
@@ -1237,16 +1260,16 @@ int nfs_attribute_cache_expired(struct inode *inode)
  
  /**
   * nfs_revalidate_inode - Revalidate the inode attributes
- * @server: pointer to nfs_server struct
   * @inode: pointer to inode struct
+ * @flags: cache validity bits passed on to nfs_check_cache_invalid()
   *
   * Updates inode attribute information by retrieving the data from the server.
   */
-int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+int nfs_revalidate_inode(struct inode *inode, unsigned long flags)
  {
-       if (!nfs_need_revalidate_inode(inode))
+       if (!nfs_check_cache_invalid(inode, flags))
                  return NFS_STALE(inode) ? -ESTALE : 0;
-       return __nfs_revalidate_inode(server, inode);
+       return __nfs_revalidate_inode(NFS_SERVER(inode), inode);
  }
  EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
  
@@ -1332,7 +1355,7 @@ out:
  
  bool nfs_mapping_need_revalidate_inode(struct inode *inode)
  {
-       return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+       return nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE) ||
                 NFS_STALE(inode);
  }
  
@@ -1468,8 +1491,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
         if (!nfs_file_has_buffered_writers(nfsi)) {
                 /* Verify a few of the more important attributes */
                 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
-                       invalid |= NFS_INO_INVALID_CHANGE
-                               | NFS_INO_REVAL_PAGECACHE;
+                       invalid |= NFS_INO_INVALID_CHANGE;
  
                 ts = inode->i_mtime;
                 if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
@@ -1483,28 +1505,21 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
                         cur_size = i_size_read(inode);
                         new_isize = nfs_size_to_loff_t(fattr->size);
                         if (cur_size != new_isize)
-                               invalid |= NFS_INO_INVALID_SIZE
-                                       | NFS_INO_REVAL_PAGECACHE;
+                               invalid |= NFS_INO_INVALID_SIZE;
                 }
         }
  
         /* Have any file permissions changed? */
         if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
-               invalid |= NFS_INO_INVALID_ACCESS
-                       | NFS_INO_INVALID_ACL
-                       | NFS_INO_INVALID_OTHER;
+               invalid |= NFS_INO_INVALID_MODE;
         if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
-               invalid |= NFS_INO_INVALID_ACCESS
-                       | NFS_INO_INVALID_ACL
-                       | NFS_INO_INVALID_OTHER;
+               invalid |= NFS_INO_INVALID_OTHER;
         if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
-               invalid |= NFS_INO_INVALID_ACCESS
-                       | NFS_INO_INVALID_ACL
-                       | NFS_INO_INVALID_OTHER;
+               invalid |= NFS_INO_INVALID_OTHER;
  
         /* Has the link count changed? */
         if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
-               invalid |= NFS_INO_INVALID_OTHER;
+               invalid |= NFS_INO_INVALID_NLINK;
  
         ts = inode->i_atime;
         if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
@@ -1642,41 +1657,142 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
  #endif
  
  /**
- * nfs_inode_attrs_need_update - check if the inode attributes need updating
+ * nfs_inode_attrs_cmp_generic - compare attributes
+ * @fattr: attributes
   * @inode: pointer to inode
+ *
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
+ * Note also the check for wraparound of 'attr_gencount'
+ *
+ * The function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. Otherwise it returns
+ * the value '0'.
+ */
+static int nfs_inode_attrs_cmp_generic(const struct nfs_fattr *fattr,
+                                      const struct inode *inode)
+{
+       unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
+
+       return (long)(fattr->gencount - attr_gencount) > 0 ||
+              (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0;
+}
+
+/**
+ * nfs_inode_attrs_cmp_monotonic - three-way compare of change attributes
   * @fattr: attributes
+ * @inode: pointer to inode
   *
   * Attempt to divine whether or not an RPC call reply carrying stale
   * attributes got scheduled after another call carrying updated ones.
   *
- * To do so, the function first assumes that a more recent ctime means
- * that the attributes in fattr are newer, however it also attempt to
- * catch the case where ctime either didn't change, or went backwards
- * (if someone reset the clock on the server) by looking at whether
- * or not this RPC call was started after the inode was last updated.
- * Note also the check for wraparound of 'attr_gencount'
+ * The server is assumed to observe monotonic semantics for the change
+ * attribute, so the result is the sign of the difference between
+ * @fattr->change_attr and inode_peek_iversion_raw(@inode):
+ *   '1'  - @fattr holds the larger value, so it is more recent;
+ *   '0'  - the values are equal, i.e. no measurable change;
+ *   '-1' - @inode holds the larger value, so the cached attributes
+ *          are more recent.
+ */
+static int nfs_inode_attrs_cmp_monotonic(const struct nfs_fattr *fattr,
+                                        const struct inode *inode)
+{
+       s64 delta = fattr->change_attr - inode_peek_iversion_raw(inode);
+       if (delta == 0)
+               return 0;
+       return delta > 0 ? 1 : -1;
+}
+
+/**
+ * nfs_inode_attrs_cmp_strict_monotonic - strict change attribute compare
+ * @fattr: attributes
+ * @inode: pointer to inode
   *
- * The function returns 'true' if it thinks the attributes in 'fattr' are
- * more recent than the ones cached in the inode.
+ * Guess whether an RPC reply carrying stale attributes was processed
+ * after another reply carrying updated ones.
   *
+ * The server is assumed to observe strictly monotonic semantics for
+ * the change attribute, so only a larger value in @fattr counts as
+ * newer. Wraps nfs_inode_attrs_cmp_monotonic() and folds its '0'
+ * (equal) result into '-1':
+ *   '1'  - the attributes in @fattr are more recent;
+ *   '-1' - the attributes in @inode are more recent or unchanged.
   */
-static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
+static int nfs_inode_attrs_cmp_strict_monotonic(const struct nfs_fattr *fattr,
+                                               const struct inode *inode)
  {
-       const struct nfs_inode *nfsi = NFS_I(inode);
+       return nfs_inode_attrs_cmp_monotonic(fattr, inode) <= 0 ? -1 : 1;
+}
  
-       return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
-               ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+/**
+ * nfs_inode_attrs_cmp - decide which attribute set is newer
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Returns '1' when the attributes in @fattr look more recent than the
+ * ones cached in @inode, '-1' when the cached ones look more recent,
+ * and '0' when neither the generation counters nor the change
+ * attribute give an answer.
+ */
+static int nfs_inode_attrs_cmp(const struct nfs_fattr *fattr,
+                              const struct inode *inode)
+{
+       /* A newer generation counter settles the question immediately. */
+       if (nfs_inode_attrs_cmp_generic(fattr, inode) > 0)
+               return 1;
+       /* Without a change attribute in the reply nothing more is known. */
+       if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+               return 0;
+       switch (NFS_SERVER(inode)->change_attr_type) {
+       case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+               return 0;
+       case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+               return nfs_inode_attrs_cmp_monotonic(fattr, inode);
+       default:
+               /* Remaining types are compared as strictly monotonic. */
+               return nfs_inode_attrs_cmp_strict_monotonic(fattr, inode);
+       }
+}
+
+/**
+ * nfs_inode_finish_partial_attr_update - complete a previous inode update
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Returns '1' when NFS_INO_INVALID_CHANGE is clear in the cache while at
+ * least one of the ATIME, CTIME, MTIME, SIZE, BLOCKS, OTHER or NLINK
+ * invalid bits is set, and @fattr carries a change attribute equal to
+ * the cached one. Otherwise returns '0'.
+ */
+static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
+                                               const struct inode *inode)
+{
+       const unsigned long partial_bits =
+               NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+               NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+               NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER |
+               NFS_INO_INVALID_NLINK;
+       const unsigned long invalid = NFS_I(inode)->cache_validity;
+
+       /* Only a valid change attribute with other bits pending qualifies. */
+       if ((invalid & NFS_INO_INVALID_CHANGE) || !(invalid & partial_bits))
+               return 0;
+       if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+               return 0;
+       return nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0;
  }
  
-static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
+static int nfs_refresh_inode_locked(struct inode *inode,
+                                   struct nfs_fattr *fattr)
  {
-       int ret;
+       int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
+       int ret = 0;
  
         trace_nfs_refresh_inode_enter(inode);
  
-       if (nfs_inode_attrs_need_update(inode, fattr))
+       if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode))
                 ret = nfs_update_inode(inode, fattr);
-       else
+       else if (attr_cmp == 0)
                 ret = nfs_check_inode_attributes(inode, fattr);
  
         trace_nfs_refresh_inode_exit(inode, ret);
@@ -1761,11 +1877,13 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
   */
  int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
  {
+       int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
         int status;
  
         /* Don't do a WCC update if these attributes are already stale */
-       if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
-                       !nfs_inode_attrs_need_update(inode, fattr)) {
+       if (attr_cmp < 0)
+               return 0;
+       if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) {
                 fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
                                 | NFS_ATTR_FATTR_PRESIZE
                                 | NFS_ATTR_FATTR_PREMTIME
@@ -1839,9 +1957,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc);
   */
  static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  {
-       struct nfs_server *server;
+       struct nfs_server *server = NFS_SERVER(inode);
         struct nfs_inode *nfsi = NFS_I(inode);
         loff_t cur_isize, new_isize;
+       u64 fattr_supported = server->fattr_valid;
         unsigned long invalid = 0;
         unsigned long now = jiffies;
         unsigned long save_cache_validity;
@@ -1885,7 +2004,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                 goto out_err;
         }
  
-       server = NFS_SERVER(inode);
         /* Update the fsid? */
         if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
                         !nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
@@ -1904,14 +2022,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
         nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
                         | NFS_INO_INVALID_ATIME
                         | NFS_INO_REVAL_FORCED
-                       | NFS_INO_REVAL_PAGECACHE
                         | NFS_INO_INVALID_BLOCKS);
  
         /* Do atomic weak cache consistency updates */
         nfs_wcc_update_inode(inode, fattr);
  
         if (pnfs_layoutcommit_outstanding(inode)) {
-               nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR;
+               nfsi->cache_validity |=
+                       save_cache_validity &
+                       (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
+                        NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+                        NFS_INO_INVALID_BLOCKS);
                 cache_revalidated = false;
         }
  
@@ -1928,6 +2049,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                                 save_cache_validity |= NFS_INO_INVALID_CTIME
                                         | NFS_INO_INVALID_MTIME
                                         | NFS_INO_INVALID_SIZE
+                                       | NFS_INO_INVALID_BLOCKS
+                                       | NFS_INO_INVALID_NLINK
+                                       | NFS_INO_INVALID_MODE
                                         | NFS_INO_INVALID_OTHER;
                                 if (S_ISDIR(inode->i_mode))
                                         nfs_force_lookup_revalidate(inode);
@@ -1940,28 +2064,24 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         attr_changed = true;
                 }
         } else {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_CHANGE
-                               | NFS_INO_REVAL_PAGECACHE
-                               | NFS_INO_REVAL_FORCED);
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_CHANGE;
                 cache_revalidated = false;
         }
  
         if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
                 inode->i_mtime = fattr->mtime;
-       } else if (server->caps & NFS_CAP_MTIME) {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_MTIME
-                               | NFS_INO_REVAL_FORCED);
+       } else if (fattr_supported & NFS_ATTR_FATTR_MTIME) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_MTIME;
                 cache_revalidated = false;
         }
  
         if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
                 inode->i_ctime = fattr->ctime;
-       } else if (server->caps & NFS_CAP_CTIME) {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_CTIME
-                               | NFS_INO_REVAL_FORCED);
+       } else if (fattr_supported & NFS_ATTR_FATTR_CTIME) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_CTIME;
                 cache_revalidated = false;
         }
  
@@ -1985,21 +2105,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                                         (long long)cur_isize,
                                         (long long)new_isize);
                 }
+               if (new_isize == 0 &&
+                   !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED |
+                                     NFS_ATTR_FATTR_BLOCKS_USED))) {
+                       fattr->du.nfs3.used = 0;
+                       fattr->valid |= NFS_ATTR_FATTR_SPACE_USED;
+               }
         } else {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_SIZE
-                               | NFS_INO_REVAL_PAGECACHE
-                               | NFS_INO_REVAL_FORCED);
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_SIZE;
                 cache_revalidated = false;
         }
  
-
         if (fattr->valid & NFS_ATTR_FATTR_ATIME)
                 inode->i_atime = fattr->atime;
-       else if (server->caps & NFS_CAP_ATIME) {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_ATIME
-                               | NFS_INO_REVAL_FORCED);
+       else if (fattr_supported & NFS_ATTR_FATTR_ATIME) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_ATIME;
                 cache_revalidated = false;
         }
  
@@ -2012,10 +2134,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                                 | NFS_INO_INVALID_ACL;
                         attr_changed = true;
                 }
-       } else if (server->caps & NFS_CAP_MODE) {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_OTHER
-                               | NFS_INO_REVAL_FORCED);
+       } else if (fattr_supported & NFS_ATTR_FATTR_MODE) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_MODE;
                 cache_revalidated = false;
         }
  
@@ -2026,10 +2147,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         inode->i_uid = fattr->uid;
                         attr_changed = true;
                 }
-       } else if (server->caps & NFS_CAP_OWNER) {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_OTHER
-                               | NFS_INO_REVAL_FORCED);
+       } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_OTHER;
                 cache_revalidated = false;
         }
  
@@ -2040,10 +2160,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         inode->i_gid = fattr->gid;
                         attr_changed = true;
                 }
-       } else if (server->caps & NFS_CAP_OWNER_GROUP) {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_OTHER
-                               | NFS_INO_REVAL_FORCED);
+       } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_OTHER;
                 cache_revalidated = false;
         }
  
@@ -2054,10 +2173,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         set_nlink(inode, fattr->nlink);
                         attr_changed = true;
                 }
-       } else if (server->caps & NFS_CAP_NLINK) {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_OTHER
-                               | NFS_INO_REVAL_FORCED);
+       } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_NLINK;
                 cache_revalidated = false;
         }
  
@@ -2066,18 +2184,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                  * report the blocks in 512byte units
                  */
                 inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-       } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+       } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_BLOCKS;
+               cache_revalidated = false;
+       }
+
+       if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) {
                 inode->i_blocks = fattr->du.nfs2.blocks;
-       else {
-               nfsi->cache_validity |= save_cache_validity &
-                               (NFS_INO_INVALID_BLOCKS
-                               | NFS_INO_REVAL_FORCED);
+       } else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) {
+               nfsi->cache_validity |=
+                       save_cache_validity & NFS_INO_INVALID_BLOCKS;
                 cache_revalidated = false;
         }
  
         /* Update attrtimeo value if we're out of the unstable period */
         if (attr_changed) {
-               invalid &= ~NFS_INO_INVALID_ATTR;
                 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
                 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
                 nfsi->attrtimeo_timestamp = now;
@@ -2094,7 +2216,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         nfsi->attrtimeo_timestamp = now;
                 }
                 /* Set the barrier to be more recent than this fattr */
-               if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+               if ((long)(fattr->gencount - nfsi->attr_gencount) > 0)
                         nfsi->attr_gencount = fattr->gencount;
         }
  
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h

index 7395d09..a36af04 100644 (file)
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -181,7 +181,7 @@ struct nfs_mount_request {
         struct net              *net;
  };
  
-extern int nfs_mount(struct nfs_mount_request *info);
+extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans);
  extern void nfs_umount(const struct nfs_mount_request *info);
  
  /* client.c */
diff --git a/fs/nfs/io.c b/fs/nfs/io.c

index 5088fda..b5551ed 100644 (file)
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -104,7 +104,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
  }
  
  /**
- * nfs_end_io_direct - declare the file is being used for direct i/o
+ * nfs_start_io_direct - declare the file is being used for direct i/o
   * @inode: file inode
   *
   * Declare that a direct I/O operation is about to start, and ensure
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c

index dda5c3e..c5e3b6b 100644 (file)
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -136,14 +136,16 @@ struct mnt_fhstatus {
  /**
   * nfs_mount - Obtain an NFS file handle for the given host and path
   * @info: pointer to mount request arguments
+ * @timeo: deciseconds the mount waits for a response before it retries
+ * @retrans: number of times the mount retries a request
   *
- * Uses default timeout parameters specified by underlying transport. On
- * successful return, the auth_flavs list and auth_flav_len will be populated
- * with the list from the server or a faked-up list if the server didn't
- * provide one.
+ * Uses timeout parameters specified by caller. On successful return, the
+ * auth_flavs list and auth_flav_len will be populated with the list from the
+ * server or a faked-up list if the server didn't provide one.
   */
-int nfs_mount(struct nfs_mount_request *info)
+int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans)
  {
+       struct rpc_timeout mnt_timeout;
         struct mountres result = {
                 .fh             = info->fh,
                 .auth_count     = info->auth_flav_len,
@@ -158,6 +160,7 @@ int nfs_mount(struct nfs_mount_request *info)
                 .protocol       = info->protocol,
                 .address        = info->sap,
                 .addrsize       = info->salen,
+               .timeout        = &mnt_timeout,
                 .servername     = info->hostname,
                 .program        = &mnt_program,
                 .version        = info->version,
@@ -177,6 +180,7 @@ int nfs_mount(struct nfs_mount_request *info)
         if (info->noresvport)
                 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
  
+       nfs_init_timeout_values(&mnt_timeout, info->protocol, timeo, retrans);
         mnt_clnt = rpc_create(&args);
         if (IS_ERR(mnt_clnt))
                 goto out_clnt_err;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c

index bb386a6..9ec560a 100644 (file)
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -65,7 +65,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
         if (!nfs_server_capable(inode, NFS_CAP_ACLS))
                 return ERR_PTR(-EOPNOTSUPP);
  
-       status = nfs_revalidate_inode(server, inode);
+       status = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
         if (status < 0)
                 return ERR_PTR(status);
  
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c

index ed1c837..e6eca1d 100644 (file)
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -433,7 +433,7 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
         if (unlikely(!p))
                 return -EIO;
         length = be32_to_cpup(p++);
-       if (unlikely(length > NFS3_FHSIZE))
+       if (unlikely(length > NFS3_FHSIZE || length == 0))
                 goto out_toobig;
         p = xdr_inline_decode(xdr, length);
         if (unlikely(!p))
@@ -442,7 +442,7 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
         memcpy(fh->data, p, length);
         return 0;
  out_toobig:
-       dprintk("NFS: file handle size (%u) too big\n", length);
+       trace_nfs_xdr_bad_filehandle(xdr, NFSERR_BADHANDLE);
         return -E2BIG;
  }
  
@@ -2227,6 +2227,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
  
         /* ignore properties */
         result->lease_time = 0;
+       result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
         return 0;
  }
  
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c

index 094024b..a243495 100644 (file)
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -46,11 +46,12 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
  {
         struct inode *inode = file_inode(filep);
         struct nfs_server *server = NFS_SERVER(inode);
+       u32 bitmask[3];
         struct nfs42_falloc_args args = {
                 .falloc_fh      = NFS_FH(inode),
                 .falloc_offset  = offset,
                 .falloc_length  = len,
-               .falloc_bitmask = nfs4_fattr_bitmap,
+               .falloc_bitmask = bitmask,
         };
         struct nfs42_falloc_res res = {
                 .falloc_server  = server,
@@ -68,6 +69,10 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
                 return status;
         }
  
+       memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
+       if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
+               bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
         res.falloc_fattr = nfs_alloc_fattr();
         if (!res.falloc_fattr)
                 return -ENOMEM;
@@ -75,7 +80,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
         status = nfs4_call_sync(server->client, server, msg,
                                 &args.seq_args, &res.seq_res, 0);
         if (status == 0)
-               status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+               status = nfs_post_op_update_inode_force_wcc(inode,
+                                                           res.falloc_fattr);
  
         kfree(res.falloc_fattr);
         return status;
@@ -84,7 +90,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
  static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
                                 loff_t offset, loff_t len)
  {
-       struct nfs_server *server = NFS_SERVER(file_inode(filep));
+       struct inode *inode = file_inode(filep);
+       struct nfs_server *server = NFS_SERVER(inode);
         struct nfs4_exception exception = { };
         struct nfs_lock_context *lock;
         int err;
@@ -93,9 +100,13 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
         if (IS_ERR(lock))
                 return PTR_ERR(lock);
  
-       exception.inode = file_inode(filep);
+       exception.inode = inode;
         exception.state = lock->open_context->state;
  
+       err = nfs_sync_inode(inode);
+       if (err)
+               goto out;
+
         do {
                 err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
                 if (err == -ENOTSUPP) {
@@ -104,7 +115,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
                 }
                 err = nfs4_handle_exception(server, err, &exception);
         } while (exception.retry);
-
+out:
         nfs_put_lock_context(lock);
         return err;
  }
@@ -142,16 +153,13 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
                 return -EOPNOTSUPP;
  
         inode_lock(inode);
-       err = nfs_sync_inode(inode);
-       if (err)
-               goto out_unlock;
  
         err = nfs42_proc_fallocate(&msg, filep, offset, len);
         if (err == 0)
                 truncate_pagecache_range(inode, offset, (offset + len) -1);
         if (err == -EOPNOTSUPP)
                 NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
-out_unlock:
+
         inode_unlock(inode);
         return err;
  }
@@ -261,6 +269,33 @@ out:
         return status;
  }
  
+/**
+ * nfs42_copy_dest_done - perform inode cache updates after clone/copy offload
+ * @inode: pointer to destination inode
+ * @pos: destination offset
+ * @len: copy length
+ *
+ * Punch a hole in the inode page cache, so that the NFS client will
+ * know to retrieve new data.
+ * Update the file size if necessary, and then mark the inode as having
+ * invalid cached values for change attribute, ctime, mtime and space used.
+ */
+static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
+{
+       loff_t newsize = pos + len;
+       loff_t end = newsize - 1;
+
+       truncate_pagecache_range(inode, pos, end);
+       spin_lock(&inode->i_lock);
+       if (newsize > i_size_read(inode))
+               i_size_write(inode, newsize);
+       nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+                                            NFS_INO_INVALID_CTIME |
+                                            NFS_INO_INVALID_MTIME |
+                                            NFS_INO_INVALID_BLOCKS);
+       spin_unlock(&inode->i_lock);
+}
+
  static ssize_t _nfs42_proc_copy(struct file *src,
                                 struct nfs_lock_context *src_lock,
                                 struct file *dst,
@@ -354,19 +389,8 @@ static ssize_t _nfs42_proc_copy(struct file *src,
                         goto out;
         }
  
-       truncate_pagecache_range(dst_inode, pos_dst,
-                                pos_dst + res->write_res.count);
-       spin_lock(&dst_inode->i_lock);
-       nfs_set_cache_invalid(
-               dst_inode, NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED |
-                                  NFS_INO_INVALID_SIZE | NFS_INO_INVALID_ATTR |
-                                  NFS_INO_INVALID_DATA);
-       spin_unlock(&dst_inode->i_lock);
-       spin_lock(&src_inode->i_lock);
-       nfs_set_cache_invalid(src_inode, NFS_INO_REVAL_PAGECACHE |
-                                                NFS_INO_REVAL_FORCED |
-                                                NFS_INO_INVALID_ATIME);
-       spin_unlock(&src_inode->i_lock);
+       nfs42_copy_dest_done(dst_inode, pos_dst, res->write_res.count);
+       nfs_invalidate_atime(src_inode);
         status = res->write_res.count;
  out:
         if (args->sync)
@@ -659,7 +683,10 @@ static loff_t _nfs42_proc_llseek(struct file *filep,
         if (status)
                 return status;
  
-       return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+       if (whence == SEEK_DATA && res.sr_eof)
+               return -NFS4ERR_NXIO;
+       else
+               return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
  }
  
  loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
@@ -1044,8 +1071,10 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
  
         status = nfs4_call_sync(server->client, server, msg,
                                 &args.seq_args, &res.seq_res, 0);
-       if (status == 0)
+       if (status == 0) {
+               nfs42_copy_dest_done(dst_inode, dst_offset, count);
                 status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
+       }
  
         kfree(res.dst_fattr);
         return status;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c

index 6c2ce79..1c4d2a0 100644 (file)
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -168,7 +168,7 @@ nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
   *        make it easier to copy the value after an RPC, even if
   *        the value will not be passed up to application (e.g.
   *        for a 'query' getxattr with NULL buffer).
- * @len:   Length of the value. Can be 0 for zero-length attribues.
+ * @len:   Length of the value. Can be 0 for zero-length attributes.
   *         @value and @pages will be NULL if @len is 0.
   */
  static struct nfs4_xattr_entry *
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c

index 441a2fa..57b3821 100644 (file)
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -420,9 +420,7 @@ static const struct nfs4_ssc_client_ops nfs4_ssc_clnt_ops_tbl = {
   */
  void nfs42_ssc_register_ops(void)
  {
-#ifdef CONFIG_NFSD_V4
         nfs42_ssc_register(&nfs4_ssc_clnt_ops_tbl);
-#endif
  }
  
  /**
@@ -433,9 +431,7 @@ void nfs42_ssc_register_ops(void)
   */
  void nfs42_ssc_unregister_ops(void)
  {
-#ifdef CONFIG_NFSD_V4
         nfs42_ssc_unregister(&nfs4_ssc_clnt_ops_tbl);
-#endif
  }
  #endif /* CONFIG_NFS_V4_2 */
  
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index 545010d..87d04f2 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -108,9 +108,10 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
  static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
                 const struct cred *, bool);
  #endif
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
-               struct nfs_server *server,
-               struct nfs4_label *label);
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
+                            const __u32 *src, struct inode *inode,
+                            struct nfs_server *server,
+                            struct nfs4_label *label);
  
  #ifdef CONFIG_NFS_V4_SECURITY_LABEL
  static inline struct nfs4_label *
@@ -263,6 +264,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
                         | FATTR4_WORD1_FS_LAYOUT_TYPES,
                         FATTR4_WORD2_LAYOUT_BLKSIZE
                         | FATTR4_WORD2_CLONE_BLKSIZE
+                       | FATTR4_WORD2_CHANGE_ATTR_TYPE
                         | FATTR4_WORD2_XATTR_SUPPORT
  };
  
@@ -283,7 +285,7 @@ const u32 nfs4_fs_locations_bitmap[3] = {
  };
  
  static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
-               struct inode *inode)
+                                   struct inode *inode, unsigned long flags)
  {
         unsigned long cache_validity;
  
@@ -291,22 +293,20 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
         if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
                 return;
  
-       cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-       if (!(cache_validity & NFS_INO_REVAL_FORCED))
-               cache_validity &= ~(NFS_INO_INVALID_CHANGE
-                               | NFS_INO_INVALID_SIZE);
+       cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;
  
+       /* Remove the attributes over which we have full control */
+       dst[1] &= ~FATTR4_WORD1_RAWDEV;
         if (!(cache_validity & NFS_INO_INVALID_SIZE))
                 dst[0] &= ~FATTR4_WORD0_SIZE;
  
         if (!(cache_validity & NFS_INO_INVALID_CHANGE))
                 dst[0] &= ~FATTR4_WORD0_CHANGE;
-}
  
-static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst,
-               const __u32 *src, struct inode *inode)
-{
-       nfs4_bitmap_copy_adjust(dst, src, inode);
+       if (!(cache_validity & NFS_INO_INVALID_MODE))
+               dst[1] &= ~FATTR4_WORD1_MODE;
+       if (!(cache_validity & NFS_INO_INVALID_OTHER))
+               dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
  }
  
  static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
@@ -1169,14 +1169,26 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
  static void
  nfs4_inc_nlink_locked(struct inode *inode)
  {
-       nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+       nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+                                            NFS_INO_INVALID_CTIME |
+                                            NFS_INO_INVALID_NLINK);
         inc_nlink(inode);
  }
  
+static void
+nfs4_inc_nlink(struct inode *inode)
+{
+       spin_lock(&inode->i_lock);
+       nfs4_inc_nlink_locked(inode);
+       spin_unlock(&inode->i_lock);
+}
+
  static void
  nfs4_dec_nlink_locked(struct inode *inode)
  {
-       nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+       nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+                                            NFS_INO_INVALID_CTIME |
+                                            NFS_INO_INVALID_NLINK);
         drop_nlink(inode);
  }
  
@@ -1186,11 +1198,23 @@ nfs4_update_changeattr_locked(struct inode *inode,
                 unsigned long timestamp, unsigned long cache_validity)
  {
         struct nfs_inode *nfsi = NFS_I(inode);
+       u64 change_attr = inode_peek_iversion_raw(inode);
  
         cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
  
-       if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) {
-               nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+       switch (NFS_SERVER(inode)->change_attr_type) {
+       case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+               break;
+       case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+               if ((s64)(change_attr - cinfo->after) > 0)
+                       goto out;
+               break;
+       default:
+               if ((s64)(change_attr - cinfo->after) >= 0)
+                       goto out;
+       }
+
+       if (cinfo->atomic && cinfo->before == change_attr) {
                 nfsi->attrtimeo_timestamp = jiffies;
         } else {
                 if (S_ISDIR(inode->i_mode)) {
@@ -1202,7 +1226,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
                                 cache_validity |= NFS_INO_REVAL_PAGECACHE;
                 }
  
-               if (cinfo->before != inode_peek_iversion_raw(inode))
+               if (cinfo->before != change_attr)
                         cache_validity |= NFS_INO_INVALID_ACCESS |
                                           NFS_INO_INVALID_ACL |
                                           NFS_INO_INVALID_XATTR;
@@ -1210,8 +1234,9 @@ nfs4_update_changeattr_locked(struct inode *inode,
         inode_set_iversion_raw(inode, cinfo->after);
         nfsi->read_cache_jiffies = timestamp;
         nfsi->attr_gencount = nfs_inc_attr_generation_counter();
-       nfs_set_cache_invalid(inode, cache_validity);
         nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
+out:
+       nfs_set_cache_invalid(inode, cache_validity);
  }
  
  void
@@ -3344,12 +3369,17 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
                 .inode = inode,
                 .stateid = &arg.stateid,
         };
+       unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
         int err;
  
+       if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
+               adjust_flags |= NFS_INO_INVALID_MODE;
+       if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
+               adjust_flags |= NFS_INO_INVALID_OTHER;
+
         do {
-               nfs4_bitmap_copy_adjust_setattr(bitmask,
-                               nfs4_bitmask(server, olabel),
-                               inode);
+               nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, olabel),
+                                       inode, adjust_flags);
  
                 err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx);
                 switch (err) {
@@ -3591,6 +3621,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
         struct nfs4_closedata *calldata = data;
         struct nfs4_state *state = calldata->state;
         struct inode *inode = calldata->inode;
+       struct nfs_server *server = NFS_SERVER(inode);
         struct pnfs_layout_hdr *lo;
         bool is_rdonly, is_wronly, is_rdwr;
         int call_close = 0;
@@ -3647,8 +3678,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
         if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
                 /* Close-to-open cache consistency revalidation */
                 if (!nfs4_have_delegation(inode, FMODE_READ)) {
-                       calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
-                       nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL);
+                       nfs4_bitmask_set(calldata->arg.bitmask_store,
+                                        server->cache_consistency_bitmask,
+                                        inode, server, NULL);
+                       calldata->arg.bitmask = calldata->arg.bitmask_store;
                 } else
                         calldata->arg.bitmask = NULL;
         }
@@ -3835,12 +3868,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
                         res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
                 }
                 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
-               server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
-                               NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
-                               NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
-                               NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
-                               NFS_CAP_CTIME|NFS_CAP_MTIME|
-                               NFS_CAP_SECURITY_LABEL);
+               server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
+                                 NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL);
+               server->fattr_valid = NFS_ATTR_FATTR_V4;
                 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
                                 res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
                         server->caps |= NFS_CAP_ACLS;
@@ -3848,25 +3878,29 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
                         server->caps |= NFS_CAP_HARDLINKS;
                 if (res.has_symlinks != 0)
                         server->caps |= NFS_CAP_SYMLINKS;
-               if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
-                       server->caps |= NFS_CAP_FILEID;
-               if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
-                       server->caps |= NFS_CAP_MODE;
-               if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
-                       server->caps |= NFS_CAP_NLINK;
-               if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
-                       server->caps |= NFS_CAP_OWNER;
-               if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
-                       server->caps |= NFS_CAP_OWNER_GROUP;
-               if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
-                       server->caps |= NFS_CAP_ATIME;
-               if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
-                       server->caps |= NFS_CAP_CTIME;
-               if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
-                       server->caps |= NFS_CAP_MTIME;
+               if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
+                       server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
+               if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
+                       server->fattr_valid &= ~NFS_ATTR_FATTR_MODE;
+               if (!(res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS))
+                       server->fattr_valid &= ~NFS_ATTR_FATTR_NLINK;
+               if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER))
+                       server->fattr_valid &= ~(NFS_ATTR_FATTR_OWNER |
+                               NFS_ATTR_FATTR_OWNER_NAME);
+               if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP))
+                       server->fattr_valid &= ~(NFS_ATTR_FATTR_GROUP |
+                               NFS_ATTR_FATTR_GROUP_NAME);
+               if (!(res.attr_bitmask[1] & FATTR4_WORD1_SPACE_USED))
+                       server->fattr_valid &= ~NFS_ATTR_FATTR_SPACE_USED;
+               if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS))
+                       server->fattr_valid &= ~NFS_ATTR_FATTR_ATIME;
+               if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA))
+                       server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME;
+               if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
+                       server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
  #ifdef CONFIG_NFS_V4_SECURITY_LABEL
-               if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
-                       server->caps |= NFS_CAP_SECURITY_LABEL;
+               if (!(res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL))
+                       server->fattr_valid &= ~NFS_ATTR_FATTR_V4_SECURITY_LABEL;
  #endif
                 memcpy(server->attr_bitmask_nl, res.attr_bitmask,
                                 sizeof(server->attr_bitmask));
@@ -4154,8 +4188,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
         if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
                 task_flags |= RPC_TASK_TIMEOUT;
  
-       nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
-
+       nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode, 0);
         nfs_fattr_init(fattr);
         nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
         return nfs4_do_call_sync(server->client, server, &msg,
@@ -4582,11 +4615,11 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
         status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
         if (status == 0) {
                 spin_lock(&dir->i_lock);
-               nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
-                                             NFS_INO_INVALID_DATA);
                 /* Removing a directory decrements nlink in the parent */
                 if (ftype == NF4DIR && dir->i_nlink > 2)
                         nfs4_dec_nlink_locked(dir);
+               nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
+                                             NFS_INO_INVALID_DATA);
                 spin_unlock(&dir->i_lock);
         }
         return status;
@@ -4715,11 +4748,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
                         /* Note: If we moved a directory, nlink will change */
                         nfs4_update_changeattr(old_dir, &res->old_cinfo,
                                         res->old_fattr->time_start,
-                                       NFS_INO_INVALID_OTHER |
+                                       NFS_INO_INVALID_NLINK |
                                             NFS_INO_INVALID_DATA);
                         nfs4_update_changeattr(new_dir, &res->new_cinfo,
                                         res->new_fattr->time_start,
-                                       NFS_INO_INVALID_OTHER |
+                                       NFS_INO_INVALID_NLINK |
                                             NFS_INO_INVALID_DATA);
                 } else
                         nfs4_update_changeattr(old_dir, &res->old_cinfo,
@@ -4761,12 +4794,13 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
         }
  
         nfs4_inode_make_writeable(inode);
-       nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode);
-
+       nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.label), inode,
+                               NFS_INO_INVALID_CHANGE);
         status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
         if (!status) {
                 nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
                                        NFS_INO_INVALID_DATA);
+               nfs4_inc_nlink(inode);
                 status = nfs_post_op_update_inode(inode, res.fattr);
                 if (!status)
                         nfs_setsecurity(inode, res.fattr, res.label);
@@ -4844,12 +4878,12 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
                                     &data->arg.seq_args, &data->res.seq_res, 1);
         if (status == 0) {
                 spin_lock(&dir->i_lock);
-               nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
-                               data->res.fattr->time_start,
-                               NFS_INO_INVALID_DATA);
                 /* Creating a directory bumps nlink in the parent */
                 if (data->arg.ftype == NF4DIR)
                         nfs4_inc_nlink_locked(dir);
+               nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+                                             data->res.fattr->time_start,
+                                             NFS_INO_INVALID_DATA);
                 spin_unlock(&dir->i_lock);
                 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
         }
@@ -5416,37 +5450,39 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
         return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
  }
  
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
-                               struct nfs_server *server,
-                               struct nfs4_label *label)
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
+                            struct inode *inode, struct nfs_server *server,
+                            struct nfs4_label *label)
  {
-
         unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+       unsigned int i;
  
-       if ((cache_validity & NFS_INO_INVALID_DATA) ||
-               (cache_validity & NFS_INO_REVAL_PAGECACHE) ||
-               (cache_validity & NFS_INO_REVAL_FORCED) ||
-               (cache_validity & NFS_INO_INVALID_OTHER))
-               nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
+       memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
  
+       if (cache_validity & NFS_INO_INVALID_CHANGE)
+               bitmask[0] |= FATTR4_WORD0_CHANGE;
         if (cache_validity & NFS_INO_INVALID_ATIME)
                 bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
+       if (cache_validity & NFS_INO_INVALID_MODE)
+               bitmask[1] |= FATTR4_WORD1_MODE;
         if (cache_validity & NFS_INO_INVALID_OTHER)
-               bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
-                               FATTR4_WORD1_OWNER_GROUP |
-                               FATTR4_WORD1_NUMLINKS;
+               bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
+       if (cache_validity & NFS_INO_INVALID_NLINK)
+               bitmask[1] |= FATTR4_WORD1_NUMLINKS;
         if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
                 bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
-       if (cache_validity & NFS_INO_INVALID_CHANGE)
-               bitmask[0] |= FATTR4_WORD0_CHANGE;
         if (cache_validity & NFS_INO_INVALID_CTIME)
                 bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
         if (cache_validity & NFS_INO_INVALID_MTIME)
                 bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
-       if (cache_validity & NFS_INO_INVALID_SIZE)
-               bitmask[0] |= FATTR4_WORD0_SIZE;
         if (cache_validity & NFS_INO_INVALID_BLOCKS)
                 bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
+       if (cache_validity & NFS_INO_INVALID_SIZE)
+               bitmask[0] |= FATTR4_WORD0_SIZE;
+
+       for (i = 0; i < NFS4_BITMASK_SZ; i++)
+               bitmask[i] &= server->attr_bitmask[i];
  }
  
  static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
@@ -5459,8 +5495,10 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
                 hdr->args.bitmask = NULL;
                 hdr->res.fattr = NULL;
         } else {
-               hdr->args.bitmask = server->cache_consistency_bitmask;
-               nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL);
+               nfs4_bitmask_set(hdr->args.bitmask_store,
+                                server->cache_consistency_bitmask,
+                                hdr->inode, server, NULL);
+               hdr->args.bitmask = hdr->args.bitmask_store;
         }
  
         if (!hdr->pgio_done_cb)
@@ -5858,7 +5896,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
  
         if (!nfs4_server_supports_acls(server))
                 return -EOPNOTSUPP;
-       ret = nfs_revalidate_inode(server, inode);
+       ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
         if (ret < 0)
                 return ret;
         if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
@@ -6502,8 +6540,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
  
         data->args.fhandle = &data->fh;
         data->args.stateid = &data->stateid;
-       data->args.bitmask = server->cache_consistency_bitmask;
-       nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL);
+       nfs4_bitmask_set(data->args.bitmask_store,
+                        server->cache_consistency_bitmask, inode, server,
+                        NULL);
+       data->args.bitmask = data->args.bitmask_store;
         nfs_copy_fh(&data->fh, NFS_FH(inode));
         nfs4_stateid_copy(&data->stateid, stateid);
         data->res.fattr = &data->fattr;
@@ -7250,22 +7290,22 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
  
  #ifdef CONFIG_NFS_V4_1
  struct nfs4_lock_waiter {
-       struct task_struct      *task;
         struct inode            *inode;
-       struct nfs_lowner       *owner;
+       struct nfs_lowner       owner;
+       wait_queue_entry_t      wait;
  };
  
  static int
  nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
  {
-       int ret;
-       struct nfs4_lock_waiter *waiter = wait->private;
+       struct nfs4_lock_waiter *waiter =
+               container_of(wait, struct nfs4_lock_waiter, wait);
  
         /* NULL key means to wake up everyone */
         if (key) {
                 struct cb_notify_lock_args      *cbnl = key;
                 struct nfs_lowner               *lowner = &cbnl->cbnl_owner,
-                                               *wowner = waiter->owner;
+                                               *wowner = &waiter->owner;
  
                 /* Only wake if the callback was for the same owner. */
                 if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev)
@@ -7276,53 +7316,45 @@ nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, vo
                         return 0;
         }
  
-       /* override "private" so we can use default_wake_function */
-       wait->private = waiter->task;
-       ret = woken_wake_function(wait, mode, flags, key);
-       if (ret)
-               list_del_init(&wait->entry);
-       wait->private = waiter;
-       return ret;
+       return woken_wake_function(wait, mode, flags, key);
  }
  
  static int
  nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
  {
-       int status = -ERESTARTSYS;
         struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
         struct nfs_server *server = NFS_SERVER(state->inode);
         struct nfs_client *clp = server->nfs_client;
         wait_queue_head_t *q = &clp->cl_lock_waitq;
-       struct nfs_lowner owner = { .clientid = clp->cl_clientid,
-                                   .id = lsp->ls_seqid.owner_id,
-                                   .s_dev = server->s_dev };
-       struct nfs4_lock_waiter waiter = { .task  = current,
-                                          .inode = state->inode,
-                                          .owner = &owner};
-       wait_queue_entry_t wait;
+       struct nfs4_lock_waiter waiter = {
+               .inode = state->inode,
+               .owner = { .clientid = clp->cl_clientid,
+                          .id = lsp->ls_seqid.owner_id,
+                          .s_dev = server->s_dev },
+       };
+       int status;
  
         /* Don't bother with waitqueue if we don't expect a callback */
         if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
                 return nfs4_retry_setlk_simple(state, cmd, request);
  
-       init_wait(&wait);
-       wait.private = &waiter;
-       wait.func = nfs4_wake_lock_waiter;
+       init_wait(&waiter.wait);
+       waiter.wait.func = nfs4_wake_lock_waiter;
+       add_wait_queue(q, &waiter.wait);
  
-       while(!signalled()) {
-               add_wait_queue(q, &wait);
+       do {
                 status = nfs4_proc_setlk(state, cmd, request);
-               if ((status != -EAGAIN) || IS_SETLK(cmd)) {
-                       finish_wait(q, &wait);
+               if (status != -EAGAIN || IS_SETLK(cmd))
                         break;
-               }
  
                 status = -ERESTARTSYS;
                 freezer_do_not_count();
-               wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);
+               wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
+                          NFS4_LOCK_MAXTIMEOUT);
                 freezer_count();
-               finish_wait(q, &wait);
-       }
+       } while (!signalled());
+
+       remove_wait_queue(q, &waiter.wait);
  
         return status;
  }
@@ -7615,7 +7647,7 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
                         return -EACCES;
         }
  
-       ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+       ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
         if (ret)
                 return ret;
  
@@ -7646,7 +7678,7 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
                         return 0;
         }
  
-       ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+       ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
         if (ret)
                 return ret;
  
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c

index 2eec5bb..f22818a 100644 (file)
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -645,7 +645,7 @@ void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
  }
  
  /**
- * nfs4_purge_state_owners - Release all cached state owners
+ * nfs4_free_state_owners - Release all cached state owners
   * @head: resulting list of state owners
   *
   * Frees a list of state owners that was generated by
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h

index 48d761e..2ef75ca 100644 (file)
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -666,7 +666,42 @@ TRACE_EVENT(nfs4_state_mgr_failed,
                 )
  )
  
-TRACE_EVENT(nfs4_xdr_status,
+TRACE_EVENT(nfs4_xdr_bad_operation,
+               TP_PROTO(
+                       const struct xdr_stream *xdr,
+                       u32 op,
+                       u32 expected
+               ),
+
+               TP_ARGS(xdr, op, expected),
+
+               TP_STRUCT__entry(
+                       __field(unsigned int, task_id)
+                       __field(unsigned int, client_id)
+                       __field(u32, xid)
+                       __field(u32, op)
+                       __field(u32, expected)
+               ),
+
+               TP_fast_assign(
+                       const struct rpc_rqst *rqstp = xdr->rqst;
+                       const struct rpc_task *task = rqstp->rq_task;
+
+                       __entry->task_id = task->tk_pid;
+                       __entry->client_id = task->tk_client->cl_clid;
+                       __entry->xid = be32_to_cpu(rqstp->rq_xid);
+                       __entry->op = op;
+                       __entry->expected = expected;
+               ),
+
+               TP_printk(
+                       "task:%u@%d xid=0x%08x operation=%u, expected=%u",
+                       __entry->task_id, __entry->client_id, __entry->xid,
+                       __entry->op, __entry->expected
+               )
+);
+
+DECLARE_EVENT_CLASS(nfs4_xdr_event,
                 TP_PROTO(
                         const struct xdr_stream *xdr,
                         u32 op,
@@ -701,6 +736,16 @@ TRACE_EVENT(nfs4_xdr_status,
                         __entry->op
                 )
  );
+#define DEFINE_NFS4_XDR_EVENT(name) \
+       DEFINE_EVENT(nfs4_xdr_event, name, \
+                       TP_PROTO( \
+                               const struct xdr_stream *xdr, \
+                               u32 op, \
+                               u32 error \
+                       ), \
+                       TP_ARGS(xdr, op, error))
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_status);
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_bad_filehandle);
  
  DECLARE_EVENT_CLASS(nfs4_cb_error_class,
                 TP_PROTO(
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c

index d4fd3be..a8cff19 100644 (file)
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -144,7 +144,17 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
   * layout types will be returned.
   */
  #define decode_fsinfo_maxsz    (op_decode_hdr_maxsz + \
-                                nfs4_fattr_bitmap_maxsz + 4 + 8 + 5)
+                                nfs4_fattr_bitmap_maxsz + 1 + \
+                                1 /* lease time */ + \
+                                2 /* max filesize */ + \
+                                2 /* max read */ + \
+                                2 /* max write */ + \
+                                nfstime4_maxsz /* time delta */ + \
+                                5 /* fs layout types */ + \
+                                1 /* layout blksize */ + \
+                                1 /* clone blksize */ + \
+                                1 /* change attr type */ + \
+                                1 /* xattr support */)
  #define encode_renew_maxsz     (op_encode_hdr_maxsz + 3)
  #define decode_renew_maxsz     (op_decode_hdr_maxsz)
  #define encode_setclientid_maxsz \
@@ -3200,9 +3210,7 @@ out_status:
         *nfs_retval = nfs4_stat_to_errno(nfserr);
         return true;
  out_bad_operation:
-       dprintk("nfs: Server returned operation"
-               " %d but we issued a request for %d\n",
-                       opnum, expected);
+       trace_nfs4_xdr_bad_operation(xdr, opnum, expected);
         *nfs_retval = -EREMOTEIO;
         return false;
  out_overflow:
@@ -3487,8 +3495,11 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, stru
                 if (unlikely(!p))
                         return -EIO;
                 len = be32_to_cpup(p);
-               if (len > NFS4_FHSIZE)
-                       return -EIO;
+               if (len > NFS4_FHSIZE || len == 0) {
+                       trace_nfs4_xdr_bad_filehandle(xdr, OP_READDIR,
+                                                     NFS4ERR_BADHANDLE);
+                       return -EREMOTEIO;
+               }
                 p = xdr_inline_decode(xdr, len);
                 if (unlikely(!p))
                         return -EIO;
@@ -4837,6 +4848,32 @@ static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
         return 0;
  }
  
+static int decode_attr_change_attr_type(struct xdr_stream *xdr,
+                                       uint32_t *bitmap,
+                                       enum nfs4_change_attr_type *res)
+{
+       u32 tmp = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
+       dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
+       if (bitmap[2] & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+               if (xdr_stream_decode_u32(xdr, &tmp))
+                       return -EIO;
+               bitmap[2] &= ~FATTR4_WORD2_CHANGE_ATTR_TYPE;
+       }
+
+       switch(tmp) {
+       case NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR:
+       case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER:
+       case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS:
+       case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+               *res = tmp;
+               break;
+       default:
+               *res = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+       }
+       return 0;
+}
+
  static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
  {
         unsigned int savep;
@@ -4885,6 +4922,11 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
         if (status)
                 goto xdr_error;
  
+       status = decode_attr_change_attr_type(xdr, bitmap,
+                                             &fsinfo->change_attr_type);
+       if (status)
+               goto xdr_error;
+
         status = decode_attr_xattrsupport(xdr, bitmap,
                                           &fsinfo->xattr_support);
         if (status)
@@ -4913,8 +4955,10 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
         if (unlikely(!p))
                 return -EIO;
         len = be32_to_cpup(p);
-       if (len > NFS4_FHSIZE)
-               return -EIO;
+       if (len > NFS4_FHSIZE || len == 0) {
+               trace_nfs4_xdr_bad_filehandle(xdr, OP_GETFH, NFS4ERR_BADHANDLE);
+               return -EREMOTEIO;
+       }
         fh->size = len;
         p = xdr_inline_decode(xdr, len);
         if (unlikely(!p))
diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c

index a90b363..5d1bfcc 100644 (file)
--- a/fs/nfs/nfstrace.c
+++ b/fs/nfs/nfstrace.c
@@ -12,3 +12,4 @@
  EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
  EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
  EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_bad_filehandle);
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h

index 5a59dcd..eb1ef34 100644 (file)
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -45,6 +45,11 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME);
  TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME);
  TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE);
  TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
+TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE);
  
  #define nfs_show_cache_validity(v) \
         __print_flags(v, "|", \
@@ -60,7 +65,11 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
                         { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \
                         { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \
                         { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \
-                       { NFS_INO_INVALID_XATTR, "INVALID_XATTR" })
+                       { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \
+                       { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \
+                       { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \
+                       { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \
+                       { NFS_INO_INVALID_MODE, "INVALID_MODE" })
  
  TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS);
  TRACE_DEFINE_ENUM(NFS_INO_STALE);
@@ -1392,7 +1401,7 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
                         { NFSERR_BADTYPE, "BADTYPE" }, \
                         { NFSERR_JUKEBOX, "JUKEBOX" })
  
-TRACE_EVENT(nfs_xdr_status,
+DECLARE_EVENT_CLASS(nfs_xdr_event,
                 TP_PROTO(
                         const struct xdr_stream *xdr,
                         int error
@@ -1434,6 +1443,15 @@ TRACE_EVENT(nfs_xdr_status,
                         nfs_show_status(__entry->error)
                 )
  );
+#define DEFINE_NFS_XDR_EVENT(name) \
+       DEFINE_EVENT(nfs_xdr_event, name, \
+                       TP_PROTO( \
+                               const struct xdr_stream *xdr, \
+                               int error \
+                       ), \
+                       TP_ARGS(xdr, error))
+DEFINE_NFS_XDR_EVENT(nfs_xdr_status);
+DEFINE_NFS_XDR_EVENT(nfs_xdr_bad_filehandle);
  
  #endif /* _TRACE_NFS_H */
  
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c

index 78c9c4b..6c20b28 100644 (file)
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -577,7 +577,7 @@ static void nfs_clear_request(struct nfs_page *req)
  }
  
  /**
- * nfs_release_request - Release the count on an NFS read/write request
+ * nfs_free_request - Release the count on an NFS read/write request
   * @req: request to release
   *
   * Note: Should never be called with the spinlock held!
@@ -1152,7 +1152,7 @@ nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc,
  }
  
  /**
- * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
+ * __nfs_pageio_add_request - Attempt to coalesce a request into a page list.
   * @desc: destination io descriptor
   * @req: request
   *
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c

index 102b66e..03e0b34 100644 (file)
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1344,7 +1344,7 @@ _pnfs_return_layout(struct inode *ino)
         }
         valid_layout = pnfs_layout_is_valid(lo);
         pnfs_clear_layoutcommit(ino, &tmp_list);
-       pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);
+       pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0);
  
         if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
                 struct pnfs_layout_range range = {
@@ -2410,9 +2410,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                         .iomode = IOMODE_ANY,
                         .length = NFS4_MAX_UINT64,
                 };
-               pnfs_set_plh_return_info(lo, IOMODE_ANY, 0);
-               pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
-                                               &range, 0);
+               pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
                 goto out_forget;
         } else {
                 /* We have a completely new layout */
@@ -2468,6 +2466,9 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
  
         assert_spin_locked(&lo->plh_inode->i_lock);
  
+       if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               tmp_list = &lo->plh_return_segs;
+
         list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
                 if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
                         dprintk("%s: marking lseg %p iomode %d "
@@ -2475,6 +2476,8 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                                 lseg, lseg->pls_range.iomode,
                                 lseg->pls_range.offset,
                                 lseg->pls_range.length);
+                       if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+                               tmp_list = &lo->plh_return_segs;
                         if (mark_lseg_invalid(lseg, tmp_list))
                                 continue;
                         remaining++;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c

index 73ab7c5..ea19dbf 100644 (file)
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -91,6 +91,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
         info->dtpref = fsinfo.tsize;
         info->maxfilesize = 0x7FFFFFFF;
         info->lease_time = 0;
+       info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
         return 0;
  }
  
diff --git a/fs/nfs/super.c b/fs/nfs/super.c

index 4aaa1f5..19a212f 100644 (file)
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -116,16 +116,12 @@ static void unregister_nfs4_fs(void)
  #ifdef CONFIG_NFS_V4_2
  static void nfs_ssc_register_ops(void)
  {
-#ifdef CONFIG_NFSD_V4
         nfs_ssc_register(&nfs_ssc_clnt_ops_tbl);
-#endif
  }
  
  static void nfs_ssc_unregister_ops(void)
  {
-#ifdef CONFIG_NFSD_V4
         nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl);
-#endif
  }
  #endif /* CONFIG_NFS_V4_2 */
  
@@ -867,7 +863,7 @@ static int nfs_request_mount(struct fs_context *fc,
          * Now ask the mount server to map our export path
          * to a file handle.
          */
-       status = nfs_mount(&request);
+       status = nfs_mount(&request, ctx->timeo, ctx->retrans);
         if (status != 0) {
                 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
                                 request.hostname, status);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c

index f05a903..3bf8217 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -764,9 +764,6 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
          * with invalidate/truncate.
          */
         spin_lock(&mapping->private_lock);
-       if (!nfs_have_writebacks(inode) &&
-           NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
-               inode_inc_iversion_raw(inode);
         if (likely(!PageSwapCache(req->wb_page))) {
                 set_bit(PG_MAPPED, &req->wb_flags);
                 SetPagePrivate(req->wb_page);
@@ -1293,7 +1290,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
         if (nfs_have_delegated_attributes(inode))
                 goto out;
         if (nfsi->cache_validity &
-           (NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
+           (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE))
                 return false;
         smp_rmb();
         if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
@@ -1604,7 +1601,7 @@ static int nfs_writeback_done(struct rpc_task *task,
         /* Deal with the suid/sgid bit corner case */
         if (nfs_should_remove_suid(inode)) {
                 spin_lock(&inode->i_lock);
-               nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+               nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
                 spin_unlock(&inode->i_lock);
         }
         return 0;
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig

index 5fa38ad..f229172 100644 (file)
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -138,7 +138,7 @@ config NFSD_FLEXFILELAYOUT
  
  config NFSD_V4_2_INTER_SSC
         bool "NFSv4.2 inter server to server COPY"
-       depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
+       depends on NFSD_V4 && NFS_V4_2
         help
           This option enables support for NFSv4.2 inter server to
           server copy where the destination server calls the NFSv4.2
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h

index 5b4c67c..15004c4 100644 (file)
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -452,6 +452,7 @@ enum lock_type4 {
  #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
  #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
  #define FATTR4_WORD2_CLONE_BLKSIZE     (1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE  (1UL << 15)
  #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
  #define FATTR4_WORD2_MODE_UMASK                (1UL << 17)
  #define FATTR4_WORD2_XATTR_SUPPORT     (1UL << 18)
@@ -709,6 +710,14 @@ struct nl4_server {
         } u;
  };
  
+enum nfs4_change_attr_type {
+       NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+       NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+       NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+       NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+       NFS4_CHANGE_TYPE_IS_UNDEFINED = 4,
+};
+
  /*
   * Options for setxattr. These match the flags for setxattr(2).
   */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h

index eadaabd..ffba254 100644 (file)
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -246,11 +246,15 @@ struct nfs4_copy_state {
                                 BIT(13)         /* Deferred cache invalidation */
  #define NFS_INO_INVALID_BLOCKS BIT(14)         /* cached blocks are invalid */
  #define NFS_INO_INVALID_XATTR  BIT(15)         /* xattrs are invalid */
+#define NFS_INO_INVALID_NLINK  BIT(16)         /* cached nlinks is invalid */
+#define NFS_INO_INVALID_MODE   BIT(17)         /* cached mode is invalid */
  
  #define NFS_INO_INVALID_ATTR   (NFS_INO_INVALID_CHANGE \
                 | NFS_INO_INVALID_CTIME \
                 | NFS_INO_INVALID_MTIME \
                 | NFS_INO_INVALID_SIZE \
+               | NFS_INO_INVALID_NLINK \
+               | NFS_INO_INVALID_MODE \
                 | NFS_INO_INVALID_OTHER)        /* inode metadata is invalid */
  
  /*
@@ -386,7 +390,7 @@ extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
  extern int nfs_permission(struct user_namespace *, struct inode *, int);
  extern int nfs_open(struct inode *, struct file *);
  extern int nfs_attribute_cache_expired(struct inode *inode);
-extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
+extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags);
  extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
  extern int nfs_clear_invalid_mapping(struct address_space *mapping);
  extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h

index a28d71b..d71a0e9 100644 (file)
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -156,6 +156,7 @@ struct nfs_server {
  #define NFS_MOUNT_WRITE_EAGER          0x01000000
  #define NFS_MOUNT_WRITE_WAIT           0x02000000
  
+       unsigned int            fattr_valid;    /* Valid attributes */
         unsigned int            caps;           /* server capabilities */
         unsigned int            rsize;          /* read size */
         unsigned int            rpages;         /* read size (in pages) */
@@ -180,6 +181,9 @@ struct nfs_server {
  #define NFS_OPTION_FSCACHE     0x00000001      /* - local caching enabled */
  #define NFS_OPTION_MIGRATION   0x00000002      /* - NFSv4 migration enabled */
  
+       enum nfs4_change_attr_type
+                               change_attr_type;/* Description of change attribute */
+
         struct nfs_fsid         fsid;
         __u64                   maxfilesize;    /* maximum file size */
         struct timespec64       time_delta;     /* smallest time granularity */
@@ -265,16 +269,7 @@ struct nfs_server {
  #define NFS_CAP_SYMLINKS       (1U << 2)
  #define NFS_CAP_ACLS           (1U << 3)
  #define NFS_CAP_ATOMIC_OPEN    (1U << 4)
-/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */
  #define NFS_CAP_LGOPEN         (1U << 5)
-#define NFS_CAP_FILEID         (1U << 6)
-#define NFS_CAP_MODE           (1U << 7)
-#define NFS_CAP_NLINK          (1U << 8)
-#define NFS_CAP_OWNER          (1U << 9)
-#define NFS_CAP_OWNER_GROUP    (1U << 10)
-#define NFS_CAP_ATIME          (1U << 11)
-#define NFS_CAP_CTIME          (1U << 12)
-#define NFS_CAP_MTIME          (1U << 13)
  #define NFS_CAP_POSIX_LOCK     (1U << 14)
  #define NFS_CAP_UIDGID_NOMAP   (1U << 15)
  #define NFS_CAP_STATEID_NFSV41 (1U << 16)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h

index 3327239..717ecc8 100644 (file)
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -15,6 +15,8 @@
  #define NFS_DEF_FILE_IO_SIZE   (4096U)
  #define NFS_MIN_FILE_IO_SIZE   (1024U)
  
+#define NFS_BITMASK_SZ         3
+
  struct nfs4_string {
         unsigned int len;
         char *data;
@@ -150,6 +152,8 @@ struct nfs_fsinfo {
         __u32                   layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
         __u32                   blksize; /* preferred pnfs io block size */
         __u32                   clone_blksize; /* granularity of a CLONE operation */
+       enum nfs4_change_attr_type
+                               change_attr_type; /* Info about change attr */
         __u32                   xattr_support; /* User xattrs supported */
  };
  
@@ -525,7 +529,8 @@ struct nfs_closeargs {
         struct nfs_seqid *      seqid;
         fmode_t                 fmode;
         u32                     share_access;
-       u32 *                   bitmask;
+       const u32 *             bitmask;
+       u32                     bitmask_store[NFS_BITMASK_SZ];
         struct nfs4_layoutreturn_args *lr_args;
  };
  
@@ -608,7 +613,8 @@ struct nfs4_delegreturnargs {
         struct nfs4_sequence_args       seq_args;
         const struct nfs_fh *fhandle;
         const nfs4_stateid *stateid;
-       u32 * bitmask;
+       const u32 *bitmask;
+       u32 bitmask_store[NFS_BITMASK_SZ];
         struct nfs4_layoutreturn_args *lr_args;
  };
  
@@ -648,7 +654,8 @@ struct nfs_pgio_args {
         union {
                 unsigned int            replen;                 /* used by read */
                 struct {
-                       u32 *                   bitmask;        /* used by write */
+                       const u32 *             bitmask;        /* used by write */
+                       u32 bitmask_store[NFS_BITMASK_SZ];      /* used by write */
                         enum nfs3_stable_how    stable;         /* used by write */
                 };
         };
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h

index d2e97ee..d81fe8b 100644 (file)
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -247,6 +247,7 @@ struct rpc_xprt {
         struct rpc_task *       snd_task;       /* Task blocked in send */
  
         struct list_head        xmit_queue;     /* Send queue */
+       atomic_long_t           xmit_queuelen;
  
         struct svc_xprt         *bc_xprt;       /* NFSv4.1 backchannel */
  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h

index c838e7a..bd55908 100644 (file)
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -60,6 +60,46 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class,
                                 ),                                      \
                                 TP_ARGS(wc, cid))
  
+DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class,
+       TP_PROTO(
+               const struct ib_wc *wc,
+               const struct rpc_rdma_cid *cid
+       ),
+
+       TP_ARGS(wc, cid),
+
+       TP_STRUCT__entry(
+               __field(u32, cq_id)
+               __field(int, completion_id)
+               __field(unsigned long, status)
+               __field(unsigned int, vendor_err)
+       ),
+
+       TP_fast_assign(
+               __entry->cq_id = cid->ci_queue_id;
+               __entry->completion_id = cid->ci_completion_id;
+               __entry->status = wc->status;
+               if (wc->status)
+                       __entry->vendor_err = wc->vendor_err;
+               else
+                       __entry->vendor_err = 0;
+       ),
+
+       TP_printk("cq.id=%u mr.id=%d status=%s (%lu/0x%x)",
+               __entry->cq_id, __entry->completion_id,
+               rdma_show_wc_status(__entry->status),
+               __entry->status, __entry->vendor_err
+       )
+);
+
+#define DEFINE_MR_COMPLETION_EVENT(name)                               \
+               DEFINE_EVENT(rpcrdma_mr_completion_class, name,         \
+                               TP_PROTO(                               \
+                                       const struct ib_wc *wc,         \
+                                       const struct rpc_rdma_cid *cid  \
+                               ),                                      \
+                               TP_ARGS(wc, cid))
+
  DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class,
         TP_PROTO(
                 const struct ib_wc *wc,
@@ -150,19 +190,17 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt,
         TP_ARGS(r_xprt),
  
         TP_STRUCT__entry(
-               __field(const void *, r_xprt)
                 __string(addr, rpcrdma_addrstr(r_xprt))
                 __string(port, rpcrdma_portstr(r_xprt))
         ),
  
         TP_fast_assign(
-               __entry->r_xprt = r_xprt;
                 __assign_str(addr, rpcrdma_addrstr(r_xprt));
                 __assign_str(port, rpcrdma_portstr(r_xprt));
         ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p",
-               __get_str(addr), __get_str(port), __entry->r_xprt
+       TP_printk("peer=[%s]:%s",
+               __get_str(addr), __get_str(port)
         )
  );
  
@@ -182,7 +220,6 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
         TP_ARGS(r_xprt, rc),
  
         TP_STRUCT__entry(
-               __field(const void *, r_xprt)
                 __field(int, rc)
                 __field(int, connect_status)
                 __string(addr, rpcrdma_addrstr(r_xprt))
@@ -190,15 +227,14 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
         ),
  
         TP_fast_assign(
-               __entry->r_xprt = r_xprt;
                 __entry->rc = rc;
                 __entry->connect_status = r_xprt->rx_ep->re_connect_status;
                 __assign_str(addr, rpcrdma_addrstr(r_xprt));
                 __assign_str(port, rpcrdma_portstr(r_xprt));
         ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d",
-               __get_str(addr), __get_str(port), __entry->r_xprt,
+       TP_printk("peer=[%s]:%s rc=%d connection status=%d",
+               __get_str(addr), __get_str(port),
                 __entry->rc, __entry->connect_status
         )
  );
@@ -343,7 +379,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class,
  
                 __entry->task_id = task->tk_pid;
                 __entry->client_id = task->tk_client->cl_clid;
-               __entry->mr_id  = mr->frwr.fr_mr->res.id;
+               __entry->mr_id  = mr->mr_ibmr->res.id;
                 __entry->nents  = mr->mr_nents;
                 __entry->handle = mr->mr_handle;
                 __entry->length = mr->mr_length;
@@ -384,7 +420,7 @@ DECLARE_EVENT_CLASS(xprtrdma_anonymous_mr_class,
         ),
  
         TP_fast_assign(
-               __entry->mr_id  = mr->frwr.fr_mr->res.id;
+               __entry->mr_id  = mr->mr_ibmr->res.id;
                 __entry->nents  = mr->mr_nents;
                 __entry->handle = mr->mr_handle;
                 __entry->length = mr->mr_length;
@@ -495,22 +531,19 @@ TRACE_EVENT(xprtrdma_op_connect,
         TP_ARGS(r_xprt, delay),
  
         TP_STRUCT__entry(
-               __field(const void *, r_xprt)
                 __field(unsigned long, delay)
                 __string(addr, rpcrdma_addrstr(r_xprt))
                 __string(port, rpcrdma_portstr(r_xprt))
         ),
  
         TP_fast_assign(
-               __entry->r_xprt = r_xprt;
                 __entry->delay = delay;
                 __assign_str(addr, rpcrdma_addrstr(r_xprt));
                 __assign_str(port, rpcrdma_portstr(r_xprt));
         ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu",
-               __get_str(addr), __get_str(port), __entry->r_xprt,
-               __entry->delay
+       TP_printk("peer=[%s]:%s delay=%lu",
+               __get_str(addr), __get_str(port), __entry->delay
         )
  );
  
@@ -525,7 +558,6 @@ TRACE_EVENT(xprtrdma_op_set_cto,
         TP_ARGS(r_xprt, connect, reconnect),
  
         TP_STRUCT__entry(
-               __field(const void *, r_xprt)
                 __field(unsigned long, connect)
                 __field(unsigned long, reconnect)
                 __string(addr, rpcrdma_addrstr(r_xprt))
@@ -533,51 +565,18 @@ TRACE_EVENT(xprtrdma_op_set_cto,
         ),
  
         TP_fast_assign(
-               __entry->r_xprt = r_xprt;
                 __entry->connect = connect;
                 __entry->reconnect = reconnect;
                 __assign_str(addr, rpcrdma_addrstr(r_xprt));
                 __assign_str(port, rpcrdma_portstr(r_xprt));
         ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu",
-               __get_str(addr), __get_str(port), __entry->r_xprt,
+       TP_printk("peer=[%s]:%s connect=%lu reconnect=%lu",
+               __get_str(addr), __get_str(port),
                 __entry->connect / HZ, __entry->reconnect / HZ
         )
  );
  
-TRACE_EVENT(xprtrdma_qp_event,
-       TP_PROTO(
-               const struct rpcrdma_ep *ep,
-               const struct ib_event *event
-       ),
-
-       TP_ARGS(ep, event),
-
-       TP_STRUCT__entry(
-               __field(unsigned long, event)
-               __string(name, event->device->name)
-               __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
-               __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
-       ),
-
-       TP_fast_assign(
-               const struct rdma_cm_id *id = ep->re_id;
-
-               __entry->event = event->event;
-               __assign_str(name, event->device->name);
-               memcpy(__entry->srcaddr, &id->route.addr.src_addr,
-                      sizeof(struct sockaddr_in6));
-               memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
-                      sizeof(struct sockaddr_in6));
-       ),
-
-       TP_printk("%pISpc -> %pISpc device=%s %s (%lu)",
-               __entry->srcaddr, __entry->dstaddr, __get_str(name),
-               rdma_show_ib_event(__entry->event), __entry->event
-       )
-);
-
  /**
   ** Call events
   **/
@@ -591,22 +590,19 @@ TRACE_EVENT(xprtrdma_createmrs,
         TP_ARGS(r_xprt, count),
  
         TP_STRUCT__entry(
-               __field(const void *, r_xprt)
                 __string(addr, rpcrdma_addrstr(r_xprt))
                 __string(port, rpcrdma_portstr(r_xprt))
                 __field(unsigned int, count)
         ),
  
         TP_fast_assign(
-               __entry->r_xprt = r_xprt;
                 __entry->count = count;
                 __assign_str(addr, rpcrdma_addrstr(r_xprt));
                 __assign_str(port, rpcrdma_portstr(r_xprt));
         ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
-               __get_str(addr), __get_str(port), __entry->r_xprt,
-               __entry->count
+       TP_printk("peer=[%s]:%s created %u MRs",
+               __get_str(addr), __get_str(port), __entry->count
         )
  );
  
@@ -829,7 +825,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
         TP_ARGS(r_xprt, count, status),
  
         TP_STRUCT__entry(
-               __field(const void *, r_xprt)
+               __field(u32, cq_id)
                 __field(unsigned int, count)
                 __field(int, status)
                 __field(int, posted)
@@ -838,16 +834,18 @@ TRACE_EVENT(xprtrdma_post_recvs,
         ),
  
         TP_fast_assign(
-               __entry->r_xprt = r_xprt;
+               const struct rpcrdma_ep *ep = r_xprt->rx_ep;
+
+               __entry->cq_id = ep->re_attr.recv_cq->res.id;
                 __entry->count = count;
                 __entry->status = status;
-               __entry->posted = r_xprt->rx_ep->re_receive_count;
+               __entry->posted = ep->re_receive_count;
                 __assign_str(addr, rpcrdma_addrstr(r_xprt));
                 __assign_str(port, rpcrdma_portstr(r_xprt));
         ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
-               __get_str(addr), __get_str(port), __entry->r_xprt,
+       TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)",
+               __get_str(addr), __get_str(port), __entry->cq_id,
                 __entry->count, __entry->posted, __entry->status
         )
  );
@@ -886,10 +884,10 @@ TRACE_EVENT(xprtrdma_post_linv_err,
  DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive);
  
  DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_fastreg);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_wake);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_done);
  
  TRACE_EVENT(xprtrdma_frwr_alloc,
         TP_PROTO(
@@ -905,7 +903,7 @@ TRACE_EVENT(xprtrdma_frwr_alloc,
         ),
  
         TP_fast_assign(
-               __entry->mr_id = mr->frwr.fr_mr->res.id;
+               __entry->mr_id = mr->mr_ibmr->res.id;
                 __entry->rc = rc;
         ),
  
@@ -933,7 +931,7 @@ TRACE_EVENT(xprtrdma_frwr_dereg,
         ),
  
         TP_fast_assign(
-               __entry->mr_id  = mr->frwr.fr_mr->res.id;
+               __entry->mr_id  = mr->mr_ibmr->res.id;
                 __entry->nents  = mr->mr_nents;
                 __entry->handle = mr->mr_handle;
                 __entry->length = mr->mr_length;
@@ -966,7 +964,7 @@ TRACE_EVENT(xprtrdma_frwr_sgerr,
         ),
  
         TP_fast_assign(
-               __entry->mr_id = mr->frwr.fr_mr->res.id;
+               __entry->mr_id = mr->mr_ibmr->res.id;
                 __entry->addr = mr->mr_sg->dma_address;
                 __entry->dir = mr->mr_dir;
                 __entry->nents = sg_nents;
@@ -996,7 +994,7 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
         ),
  
         TP_fast_assign(
-               __entry->mr_id = mr->frwr.fr_mr->res.id;
+               __entry->mr_id = mr->mr_ibmr->res.id;
                 __entry->addr = mr->mr_sg->dma_address;
                 __entry->dir = mr->mr_dir;
                 __entry->num_mapped = num_mapped;
@@ -1010,11 +1008,12 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
         )
  );
  
+DEFINE_MR_EVENT(fastreg);
  DEFINE_MR_EVENT(localinv);
+DEFINE_MR_EVENT(reminv);
  DEFINE_MR_EVENT(map);
  
  DEFINE_ANON_MR_EVENT(unmap);
-DEFINE_ANON_MR_EVENT(recycle);
  
  TRACE_EVENT(xprtrdma_dma_maperr,
         TP_PROTO(
@@ -1248,22 +1247,19 @@ TRACE_EVENT(xprtrdma_cb_setup,
         TP_ARGS(r_xprt, reqs),
  
         TP_STRUCT__entry(
-               __field(const void *, r_xprt)
                 __field(unsigned int, reqs)
                 __string(addr, rpcrdma_addrstr(r_xprt))
                 __string(port, rpcrdma_portstr(r_xprt))
         ),
  
         TP_fast_assign(
-               __entry->r_xprt = r_xprt;
                 __entry->reqs = reqs;
                 __assign_str(addr, rpcrdma_addrstr(r_xprt));
                 __assign_str(port, rpcrdma_portstr(r_xprt));
         ),
  
-       TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs",
-               __get_str(addr), __get_str(port),
-               __entry->r_xprt, __entry->reqs
+       TP_printk("peer=[%s]:%s %u reqs",
+               __get_str(addr), __get_str(port), __entry->reqs
         )
  );
  
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h

index bda16e9..d02e01a 100644 (file)
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1079,6 +1079,46 @@ TRACE_EVENT(xprt_transmit,
                 __entry->seqno, __entry->status)
  );
  
+TRACE_EVENT(xprt_retransmit,
+       TP_PROTO(
+               const struct rpc_rqst *rqst
+       ),
+
+       TP_ARGS(rqst),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, task_id)
+               __field(unsigned int, client_id)
+               __field(u32, xid)
+               __field(int, ntrans)
+               __field(int, version)
+               __string(progname,
+                        rqst->rq_task->tk_client->cl_program->name)
+               __string(procedure,
+                        rqst->rq_task->tk_msg.rpc_proc->p_name)
+       ),
+
+       TP_fast_assign(
+               struct rpc_task *task = rqst->rq_task;
+
+               __entry->task_id = task->tk_pid;
+               __entry->client_id = task->tk_client ?
+                       task->tk_client->cl_clid : -1;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+               __entry->ntrans = rqst->rq_ntrans;
+               __assign_str(progname,
+                            task->tk_client->cl_program->name);
+               __entry->version = task->tk_client->cl_vers;
+               __assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+       ),
+
+       TP_printk(
+               "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d",
+               __entry->task_id, __entry->client_id, __entry->xid,
+               __get_str(progname), __entry->version, __get_str(procedure),
+               __entry->ntrans)
+);
+
  TRACE_EVENT(xprt_ping,
         TP_PROTO(const struct rpc_xprt *xprt, int status),
  
@@ -1141,7 +1181,6 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
  
  DEFINE_WRITELOCK_EVENT(reserve_xprt);
  DEFINE_WRITELOCK_EVENT(release_xprt);
-DEFINE_WRITELOCK_EVENT(transmit_queued);
  
  DECLARE_EVENT_CLASS(xprt_cong_event,
         TP_PROTO(
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c

index 612f0a6..f555d33 100644 (file)
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1799,7 +1799,6 @@ call_allocate(struct rpc_task *task)
  
         status = xprt->ops->buf_alloc(task);
         trace_rpc_buf_alloc(task, status);
-       xprt_inject_disconnect(xprt);
         if (status == 0)
                 return;
         if (status != -ENOMEM) {
@@ -2457,12 +2456,6 @@ call_decode(struct rpc_task *task)
                 task->tk_flags &= ~RPC_CALL_MAJORSEEN;
         }
  
-       /*
-        * Ensure that we see all writes made by xprt_complete_rqst()
-        * before it changed req->rq_reply_bytes_recvd.
-        */
-       smp_rmb();
-
         /*
          * Did we ever call xprt_complete_rqst()? If not, we should assume
          * the message is incomplete.
@@ -2471,6 +2464,11 @@ call_decode(struct rpc_task *task)
         if (!req->rq_reply_bytes_recvd)
                 goto out;
  
+       /* Ensure that we see all writes made by xprt_complete_rqst()
+        * before it changed req->rq_reply_bytes_recvd.
+        */
+       smp_rmb();
+
         req->rq_rcv_buf.len = req->rq_private_buf.len;
         trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf);
  
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c

index 38fe2ce..647b323 100644 (file)
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -344,13 +344,15 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
                                     const char *hostname,
                                     struct sockaddr *srvaddr, size_t salen,
                                     int proto, u32 version,
-                                   const struct cred *cred)
+                                   const struct cred *cred,
+                                   const struct rpc_timeout *timeo)
  {
         struct rpc_create_args args = {
                 .net            = net,
                 .protocol       = proto,
                 .address        = srvaddr,
                 .addrsize       = salen,
+               .timeout        = timeo,
                 .servername     = hostname,
                 .nodename       = nodename,
                 .program        = &rpcb_program,
@@ -705,7 +707,8 @@ void rpcb_getport_async(struct rpc_task *task)
                                 clnt->cl_nodename,
                                 xprt->servername, sap, salen,
                                 xprt->prot, bind_version,
-                               clnt->cl_cred);
+                               clnt->cl_cred,
+                               task->tk_client->cl_timeout);
         if (IS_ERR(rpcb_clnt)) {
                 status = PTR_ERR(rpcb_clnt);
                 goto bailout_nofree;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c

index 691ccf8..e5b5a96 100644 (file)
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -698,9 +698,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
         const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
         int status = 0;
  
-       if (time_before(jiffies, req->rq_minortimeo))
-               return status;
         if (time_before(jiffies, req->rq_majortimeo)) {
+               if (time_before(jiffies, req->rq_minortimeo))
+                       return status;
                 if (to->to_exponential)
                         req->rq_timeout <<= 1;
                 else
@@ -1352,6 +1352,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
                 list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
                 INIT_LIST_HEAD(&req->rq_xmit2);
  out:
+               atomic_long_inc(&xprt->xmit_queuelen);
                 set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
                 spin_unlock(&xprt->queue_lock);
         }
@@ -1381,6 +1382,7 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
                 }
         } else
                 list_del(&req->rq_xmit2);
+       atomic_long_dec(&req->rq_xprt->xmit_queuelen);
  }
  
  /**
@@ -1469,8 +1471,6 @@ bool xprt_prepare_transmit(struct rpc_task *task)
         struct rpc_xprt *xprt = req->rq_xprt;
  
         if (!xprt_lock_write(xprt, task)) {
-               trace_xprt_transmit_queued(xprt, task);
-
                 /* Race breaker: someone may have transmitted us */
                 if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
                         rpc_wake_up_queued_task_set_status(&xprt->sending,
@@ -1483,7 +1483,10 @@ bool xprt_prepare_transmit(struct rpc_task *task)
  
  void xprt_end_transmit(struct rpc_task *task)
  {
-       xprt_release_write(task->tk_rqstp->rq_xprt, task);
+       struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
+
+       xprt_inject_disconnect(xprt);
+       xprt_release_write(xprt, task);
  }
  
  /**
@@ -1537,8 +1540,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
                 return status;
         }
  
-       if (is_retrans)
+       if (is_retrans) {
                 task->tk_client->cl_stats->rpcretrans++;
+               trace_xprt_retransmit(req);
+       }
  
         xprt_inject_disconnect(xprt);
  
@@ -1885,7 +1890,6 @@ void xprt_release(struct rpc_task *task)
         spin_unlock(&xprt->transport_lock);
         if (req->rq_buffer)
                 xprt->ops->buf_free(task);
-       xprt_inject_disconnect(xprt);
         xdr_free_bvec(&req->rq_rcv_buf);
         xdr_free_bvec(&req->rq_snd_buf);
         if (req->rq_cred != NULL)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c

index a249837..1151efd 100644 (file)
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -155,9 +155,11 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
  void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
  {
         struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+       struct rpcrdma_rep *rep = req->rl_reply;
         struct rpc_xprt *xprt = rqst->rq_xprt;
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
  
-       rpcrdma_recv_buffer_put(req->rl_reply);
+       rpcrdma_rep_put(&r_xprt->rx_buf, rep);
         req->rl_reply = NULL;
  
         spin_lock(&xprt->bc_pa_lock);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c

index 766a104..229fcc9 100644 (file)
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -49,20 +49,13 @@
  # define RPCDBG_FACILITY       RPCDBG_TRANS
  #endif
  
-/**
- * frwr_release_mr - Destroy one MR
- * @mr: MR allocated by frwr_mr_init
- *
- */
-void frwr_release_mr(struct rpcrdma_mr *mr)
+static void frwr_cid_init(struct rpcrdma_ep *ep,
+                         struct rpcrdma_mr *mr)
  {
-       int rc;
+       struct rpc_rdma_cid *cid = &mr->mr_cid;
  
-       rc = ib_dereg_mr(mr->frwr.fr_mr);
-       if (rc)
-               trace_xprtrdma_frwr_dereg(mr, rc);
-       kfree(mr->mr_sg);
-       kfree(mr);
+       cid->ci_queue_id = ep->re_attr.send_cq->res.id;
+       cid->ci_completion_id = mr->mr_ibmr->res.id;
  }
  
  static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
@@ -75,20 +68,22 @@ static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
         }
  }
  
-static void frwr_mr_recycle(struct rpcrdma_mr *mr)
+/**
+ * frwr_mr_release - Destroy one MR
+ * @mr: MR allocated by frwr_mr_init
+ *
+ */
+void frwr_mr_release(struct rpcrdma_mr *mr)
  {
-       struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
-       trace_xprtrdma_mr_recycle(mr);
-
-       frwr_mr_unmap(r_xprt, mr);
+       int rc;
  
-       spin_lock(&r_xprt->rx_buf.rb_lock);
-       list_del(&mr->mr_all);
-       r_xprt->rx_stats.mrs_recycled++;
-       spin_unlock(&r_xprt->rx_buf.rb_lock);
+       frwr_mr_unmap(mr->mr_xprt, mr);
  
-       frwr_release_mr(mr);
+       rc = ib_dereg_mr(mr->mr_ibmr);
+       if (rc)
+               trace_xprtrdma_frwr_dereg(mr, rc);
+       kfree(mr->mr_sg);
+       kfree(mr);
  }
  
  static void frwr_mr_put(struct rpcrdma_mr *mr)
@@ -144,10 +139,11 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
                 goto out_list_err;
  
         mr->mr_xprt = r_xprt;
-       mr->frwr.fr_mr = frmr;
+       mr->mr_ibmr = frmr;
         mr->mr_device = NULL;
         INIT_LIST_HEAD(&mr->mr_list);
-       init_completion(&mr->frwr.fr_linv_done);
+       init_completion(&mr->mr_linv_done);
+       frwr_cid_init(ep, mr);
  
         sg_init_table(sg, depth);
         mr->mr_sg = sg;
@@ -257,6 +253,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
         ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
         ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
         ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+       ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;
         ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
  
         ep->re_max_rdma_segs =
@@ -326,7 +323,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                 goto out_dmamap_err;
         mr->mr_device = ep->re_id->device;
  
-       ibmr = mr->frwr.fr_mr;
+       ibmr = mr->mr_ibmr;
         n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
         if (n != dma_nents)
                 goto out_mapmr_err;
@@ -336,7 +333,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
         key = (u8)(ibmr->rkey & 0x000000FF);
         ib_update_fast_reg_key(ibmr, ++key);
  
-       reg_wr = &mr->frwr.fr_regwr;
+       reg_wr = &mr->mr_regwr;
         reg_wr->mr = ibmr;
         reg_wr->key = ibmr->rkey;
         reg_wr->access = writing ?
@@ -364,29 +361,19 @@ out_mapmr_err:
   * @cq: completion queue
   * @wc: WCE for a completed FastReg WR
   *
+ * Each flushed MR gets destroyed after the QP has drained.
   */
  static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
  {
         struct ib_cqe *cqe = wc->wr_cqe;
-       struct rpcrdma_frwr *frwr =
-               container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+       struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
  
         /* WARNING: Only wr_cqe and status are reliable at this point */
-       trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
-       /* The MR will get recycled when the associated req is retransmitted */
+       trace_xprtrdma_wc_fastreg(wc, &mr->mr_cid);
  
         rpcrdma_flush_disconnect(cq->cq_context, wc);
  }
  
-static void frwr_cid_init(struct rpcrdma_ep *ep,
-                         struct rpcrdma_frwr *frwr)
-{
-       struct rpc_rdma_cid *cid = &frwr->fr_cid;
-
-       cid->ci_queue_id = ep->re_attr.send_cq->res.id;
-       cid->ci_completion_id = frwr->fr_mr->res.id;
-}
-
  /**
   * frwr_send - post Send WRs containing the RPC Call message
   * @r_xprt: controlling transport instance
@@ -403,27 +390,36 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
   */
  int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
  {
+       struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
         struct rpcrdma_ep *ep = r_xprt->rx_ep;
-       struct ib_send_wr *post_wr;
         struct rpcrdma_mr *mr;
+       unsigned int num_wrs;
  
-       post_wr = &req->rl_wr;
+       num_wrs = 1;
+       post_wr = send_wr;
         list_for_each_entry(mr, &req->rl_registered, mr_list) {
-               struct rpcrdma_frwr *frwr;
-
-               frwr = &mr->frwr;
-
-               frwr->fr_cqe.done = frwr_wc_fastreg;
-               frwr_cid_init(ep, frwr);
-               frwr->fr_regwr.wr.next = post_wr;
-               frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
-               frwr->fr_regwr.wr.num_sge = 0;
-               frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
-               frwr->fr_regwr.wr.send_flags = 0;
+               trace_xprtrdma_mr_fastreg(mr);
+
+               mr->mr_cqe.done = frwr_wc_fastreg;
+               mr->mr_regwr.wr.next = post_wr;
+               mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
+               mr->mr_regwr.wr.num_sge = 0;
+               mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
+               mr->mr_regwr.wr.send_flags = 0;
+               post_wr = &mr->mr_regwr.wr;
+               ++num_wrs;
+       }
  
-               post_wr = &frwr->fr_regwr.wr;
+       if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
+               send_wr->send_flags |= IB_SEND_SIGNALED;
+               ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
+                                         num_wrs - ep->re_send_count);
+       } else {
+               send_wr->send_flags &= ~IB_SEND_SIGNALED;
+               ep->re_send_count -= num_wrs;
         }
  
+       trace_xprtrdma_post_send(req);
         return ib_post_send(ep->re_id->qp, post_wr, NULL);
  }
  
@@ -440,6 +436,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
         list_for_each_entry(mr, mrs, mr_list)
                 if (mr->mr_handle == rep->rr_inv_rkey) {
                         list_del_init(&mr->mr_list);
+                       trace_xprtrdma_mr_reminv(mr);
                         frwr_mr_put(mr);
                         break;  /* only one invalidated MR per RPC */
                 }
@@ -447,9 +444,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
  
  static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
  {
-       if (wc->status != IB_WC_SUCCESS)
-               frwr_mr_recycle(mr);
-       else
+       if (likely(wc->status == IB_WC_SUCCESS))
                 frwr_mr_put(mr);
  }
  
@@ -462,12 +457,10 @@ static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
  static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
  {
         struct ib_cqe *cqe = wc->wr_cqe;
-       struct rpcrdma_frwr *frwr =
-               container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-       struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+       struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
  
         /* WARNING: Only wr_cqe and status are reliable at this point */
-       trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
+       trace_xprtrdma_wc_li(wc, &mr->mr_cid);
         frwr_mr_done(wc, mr);
  
         rpcrdma_flush_disconnect(cq->cq_context, wc);
@@ -483,14 +476,12 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
  static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
  {
         struct ib_cqe *cqe = wc->wr_cqe;
-       struct rpcrdma_frwr *frwr =
-               container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-       struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+       struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
  
         /* WARNING: Only wr_cqe and status are reliable at this point */
-       trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
+       trace_xprtrdma_wc_li_wake(wc, &mr->mr_cid);
         frwr_mr_done(wc, mr);
-       complete(&frwr->fr_linv_done);
+       complete(&mr->mr_linv_done);
  
         rpcrdma_flush_disconnect(cq->cq_context, wc);
  }
@@ -511,7 +502,6 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
         struct ib_send_wr *first, **prev, *last;
         struct rpcrdma_ep *ep = r_xprt->rx_ep;
         const struct ib_send_wr *bad_wr;
-       struct rpcrdma_frwr *frwr;
         struct rpcrdma_mr *mr;
         int rc;
  
@@ -520,35 +510,34 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
          * Chain the LOCAL_INV Work Requests and post them with
          * a single ib_post_send() call.
          */
-       frwr = NULL;
         prev = &first;
         while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
  
                 trace_xprtrdma_mr_localinv(mr);
                 r_xprt->rx_stats.local_inv_needed++;
  
-               frwr = &mr->frwr;
-               frwr->fr_cqe.done = frwr_wc_localinv;
-               frwr_cid_init(ep, frwr);
-               last = &frwr->fr_invwr;
+               last = &mr->mr_invwr;
                 last->next = NULL;
-               last->wr_cqe = &frwr->fr_cqe;
+               last->wr_cqe = &mr->mr_cqe;
                 last->sg_list = NULL;
                 last->num_sge = 0;
                 last->opcode = IB_WR_LOCAL_INV;
                 last->send_flags = IB_SEND_SIGNALED;
                 last->ex.invalidate_rkey = mr->mr_handle;
  
+               last->wr_cqe->done = frwr_wc_localinv;
+
                 *prev = last;
                 prev = &last->next;
         }
+       mr = container_of(last, struct rpcrdma_mr, mr_invwr);
  
         /* Strong send queue ordering guarantees that when the
          * last WR in the chain completes, all WRs in the chain
          * are complete.
          */
-       frwr->fr_cqe.done = frwr_wc_localinv_wake;
-       reinit_completion(&frwr->fr_linv_done);
+       last->wr_cqe->done = frwr_wc_localinv_wake;
+       reinit_completion(&mr->mr_linv_done);
  
         /* Transport disconnect drains the receive CQ before it
          * replaces the QP. The RPC reply handler won't call us
@@ -562,22 +551,12 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
          * not happen, so don't wait in that case.
          */
         if (bad_wr != first)
-               wait_for_completion(&frwr->fr_linv_done);
+               wait_for_completion(&mr->mr_linv_done);
         if (!rc)
                 return;
  
-       /* Recycle MRs in the LOCAL_INV chain that did not get posted.
-        */
+       /* On error, the MRs get destroyed once the QP has drained. */
         trace_xprtrdma_post_linv_err(req, rc);
-       while (bad_wr) {
-               frwr = container_of(bad_wr, struct rpcrdma_frwr,
-                                   fr_invwr);
-               mr = container_of(frwr, struct rpcrdma_mr, frwr);
-               bad_wr = bad_wr->next;
-
-               list_del_init(&mr->mr_list);
-               frwr_mr_recycle(mr);
-       }
  }
  
  /**
@@ -589,20 +568,24 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
  static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
  {
         struct ib_cqe *cqe = wc->wr_cqe;
-       struct rpcrdma_frwr *frwr =
-               container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-       struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
-       struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
+       struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
+       struct rpcrdma_rep *rep;
  
         /* WARNING: Only wr_cqe and status are reliable at this point */
-       trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
-       frwr_mr_done(wc, mr);
+       trace_xprtrdma_wc_li_done(wc, &mr->mr_cid);
  
-       /* Ensure @rep is generated before frwr_mr_done */
+       /* Ensure that @rep is generated before the MR is released */
+       rep = mr->mr_req->rl_reply;
         smp_rmb();
-       rpcrdma_complete_rqst(rep);
  
-       rpcrdma_flush_disconnect(cq->cq_context, wc);
+       if (wc->status != IB_WC_SUCCESS) {
+               if (rep)
+                       rpcrdma_unpin_rqst(rep);
+               rpcrdma_flush_disconnect(cq->cq_context, wc);
+               return;
+       }
+       frwr_mr_put(mr);
+       rpcrdma_complete_rqst(rep);
  }
  
  /**
@@ -619,33 +602,29 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
  {
         struct ib_send_wr *first, *last, **prev;
         struct rpcrdma_ep *ep = r_xprt->rx_ep;
-       const struct ib_send_wr *bad_wr;
-       struct rpcrdma_frwr *frwr;
         struct rpcrdma_mr *mr;
         int rc;
  
         /* Chain the LOCAL_INV Work Requests and post them with
          * a single ib_post_send() call.
          */
-       frwr = NULL;
         prev = &first;
         while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
  
                 trace_xprtrdma_mr_localinv(mr);
                 r_xprt->rx_stats.local_inv_needed++;
  
-               frwr = &mr->frwr;
-               frwr->fr_cqe.done = frwr_wc_localinv;
-               frwr_cid_init(ep, frwr);
-               last = &frwr->fr_invwr;
+               last = &mr->mr_invwr;
                 last->next = NULL;
-               last->wr_cqe = &frwr->fr_cqe;
+               last->wr_cqe = &mr->mr_cqe;
                 last->sg_list = NULL;
                 last->num_sge = 0;
                 last->opcode = IB_WR_LOCAL_INV;
                 last->send_flags = IB_SEND_SIGNALED;
                 last->ex.invalidate_rkey = mr->mr_handle;
  
+               last->wr_cqe->done = frwr_wc_localinv;
+
                 *prev = last;
                 prev = &last->next;
         }
@@ -655,31 +634,23 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
          * are complete. The last completion will wake up the
          * RPC waiter.
          */
-       frwr->fr_cqe.done = frwr_wc_localinv_done;
+       last->wr_cqe->done = frwr_wc_localinv_done;
  
         /* Transport disconnect drains the receive CQ before it
          * replaces the QP. The RPC reply handler won't call us
          * unless re_id->qp is a valid pointer.
          */
-       bad_wr = NULL;
-       rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
+       rc = ib_post_send(ep->re_id->qp, first, NULL);
         if (!rc)
                 return;
  
-       /* Recycle MRs in the LOCAL_INV chain that did not get posted.
-        */
+       /* On error, the MRs get destroyed once the QP has drained. */
         trace_xprtrdma_post_linv_err(req, rc);
-       while (bad_wr) {
-               frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
-               mr = container_of(frwr, struct rpcrdma_mr, frwr);
-               bad_wr = bad_wr->next;
-
-               frwr_mr_recycle(mr);
-       }
  
         /* The final LOCAL_INV WR in the chain is supposed to
-        * do the wake. If it was never posted, the wake will
-        * not happen, so wake here in that case.
+        * do the wake. If it was never posted, the wake does
+        * not happen. Unpin the rqst in preparation for its
+        * retransmission.
          */
-       rpcrdma_complete_rqst(req->rl_reply);
+       rpcrdma_unpin_rqst(req->rl_reply);
  }
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c

index 292f066..649f7d8 100644 (file)
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1326,9 +1326,35 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
         return -EIO;
  }
  
-/* Perform XID lookup, reconstruction of the RPC reply, and
- * RPC completion while holding the transport lock to ensure
- * the rep, rqst, and rq_task pointers remain stable.
+/**
+ * rpcrdma_unpin_rqst - Release rqst without completing it
+ * @rep: RPC/RDMA Receive context
+ *
+ * This is done when a connection is lost so that a Reply
+ * can be dropped and its matching Call can be subsequently
+ * retransmitted on a new connection.
+ */
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
+{
+       struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
+       struct rpc_rqst *rqst = rep->rr_rqst;
+       struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+
+       req->rl_reply = NULL;
+       rep->rr_rqst = NULL;
+
+       spin_lock(&xprt->queue_lock);
+       xprt_unpin_rqst(rqst);
+       spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * rpcrdma_complete_rqst - Pass completed rqst back to RPC
+ * @rep: RPC/RDMA Receive context
+ *
+ * Reconstruct the RPC reply and complete the transaction
+ * while @rqst is still pinned to ensure the rep, rqst, and
+ * rq_task pointers remain stable.
   */
  void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
  {
@@ -1430,13 +1456,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
                 credits = 1;    /* don't deadlock */
         else if (credits > r_xprt->rx_ep->re_max_requests)
                 credits = r_xprt->rx_ep->re_max_requests;
+       rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
+                          false);
         if (buf->rb_credits != credits)
                 rpcrdma_update_cwnd(r_xprt, credits);
-       rpcrdma_post_recvs(r_xprt, false);
  
         req = rpcr_to_rdmar(rqst);
         if (unlikely(req->rl_reply))
-               rpcrdma_recv_buffer_put(req->rl_reply);
+               rpcrdma_rep_put(buf, req->rl_reply);
         req->rl_reply = rep;
         rep->rr_rqst = rqst;
  
@@ -1464,5 +1491,5 @@ out_shortreply:
         trace_xprtrdma_reply_short_err(rep);
  
  out:
-       rpcrdma_recv_buffer_put(rep);
+       rpcrdma_rep_put(buf, rep);
  }
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c

index 78d29d1..0995359 100644 (file)
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -262,8 +262,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
   * xprt_rdma_inject_disconnect - inject a connection fault
   * @xprt: transport context
   *
- * If @xprt is connected, disconnect it to simulate spurious connection
- * loss.
+ * If @xprt is connected, disconnect it to simulate spurious
+ * connection loss. Caller must hold @xprt's send lock to
+ * ensure that data structures and hardware resources are
+ * stable during the rdma_disconnect() call.
   */
  static void
  xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c

index ec912cf..1e965a3 100644 (file)
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -101,6 +101,12 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
         struct rpcrdma_ep *ep = r_xprt->rx_ep;
         struct rdma_cm_id *id = ep->re_id;
  
+       /* Wait for rpcrdma_post_recvs() to leave its critical
+        * section.
+        */
+       if (atomic_inc_return(&ep->re_receiving) > 1)
+               wait_for_completion(&ep->re_done);
+
         /* Flush Receives, then wait for deferred Reply work
          * to complete.
          */
@@ -114,22 +120,6 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
         rpcrdma_ep_put(ep);
  }
  
-/**
- * rpcrdma_qp_event_handler - Handle one QP event (error notification)
- * @event: details of the event
- * @context: ep that owns QP where event occurred
- *
- * Called from the RDMA provider (device driver) possibly in an interrupt
- * context. The QP is always destroyed before the ID, so the ID will be
- * reliably available when this handler is invoked.
- */
-static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
-{
-       struct rpcrdma_ep *ep = context;
-
-       trace_xprtrdma_qp_event(ep, event);
-}
-
  /* Ensure xprt_force_disconnect() is invoked exactly once when a
   * connection is closed or lost. (The important thing is it needs
   * to be invoked "at least" once).
@@ -205,7 +195,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
  
  out_flushed:
         rpcrdma_flush_disconnect(r_xprt, wc);
-       rpcrdma_rep_destroy(rep);
+       rpcrdma_rep_put(&r_xprt->rx_buf, rep);
  }
  
  static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
@@ -414,6 +404,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
         __module_get(THIS_MODULE);
         device = id->device;
         ep->re_id = id;
+       reinit_completion(&ep->re_done);
  
         ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
         ep->re_inline_send = xprt_rdma_max_inline_write;
@@ -424,8 +415,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
  
         r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests);
  
-       ep->re_attr.event_handler = rpcrdma_qp_event_handler;
-       ep->re_attr.qp_context = ep;
         ep->re_attr.srq = NULL;
         ep->re_attr.cap.max_inline_data = 0;
         ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -535,7 +524,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
          * outstanding Receives.
          */
         rpcrdma_ep_get(ep);
-       rpcrdma_post_recvs(r_xprt, true);
+       rpcrdma_post_recvs(r_xprt, 1, true);
  
         rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
         if (rc)
@@ -954,13 +943,11 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
                 rpcrdma_req_reset(req);
  }
  
-/* No locking needed here. This function is called only by the
- * Receive completion handler.
- */
  static noinline
  struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
                                        bool temp)
  {
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
         struct rpcrdma_rep *rep;
  
         rep = kzalloc(sizeof(*rep), GFP_KERNEL);
@@ -987,7 +974,10 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
         rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
         rep->rr_recv_wr.num_sge = 1;
         rep->rr_temp = temp;
-       list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps);
+
+       spin_lock(&buf->rb_lock);
+       list_add(&rep->rr_all, &buf->rb_all_reps);
+       spin_unlock(&buf->rb_lock);
         return rep;
  
  out_free_regbuf:
@@ -998,16 +988,23 @@ out:
         return NULL;
  }
  
-/* No locking needed here. This function is invoked only by the
- * Receive completion handler, or during transport shutdown.
- */
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
  {
-       list_del(&rep->rr_all);
         rpcrdma_regbuf_free(rep->rr_rdmabuf);
         kfree(rep);
  }
  
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+       struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
+       spin_lock(&buf->rb_lock);
+       list_del(&rep->rr_all);
+       spin_unlock(&buf->rb_lock);
+
+       rpcrdma_rep_free(rep);
+}
+
  static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
  {
         struct llist_node *node;
@@ -1019,12 +1016,21 @@ static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
         return llist_entry(node, struct rpcrdma_rep, rr_node);
  }
  
-static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
-                           struct rpcrdma_rep *rep)
+/**
+ * rpcrdma_rep_put - Release rpcrdma_rep back to free list
+ * @buf: buffer pool
+ * @rep: rep to release
+ *
+ */
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep)
  {
         llist_add(&rep->rr_node, &buf->rb_free_reps);
  }
  
+/* Caller must ensure the QP is quiescent (RQ is drained) before
+ * invoking this function, to guarantee rb_all_reps is not
+ * changing.
+ */
  static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
  {
         struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1032,7 +1038,7 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
  
         list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
                 rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
-               rep->rr_temp = true;
+               rep->rr_temp = true;    /* Mark this rep for destruction */
         }
  }
  
@@ -1040,8 +1046,18 @@ static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
  {
         struct rpcrdma_rep *rep;
  
-       while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
-               rpcrdma_rep_destroy(rep);
+       spin_lock(&buf->rb_lock);
+       while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
+                                              struct rpcrdma_rep,
+                                              rr_all)) != NULL) {
+               list_del(&rep->rr_all);
+               spin_unlock(&buf->rb_lock);
+
+               rpcrdma_rep_free(rep);
+
+               spin_lock(&buf->rb_lock);
+       }
+       spin_unlock(&buf->rb_lock);
  }
  
  /**
@@ -1104,7 +1120,7 @@ void rpcrdma_req_destroy(struct rpcrdma_req *req)
                 list_del(&mr->mr_all);
                 spin_unlock(&buf->rb_lock);
  
-               frwr_release_mr(mr);
+               frwr_mr_release(mr);
         }
  
         rpcrdma_regbuf_free(req->rl_recvbuf);
@@ -1135,7 +1151,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
                 list_del(&mr->mr_all);
                 spin_unlock(&buf->rb_lock);
  
-               frwr_release_mr(mr);
+               frwr_mr_release(mr);
  
                 spin_lock(&buf->rb_lock);
         }
@@ -1221,17 +1237,6 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
         spin_unlock(&buffers->rb_lock);
  }
  
-/**
- * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
- * @rep: rep to release
- *
- * Used after error conditions.
- */
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
-{
-       rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
-}
-
  /* Returns a pointer to a rpcrdma_regbuf object, or NULL.
   *
   * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
@@ -1342,21 +1347,7 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
   */
  int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
  {
-       struct ib_send_wr *send_wr = &req->rl_wr;
-       struct rpcrdma_ep *ep = r_xprt->rx_ep;
-       int rc;
-
-       if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) {
-               send_wr->send_flags |= IB_SEND_SIGNALED;
-               ep->re_send_count = ep->re_send_batch;
-       } else {
-               send_wr->send_flags &= ~IB_SEND_SIGNALED;
-               --ep->re_send_count;
-       }
-
-       trace_xprtrdma_post_send(req);
-       rc = frwr_send(r_xprt, req);
-       if (rc)
+       if (frwr_send(r_xprt, req))
                 return -ENOTCONN;
         return 0;
  }
@@ -1364,27 +1355,30 @@ int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
  /**
   * rpcrdma_post_recvs - Refill the Receive Queue
   * @r_xprt: controlling transport instance
- * @temp: mark Receive buffers to be deleted after use
+ * @needed: current credit grant
+ * @temp: mark Receive buffers to be deleted after one use
   *
   */
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
  {
         struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
         struct rpcrdma_ep *ep = r_xprt->rx_ep;
         struct ib_recv_wr *wr, *bad_wr;
         struct rpcrdma_rep *rep;
-       int needed, count, rc;
+       int count, rc;
  
         rc = 0;
         count = 0;
  
-       needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
         if (likely(ep->re_receive_count > needed))
                 goto out;
         needed -= ep->re_receive_count;
         if (!temp)
                 needed += RPCRDMA_MAX_RECV_BATCH;
  
+       if (atomic_inc_return(&ep->re_receiving) > 1)
+               goto out;
+
         /* fast path: all needed reps can be found on the free list */
         wr = NULL;
         while (needed) {
@@ -1410,6 +1404,9 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
  
         rc = ib_post_recv(ep->re_id->qp, wr,
                           (const struct ib_recv_wr **)&bad_wr);
+       if (atomic_dec_return(&ep->re_receiving) > 0)
+               complete(&ep->re_done);
+
  out:
         trace_xprtrdma_post_recvs(r_xprt, count, rc);
         if (rc) {
@@ -1418,7 +1415,7 @@ out:
  
                         rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
                         wr = wr->next;
-                       rpcrdma_recv_buffer_put(rep);
+                       rpcrdma_rep_put(buf, rep);
                         --count;
                 }
         }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h

index fe3be98..436ad73 100644 (file)
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -83,6 +83,7 @@ struct rpcrdma_ep {
         unsigned int            re_max_inline_recv;
         int                     re_async_rc;
         int                     re_connect_status;
+       atomic_t                re_receiving;
         atomic_t                re_force_disconnect;
         struct ib_qp_init_attr  re_attr;
         wait_queue_head_t       re_connect_wait;
@@ -228,31 +229,28 @@ struct rpcrdma_sendctx {
   * An external memory region is any buffer or page that is registered
   * on the fly (ie, not pre-registered).
   */
-struct rpcrdma_frwr {
-       struct ib_mr                    *fr_mr;
-       struct ib_cqe                   fr_cqe;
-       struct rpc_rdma_cid             fr_cid;
-       struct completion               fr_linv_done;
-       union {
-               struct ib_reg_wr        fr_regwr;
-               struct ib_send_wr       fr_invwr;
-       };
-};
-
  struct rpcrdma_req;
  struct rpcrdma_mr {
         struct list_head        mr_list;
         struct rpcrdma_req      *mr_req;
+
+       struct ib_mr            *mr_ibmr;
         struct ib_device        *mr_device;
         struct scatterlist      *mr_sg;
         int                     mr_nents;
         enum dma_data_direction mr_dir;
-       struct rpcrdma_frwr     frwr;
+       struct ib_cqe           mr_cqe;
+       struct completion       mr_linv_done;
+       union {
+               struct ib_reg_wr        mr_regwr;
+               struct ib_send_wr       mr_invwr;
+       };
         struct rpcrdma_xprt     *mr_xprt;
         u32                     mr_handle;
         u32                     mr_length;
         u64                     mr_offset;
         struct list_head        mr_all;
+       struct rpc_rdma_cid     mr_cid;
  };
  
  /*
@@ -461,7 +459,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
  void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
  
  int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
  
  /*
   * Buffer calls - xprtrdma/verbs.c
@@ -480,7 +478,7 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
  struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
  void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
                         struct rpcrdma_req *req);
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
  
  bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
                             gfp_t flags);
@@ -527,7 +525,7 @@ rpcrdma_data_dir(bool writing)
  void frwr_reset(struct rpcrdma_req *req);
  int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device);
  int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr);
-void frwr_release_mr(struct rpcrdma_mr *mr);
+void frwr_mr_release(struct rpcrdma_mr *mr);
  struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
                                 struct rpcrdma_mr_seg *seg,
                                 int nsegs, bool writing, __be32 xid,
@@ -560,6 +558,7 @@ int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
  void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep);
  void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt);
  void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep);
  void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
  
  static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c

index e35760f..47aa47a 100644 (file)
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -558,6 +558,10 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
         struct rpc_rqst *req;
         ssize_t ret;
  
+       /* Is this transport associated with the backchannel? */
+       if (!xprt->bc_serv)
+               return -ESHUTDOWN;
+
         /* Look up and lock the request corresponding to the given XID */
         req = xprt_lookup_bc_request(xprt, transport->recv.xid);
         if (!req) {
@@ -1018,6 +1022,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
          * to cope with writespace callbacks arriving _after_ we have
          * called sendmsg(). */
         req->rq_xtime = ktime_get();
+       tcp_sock_set_cork(transport->inet, true);
         while (1) {
                 status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
                                            transport->xmit.offset, rm, &sent);
@@ -1032,6 +1037,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
                 if (likely(req->rq_bytes_sent >= msglen)) {
                         req->rq_xmit_bytes_sent += transport->xmit.offset;
                         transport->xmit.offset = 0;
+                       if (atomic_long_read(&xprt->xmit_queuelen) == 1)
+                               tcp_sock_set_cork(transport->inet, false);
                         return 0;
                 }
  
@@ -2163,6 +2170,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                 }
  
                 xs_tcp_set_socket_timeouts(xprt, sock);
+               tcp_sock_set_nodelay(sk);
  
                 write_lock_bh(&sk->sk_callback_lock);
  
@@ -2177,7 +2185,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
  
                 /* socket options */
                 sock_reset_flag(sk, SOCK_LINGER);
-               tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
  
                 xprt_clear_connected(xprt);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 7 May 2021 18:23:41 +0000 (11:23 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 7 May 2021 18:23:41 +0000 (11:23 -0700)
fs/Kconfig		patch \| blob \| history
fs/nfs/callback_proc.c		patch \| blob \| history
fs/nfs/client.c		patch \| blob \| history
fs/nfs/delegation.c		patch \| blob \| history
fs/nfs/delegation.h		patch \| blob \| history
fs/nfs/dir.c		patch \| blob \| history
fs/nfs/export.c		patch \| blob \| history
fs/nfs/file.c		patch \| blob \| history
fs/nfs/flexfilelayout/flexfilelayout.c		patch \| blob \| history
fs/nfs/fs_context.c		patch \| blob \| history
fs/nfs/inode.c		patch \| blob \| history
fs/nfs/internal.h		patch \| blob \| history
fs/nfs/io.c		patch \| blob \| history
fs/nfs/mount_clnt.c		patch \| blob \| history
fs/nfs/nfs3acl.c		patch \| blob \| history
fs/nfs/nfs3xdr.c		patch \| blob \| history
fs/nfs/nfs42proc.c		patch \| blob \| history
fs/nfs/nfs42xattr.c		patch \| blob \| history
fs/nfs/nfs4file.c		patch \| blob \| history
fs/nfs/nfs4proc.c		patch \| blob \| history
fs/nfs/nfs4state.c		patch \| blob \| history
fs/nfs/nfs4trace.h		patch \| blob \| history
fs/nfs/nfs4xdr.c		patch \| blob \| history
fs/nfs/nfstrace.c		patch \| blob \| history
fs/nfs/nfstrace.h		patch \| blob \| history
fs/nfs/pagelist.c		patch \| blob \| history
fs/nfs/pnfs.c		patch \| blob \| history
fs/nfs/proc.c		patch \| blob \| history
fs/nfs/super.c		patch \| blob \| history
fs/nfs/write.c		patch \| blob \| history
fs/nfsd/Kconfig		patch \| blob \| history
include/linux/nfs4.h		patch \| blob \| history
include/linux/nfs_fs.h		patch \| blob \| history
include/linux/nfs_fs_sb.h		patch \| blob \| history
include/linux/nfs_xdr.h		patch \| blob \| history
include/linux/sunrpc/xprt.h		patch \| blob \| history
include/trace/events/rpcrdma.h		patch \| blob \| history
include/trace/events/sunrpc.h		patch \| blob \| history
net/sunrpc/clnt.c		patch \| blob \| history
net/sunrpc/rpcb_clnt.c		patch \| blob \| history
net/sunrpc/xprt.c		patch \| blob \| history
net/sunrpc/xprtrdma/backchannel.c		patch \| blob \| history
net/sunrpc/xprtrdma/frwr_ops.c		patch \| blob \| history
net/sunrpc/xprtrdma/rpc_rdma.c		patch \| blob \| history
net/sunrpc/xprtrdma/transport.c		patch \| blob \| history
net/sunrpc/xprtrdma/verbs.c		patch \| blob \| history
net/sunrpc/xprtrdma/xprt_rdma.h		patch \| blob \| history
net/sunrpc/xprtsock.c		patch \| blob \| history