default y
config NFS_V4_2_SSC_HELPER
- tristate
- default y if NFS_V4=y || NFS_FS=y
+ bool
+ default y if NFS_V4_2
source "net/sunrpc/Kconfig"
source "fs/ceph/Kconfig"
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
if (!pnfs_layout_is_valid(lo))
continue;
- if (stateid != NULL &&
- !nfs4_stateid_match_other(stateid, &lo->plh_stateid))
+ if (!nfs4_stateid_match_other(stateid, &lo->plh_stateid))
continue;
- if (!nfs_sb_active(server->super))
- continue;
- inode = igrab(lo->plh_inode);
+ if (nfs_sb_active(server->super))
+ inode = igrab(lo->plh_inode);
+ else
+ inode = ERR_PTR(-EAGAIN);
rcu_read_unlock();
if (inode)
return inode;
continue;
if (nfsi->layout != lo)
continue;
- if (!nfs_sb_active(server->super))
- continue;
- inode = igrab(lo->plh_inode);
+ if (nfs_sb_active(server->super))
+ inode = igrab(lo->plh_inode);
+ else
+ inode = ERR_PTR(-EAGAIN);
rcu_read_unlock();
if (inode)
return inode;
to->to_maxval = to->to_initval;
to->to_exponential = 0;
break;
-#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
case XPRT_TRANSPORT_UDP:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_UDP_RETRANS;
to->to_maxval = NFS_MAX_UDP_TIMEOUT;
to->to_exponential = 1;
break;
-#endif
default:
BUG();
}
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
server->options = ctx->options;
- server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
- NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
- NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+ server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+
+ switch (clp->rpc_ops->version) {
+ case 2:
+ server->fattr_valid = NFS_ATTR_FATTR_V2;
+ break;
+ case 3:
+ server->fattr_valid = NFS_ATTR_FATTR_V3;
+ break;
+ default:
+ server->fattr_valid = NFS_ATTR_FATTR_V4;
+ }
if (ctx->rsize)
server->rsize = nfs_block_size(ctx->rsize, NULL);
server->maxfilesize = fsinfo->maxfilesize;
server->time_delta = fsinfo->time_delta;
+ server->change_attr_type = fsinfo->change_attr_type;
server->clone_blksize = fsinfo->clone_blksize;
/* We're airborne Set socket buffersize */
return NULL;
}
+ server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
ida_init(&server->openowner_id);
ida_init(&server->lockowner_id);
pnfs_init_server(server);
return ret;
}
/**
- * nfs_have_delegation - check if inode has a delegation, mark it
+ * nfs4_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
* @flags: delegation types to check for
if (freeme == NULL)
goto out;
add_new:
+ /*
+ * If we didn't revalidate the change attribute before setting
+ * the delegation, then pre-emptively ask for a full attribute
+ * cache revalidation.
+ */
+ spin_lock(&inode->i_lock);
+ if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_CHANGE)
+ nfs_set_cache_invalid(inode,
+ NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
+ NFS_INO_INVALID_OTHER | NFS_INO_INVALID_DATA |
+ NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
+ NFS_INO_INVALID_XATTR);
+ spin_unlock(&inode->i_lock);
+
list_add_tail_rcu(&delegation->super_list, &server->delegations);
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
atomic_long_inc(&nfs_active_delegations);
trace_nfs4_set_delegation(inode, type);
-
- spin_lock(&inode->i_lock);
- if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME))
- NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED;
- spin_unlock(&inode->i_lock);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
}
/**
- * nfs_inode_return_delegation - synchronously return a delegation
+ * nfs4_inode_return_delegation - synchronously return a delegation
* @inode: inode to process
*
* This routine will always flush any dirty data to disk on the
}
/**
- * nfs_inode_return_delegation_on_close - asynchronously return a delegation
+ * nfs4_inode_return_delegation_on_close - asynchronously return a delegation
* @inode: inode to process
*
* This routine is called on file close in order to determine if the
}
/**
- * nfs_super_return_all_delegations - return delegations for one superblock
+ * nfs_server_return_all_delegations - return delegations for one superblock
* @server: pointer to nfs_server to process
*
*/
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
- return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
- !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED);
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
}
#endif
break;
}
+ verf_arg = verf_res;
+
status = nfs_readdir_page_filler(desc, entry, pages, pglen,
arrays, narrays);
} while (!status && nfs_readdir_page_needs_filling(page));
}
return res;
}
- memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
+ /*
+ * Set the cookie verifier if the page cache was empty
+ */
+ if (desc->page_index == 0)
+ memcpy(nfsi->cookieverf, verf,
+ sizeof(nfsi->cookieverf));
}
res = nfs_readdir_search_array(desc);
if (res == 0) {
/*
* Once we've found the start of the dirent within a page: fill 'er up...
*/
-static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
+static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
+ const __be32 *verf)
{
struct file *file = desc->file;
- struct nfs_inode *nfsi = NFS_I(file_inode(file));
struct nfs_cache_array *array;
unsigned int i = 0;
desc->eof = true;
break;
}
- memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
+ memcpy(desc->verf, verf, sizeof(desc->verf));
if (i < (array->size-1))
desc->dir_cookie = array->array[i+1].cookie;
else
for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
desc->page = arrays[i];
- nfs_do_filldir(desc);
+ nfs_do_filldir(desc, verf);
}
desc->page = NULL;
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
+ struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_dir_context *dir_ctx = file->private_data;
struct nfs_readdir_descriptor *desc;
int res;
break;
}
if (res == -ETOOSMALL && desc->plus) {
- clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
+ clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
nfs_zap_caches(inode);
desc->page_index = 0;
desc->plus = false;
if (res < 0)
break;
- nfs_do_filldir(desc);
+ nfs_do_filldir(desc, nfsi->cookieverf);
nfs_readdir_page_unlock_and_put_cached(desc);
} while (!desc->eof);
NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
nfs_set_cache_invalid(
inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
- NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED);
+ NFS_INO_INVALID_NLINK);
spin_unlock(&inode->i_lock);
}
if (S_ISDIR(inode->i_mode))
return 0;
- if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
+ if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
ret = __nfs_revalidate_inode(server, inode);
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
- res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
+ NFS_INO_INVALID_OTHER);
if (res == 0)
res = generic_permission(&init_user_ns, inode, mask);
goto out;
static u64 nfs_fetch_iversion(struct inode *inode)
{
- struct nfs_server *server = NFS_SERVER(inode);
-
- /* Is this the right call?: */
- nfs_revalidate_inode(server, inode);
- /*
- * Also, note we're ignoring any returned error. That seems to be
- * the practice for cache consistency information elsewhere in
- * the server, but I'm not sure why.
- */
- if (server->nfs_client->rpc_ops->version >= 4)
- return inode_peek_iversion_raw(inode);
- else
- return time_to_chattr(&inode->i_ctime);
+ nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
+ return inode_peek_iversion_raw(inode);
}
const struct export_operations nfs_export_ops = {
if (filp->f_flags & O_DIRECT)
goto force_reval;
- if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE))
+ if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
goto force_reval;
return 0;
force_reval:
if (unlikely(!p))
return -ENOBUFS;
fh->size = be32_to_cpup(p++);
- if (fh->size > sizeof(struct nfs_fh)) {
+ if (fh->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
fh->size);
return -EOVERFLOW;
return 0;
}
+#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+ return true;
+}
+#else
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+ if (ctx->version == 4)
+ return true;
+ return false;
+}
+#endif
+
/*
* Sanity check the NFS transport protocol.
- *
*/
-static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
+static int nfs_validate_transport_protocol(struct fs_context *fc,
+ struct nfs_fs_context *ctx)
{
switch (ctx->nfs_server.protocol) {
case XPRT_TRANSPORT_UDP:
+ if (nfs_server_transport_udp_invalid(ctx))
+ goto out_invalid_transport_udp;
+ break;
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_RDMA:
break;
default:
ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
}
+ return 0;
+out_invalid_transport_udp:
+ return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
}
/*
*/
static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx)
{
- nfs_validate_transport_protocol(ctx);
-
if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP ||
ctx->mount_server.protocol == XPRT_TRANSPORT_TCP)
return;
struct nfs_fh *mntfh = ctx->mntfh;
struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
+ int ret;
if (data == NULL)
goto out_no_data;
memset(mntfh->data + mntfh->size, 0,
sizeof(mntfh->data) - mntfh->size);
+ /*
+ * for proto == XPRT_TRANSPORT_UDP, which is what uses
+ * to_exponential, implying shift: limit the shift value
+ * to BITS_PER_LONG (majortimeo is unsigned long)
+ */
+ if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */
+ if (data->retrans >= 64) /* shift value is too large */
+ goto out_invalid_data;
+
/*
* Translate to nfs_fs_context, which nfs_fill_super
* can deal with.
goto generic;
}
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
+
ctx->skip_reconfig_option_check = true;
return 0;
out_invalid_fh:
return nfs_invalf(fc, "NFS: invalid root filehandle");
+
+out_invalid_data:
+ return nfs_invalf(fc, "NFS: invalid binary mount data");
}
#if IS_ENABLED(CONFIG_NFS_V4)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ int ret;
char *c;
if (!data) {
ctx->acdirmin = data->acdirmin;
ctx->acdirmax = data->acdirmax;
ctx->nfs_server.protocol = data->proto;
- nfs_validate_transport_protocol(ctx);
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
done:
ctx->skip_reconfig_option_check = true;
return 0;
out_no_address:
return nfs_invalf(fc, "NFS4: mount program didn't pass remote address");
-
-out_invalid_transport_udp:
- return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
}
#endif
if (!nfs_verify_server_address(sap))
goto out_no_address;
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
+
if (ctx->version == 4) {
if (IS_ENABLED(CONFIG_NFS_V4)) {
if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
port = NFS_PORT;
max_namelen = NFS4_MAXNAMLEN;
max_pathlen = NFS4_MAXPATHLEN;
- nfs_validate_transport_protocol(ctx);
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL |
NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK |
NFS_MOUNT_LOCAL_FCNTL);
}
} else {
nfs_set_mount_transport_protocol(ctx);
-#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
-#endif
if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
port = NFS_RDMA_PORT;
}
out_v4_not_compiled:
nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel");
return -EPROTONOSUPPORT;
-out_invalid_transport_udp:
- return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
out_no_address:
return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
out_mountproto_mismatch:
return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
}
-static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags)
+static bool nfs_check_cache_flags_invalid(struct inode *inode,
+ unsigned long flags)
{
unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- /* Special case for the pagecache or access cache */
- if (flags == NFS_INO_REVAL_PAGECACHE &&
- !(cache_validity & NFS_INO_REVAL_FORCED))
- return false;
return (cache_validity & flags) != 0;
}
-static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags)
-{
- unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-
- if ((cache_validity & flags) != 0)
- return true;
- if (nfs_attribute_timeout(inode))
- return true;
- return false;
-}
-
bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
{
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
- return nfs_check_cache_invalid_delegated(inode, flags);
-
- return nfs_check_cache_invalid_not_delegated(inode, flags);
+ if (nfs_check_cache_flags_invalid(inode, flags))
+ return true;
+ return nfs_attribute_cache_expired(inode);
}
EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
if (have_delegation) {
if (!(flags & NFS_INO_REVAL_FORCED))
- flags &= ~NFS_INO_INVALID_OTHER;
- flags &= ~(NFS_INO_INVALID_CHANGE
- | NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_INVALID_XATTR);
- }
+ flags &= ~(NFS_INO_INVALID_MODE |
+ NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_XATTR);
+ flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
+ } else if (flags & NFS_INO_REVAL_PAGECACHE)
+ flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE;
if (!nfs_has_xattr_cache(nfsi))
flags &= ~NFS_INO_INVALID_XATTR;
+ if (flags & NFS_INO_INVALID_DATA)
+ nfs_fscache_invalidate(inode);
if (inode->i_mapping->nrpages == 0)
flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
+ flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED);
nfsi->cache_validity |= flags;
- if (flags & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
}
EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
.fattr = fattr
};
struct inode *inode = ERR_PTR(-ENOENT);
+ u64 fattr_supported = NFS_SB(sb)->fattr_valid;
unsigned long hash;
nfs_attr_check_mountpoint(sb, fattr);
inode->i_mode = fattr->mode;
nfsi->cache_validity = 0;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
- && nfs_server_capable(inode, NFS_CAP_MODE))
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ && (fattr_supported & NFS_ATTR_FATTR_MODE))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
nfsi->attr_gencount = fattr->gencount;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
- else if (nfs_server_capable(inode, NFS_CAP_ATIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_ATIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
- else if (nfs_server_capable(inode, NFS_CAP_MTIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
- else if (nfs_server_capable(inode, NFS_CAP_CTIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode_set_iversion_raw(inode, fattr->change_attr);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE);
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
set_nlink(inode, fattr->nlink);
- else if (nfs_server_capable(inode, NFS_CAP_NLINK))
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
- else if (nfs_server_capable(inode, NFS_CAP_OWNER))
+ else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
- else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
+ else if (fattr_supported & NFS_ATTR_FATTR_GROUP)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
if (nfs_server_capable(inode, NFS_CAP_XATTR))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
+ else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED &&
+ fattr->size != 0)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
/*
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- }
-
- if (nfsi->cache_validity != 0)
- nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+ } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED &&
+ fattr->size != 0)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
nfs_setsecurity(inode, fattr, label);
}
/* Optimization: if the end result is no change, don't RPC */
- attr->ia_valid &= NFS_VALID_ATTRS;
- if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
+ if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
trace_nfs_setattr_enter(inode);
spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & ATTR_SIZE) != 0) {
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
+ NFS_INO_INVALID_BLOCKS);
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME;
+ if ((attr->ia_valid & ATTR_KILL_SUID) != 0 &&
+ inode->i_mode & S_ISUID)
+ inode->i_mode &= ~S_ISUID;
+ if ((attr->ia_valid & ATTR_KILL_SGID) != 0 &&
+ (inode->i_mode & (S_ISGID | S_IXGRP)) ==
+ (S_ISGID | S_IXGRP))
+ inode->i_mode &= ~S_ISGID;
if ((attr->ia_valid & ATTR_MODE) != 0) {
int mode = attr->ia_mode & S_IALLUGO;
mode |= inode->i_mode & ~S_IALLUGO;
dput(parent);
}
-static bool nfs_need_revalidate_inode(struct inode *inode)
+static u32 nfs_get_valid_attrmask(struct inode *inode)
{
- if (NFS_I(inode)->cache_validity &
- (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
- return true;
- if (nfs_attribute_cache_expired(inode))
- return true;
- return false;
+ unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ u32 reply_mask = STATX_INO | STATX_TYPE;
+
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ reply_mask |= STATX_ATIME;
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ reply_mask |= STATX_CTIME;
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ reply_mask |= STATX_MTIME;
+ if (!(cache_validity & NFS_INO_INVALID_SIZE))
+ reply_mask |= STATX_SIZE;
+ if (!(cache_validity & NFS_INO_INVALID_NLINK))
+ reply_mask |= STATX_NLINK;
+ if (!(cache_validity & NFS_INO_INVALID_MODE))
+ reply_mask |= STATX_MODE;
+ if (!(cache_validity & NFS_INO_INVALID_OTHER))
+ reply_mask |= STATX_UID | STATX_GID;
+ if (!(cache_validity & NFS_INO_INVALID_BLOCKS))
+ reply_mask |= STATX_BLOCKS;
+ return reply_mask;
}
int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
trace_nfs_getattr_enter(inode);
+ request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
+ STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
+ STATX_INO | STATX_SIZE | STATX_BLOCKS;
+
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
nfs_readdirplus_parent_cache_hit(path->dentry);
- goto out_no_update;
+ goto out_no_revalidate;
}
/* Flush out writes to the server in order to update c/mtime. */
/* Check whether the cached attributes are stale */
do_update |= force_sync || nfs_attribute_cache_expired(inode);
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- do_update |= cache_validity &
- (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL);
+ do_update |= cache_validity & NFS_INO_INVALID_CHANGE;
if (request_mask & STATX_ATIME)
do_update |= cache_validity & NFS_INO_INVALID_ATIME;
- if (request_mask & (STATX_CTIME|STATX_MTIME))
- do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
+ if (request_mask & STATX_CTIME)
+ do_update |= cache_validity & NFS_INO_INVALID_CTIME;
+ if (request_mask & STATX_MTIME)
+ do_update |= cache_validity & NFS_INO_INVALID_MTIME;
+ if (request_mask & STATX_SIZE)
+ do_update |= cache_validity & NFS_INO_INVALID_SIZE;
+ if (request_mask & STATX_NLINK)
+ do_update |= cache_validity & NFS_INO_INVALID_NLINK;
+ if (request_mask & STATX_MODE)
+ do_update |= cache_validity & NFS_INO_INVALID_MODE;
+ if (request_mask & (STATX_UID | STATX_GID))
+ do_update |= cache_validity & NFS_INO_INVALID_OTHER;
if (request_mask & STATX_BLOCKS)
do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
+
if (do_update) {
/* Update the attribute cache */
if (!(server->flags & NFS_MOUNT_NOAC))
nfs_readdirplus_parent_cache_hit(path->dentry);
out_no_revalidate:
/* Only return attributes that were revalidated. */
- stat->result_mask &= request_mask;
-out_no_update:
+ stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
+
generic_fillattr(&init_user_ns, inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
if (S_ISDIR(inode->i_mode))
{
struct nfs_inode *nfsi;
struct inode *inode;
- struct nfs_server *server;
if (!(ctx->mode & FMODE_WRITE))
return;
return;
if (!list_empty(&nfsi->open_files))
return;
- server = NFS_SERVER(inode);
- if (server->flags & NFS_MOUNT_NOCTO)
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)
return;
- nfs_revalidate_inode(server, inode);
+ nfs_revalidate_inode(inode,
+ NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
}
EXPORT_SYMBOL_GPL(nfs_close_context);
/**
* nfs_revalidate_inode - Revalidate the inode attributes
- * @server: pointer to nfs_server struct
* @inode: pointer to inode struct
+ * @flags: cache flags to check
*
* Updates inode attribute information by retrieving the data from the server.
*/
-int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+int nfs_revalidate_inode(struct inode *inode, unsigned long flags)
{
- if (!nfs_need_revalidate_inode(inode))
+ if (!nfs_check_cache_invalid(inode, flags))
return NFS_STALE(inode) ? -ESTALE : 0;
- return __nfs_revalidate_inode(server, inode);
+ return __nfs_revalidate_inode(NFS_SERVER(inode), inode);
}
EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
bool nfs_mapping_need_revalidate_inode(struct inode *inode)
{
- return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+ return nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE) ||
NFS_STALE(inode);
}
if (!nfs_file_has_buffered_writers(nfsi)) {
/* Verify a few of the more important attributes */
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
- invalid |= NFS_INO_INVALID_CHANGE
- | NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_CHANGE;
ts = inode->i_mtime;
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size);
if (cur_size != new_isize)
- invalid |= NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_SIZE;
}
}
/* Have any file permissions changed? */
if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_MODE;
if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_OTHER;
if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_OTHER;
/* Has the link count changed? */
if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
- invalid |= NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_NLINK;
ts = inode->i_atime;
if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
#endif
/**
- * nfs_inode_attrs_need_update - check if the inode attributes need updating
+ * nfs_inode_attrs_cmp_generic - compare attributes
+ * @fattr: attributes
* @inode: pointer to inode
+ *
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
+ * Note also the check for wraparound of 'attr_gencount'
+ *
+ * The function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. Otherwise it returns
+ * the value '0'.
+ */
+static int nfs_inode_attrs_cmp_generic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
+
+ return (long)(fattr->gencount - attr_gencount) > 0 ||
+ (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0;
+}
+
+/**
+ * nfs_inode_attrs_cmp_monotonic - compare attributes
* @fattr: attributes
+ * @inode: pointer to inode
*
* Attempt to divine whether or not an RPC call reply carrying stale
* attributes got scheduled after another call carrying updated ones.
*
- * To do so, the function first assumes that a more recent ctime means
- * that the attributes in fattr are newer, however it also attempt to
- * catch the case where ctime either didn't change, or went backwards
- * (if someone reset the clock on the server) by looking at whether
- * or not this RPC call was started after the inode was last updated.
- * Note also the check for wraparound of 'attr_gencount'
+ * We assume that the server observes monotonic semantics for
+ * the change attribute, so a larger value means that the attributes in
+ * @fattr are more recent, in which case the function returns the
+ * value '1'.
+ * A return value of '0' indicates no measurable change
+ * A return value of '-1' means that the attributes in @inode are
+ * more recent.
+ */
+static int nfs_inode_attrs_cmp_monotonic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ s64 diff = fattr->change_attr - inode_peek_iversion_raw(inode);
+ if (diff > 0)
+ return 1;
+ return diff == 0 ? 0 : -1;
+}
+
+/**
+ * nfs_inode_attrs_cmp_strict_monotonic - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
*
- * The function returns 'true' if it thinks the attributes in 'fattr' are
- * more recent than the ones cached in the inode.
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
*
+ * We assume that the server observes strictly monotonic semantics for
+ * the change attribute, so a larger value means that the attributes in
+ * @fattr are more recent, in which case the function returns the
+ * value '1'.
+ * A return value of '-1' means that the attributes in @inode are
+ * more recent or unchanged.
*/
-static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
+static int nfs_inode_attrs_cmp_strict_monotonic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
{
- const struct nfs_inode *nfsi = NFS_I(inode);
+ return nfs_inode_attrs_cmp_monotonic(fattr, inode) > 0 ? 1 : -1;
+}
- return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
- ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+/**
+ * nfs_inode_attrs_cmp - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * This function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. It returns '-1' if
+ * the attributes in @inode are more recent than the ones in @fattr,
+ * and it returns 0 if not sure.
+ */
+static int nfs_inode_attrs_cmp(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ if (nfs_inode_attrs_cmp_generic(fattr, inode) > 0)
+ return 1;
+ switch (NFS_SERVER(inode)->change_attr_type) {
+ case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+ break;
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+ break;
+ return nfs_inode_attrs_cmp_monotonic(fattr, inode);
+ default:
+ if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+ break;
+ return nfs_inode_attrs_cmp_strict_monotonic(fattr, inode);
+ }
+ return 0;
+}
+
+/**
+ * nfs_inode_finish_partial_attr_update - complete a previous inode update
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Returns '1' if the last attribute update left the inode cached
+ * attributes in a partially unrevalidated state, and @fattr
+ * matches the change attribute of that partial update.
+ * Otherwise returns '0'.
+ */
+static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ const unsigned long check_valid =
+ NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK;
+ unsigned long cache_validity = NFS_I(inode)->cache_validity;
+
+ if (!(cache_validity & NFS_INO_INVALID_CHANGE) &&
+ (cache_validity & check_valid) != 0 &&
+ (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
+ nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0)
+ return 1;
+ return 0;
}
-static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
+static int nfs_refresh_inode_locked(struct inode *inode,
+ struct nfs_fattr *fattr)
{
- int ret;
+ int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
+ int ret = 0;
trace_nfs_refresh_inode_enter(inode);
- if (nfs_inode_attrs_need_update(inode, fattr))
+ if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode))
ret = nfs_update_inode(inode, fattr);
- else
+ else if (attr_cmp == 0)
ret = nfs_check_inode_attributes(inode, fattr);
trace_nfs_refresh_inode_exit(inode, ret);
*/
int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
{
+ int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
int status;
/* Don't do a WCC update if these attributes are already stale */
- if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
- !nfs_inode_attrs_need_update(inode, fattr)) {
+ if (attr_cmp < 0)
+ return 0;
+ if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) {
fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
| NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PREMTIME
*/
static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
- struct nfs_server *server;
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_isize, new_isize;
+ u64 fattr_supported = server->fattr_valid;
unsigned long invalid = 0;
unsigned long now = jiffies;
unsigned long save_cache_validity;
goto out_err;
}
- server = NFS_SERVER(inode);
/* Update the fsid? */
if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED
- | NFS_INO_REVAL_PAGECACHE
| NFS_INO_INVALID_BLOCKS);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
if (pnfs_layoutcommit_outstanding(inode)) {
- nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR;
+ nfsi->cache_validity |=
+ save_cache_validity &
+ (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS);
cache_revalidated = false;
}
save_cache_validity |= NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_MTIME
| NFS_INO_INVALID_SIZE
+ | NFS_INO_INVALID_BLOCKS
+ | NFS_INO_INVALID_NLINK
+ | NFS_INO_INVALID_MODE
| NFS_INO_INVALID_OTHER;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
attr_changed = true;
}
} else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_CHANGE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_REVAL_FORCED);
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_CHANGE;
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
inode->i_mtime = fattr->mtime;
- } else if (server->caps & NFS_CAP_MTIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_MTIME
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_MTIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_MTIME;
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
inode->i_ctime = fattr->ctime;
- } else if (server->caps & NFS_CAP_CTIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_CTIME
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_CTIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_CTIME;
cache_revalidated = false;
}
(long long)cur_isize,
(long long)new_isize);
}
+ if (new_isize == 0 &&
+ !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED |
+ NFS_ATTR_FATTR_BLOCKS_USED))) {
+ fattr->du.nfs3.used = 0;
+ fattr->valid |= NFS_ATTR_FATTR_SPACE_USED;
+ }
} else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_REVAL_FORCED);
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_SIZE;
cache_revalidated = false;
}
-
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
- else if (server->caps & NFS_CAP_ATIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_ATIME
- | NFS_INO_REVAL_FORCED);
+ else if (fattr_supported & NFS_ATTR_FATTR_ATIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_ATIME;
cache_revalidated = false;
}
| NFS_INO_INVALID_ACL;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_MODE) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_MODE) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_MODE;
cache_revalidated = false;
}
inode->i_uid = fattr->uid;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_OWNER) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_OTHER;
cache_revalidated = false;
}
inode->i_gid = fattr->gid;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_OWNER_GROUP) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_OTHER;
cache_revalidated = false;
}
set_nlink(inode, fattr->nlink);
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_NLINK) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_NLINK;
cache_revalidated = false;
}
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+ } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_BLOCKS;
+ cache_revalidated = false;
+ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) {
inode->i_blocks = fattr->du.nfs2.blocks;
- else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_BLOCKS
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_BLOCKS;
cache_revalidated = false;
}
/* Update attrtimeo value if we're out of the unstable period */
if (attr_changed) {
- invalid &= ~NFS_INO_INVALID_ATTR;
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
nfsi->attrtimeo_timestamp = now;
}
/* Set the barrier to be more recent than this fattr */
- if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ if ((long)(fattr->gencount - nfsi->attr_gencount) > 0)
nfsi->attr_gencount = fattr->gencount;
}
struct net *net;
};
-extern int nfs_mount(struct nfs_mount_request *info);
+extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans);
extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
}
/**
- * nfs_end_io_direct - declare the file is being used for direct i/o
+ * nfs_start_io_direct - declare the file is being used for direct i/o
* @inode: file inode
*
* Declare that a direct I/O operation is about to start, and ensure
/**
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments
+ * @timeo: deciseconds the mount waits for a response before it retries
+ * @retrans: number of times the mount retries a request
*
- * Uses default timeout parameters specified by underlying transport. On
- * successful return, the auth_flavs list and auth_flav_len will be populated
- * with the list from the server or a faked-up list if the server didn't
- * provide one.
+ * Uses timeout parameters specified by caller. On successful return, the
+ * auth_flavs list and auth_flav_len will be populated with the list from the
+ * server or a faked-up list if the server didn't provide one.
*/
-int nfs_mount(struct nfs_mount_request *info)
+int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans)
{
+ struct rpc_timeout mnt_timeout;
struct mountres result = {
.fh = info->fh,
.auth_count = info->auth_flav_len,
.protocol = info->protocol,
.address = info->sap,
.addrsize = info->salen,
+ .timeout = &mnt_timeout,
.servername = info->hostname,
.program = &mnt_program,
.version = info->version,
if (info->noresvport)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+ nfs_init_timeout_values(&mnt_timeout, info->protocol, timeo, retrans);
mnt_clnt = rpc_create(&args);
if (IS_ERR(mnt_clnt))
goto out_clnt_err;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
return ERR_PTR(-EOPNOTSUPP);
- status = nfs_revalidate_inode(server, inode);
+ status = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (status < 0)
return ERR_PTR(status);
if (unlikely(!p))
return -EIO;
length = be32_to_cpup(p++);
- if (unlikely(length > NFS3_FHSIZE))
+ if (unlikely(length > NFS3_FHSIZE || length == 0))
goto out_toobig;
p = xdr_inline_decode(xdr, length);
if (unlikely(!p))
memcpy(fh->data, p, length);
return 0;
out_toobig:
- dprintk("NFS: file handle size (%u) too big\n", length);
+ trace_nfs_xdr_bad_filehandle(xdr, NFSERR_BADHANDLE);
return -E2BIG;
}
/* ignore properties */
result->lease_time = 0;
+ result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
return 0;
}
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
+ u32 bitmask[3];
struct nfs42_falloc_args args = {
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
.falloc_length = len,
- .falloc_bitmask = nfs4_fattr_bitmap,
+ .falloc_bitmask = bitmask,
};
struct nfs42_falloc_res res = {
.falloc_server = server,
return status;
}
+ memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
+ if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
+ bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
res.falloc_fattr = nfs_alloc_fattr();
if (!res.falloc_fattr)
return -ENOMEM;
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
if (status == 0)
- status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+ status = nfs_post_op_update_inode_force_wcc(inode,
+ res.falloc_fattr);
kfree(res.falloc_fattr);
return status;
static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
loff_t offset, loff_t len)
{
- struct nfs_server *server = NFS_SERVER(file_inode(filep));
+ struct inode *inode = file_inode(filep);
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
struct nfs_lock_context *lock;
int err;
if (IS_ERR(lock))
return PTR_ERR(lock);
- exception.inode = file_inode(filep);
+ exception.inode = inode;
exception.state = lock->open_context->state;
+ err = nfs_sync_inode(inode);
+ if (err)
+ goto out;
+
do {
err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
if (err == -ENOTSUPP) {
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
-
+out:
nfs_put_lock_context(lock);
return err;
}
return -EOPNOTSUPP;
inode_lock(inode);
- err = nfs_sync_inode(inode);
- if (err)
- goto out_unlock;
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0)
truncate_pagecache_range(inode, offset, (offset + len) -1);
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
-out_unlock:
+
inode_unlock(inode);
return err;
}
return status;
}
+/**
+ * nfs42_copy_dest_done - perform inode cache updates after clone/copy offload
+ * @inode: pointer to destination inode
+ * @pos: destination offset
+ * @len: copy length
+ *
+ * Punch a hole in the inode page cache, so that the NFS client will
+ * know to retrieve new data.
+ * Update the file size if necessary, and then mark the inode as having
+ * invalid cached values for change attribute, ctime, mtime and space used.
+ */
+static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
+{
+ loff_t newsize = pos + len;
+ loff_t end = newsize - 1;
+
+ truncate_pagecache_range(inode, pos, end);
+ spin_lock(&inode->i_lock);
+ if (newsize > i_size_read(inode))
+ i_size_write(inode, newsize);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME |
+ NFS_INO_INVALID_BLOCKS);
+ spin_unlock(&inode->i_lock);
+}
+
static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_lock_context *src_lock,
struct file *dst,
goto out;
}
- truncate_pagecache_range(dst_inode, pos_dst,
- pos_dst + res->write_res.count);
- spin_lock(&dst_inode->i_lock);
- nfs_set_cache_invalid(
- dst_inode, NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED |
- NFS_INO_INVALID_SIZE | NFS_INO_INVALID_ATTR |
- NFS_INO_INVALID_DATA);
- spin_unlock(&dst_inode->i_lock);
- spin_lock(&src_inode->i_lock);
- nfs_set_cache_invalid(src_inode, NFS_INO_REVAL_PAGECACHE |
- NFS_INO_REVAL_FORCED |
- NFS_INO_INVALID_ATIME);
- spin_unlock(&src_inode->i_lock);
+ nfs42_copy_dest_done(dst_inode, pos_dst, res->write_res.count);
+ nfs_invalidate_atime(src_inode);
status = res->write_res.count;
out:
if (args->sync)
if (status)
return status;
- return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+ if (whence == SEEK_DATA && res.sr_eof)
+ return -NFS4ERR_NXIO;
+ else
+ return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
- if (status == 0)
+ if (status == 0) {
+ nfs42_copy_dest_done(dst_inode, dst_offset, count);
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
+ }
kfree(res.dst_fattr);
return status;
* make it easier to copy the value after an RPC, even if
* the value will not be passed up to application (e.g.
* for a 'query' getxattr with NULL buffer).
- * @len: Length of the value. Can be 0 for zero-length attribues.
+ * @len: Length of the value. Can be 0 for zero-length attributes.
* @value and @pages will be NULL if @len is 0.
*/
static struct nfs4_xattr_entry *
*/
void nfs42_ssc_register_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs42_ssc_register(&nfs4_ssc_clnt_ops_tbl);
-#endif
}
/**
*/
void nfs42_ssc_unregister_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs42_ssc_unregister(&nfs4_ssc_clnt_ops_tbl);
-#endif
}
#endif /* CONFIG_NFS_V4_2 */
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
#endif
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label);
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
+ const __u32 *src, struct inode *inode,
+ struct nfs_server *server,
+ struct nfs4_label *label);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static inline struct nfs4_label *
| FATTR4_WORD1_FS_LAYOUT_TYPES,
FATTR4_WORD2_LAYOUT_BLKSIZE
| FATTR4_WORD2_CLONE_BLKSIZE
+ | FATTR4_WORD2_CHANGE_ATTR_TYPE
| FATTR4_WORD2_XATTR_SUPPORT
};
};
static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
- struct inode *inode)
+ struct inode *inode, unsigned long flags)
{
unsigned long cache_validity;
if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
return;
- cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- if (!(cache_validity & NFS_INO_REVAL_FORCED))
- cache_validity &= ~(NFS_INO_INVALID_CHANGE
- | NFS_INO_INVALID_SIZE);
+ cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;
+ /* Remove the attributes over which we have full control */
+ dst[1] &= ~FATTR4_WORD1_RAWDEV;
if (!(cache_validity & NFS_INO_INVALID_SIZE))
dst[0] &= ~FATTR4_WORD0_SIZE;
if (!(cache_validity & NFS_INO_INVALID_CHANGE))
dst[0] &= ~FATTR4_WORD0_CHANGE;
-}
-static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst,
- const __u32 *src, struct inode *inode)
-{
- nfs4_bitmap_copy_adjust(dst, src, inode);
+ if (!(cache_validity & NFS_INO_INVALID_MODE))
+ dst[1] &= ~FATTR4_WORD1_MODE;
+ if (!(cache_validity & NFS_INO_INVALID_OTHER))
+ dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
}
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
static void
nfs4_inc_nlink_locked(struct inode *inode)
{
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_NLINK);
inc_nlink(inode);
}
+static void
+nfs4_inc_nlink(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ nfs4_inc_nlink_locked(inode);
+ spin_unlock(&inode->i_lock);
+}
+
static void
nfs4_dec_nlink_locked(struct inode *inode)
{
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_NLINK);
drop_nlink(inode);
}
unsigned long timestamp, unsigned long cache_validity)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ u64 change_attr = inode_peek_iversion_raw(inode);
cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
- if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) {
- nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+ switch (NFS_SERVER(inode)->change_attr_type) {
+ case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+ break;
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ if ((s64)(change_attr - cinfo->after) > 0)
+ goto out;
+ break;
+ default:
+ if ((s64)(change_attr - cinfo->after) >= 0)
+ goto out;
+ }
+
+ if (cinfo->atomic && cinfo->before == change_attr) {
nfsi->attrtimeo_timestamp = jiffies;
} else {
if (S_ISDIR(inode->i_mode)) {
cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- if (cinfo->before != inode_peek_iversion_raw(inode))
+ if (cinfo->before != change_attr)
cache_validity |= NFS_INO_INVALID_ACCESS |
NFS_INO_INVALID_ACL |
NFS_INO_INVALID_XATTR;
inode_set_iversion_raw(inode, cinfo->after);
nfsi->read_cache_jiffies = timestamp;
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
- nfs_set_cache_invalid(inode, cache_validity);
nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
+out:
+ nfs_set_cache_invalid(inode, cache_validity);
}
void
.inode = inode,
.stateid = &arg.stateid,
};
+ unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
int err;
+ if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
+ adjust_flags |= NFS_INO_INVALID_MODE;
+ if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
+ adjust_flags |= NFS_INO_INVALID_OTHER;
+
do {
- nfs4_bitmap_copy_adjust_setattr(bitmask,
- nfs4_bitmask(server, olabel),
- inode);
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, olabel),
+ inode, adjust_flags);
err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx);
switch (err) {
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
if (!nfs4_have_delegation(inode, FMODE_READ)) {
- calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
- nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL);
+ nfs4_bitmask_set(calldata->arg.bitmask_store,
+ server->cache_consistency_bitmask,
+ inode, server, NULL);
+ calldata->arg.bitmask = calldata->arg.bitmask_store;
} else
calldata->arg.bitmask = NULL;
}
res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
}
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
- server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
- NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
- NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
- NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
- NFS_CAP_CTIME|NFS_CAP_MTIME|
- NFS_CAP_SECURITY_LABEL);
+ server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
+ NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL);
+ server->fattr_valid = NFS_ATTR_FATTR_V4;
if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
server->caps |= NFS_CAP_ACLS;
server->caps |= NFS_CAP_HARDLINKS;
if (res.has_symlinks != 0)
server->caps |= NFS_CAP_SYMLINKS;
- if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
- server->caps |= NFS_CAP_FILEID;
- if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
- server->caps |= NFS_CAP_MODE;
- if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
- server->caps |= NFS_CAP_NLINK;
- if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
- server->caps |= NFS_CAP_OWNER;
- if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
- server->caps |= NFS_CAP_OWNER_GROUP;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
- server->caps |= NFS_CAP_ATIME;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
- server->caps |= NFS_CAP_CTIME;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
- server->caps |= NFS_CAP_MTIME;
+ if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_MODE;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_NLINK;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER))
+ server->fattr_valid &= ~(NFS_ATTR_FATTR_OWNER |
+ NFS_ATTR_FATTR_OWNER_NAME);
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP))
+ server->fattr_valid &= ~(NFS_ATTR_FATTR_GROUP |
+ NFS_ATTR_FATTR_GROUP_NAME);
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_SPACE_USED))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_SPACE_USED;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_ATIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
- if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
- server->caps |= NFS_CAP_SECURITY_LABEL;
+ if (!(res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_V4_SECURITY_LABEL;
#endif
memcpy(server->attr_bitmask_nl, res.attr_bitmask,
sizeof(server->attr_bitmask));
if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
task_flags |= RPC_TASK_TIMEOUT;
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
-
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode, 0);
nfs_fattr_init(fattr);
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
return nfs4_do_call_sync(server->client, server, &msg,
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
- NFS_INO_INVALID_DATA);
/* Removing a directory decrements nlink in the parent */
if (ftype == NF4DIR && dir->i_nlink > 2)
nfs4_dec_nlink_locked(dir);
+ nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
+ NFS_INO_INVALID_DATA);
spin_unlock(&dir->i_lock);
}
return status;
/* Note: If we moved a directory, nlink will change */
nfs4_update_changeattr(old_dir, &res->old_cinfo,
res->old_fattr->time_start,
- NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK |
NFS_INO_INVALID_DATA);
nfs4_update_changeattr(new_dir, &res->new_cinfo,
res->new_fattr->time_start,
- NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK |
NFS_INO_INVALID_DATA);
} else
nfs4_update_changeattr(old_dir, &res->old_cinfo,
}
nfs4_inode_make_writeable(inode);
- nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode);
-
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.label), inode,
+ NFS_INO_INVALID_CHANGE);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
NFS_INO_INVALID_DATA);
+ nfs4_inc_nlink(inode);
status = nfs_post_op_update_inode(inode, res.fattr);
if (!status)
nfs_setsecurity(inode, res.fattr, res.label);
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
- data->res.fattr->time_start,
- NFS_INO_INVALID_DATA);
/* Creating a directory bumps nlink in the parent */
if (data->arg.ftype == NF4DIR)
nfs4_inc_nlink_locked(dir);
+ nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
spin_unlock(&dir->i_lock);
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
}
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label)
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
+ struct inode *inode, struct nfs_server *server,
+ struct nfs4_label *label)
{
-
unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ unsigned int i;
- if ((cache_validity & NFS_INO_INVALID_DATA) ||
- (cache_validity & NFS_INO_REVAL_PAGECACHE) ||
- (cache_validity & NFS_INO_REVAL_FORCED) ||
- (cache_validity & NFS_INO_INVALID_OTHER))
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
+ memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
+ if (cache_validity & NFS_INO_INVALID_CHANGE)
+ bitmask[0] |= FATTR4_WORD0_CHANGE;
if (cache_validity & NFS_INO_INVALID_ATIME)
bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
+ if (cache_validity & NFS_INO_INVALID_MODE)
+ bitmask[1] |= FATTR4_WORD1_MODE;
if (cache_validity & NFS_INO_INVALID_OTHER)
- bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
- FATTR4_WORD1_OWNER_GROUP |
- FATTR4_WORD1_NUMLINKS;
+ bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
+ if (cache_validity & NFS_INO_INVALID_NLINK)
+ bitmask[1] |= FATTR4_WORD1_NUMLINKS;
if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
- if (cache_validity & NFS_INO_INVALID_CHANGE)
- bitmask[0] |= FATTR4_WORD0_CHANGE;
if (cache_validity & NFS_INO_INVALID_CTIME)
bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
if (cache_validity & NFS_INO_INVALID_MTIME)
bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
- if (cache_validity & NFS_INO_INVALID_SIZE)
- bitmask[0] |= FATTR4_WORD0_SIZE;
if (cache_validity & NFS_INO_INVALID_BLOCKS)
bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
+ if (cache_validity & NFS_INO_INVALID_SIZE)
+ bitmask[0] |= FATTR4_WORD0_SIZE;
+
+ for (i = 0; i < NFS4_BITMASK_SZ; i++)
+ bitmask[i] &= server->attr_bitmask[i];
}
static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
hdr->args.bitmask = NULL;
hdr->res.fattr = NULL;
} else {
- hdr->args.bitmask = server->cache_consistency_bitmask;
- nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL);
+ nfs4_bitmask_set(hdr->args.bitmask_store,
+ server->cache_consistency_bitmask,
+ hdr->inode, server, NULL);
+ hdr->args.bitmask = hdr->args.bitmask_store;
}
if (!hdr->pgio_done_cb)
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
- ret = nfs_revalidate_inode(server, inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret < 0)
return ret;
if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
- data->args.bitmask = server->cache_consistency_bitmask;
- nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL);
+ nfs4_bitmask_set(data->args.bitmask_store,
+ server->cache_consistency_bitmask, inode, server,
+ NULL);
+ data->args.bitmask = data->args.bitmask_store;
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
data->res.fattr = &data->fattr;
#ifdef CONFIG_NFS_V4_1
struct nfs4_lock_waiter {
- struct task_struct *task;
struct inode *inode;
- struct nfs_lowner *owner;
+ struct nfs_lowner owner;
+ wait_queue_entry_t wait;
};
static int
nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
{
- int ret;
- struct nfs4_lock_waiter *waiter = wait->private;
+ struct nfs4_lock_waiter *waiter =
+ container_of(wait, struct nfs4_lock_waiter, wait);
/* NULL key means to wake up everyone */
if (key) {
struct cb_notify_lock_args *cbnl = key;
struct nfs_lowner *lowner = &cbnl->cbnl_owner,
- *wowner = waiter->owner;
+ *wowner = &waiter->owner;
/* Only wake if the callback was for the same owner. */
if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev)
return 0;
}
- /* override "private" so we can use default_wake_function */
- wait->private = waiter->task;
- ret = woken_wake_function(wait, mode, flags, key);
- if (ret)
- list_del_init(&wait->entry);
- wait->private = waiter;
- return ret;
+ return woken_wake_function(wait, mode, flags, key);
}
static int
nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- int status = -ERESTARTSYS;
struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs_client *clp = server->nfs_client;
wait_queue_head_t *q = &clp->cl_lock_waitq;
- struct nfs_lowner owner = { .clientid = clp->cl_clientid,
- .id = lsp->ls_seqid.owner_id,
- .s_dev = server->s_dev };
- struct nfs4_lock_waiter waiter = { .task = current,
- .inode = state->inode,
- .owner = &owner};
- wait_queue_entry_t wait;
+ struct nfs4_lock_waiter waiter = {
+ .inode = state->inode,
+ .owner = { .clientid = clp->cl_clientid,
+ .id = lsp->ls_seqid.owner_id,
+ .s_dev = server->s_dev },
+ };
+ int status;
/* Don't bother with waitqueue if we don't expect a callback */
if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
return nfs4_retry_setlk_simple(state, cmd, request);
- init_wait(&wait);
- wait.private = &waiter;
- wait.func = nfs4_wake_lock_waiter;
+ init_wait(&waiter.wait);
+ waiter.wait.func = nfs4_wake_lock_waiter;
+ add_wait_queue(q, &waiter.wait);
- while(!signalled()) {
- add_wait_queue(q, &wait);
+ do {
status = nfs4_proc_setlk(state, cmd, request);
- if ((status != -EAGAIN) || IS_SETLK(cmd)) {
- finish_wait(q, &wait);
+ if (status != -EAGAIN || IS_SETLK(cmd))
break;
- }
status = -ERESTARTSYS;
freezer_do_not_count();
- wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);
+ wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
+ NFS4_LOCK_MAXTIMEOUT);
freezer_count();
- finish_wait(q, &wait);
- }
+ } while (!signalled());
+
+ remove_wait_queue(q, &waiter.wait);
return status;
}
return -EACCES;
}
- ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret)
return ret;
return 0;
}
- ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret)
return ret;
}
/**
- * nfs4_purge_state_owners - Release all cached state owners
+ * nfs4_free_state_owners - Release all cached state owners
* @head: resulting list of state owners
*
* Frees a list of state owners that was generated by
)
)
-TRACE_EVENT(nfs4_xdr_status,
+TRACE_EVENT(nfs4_xdr_bad_operation,
+ TP_PROTO(
+ const struct xdr_stream *xdr,
+ u32 op,
+ u32 expected
+ ),
+
+ TP_ARGS(xdr, op, expected),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, op)
+ __field(u32, expected)
+ ),
+
+ TP_fast_assign(
+ const struct rpc_rqst *rqstp = xdr->rqst;
+ const struct rpc_task *task = rqstp->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->op = op;
+ __entry->expected = expected;
+ ),
+
+ TP_printk(
+ "task:%u@%d xid=0x%08x operation=%u, expected=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->op, __entry->expected
+ )
+);
+
+DECLARE_EVENT_CLASS(nfs4_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
u32 op,
__entry->op
)
);
+#define DEFINE_NFS4_XDR_EVENT(name) \
+ DEFINE_EVENT(nfs4_xdr_event, name, \
+ TP_PROTO( \
+ const struct xdr_stream *xdr, \
+ u32 op, \
+ u32 error \
+ ), \
+ TP_ARGS(xdr, op, error))
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_status);
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_bad_filehandle);
DECLARE_EVENT_CLASS(nfs4_cb_error_class,
TP_PROTO(
* layout types will be returned.
*/
#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + \
- nfs4_fattr_bitmap_maxsz + 4 + 8 + 5)
+ nfs4_fattr_bitmap_maxsz + 1 + \
+ 1 /* lease time */ + \
+ 2 /* max filesize */ + \
+ 2 /* max read */ + \
+ 2 /* max write */ + \
+ nfstime4_maxsz /* time delta */ + \
+ 5 /* fs layout types */ + \
+ 1 /* layout blksize */ + \
+ 1 /* clone blksize */ + \
+ 1 /* change attr type */ + \
+ 1 /* xattr support */)
#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
#define decode_renew_maxsz (op_decode_hdr_maxsz)
#define encode_setclientid_maxsz \
*nfs_retval = nfs4_stat_to_errno(nfserr);
return true;
out_bad_operation:
- dprintk("nfs: Server returned operation"
- " %d but we issued a request for %d\n",
- opnum, expected);
+ trace_nfs4_xdr_bad_operation(xdr, opnum, expected);
*nfs_retval = -EREMOTEIO;
return false;
out_overflow:
if (unlikely(!p))
return -EIO;
len = be32_to_cpup(p);
- if (len > NFS4_FHSIZE)
- return -EIO;
+ if (len > NFS4_FHSIZE || len == 0) {
+ trace_nfs4_xdr_bad_filehandle(xdr, OP_READDIR,
+ NFS4ERR_BADHANDLE);
+ return -EREMOTEIO;
+ }
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
return -EIO;
return 0;
}
+static int decode_attr_change_attr_type(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ enum nfs4_change_attr_type *res)
+{
+ u32 tmp = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
+ dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
+ if (bitmap[2] & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+ if (xdr_stream_decode_u32(xdr, &tmp))
+ return -EIO;
+ bitmap[2] &= ~FATTR4_WORD2_CHANGE_ATTR_TYPE;
+ }
+
+ switch(tmp) {
+ case NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR:
+ case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER:
+ case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS:
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ *res = tmp;
+ break;
+ default:
+ *res = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+ }
+ return 0;
+}
+
static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
{
unsigned int savep;
if (status)
goto xdr_error;
+ status = decode_attr_change_attr_type(xdr, bitmap,
+ &fsinfo->change_attr_type);
+ if (status)
+ goto xdr_error;
+
status = decode_attr_xattrsupport(xdr, bitmap,
&fsinfo->xattr_support);
if (status)
if (unlikely(!p))
return -EIO;
len = be32_to_cpup(p);
- if (len > NFS4_FHSIZE)
- return -EIO;
+ if (len > NFS4_FHSIZE || len == 0) {
+ trace_nfs4_xdr_bad_filehandle(xdr, OP_GETFH, NFS4ERR_BADHANDLE);
+ return -EREMOTEIO;
+ }
fh->size = len;
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_bad_filehandle);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
+TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE);
#define nfs_show_cache_validity(v) \
__print_flags(v, "|", \
{ NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \
{ NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \
{ NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \
- { NFS_INO_INVALID_XATTR, "INVALID_XATTR" })
+ { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \
+ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \
+ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \
+ { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \
+ { NFS_INO_INVALID_MODE, "INVALID_MODE" })
TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS);
TRACE_DEFINE_ENUM(NFS_INO_STALE);
{ NFSERR_BADTYPE, "BADTYPE" }, \
{ NFSERR_JUKEBOX, "JUKEBOX" })
-TRACE_EVENT(nfs_xdr_status,
+DECLARE_EVENT_CLASS(nfs_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
int error
nfs_show_status(__entry->error)
)
);
+#define DEFINE_NFS_XDR_EVENT(name) \
+ DEFINE_EVENT(nfs_xdr_event, name, \
+ TP_PROTO( \
+ const struct xdr_stream *xdr, \
+ int error \
+ ), \
+ TP_ARGS(xdr, error))
+DEFINE_NFS_XDR_EVENT(nfs_xdr_status);
+DEFINE_NFS_XDR_EVENT(nfs_xdr_bad_filehandle);
#endif /* _TRACE_NFS_H */
}
/**
- * nfs_release_request - Release the count on an NFS read/write request
+ * nfs_free_request - Release the count on an NFS read/write request
* @req: request to release
*
* Note: Should never be called with the spinlock held!
}
/**
- * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
+ * __nfs_pageio_add_request - Attempt to coalesce a request into a page list.
* @desc: destination io descriptor
* @req: request
*
}
valid_layout = pnfs_layout_is_valid(lo);
pnfs_clear_layoutcommit(ino, &tmp_list);
- pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);
+ pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0);
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
struct pnfs_layout_range range = {
.iomode = IOMODE_ANY,
.length = NFS4_MAX_UINT64,
};
- pnfs_set_plh_return_info(lo, IOMODE_ANY, 0);
- pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
- &range, 0);
+ pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
goto out_forget;
} else {
/* We have a completely new layout */
assert_spin_locked(&lo->plh_inode->i_lock);
+ if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+ tmp_list = &lo->plh_return_segs;
+
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
dprintk("%s: marking lseg %p iomode %d "
lseg, lseg->pls_range.iomode,
lseg->pls_range.offset,
lseg->pls_range.length);
+ if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+ tmp_list = &lo->plh_return_segs;
if (mark_lseg_invalid(lseg, tmp_list))
continue;
remaining++;
info->dtpref = fsinfo.tsize;
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
+ info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
return 0;
}
#ifdef CONFIG_NFS_V4_2
static void nfs_ssc_register_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs_ssc_register(&nfs_ssc_clnt_ops_tbl);
-#endif
}
static void nfs_ssc_unregister_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl);
-#endif
}
#endif /* CONFIG_NFS_V4_2 */
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount(&request);
+ status = nfs_mount(&request, ctx->timeo, ctx->retrans);
if (status != 0) {
dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
request.hostname, status);
* with invalidate/truncate.
*/
spin_lock(&mapping->private_lock);
- if (!nfs_have_writebacks(inode) &&
- NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
- inode_inc_iversion_raw(inode);
if (likely(!PageSwapCache(req->wb_page))) {
set_bit(PG_MAPPED, &req->wb_flags);
SetPagePrivate(req->wb_page);
if (nfs_have_delegated_attributes(inode))
goto out;
if (nfsi->cache_validity &
- (NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
+ (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE))
return false;
smp_rmb();
if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
/* Deal with the suid/sgid bit corner case */
if (nfs_should_remove_suid(inode)) {
spin_lock(&inode->i_lock);
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
spin_unlock(&inode->i_lock);
}
return 0;
config NFSD_V4_2_INTER_SSC
bool "NFSv4.2 inter server to server COPY"
- depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
+ depends on NFSD_V4 && NFS_V4_2
help
This option enables support for NFSv4.2 inter server to
server copy where the destination server calls the NFSv4.2
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
#define FATTR4_WORD2_MODE_UMASK (1UL << 17)
#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18)
} u;
};
+enum nfs4_change_attr_type {
+ NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+ NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+ NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+ NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+ NFS4_CHANGE_TYPE_IS_UNDEFINED = 4,
+};
+
/*
* Options for setxattr. These match the flags for setxattr(2).
*/
BIT(13) /* Deferred cache invalidation */
#define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */
#define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */
+#define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */
+#define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */
#define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \
| NFS_INO_INVALID_CTIME \
| NFS_INO_INVALID_MTIME \
| NFS_INO_INVALID_SIZE \
+ | NFS_INO_INVALID_NLINK \
+ | NFS_INO_INVALID_MODE \
| NFS_INO_INVALID_OTHER) /* inode metadata is invalid */
/*
extern int nfs_permission(struct user_namespace *, struct inode *, int);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_attribute_cache_expired(struct inode *inode);
-extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
+extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_clear_invalid_mapping(struct address_space *mapping);
extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
#define NFS_MOUNT_WRITE_EAGER 0x01000000
#define NFS_MOUNT_WRITE_WAIT 0x02000000
+ unsigned int fattr_valid; /* Valid attributes */
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
unsigned int rpages; /* read size (in pages) */
#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
+ enum nfs4_change_attr_type
+ change_attr_type;/* Description of change attribute */
+
struct nfs_fsid fsid;
__u64 maxfilesize; /* maximum file size */
struct timespec64 time_delta; /* smallest time granularity */
#define NFS_CAP_SYMLINKS (1U << 2)
#define NFS_CAP_ACLS (1U << 3)
#define NFS_CAP_ATOMIC_OPEN (1U << 4)
-/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */
#define NFS_CAP_LGOPEN (1U << 5)
-#define NFS_CAP_FILEID (1U << 6)
-#define NFS_CAP_MODE (1U << 7)
-#define NFS_CAP_NLINK (1U << 8)
-#define NFS_CAP_OWNER (1U << 9)
-#define NFS_CAP_OWNER_GROUP (1U << 10)
-#define NFS_CAP_ATIME (1U << 11)
-#define NFS_CAP_CTIME (1U << 12)
-#define NFS_CAP_MTIME (1U << 13)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
#define NFS_DEF_FILE_IO_SIZE (4096U)
#define NFS_MIN_FILE_IO_SIZE (1024U)
+#define NFS_BITMASK_SZ 3
+
struct nfs4_string {
unsigned int len;
char *data;
__u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
__u32 blksize; /* preferred pnfs io block size */
__u32 clone_blksize; /* granularity of a CLONE operation */
+ enum nfs4_change_attr_type
+ change_attr_type; /* Info about change attr */
__u32 xattr_support; /* User xattrs supported */
};
struct nfs_seqid * seqid;
fmode_t fmode;
u32 share_access;
- u32 * bitmask;
+ const u32 * bitmask;
+ u32 bitmask_store[NFS_BITMASK_SZ];
struct nfs4_layoutreturn_args *lr_args;
};
struct nfs4_sequence_args seq_args;
const struct nfs_fh *fhandle;
const nfs4_stateid *stateid;
- u32 * bitmask;
+ const u32 *bitmask;
+ u32 bitmask_store[NFS_BITMASK_SZ];
struct nfs4_layoutreturn_args *lr_args;
};
union {
unsigned int replen; /* used by read */
struct {
- u32 * bitmask; /* used by write */
+ const u32 * bitmask; /* used by write */
+ u32 bitmask_store[NFS_BITMASK_SZ]; /* used by write */
enum nfs3_stable_how stable; /* used by write */
};
};
struct rpc_task * snd_task; /* Task blocked in send */
struct list_head xmit_queue; /* Send queue */
+ atomic_long_t xmit_queuelen;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
), \
TP_ARGS(wc, cid))
+DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class,
+ TP_PROTO(
+ const struct ib_wc *wc,
+ const struct rpc_rdma_cid *cid
+ ),
+
+ TP_ARGS(wc, cid),
+
+ TP_STRUCT__entry(
+ __field(u32, cq_id)
+ __field(int, completion_id)
+ __field(unsigned long, status)
+ __field(unsigned int, vendor_err)
+ ),
+
+ TP_fast_assign(
+ __entry->cq_id = cid->ci_queue_id;
+ __entry->completion_id = cid->ci_completion_id;
+ __entry->status = wc->status;
+ if (wc->status)
+ __entry->vendor_err = wc->vendor_err;
+ else
+ __entry->vendor_err = 0;
+ ),
+
+ TP_printk("cq.id=%u mr.id=%d status=%s (%lu/0x%x)",
+ __entry->cq_id, __entry->completion_id,
+ rdma_show_wc_status(__entry->status),
+ __entry->status, __entry->vendor_err
+ )
+);
+
+#define DEFINE_MR_COMPLETION_EVENT(name) \
+ DEFINE_EVENT(rpcrdma_mr_completion_class, name, \
+ TP_PROTO( \
+ const struct ib_wc *wc, \
+ const struct rpc_rdma_cid *cid \
+ ), \
+ TP_ARGS(wc, cid))
+
DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class,
TP_PROTO(
const struct ib_wc *wc,
TP_ARGS(r_xprt),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p",
- __get_str(addr), __get_str(port), __entry->r_xprt
+ TP_printk("peer=[%s]:%s",
+ __get_str(addr), __get_str(port)
)
);
TP_ARGS(r_xprt, rc),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(int, rc)
__field(int, connect_status)
__string(addr, rpcrdma_addrstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->rc = rc;
__entry->connect_status = r_xprt->rx_ep->re_connect_status;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s rc=%d connection status=%d",
+ __get_str(addr), __get_str(port),
__entry->rc, __entry->connect_status
)
);
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
TP_ARGS(r_xprt, delay),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned long, delay)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->delay = delay;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu",
- __get_str(addr), __get_str(port), __entry->r_xprt,
- __entry->delay
+ TP_printk("peer=[%s]:%s delay=%lu",
+ __get_str(addr), __get_str(port), __entry->delay
)
);
TP_ARGS(r_xprt, connect, reconnect),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned long, connect)
__field(unsigned long, reconnect)
__string(addr, rpcrdma_addrstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->connect = connect;
__entry->reconnect = reconnect;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s connect=%lu reconnect=%lu",
+ __get_str(addr), __get_str(port),
__entry->connect / HZ, __entry->reconnect / HZ
)
);
-TRACE_EVENT(xprtrdma_qp_event,
- TP_PROTO(
- const struct rpcrdma_ep *ep,
- const struct ib_event *event
- ),
-
- TP_ARGS(ep, event),
-
- TP_STRUCT__entry(
- __field(unsigned long, event)
- __string(name, event->device->name)
- __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
- __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
- ),
-
- TP_fast_assign(
- const struct rdma_cm_id *id = ep->re_id;
-
- __entry->event = event->event;
- __assign_str(name, event->device->name);
- memcpy(__entry->srcaddr, &id->route.addr.src_addr,
- sizeof(struct sockaddr_in6));
- memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
- sizeof(struct sockaddr_in6));
- ),
-
- TP_printk("%pISpc -> %pISpc device=%s %s (%lu)",
- __entry->srcaddr, __entry->dstaddr, __get_str(name),
- rdma_show_ib_event(__entry->event), __entry->event
- )
-);
-
/**
** Call events
**/
TP_ARGS(r_xprt, count),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
__field(unsigned int, count)
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->count = count;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
- __get_str(addr), __get_str(port), __entry->r_xprt,
- __entry->count
+ TP_printk("peer=[%s]:%s created %u MRs",
+ __get_str(addr), __get_str(port), __entry->count
)
);
TP_ARGS(r_xprt, count, status),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
+ __field(u32, cq_id)
__field(unsigned int, count)
__field(int, status)
__field(int, posted)
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
+ const struct rpcrdma_ep *ep = r_xprt->rx_ep;
+
+ __entry->cq_id = ep->re_attr.recv_cq->res.id;
__entry->count = count;
__entry->status = status;
- __entry->posted = r_xprt->rx_ep->re_receive_count;
+ __entry->posted = ep->re_receive_count;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)",
+ __get_str(addr), __get_str(port), __entry->cq_id,
__entry->count, __entry->posted, __entry->status
)
);
DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_fastreg);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_wake);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_done);
TRACE_EVENT(xprtrdma_frwr_alloc,
TP_PROTO(
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->rc = rc;
),
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->addr = mr->mr_sg->dma_address;
__entry->dir = mr->mr_dir;
__entry->nents = sg_nents;
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->addr = mr->mr_sg->dma_address;
__entry->dir = mr->mr_dir;
__entry->num_mapped = num_mapped;
)
);
+DEFINE_MR_EVENT(fastreg);
DEFINE_MR_EVENT(localinv);
+DEFINE_MR_EVENT(reminv);
DEFINE_MR_EVENT(map);
DEFINE_ANON_MR_EVENT(unmap);
-DEFINE_ANON_MR_EVENT(recycle);
TRACE_EVENT(xprtrdma_dma_maperr,
TP_PROTO(
TP_ARGS(r_xprt, reqs),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned int, reqs)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->reqs = reqs;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs",
- __get_str(addr), __get_str(port),
- __entry->r_xprt, __entry->reqs
+ TP_printk("peer=[%s]:%s %u reqs",
+ __get_str(addr), __get_str(port), __entry->reqs
)
);
__entry->seqno, __entry->status)
);
+TRACE_EVENT(xprt_retransmit,
+ TP_PROTO(
+ const struct rpc_rqst *rqst
+ ),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(int, ntrans)
+ __field(int, version)
+ __string(progname,
+ rqst->rq_task->tk_client->cl_program->name)
+ __string(procedure,
+ rqst->rq_task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_fast_assign(
+ struct rpc_task *task = rqst->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->ntrans = rqst->rq_ntrans;
+ __assign_str(progname,
+ task->tk_client->cl_program->name)
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_printk(
+ "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->ntrans)
+);
+
TRACE_EVENT(xprt_ping,
TP_PROTO(const struct rpc_xprt *xprt, int status),
DEFINE_WRITELOCK_EVENT(reserve_xprt);
DEFINE_WRITELOCK_EVENT(release_xprt);
-DEFINE_WRITELOCK_EVENT(transmit_queued);
DECLARE_EVENT_CLASS(xprt_cong_event,
TP_PROTO(
status = xprt->ops->buf_alloc(task);
trace_rpc_buf_alloc(task, status);
- xprt_inject_disconnect(xprt);
if (status == 0)
return;
if (status != -ENOMEM) {
task->tk_flags &= ~RPC_CALL_MAJORSEEN;
}
- /*
- * Ensure that we see all writes made by xprt_complete_rqst()
- * before it changed req->rq_reply_bytes_recvd.
- */
- smp_rmb();
-
/*
* Did we ever call xprt_complete_rqst()? If not, we should assume
* the message is incomplete.
if (!req->rq_reply_bytes_recvd)
goto out;
+ /* Ensure that we see all writes made by xprt_complete_rqst()
+ * before it changed req->rq_reply_bytes_recvd.
+ */
+ smp_rmb();
+
req->rq_rcv_buf.len = req->rq_private_buf.len;
trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf);
const char *hostname,
struct sockaddr *srvaddr, size_t salen,
int proto, u32 version,
- const struct cred *cred)
+ const struct cred *cred,
+ const struct rpc_timeout *timeo)
{
struct rpc_create_args args = {
.net = net,
.protocol = proto,
.address = srvaddr,
.addrsize = salen,
+ .timeout = timeo,
.servername = hostname,
.nodename = nodename,
.program = &rpcb_program,
clnt->cl_nodename,
xprt->servername, sap, salen,
xprt->prot, bind_version,
- clnt->cl_cred);
+ clnt->cl_cred,
+ task->tk_client->cl_timeout);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
goto bailout_nofree;
const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
int status = 0;
- if (time_before(jiffies, req->rq_minortimeo))
- return status;
if (time_before(jiffies, req->rq_majortimeo)) {
+ if (time_before(jiffies, req->rq_minortimeo))
+ return status;
if (to->to_exponential)
req->rq_timeout <<= 1;
else
list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
INIT_LIST_HEAD(&req->rq_xmit2);
out:
+ atomic_long_inc(&xprt->xmit_queuelen);
set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
}
}
} else
list_del(&req->rq_xmit2);
+ atomic_long_dec(&req->rq_xprt->xmit_queuelen);
}
/**
struct rpc_xprt *xprt = req->rq_xprt;
if (!xprt_lock_write(xprt, task)) {
- trace_xprt_transmit_queued(xprt, task);
-
/* Race breaker: someone may have transmitted us */
if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
rpc_wake_up_queued_task_set_status(&xprt->sending,
void xprt_end_transmit(struct rpc_task *task)
{
- xprt_release_write(task->tk_rqstp->rq_xprt, task);
+ struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
+
+ xprt_inject_disconnect(xprt);
+ xprt_release_write(xprt, task);
}
/**
return status;
}
- if (is_retrans)
+ if (is_retrans) {
task->tk_client->cl_stats->rpcretrans++;
+ trace_xprt_retransmit(req);
+ }
xprt_inject_disconnect(xprt);
spin_unlock(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
- xprt_inject_disconnect(xprt);
xdr_free_bvec(&req->rq_rcv_buf);
xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_rep *rep = req->rl_reply;
struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- rpcrdma_recv_buffer_put(req->rl_reply);
+ rpcrdma_rep_put(&r_xprt->rx_buf, rep);
req->rl_reply = NULL;
spin_lock(&xprt->bc_pa_lock);
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-/**
- * frwr_release_mr - Destroy one MR
- * @mr: MR allocated by frwr_mr_init
- *
- */
-void frwr_release_mr(struct rpcrdma_mr *mr)
+static void frwr_cid_init(struct rpcrdma_ep *ep,
+ struct rpcrdma_mr *mr)
{
- int rc;
+ struct rpc_rdma_cid *cid = &mr->mr_cid;
- rc = ib_dereg_mr(mr->frwr.fr_mr);
- if (rc)
- trace_xprtrdma_frwr_dereg(mr, rc);
- kfree(mr->mr_sg);
- kfree(mr);
+ cid->ci_queue_id = ep->re_attr.send_cq->res.id;
+ cid->ci_completion_id = mr->mr_ibmr->res.id;
}
static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
}
}
-static void frwr_mr_recycle(struct rpcrdma_mr *mr)
+/**
+ * frwr_mr_release - Destroy one MR
+ * @mr: MR allocated by frwr_mr_init
+ *
+ */
+void frwr_mr_release(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
- trace_xprtrdma_mr_recycle(mr);
-
- frwr_mr_unmap(r_xprt, mr);
+ int rc;
- spin_lock(&r_xprt->rx_buf.rb_lock);
- list_del(&mr->mr_all);
- r_xprt->rx_stats.mrs_recycled++;
- spin_unlock(&r_xprt->rx_buf.rb_lock);
+ frwr_mr_unmap(mr->mr_xprt, mr);
- frwr_release_mr(mr);
+ rc = ib_dereg_mr(mr->mr_ibmr);
+ if (rc)
+ trace_xprtrdma_frwr_dereg(mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static void frwr_mr_put(struct rpcrdma_mr *mr)
goto out_list_err;
mr->mr_xprt = r_xprt;
- mr->frwr.fr_mr = frmr;
+ mr->mr_ibmr = frmr;
mr->mr_device = NULL;
INIT_LIST_HEAD(&mr->mr_list);
- init_completion(&mr->frwr.fr_linv_done);
+ init_completion(&mr->mr_linv_done);
+ frwr_cid_init(ep, mr);
sg_init_table(sg, depth);
mr->mr_sg = sg;
ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+ ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;
ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
ep->re_max_rdma_segs =
goto out_dmamap_err;
mr->mr_device = ep->re_id->device;
- ibmr = mr->frwr.fr_mr;
+ ibmr = mr->mr_ibmr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
if (n != dma_nents)
goto out_mapmr_err;
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);
- reg_wr = &mr->frwr.fr_regwr;
+ reg_wr = &mr->mr_regwr;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
* @cq: completion queue
* @wc: WCE for a completed FastReg WR
*
+ * Each flushed MR gets destroyed after the QP has drained.
*/
static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
- /* The MR will get recycled when the associated req is retransmitted */
+ trace_xprtrdma_wc_fastreg(wc, &mr->mr_cid);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
-static void frwr_cid_init(struct rpcrdma_ep *ep,
- struct rpcrdma_frwr *frwr)
-{
- struct rpc_rdma_cid *cid = &frwr->fr_cid;
-
- cid->ci_queue_id = ep->re_attr.send_cq->res.id;
- cid->ci_completion_id = frwr->fr_mr->res.id;
-}
-
/**
* frwr_send - post Send WRs containing the RPC Call message
* @r_xprt: controlling transport instance
*/
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
+ struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
- struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
+ unsigned int num_wrs;
- post_wr = &req->rl_wr;
+ num_wrs = 1;
+ post_wr = send_wr;
list_for_each_entry(mr, &req->rl_registered, mr_list) {
- struct rpcrdma_frwr *frwr;
-
- frwr = &mr->frwr;
-
- frwr->fr_cqe.done = frwr_wc_fastreg;
- frwr_cid_init(ep, frwr);
- frwr->fr_regwr.wr.next = post_wr;
- frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
- frwr->fr_regwr.wr.num_sge = 0;
- frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
- frwr->fr_regwr.wr.send_flags = 0;
+ trace_xprtrdma_mr_fastreg(mr);
+
+ mr->mr_cqe.done = frwr_wc_fastreg;
+ mr->mr_regwr.wr.next = post_wr;
+ mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
+ mr->mr_regwr.wr.num_sge = 0;
+ mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
+ mr->mr_regwr.wr.send_flags = 0;
+ post_wr = &mr->mr_regwr.wr;
+ ++num_wrs;
+ }
- post_wr = &frwr->fr_regwr.wr;
+ if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
+ send_wr->send_flags |= IB_SEND_SIGNALED;
+ ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
+ num_wrs - ep->re_send_count);
+ } else {
+ send_wr->send_flags &= ~IB_SEND_SIGNALED;
+ ep->re_send_count -= num_wrs;
}
+ trace_xprtrdma_post_send(req);
return ib_post_send(ep->re_id->qp, post_wr, NULL);
}
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
+ trace_xprtrdma_mr_reminv(mr);
frwr_mr_put(mr);
break; /* only one invalidated MR per RPC */
}
static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
- if (wc->status != IB_WC_SUCCESS)
- frwr_mr_recycle(mr);
- else
+ if (likely(wc->status == IB_WC_SUCCESS))
frwr_mr_put(mr);
}
static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
+ trace_xprtrdma_wc_li(wc, &mr->mr_cid);
frwr_mr_done(wc, mr);
rpcrdma_flush_disconnect(cq->cq_context, wc);
static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
+ trace_xprtrdma_wc_li_wake(wc, &mr->mr_cid);
frwr_mr_done(wc, mr);
- complete(&frwr->fr_linv_done);
+ complete(&mr->mr_linv_done);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
struct ib_send_wr *first, **prev, *last;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
int rc;
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- frwr = NULL;
prev = &first;
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
- frwr = &mr->frwr;
- frwr->fr_cqe.done = frwr_wc_localinv;
- frwr_cid_init(ep, frwr);
- last = &frwr->fr_invwr;
+ last = &mr->mr_invwr;
last->next = NULL;
- last->wr_cqe = &frwr->fr_cqe;
+ last->wr_cqe = &mr->mr_cqe;
last->sg_list = NULL;
last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
+ last->wr_cqe->done = frwr_wc_localinv;
+
*prev = last;
prev = &last->next;
}
+ mr = container_of(last, struct rpcrdma_mr, mr_invwr);
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
* are complete.
*/
- frwr->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&frwr->fr_linv_done);
+ last->wr_cqe->done = frwr_wc_localinv_wake;
+ reinit_completion(&mr->mr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* not happen, so don't wait in that case.
*/
if (bad_wr != first)
- wait_for_completion(&frwr->fr_linv_done);
+ wait_for_completion(&mr->mr_linv_done);
if (!rc)
return;
- /* Recycle MRs in the LOCAL_INV chain that did not get posted.
- */
+ /* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
- while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr,
- fr_invwr);
- mr = container_of(frwr, struct rpcrdma_mr, frwr);
- bad_wr = bad_wr->next;
-
- list_del_init(&mr->mr_list);
- frwr_mr_recycle(mr);
- }
}
/**
static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
- struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
+ struct rpcrdma_rep *rep;
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
- frwr_mr_done(wc, mr);
+ trace_xprtrdma_wc_li_done(wc, &mr->mr_cid);
- /* Ensure @rep is generated before frwr_mr_done */
+ /* Ensure that @rep is generated before the MR is released */
+ rep = mr->mr_req->rl_reply;
smp_rmb();
- rpcrdma_complete_rqst(rep);
- rpcrdma_flush_disconnect(cq->cq_context, wc);
+ if (wc->status != IB_WC_SUCCESS) {
+ if (rep)
+ rpcrdma_unpin_rqst(rep);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
+ return;
+ }
+ frwr_mr_put(mr);
+ rpcrdma_complete_rqst(rep);
}
/**
{
struct ib_send_wr *first, *last, **prev;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
- const struct ib_send_wr *bad_wr;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
int rc;
/* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- frwr = NULL;
prev = &first;
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
- frwr = &mr->frwr;
- frwr->fr_cqe.done = frwr_wc_localinv;
- frwr_cid_init(ep, frwr);
- last = &frwr->fr_invwr;
+ last = &mr->mr_invwr;
last->next = NULL;
- last->wr_cqe = &frwr->fr_cqe;
+ last->wr_cqe = &mr->mr_cqe;
last->sg_list = NULL;
last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
+ last->wr_cqe->done = frwr_wc_localinv;
+
*prev = last;
prev = &last->next;
}
* are complete. The last completion will wake up the
* RPC waiter.
*/
- frwr->fr_cqe.done = frwr_wc_localinv_done;
+ last->wr_cqe->done = frwr_wc_localinv_done;
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless re_id->qp is a valid pointer.
*/
- bad_wr = NULL;
- rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
+ rc = ib_post_send(ep->re_id->qp, first, NULL);
if (!rc)
return;
- /* Recycle MRs in the LOCAL_INV chain that did not get posted.
- */
+ /* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
- while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
- mr = container_of(frwr, struct rpcrdma_mr, frwr);
- bad_wr = bad_wr->next;
-
- frwr_mr_recycle(mr);
- }
/* The final LOCAL_INV WR in the chain is supposed to
- * do the wake. If it was never posted, the wake will
- * not happen, so wake here in that case.
+ * do the wake. If it was never posted, the wake does
+ * not happen. Unpin the rqst in preparation for its
+ * retransmission.
*/
- rpcrdma_complete_rqst(req->rl_reply);
+ rpcrdma_unpin_rqst(req->rl_reply);
}
return -EIO;
}
-/* Perform XID lookup, reconstruction of the RPC reply, and
- * RPC completion while holding the transport lock to ensure
- * the rep, rqst, and rq_task pointers remain stable.
+/**
+ * rpcrdma_unpin_rqst - Release rqst without completing it
+ * @rep: RPC/RDMA Receive context
+ *
+ * This is done when a connection is lost so that a Reply
+ * can be dropped and its matching Call can be subsequently
+ * retransmitted on a new connection.
+ */
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
+{
+ struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
+ struct rpc_rqst *rqst = rep->rr_rqst;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+
+ req->rl_reply = NULL;
+ rep->rr_rqst = NULL;
+
+ spin_lock(&xprt->queue_lock);
+ xprt_unpin_rqst(rqst);
+ spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * rpcrdma_complete_rqst - Pass completed rqst back to RPC
+ * @rep: RPC/RDMA Receive context
+ *
+ * Reconstruct the RPC reply and complete the transaction
+ * while @rqst is still pinned to ensure the rep, rqst, and
+ * rq_task pointers remain stable.
*/
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
+ rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
+ false);
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
- rpcrdma_post_recvs(r_xprt, false);
req = rpcr_to_rdmar(rqst);
if (unlikely(req->rl_reply))
- rpcrdma_recv_buffer_put(req->rl_reply);
+ rpcrdma_rep_put(buf, req->rl_reply);
req->rl_reply = rep;
rep->rr_rqst = rqst;
trace_xprtrdma_reply_short_err(rep);
out:
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_put(buf, rep);
}
* xprt_rdma_inject_disconnect - inject a connection fault
* @xprt: transport context
*
- * If @xprt is connected, disconnect it to simulate spurious connection
- * loss.
+ * If @xprt is connected, disconnect it to simulate spurious
+ * connection loss. Caller must hold @xprt's send lock to
+ * ensure that data structures and hardware resources are
+ * stable during the rdma_disconnect() call.
*/
static void
xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct rdma_cm_id *id = ep->re_id;
+ /* Wait for rpcrdma_post_recvs() to leave its critical
+ * section.
+ */
+ if (atomic_inc_return(&ep->re_receiving) > 1)
+ wait_for_completion(&ep->re_done);
+
/* Flush Receives, then wait for deferred Reply work
* to complete.
*/
rpcrdma_ep_put(ep);
}
-/**
- * rpcrdma_qp_event_handler - Handle one QP event (error notification)
- * @event: details of the event
- * @context: ep that owns QP where event occurred
- *
- * Called from the RDMA provider (device driver) possibly in an interrupt
- * context. The QP is always destroyed before the ID, so the ID will be
- * reliably available when this handler is invoked.
- */
-static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
-{
- struct rpcrdma_ep *ep = context;
-
- trace_xprtrdma_qp_event(ep, event);
-}
-
/* Ensure xprt_force_disconnect() is invoked exactly once when a
* connection is closed or lost. (The important thing is it needs
* to be invoked "at least" once).
out_flushed:
rpcrdma_flush_disconnect(r_xprt, wc);
- rpcrdma_rep_destroy(rep);
+ rpcrdma_rep_put(&r_xprt->rx_buf, rep);
}
static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
__module_get(THIS_MODULE);
device = id->device;
ep->re_id = id;
+ reinit_completion(&ep->re_done);
ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
ep->re_inline_send = xprt_rdma_max_inline_write;
r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests);
- ep->re_attr.event_handler = rpcrdma_qp_event_handler;
- ep->re_attr.qp_context = ep;
ep->re_attr.srq = NULL;
ep->re_attr.cap.max_inline_data = 0;
ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
- rpcrdma_post_recvs(r_xprt, true);
+ rpcrdma_post_recvs(r_xprt, 1, true);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
rpcrdma_req_reset(req);
}
-/* No locking needed here. This function is called only by the
- * Receive completion handler.
- */
static noinline
struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
bool temp)
{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
rep->rr_temp = temp;
- list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps);
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_all, &buf->rb_all_reps);
+ spin_unlock(&buf->rb_lock);
return rep;
out_free_regbuf:
return NULL;
}
-/* No locking needed here. This function is invoked only by the
- * Receive completion handler, or during transport shutdown.
- */
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
{
- list_del(&rep->rr_all);
rpcrdma_regbuf_free(rep->rr_rdmabuf);
kfree(rep);
}
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_free(rep);
+}
+
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
return llist_entry(node, struct rpcrdma_rep, rr_node);
}
-static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
- struct rpcrdma_rep *rep)
+/**
+ * rpcrdma_rep_put - Release rpcrdma_rep back to free list
+ * @buf: buffer pool
+ * @rep: rep to release
+ *
+ */
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep)
{
llist_add(&rep->rr_node, &buf->rb_free_reps);
}
+/* Caller must ensure the QP is quiescent (RQ is drained) before
+ * invoking this function, to guarantee rb_all_reps is not
+ * changing.
+ */
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
- rep->rr_temp = true;
+ rep->rr_temp = true; /* Mark this rep for destruction */
}
}
{
struct rpcrdma_rep *rep;
- while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
- rpcrdma_rep_destroy(rep);
+ spin_lock(&buf->rb_lock);
+ while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
+ struct rpcrdma_rep,
+ rr_all)) != NULL) {
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_free(rep);
+
+ spin_lock(&buf->rb_lock);
+ }
+ spin_unlock(&buf->rb_lock);
}
/**
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
- frwr_release_mr(mr);
+ frwr_mr_release(mr);
}
rpcrdma_regbuf_free(req->rl_recvbuf);
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
- frwr_release_mr(mr);
+ frwr_mr_release(mr);
spin_lock(&buf->rb_lock);
}
spin_unlock(&buffers->rb_lock);
}
-/**
- * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
- * @rep: rep to release
- *
- * Used after error conditions.
- */
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
-{
- rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
-}
-
/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
*/
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
- struct ib_send_wr *send_wr = &req->rl_wr;
- struct rpcrdma_ep *ep = r_xprt->rx_ep;
- int rc;
-
- if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) {
- send_wr->send_flags |= IB_SEND_SIGNALED;
- ep->re_send_count = ep->re_send_batch;
- } else {
- send_wr->send_flags &= ~IB_SEND_SIGNALED;
- --ep->re_send_count;
- }
-
- trace_xprtrdma_post_send(req);
- rc = frwr_send(r_xprt, req);
- if (rc)
+ if (frwr_send(r_xprt, req))
return -ENOTCONN;
return 0;
}
/**
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
- * @temp: mark Receive buffers to be deleted after use
+ * @needed: current credit grant
+ * @temp: mark Receive buffers to be deleted after one use
*
*/
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_recv_wr *wr, *bad_wr;
struct rpcrdma_rep *rep;
- int needed, count, rc;
+ int count, rc;
rc = 0;
count = 0;
- needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
if (likely(ep->re_receive_count > needed))
goto out;
needed -= ep->re_receive_count;
if (!temp)
needed += RPCRDMA_MAX_RECV_BATCH;
+ if (atomic_inc_return(&ep->re_receiving) > 1)
+ goto out;
+
/* fast path: all needed reps can be found on the free list */
wr = NULL;
while (needed) {
rc = ib_post_recv(ep->re_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
+ if (atomic_dec_return(&ep->re_receiving) > 0)
+ complete(&ep->re_done);
+
out:
trace_xprtrdma_post_recvs(r_xprt, count, rc);
if (rc) {
rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
wr = wr->next;
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_put(buf, rep);
--count;
}
}
unsigned int re_max_inline_recv;
int re_async_rc;
int re_connect_status;
+ atomic_t re_receiving;
atomic_t re_force_disconnect;
struct ib_qp_init_attr re_attr;
wait_queue_head_t re_connect_wait;
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*/
-struct rpcrdma_frwr {
- struct ib_mr *fr_mr;
- struct ib_cqe fr_cqe;
- struct rpc_rdma_cid fr_cid;
- struct completion fr_linv_done;
- union {
- struct ib_reg_wr fr_regwr;
- struct ib_send_wr fr_invwr;
- };
-};
-
struct rpcrdma_req;
struct rpcrdma_mr {
struct list_head mr_list;
struct rpcrdma_req *mr_req;
+
+ struct ib_mr *mr_ibmr;
struct ib_device *mr_device;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
- struct rpcrdma_frwr frwr;
+ struct ib_cqe mr_cqe;
+ struct completion mr_linv_done;
+ union {
+ struct ib_reg_wr mr_regwr;
+ struct ib_send_wr mr_invwr;
+ };
struct rpcrdma_xprt *mr_xprt;
u32 mr_handle;
u32 mr_length;
u64 mr_offset;
struct list_head mr_all;
+ struct rpc_rdma_cid mr_cid;
};
/*
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
struct rpcrdma_req *req);
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
gfp_t flags);
void frwr_reset(struct rpcrdma_req *req);
int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device);
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr);
-void frwr_release_mr(struct rpcrdma_mr *mr);
+void frwr_mr_release(struct rpcrdma_mr *mr);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, __be32 xid,
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep);
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
struct rpc_rqst *req;
ssize_t ret;
+ /* Is this transport associated with the backchannel? */
+ if (!xprt->bc_serv)
+ return -ESHUTDOWN;
+
/* Look up and lock the request corresponding to the given XID */
req = xprt_lookup_bc_request(xprt, transport->recv.xid);
if (!req) {
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
req->rq_xtime = ktime_get();
+ tcp_sock_set_cork(transport->inet, true);
while (1) {
status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent);
if (likely(req->rq_bytes_sent >= msglen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
transport->xmit.offset = 0;
+ if (atomic_long_read(&xprt->xmit_queuelen) == 1)
+ tcp_sock_set_cork(transport->inet, false);
return 0;
}
}
xs_tcp_set_socket_timeouts(xprt, sock);
+ tcp_sock_set_nodelay(sk);
write_lock_bh(&sk->sk_callback_lock);
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
- tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
xprt_clear_connected(xprt);