nfsd: Replace use of rwsem with errseq_t
author Trond Myklebust <trond.myklebust@hammerspace.com>
Sun, 19 Dec 2021 01:38:01 +0000 (20:38 -0500)
committer Chuck Lever <chuck.lever@oracle.com>
Sat, 8 Jan 2022 19:42:02 +0000 (14:42 -0500)
The nfsd_file nf_rwsem is currently being used to separate file write
and commit instances to ensure that we catch errors and apply them to
the correct write/commit.
We can improve scalability at the expense of a little accuracy (some
extra false positives) by replacing the nf_rwsem with more careful
use of the errseq_t mechanism to track errors across the different
operations.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[ cel: rebased on zero-verifier fix ]

fs/nfsd/filecache.c
fs/nfsd/filecache.h
fs/nfsd/nfs4proc.c
fs/nfsd/vfs.c

index aa5dca4..e290454 100644 (file)
@@ -189,7 +189,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
                                __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
                }
                nf->nf_mark = NULL;
-               init_rwsem(&nf->nf_rwsem);
                trace_nfsd_file_alloc(nf);
        }
        return nf;
index 7872df5..435ceab 100644 (file)
@@ -46,7 +46,6 @@ struct nfsd_file {
        refcount_t              nf_ref;
        unsigned char           nf_may;
        struct nfsd_file_mark   *nf_mark;
-       struct rw_semaphore     nf_rwsem;
 };
 
 int nfsd_file_cache_init(void);
index a6dc5e1..56405fc 100644 (file)
@@ -1510,6 +1510,9 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
 
 static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
 {
+       struct file *dst = copy->nf_dst->nf_file;
+       struct file *src = copy->nf_src->nf_file;
+       errseq_t since;
        ssize_t bytes_copied = 0;
        u64 bytes_total = copy->cp_count;
        u64 src_pos = copy->cp_src_pos;
@@ -1522,9 +1525,8 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
        do {
                if (kthread_should_stop())
                        break;
-               bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file,
-                               src_pos, copy->nf_dst->nf_file, dst_pos,
-                               bytes_total);
+               bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
+                                                   bytes_total);
                if (bytes_copied <= 0)
                        break;
                bytes_total -= bytes_copied;
@@ -1534,11 +1536,11 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
        } while (bytes_total > 0 && !copy->cp_synchronous);
        /* for a non-zero asynchronous copy do a commit of data */
        if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) {
-               down_write(&copy->nf_dst->nf_rwsem);
-               status = vfs_fsync_range(copy->nf_dst->nf_file,
-                                        copy->cp_dst_pos,
+               since = READ_ONCE(dst->f_wb_err);
+               status = vfs_fsync_range(dst, copy->cp_dst_pos,
                                         copy->cp_res.wr_bytes_written, 0);
-               up_write(&copy->nf_dst->nf_rwsem);
+               if (!status)
+                       status = filemap_check_wb_err(dst->f_mapping, since);
                if (!status)
                        copy->committed = true;
        }
index 74c3451..316ed70 100644 (file)
@@ -522,10 +522,11 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
 {
        struct file *src = nf_src->nf_file;
        struct file *dst = nf_dst->nf_file;
+       errseq_t since;
        loff_t cloned;
        __be32 ret = 0;
 
-       down_write(&nf_dst->nf_rwsem);
+       since = READ_ONCE(dst->f_wb_err);
        cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
        if (cloned < 0) {
                ret = nfserrno(cloned);
@@ -539,6 +540,8 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
                loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
                int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
 
+               if (!status)
+                       status = filemap_check_wb_err(dst->f_mapping, since);
                if (!status)
                        status = commit_inode_metadata(file_inode(src));
                if (status < 0) {
@@ -548,7 +551,6 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
                }
        }
 out_err:
-       up_write(&nf_dst->nf_rwsem);
        return ret;
 }
 
@@ -956,6 +958,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
        struct super_block      *sb = file_inode(file)->i_sb;
        struct svc_export       *exp;
        struct iov_iter         iter;
+       errseq_t                since;
        __be32                  nfserr;
        int                     host_err;
        int                     use_wgather;
@@ -993,8 +996,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
                flags |= RWF_SYNC;
 
        iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
+       since = READ_ONCE(file->f_wb_err);
        if (flags & RWF_SYNC) {
-               down_write(&nf->nf_rwsem);
                if (verf)
                        nfsd_copy_boot_verifier(verf,
                                        net_generic(SVC_NET(rqstp),
@@ -1003,15 +1006,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
                if (host_err < 0)
                        nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
                                                 nfsd_net_id));
-               up_write(&nf->nf_rwsem);
        } else {
-               down_read(&nf->nf_rwsem);
                if (verf)
                        nfsd_copy_boot_verifier(verf,
                                        net_generic(SVC_NET(rqstp),
                                        nfsd_net_id));
                host_err = vfs_iter_write(file, &iter, &pos, flags);
-               up_read(&nf->nf_rwsem);
        }
        if (host_err < 0) {
                nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
@@ -1021,6 +1021,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
        *cnt = host_err;
        nfsd_stats_io_write_add(exp, *cnt);
        fsnotify_modify(file);
+       host_err = filemap_check_wb_err(file->f_mapping, since);
+       if (host_err < 0)
+               goto out_nfserr;
 
        if (stable && use_wgather) {
                host_err = wait_for_concurrent_writes(file);
@@ -1101,19 +1104,6 @@ out:
 }
 
 #ifdef CONFIG_NFSD_V3
-static int
-nfsd_filemap_write_and_wait_range(struct nfsd_file *nf, loff_t offset,
-                                 loff_t end)
-{
-       struct address_space *mapping = nf->nf_file->f_mapping;
-       int ret = filemap_fdatawrite_range(mapping, offset, end);
-
-       if (ret)
-               return ret;
-       filemap_fdatawait_range_keep_errors(mapping, offset, end);
-       return 0;
-}
-
 /*
  * Commit all pending writes to stable storage.
  *
@@ -1144,25 +1134,25 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (err)
                goto out;
        if (EX_ISSYNC(fhp->fh_export)) {
-               int err2 = nfsd_filemap_write_and_wait_range(nf, offset, end);
+               errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
+               int err2;
 
-               down_write(&nf->nf_rwsem);
-               if (!err2)
-                       err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+               err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
                switch (err2) {
                case 0:
                        nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
                                                nfsd_net_id));
+                       err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+                                                   since);
                        break;
                case -EINVAL:
                        err = nfserr_notsupp;
                        break;
                default:
-                       err = nfserrno(err2);
                        nfsd_reset_boot_verifier(net_generic(nf->nf_net,
                                                 nfsd_net_id));
                }
-               up_write(&nf->nf_rwsem);
+               err = nfserrno(err2);
        } else
                nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
                                        nfsd_net_id));