Merge tag 'nfsd-4.17' of git://linux-nfs.org/~bfields/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Apr 2018 02:15:29 +0000 (19:15 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Apr 2018 02:15:29 +0000 (19:15 -0700)
Pull nfsd updates from Bruce Fields:
 "Chuck Lever did a bunch of work on nfsd tracepoints, on RDMA, and on
  server xdr decoding (with an eye towards eliminating a data copy in
  the RDMA case).

  I did some refactoring of the delegation code in preparation for
  eliminating some delegation self-conflicts and implementing write
  delegations"

* tag 'nfsd-4.17' of git://linux-nfs.org/~bfields/linux: (40 commits)
  nfsd: fix incorrect umasks
  sunrpc: remove incorrect HMAC request initialization
  NFSD: Clean up legacy NFS SYMLINK argument XDR decoders
  NFSD: Clean up legacy NFS WRITE argument XDR decoders
  nfsd: Trace NFSv4 COMPOUND execution
  nfsd: Add I/O trace points in the NFSv4 read proc
  nfsd: Add I/O trace points in the NFSv4 write path
  nfsd: Add "nfsd_" to trace point names
  nfsd: Record request byte count, not count of vectors
  nfsd: Fix NFSD trace points
  svc: Report xprt dequeue latency
  sunrpc: Report per-RPC execution stats
  sunrpc: Re-purpose trace_svc_process
  sunrpc: Save remote presentation address in svc_xprt for trace events
  sunrpc: Simplify trace_svc_recv
  sunrpc: Simplify do_enqueue tracing
  sunrpc: Move trace_svc_xprt_dequeue()
  sunrpc: Update show_svc_xprt_flags() to include recently added flags
  svc: Simplify ->xpo_secure_port
  sunrpc: Remove unneeded pointer dereference
  ...

31 files changed:
fs/lockd/svc.c
fs/nfsd/nfs3proc.c
fs/nfsd/nfs3xdr.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4layouts.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsfh.c
fs/nfsd/nfsproc.c
fs/nfsd/nfsxdr.c
fs/nfsd/trace.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/nfsd/xdr.h
fs/nfsd/xdr3.h
fs/nfsd/xdr4.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/svc_rdma.h
include/linux/sunrpc/svc_xprt.h
include/trace/events/sunrpc.h
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/auth_gss/gss_krb5_seal.c
net/sunrpc/auth_gss/gss_krb5_unseal.c
net/sunrpc/cache.c
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcsock.c
net/sunrpc/xprtrdma/svc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_transport.c

index 9c36d61..346ed16 100644 (file)
@@ -57,8 +57,8 @@ static struct task_struct     *nlmsvc_task;
 static struct svc_rqst         *nlmsvc_rqst;
 unsigned long                  nlmsvc_timeout;
 
-atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
-DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
+static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
 
 unsigned int lockd_net_id;
 
index 1d0ce3c..6259a4b 100644 (file)
@@ -192,6 +192,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
        struct nfsd3_writeres *resp = rqstp->rq_resp;
        __be32  nfserr;
        unsigned long cnt = argp->len;
+       unsigned int nvecs;
 
        dprintk("nfsd: WRITE(3)    %s %d bytes at %Lu%s\n",
                                SVCFH_fmt(&argp->fh),
@@ -201,9 +202,12 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
 
        fh_copy(&resp->fh, &argp->fh);
        resp->committed = argp->stable;
+       nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+       if (!nvecs)
+               RETURN_STATUS(nfserr_io);
        nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
-                               rqstp->rq_vec, argp->vlen,
-                               &cnt, resp->committed);
+                           rqstp->rq_vec, nvecs, &cnt,
+                           resp->committed);
        resp->count = cnt;
        RETURN_STATUS(nfserr);
 }
@@ -279,6 +283,16 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
        struct nfsd3_diropres *resp = rqstp->rq_resp;
        __be32  nfserr;
 
+       if (argp->tlen == 0)
+               RETURN_STATUS(nfserr_inval);
+       if (argp->tlen > NFS3_MAXPATHLEN)
+               RETURN_STATUS(nfserr_nametoolong);
+
+       argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+                                               argp->tlen);
+       if (IS_ERR(argp->tname))
+               RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
+
        dprintk("nfsd: SYMLINK(3)  %s %.*s -> %.*s\n",
                                SVCFH_fmt(&argp->ffh),
                                argp->flen, argp->fname,
index 1a70581..3192b54 100644 (file)
@@ -391,7 +391,7 @@ int
 nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd3_writeargs *args = rqstp->rq_argp;
-       unsigned int len, v, hdr, dlen;
+       unsigned int len, hdr, dlen;
        u32 max_blocksize = svc_max_payload(rqstp);
        struct kvec *head = rqstp->rq_arg.head;
        struct kvec *tail = rqstp->rq_arg.tail;
@@ -433,17 +433,9 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
                args->count = max_blocksize;
                len = args->len = max_blocksize;
        }
-       rqstp->rq_vec[0].iov_base = (void*)p;
-       rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
-       v = 0;
-       while (len > rqstp->rq_vec[v].iov_len) {
-               len -= rqstp->rq_vec[v].iov_len;
-               v++;
-               rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
-               rqstp->rq_vec[v].iov_len = PAGE_SIZE;
-       }
-       rqstp->rq_vec[v].iov_len = len;
-       args->vlen = v + 1;
+
+       args->first.iov_base = (void *)p;
+       args->first.iov_len = head->iov_len - hdr;
        return 1;
 }
 
@@ -489,51 +481,24 @@ int
 nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd3_symlinkargs *args = rqstp->rq_argp;
-       unsigned int len, avail;
-       char *old, *new;
-       struct kvec *vec;
+       char *base = (char *)p;
+       size_t dlen;
 
        if (!(p = decode_fh(p, &args->ffh)) ||
-           !(p = decode_filename(p, &args->fname, &args->flen))
-               )
+           !(p = decode_filename(p, &args->fname, &args->flen)))
                return 0;
        p = decode_sattr3(p, &args->attrs);
 
-       /* now decode the pathname, which might be larger than the first page.
-        * As we have to check for nul's anyway, we copy it into a new page
-        * This page appears in the rq_res.pages list, but as pages_len is always
-        * 0, it won't get in the way
-        */
-       len = ntohl(*p++);
-       if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
-               return 0;
-       args->tname = new = page_address(*(rqstp->rq_next_page++));
-       args->tlen = len;
-       /* first copy and check from the first page */
-       old = (char*)p;
-       vec = &rqstp->rq_arg.head[0];
-       if ((void *)old > vec->iov_base + vec->iov_len)
-               return 0;
-       avail = vec->iov_len - (old - (char*)vec->iov_base);
-       while (len && avail && *old) {
-               *new++ = *old++;
-               len--;
-               avail--;
-       }
-       /* now copy next page if there is one */
-       if (len && !avail && rqstp->rq_arg.page_len) {
-               avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
-               old = page_address(rqstp->rq_arg.pages[0]);
-       }
-       while (len && avail && *old) {
-               *new++ = *old++;
-               len--;
-               avail--;
-       }
-       *new = '\0';
-       if (len)
-               return 0;
+       args->tlen = ntohl(*p++);
+
+       args->first.iov_base = p;
+       args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
+       args->first.iov_len -= (char *)p - base;
 
+       dlen = args->first.iov_len + rqstp->rq_arg.page_len +
+              rqstp->rq_arg.tail[0].iov_len;
+       if (dlen < XDR_QUADLEN(args->tlen) << 2)
+               return 0;
        return 1;
 }
 
index 49b0a9e..1f04d2a 100644 (file)
@@ -223,8 +223,8 @@ static int nfs_cb_stat_to_errno(int status)
        return -status;
 }
 
-static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected,
-                              int *status)
+static int decode_cb_op_status(struct xdr_stream *xdr,
+                              enum nfs_cb_opnum4 expected, int *status)
 {
        __be32 *p;
        u32 op;
index 7d88836..228faf0 100644 (file)
@@ -165,7 +165,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
        struct nfs4_client *clp = ls->ls_stid.sc_client;
        struct nfs4_file *fp = ls->ls_stid.sc_file;
 
-       trace_layoutstate_free(&ls->ls_stid.sc_stateid);
+       trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid);
 
        spin_lock(&clp->cl_lock);
        list_del_init(&ls->ls_perclnt);
@@ -264,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
        list_add(&ls->ls_perfile, &fp->fi_lo_states);
        spin_unlock(&fp->fi_lock);
 
-       trace_layoutstate_alloc(&ls->ls_stid.sc_stateid);
+       trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid);
        return ls;
 }
 
@@ -334,7 +334,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
        if (list_empty(&ls->ls_layouts))
                goto out_unlock;
 
-       trace_layout_recall(&ls->ls_stid.sc_stateid);
+       trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
 
        refcount_inc(&ls->ls_stid.sc_count);
        nfsd4_run_cb(&ls->ls_recall);
@@ -507,7 +507,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
                                                false, lrp->lr_layout_type,
                                                &ls);
        if (nfserr) {
-               trace_layout_return_lookup_fail(&lrp->lr_sid);
+               trace_nfsd_layout_return_lookup_fail(&lrp->lr_sid);
                return nfserr;
        }
 
@@ -523,7 +523,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
                        nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid);
                lrp->lrs_present = 1;
        } else {
-               trace_layoutstate_unhash(&ls->ls_stid.sc_stateid);
+               trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
                nfs4_unhash_stid(&ls->ls_stid);
                lrp->lrs_present = 0;
        }
@@ -694,7 +694,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
                /*
                 * Unknown error or non-responding client, we'll need to fence.
                 */
-               trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
+               trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
 
                ops = nfsd4_layout_ops[ls->ls_layout_type];
                if (ops->fence_client)
@@ -703,7 +703,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
                        nfsd4_cb_layout_fail(ls);
                return -1;
        case -NFS4ERR_NOMATCHING_LAYOUT:
-               trace_layout_recall_done(&ls->ls_stid.sc_stateid);
+               trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
                task->tk_status = 0;
                return 1;
        }
@@ -716,7 +716,7 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb)
                container_of(cb, struct nfs4_layout_stateid, ls_recall);
        LIST_HEAD(reaplist);
 
-       trace_layout_recall_release(&ls->ls_stid.sc_stateid);
+       trace_nfsd_layout_recall_release(&ls->ls_stid.sc_stateid);
 
        nfsd4_return_all_layouts(ls, &reaplist);
        nfsd4_free_layouts(&reaplist);
index a0bed2b..5d99e88 100644 (file)
@@ -32,6 +32,7 @@
  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+#include <linux/fs_struct.h>
 #include <linux/file.h>
 #include <linux/falloc.h>
 #include <linux/slab.h>
@@ -252,11 +253,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
                 * Note: create modes (UNCHECKED,GUARDED...) are the same
                 * in NFSv4 as in v3 except EXCLUSIVE4_1.
                 */
+               current->fs->umask = open->op_umask;
                status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
                                        open->op_fname.len, &open->op_iattr,
                                        *resfh, open->op_createmode,
                                        (u32 *)open->op_verf.data,
                                        &open->op_truncate, &open->op_created);
+               current->fs->umask = 0;
 
                if (!status && open->op_label.len)
                        nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
@@ -603,6 +606,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (status)
                return status;
 
+       current->fs->umask = create->cr_umask;
        switch (create->cr_type) {
        case NF4LNK:
                status = nfsd_symlink(rqstp, &cstate->current_fh,
@@ -611,20 +615,22 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                break;
 
        case NF4BLK:
+               status = nfserr_inval;
                rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
                if (MAJOR(rdev) != create->cr_specdata1 ||
                    MINOR(rdev) != create->cr_specdata2)
-                       return nfserr_inval;
+                       goto out_umask;
                status = nfsd_create(rqstp, &cstate->current_fh,
                                     create->cr_name, create->cr_namelen,
                                     &create->cr_iattr, S_IFBLK, rdev, &resfh);
                break;
 
        case NF4CHR:
+               status = nfserr_inval;
                rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
                if (MAJOR(rdev) != create->cr_specdata1 ||
                    MINOR(rdev) != create->cr_specdata2)
-                       return nfserr_inval;
+                       goto out_umask;
                status = nfsd_create(rqstp, &cstate->current_fh,
                                     create->cr_name, create->cr_namelen,
                                     &create->cr_iattr,S_IFCHR, rdev, &resfh);
@@ -668,6 +674,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        fh_dup2(&cstate->current_fh, &resfh);
 out:
        fh_put(&resfh);
+out_umask:
+       current->fs->umask = 0;
        return status;
 }
 
@@ -751,6 +759,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (read->rd_offset >= OFFSET_MAX)
                return nfserr_inval;
 
+       trace_nfsd_read_start(rqstp, &cstate->current_fh,
+                             read->rd_offset, read->rd_length);
+
        /*
         * If we do a zero copy read, then a client will see read data
         * that reflects the state of the file *after* performing the
@@ -783,6 +794,8 @@ nfsd4_read_release(union nfsd4_op_u *u)
 {
        if (u->read.rd_filp)
                fput(u->read.rd_filp);
+       trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
+                            u->read.rd_offset, u->read.rd_length);
 }
 
 static __be32
@@ -1001,6 +1014,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (write->wr_offset >= OFFSET_MAX)
                return nfserr_inval;
 
+       cnt = write->wr_buflen;
+       trace_nfsd_write_start(rqstp, &cstate->current_fh,
+                              write->wr_offset, cnt);
        status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                                stateid, WR_STATE, &filp, NULL);
        if (status) {
@@ -1008,7 +1024,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                return status;
        }
 
-       cnt = write->wr_buflen;
        write->wr_how_written = write->wr_stable_how;
        gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
 
@@ -1021,7 +1036,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        fput(filp);
 
        write->wr_bytes_written = cnt;
-
+       trace_nfsd_write_done(rqstp, &cstate->current_fh,
+                             write->wr_offset, cnt);
        return status;
 }
 
@@ -1106,7 +1122,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        else {
                copy->cp_res.wr_bytes_written = bytes;
                copy->cp_res.wr_stable_how = NFS_UNSTABLE;
-               copy->cp_consecutive = 1;
                copy->cp_synchronous = 1;
                gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
                status = nfs_ok;
@@ -1412,7 +1427,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
        nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
                                                true, lgp->lg_layout_type, &ls);
        if (nfserr) {
-               trace_layout_get_lookup_fail(&lgp->lg_sid);
+               trace_nfsd_layout_get_lookup_fail(&lgp->lg_sid);
                goto out;
        }
 
@@ -1481,7 +1496,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
                                                false, lcp->lc_layout_type,
                                                &ls);
        if (nfserr) {
-               trace_layout_commit_lookup_fail(&lcp->lc_sid);
+               trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid);
                /* fixup error code as per RFC5661 */
                if (nfserr == nfserr_bad_stateid)
                        nfserr = nfserr_badlayout;
@@ -1714,12 +1729,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
                goto encode_op;
        }
 
+       trace_nfsd_compound(rqstp, args->opcnt);
        while (!status && resp->opcnt < args->opcnt) {
                op = &args->ops[resp->opcnt++];
 
-               dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
-                       resp->opcnt, args->opcnt, op->opnum,
-                       nfsd4_op_name(op->opnum));
                /*
                 * The XDR decode routines may have pre-set op->status;
                 * for example, if there is a miscellaneous XDR error
@@ -1793,9 +1806,8 @@ encode_op:
                        status = op->status;
                }
 
-               dprintk("nfsv4 compound op %p opcnt %d #%d: %d: status %d\n",
-                       args->ops, args->opcnt, resp->opcnt, op->opnum,
-                       be32_to_cpu(status));
+               trace_nfsd_compound_status(args->opcnt, resp->opcnt, status,
+                                          nfsd4_op_name(op->opnum));
 
                nfsd4_cstate_clear_replay(cstate);
                nfsd4_increment_op_stats(op->opnum);
index 61b770e..fc74d6f 100644 (file)
@@ -98,6 +98,7 @@ enum nfsd4_st_mutex_lock_subclass {
  */
 static DECLARE_WAIT_QUEUE_HEAD(close_wq);
 
+static struct kmem_cache *client_slab;
 static struct kmem_cache *openowner_slab;
 static struct kmem_cache *lockowner_slab;
 static struct kmem_cache *file_slab;
@@ -806,7 +807,8 @@ static void block_delegations(struct knfsd_fh *fh)
 }
 
 static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
+                struct svc_fh *current_fh,
                 struct nfs4_clnt_odstate *odstate)
 {
        struct nfs4_delegation *dp;
@@ -837,6 +839,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
        dp->dl_retries = 1;
        nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
                      &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+       get_nfs4_file(fp);
+       dp->dl_stid.sc_file = fp;
        return dp;
 out_dec:
        atomic_long_dec(&num_delegations);
@@ -874,19 +878,35 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
        spin_unlock(&stid->sc_lock);
 }
 
-static void nfs4_put_deleg_lease(struct nfs4_file *fp)
+static void put_deleg_file(struct nfs4_file *fp)
 {
        struct file *filp = NULL;
 
        spin_lock(&fp->fi_lock);
-       if (fp->fi_deleg_file && --fp->fi_delegees == 0)
+       if (--fp->fi_delegees == 0)
                swap(filp, fp->fi_deleg_file);
        spin_unlock(&fp->fi_lock);
 
-       if (filp) {
-               vfs_setlease(filp, F_UNLCK, NULL, (void **)&fp);
+       if (filp)
                fput(filp);
-       }
+}
+
+static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
+{
+       struct nfs4_file *fp = dp->dl_stid.sc_file;
+       struct file *filp = fp->fi_deleg_file;
+
+       WARN_ON_ONCE(!fp->fi_delegees);
+
+       vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+       put_deleg_file(fp);
+}
+
+static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
+{
+       put_clnt_odstate(dp->dl_clnt_odstate);
+       nfs4_unlock_deleg_lease(dp);
+       nfs4_put_stid(&dp->dl_stid);
 }
 
 void nfs4_unhash_stid(struct nfs4_stid *s)
@@ -895,20 +915,16 @@ void nfs4_unhash_stid(struct nfs4_stid *s)
 }
 
 /**
- * nfs4_get_existing_delegation - Discover if this delegation already exists
+ * nfs4_delegation_exists - Discover if this delegation already exists
  * @clp:     a pointer to the nfs4_client we're granting a delegation to
  * @fp:      a pointer to the nfs4_file we're granting a delegation on
  *
  * Return:
- *      On success: NULL if an existing delegation was not found.
- *
- *      On error: -EAGAIN if one was previously granted to this nfs4_client
- *                 for this nfs4_file.
- *
+ *      On success: true iff an existing delegation is found
  */
 
-static int
-nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
+static bool
+nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp)
 {
        struct nfs4_delegation *searchdp = NULL;
        struct nfs4_client *searchclp = NULL;
@@ -919,10 +935,10 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
        list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
                searchclp = searchdp->dl_stid.sc_client;
                if (clp == searchclp) {
-                       return -EAGAIN;
+                       return true;
                }
        }
-       return 0;
+       return false;
 }
 
 /**
@@ -941,16 +957,13 @@ nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
 static int
 hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
 {
-       int status;
        struct nfs4_client *clp = dp->dl_stid.sc_client;
 
        lockdep_assert_held(&state_lock);
        lockdep_assert_held(&fp->fi_lock);
 
-       status = nfs4_get_existing_delegation(clp, fp);
-       if (status)
-               return status;
-       ++fp->fi_delegees;
+       if (nfs4_delegation_exists(clp, fp))
+               return -EAGAIN;
        refcount_inc(&dp->dl_stid.sc_count);
        dp->dl_stid.sc_type = NFS4_DELEG_STID;
        list_add(&dp->dl_perfile, &fp->fi_delegations);
@@ -986,11 +999,8 @@ static void destroy_delegation(struct nfs4_delegation *dp)
        spin_lock(&state_lock);
        unhashed = unhash_delegation_locked(dp);
        spin_unlock(&state_lock);
-       if (unhashed) {
-               put_clnt_odstate(dp->dl_clnt_odstate);
-               nfs4_put_deleg_lease(dp->dl_stid.sc_file);
-               nfs4_put_stid(&dp->dl_stid);
-       }
+       if (unhashed)
+               destroy_unhashed_deleg(dp);
 }
 
 static void revoke_delegation(struct nfs4_delegation *dp)
@@ -999,17 +1009,14 @@ static void revoke_delegation(struct nfs4_delegation *dp)
 
        WARN_ON(!list_empty(&dp->dl_recall_lru));
 
-       put_clnt_odstate(dp->dl_clnt_odstate);
-       nfs4_put_deleg_lease(dp->dl_stid.sc_file);
-
-       if (clp->cl_minorversion == 0)
-               nfs4_put_stid(&dp->dl_stid);
-       else {
+       if (clp->cl_minorversion) {
                dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
+               refcount_inc(&dp->dl_stid.sc_count);
                spin_lock(&clp->cl_lock);
                list_add(&dp->dl_recall_lru, &clp->cl_revoked);
                spin_unlock(&clp->cl_lock);
        }
+       destroy_unhashed_deleg(dp);
 }
 
 /* 
@@ -1794,7 +1801,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
        struct nfs4_client *clp;
        int i;
 
-       clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
+       clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
        if (clp == NULL)
                return NULL;
        clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
@@ -1825,7 +1832,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 err_no_hashtbl:
        kfree(clp->cl_name.data);
 err_no_name:
-       kfree(clp);
+       kmem_cache_free(client_slab, clp);
        return NULL;
 }
 
@@ -1845,7 +1852,7 @@ free_client(struct nfs4_client *clp)
        kfree(clp->cl_ownerstr_hashtbl);
        kfree(clp->cl_name.data);
        idr_destroy(&clp->cl_stateids);
-       kfree(clp);
+       kmem_cache_free(client_slab, clp);
 }
 
 /* must be called under the client_lock */
@@ -1911,9 +1918,7 @@ __destroy_client(struct nfs4_client *clp)
        while (!list_empty(&reaplist)) {
                dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
                list_del_init(&dp->dl_recall_lru);
-               put_clnt_odstate(dp->dl_clnt_odstate);
-               nfs4_put_deleg_lease(dp->dl_stid.sc_file);
-               nfs4_put_stid(&dp->dl_stid);
+               destroy_unhashed_deleg(dp);
        }
        while (!list_empty(&clp->cl_revoked)) {
                dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
@@ -2953,7 +2958,7 @@ out_no_session:
 static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
 {
        if (!session)
-               return 0;
+               return false;
        return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
 }
 
@@ -3471,21 +3476,26 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
 void
 nfsd4_free_slabs(void)
 {
-       kmem_cache_destroy(odstate_slab);
+       kmem_cache_destroy(client_slab);
        kmem_cache_destroy(openowner_slab);
        kmem_cache_destroy(lockowner_slab);
        kmem_cache_destroy(file_slab);
        kmem_cache_destroy(stateid_slab);
        kmem_cache_destroy(deleg_slab);
+       kmem_cache_destroy(odstate_slab);
 }
 
 int
 nfsd4_init_slabs(void)
 {
+       client_slab = kmem_cache_create("nfsd4_clients",
+                       sizeof(struct nfs4_client), 0, 0, NULL);
+       if (client_slab == NULL)
+               goto out;
        openowner_slab = kmem_cache_create("nfsd4_openowners",
                        sizeof(struct nfs4_openowner), 0, 0, NULL);
        if (openowner_slab == NULL)
-               goto out;
+               goto out_free_client_slab;
        lockowner_slab = kmem_cache_create("nfsd4_lockowners",
                        sizeof(struct nfs4_lockowner), 0, 0, NULL);
        if (lockowner_slab == NULL)
@@ -3518,6 +3528,8 @@ out_free_lockowner_slab:
        kmem_cache_destroy(lockowner_slab);
 out_free_openowner_slab:
        kmem_cache_destroy(openowner_slab);
+out_free_client_slab:
+       kmem_cache_destroy(client_slab);
 out:
        dprintk("nfsd4: out of memory while initializing nfsv4\n");
        return -ENOMEM;
@@ -3945,17 +3957,9 @@ static bool
 nfsd_break_deleg_cb(struct file_lock *fl)
 {
        bool ret = false;
-       struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
-       struct nfs4_delegation *dp;
+       struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+       struct nfs4_file *fp = dp->dl_stid.sc_file;
 
-       if (!fp) {
-               WARN(1, "(%p)->fl_owner NULL\n", fl);
-               return ret;
-       }
-       if (fp->fi_had_conflict) {
-               WARN(1, "duplicate break on %p\n", fp);
-               return ret;
-       }
        /*
         * We don't want the locks code to timeout the lease for us;
         * we'll remove it ourself if a delegation isn't returned
@@ -3965,15 +3969,7 @@ nfsd_break_deleg_cb(struct file_lock *fl)
 
        spin_lock(&fp->fi_lock);
        fp->fi_had_conflict = true;
-       /*
-        * If there are no delegations on the list, then return true
-        * so that the lease code will go ahead and delete it.
-        */
-       if (list_empty(&fp->fi_delegations))
-               ret = true;
-       else
-               list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
-                       nfsd_break_one_deleg(dp);
+       nfsd_break_one_deleg(dp);
        spin_unlock(&fp->fi_lock);
        return ret;
 }
@@ -4297,7 +4293,8 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
        return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
 }
 
-static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
+static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
+                                               int flag)
 {
        struct file_lock *fl;
 
@@ -4308,124 +4305,88 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
        fl->fl_flags = FL_DELEG;
        fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
        fl->fl_end = OFFSET_MAX;
-       fl->fl_owner = (fl_owner_t)fp;
+       fl->fl_owner = (fl_owner_t)dp;
        fl->fl_pid = current->tgid;
+       fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
        return fl;
 }
 
-/**
- * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
- * @dp:   a pointer to the nfs4_delegation we're adding.
- *
- * Return:
- *      On success: Return code will be 0 on success.
- *
- *      On error: -EAGAIN if there was an existing delegation.
- *                 nonzero if there is an error in other cases.
- *
- */
-
-static int nfs4_setlease(struct nfs4_delegation *dp)
-{
-       struct nfs4_file *fp = dp->dl_stid.sc_file;
-       struct file_lock *fl;
-       struct file *filp;
-       int status = 0;
-
-       fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
-       if (!fl)
-               return -ENOMEM;
-       filp = find_readable_file(fp);
-       if (!filp) {
-               /* We should always have a readable file here */
-               WARN_ON_ONCE(1);
-               locks_free_lock(fl);
-               return -EBADF;
-       }
-       fl->fl_file = filp;
-       status = vfs_setlease(filp, fl->fl_type, &fl, NULL);
-       if (fl)
-               locks_free_lock(fl);
-       if (status)
-               goto out_fput;
-       spin_lock(&state_lock);
-       spin_lock(&fp->fi_lock);
-       /* Did the lease get broken before we took the lock? */
-       status = -EAGAIN;
-       if (fp->fi_had_conflict)
-               goto out_unlock;
-       /* Race breaker */
-       if (fp->fi_deleg_file) {
-               status = hash_delegation_locked(dp, fp);
-               goto out_unlock;
-       }
-       fp->fi_deleg_file = filp;
-       fp->fi_delegees = 0;
-       status = hash_delegation_locked(dp, fp);
-       spin_unlock(&fp->fi_lock);
-       spin_unlock(&state_lock);
-       if (status) {
-               /* Should never happen, this is a new fi_deleg_file  */
-               WARN_ON_ONCE(1);
-               goto out_fput;
-       }
-       return 0;
-out_unlock:
-       spin_unlock(&fp->fi_lock);
-       spin_unlock(&state_lock);
-out_fput:
-       fput(filp);
-       return status;
-}
-
 static struct nfs4_delegation *
 nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
                    struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
 {
-       int status;
+       int status = 0;
        struct nfs4_delegation *dp;
+       struct file *filp;
+       struct file_lock *fl;
 
+       /*
+        * The fi_had_conflict and nfs_get_existing_delegation checks
+        * here are just optimizations; we'll need to recheck them at
+        * the end:
+        */
        if (fp->fi_had_conflict)
                return ERR_PTR(-EAGAIN);
 
+       filp = find_readable_file(fp);
+       if (!filp) {
+               /* We should always have a readable file here */
+               WARN_ON_ONCE(1);
+               return ERR_PTR(-EBADF);
+       }
        spin_lock(&state_lock);
        spin_lock(&fp->fi_lock);
-       status = nfs4_get_existing_delegation(clp, fp);
+       if (nfs4_delegation_exists(clp, fp))
+               status = -EAGAIN;
+       else if (!fp->fi_deleg_file) {
+               fp->fi_deleg_file = filp;
+               /* increment early to prevent fi_deleg_file from being
+                * cleared */
+               fp->fi_delegees = 1;
+               filp = NULL;
+       } else
+               fp->fi_delegees++;
        spin_unlock(&fp->fi_lock);
        spin_unlock(&state_lock);
-
+       if (filp)
+               fput(filp);
        if (status)
                return ERR_PTR(status);
 
-       dp = alloc_init_deleg(clp, fh, odstate);
+       status = -ENOMEM;
+       dp = alloc_init_deleg(clp, fp, fh, odstate);
        if (!dp)
-               return ERR_PTR(-ENOMEM);
+               goto out_delegees;
+
+       fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
+       if (!fl)
+               goto out_stid;
+
+       status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+       if (fl)
+               locks_free_lock(fl);
+       if (status)
+               goto out_clnt_odstate;
 
-       get_nfs4_file(fp);
        spin_lock(&state_lock);
        spin_lock(&fp->fi_lock);
-       dp->dl_stid.sc_file = fp;
-       if (!fp->fi_deleg_file) {
-               spin_unlock(&fp->fi_lock);
-               spin_unlock(&state_lock);
-               status = nfs4_setlease(dp);
-               goto out;
-       }
-       if (fp->fi_had_conflict) {
+       if (fp->fi_had_conflict)
                status = -EAGAIN;
-               goto out_unlock;
-       }
-       status = hash_delegation_locked(dp, fp);
-out_unlock:
+       else
+               status = hash_delegation_locked(dp, fp);
        spin_unlock(&fp->fi_lock);
        spin_unlock(&state_lock);
-out:
-       if (status) {
-               put_clnt_odstate(dp->dl_clnt_odstate);
-               nfs4_put_stid(&dp->dl_stid);
-               return ERR_PTR(status);
-       }
+
+       if (status)
+               destroy_unhashed_deleg(dp);
        return dp;
+out_clnt_odstate:
+       put_clnt_odstate(dp->dl_clnt_odstate);
+out_stid:
+       nfs4_put_stid(&dp->dl_stid);
+out_delegees:
+       put_deleg_file(fp);
+       return ERR_PTR(status);
 }
 
 static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -5521,15 +5482,26 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                goto out; 
 
        stp->st_stid.sc_type = NFS4_CLOSED_STID;
+
+       /*
+        * Technically we don't _really_ have to increment or copy it, since
+        * it should just be gone after this operation and we clobber the
+        * copied value below, but we continue to do so here just to ensure
+        * that racing ops see that there was a state change.
+        */
        nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
 
        nfsd4_close_open_stateid(stp);
        mutex_unlock(&stp->st_mutex);
 
-       /* See RFC5661 sectionm 18.2.4 */
-       if (stp->st_stid.sc_client->cl_minorversion)
-               memcpy(&close->cl_stateid, &close_stateid,
-                               sizeof(close->cl_stateid));
+       /* v4.1+ suggests that we send a special stateid in here, since the
+        * clients should just ignore this anyway. Since this is not useful
+        * for v4.0 clients either, we set it to the special close_stateid
+        * universally.
+        *
+        * See RFC5661 section 18.2.4, and RFC7530 section 16.2.5
+        */
+       memcpy(&close->cl_stateid, &close_stateid, sizeof(close->cl_stateid));
 
        /* put reference from nfs4_preprocess_seqid_op */
        nfs4_put_stid(&stp->st_stid);
@@ -7264,9 +7236,7 @@ nfs4_state_shutdown_net(struct net *net)
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
                list_del_init(&dp->dl_recall_lru);
-               put_clnt_odstate(dp->dl_clnt_odstate);
-               nfs4_put_deleg_lease(dp->dl_stid.sc_file);
-               nfs4_put_stid(&dp->dl_stid);
+               destroy_unhashed_deleg(dp);
        }
 
        nfsd4_client_tracking_exit(net);
index e502fd1..1d048dd 100644 (file)
@@ -33,7 +33,6 @@
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/fs_struct.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 #include <linux/namei.h>
@@ -682,7 +681,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
 
        status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
                                    &create->cr_acl, &create->cr_label,
-                                   &current->fs->umask);
+                                   &create->cr_umask);
        if (status)
                goto out;
 
@@ -927,7 +926,6 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
        case NFS4_OPEN_NOCREATE:
                break;
        case NFS4_OPEN_CREATE:
-               current->fs->umask = 0;
                READ_BUF(4);
                open->op_createmode = be32_to_cpup(p++);
                switch (open->op_createmode) {
@@ -935,7 +933,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
                case NFS4_CREATE_GUARDED:
                        status = nfsd4_decode_fattr(argp, open->op_bmval,
                                &open->op_iattr, &open->op_acl, &open->op_label,
-                               &current->fs->umask);
+                               &open->op_umask);
                        if (status)
                                goto out;
                        break;
@@ -950,7 +948,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
                        COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
                        status = nfsd4_decode_fattr(argp, open->op_bmval,
                                &open->op_iattr, &open->op_acl, &open->op_label,
-                               &current->fs->umask);
+                               &open->op_umask);
                        if (status)
                                goto out;
                        break;
@@ -1759,7 +1757,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
        p = xdr_decode_hyper(p, &copy->cp_src_pos);
        p = xdr_decode_hyper(p, &copy->cp_dst_pos);
        p = xdr_decode_hyper(p, &copy->cp_count);
-       copy->cp_consecutive = be32_to_cpup(p++);
+       p++; /* ca_consecutive: we always do consecutive copies */
        copy->cp_synchronous = be32_to_cpup(p++);
        tmp = be32_to_cpup(p); /* Source server list not supported */
 
@@ -3427,8 +3425,9 @@ static __be32 nfsd4_encode_splice_read(
                return nfserr_resource;
 
        len = maxcount;
-       nfserr = nfsd_splice_read(read->rd_rqstp, file,
-                                 read->rd_offset, &maxcount);
+       nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
+                                 file, read->rd_offset, &maxcount);
+       read->rd_length = maxcount;
        if (nfserr) {
                /*
                 * nfsd_splice_actor may have already messed with the
@@ -3511,8 +3510,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
        read->rd_vlen = v;
 
        len = maxcount;
-       nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
-                       read->rd_vlen, &maxcount);
+       nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
+                           resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+       read->rd_length = maxcount;
        if (nfserr)
                return nfserr;
        xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
@@ -4214,7 +4214,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
                return nfserr;
 
        p = xdr_reserve_space(&resp->xdr, 4 + 4);
-       *p++ = cpu_to_be32(copy->cp_consecutive);
+       *p++ = xdr_one; /* cr_consecutive */
        *p++ = cpu_to_be32(copy->cp_synchronous);
        return 0;
 }
index 8aa0118..a008e76 100644 (file)
@@ -87,13 +87,23 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
        return nfserr_inval;
 }
 
+static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags)
+{
+       if (flags & NFSEXP_INSECURE_PORT)
+               return true;
+       /* We don't require gss requests to use low ports: */
+       if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS)
+               return true;
+       return test_bit(RQ_SECURE, &rqstp->rq_flags);
+}
+
 static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
                                          struct svc_export *exp)
 {
        int flags = nfsexp_flags(rqstp, exp);
 
        /* Check if the request originated from a secure port. */
-       if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && !(flags & NFSEXP_INSECURE_PORT)) {
+       if (!nfsd_originating_port_ok(rqstp, flags)) {
                RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
                dprintk("nfsd: request from insecure port %s!\n",
                        svc_print_addr(rqstp, buf, sizeof(buf)));
index 43c0419..f107f9f 100644 (file)
@@ -212,13 +212,18 @@ nfsd_proc_write(struct svc_rqst *rqstp)
        struct nfsd_attrstat *resp = rqstp->rq_resp;
        __be32  nfserr;
        unsigned long cnt = argp->len;
+       unsigned int nvecs;
 
        dprintk("nfsd: WRITE    %s %d bytes at %d\n",
                SVCFH_fmt(&argp->fh),
                argp->len, argp->offset);
 
-       nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
-                               rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
+       nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+       if (!nvecs)
+               return nfserr_io;
+       nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
+                           argp->offset, rqstp->rq_vec, nvecs,
+                           &cnt, NFS_DATA_SYNC);
        return nfsd_return_attrs(nfserr, resp);
 }
 
@@ -444,17 +449,19 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
        struct svc_fh   newfh;
        __be32          nfserr;
 
+       if (argp->tlen > NFS_MAXPATHLEN)
+               return nfserr_nametoolong;
+
+       argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+                                               argp->tlen);
+       if (IS_ERR(argp->tname))
+               return nfserrno(PTR_ERR(argp->tname));
+
        dprintk("nfsd: SYMLINK  %s %.*s -> %.*s\n",
                SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
                argp->tlen, argp->tname);
 
        fh_init(&newfh, NFS_FHSIZE);
-       /*
-        * Crazy hack: the request fits in a page, and already-decoded
-        * attributes follow argp->tname, so it's safe to just write a
-        * null to ensure it's null-terminated:
-        */
-       argp->tname[argp->tlen] = '\0';
        nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
                                                 argp->tname, &newfh);
 
index 79b6064..a43e826 100644 (file)
@@ -70,22 +70,6 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp)
        return p;
 }
 
-static __be32 *
-decode_pathname(__be32 *p, char **namp, unsigned int *lenp)
-{
-       char            *name;
-       unsigned int    i;
-
-       if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) {
-               for (i = 0, name = *namp; i < *lenp; i++, name++) {
-                       if (*name == '\0')
-                               return NULL;
-               }
-       }
-
-       return p;
-}
-
 static __be32 *
 decode_sattr(__be32 *p, struct iattr *iap)
 {
@@ -287,7 +271,6 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
        struct nfsd_writeargs *args = rqstp->rq_argp;
        unsigned int len, hdr, dlen;
        struct kvec *head = rqstp->rq_arg.head;
-       int v;
 
        p = decode_fh(p, &args->fh);
        if (!p)
@@ -323,17 +306,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
        if (dlen < XDR_QUADLEN(len)*4)
                return 0;
 
-       rqstp->rq_vec[0].iov_base = (void*)p;
-       rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
-       v = 0;
-       while (len > rqstp->rq_vec[v].iov_len) {
-               len -= rqstp->rq_vec[v].iov_len;
-               v++;
-               rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_pages[v]);
-               rqstp->rq_vec[v].iov_len = PAGE_SIZE;
-       }
-       rqstp->rq_vec[v].iov_len = len;
-       args->vlen = v + 1;
+       args->first.iov_base = (void *)p;
+       args->first.iov_len = head->iov_len - hdr;
        return 1;
 }
 
@@ -394,14 +368,39 @@ int
 nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 {
        struct nfsd_symlinkargs *args = rqstp->rq_argp;
+       char *base = (char *)p;
+       size_t xdrlen;
 
        if (   !(p = decode_fh(p, &args->ffh))
-           || !(p = decode_filename(p, &args->fname, &args->flen))
-           || !(p = decode_pathname(p, &args->tname, &args->tlen)))
+           || !(p = decode_filename(p, &args->fname, &args->flen)))
                return 0;
-       p = decode_sattr(p, &args->attrs);
 
-       return xdr_argsize_check(rqstp, p);
+       args->tlen = ntohl(*p++);
+       if (args->tlen == 0)
+               return 0;
+
+       args->first.iov_base = p;
+       args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
+       args->first.iov_len -= (char *)p - base;
+
+       /* This request is never larger than a page. Therefore,
+        * transport will deliver either:
+        * 1. pathname in the pagelist -> sattr is in the tail.
+        * 2. everything in the head buffer -> sattr is in the head.
+        */
+       if (rqstp->rq_arg.page_len) {
+               if (args->tlen != rqstp->rq_arg.page_len)
+                       return 0;
+               p = rqstp->rq_arg.tail[0].iov_base;
+       } else {
+               xdrlen = XDR_QUADLEN(args->tlen);
+               if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
+                       return 0;
+               p += xdrlen;
+       }
+       decode_sattr(p, &args->attrs);
+
+       return 1;
 }
 
 int
index 8b2f1d9..80933e4 100644 (file)
 #include <linux/tracepoint.h>
 #include "nfsfh.h"
 
+TRACE_EVENT(nfsd_compound,
+       TP_PROTO(const struct svc_rqst *rqst,
+                u32 args_opcnt),
+       TP_ARGS(rqst, args_opcnt),
+       TP_STRUCT__entry(
+               __field(u32, xid)
+               __field(u32, args_opcnt)
+       ),
+       TP_fast_assign(
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+               __entry->args_opcnt = args_opcnt;
+       ),
+       TP_printk("xid=0x%08x opcnt=%u",
+               __entry->xid, __entry->args_opcnt)
+)
+
+TRACE_EVENT(nfsd_compound_status,
+       TP_PROTO(u32 args_opcnt,
+                u32 resp_opcnt,
+                __be32 status,
+                const char *name),
+       TP_ARGS(args_opcnt, resp_opcnt, status, name),
+       TP_STRUCT__entry(
+               __field(u32, args_opcnt)
+               __field(u32, resp_opcnt)
+               __field(int, status)
+               __string(name, name)
+       ),
+       TP_fast_assign(
+               __entry->args_opcnt = args_opcnt;
+               __entry->resp_opcnt = resp_opcnt;
+               __entry->status = be32_to_cpu(status);
+               __assign_str(name, name);
+       ),
+       TP_printk("op=%u/%u %s status=%d",
+               __entry->resp_opcnt, __entry->args_opcnt,
+               __get_str(name), __entry->status)
+)
+
 DECLARE_EVENT_CLASS(nfsd_io_class,
        TP_PROTO(struct svc_rqst *rqstp,
                 struct svc_fh  *fhp,
                 loff_t         offset,
-                int            len),
+                unsigned long  len),
        TP_ARGS(rqstp, fhp, offset, len),
        TP_STRUCT__entry(
-               __field(__be32, xid)
-               __field_struct(struct knfsd_fh, fh)
+               __field(u32, xid)
+               __field(u32, fh_hash)
                __field(loff_t, offset)
-               __field(int, len)
+               __field(unsigned long, len)
        ),
        TP_fast_assign(
-               __entry->xid = rqstp->rq_xid,
-               fh_copy_shallow(&__entry->fh, &fhp->fh_handle);
+               __entry->xid = be32_to_cpu(rqstp->rq_xid);
+               __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
                __entry->offset = offset;
                __entry->len = len;
        ),
-       TP_printk("xid=0x%x fh=0x%x offset=%lld len=%d",
-                 __be32_to_cpu(__entry->xid), knfsd_fh_hash(&__entry->fh),
+       TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu",
+                 __entry->xid, __entry->fh_hash,
                  __entry->offset, __entry->len)
 )
 
 #define DEFINE_NFSD_IO_EVENT(name)             \
-DEFINE_EVENT(nfsd_io_class, name,              \
+DEFINE_EVENT(nfsd_io_class, nfsd_##name,       \
        TP_PROTO(struct svc_rqst *rqstp,        \
                 struct svc_fh  *fhp,           \
                 loff_t         offset,         \
-                int            len),           \
+                unsigned long  len),           \
        TP_ARGS(rqstp, fhp, offset, len))
 
 DEFINE_NFSD_IO_EVENT(read_start);
-DEFINE_NFSD_IO_EVENT(read_opened);
+DEFINE_NFSD_IO_EVENT(read_splice);
+DEFINE_NFSD_IO_EVENT(read_vector);
 DEFINE_NFSD_IO_EVENT(read_io_done);
 DEFINE_NFSD_IO_EVENT(read_done);
 DEFINE_NFSD_IO_EVENT(write_start);
@@ -51,6 +91,40 @@ DEFINE_NFSD_IO_EVENT(write_opened);
 DEFINE_NFSD_IO_EVENT(write_io_done);
 DEFINE_NFSD_IO_EVENT(write_done);
 
+DECLARE_EVENT_CLASS(nfsd_err_class,
+       TP_PROTO(struct svc_rqst *rqstp,
+                struct svc_fh  *fhp,
+                loff_t         offset,
+                int            status),
+       TP_ARGS(rqstp, fhp, offset, status),
+       TP_STRUCT__entry(
+               __field(u32, xid)
+               __field(u32, fh_hash)
+               __field(loff_t, offset)
+               __field(int, status)
+       ),
+       TP_fast_assign(
+               __entry->xid = be32_to_cpu(rqstp->rq_xid);
+               __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+               __entry->offset = offset;
+               __entry->status = status;
+       ),
+       TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld status=%d",
+                 __entry->xid, __entry->fh_hash,
+                 __entry->offset, __entry->status)
+)
+
+#define DEFINE_NFSD_ERR_EVENT(name)            \
+DEFINE_EVENT(nfsd_err_class, nfsd_##name,      \
+       TP_PROTO(struct svc_rqst *rqstp,        \
+                struct svc_fh  *fhp,           \
+                loff_t         offset,         \
+                int            len),           \
+       TP_ARGS(rqstp, fhp, offset, len))
+
+DEFINE_NFSD_ERR_EVENT(read_err);
+DEFINE_NFSD_ERR_EVENT(write_err);
+
 #include "state.h"
 
 DECLARE_EVENT_CLASS(nfsd_stateid_class,
@@ -76,7 +150,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class,
 )
 
 #define DEFINE_STATEID_EVENT(name) \
-DEFINE_EVENT(nfsd_stateid_class, name, \
+DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \
        TP_PROTO(stateid_t *stp), \
        TP_ARGS(stp))
 DEFINE_STATEID_EVENT(layoutstate_alloc);
index a3c9bfa..2410b09 100644 (file)
@@ -881,20 +881,24 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
        return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
 }
 
-static __be32
-nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
+static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                              struct file *file, loff_t offset,
+                              unsigned long *count, int host_err)
 {
        if (host_err >= 0) {
                nfsdstats.io_read += host_err;
                *count = host_err;
                fsnotify_access(file);
+               trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
                return 0;
-       } else 
+       } else {
+               trace_nfsd_read_err(rqstp, fhp, offset, host_err);
                return nfserrno(host_err);
+       }
 }
 
-__be32 nfsd_splice_read(struct svc_rqst *rqstp,
-                    struct file *file, loff_t offset, unsigned long *count)
+__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                       struct file *file, loff_t offset, unsigned long *count)
 {
        struct splice_desc sd = {
                .len            = 0,
@@ -904,21 +908,23 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp,
        };
        int host_err;
 
+       trace_nfsd_read_splice(rqstp, fhp, offset, *count);
        rqstp->rq_next_page = rqstp->rq_respages + 1;
        host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
-       return nfsd_finish_read(file, count, host_err);
+       return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
 }
 
-__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
-               unsigned long *count)
+__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                 struct file *file, loff_t offset,
+                 struct kvec *vec, int vlen, unsigned long *count)
 {
        struct iov_iter iter;
        int host_err;
 
+       trace_nfsd_read_vector(rqstp, fhp, offset, *count);
        iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
        host_err = vfs_iter_read(file, &iter, &offset, 0);
-
-       return nfsd_finish_read(file, count, host_err);
+       return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
 }
 
 /*
@@ -965,13 +971,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 {
        struct svc_export       *exp;
        struct iov_iter         iter;
-       __be32                  err = 0;
+       __be32                  nfserr;
        int                     host_err;
        int                     use_wgather;
        loff_t                  pos = offset;
        unsigned int            pflags = current->flags;
        rwf_t                   flags = 0;
 
+       trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
+
        if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
                /*
                 * We want less throttling in balance_dirty_pages()
@@ -994,22 +1002,23 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        host_err = vfs_iter_write(file, &iter, &pos, flags);
        if (host_err < 0)
                goto out_nfserr;
-       *cnt = host_err;
-       nfsdstats.io_write += host_err;
+       nfsdstats.io_write += *cnt;
        fsnotify_modify(file);
 
        if (stable && use_wgather)
                host_err = wait_for_concurrent_writes(file);
 
 out_nfserr:
-       dprintk("nfsd: write complete host_err=%d\n", host_err);
-       if (host_err >= 0)
-               err = 0;
-       else
-               err = nfserrno(host_err);
+       if (host_err >= 0) {
+               trace_nfsd_write_io_done(rqstp, fhp, offset, *cnt);
+               nfserr = nfs_ok;
+       } else {
+               trace_nfsd_write_err(rqstp, fhp, offset, host_err);
+               nfserr = nfserrno(host_err);
+       }
        if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
                current_restore_flags(pflags, PF_LESS_THROTTLE);
-       return err;
+       return nfserr;
 }
 
 /*
@@ -1024,27 +1033,23 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
        struct raparms  *ra;
        __be32 err;
 
-       trace_read_start(rqstp, fhp, offset, vlen);
+       trace_nfsd_read_start(rqstp, fhp, offset, *count);
        err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
        if (err)
                return err;
 
        ra = nfsd_init_raparms(file);
 
-       trace_read_opened(rqstp, fhp, offset, vlen);
-
        if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
-               err = nfsd_splice_read(rqstp, file, offset, count);
+               err = nfsd_splice_read(rqstp, fhp, file, offset, count);
        else
-               err = nfsd_readv(file, offset, vec, vlen, count);
-
-       trace_read_io_done(rqstp, fhp, offset, vlen);
+               err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
 
        if (ra)
                nfsd_put_raparams(file, ra);
        fput(file);
 
-       trace_read_done(rqstp, fhp, offset, vlen);
+       trace_nfsd_read_done(rqstp, fhp, offset, *count);
 
        return err;
 }
@@ -1061,18 +1066,16 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
        struct file *file = NULL;
        __be32 err = 0;
 
-       trace_write_start(rqstp, fhp, offset, vlen);
+       trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
 
        err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
        if (err)
                goto out;
 
-       trace_write_opened(rqstp, fhp, offset, vlen);
        err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
-       trace_write_io_done(rqstp, fhp, offset, vlen);
        fput(file);
 out:
-       trace_write_done(rqstp, fhp, offset, vlen);
+       trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
        return err;
 }
 
index be6d8e0..a7e1073 100644 (file)
@@ -78,10 +78,13 @@ __be32              nfsd_commit(struct svc_rqst *, struct svc_fh *,
 __be32         nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
                                int, struct file **);
 struct raparms;
-__be32         nfsd_splice_read(struct svc_rqst *,
-                               struct file *, loff_t, unsigned long *);
-__be32         nfsd_readv(struct file *, loff_t, struct kvec *, int,
-                               unsigned long *);
+__be32         nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                               struct file *file, loff_t offset,
+                               unsigned long *count);
+__be32         nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                               struct file *file, loff_t offset,
+                               struct kvec *vec, int vlen,
+                               unsigned long *count);
 __be32                 nfsd_read(struct svc_rqst *, struct svc_fh *,
                                loff_t, struct kvec *, int, unsigned long *);
 __be32                 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
index 2f4f22e..ea7cca3 100644 (file)
@@ -34,7 +34,7 @@ struct nfsd_writeargs {
        svc_fh                  fh;
        __u32                   offset;
        int                     len;
-       int                     vlen;
+       struct kvec             first;
 };
 
 struct nfsd_createargs {
@@ -72,6 +72,7 @@ struct nfsd_symlinkargs {
        char *                  tname;
        unsigned int            tlen;
        struct iattr            attrs;
+       struct kvec             first;
 };
 
 struct nfsd_readdirargs {
index 056bf8a..2cb29e9 100644 (file)
@@ -41,7 +41,7 @@ struct nfsd3_writeargs {
        __u32                   count;
        int                     stable;
        __u32                   len;
-       int                     vlen;
+       struct kvec             first;
 };
 
 struct nfsd3_createargs {
@@ -90,6 +90,7 @@ struct nfsd3_symlinkargs {
        char *                  tname;
        unsigned int            tlen;
        struct iattr            attrs;
+       struct kvec             first;
 };
 
 struct nfsd3_readdirargs {
index bc29511..17c453a 100644 (file)
@@ -110,6 +110,7 @@ struct nfsd4_create {
                struct {
                        u32 datalen;
                        char *data;
+                       struct kvec first;
                } link;   /* NF4LNK */
                struct {
                        u32 specdata1;
@@ -118,12 +119,14 @@ struct nfsd4_create {
        } u;
        u32             cr_bmval[3];        /* request */
        struct iattr    cr_iattr;           /* request */
+       int             cr_umask;           /* request */
        struct nfsd4_change_info  cr_cinfo; /* response */
        struct nfs4_acl *cr_acl;
        struct xdr_netobj cr_label;
 };
 #define cr_datalen     u.link.datalen
 #define cr_data                u.link.data
+#define cr_first       u.link.first
 #define cr_specdata1   u.dev.specdata1
 #define cr_specdata2   u.dev.specdata2
 
@@ -228,6 +231,7 @@ struct nfsd4_open {
        u32             op_why_no_deleg;    /* response - DELEG_NONE_EXT only */
        u32             op_create;          /* request */
        u32             op_createmode;      /* request */
+       int             op_umask;           /* request */
        u32             op_bmval[3];        /* request */
        struct iattr    op_iattr;           /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
        nfs4_verifier   op_verf __attribute__((aligned(32)));
@@ -518,7 +522,6 @@ struct nfsd4_copy {
        u64             cp_count;
 
        /* both */
-       bool            cp_consecutive;
        bool            cp_synchronous;
 
        /* response */
index 786ae22..574368e 100644 (file)
@@ -272,6 +272,7 @@ struct svc_rqst {
 #define        RQ_BUSY         (6)                     /* request is busy */
 #define        RQ_DATA         (7)                     /* request has data */
        unsigned long           rq_flags;       /* flags field */
+       ktime_t                 rq_qtime;       /* enqueue time */
 
        void *                  rq_argp;        /* decoded arguments */
        void *                  rq_resp;        /* xdr'd results */
@@ -283,6 +284,7 @@ struct svc_rqst {
        int                     rq_reserved;    /* space on socket outq
                                                 * reserved for this request
                                                 */
+       ktime_t                 rq_stime;       /* start time */
 
        struct cache_req        rq_chandle;     /* handle passed to caches for 
                                                 * request delaying 
@@ -493,6 +495,10 @@ void                  svc_wake_up(struct svc_serv *);
 void              svc_reserve(struct svc_rqst *rqstp, int space);
 struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 char *            svc_print_addr(struct svc_rqst *, char *, size_t);
+unsigned int      svc_fill_write_vector(struct svc_rqst *rqstp,
+                                        struct kvec *first, size_t total);
+char             *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
+                                            struct kvec *first, size_t total);
 
 #define        RPC_MAX_ADDRBUFLEN      (63U)
 
index 4b731b0..7337e12 100644 (file)
@@ -132,9 +132,6 @@ struct svcxprt_rdma {
 #define RDMAXPRT_CONN_PENDING  3
 
 #define RPCRDMA_LISTEN_BACKLOG  10
-/* The default ORD value is based on two outstanding full-size writes with a
- * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.  */
-#define RPCRDMA_ORD             (64/4)
 #define RPCRDMA_MAX_REQUESTS    32
 
 /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
index 1caf7bc..c3d7206 100644 (file)
@@ -25,7 +25,7 @@ struct svc_xprt_ops {
        void            (*xpo_release_rqst)(struct svc_rqst *);
        void            (*xpo_detach)(struct svc_xprt *);
        void            (*xpo_free)(struct svc_xprt *);
-       int             (*xpo_secure_port)(struct svc_rqst *);
+       void            (*xpo_secure_port)(struct svc_rqst *rqstp);
        void            (*xpo_kill_temp_xprt)(struct svc_xprt *);
 };
 
@@ -83,6 +83,7 @@ struct svc_xprt {
        size_t                  xpt_locallen;   /* length of address */
        struct sockaddr_storage xpt_remote;     /* remote peer's address */
        size_t                  xpt_remotelen;  /* length of address */
+       char                    xpt_remotebuf[INET6_ADDRSTRLEN + 10];
        struct rpc_wait_queue   xpt_bc_pending; /* backchannel wait queue */
        struct list_head        xpt_users;      /* callbacks on free */
 
@@ -152,7 +153,10 @@ static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
 {
        memcpy(&xprt->xpt_remote, sa, salen);
        xprt->xpt_remotelen = salen;
+       snprintf(xprt->xpt_remotebuf, sizeof(xprt->xpt_remotebuf) - 1,
+                "%pISpc", sa);
 }
+
 static inline unsigned short svc_addr_port(const struct sockaddr *sa)
 {
        const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;
index 970c91a..922cb89 100644 (file)
@@ -485,31 +485,55 @@ TRACE_EVENT(xs_tcp_data_recv,
                { (1UL << RQ_BUSY),             "RQ_BUSY"})
 
 TRACE_EVENT(svc_recv,
-       TP_PROTO(struct svc_rqst *rqst, int status),
+       TP_PROTO(struct svc_rqst *rqst, int len),
 
-       TP_ARGS(rqst, status),
+       TP_ARGS(rqst, len),
 
        TP_STRUCT__entry(
                __field(u32, xid)
-               __field(int, status)
+               __field(int, len)
                __field(unsigned long, flags)
-               __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
+               __string(addr, rqst->rq_xprt->xpt_remotebuf)
        ),
 
        TP_fast_assign(
-               __entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0;
-               __entry->status = status;
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+               __entry->len = len;
                __entry->flags = rqst->rq_flags;
-               memcpy(__get_dynamic_array(addr),
-                       &rqst->rq_addr, rqst->rq_addrlen);
+               __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
        ),
 
-       TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s",
-                       (struct sockaddr *)__get_dynamic_array(addr),
-                       __entry->xid, __entry->status,
+       TP_printk("addr=%s xid=0x%08x len=%d flags=%s",
+                       __get_str(addr), __entry->xid, __entry->len,
                        show_rqstp_flags(__entry->flags))
 );
 
+TRACE_EVENT(svc_process,
+       TP_PROTO(const struct svc_rqst *rqst, const char *name),
+
+       TP_ARGS(rqst, name),
+
+       TP_STRUCT__entry(
+               __field(u32, xid)
+               __field(u32, vers)
+               __field(u32, proc)
+               __string(service, name)
+               __string(addr, rqst->rq_xprt->xpt_remotebuf)
+       ),
+
+       TP_fast_assign(
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+               __entry->vers = rqst->rq_vers;
+               __entry->proc = rqst->rq_proc;
+               __assign_str(service, name);
+               __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
+       ),
+
+       TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%u",
+                       __get_str(addr), __entry->xid,
+                       __get_str(service), __entry->vers, __entry->proc)
+);
+
 DECLARE_EVENT_CLASS(svc_rqst_event,
 
        TP_PROTO(struct svc_rqst *rqst),
@@ -519,20 +543,18 @@ DECLARE_EVENT_CLASS(svc_rqst_event,
        TP_STRUCT__entry(
                __field(u32, xid)
                __field(unsigned long, flags)
-               __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
+               __string(addr, rqst->rq_xprt->xpt_remotebuf)
        ),
 
        TP_fast_assign(
                __entry->xid = be32_to_cpu(rqst->rq_xid);
                __entry->flags = rqst->rq_flags;
-               memcpy(__get_dynamic_array(addr),
-                       &rqst->rq_addr, rqst->rq_addrlen);
+               __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
        ),
 
-       TP_printk("addr=%pIScp rq_xid=0x%08x flags=%s",
-               (struct sockaddr *)__get_dynamic_array(addr),
-               __entry->xid,
-               show_rqstp_flags(__entry->flags))
+       TP_printk("addr=%s xid=0x%08x flags=%s",
+                       __get_str(addr), __entry->xid,
+                       show_rqstp_flags(__entry->flags))
 );
 
 DEFINE_EVENT(svc_rqst_event, svc_defer,
@@ -553,27 +575,21 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
                __field(u32, xid)
                __field(int, status)
                __field(unsigned long, flags)
-               __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
+               __string(addr, rqst->rq_xprt->xpt_remotebuf)
        ),
 
        TP_fast_assign(
                __entry->xid = be32_to_cpu(rqst->rq_xid);
                __entry->status = status;
                __entry->flags = rqst->rq_flags;
-               memcpy(__get_dynamic_array(addr),
-                       &rqst->rq_addr, rqst->rq_addrlen);
+               __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
        ),
 
-       TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s",
-               (struct sockaddr *)__get_dynamic_array(addr),
-               __entry->xid,
-               __entry->status, show_rqstp_flags(__entry->flags))
+       TP_printk("addr=%s xid=0x%08x status=%d flags=%s",
+                 __get_str(addr), __entry->xid,
+                 __entry->status, show_rqstp_flags(__entry->flags))
 );
 
-DEFINE_EVENT(svc_rqst_status, svc_process,
-       TP_PROTO(struct svc_rqst *rqst, int status),
-       TP_ARGS(rqst, status));
-
 DEFINE_EVENT(svc_rqst_status, svc_send,
        TP_PROTO(struct svc_rqst *rqst, int status),
        TP_ARGS(rqst, status));
@@ -591,7 +607,9 @@ DEFINE_EVENT(svc_rqst_status, svc_send,
                { (1UL << XPT_OLD),             "XPT_OLD"},             \
                { (1UL << XPT_LISTENER),        "XPT_LISTENER"},        \
                { (1UL << XPT_CACHE_AUTH),      "XPT_CACHE_AUTH"},      \
-               { (1UL << XPT_LOCAL),           "XPT_LOCAL"})
+               { (1UL << XPT_LOCAL),           "XPT_LOCAL"},           \
+               { (1UL << XPT_KILL_TEMP),       "XPT_KILL_TEMP"},       \
+               { (1UL << XPT_CONG_CTRL),       "XPT_CONG_CTRL"})
 
 TRACE_EVENT(svc_xprt_do_enqueue,
        TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),
@@ -602,26 +620,19 @@ TRACE_EVENT(svc_xprt_do_enqueue,
                __field(struct svc_xprt *, xprt)
                __field(int, pid)
                __field(unsigned long, flags)
-               __dynamic_array(unsigned char, addr, xprt != NULL ?
-                       xprt->xpt_remotelen : 0)
+               __string(addr, xprt->xpt_remotebuf)
        ),
 
        TP_fast_assign(
                __entry->xprt = xprt;
                __entry->pid = rqst? rqst->rq_task->pid : 0;
-               if (xprt) {
-                       memcpy(__get_dynamic_array(addr),
-                               &xprt->xpt_remote,
-                               xprt->xpt_remotelen);
-                       __entry->flags = xprt->xpt_flags;
-               } else
-                       __entry->flags = 0;
-       ),
-
-       TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
-               __get_dynamic_array_len(addr) != 0 ?
-                       (struct sockaddr *)__get_dynamic_array(addr) : NULL,
-               __entry->pid, show_svc_xprt_flags(__entry->flags))
+               __entry->flags = xprt->xpt_flags;
+               __assign_str(addr, xprt->xpt_remotebuf);
+       ),
+
+       TP_printk("xprt=%p addr=%s pid=%d flags=%s",
+                       __entry->xprt, __get_str(addr),
+                       __entry->pid, show_svc_xprt_flags(__entry->flags))
 );
 
 DECLARE_EVENT_CLASS(svc_xprt_event,
@@ -632,35 +643,50 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
        TP_STRUCT__entry(
                __field(struct svc_xprt *, xprt)
                __field(unsigned long, flags)
-               __dynamic_array(unsigned char, addr, xprt != NULL ?
-                       xprt->xpt_remotelen : 0)
+               __string(addr, xprt->xpt_remotebuf)
        ),
 
        TP_fast_assign(
                __entry->xprt = xprt;
-               if (xprt) {
-                       memcpy(__get_dynamic_array(addr),
-                                       &xprt->xpt_remote,
-                                       xprt->xpt_remotelen);
-                       __entry->flags = xprt->xpt_flags;
-               } else
-                       __entry->flags = 0;
-       ),
-
-       TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt,
-               __get_dynamic_array_len(addr) != 0 ?
-                       (struct sockaddr *)__get_dynamic_array(addr) : NULL,
-               show_svc_xprt_flags(__entry->flags))
-);
+               __entry->flags = xprt->xpt_flags;
+               __assign_str(addr, xprt->xpt_remotebuf);
+       ),
 
-DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue,
-       TP_PROTO(struct svc_xprt *xprt),
-       TP_ARGS(xprt));
+       TP_printk("xprt=%p addr=%s flags=%s",
+                       __entry->xprt, __get_str(addr),
+                       show_svc_xprt_flags(__entry->flags))
+);
 
 DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
        TP_PROTO(struct svc_xprt *xprt),
        TP_ARGS(xprt));
 
+TRACE_EVENT(svc_xprt_dequeue,
+       TP_PROTO(struct svc_rqst *rqst),
+
+       TP_ARGS(rqst),
+
+       TP_STRUCT__entry(
+               __field(struct svc_xprt *, xprt)
+               __field(unsigned long, flags)
+               __field(unsigned long, wakeup)
+               __string(addr, rqst->rq_xprt->xpt_remotebuf)
+       ),
+
+       TP_fast_assign(
+               __entry->xprt = rqst->rq_xprt;
+               __entry->flags = rqst->rq_xprt->xpt_flags;
+               __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(),
+                                                       rqst->rq_qtime));
+               __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
+       ),
+
+       TP_printk("xprt=%p addr=%s flags=%s wakeup-us=%lu",
+                       __entry->xprt, __get_str(addr),
+                       show_svc_xprt_flags(__entry->flags),
+                       __entry->wakeup)
+);
+
 TRACE_EVENT(svc_wake_up,
        TP_PROTO(int pid),
 
@@ -686,28 +712,42 @@ TRACE_EVENT(svc_handle_xprt,
                __field(struct svc_xprt *, xprt)
                __field(int, len)
                __field(unsigned long, flags)
-               __dynamic_array(unsigned char, addr, xprt != NULL ?
-                       xprt->xpt_remotelen : 0)
+               __string(addr, xprt->xpt_remotebuf)
        ),
 
        TP_fast_assign(
                __entry->xprt = xprt;
                __entry->len = len;
-               if (xprt) {
-                       memcpy(__get_dynamic_array(addr),
-                                       &xprt->xpt_remote,
-                                       xprt->xpt_remotelen);
-                       __entry->flags = xprt->xpt_flags;
-               } else
-                       __entry->flags = 0;
-       ),
-
-       TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
-               __get_dynamic_array_len(addr) != 0 ?
-                       (struct sockaddr *)__get_dynamic_array(addr) : NULL,
+               __entry->flags = xprt->xpt_flags;
+               __assign_str(addr, xprt->xpt_remotebuf);
+       ),
+
+       TP_printk("xprt=%p addr=%s len=%d flags=%s",
+               __entry->xprt, __get_str(addr),
                __entry->len, show_svc_xprt_flags(__entry->flags))
 );
 
+TRACE_EVENT(svc_stats_latency,
+       TP_PROTO(const struct svc_rqst *rqst),
+
+       TP_ARGS(rqst),
+
+       TP_STRUCT__entry(
+               __field(u32, xid)
+               __field(unsigned long, execute)
+               __string(addr, rqst->rq_xprt->xpt_remotebuf)
+       ),
+
+       TP_fast_assign(
+               __entry->xid = be32_to_cpu(rqst->rq_xid);
+               __entry->execute = ktime_to_us(ktime_sub(ktime_get(),
+                                                        rqst->rq_stime));
+               __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
+       ),
+
+       TP_printk("addr=%s xid=0x%08x execute-us=%lu",
+               __get_str(addr), __entry->xid, __entry->execute)
+);
 
 DECLARE_EVENT_CLASS(svc_deferred_event,
        TP_PROTO(struct svc_deferred_req *dr),
@@ -716,18 +756,16 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
 
        TP_STRUCT__entry(
                __field(u32, xid)
-               __dynamic_array(unsigned char, addr, dr->addrlen)
+               __string(addr, dr->xprt->xpt_remotebuf)
        ),
 
        TP_fast_assign(
                __entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
                                                       (dr->xprt_hlen>>2)));
-               memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen);
+               __assign_str(addr, dr->xprt->xpt_remotebuf);
        ),
 
-       TP_printk("addr=%pIScp xid=0x%08x",
-               (struct sockaddr *)__get_dynamic_array(addr),
-               __entry->xid)
+       TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid)
 );
 
 DEFINE_EVENT(svc_deferred_event, svc_drop_deferred,
index 12649c9..8654494 100644 (file)
@@ -237,9 +237,6 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
 
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
 
-       err = crypto_ahash_init(req);
-       if (err)
-               goto out;
        err = crypto_ahash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
        if (err)
                goto out;
index 1d74d65..94a2b3f 100644 (file)
@@ -177,6 +177,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
        u64 seq_send;
        u8 *cksumkey;
        unsigned int cksum_usage;
+       __be64 seq_send_be64;
 
        dprintk("RPC:       %s\n", __func__);
 
@@ -187,7 +188,9 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
        spin_lock(&krb5_seq_lock);
        seq_send = ctx->seq_send64++;
        spin_unlock(&krb5_seq_lock);
-       *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+
+       seq_send_be64 = cpu_to_be64(seq_send);
+       memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
 
        if (ctx->initiate) {
                cksumkey = ctx->initiator_sign;
index dcf9515..b601a73 100644 (file)
@@ -155,10 +155,12 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
        u8 flags;
        int i;
        unsigned int cksum_usage;
+       __be16 be16_ptr;
 
        dprintk("RPC:       %s\n", __func__);
 
-       if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
+       memcpy(&be16_ptr, (char *) ptr, 2);
+       if (be16_to_cpu(be16_ptr) != KG2_TOK_MIC)
                return GSS_S_DEFECTIVE_TOKEN;
 
        flags = ptr[2];
index c536cc2..cdda474 100644 (file)
@@ -1450,8 +1450,8 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
                           struct cache_detail *cd)
 {
        char tbuf[20];
-       char *bp, *ep;
-       time_t then, now;
+       char *ep;
+       time_t now;
 
        if (*ppos || count > sizeof(tbuf)-1)
                return -EINVAL;
@@ -1461,24 +1461,24 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
        simple_strtoul(tbuf, &ep, 0);
        if (*ep && *ep != '\n')
                return -EINVAL;
+       /* Note that while we check that 'buf' holds a valid number,
+        * we always ignore the value and just flush everything.
+        * Making use of the number leads to races.
+        */
 
-       bp = tbuf;
-       then = get_expiry(&bp);
        now = seconds_since_boot();
-       cd->nextcheck = now;
-       /* Can only set flush_time to 1 second beyond "now", or
-        * possibly 1 second beyond flushtime.  This is because
-        * flush_time never goes backwards so it mustn't get too far
-        * ahead of time.
+       /* Always flush everything, so behave like cache_purge()
+        * Do this by advancing flush_time to the current time,
+        * or by one second if it has already reached the current time.
+        * Newly added cache entries will always have ->last_refresh greater
+        * than ->flush_time, so they don't get flushed prematurely.
         */
-       if (then >= now) {
-               /* Want to flush everything, so behave like cache_purge() */
-               if (cd->flush_time >= now)
-                       now = cd->flush_time + 1;
-               then = now;
-       }
 
-       cd->flush_time = then;
+       if (cd->flush_time >= now)
+               now = cd->flush_time + 1;
+
+       cd->flush_time = now;
+       cd->nextcheck = now;
        cache_flush();
 
        *ppos += count;
index 387cc4a..30a4226 100644 (file)
@@ -1255,6 +1255,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
        /* Syntactic check complete */
        serv->sv_stats->rpccnt++;
+       trace_svc_process(rqstp, progp->pg_name);
 
        /* Build the reply header. */
        statp = resv->iov_base +resv->iov_len;
@@ -1431,14 +1432,10 @@ svc_process(struct svc_rqst *rqstp)
        }
 
        /* Returns 1 for send, 0 for drop */
-       if (likely(svc_process_common(rqstp, argv, resv))) {
-               int ret = svc_send(rqstp);
+       if (likely(svc_process_common(rqstp, argv, resv)))
+               return svc_send(rqstp);
 
-               trace_svc_process(rqstp, ret);
-               return ret;
-       }
 out_drop:
-       trace_svc_process(rqstp, 0);
        svc_drop(rqstp);
        return 0;
 }
@@ -1536,3 +1533,112 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
        return max;
 }
 EXPORT_SYMBOL_GPL(svc_max_payload);
+
+/**
+ * svc_fill_write_vector - Build the kvec array handed to a VFS write
+ * @rqstp: svc_rqst whose rq_vec is filled and whose rq_arg.pages are read
+ * @first: kvec describing the leading part of the payload; may be empty
+ * @total: number of payload bytes to describe
+ *
+ * Returns the number of rq_vec entries that were filled in.
+ */
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
+                                  size_t total)
+{
+       struct page **pagep = rqstp->rq_arg.pages;
+       struct kvec *vec = rqstp->rq_vec;
+       unsigned int nvecs = 0;
+       size_t chunk;
+
+       /* A transport may deliver the whole payload in rq_arg.pages,
+        * leaving @first with a zero length; it then gets no entry.
+        */
+       if (first->iov_len) {
+               chunk = min_t(size_t, total, first->iov_len);
+               vec[nvecs].iov_base = first->iov_base;
+               vec[nvecs].iov_len = chunk;
+               total -= chunk;
+               nvecs++;
+       }
+
+       /* The rest is taken one page at a time, from offset zero */
+       WARN_ON_ONCE(rqstp->rq_arg.page_base != 0);
+       for (; total; total -= chunk, pagep++) {
+               chunk = min_t(size_t, total, PAGE_SIZE);
+               vec[nvecs].iov_base = page_address(*pagep);
+               vec[nvecs].iov_len = chunk;
+               nvecs++;
+       }
+
+       /* Checked only after the fact: warns, does not prevent overrun */
+       WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
+       return nvecs;
+}
+EXPORT_SYMBOL_GPL(svc_fill_write_vector);
+
+/**
+ * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
+ * @rqstp: svc_rqst to operate on
+ * @first: buffer containing first section of pathname
+ * @total: total length of the pathname argument
+ *
+ * Returns pointer to a NUL-terminated string, ERR_PTR(-ENAMETOOLONG) if
+ * @total exceeds PAGE_SIZE - 1, or ERR_PTR(-EINVAL) if the pathname
+ * contains a NUL byte. The buffer is released automatically when
+ * @rqstp is recycled.
+ */
+char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
+                               size_t total)
+{
+       struct xdr_buf *arg = &rqstp->rq_arg;
+       struct page **pages;
+       char *result;
+
+       /* VFS API demands a NUL-terminated pathname. A page from
+        * @rqstp serves as the pathname buffer, to enable direct
+        * placement, so the buffer is PAGE_SIZE bytes; the final
+        * byte is reserved for the terminating NUL.
+        */
+       if (total > PAGE_SIZE - 1)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       /* Some types of transport can present the pathname entirely
+        * in rq_arg.pages. If not, then copy the pathname into one
+        * page.
+        */
+       pages = arg->pages;
+       WARN_ON_ONCE(arg->page_base != 0);
+       if (!first->iov_base) {
+               result = page_address(*pages);
+               result[total] = '\0';
+       } else {
+               size_t len, remaining;
+               char *dst;
+
+               result = page_address(*(rqstp->rq_next_page++));
+               dst = result;
+               remaining = total;
+
+               len = min_t(size_t, total, first->iov_len);
+               memcpy(dst, first->iov_base, len);
+               dst += len;
+               remaining -= len;
+
+               /* No more than one page left */
+               if (remaining) {
+                       len = min_t(size_t, remaining, PAGE_SIZE);
+                       memcpy(dst, page_address(*pages), len);
+                       dst += len;
+               }
+
+               *dst = '\0';
+       }
+
+       /* Sanity check: we don't allow the pathname argument to
+        * contain a NUL byte.
+        */
+       if (strlen(result) != total)
+               return ERR_PTR(-EINVAL);
+       return result;
+}
+EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
index f9307bd..5185efb 100644 (file)
@@ -173,6 +173,7 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
        set_bit(XPT_BUSY, &xprt->xpt_flags);
        rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
        xprt->xpt_net = get_net(net);
+       strcpy(xprt->xpt_remotebuf, "uninitialized");
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
@@ -382,25 +383,21 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
        int cpu;
 
        if (!svc_xprt_has_something_to_do(xprt))
-               goto out;
+               return;
 
        /* Mark transport as busy. It will remain in this state until
         * the provider calls svc_xprt_received. We update XPT_BUSY
         * atomically because it also guards against trying to enqueue
         * the transport twice.
         */
-       if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
-               /* Don't enqueue transport while already enqueued */
-               dprintk("svc: transport %p busy, not enqueued\n", xprt);
-               goto out;
-       }
+       if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
+               return;
 
        cpu = get_cpu();
        pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
 
        atomic_long_inc(&pool->sp_stats.packets);
 
-       dprintk("svc: transport %p put into queue\n", xprt);
        spin_lock_bh(&pool->sp_lock);
        list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
        pool->sp_stats.sockets_queued++;
@@ -412,6 +409,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
                if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
                        continue;
                atomic_long_inc(&pool->sp_stats.threads_woken);
+               rqstp->rq_qtime = ktime_get();
                wake_up_process(rqstp->rq_task);
                goto out_unlock;
        }
@@ -420,7 +418,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 out_unlock:
        rcu_read_unlock();
        put_cpu();
-out:
        trace_svc_xprt_do_enqueue(xprt, rqstp);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
@@ -454,13 +451,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
                                        struct svc_xprt, xpt_ready);
                list_del_init(&xprt->xpt_ready);
                svc_xprt_get(xprt);
-
-               dprintk("svc: transport %p dequeued, inuse=%d\n",
-                       xprt, kref_read(&xprt->xpt_ref));
        }
        spin_unlock_bh(&pool->sp_lock);
 out:
-       trace_svc_xprt_dequeue(xprt);
        return xprt;
 }
 
@@ -492,7 +485,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
 {
        struct svc_xprt *xprt = rqstp->rq_xprt;
 
-       rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+       xprt->xpt_ops->xpo_release_rqst(rqstp);
 
        kfree(rqstp->rq_deferred);
        rqstp->rq_deferred = NULL;
@@ -538,7 +531,6 @@ void svc_wake_up(struct svc_serv *serv)
                if (test_bit(RQ_BUSY, &rqstp->rq_flags))
                        continue;
                rcu_read_unlock();
-               dprintk("svc: daemon %p woken up.\n", rqstp);
                wake_up_process(rqstp->rq_task);
                trace_svc_wake_up(rqstp->rq_task->pid);
                return;
@@ -734,6 +726,7 @@ out_found:
                rqstp->rq_chandle.thread_wait = 5*HZ;
        else
                rqstp->rq_chandle.thread_wait = 1*HZ;
+       trace_svc_xprt_dequeue(rqstp);
        return rqstp->rq_xprt;
 }
 
@@ -789,7 +782,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
                        len = svc_deferred_recv(rqstp);
                else
                        len = xprt->xpt_ops->xpo_recvfrom(rqstp);
-               dprintk("svc: got len=%d\n", len);
+               rqstp->rq_stime = ktime_get();
                rqstp->rq_reserved = serv->sv_max_mesg;
                atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
        }
@@ -844,10 +837,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 
        clear_bit(XPT_OLD, &xprt->xpt_flags);
 
-       if (xprt->xpt_ops->xpo_secure_port(rqstp))
-               set_bit(RQ_SECURE, &rqstp->rq_flags);
-       else
-               clear_bit(RQ_SECURE, &rqstp->rq_flags);
+       xprt->xpt_ops->xpo_secure_port(rqstp);
        rqstp->rq_chandle.defer = svc_defer;
        rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]);
 
@@ -859,7 +849,6 @@ out_release:
        rqstp->rq_res.len = 0;
        svc_xprt_release(rqstp);
 out:
-       trace_svc_recv(rqstp, err);
        return err;
 }
 EXPORT_SYMBOL_GPL(svc_recv);
@@ -889,7 +878,7 @@ int svc_send(struct svc_rqst *rqstp)
                goto out;
 
        /* release the receive skb before sending the reply */
-       rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
+       xprt->xpt_ops->xpo_release_rqst(rqstp);
 
        /* calculate over-all length */
        xb = &rqstp->rq_res;
@@ -899,6 +888,7 @@ int svc_send(struct svc_rqst *rqstp)
 
        /* Grab mutex to serialize outgoing data. */
        mutex_lock(&xprt->xpt_mutex);
+       trace_svc_stats_latency(rqstp);
        if (test_bit(XPT_DEAD, &xprt->xpt_flags)
                        || test_bit(XPT_CLOSE, &xprt->xpt_flags))
                len = -ENOTCONN;
@@ -906,12 +896,12 @@ int svc_send(struct svc_rqst *rqstp)
                len = xprt->xpt_ops->xpo_sendto(rqstp);
        mutex_unlock(&xprt->xpt_mutex);
        rpc_wake_up(&xprt->xpt_bc_pending);
+       trace_svc_send(rqstp, len);
        svc_xprt_release(rqstp);
 
        if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
                len = 0;
 out:
-       trace_svc_send(rqstp, len);
        return len;
 }
 
index 08cd951..5445145 100644 (file)
@@ -391,9 +391,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
        release_sock(sock->sk);
 }
 
-static int svc_sock_secure_port(struct svc_rqst *rqstp)
+static void svc_sock_secure_port(struct svc_rqst *rqstp)
 {
-       return svc_port_is_privileged(svc_addr(rqstp));
+       if (svc_port_is_privileged(svc_addr(rqstp)))
+               set_bit(RQ_SECURE, &rqstp->rq_flags);
+       else
+               clear_bit(RQ_SECURE, &rqstp->rq_flags);
 }
 
 /*
@@ -1309,6 +1312,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
        set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
        if (sk->sk_state == TCP_LISTEN) {
                dprintk("setting up TCP socket for listening\n");
+               strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
                set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
                sk->sk_data_ready = svc_tcp_listen_data_ready;
                set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
index a4a8f69..dd8a431 100644 (file)
@@ -51,9 +51,9 @@
 #define RPCDBG_FACILITY        RPCDBG_SVCXPRT
 
 /* RPC/RDMA parameters */
-unsigned int svcrdma_ord = RPCRDMA_ORD;
+unsigned int svcrdma_ord = 16; /* historical default */
 static unsigned int min_ord = 1;
-static unsigned int max_ord = 4096;
+static unsigned int max_ord = 255;
 unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
 unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
 static unsigned int min_max_requests = 4;
index 19e9c6b..3d45015 100644 (file)
  * the RDMA_RECV completion. The SGL should contain full pages up until the
  * last one.
  */
-static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
-                              struct svc_rdma_op_ctxt *ctxt,
-                              u32 byte_count)
+static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
+                                  struct svc_rdma_op_ctxt *ctxt)
 {
        struct page *page;
-       u32 bc;
        int sge_no;
+       u32 len;
 
-       /* Swap the page in the SGE with the page in argpages */
+       /* The reply path assumes the Call's transport header resides
+        * in rqstp->rq_pages[0].
+        */
        page = ctxt->pages[0];
        put_page(rqstp->rq_pages[0]);
        rqstp->rq_pages[0] = page;
@@ -126,35 +127,35 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
        /* Set up the XDR head */
        rqstp->rq_arg.head[0].iov_base = page_address(page);
        rqstp->rq_arg.head[0].iov_len =
-               min_t(size_t, byte_count, ctxt->sge[0].length);
-       rqstp->rq_arg.len = byte_count;
-       rqstp->rq_arg.buflen = byte_count;
+               min_t(size_t, ctxt->byte_len, ctxt->sge[0].length);
+       rqstp->rq_arg.len = ctxt->byte_len;
+       rqstp->rq_arg.buflen = ctxt->byte_len;
 
        /* Compute bytes past head in the SGL */
-       bc = byte_count - rqstp->rq_arg.head[0].iov_len;
+       len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len;
 
        /* If data remains, store it in the pagelist */
-       rqstp->rq_arg.page_len = bc;
+       rqstp->rq_arg.page_len = len;
        rqstp->rq_arg.page_base = 0;
 
        sge_no = 1;
-       while (bc && sge_no < ctxt->count) {
+       while (len && sge_no < ctxt->count) {
                page = ctxt->pages[sge_no];
                put_page(rqstp->rq_pages[sge_no]);
                rqstp->rq_pages[sge_no] = page;
-               bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
+               len -= min_t(u32, len, ctxt->sge[sge_no].length);
                sge_no++;
        }
        rqstp->rq_respages = &rqstp->rq_pages[sge_no];
        rqstp->rq_next_page = rqstp->rq_respages + 1;
 
        /* If not all pages were used from the SGL, free the remaining ones */
-       bc = sge_no;
+       len = sge_no;
        while (sge_no < ctxt->count) {
                page = ctxt->pages[sge_no++];
                put_page(page);
        }
-       ctxt->count = bc;
+       ctxt->count = len;
 
        /* Set up tail */
        rqstp->rq_arg.tail[0].iov_base = NULL;
@@ -534,10 +535,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
                ctxt, rdma_xprt, rqstp);
        atomic_inc(&rdma_stat_recv);
 
-       /* Build up the XDR from the receive buffers. */
-       rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len);
+       svc_rdma_build_arg_xdr(rqstp, ctxt);
 
-       /* Decode the RDMA header. */
        p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
        ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg);
        if (ret < 0)
index 9ad12a2..96cc8f6 100644 (file)
@@ -69,7 +69,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *);
 static void svc_rdma_detach(struct svc_xprt *xprt);
 static void svc_rdma_free(struct svc_xprt *xprt);
 static int svc_rdma_has_wspace(struct svc_xprt *xprt);
-static int svc_rdma_secure_port(struct svc_rqst *);
+static void svc_rdma_secure_port(struct svc_rqst *);
 static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
 
 static const struct svc_xprt_ops svc_rdma_ops = {
@@ -330,9 +330,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 
 flushed:
        if (wc->status != IB_WC_WR_FLUSH_ERR)
-               pr_warn("svcrdma: receive: %s (%u/0x%x)\n",
-                       ib_wc_status_msg(wc->status),
-                       wc->status, wc->vendor_err);
+               pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
+                      ib_wc_status_msg(wc->status),
+                      wc->status, wc->vendor_err);
        set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
        svc_rdma_put_context(ctxt, 1);
 
@@ -401,8 +401,10 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
         */
        set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
 
-       if (listener)
+       if (listener) {
+               strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
                set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
+       }
 
        return cma_xprt;
 }
@@ -762,13 +764,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        if (!svc_rdma_prealloc_ctxts(newxprt))
                goto errout;
 
-       /*
-        * Limit ORD based on client limit, local device limit, and
-        * configured svcrdma limit.
-        */
-       newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
-       newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
-
        newxprt->sc_pd = ib_alloc_pd(dev, 0);
        if (IS_ERR(newxprt->sc_pd)) {
                dprintk("svcrdma: error creating PD for connect request\n");
@@ -843,15 +838,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
        memset(&conn_param, 0, sizeof conn_param);
        conn_param.responder_resources = 0;
-       conn_param.initiator_depth = newxprt->sc_ord;
+       conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
+                                          dev->attrs.max_qp_init_rd_atom);
+       if (!conn_param.initiator_depth) {
+               dprintk("svcrdma: invalid ORD setting\n");
+               ret = -EINVAL;
+               goto errout;
+       }
        conn_param.private_data = &pmsg;
        conn_param.private_data_len = sizeof(pmsg);
        ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
-       if (ret) {
-               dprintk("svcrdma: failed to accept new connection, ret=%d\n",
-                      ret);
+       if (ret)
                goto errout;
-       }
 
        dprintk("svcrdma: new connection %p accepted:\n", newxprt);
        sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
@@ -862,7 +860,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
        dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
        dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
-       dprintk("    ord             : %d\n", newxprt->sc_ord);
+       dprintk("    ord             : %d\n", conn_param.initiator_depth);
 
        return &newxprt->sc_xprt;
 
@@ -992,9 +990,9 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
        return 1;
 }
 
-static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+static void svc_rdma_secure_port(struct svc_rqst *rqstp)
 {
-       return 1;
+       set_bit(RQ_SECURE, &rqstp->rq_flags);
 }
 
 static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)