Merge branch 'devel' into for-linus
authorTrond Myklebust <Trond.Myklebust@netapp.com>
Wed, 1 Apr 2009 17:28:15 +0000 (13:28 -0400)
committerTrond Myklebust <Trond.Myklebust@netapp.com>
Wed, 1 Apr 2009 17:28:15 +0000 (13:28 -0400)
39 files changed:
fs/lockd/clntlock.c
fs/lockd/mon.c
fs/lockd/svc.c
fs/nfs/callback.c
fs/nfs/callback.h
fs/nfs/client.c
fs/nfs/dir.c
fs/nfs/file.c
fs/nfs/getroot.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/nfs2xdr.c
fs/nfs/nfs3proc.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/pagelist.c
fs/nfs/proc.c
fs/nfs/super.c
fs/nfs/write.c
fs/nfsd/nfsctl.c
fs/nfsd/nfssvc.c
include/linux/nfs_fs.h
include/linux/nfs_fs_sb.h
include/linux/nfs_xdr.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/svc_xprt.h
include/linux/sunrpc/xprt.h
net/sunrpc/Kconfig
net/sunrpc/clnt.c
net/sunrpc/rpcb_clnt.c
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcsock.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtsock.c

index aedc47a..1f3b0fc 100644 (file)
@@ -139,55 +139,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
        return 0;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static const struct in6_addr *nlmclnt_map_v4addr(const struct sockaddr *sap,
-                                                struct in6_addr *addr_mapped)
-{
-       const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
-
-       switch (sap->sa_family) {
-       case AF_INET6:
-               return &((const struct sockaddr_in6 *)sap)->sin6_addr;
-       case AF_INET:
-               ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, addr_mapped);
-               return addr_mapped;
-       }
-
-       return NULL;
-}
-
-/*
- * If lockd is using a PF_INET6 listener, all incoming requests appear
- * to come from AF_INET6 remotes.  The address of AF_INET remotes are
- * mapped to AF_INET6 automatically by the network layer.  In case the
- * user passed an AF_INET server address at mount time, ensure both
- * addresses are AF_INET6 before comparing them.
- */
-static int nlmclnt_cmp_addr(const struct nlm_host *host,
-                           const struct sockaddr *sap)
-{
-       const struct in6_addr *addr1;
-       const struct in6_addr *addr2;
-       struct in6_addr addr1_mapped;
-       struct in6_addr addr2_mapped;
-
-       addr1 = nlmclnt_map_v4addr(nlm_addr(host), &addr1_mapped);
-       if (likely(addr1 != NULL)) {
-               addr2 = nlmclnt_map_v4addr(sap, &addr2_mapped);
-               if (likely(addr2 != NULL))
-                       return ipv6_addr_equal(addr1, addr2);
-       }
-
-       return 0;
-}
-#else  /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
-static int nlmclnt_cmp_addr(const struct nlm_host *host,
-                           const struct sockaddr *sap)
-{
-       return nlm_cmp_addr(nlm_addr(host), sap);
-}
-#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
-
 /*
  * The server lockd has called us back to tell us the lock was granted
  */
@@ -215,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
                 */
                if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
                        continue;
-               if (!nlmclnt_cmp_addr(block->b_host, addr))
+               if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
                        continue;
                if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
                        continue;
index 5e2c4d5..6d5d4a4 100644 (file)
@@ -16,6 +16,8 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 
+#include <asm/unaligned.h>
+
 #define NLMDBG_FACILITY                NLMDBG_MONITOR
 #define NSM_PROGRAM            100024
 #define NSM_VERSION            1
@@ -274,10 +276,12 @@ static void nsm_init_private(struct nsm_handle *nsm)
 {
        u64 *p = (u64 *)&nsm->sm_priv.data;
        struct timespec ts;
+       s64 ns;
 
        ktime_get_ts(&ts);
-       *p++ = timespec_to_ns(&ts);
-       *p = (unsigned long)nsm;
+       ns = timespec_to_ns(&ts);
+       put_unaligned(ns, p);
+       put_unaligned((unsigned long)nsm, p + 1);
 }
 
 static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
index 64f1c31..abf8388 100644 (file)
@@ -52,17 +52,6 @@ static struct task_struct    *nlmsvc_task;
 static struct svc_rqst         *nlmsvc_rqst;
 unsigned long                  nlmsvc_timeout;
 
-/*
- * If the kernel has IPv6 support available, always listen for
- * both AF_INET and AF_INET6 requests.
- */
-#if (defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) && \
-       defined(CONFIG_SUNRPC_REGISTER_V4)
-static const sa_family_t       nlmsvc_family = AF_INET6;
-#else  /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */
-static const sa_family_t       nlmsvc_family = AF_INET;
-#endif /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */
-
 /*
  * These can be set at insmod time (useful for NFS as root filesystem),
  * and also changed through the sysctl interface.  -- Jamie Lokier, Aug 2003
@@ -204,19 +193,30 @@ lockd(void *vrqstp)
        return 0;
 }
 
-static int create_lockd_listener(struct svc_serv *serv, char *name,
-                                unsigned short port)
+static int create_lockd_listener(struct svc_serv *serv, const char *name,
+                                const int family, const unsigned short port)
 {
        struct svc_xprt *xprt;
 
-       xprt = svc_find_xprt(serv, name, 0, 0);
+       xprt = svc_find_xprt(serv, name, family, 0);
        if (xprt == NULL)
-               return svc_create_xprt(serv, name, port, SVC_SOCK_DEFAULTS);
-
+               return svc_create_xprt(serv, name, family, port,
+                                               SVC_SOCK_DEFAULTS);
        svc_xprt_put(xprt);
        return 0;
 }
 
+static int create_lockd_family(struct svc_serv *serv, const int family)
+{
+       int err;
+
+       err = create_lockd_listener(serv, "udp", family, nlm_udpport);
+       if (err < 0)
+               return err;
+
+       return create_lockd_listener(serv, "tcp", family, nlm_tcpport);
+}
+
 /*
  * Ensure there are active UDP and TCP listeners for lockd.
  *
@@ -232,13 +232,15 @@ static int make_socks(struct svc_serv *serv)
        static int warned;
        int err;
 
-       err = create_lockd_listener(serv, "udp", nlm_udpport);
+       err = create_lockd_family(serv, PF_INET);
        if (err < 0)
                goto out_err;
 
-       err = create_lockd_listener(serv, "tcp", nlm_tcpport);
-       if (err < 0)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       err = create_lockd_family(serv, PF_INET6);
+       if (err < 0 && err != -EAFNOSUPPORT)
                goto out_err;
+#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
 
        warned = 0;
        return 0;
@@ -274,7 +276,7 @@ int lockd_up(void)
                        "lockd_up: no pid, %d users??\n", nlmsvc_users);
 
        error = -ENOMEM;
-       serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, nlmsvc_family, NULL);
+       serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
        if (!serv) {
                printk(KERN_WARNING "lockd_up: create service failed\n");
                goto out;
index 3e634f2..a886e69 100644 (file)
@@ -38,19 +38,10 @@ static struct svc_program nfs4_callback_program;
 
 unsigned int nfs_callback_set_tcpport;
 unsigned short nfs_callback_tcpport;
+unsigned short nfs_callback_tcpport6;
 static const int nfs_set_port_min = 0;
 static const int nfs_set_port_max = 65535;
 
-/*
- * If the kernel has IPv6 support available, always listen for
- * both AF_INET and AF_INET6 requests.
- */
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static const sa_family_t       nfs_callback_family = AF_INET6;
-#else
-static const sa_family_t       nfs_callback_family = AF_INET;
-#endif
-
 static int param_set_port(const char *val, struct kernel_param *kp)
 {
        char *endp;
@@ -116,19 +107,29 @@ int nfs_callback_up(void)
        mutex_lock(&nfs_callback_mutex);
        if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
                goto out;
-       serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
-                               nfs_callback_family, NULL);
+       serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
        ret = -ENOMEM;
        if (!serv)
                goto out_err;
 
-       ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport,
-                             SVC_SOCK_ANONYMOUS);
+       ret = svc_create_xprt(serv, "tcp", PF_INET,
+                               nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
        if (ret <= 0)
                goto out_err;
        nfs_callback_tcpport = ret;
        dprintk("NFS: Callback listener port = %u (af %u)\n",
-                       nfs_callback_tcpport, nfs_callback_family);
+                       nfs_callback_tcpport, PF_INET);
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       ret = svc_create_xprt(serv, "tcp", PF_INET6,
+                               nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
+       if (ret > 0) {
+               nfs_callback_tcpport6 = ret;
+               dprintk("NFS: Callback listener port = %u (af %u)\n",
+                               nfs_callback_tcpport6, PF_INET6);
+       } else if (ret != -EAFNOSUPPORT)
+               goto out_err;
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 
        nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
        if (IS_ERR(nfs_callback_info.rqst)) {
index bb25d21..e110e28 100644 (file)
@@ -72,5 +72,6 @@ extern void nfs_callback_down(void);
 
 extern unsigned int nfs_callback_set_tcpport;
 extern unsigned short nfs_callback_tcpport;
+extern unsigned short nfs_callback_tcpport6;
 
 #endif /* __LINUX_FS_NFS_CALLBACK_H */
index 2277421..aba3801 100644 (file)
@@ -224,38 +224,6 @@ void nfs_put_client(struct nfs_client *clp)
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped)
-{
-       switch (sa->sa_family) {
-               default:
-                       return NULL;
-               case AF_INET6:
-                       return &((const struct sockaddr_in6 *)sa)->sin6_addr;
-                       break;
-               case AF_INET:
-                       ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr,
-                                       addr_mapped);
-                       return addr_mapped;
-       }
-}
-
-static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
-               const struct sockaddr *sa2)
-{
-       const struct in6_addr *addr1;
-       const struct in6_addr *addr2;
-       struct in6_addr addr1_mapped;
-       struct in6_addr addr2_mapped;
-
-       addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped);
-       if (likely(addr1 != NULL)) {
-               addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped);
-               if (likely(addr2 != NULL))
-                       return ipv6_addr_equal(addr1, addr2);
-       }
-       return 0;
-}
-
 /*
  * Test if two ip6 socket addresses refer to the same socket by
  * comparing relevant fields. The padding bytes specifically, are not
@@ -267,38 +235,21 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
  *
  * The caller should ensure both socket addresses are AF_INET6.
  */
-static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
-                               const struct sockaddr *sa2)
+static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+                                     const struct sockaddr *sa2)
 {
-       const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1;
-       const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2;
+       const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
+       const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
 
-       if (!ipv6_addr_equal(&saddr1->sin6_addr,
-                            &saddr1->sin6_addr))
+       if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
+           sin1->sin6_scope_id != sin2->sin6_scope_id)
                return 0;
-       if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
-           saddr1->sin6_scope_id != saddr2->sin6_scope_id)
-               return 0;
-       return saddr1->sin6_port == saddr2->sin6_port;
-}
-#else
-static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
-                                const struct sockaddr_in *sa2)
-{
-       return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
-}
 
-static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
-                                const struct sockaddr *sa2)
-{
-       if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET))
-               return 0;
-       return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
-                       (const struct sockaddr_in *)sa2);
+       return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
 }
-
-static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
-                               const struct sockaddr * sa2)
+#else  /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
+static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+                                     const struct sockaddr *sa2)
 {
        return 0;
 }
@@ -311,20 +262,57 @@ static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
  *
  * The caller should ensure both socket addresses are AF_INET.
  */
+static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1,
+                                     const struct sockaddr *sa2)
+{
+       const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
+       const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
+
+       return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
+}
+
+static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
+                               const struct sockaddr *sa2)
+{
+       const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
+       const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
+
+       return nfs_sockaddr_match_ipaddr6(sa1, sa2) &&
+               (sin1->sin6_port == sin2->sin6_port);
+}
+
 static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
                                const struct sockaddr *sa2)
 {
-       const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1;
-       const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2;
+       const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
+       const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
 
-       if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr)
+       return nfs_sockaddr_match_ipaddr4(sa1, sa2) &&
+               (sin1->sin_port == sin2->sin_port);
+}
+
+/*
+ * Test if two socket addresses represent the same actual socket,
+ * by comparing (only) relevant fields, excluding the port number.
+ */
+static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
+                                    const struct sockaddr *sa2)
+{
+       if (sa1->sa_family != sa2->sa_family)
                return 0;
-       return saddr1->sin_port == saddr2->sin_port;
+
+       switch (sa1->sa_family) {
+       case AF_INET:
+               return nfs_sockaddr_match_ipaddr4(sa1, sa2);
+       case AF_INET6:
+               return nfs_sockaddr_match_ipaddr6(sa1, sa2);
+       }
+       return 0;
 }
 
 /*
  * Test if two socket addresses represent the same actual socket,
- * by comparing (only) relevant fields.
+ * by comparing (only) relevant fields, including the port number.
  */
 static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
                            const struct sockaddr *sa2)
index 78bf72f..370b190 100644 (file)
@@ -1624,8 +1624,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                } else if (atomic_read(&new_dentry->d_count) > 1)
                        /* dentry still busy? */
                        goto out;
-       } else
-               nfs_drop_nlink(new_inode);
+       }
 
 go_ahead:
        /*
@@ -1638,10 +1637,8 @@ go_ahead:
        }
        nfs_inode_return_delegation(old_inode);
 
-       if (new_inode != NULL) {
+       if (new_inode != NULL)
                nfs_inode_return_delegation(new_inode);
-               d_delete(new_dentry);
-       }
 
        error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
                                           new_dir, &new_dentry->d_name);
@@ -1650,6 +1647,8 @@ out:
        if (rehash)
                d_rehash(rehash);
        if (!error) {
+               if (new_inode != NULL)
+                       nfs_drop_nlink(new_inode);
                d_move(old_dentry, new_dentry);
                nfs_set_verifier(new_dentry,
                                        nfs_save_change_attribute(new_dir));
index cec7939..0abf3f3 100644 (file)
@@ -64,11 +64,7 @@ const struct file_operations nfs_file_operations = {
        .write          = do_sync_write,
        .aio_read       = nfs_file_read,
        .aio_write      = nfs_file_write,
-#ifdef CONFIG_MMU
        .mmap           = nfs_file_mmap,
-#else
-       .mmap           = generic_file_mmap,
-#endif
        .open           = nfs_file_open,
        .flush          = nfs_file_flush,
        .release        = nfs_file_release,
@@ -141,9 +137,6 @@ nfs_file_release(struct inode *inode, struct file *filp)
                        dentry->d_parent->d_name.name,
                        dentry->d_name.name);
 
-       /* Ensure that dirty pages are flushed out with the right creds */
-       if (filp->f_mode & FMODE_WRITE)
-               nfs_wb_all(dentry->d_inode);
        nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
        return nfs_release(inode, filp);
 }
@@ -235,7 +228,6 @@ nfs_file_flush(struct file *file, fl_owner_t id)
        struct nfs_open_context *ctx = nfs_file_open_context(file);
        struct dentry   *dentry = file->f_path.dentry;
        struct inode    *inode = dentry->d_inode;
-       int             status;
 
        dprintk("NFS: flush(%s/%s)\n",
                        dentry->d_parent->d_name.name,
@@ -245,11 +237,8 @@ nfs_file_flush(struct file *file, fl_owner_t id)
                return 0;
        nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 
-       /* Ensure that data+attribute caches are up to date after close() */
-       status = nfs_do_fsync(ctx, inode);
-       if (!status)
-               nfs_revalidate_inode(NFS_SERVER(inode), inode);
-       return status;
+       /* Flush writes to the server and return any errors */
+       return nfs_do_fsync(ctx, inode);
 }
 
 static ssize_t
@@ -304,11 +293,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
        dprintk("NFS: mmap(%s/%s)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name);
 
-       status = nfs_revalidate_mapping(inode, file->f_mapping);
+       /* Note: generic_file_mmap() returns ENOSYS on nommu systems
+        *       so we call that before revalidating the mapping
+        */
+       status = generic_file_mmap(file, vma);
        if (!status) {
                vma->vm_ops = &nfs_file_vm_ops;
-               vma->vm_flags |= VM_CAN_NONLINEAR;
-               file_accessed(file);
+               status = nfs_revalidate_mapping(inode, file->f_mapping);
        }
        return status;
 }
@@ -354,6 +345,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
                file->f_path.dentry->d_name.name,
                mapping->host->i_ino, len, (long long) pos);
 
+       /*
+        * Prevent starvation issues if someone is doing a consistency
+        * sync-to-disk
+        */
+       ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+                       nfs_wait_bit_killable, TASK_KILLABLE);
+       if (ret)
+               return ret;
+
        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
index b7c9b2d..46177cb 100644 (file)
@@ -156,7 +156,7 @@ int nfs4_path_walk(struct nfs_server *server,
                return ret;
        }
 
-       if (fattr.type != NFDIR) {
+       if (!S_ISDIR(fattr.mode)) {
                printk(KERN_ERR "nfs4_get_root:"
                       " getroot encountered non-directory\n");
                return -ENOTDIR;
@@ -213,7 +213,7 @@ eat_dot_dir:
                return ret;
        }
 
-       if (fattr.type != NFDIR) {
+       if (!S_ISDIR(fattr.mode)) {
                printk(KERN_ERR "nfs4_get_root:"
                       " lookupfh encountered non-directory\n");
                return -ENOTDIR;
index 0c38168..a834d1d 100644 (file)
@@ -65,6 +65,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
        return nfs_fileid_to_ino_t(fattr->fileid);
 }
 
+/**
+ * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
+ * @word: long word containing the bit lock
+ */
+int nfs_wait_bit_killable(void *word)
+{
+       if (fatal_signal_pending(current))
+               return -ERESTARTSYS;
+       schedule();
+       return 0;
+}
+
 /**
  * nfs_compat_user_ino64 - returns the user-visible inode number
  * @fileid: 64-bit fileid
@@ -249,13 +261,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
        struct inode *inode = ERR_PTR(-ENOENT);
        unsigned long hash;
 
-       if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+       if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0)
                goto out_no_inode;
-
-       if (!fattr->nlink) {
-               printk("NFS: Buggy server - nlink == 0!\n");
+       if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
                goto out_no_inode;
-       }
 
        hash = nfs_fattr_to_ino_t(fattr);
 
@@ -291,7 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                            && fattr->size <= NFS_LIMIT_READDIRPLUS)
                                set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
                        /* Deal with crossing mountpoints */
-                       if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
+                       if ((fattr->valid & NFS_ATTR_FATTR_FSID)
+                                       && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
                                if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
                                        inode->i_op = &nfs_referral_inode_operations;
                                else
@@ -304,28 +314,45 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                else
                        init_special_inode(inode, inode->i_mode, fattr->rdev);
 
+               memset(&inode->i_atime, 0, sizeof(inode->i_atime));
+               memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
+               memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
+               nfsi->change_attr = 0;
+               inode->i_size = 0;
+               inode->i_nlink = 0;
+               inode->i_uid = -2;
+               inode->i_gid = -2;
+               inode->i_blocks = 0;
+               memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+
                nfsi->read_cache_jiffies = fattr->time_start;
                nfsi->attr_gencount = fattr->gencount;
-               inode->i_atime = fattr->atime;
-               inode->i_mtime = fattr->mtime;
-               inode->i_ctime = fattr->ctime;
-               if (fattr->valid & NFS_ATTR_FATTR_V4)
+               if (fattr->valid & NFS_ATTR_FATTR_ATIME)
+                       inode->i_atime = fattr->atime;
+               if (fattr->valid & NFS_ATTR_FATTR_MTIME)
+                       inode->i_mtime = fattr->mtime;
+               if (fattr->valid & NFS_ATTR_FATTR_CTIME)
+                       inode->i_ctime = fattr->ctime;
+               if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
                        nfsi->change_attr = fattr->change_attr;
-               inode->i_size = nfs_size_to_loff_t(fattr->size);
-               inode->i_nlink = fattr->nlink;
-               inode->i_uid = fattr->uid;
-               inode->i_gid = fattr->gid;
-               if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
+               if (fattr->valid & NFS_ATTR_FATTR_SIZE)
+                       inode->i_size = nfs_size_to_loff_t(fattr->size);
+               if (fattr->valid & NFS_ATTR_FATTR_NLINK)
+                       inode->i_nlink = fattr->nlink;
+               if (fattr->valid & NFS_ATTR_FATTR_OWNER)
+                       inode->i_uid = fattr->uid;
+               if (fattr->valid & NFS_ATTR_FATTR_GROUP)
+                       inode->i_gid = fattr->gid;
+               if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+                       inode->i_blocks = fattr->du.nfs2.blocks;
+               if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
                        /*
                         * report the blocks in 512byte units
                         */
                        inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-               } else {
-                       inode->i_blocks = fattr->du.nfs2.blocks;
                }
                nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
                nfsi->attrtimeo_timestamp = now;
-               memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
                nfsi->access_cache = RB_ROOT;
 
                unlock_new_inode(inode);
@@ -514,6 +541,32 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        return err;
 }
 
+/**
+ * nfs_close_context - Common close_context() routine NFSv2/v3
+ * @ctx: pointer to context
+ * @is_sync: is this a synchronous close
+ *
+ * always ensure that the attributes are up to date if we're mounted
+ * with close-to-open semantics
+ */
+void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
+{
+       struct inode *inode;
+       struct nfs_server *server;
+
+       if (!(ctx->mode & FMODE_WRITE))
+               return;
+       if (!is_sync)
+               return;
+       inode = ctx->path.dentry->d_inode;
+       if (!list_empty(&NFS_I(inode)->open_files))
+               return;
+       server = NFS_SERVER(inode);
+       if (server->flags & NFS_MOUNT_NOCTO)
+               return;
+       nfs_revalidate_inode(server, inode);
+}
+
 static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
 {
        struct nfs_open_context *ctx;
@@ -540,24 +593,15 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
        return ctx;
 }
 
-static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait)
+static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 {
-       struct inode *inode;
-
-       if (ctx == NULL)
-               return;
+       struct inode *inode = ctx->path.dentry->d_inode;
 
-       inode = ctx->path.dentry->d_inode;
        if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
                return;
        list_del(&ctx->list);
        spin_unlock(&inode->i_lock);
-       if (ctx->state != NULL) {
-               if (wait)
-                       nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
-               else
-                       nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
-       }
+       NFS_PROTO(inode)->close_context(ctx, is_sync);
        if (ctx->cred != NULL)
                put_rpccred(ctx->cred);
        path_put(&ctx->path);
@@ -670,9 +714,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
        if (NFS_STALE(inode))
                goto out;
 
-       if (NFS_STALE(inode))
-               goto out;
-
        nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
        status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
        if (status != 0) {
@@ -815,25 +856,31 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
 
-       if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 &&
-                       nfsi->change_attr == fattr->pre_change_attr) {
+       if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
+                       && (fattr->valid & NFS_ATTR_FATTR_CHANGE)
+                       && nfsi->change_attr == fattr->pre_change_attr) {
                nfsi->change_attr = fattr->change_attr;
                if (S_ISDIR(inode->i_mode))
                        nfsi->cache_validity |= NFS_INO_INVALID_DATA;
        }
        /* If we have atomic WCC data, we may update some attributes */
-       if ((fattr->valid & NFS_ATTR_WCC) != 0) {
-               if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
+       if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
+                       && (fattr->valid & NFS_ATTR_FATTR_CTIME)
+                       && timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
                        memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-               if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
+
+       if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
+                       && (fattr->valid & NFS_ATTR_FATTR_MTIME)
+                       && timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
                        memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
                        if (S_ISDIR(inode->i_mode))
                                nfsi->cache_validity |= NFS_INO_INVALID_DATA;
-               }
-               if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) &&
-                   nfsi->npages == 0)
-                       i_size_write(inode, nfs_size_to_loff_t(fattr->size));
        }
+       if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
+                       && (fattr->valid & NFS_ATTR_FATTR_SIZE)
+                       && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
+                       && nfsi->npages == 0)
+                       i_size_write(inode, nfs_size_to_loff_t(fattr->size));
 }
 
 /**
@@ -853,35 +900,39 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 
 
        /* Has the inode gone and changed behind our back? */
-       if (nfsi->fileid != fattr->fileid
-                       || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+       if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
+               return -EIO;
+       if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
                return -EIO;
-       }
 
-       if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
+       if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
                        nfsi->change_attr != fattr->change_attr)
                invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
        /* Verify a few of the more important attributes */
-       if (!timespec_equal(&inode->i_mtime, &fattr->mtime))
+       if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
                invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
-       cur_size = i_size_read(inode);
-       new_isize = nfs_size_to_loff_t(fattr->size);
-       if (cur_size != new_isize && nfsi->npages == 0)
-               invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+       if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
+               cur_size = i_size_read(inode);
+               new_isize = nfs_size_to_loff_t(fattr->size);
+               if (cur_size != new_isize && nfsi->npages == 0)
+                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+       }
 
        /* Have any file permissions changed? */
-       if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
-                       || inode->i_uid != fattr->uid
-                       || inode->i_gid != fattr->gid)
+       if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
+               invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+       if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
+               invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+       if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
                invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
 
        /* Has the link count changed? */
-       if (inode->i_nlink != fattr->nlink)
+       if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
                invalid |= NFS_INO_INVALID_ATTR;
 
-       if (!timespec_equal(&inode->i_atime, &fattr->atime))
+       if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime))
                invalid |= NFS_INO_INVALID_ATIME;
 
        if (invalid != 0)
@@ -893,11 +944,15 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 
 static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
 {
+       if (!(fattr->valid & NFS_ATTR_FATTR_CTIME))
+               return 0;
        return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
 }
 
 static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
 {
+       if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
+               return 0;
        return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
 }
 
@@ -1033,20 +1088,31 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
        /* Don't do a WCC update if these attributes are already stale */
        if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
                        !nfs_inode_attrs_need_update(inode, fattr)) {
-               fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC);
+               fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
+                               | NFS_ATTR_FATTR_PRESIZE
+                               | NFS_ATTR_FATTR_PREMTIME
+                               | NFS_ATTR_FATTR_PRECTIME);
                goto out_noforce;
        }
-       if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
-                       (fattr->valid & NFS_ATTR_WCC_V4) == 0) {
+       if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
+                       (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) {
                fattr->pre_change_attr = NFS_I(inode)->change_attr;
-               fattr->valid |= NFS_ATTR_WCC_V4;
+               fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
        }
-       if ((fattr->valid & NFS_ATTR_FATTR) != 0 &&
-                       (fattr->valid & NFS_ATTR_WCC) == 0) {
+       if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
+                       (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
                memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
+               fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
+       }
+       if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
+                       (fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
                memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
+               fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
+       }
+       if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
+                       (fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) {
                fattr->pre_size = i_size_read(inode);
-               fattr->valid |= NFS_ATTR_WCC;
+               fattr->valid |= NFS_ATTR_FATTR_PRESIZE;
        }
 out_noforce:
        status = nfs_post_op_update_inode_locked(inode, fattr);
@@ -1078,18 +1144,18 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        __func__, inode->i_sb->s_id, inode->i_ino,
                        atomic_read(&inode->i_count), fattr->valid);
 
-       if (nfsi->fileid != fattr->fileid)
+       if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
                goto out_fileid;
 
        /*
         * Make sure the inode's type hasn't changed.
         */
-       if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+       if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
                goto out_changed;
 
        server = NFS_SERVER(inode);
        /* Update the fsid? */
-       if (S_ISDIR(inode->i_mode) &&
+       if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
                        !nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
                        !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
                server->fsid = fattr->fsid;
@@ -1099,14 +1165,27 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
         */
        nfsi->read_cache_jiffies = fattr->time_start;
 
-       nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME
-                       | NFS_INO_REVAL_PAGECACHE);
+       if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME)))
+           nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
+                   | NFS_INO_INVALID_ATIME
+                   | NFS_INO_REVAL_PAGECACHE);
 
        /* Do atomic weak cache consistency updates */
        nfs_wcc_update_inode(inode, fattr);
 
        /* More cache consistency checks */
-       if (!(fattr->valid & NFS_ATTR_FATTR_V4)) {
+       if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
+               if (nfsi->change_attr != fattr->change_attr) {
+                       dprintk("NFS: change_attr change on server for file %s/%ld\n",
+                                       inode->i_sb->s_id, inode->i_ino);
+                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+                       if (S_ISDIR(inode->i_mode))
+                               nfs_force_lookup_revalidate(inode);
+                       nfsi->change_attr = fattr->change_attr;
+               }
+       }
+
+       if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
                /* NFSv2/v3: Check if the mtime agrees */
                if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
                        dprintk("NFS: mtime change on server for file %s/%ld\n",
@@ -1114,59 +1193,80 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
                        if (S_ISDIR(inode->i_mode))
                                nfs_force_lookup_revalidate(inode);
+                       memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
                }
+       }
+       if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
                /* If ctime has changed we should definitely clear access+acl caches */
-               if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
+               if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
                        invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-       } else if (nfsi->change_attr != fattr->change_attr) {
-               dprintk("NFS: change_attr change on server for file %s/%ld\n",
-                               inode->i_sb->s_id, inode->i_ino);
-               invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-               if (S_ISDIR(inode->i_mode))
-                       nfs_force_lookup_revalidate(inode);
+                       /* and probably clear data for a directory too as utimes can cause
+                        * havoc with our cache.
+                        */
+                       if (S_ISDIR(inode->i_mode)) {
+                               invalid |= NFS_INO_INVALID_DATA;
+                               nfs_force_lookup_revalidate(inode);
+                       }
+                       memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
+               }
        }
 
        /* Check if our cached file size is stale */
-       new_isize = nfs_size_to_loff_t(fattr->size);
-       cur_isize = i_size_read(inode);
-       if (new_isize != cur_isize) {
-               /* Do we perhaps have any outstanding writes, or has
-                * the file grown beyond our last write? */
-               if (nfsi->npages == 0 || new_isize > cur_isize) {
-                       i_size_write(inode, new_isize);
-                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+       if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
+               new_isize = nfs_size_to_loff_t(fattr->size);
+               cur_isize = i_size_read(inode);
+               if (new_isize != cur_isize) {
+                       /* Do we perhaps have any outstanding writes, or has
+                        * the file grown beyond our last write? */
+                       if (nfsi->npages == 0 || new_isize > cur_isize) {
+                               i_size_write(inode, new_isize);
+                               invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+                       }
+                       dprintk("NFS: isize change on server for file %s/%ld\n",
+                                       inode->i_sb->s_id, inode->i_ino);
                }
-               dprintk("NFS: isize change on server for file %s/%ld\n",
-                               inode->i_sb->s_id, inode->i_ino);
        }
 
 
-       memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
-       memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-       memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
-       nfsi->change_attr = fattr->change_attr;
-
-       if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
-           inode->i_uid != fattr->uid ||
-           inode->i_gid != fattr->gid)
-               invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+       if (fattr->valid & NFS_ATTR_FATTR_ATIME)
+               memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
 
-       if (inode->i_nlink != fattr->nlink)
-               invalid |= NFS_INO_INVALID_ATTR;
+       if (fattr->valid & NFS_ATTR_FATTR_MODE) {
+               if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
+                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+                       inode->i_mode = fattr->mode;
+               }
+       }
+       if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
+               if (inode->i_uid != fattr->uid) {
+                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+                       inode->i_uid = fattr->uid;
+               }
+       }
+       if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
+               if (inode->i_gid != fattr->gid) {
+                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+                       inode->i_gid = fattr->gid;
+               }
+       }
 
-       inode->i_mode = fattr->mode;
-       inode->i_nlink = fattr->nlink;
-       inode->i_uid = fattr->uid;
-       inode->i_gid = fattr->gid;
+       if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
+               if (inode->i_nlink != fattr->nlink) {
+                       invalid |= NFS_INO_INVALID_ATTR;
+                       if (S_ISDIR(inode->i_mode))
+                               invalid |= NFS_INO_INVALID_DATA;
+                       inode->i_nlink = fattr->nlink;
+               }
+       }
 
-       if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
+       if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
                /*
                 * report the blocks in 512byte units
                 */
                inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-       } else {
-               inode->i_blocks = fattr->du.nfs2.blocks;
        }
+       if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+               inode->i_blocks = fattr->du.nfs2.blocks;
 
        /* Update attrtimeo value if we're out of the unstable period */
        if (invalid & NFS_INO_INVALID_ATTR) {
@@ -1274,7 +1374,6 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
        INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
        INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
-       nfsi->ncommit = 0;
        nfsi->npages = 0;
        atomic_set(&nfsi->silly_count, 1);
        INIT_HLIST_HEAD(&nfsi->silly_list);
index 340ede8..2041f68 100644 (file)
@@ -152,6 +152,9 @@ extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 #endif
 
+/* proc.c */
+void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
+
 /* dir.c */
 extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
 
@@ -165,6 +168,7 @@ extern void nfs_clear_inode(struct inode *);
 extern void nfs4_clear_inode(struct inode *);
 #endif
 void nfs_zap_acl_cache(struct inode *inode);
+extern int nfs_wait_bit_killable(void *word);
 
 /* super.c */
 void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
index 28bab67..c862c93 100644 (file)
@@ -120,8 +120,8 @@ xdr_decode_time(__be32 *p, struct timespec *timep)
 static __be32 *
 xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
 {
-       u32 rdev;
-       fattr->type = (enum nfs_ftype) ntohl(*p++);
+       u32 rdev, type;
+       type = ntohl(*p++);
        fattr->mode = ntohl(*p++);
        fattr->nlink = ntohl(*p++);
        fattr->uid = ntohl(*p++);
@@ -136,10 +136,9 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
        p = xdr_decode_time(p, &fattr->atime);
        p = xdr_decode_time(p, &fattr->mtime);
        p = xdr_decode_time(p, &fattr->ctime);
-       fattr->valid |= NFS_ATTR_FATTR;
+       fattr->valid |= NFS_ATTR_FATTR_V2;
        fattr->rdev = new_decode_dev(rdev);
-       if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) {
-               fattr->type = NFFIFO;
+       if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
                fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
                fattr->rdev = 0;
        }
index c55be7a..b82fe68 100644 (file)
@@ -834,4 +834,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
        .commit_done    = nfs3_commit_done,
        .lock           = nfs3_proc_lock,
        .clear_acl_cache = nfs3_forget_cached_acls,
+       .close_context  = nfs_close_context,
 };
index 6cdeacf..e6a1932 100644 (file)
 /*
  * Map file type to S_IFMT bits
  */
-static struct {
-       unsigned int    mode;
-       unsigned int    nfs2type;
-} nfs_type2fmt[] = {
-      { 0,             NFNON   },
-      { S_IFREG,       NFREG   },
-      { S_IFDIR,       NFDIR   },
-      { S_IFBLK,       NFBLK   },
-      { S_IFCHR,       NFCHR   },
-      { S_IFLNK,       NFLNK   },
-      { S_IFSOCK,      NFSOCK  },
-      { S_IFIFO,       NFFIFO  },
-      { 0,             NFBAD   }
+static const umode_t nfs_type2fmt[] = {
+       [NF3BAD] = 0,
+       [NF3REG] = S_IFREG,
+       [NF3DIR] = S_IFDIR,
+       [NF3BLK] = S_IFBLK,
+       [NF3CHR] = S_IFCHR,
+       [NF3LNK] = S_IFLNK,
+       [NF3SOCK] = S_IFSOCK,
+       [NF3FIFO] = S_IFIFO,
 };
 
 /*
@@ -148,13 +144,12 @@ static __be32 *
 xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
 {
        unsigned int    type, major, minor;
-       int             fmode;
+       umode_t         fmode;
 
        type = ntohl(*p++);
-       if (type >= NF3BAD)
-               type = NF3BAD;
-       fmode = nfs_type2fmt[type].mode;
-       fattr->type = nfs_type2fmt[type].nfs2type;
+       if (type > NF3FIFO)
+               type = NF3NON;
+       fmode = nfs_type2fmt[type];
        fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
        fattr->nlink = ntohl(*p++);
        fattr->uid = ntohl(*p++);
@@ -177,7 +172,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
        p = xdr_decode_time3(p, &fattr->ctime);
 
        /* Update the mode bits */
-       fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
+       fattr->valid |= NFS_ATTR_FATTR_V3;
        return p;
 }
 
@@ -233,7 +228,9 @@ xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr)
        p = xdr_decode_hyper(p, &fattr->pre_size);
        p = xdr_decode_time3(p, &fattr->pre_mtime);
        p = xdr_decode_time3(p, &fattr->pre_ctime);
-       fattr->valid |= NFS_ATTR_WCC;
+       fattr->valid |= NFS_ATTR_FATTR_PRESIZE
+               | NFS_ATTR_FATTR_PREMTIME
+               | NFS_ATTR_FATTR_PRECTIME;
        return p;
 }
 
index 8dde84b..97baccc 100644 (file)
@@ -193,14 +193,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
        kunmap_atomic(start, KM_USER0);
 }
 
-static int nfs4_wait_bit_killable(void *word)
-{
-       if (fatal_signal_pending(current))
-               return -ERESTARTSYS;
-       schedule();
-       return 0;
-}
-
 static int nfs4_wait_clnt_recover(struct nfs_client *clp)
 {
        int res;
@@ -208,7 +200,7 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp)
        might_sleep();
 
        res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
-                       nfs4_wait_bit_killable, TASK_KILLABLE);
+                       nfs_wait_bit_killable, TASK_KILLABLE);
        return res;
 }
 
@@ -1439,7 +1431,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
        if (calldata->arg.seqid == NULL)
                goto out_free_calldata;
        calldata->arg.fmode = 0;
-       calldata->arg.bitmask = server->attr_bitmask;
+       calldata->arg.bitmask = server->cache_consistency_bitmask;
        calldata->res.fattr = &calldata->fattr;
        calldata->res.seqid = calldata->arg.seqid;
        calldata->res.server = server;
@@ -1580,6 +1572,15 @@ out_drop:
        return 0;
 }
 
+void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
+{
+       if (ctx->state == NULL)
+               return;
+       if (is_sync)
+               nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
+       else
+               nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+}
 
 static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
@@ -1600,6 +1601,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
                        server->caps |= NFS_CAP_HARDLINKS;
                if (res.has_symlinks != 0)
                        server->caps |= NFS_CAP_SYMLINKS;
+               memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
+               server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
+               server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
                server->acl_bitmask = res.acl_bitmask;
        }
        return status;
@@ -2079,7 +2083,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
        struct nfs_removeargs *args = msg->rpc_argp;
        struct nfs_removeres *res = msg->rpc_resp;
 
-       args->bitmask = server->attr_bitmask;
+       args->bitmask = server->cache_consistency_bitmask;
        res->server = server;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
 }
@@ -2323,7 +2327,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                .pages = &page,
                .pgbase = 0,
                .count = count,
-               .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
+               .bitmask = NFS_SERVER(dentry->d_inode)->cache_consistency_bitmask,
        };
        struct nfs4_readdir_res res;
        struct rpc_message msg = {
@@ -2552,7 +2556,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 {
        struct nfs_server *server = NFS_SERVER(data->inode);
 
-       data->args.bitmask = server->attr_bitmask;
+       data->args.bitmask = server->cache_consistency_bitmask;
        data->res.server = server;
        data->timestamp   = jiffies;
 
@@ -2575,7 +2579,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
 {
        struct nfs_server *server = NFS_SERVER(data->inode);
        
-       data->args.bitmask = server->attr_bitmask;
+       data->args.bitmask = server->cache_consistency_bitmask;
        data->res.server = server;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
 }
@@ -3678,6 +3682,19 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
        return len;
 }
 
+static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
+{
+       if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) &&
+               (fattr->valid & NFS_ATTR_FATTR_FSID) &&
+               (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
+               return;
+
+       fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
+               NFS_ATTR_FATTR_NLINK;
+       fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+       fattr->nlink = 2;
+}
+
 int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
                struct nfs4_fs_locations *fs_locations, struct page *page)
 {
@@ -3704,6 +3721,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
        fs_locations->server = server;
        fs_locations->nlocations = 0;
        status = rpc_call_sync(server->client, &msg, 0);
+       nfs_fixup_referral_attributes(&fs_locations->fattr);
        dprintk("%s: returned status = %d\n", __func__, status);
        return status;
 }
@@ -3767,6 +3785,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .commit_done    = nfs4_commit_done,
        .lock           = nfs4_proc_lock,
        .clear_acl_cache = nfs4_zap_acl_attr,
+       .close_context  = nfs4_close_context,
 };
 
 /*
index 2022fe4..0298e90 100644 (file)
@@ -62,8 +62,14 @@ static LIST_HEAD(nfs4_clientid_list);
 
 static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
-       int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
-                       nfs_callback_tcpport, cred);
+       unsigned short port;
+       int status;
+
+       port = nfs_callback_tcpport;
+       if (clp->cl_addr.ss_family == AF_INET6)
+               port = nfs_callback_tcpport6;
+
+       status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred);
        if (status == 0)
                status = nfs4_proc_setclientid_confirm(clp, cred);
        if (status == 0)
index d1e4c8f..1690f0e 100644 (file)
@@ -522,20 +522,17 @@ static int nfs4_stat_to_errno(int);
                                 decode_lookup_maxsz + \
                                 decode_fs_locations_maxsz)
 
-static struct {
-       unsigned int    mode;
-       unsigned int    nfs2type;
-} nfs_type2fmt[] = {
-       { 0,            NFNON        },
-       { S_IFREG,      NFREG        },
-       { S_IFDIR,      NFDIR        },
-       { S_IFBLK,      NFBLK        },
-       { S_IFCHR,      NFCHR        },
-       { S_IFLNK,      NFLNK        },
-       { S_IFSOCK,     NFSOCK       },
-       { S_IFIFO,      NFFIFO       },
-       { 0,            NFNON        },
-       { 0,            NFNON        },
+static const umode_t nfs_type2fmt[] = {
+       [NF4BAD] = 0,
+       [NF4REG] = S_IFREG,
+       [NF4DIR] = S_IFDIR,
+       [NF4BLK] = S_IFBLK,
+       [NF4CHR] = S_IFCHR,
+       [NF4LNK] = S_IFLNK,
+       [NF4SOCK] = S_IFSOCK,
+       [NF4FIFO] = S_IFIFO,
+       [NF4ATTRDIR] = 0,
+       [NF4NAMEDATTR] = 0,
 };
 
 struct compound_hdr {
@@ -2160,6 +2157,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3
 static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type)
 {
        __be32 *p;
+       int ret = 0;
 
        *type = 0;
        if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
@@ -2172,14 +2170,16 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
                        return -EIO;
                }
                bitmap[0] &= ~FATTR4_WORD0_TYPE;
+               ret = NFS_ATTR_FATTR_TYPE;
        }
-       dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type].nfs2type);
-       return 0;
+       dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
+       return ret;
 }
 
 static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
 {
        __be32 *p;
+       int ret = 0;
 
        *change = 0;
        if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
@@ -2188,15 +2188,17 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
                READ_BUF(8);
                READ64(*change);
                bitmap[0] &= ~FATTR4_WORD0_CHANGE;
+               ret = NFS_ATTR_FATTR_CHANGE;
        }
        dprintk("%s: change attribute=%Lu\n", __func__,
                        (unsigned long long)*change);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
 {
        __be32 *p;
+       int ret = 0;
 
        *size = 0;
        if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
@@ -2205,9 +2207,10 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
                READ_BUF(8);
                READ64(*size);
                bitmap[0] &= ~FATTR4_WORD0_SIZE;
+               ret = NFS_ATTR_FATTR_SIZE;
        }
        dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2245,6 +2248,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
 static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
 {
        __be32 *p;
+       int ret = 0;
 
        fsid->major = 0;
        fsid->minor = 0;
@@ -2255,11 +2259,12 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
                READ64(fsid->major);
                READ64(fsid->minor);
                bitmap[0] &= ~FATTR4_WORD0_FSID;
+               ret = NFS_ATTR_FATTR_FSID;
        }
        dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__,
                        (unsigned long long)fsid->major,
                        (unsigned long long)fsid->minor);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2297,6 +2302,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
 static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
 {
        __be32 *p;
+       int ret = 0;
 
        *fileid = 0;
        if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
@@ -2305,14 +2311,16 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
                READ_BUF(8);
                READ64(*fileid);
                bitmap[0] &= ~FATTR4_WORD0_FILEID;
+               ret = NFS_ATTR_FATTR_FILEID;
        }
        dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
 {
        __be32 *p;
+       int ret = 0;
 
        *fileid = 0;
        if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
@@ -2321,9 +2329,10 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
                READ_BUF(8);
                READ64(*fileid);
                bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+               ret = NFS_ATTR_FATTR_FILEID;
        }
        dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2479,6 +2488,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
                if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES)
                        res->nlocations++;
        }
+       if (res->nlocations != 0)
+               status = NFS_ATTR_FATTR_V4_REFERRAL;
 out:
        dprintk("%s: fs_locations done, error = %d\n", __func__, status);
        return status;
@@ -2580,26 +2591,30 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
        return status;
 }
 
-static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode)
+static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
 {
+       uint32_t tmp;
        __be32 *p;
+       int ret = 0;
 
        *mode = 0;
        if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
                return -EIO;
        if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
                READ_BUF(4);
-               READ32(*mode);
-               *mode &= ~S_IFMT;
+               READ32(tmp);
+               *mode = tmp & ~S_IFMT;
                bitmap[1] &= ~FATTR4_WORD1_MODE;
+               ret = NFS_ATTR_FATTR_MODE;
        }
        dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
 {
        __be32 *p;
+       int ret = 0;
 
        *nlink = 1;
        if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
@@ -2608,15 +2623,17 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
                READ_BUF(4);
                READ32(*nlink);
                bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
+               ret = NFS_ATTR_FATTR_NLINK;
        }
        dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid)
 {
        uint32_t len;
        __be32 *p;
+       int ret = 0;
 
        *uid = -2;
        if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
@@ -2626,7 +2643,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
                READ32(len);
                READ_BUF(len);
                if (len < XDR_MAX_NETOBJ) {
-                       if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0)
+                       if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
+                               ret = NFS_ATTR_FATTR_OWNER;
+                       else
                                dprintk("%s: nfs_map_name_to_uid failed!\n",
                                                __func__);
                } else
@@ -2635,13 +2654,14 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
                bitmap[1] &= ~FATTR4_WORD1_OWNER;
        }
        dprintk("%s: uid=%d\n", __func__, (int)*uid);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid)
 {
        uint32_t len;
        __be32 *p;
+       int ret = 0;
 
        *gid = -2;
        if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
@@ -2651,7 +2671,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
                READ32(len);
                READ_BUF(len);
                if (len < XDR_MAX_NETOBJ) {
-                       if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0)
+                       if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
+                               ret = NFS_ATTR_FATTR_GROUP;
+                       else
                                dprintk("%s: nfs_map_group_to_gid failed!\n",
                                                __func__);
                } else
@@ -2660,13 +2682,14 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
                bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
        }
        dprintk("%s: gid=%d\n", __func__, (int)*gid);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
 {
        uint32_t major = 0, minor = 0;
        __be32 *p;
+       int ret = 0;
 
        *rdev = MKDEV(0,0);
        if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U)))
@@ -2681,9 +2704,10 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
                if (MAJOR(tmp) == major && MINOR(tmp) == minor)
                        *rdev = tmp;
                bitmap[1] &= ~ FATTR4_WORD1_RAWDEV;
+               ret = NFS_ATTR_FATTR_RDEV;
        }
        dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2740,6 +2764,7 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
 static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
 {
        __be32 *p;
+       int ret = 0;
 
        *used = 0;
        if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
@@ -2748,10 +2773,11 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
                READ_BUF(8);
                READ64(*used);
                bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
+               ret = NFS_ATTR_FATTR_SPACE_USED;
        }
        dprintk("%s: space used=%Lu\n", __func__,
                        (unsigned long long)*used);
-       return 0;
+       return ret;
 }
 
 static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -2778,6 +2804,8 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
                return -EIO;
        if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) {
                status = decode_attr_time(xdr, time);
+               if (status == 0)
+                       status = NFS_ATTR_FATTR_ATIME;
                bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS;
        }
        dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec);
@@ -2794,6 +2822,8 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
                return -EIO;
        if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) {
                status = decode_attr_time(xdr, time);
+               if (status == 0)
+                       status = NFS_ATTR_FATTR_CTIME;
                bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA;
        }
        dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec);
@@ -2810,6 +2840,8 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str
                return -EIO;
        if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) {
                status = decode_attr_time(xdr, time);
+               if (status == 0)
+                       status = NFS_ATTR_FATTR_MTIME;
                bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY;
        }
        dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec);
@@ -2994,63 +3026,116 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
        uint32_t attrlen,
                 bitmap[2] = {0},
                 type;
-       int status, fmode = 0;
+       int status;
+       umode_t fmode = 0;
        uint64_t fileid;
 
-       if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
-               goto xdr_error;
-       if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
+       status = decode_op_hdr(xdr, OP_GETATTR);
+       if (status < 0)
                goto xdr_error;
 
-       fattr->bitmap[0] = bitmap[0];
-       fattr->bitmap[1] = bitmap[1];
+       status = decode_attr_bitmap(xdr, bitmap);
+       if (status < 0)
+               goto xdr_error;
 
-       if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
+       status = decode_attr_length(xdr, &attrlen, &savep);
+       if (status < 0)
                goto xdr_error;
 
 
-       if ((status = decode_attr_type(xdr, bitmap, &type)) != 0)
+       status = decode_attr_type(xdr, bitmap, &type);
+       if (status < 0)
                goto xdr_error;
-       fattr->type = nfs_type2fmt[type].nfs2type;
-       fmode = nfs_type2fmt[type].mode;
+       fattr->mode = 0;
+       if (status != 0) {
+               fattr->mode |= nfs_type2fmt[type];
+               fattr->valid |= status;
+       }
 
-       if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0)
+       status = decode_attr_change(xdr, bitmap, &fattr->change_attr);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_size(xdr, bitmap, &fattr->size);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_fsid(xdr, bitmap, &fattr->fsid);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
+       fattr->valid |= status;
+
+       status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
                                                struct nfs4_fs_locations,
-                                               fattr))) != 0)
+                                               fattr));
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_mode(xdr, bitmap, &fmode);
+       if (status < 0)
                goto xdr_error;
-       fattr->mode |= fmode;
-       if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
+       if (status != 0) {
+               fattr->mode |= fmode;
+               fattr->valid |= status;
+       }
+
+       status = decode_attr_nlink(xdr, bitmap, &fattr->nlink);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_rdev(xdr, bitmap, &fattr->rdev);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_time_access(xdr, bitmap, &fattr->atime)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_time_access(xdr, bitmap, &fattr->atime);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime);
+       if (status < 0)
                goto xdr_error;
-       if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0)
+       fattr->valid |= status;
+
+       status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid);
+       if (status < 0)
                goto xdr_error;
-       if (fattr->fileid == 0 && fileid != 0)
+       if (status != 0 && !(fattr->valid & status)) {
                fattr->fileid = fileid;
-       if ((status = verify_attr_len(xdr, savep, attrlen)) == 0)
-               fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
+               fattr->valid |= status;
+       }
+
+       status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
        dprintk("%s: xdr returned %d\n", __func__, -status);
        return status;
@@ -4078,9 +4163,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
        status = decode_setattr(&xdr, res);
        if (status)
                goto out;
-       status = decode_getfattr(&xdr, res->fattr, res->server);
-       if (status == NFS4ERR_DELAY)
-               status = 0;
+       decode_getfattr(&xdr, res->fattr, res->server);
 out:
        return status;
 }
index 7f07920..e297593 100644 (file)
@@ -176,17 +176,6 @@ void nfs_release_request(struct nfs_page *req)
        kref_put(&req->wb_kref, nfs_free_request);
 }
 
-static int nfs_wait_bit_killable(void *word)
-{
-       int ret = 0;
-
-       if (fatal_signal_pending(current))
-               ret = -ERESTARTSYS;
-       else
-               schedule();
-       return ret;
-}
-
 /**
  * nfs_wait_on_request - Wait for a request to complete.
  * @req: request to wait upon.
index 1934652..7be72d9 100644 (file)
@@ -663,4 +663,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
        .commit_setup   = nfs_proc_commit_setup,
        .lock           = nfs_proc_lock,
        .lock_check_bounds = nfs_lock_check_bounds,
+       .close_context  = nfs_close_context,
 };
index d6686f4..0942fcb 100644 (file)
@@ -1018,6 +1018,7 @@ static int nfs_parse_mount_options(char *raw,
                case Opt_rdma:
                        mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
                        mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
+                       xprt_load_transport(p);
                        break;
                case Opt_acl:
                        mnt->flags &= ~NFS_MOUNT_NOACL;
@@ -1205,12 +1206,14 @@ static int nfs_parse_mount_options(char *raw,
                                /* vector side protocols to TCP */
                                mnt->flags |= NFS_MOUNT_TCP;
                                mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
+                               xprt_load_transport(string);
                                break;
                        default:
                                errors++;
                                dfprintk(MOUNT, "NFS:   unrecognized "
                                                "transport protocol\n");
                        }
+                       kfree(string);
                        break;
                case Opt_mountproto:
                        string = match_strdup(args);
@@ -1218,7 +1221,6 @@ static int nfs_parse_mount_options(char *raw,
                                goto out_nomem;
                        token = match_token(string,
                                            nfs_xprt_protocol_tokens, args);
-                       kfree(string);
 
                        switch (token) {
                        case Opt_xprt_udp:
index 9f98458..e560a78 100644 (file)
@@ -313,19 +313,34 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
        struct inode *inode = mapping->host;
+       unsigned long *bitlock = &NFS_I(inode)->flags;
        struct nfs_pageio_descriptor pgio;
        int err;
 
+       /* Stop dirtying of new pages while we sync */
+       err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
+                       nfs_wait_bit_killable, TASK_KILLABLE);
+       if (err)
+               goto out_err;
+
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
        nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
        err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
        nfs_pageio_complete(&pgio);
+
+       clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
+       smp_mb__after_clear_bit();
+       wake_up_bit(bitlock, NFS_INO_FLUSHING);
+
        if (err < 0)
-               return err;
-       if (pgio.pg_error < 0)
-               return pgio.pg_error;
+               goto out_err;
+       err = pgio.pg_error;
+       if (err < 0)
+               goto out_err;
        return 0;
+out_err:
+       return err;
 }
 
 /*
@@ -404,7 +419,6 @@ nfs_mark_request_commit(struct nfs_page *req)
        struct nfs_inode *nfsi = NFS_I(inode);
 
        spin_lock(&inode->i_lock);
-       nfsi->ncommit++;
        set_bit(PG_CLEAN, &(req)->wb_flags);
        radix_tree_tag_set(&nfsi->nfs_page_tree,
                        req->wb_index,
@@ -524,6 +538,12 @@ static void nfs_cancel_commit_list(struct list_head *head)
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static int
+nfs_need_commit(struct nfs_inode *nfsi)
+{
+       return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
+}
+
 /*
  * nfs_scan_commit - Scan an inode for commit requests
  * @inode: NFS inode to scan
@@ -538,16 +558,18 @@ static int
 nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
-       int res = 0;
 
-       if (nfsi->ncommit != 0) {
-               res = nfs_scan_list(nfsi, dst, idx_start, npages,
-                               NFS_PAGE_TAG_COMMIT);
-               nfsi->ncommit -= res;
-       }
-       return res;
+       if (!nfs_need_commit(nfsi))
+               return 0;
+
+       return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
 }
 #else
+static inline int nfs_need_commit(struct nfs_inode *nfsi)
+{
+       return 0;
+}
+
 static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 {
        return 0;
@@ -820,7 +842,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
        data->args.stable  = NFS_UNSTABLE;
        if (how & FLUSH_STABLE) {
                data->args.stable = NFS_DATA_SYNC;
-               if (!NFS_I(inode)->ncommit)
+               if (!nfs_need_commit(NFS_I(inode)))
                        data->args.stable = NFS_FILE_SYNC;
        }
 
@@ -1425,18 +1447,13 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
 {
        struct writeback_control wbc = {
                .bdi = mapping->backing_dev_info,
-               .sync_mode = WB_SYNC_NONE,
+               .sync_mode = WB_SYNC_ALL,
                .nr_to_write = LONG_MAX,
                .range_start = 0,
                .range_end = LLONG_MAX,
                .for_writepages = 1,
        };
-       int ret;
 
-       ret = __nfs_write_mapping(mapping, &wbc, how);
-       if (ret < 0)
-               return ret;
-       wbc.sync_mode = WB_SYNC_ALL;
        return __nfs_write_mapping(mapping, &wbc, how);
 }
 
index 3d93b20..a4ed864 100644 (file)
@@ -938,10 +938,12 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
                char transport[16];
                int port;
                if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
+                       if (port < 1 || port > 65535)
+                               return -EINVAL;
                        err = nfsd_create_serv();
                        if (!err) {
                                err = svc_create_xprt(nfsd_serv,
-                                                     transport, port,
+                                                     transport, PF_INET, port,
                                                      SVC_SOCK_ANONYMOUS);
                                if (err == -ENOENT)
                                        /* Give a reasonable perror msg for
@@ -960,7 +962,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
                char transport[16];
                int port;
                if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
-                       if (port == 0)
+                       if (port < 1 || port > 65535)
                                return -EINVAL;
                        if (nfsd_serv) {
                                xprt = svc_find_xprt(nfsd_serv, transport,
index 07e4f5d..bc3567b 100644 (file)
@@ -229,7 +229,6 @@ int nfsd_create_serv(void)
 
        atomic_set(&nfsd_busy, 0);
        nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
-                                     AF_INET,
                                      nfsd_last_thread, nfsd, THIS_MODULE);
        if (nfsd_serv == NULL)
                err = -ENOMEM;
@@ -244,7 +243,7 @@ static int nfsd_init_socks(int port)
        if (!list_empty(&nfsd_serv->sv_permsocks))
                return 0;
 
-       error = svc_create_xprt(nfsd_serv, "udp", port,
+       error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
                                        SVC_SOCK_DEFAULTS);
        if (error < 0)
                return error;
@@ -253,7 +252,7 @@ static int nfsd_init_socks(int port)
        if (error < 0)
                return error;
 
-       error = svc_create_xprt(nfsd_serv, "tcp", port,
+       error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
                                        SVC_SOCK_DEFAULTS);
        if (error < 0)
                return error;
index 8cc8807..bde2557 100644 (file)
@@ -166,8 +166,7 @@ struct nfs_inode {
         */
        struct radix_tree_root  nfs_page_tree;
 
-       unsigned long           ncommit,
-                               npages;
+       unsigned long           npages;
 
        /* Open contexts for shared mmap writes */
        struct list_head        open_files;
@@ -207,6 +206,7 @@ struct nfs_inode {
 #define NFS_INO_STALE          (1)             /* possible stale inode */
 #define NFS_INO_ACL_LRU_SET    (2)             /* Inode is on the LRU list */
 #define NFS_INO_MOUNTPOINT     (3)             /* inode is remote mountpoint */
+#define NFS_INO_FLUSHING       (4)             /* inode is flushing out data */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
index 9bb81ae..29b1e40 100644 (file)
@@ -106,6 +106,11 @@ struct nfs_server {
        u32                     attr_bitmask[2];/* V4 bitmask representing the set
                                                   of attributes supported on this
                                                   filesystem */
+       u32                     cache_consistency_bitmask[2];
+                                               /* V4 bitmask representing the subset
+                                                  of change attribute, size, ctime
+                                                  and mtime attributes supported by
+                                                  the server */
        u32                     acl_bitmask;    /* V4 bitmask representing the ACEs
                                                   that are supported on this
                                                   filesystem */
index 43a713f..b89c34e 100644 (file)
@@ -27,12 +27,8 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid
 }
 
 struct nfs_fattr {
-       unsigned short          valid;          /* which fields are valid */
-       __u64                   pre_size;       /* pre_op_attr.size       */
-       struct timespec         pre_mtime;      /* pre_op_attr.mtime      */
-       struct timespec         pre_ctime;      /* pre_op_attr.ctime      */
-       enum nfs_ftype          type;           /* always use NFSv2 types */
-       __u32                   mode;
+       unsigned int            valid;          /* which fields are valid */
+       umode_t                 mode;
        __u32                   nlink;
        __u32                   uid;
        __u32                   gid;
@@ -52,19 +48,55 @@ struct nfs_fattr {
        struct timespec         atime;
        struct timespec         mtime;
        struct timespec         ctime;
-       __u32                   bitmap[2];      /* NFSv4 returned attribute bitmap */
        __u64                   change_attr;    /* NFSv4 change attribute */
        __u64                   pre_change_attr;/* pre-op NFSv4 change attribute */
+       __u64                   pre_size;       /* pre_op_attr.size       */
+       struct timespec         pre_mtime;      /* pre_op_attr.mtime      */
+       struct timespec         pre_ctime;      /* pre_op_attr.ctime      */
        unsigned long           time_start;
        unsigned long           gencount;
 };
 
-#define NFS_ATTR_WCC           0x0001          /* pre-op WCC data    */
-#define NFS_ATTR_FATTR         0x0002          /* post-op attributes */
-#define NFS_ATTR_FATTR_V3      0x0004          /* NFSv3 attributes */
-#define NFS_ATTR_FATTR_V4      0x0008          /* NFSv4 change attribute */
-#define NFS_ATTR_WCC_V4                0x0010          /* pre-op change attribute */
-#define NFS_ATTR_FATTR_V4_REFERRAL     0x0020          /* NFSv4 referral */
+#define NFS_ATTR_FATTR_TYPE            (1U << 0)
+#define NFS_ATTR_FATTR_MODE            (1U << 1)
+#define NFS_ATTR_FATTR_NLINK           (1U << 2)
+#define NFS_ATTR_FATTR_OWNER           (1U << 3)
+#define NFS_ATTR_FATTR_GROUP           (1U << 4)
+#define NFS_ATTR_FATTR_RDEV            (1U << 5)
+#define NFS_ATTR_FATTR_SIZE            (1U << 6)
+#define NFS_ATTR_FATTR_PRESIZE         (1U << 7)
+#define NFS_ATTR_FATTR_BLOCKS_USED     (1U << 8)
+#define NFS_ATTR_FATTR_SPACE_USED      (1U << 9)
+#define NFS_ATTR_FATTR_FSID            (1U << 10)
+#define NFS_ATTR_FATTR_FILEID          (1U << 11)
+#define NFS_ATTR_FATTR_ATIME           (1U << 12)
+#define NFS_ATTR_FATTR_MTIME           (1U << 13)
+#define NFS_ATTR_FATTR_CTIME           (1U << 14)
+#define NFS_ATTR_FATTR_PREMTIME                (1U << 15)
+#define NFS_ATTR_FATTR_PRECTIME                (1U << 16)
+#define NFS_ATTR_FATTR_CHANGE          (1U << 17)
+#define NFS_ATTR_FATTR_PRECHANGE       (1U << 18)
+#define NFS_ATTR_FATTR_V4_REFERRAL     (1U << 19)      /* NFSv4 referral */
+
+#define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
+               | NFS_ATTR_FATTR_MODE \
+               | NFS_ATTR_FATTR_NLINK \
+               | NFS_ATTR_FATTR_OWNER \
+               | NFS_ATTR_FATTR_GROUP \
+               | NFS_ATTR_FATTR_RDEV \
+               | NFS_ATTR_FATTR_SIZE \
+               | NFS_ATTR_FATTR_FSID \
+               | NFS_ATTR_FATTR_FILEID \
+               | NFS_ATTR_FATTR_ATIME \
+               | NFS_ATTR_FATTR_MTIME \
+               | NFS_ATTR_FATTR_CTIME)
+#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \
+               | NFS_ATTR_FATTR_BLOCKS_USED)
+#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \
+               | NFS_ATTR_FATTR_SPACE_USED)
+#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \
+               | NFS_ATTR_FATTR_SPACE_USED \
+               | NFS_ATTR_FATTR_CHANGE)
 
 /*
  * Info on the file system
@@ -836,6 +868,7 @@ struct nfs_rpc_ops {
        int     (*lock)(struct file *, int, struct file_lock *);
        int     (*lock_check_bounds)(const struct file_lock *);
        void    (*clear_acl_cache)(struct inode *);
+       void    (*close_context)(struct nfs_open_context *ctx, int);
 };
 
 /*
index 3435d24..d3a4c02 100644 (file)
@@ -69,7 +69,6 @@ struct svc_serv {
        struct list_head        sv_tempsocks;   /* all temporary sockets */
        int                     sv_tmpcnt;      /* count of temporary sockets */
        struct timer_list       sv_temptimer;   /* timer for aging temporary sockets */
-       sa_family_t             sv_family;      /* listener's address family */
 
        char *                  sv_name;        /* service name */
 
@@ -385,19 +384,19 @@ struct svc_procedure {
 /*
  * Function prototypes.
  */
-struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t,
+struct svc_serv *svc_create(struct svc_program *, unsigned int,
                            void (*shutdown)(struct svc_serv *));
 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
                                        struct svc_pool *pool);
 void              svc_exit_thread(struct svc_rqst *);
 struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
-                       sa_family_t, void (*shutdown)(struct svc_serv *),
+                       void (*shutdown)(struct svc_serv *),
                        svc_thread_fn, struct module *);
 int               svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 void              svc_destroy(struct svc_serv *);
 int               svc_process(struct svc_rqst *);
-int               svc_register(const struct svc_serv *, const unsigned short,
-                               const unsigned short);
+int               svc_register(const struct svc_serv *, const int,
+                               const unsigned short, const unsigned short);
 
 void              svc_wake_up(struct svc_serv *);
 void              svc_reserve(struct svc_rqst *rqstp, int space);
index 0127dac..0d9cb6e 100644 (file)
@@ -71,7 +71,8 @@ int   svc_reg_xprt_class(struct svc_xprt_class *);
 void   svc_unreg_xprt_class(struct svc_xprt_class *);
 void   svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
                      struct svc_serv *);
-int    svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
+int    svc_create_xprt(struct svc_serv *, const char *, const int,
+                       const unsigned short, int);
 void   svc_xprt_enqueue(struct svc_xprt *xprt);
 void   svc_xprt_received(struct svc_xprt *);
 void   svc_xprt_put(struct svc_xprt *xprt);
@@ -80,7 +81,8 @@ void  svc_close_xprt(struct svc_xprt *xprt);
 void   svc_delete_xprt(struct svc_xprt *xprt);
 int    svc_port_is_privileged(struct sockaddr *sin);
 int    svc_print_xprts(char *buf, int maxlen);
-struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int);
+struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+                       const sa_family_t af, const unsigned short port);
 int    svc_xprt_names(struct svc_serv *serv, char *buf, int buflen);
 
 static inline void svc_xprt_get(struct svc_xprt *xprt)
@@ -88,29 +90,32 @@ static inline void svc_xprt_get(struct svc_xprt *xprt)
        kref_get(&xprt->xpt_ref);
 }
 static inline void svc_xprt_set_local(struct svc_xprt *xprt,
-                                     struct sockaddr *sa, int salen)
+                                     const struct sockaddr *sa,
+                                     const size_t salen)
 {
        memcpy(&xprt->xpt_local, sa, salen);
        xprt->xpt_locallen = salen;
 }
 static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
-                                      struct sockaddr *sa, int salen)
+                                      const struct sockaddr *sa,
+                                      const size_t salen)
 {
        memcpy(&xprt->xpt_remote, sa, salen);
        xprt->xpt_remotelen = salen;
 }
-static inline unsigned short svc_addr_port(struct sockaddr *sa)
+static inline unsigned short svc_addr_port(const struct sockaddr *sa)
 {
-       unsigned short ret = 0;
+       const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;
+       const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;
+
        switch (sa->sa_family) {
        case AF_INET:
-               ret = ntohs(((struct sockaddr_in *)sa)->sin_port);
-               break;
+               return ntohs(sin->sin_port);
        case AF_INET6:
-               ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
-               break;
+               return ntohs(sin6->sin6_port);
        }
-       return ret;
+
+       return 0;
 }
 
 static inline size_t svc_addr_len(struct sockaddr *sa)
@@ -124,36 +129,39 @@ static inline size_t svc_addr_len(struct sockaddr *sa)
        return -EAFNOSUPPORT;
 }
 
-static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt)
+static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt)
 {
-       return svc_addr_port((struct sockaddr *)&xprt->xpt_local);
+       return svc_addr_port((const struct sockaddr *)&xprt->xpt_local);
 }
 
-static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt)
+static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt)
 {
-       return svc_addr_port((struct sockaddr *)&xprt->xpt_remote);
+       return svc_addr_port((const struct sockaddr *)&xprt->xpt_remote);
 }
 
-static inline char *__svc_print_addr(struct sockaddr *addr,
-                                    char *buf, size_t len)
+static inline char *__svc_print_addr(const struct sockaddr *addr,
+                                    char *buf, const size_t len)
 {
+       const struct sockaddr_in *sin = (const struct sockaddr_in *)addr;
+       const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr;
+
        switch (addr->sa_family) {
        case AF_INET:
-               snprintf(buf, len, "%pI4, port=%u",
-                       &((struct sockaddr_in *)addr)->sin_addr,
-                       ntohs(((struct sockaddr_in *) addr)->sin_port));
+               snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr,
+                       ntohs(sin->sin_port));
                break;
 
        case AF_INET6:
                snprintf(buf, len, "%pI6, port=%u",
-                        &((struct sockaddr_in6 *)addr)->sin6_addr,
-                       ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
+                        &sin6->sin6_addr,
+                       ntohs(sin6->sin6_port));
                break;
 
        default:
                snprintf(buf, len, "unknown address type: %d", addr->sa_family);
                break;
        }
+
        return buf;
 }
 #endif /* SUNRPC_SVC_XPRT_H */
index 11fc71d..1758d9f 100644 (file)
@@ -235,6 +235,7 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *
  */
 int                    xprt_register_transport(struct xprt_class *type);
 int                    xprt_unregister_transport(struct xprt_class *type);
+int                    xprt_load_transport(const char *);
 void                   xprt_set_retrans_timeout_def(struct rpc_task *task);
 void                   xprt_set_retrans_timeout_rtt(struct rpc_task *task);
 void                   xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
@@ -259,6 +260,7 @@ void                        xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
 #define XPRT_BOUND             (4)
 #define XPRT_BINDING           (5)
 #define XPRT_CLOSING           (6)
+#define XPRT_CONNECTION_ABORT  (7)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
index 5592883..afd91c7 100644 (file)
@@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA
 
          If unsure, say N.
 
-config SUNRPC_REGISTER_V4
-       bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
-       depends on SUNRPC && EXPERIMENTAL
-       default n
-       help
-         Sun added support for registering RPC services at an IPv6
-         address by creating two new versions of the rpcbind protocol
-         (RFC 1833).
-
-         This option enables support in the kernel RPC server for
-         registering kernel RPC services via version 4 of the rpcbind
-         protocol.  If you enable this option, you must run a portmapper
-         daemon that supports rpcbind protocol version 4.
-
-         Serving NFS over IPv6 from knfsd (the kernel's NFS server)
-         requires that you enable this option and use a portmapper that
-         supports rpcbind version 4.
-
-         If unsure, say N to get traditional behavior (register kernel
-         RPC services using only rpcbind version 2).  Distributions
-         using the legacy Linux portmapper daemon must say N here.
-
 config RPCSEC_GSS_KRB5
        tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
        depends on SUNRPC && EXPERIMENTAL
index 836f15c..5abab09 100644 (file)
@@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task)
        dprint_status(task);
 
        task->tk_status = 0;
-       if (status >= 0) {
+       if (status >= 0 || status == -EAGAIN) {
                clnt->cl_stats->netreconn++;
                task->tk_action = call_transmit;
                return;
        }
 
-       /* Something failed: remote service port may have changed */
-       rpc_force_rebind(clnt);
-
        switch (status) {
-       case -ENOTCONN:
-       case -EAGAIN:
-               task->tk_action = call_bind;
-               if (!RPC_IS_SOFT(task))
-                       return;
                /* if soft mounted, test if we've timed out */
        case -ETIMEDOUT:
                task->tk_action = call_timeout;
-               return;
+               break;
+       default:
+               rpc_exit(task, -EIO);
        }
-       rpc_exit(task, -EIO);
 }
 
 /*
@@ -1105,14 +1098,26 @@ static void
 call_transmit_status(struct rpc_task *task)
 {
        task->tk_action = call_status;
-       /*
-        * Special case: if we've been waiting on the socket's write_space()
-        * callback, then don't call xprt_end_transmit().
-        */
-       if (task->tk_status == -EAGAIN)
-               return;
-       xprt_end_transmit(task);
-       rpc_task_force_reencode(task);
+       switch (task->tk_status) {
+       case -EAGAIN:
+               break;
+       default:
+               xprt_end_transmit(task);
+               /*
+                * Special cases: if we've been waiting on the
+                * socket's write_space() callback, or if the
+                * socket just returned a connection error,
+                * then hold onto the transport lock.
+                */
+       case -ECONNREFUSED:
+       case -ECONNRESET:
+       case -ENOTCONN:
+       case -EHOSTDOWN:
+       case -EHOSTUNREACH:
+       case -ENETUNREACH:
+       case -EPIPE:
+               rpc_task_force_reencode(task);
+       }
 }
 
 /*
@@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task)
                        xprt_conditional_disconnect(task->tk_xprt,
                                        req->rq_connect_cookie);
                break;
+       case -ECONNRESET:
        case -ECONNREFUSED:
-       case -ENOTCONN:
                rpc_force_rebind(clnt);
+               rpc_delay(task, 3*HZ);
+       case -EPIPE:
+       case -ENOTCONN:
                task->tk_action = call_bind;
                break;
        case -EAGAIN:
index 03ae007..beee6da 100644 (file)
@@ -63,9 +63,16 @@ enum {
  * r_owner
  *
  * The "owner" is allowed to unset a service in the rpcbind database.
- * We always use the following (arbitrary) fixed string.
+ *
+ * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a
+ * UID which it maps to a local user name via a password lookup.
+ * In all other cases it is ignored.
+ *
+ * For SET/UNSET requests, user space provides a value, even for
+ * network requests, and GETADDR uses an empty string.  We follow
+ * those precedents here.
  */
-#define RPCB_OWNER_STRING      "rpcb"
+#define RPCB_OWNER_STRING      "0"
 #define RPCB_MAXOWNERLEN       sizeof(RPCB_OWNER_STRING)
 
 static void                    rpcb_getport_done(struct rpc_task *, void *);
@@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
        .sin_port               = htons(RPCBIND_PORT),
 };
 
-static const struct sockaddr_in6 rpcb_in6addr_loopback = {
-       .sin6_family            = AF_INET6,
-       .sin6_addr              = IN6ADDR_LOOPBACK_INIT,
-       .sin6_port              = htons(RPCBIND_PORT),
-};
-
 static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
                                          size_t addrlen, u32 version)
 {
@@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
        return rpc_create(&args);
 }
 
-static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
-                             u32 version, struct rpc_message *msg)
+static int rpcb_register_call(const u32 version, struct rpc_message *msg)
 {
+       struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback;
+       size_t addrlen = sizeof(rpcb_inaddr_loopback);
        struct rpc_clnt *rpcb_clnt;
        int result, error = 0;
 
@@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
                error = PTR_ERR(rpcb_clnt);
 
        if (error < 0) {
-               printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+               dprintk("RPC:       failed to contact local rpcbind "
                                "server (errno %d).\n", -error);
                return error;
        }
@@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
        if (port)
                msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
 
-       return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
-                                       sizeof(rpcb_inaddr_loopback),
-                                       RPCBVERS_2, &msg);
+       return rpcb_register_call(RPCBVERS_2, &msg);
 }
 
 /*
  * Fill in AF_INET family-specific arguments to register
  */
-static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
-                               struct rpc_message *msg)
+static int rpcb_register_inet4(const struct sockaddr *sap,
+                              struct rpc_message *msg)
 {
+       const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
        struct rpcbind_args *map = msg->rpc_argp;
-       unsigned short port = ntohs(address_to_register->sin_port);
+       unsigned short port = ntohs(sin->sin_port);
        char buf[32];
 
        /* Construct AF_INET universal address */
        snprintf(buf, sizeof(buf), "%pI4.%u.%u",
-                &address_to_register->sin_addr.s_addr,
-                port >> 8, port & 0xff);
+                &sin->sin_addr.s_addr, port >> 8, port & 0xff);
        map->r_addr = buf;
 
        dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with "
@@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
        if (port)
                msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
 
-       return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
-                                       sizeof(rpcb_inaddr_loopback),
-                                       RPCBVERS_4, msg);
+       return rpcb_register_call(RPCBVERS_4, msg);
 }
 
 /*
  * Fill in AF_INET6 family-specific arguments to register
  */
-static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
-                               struct rpc_message *msg)
+static int rpcb_register_inet6(const struct sockaddr *sap,
+                              struct rpc_message *msg)
 {
+       const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
        struct rpcbind_args *map = msg->rpc_argp;
-       unsigned short port = ntohs(address_to_register->sin6_port);
+       unsigned short port = ntohs(sin6->sin6_port);
        char buf[64];
 
        /* Construct AF_INET6 universal address */
-       if (ipv6_addr_any(&address_to_register->sin6_addr))
+       if (ipv6_addr_any(&sin6->sin6_addr))
                snprintf(buf, sizeof(buf), "::.%u.%u",
                                port >> 8, port & 0xff);
        else
                snprintf(buf, sizeof(buf), "%pI6.%u.%u",
-                        &address_to_register->sin6_addr,
-                        port >> 8, port & 0xff);
+                        &sin6->sin6_addr, port >> 8, port & 0xff);
        map->r_addr = buf;
 
        dprintk("RPC:       %sregistering [%u, %u, %s, '%s'] with "
@@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
        if (port)
                msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
 
-       return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
-                                       sizeof(rpcb_in6addr_loopback),
-                                       RPCBVERS_4, msg);
+       return rpcb_register_call(RPCBVERS_4, msg);
+}
+
+static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
+{
+       struct rpcbind_args *map = msg->rpc_argp;
+
+       dprintk("RPC:       unregistering [%u, %u, '%s'] with "
+               "local rpcbind\n",
+                       map->r_prog, map->r_vers, map->r_netid);
+
+       map->r_addr = "";
+       msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
+
+       return rpcb_register_call(RPCBVERS_4, msg);
 }
 
 /**
@@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
  * invoke this function once for each [program, version, address,
  * netid] tuple they wish to advertise.
  *
- * Callers may also unregister RPC services that are no longer
- * available by setting the port number in the passed-in address
- * to zero.  Callers pass a netid of "" to unregister all
- * transport netids associated with [program, version, address].
+ * Callers may also unregister RPC services that are registered at a
+ * specific address by setting the port number in @address to zero.
+ * They may unregister all registered protocol families at once for
+ * a service by passing a NULL @address argument.  If @netid is ""
+ * then all netids for [program, version, address] are unregistered.
  *
  * This function uses rpcbind protocol version 4 to contact the
  * local rpcbind daemon.  The local rpcbind daemon must support
@@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version,
                .rpc_argp       = &map,
        };
 
+       if (address == NULL)
+               return rpcb_unregister_all_protofamilies(&msg);
+
        switch (address->sa_family) {
        case AF_INET:
-               return rpcb_register_netid4((struct sockaddr_in *)address,
-                                           &msg);
+               return rpcb_register_inet4(address, &msg);
        case AF_INET6:
-               return rpcb_register_netid6((struct sockaddr_in6 *)address,
-                                           &msg);
+               return rpcb_register_inet6(address, &msg);
        }
 
        return -EAFNOSUPPORT;
@@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task)
        map->r_xprt = xprt_get(xprt);
        map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
        map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
-       map->r_owner = RPCB_OWNER_STRING;       /* ignored for GETADDR */
+       map->r_owner = "";
        map->r_status = -EIO;
 
        child = rpcb_call_async(rpcb_clnt, map, proc);
@@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
        *portp = 0;
        addr_len = ntohl(*p++);
 
+       if (addr_len == 0) {
+               dprintk("RPC:       rpcb_decode_getaddr: "
+                                       "service is not registered\n");
+               return 0;
+       }
+
        /*
-        * Simple sanity check.  The smallest possible universal
-        * address is an IPv4 address string containing 11 bytes.
+        * Simple sanity check.
         */
-       if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
+       if (addr_len > RPCBIND_MAXUADDRLEN)
                goto out_err;
 
        /*
index bb507e2..9f2f241 100644 (file)
@@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
  */
 static struct svc_serv *
 __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-          sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+            void (*shutdown)(struct svc_serv *serv))
 {
        struct svc_serv *serv;
        unsigned int vers;
@@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
        if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
                return NULL;
-       serv->sv_family    = family;
        serv->sv_name      = prog->pg_name;
        serv->sv_program   = prog;
        serv->sv_nrthreads = 1;
@@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 struct svc_serv *
 svc_create(struct svc_program *prog, unsigned int bufsize,
-               sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+          void (*shutdown)(struct svc_serv *serv))
 {
-       return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
+       return __svc_create(prog, bufsize, /*npools*/1, shutdown);
 }
 EXPORT_SYMBOL_GPL(svc_create);
 
 struct svc_serv *
 svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-                 sa_family_t family, void (*shutdown)(struct svc_serv *serv),
+                 void (*shutdown)(struct svc_serv *serv),
                  svc_thread_fn func, struct module *mod)
 {
        struct svc_serv *serv;
        unsigned int npools = svc_pool_map_get();
 
-       serv = __svc_create(prog, bufsize, npools, family, shutdown);
+       serv = __svc_create(prog, bufsize, npools, shutdown);
 
        if (serv != NULL) {
                serv->sv_function = func;
@@ -719,8 +718,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
 }
 EXPORT_SYMBOL_GPL(svc_exit_thread);
 
-#ifdef CONFIG_SUNRPC_REGISTER_V4
-
 /*
  * Register an "inet" protocol family netid with the local
  * rpcbind daemon via an rpcbind v4 SET request.
@@ -735,12 +732,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
                                const unsigned short protocol,
                                const unsigned short port)
 {
-       struct sockaddr_in sin = {
+       const struct sockaddr_in sin = {
                .sin_family             = AF_INET,
                .sin_addr.s_addr        = htonl(INADDR_ANY),
                .sin_port               = htons(port),
        };
-       char *netid;
+       const char *netid;
+       int error;
 
        switch (protocol) {
        case IPPROTO_UDP:
@@ -750,13 +748,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
                netid = RPCBIND_NETID_TCP;
                break;
        default:
-               return -EPROTONOSUPPORT;
+               return -ENOPROTOOPT;
        }
 
-       return rpcb_v4_register(program, version,
-                               (struct sockaddr *)&sin, netid);
+       error = rpcb_v4_register(program, version,
+                                       (const struct sockaddr *)&sin, netid);
+
+       /*
+        * User space didn't support rpcbind v4, so retry this
+        * registration request with the legacy rpcbind v2 protocol.
+        */
+       if (error == -EPROTONOSUPPORT)
+               error = rpcb_register(program, version, protocol, port);
+
+       return error;
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 /*
  * Register an "inet6" protocol family netid with the local
  * rpcbind daemon via an rpcbind v4 SET request.
@@ -771,12 +779,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
                                const unsigned short protocol,
                                const unsigned short port)
 {
-       struct sockaddr_in6 sin6 = {
+       const struct sockaddr_in6 sin6 = {
                .sin6_family            = AF_INET6,
                .sin6_addr              = IN6ADDR_ANY_INIT,
                .sin6_port              = htons(port),
        };
-       char *netid;
+       const char *netid;
+       int error;
 
        switch (protocol) {
        case IPPROTO_UDP:
@@ -786,12 +795,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
                netid = RPCBIND_NETID_TCP6;
                break;
        default:
-               return -EPROTONOSUPPORT;
+               return -ENOPROTOOPT;
        }
 
-       return rpcb_v4_register(program, version,
-                               (struct sockaddr *)&sin6, netid);
+       error = rpcb_v4_register(program, version,
+                                       (const struct sockaddr *)&sin6, netid);
+
+       /*
+        * User space didn't support rpcbind version 4, so we won't
+        * use a PF_INET6 listener.
+        */
+       if (error == -EPROTONOSUPPORT)
+               error = -EAFNOSUPPORT;
+
+       return error;
 }
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 
 /*
  * Register a kernel RPC service via rpcbind version 4.
@@ -799,69 +818,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
  * Returns zero on success; a negative errno value is returned
  * if any error occurs.
  */
-static int __svc_register(const u32 program, const u32 version,
-                         const sa_family_t family,
+static int __svc_register(const char *progname,
+                         const u32 program, const u32 version,
+                         const int family,
                          const unsigned short protocol,
                          const unsigned short port)
 {
-       int error;
+       int error = -EAFNOSUPPORT;
 
        switch (family) {
-       case AF_INET:
-               return __svc_rpcb_register4(program, version,
+       case PF_INET:
+               error = __svc_rpcb_register4(program, version,
                                                protocol, port);
-       case AF_INET6:
+               break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       case PF_INET6:
                error = __svc_rpcb_register6(program, version,
                                                protocol, port);
-               if (error < 0)
-                       return error;
-
-               /*
-                * Work around bug in some versions of Linux rpcbind
-                * which don't allow registration of both inet and
-                * inet6 netids.
-                *
-                * Error return ignored for now.
-                */
-               __svc_rpcb_register4(program, version,
-                                               protocol, port);
-               return 0;
+#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
        }
 
-       return -EAFNOSUPPORT;
-}
-
-#else  /* CONFIG_SUNRPC_REGISTER_V4 */
-
-/*
- * Register a kernel RPC service via rpcbind version 2.
- *
- * Returns zero on success; a negative errno value is returned
- * if any error occurs.
- */
-static int __svc_register(const u32 program, const u32 version,
-                         sa_family_t family,
-                         const unsigned short protocol,
-                         const unsigned short port)
-{
-       if (family != AF_INET)
-               return -EAFNOSUPPORT;
-
-       return rpcb_register(program, version, protocol, port);
+       if (error < 0)
+               printk(KERN_WARNING "svc: failed to register %sv%u RPC "
+                       "service (errno %d).\n", progname, version, -error);
+       return error;
 }
 
-#endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
 /**
  * svc_register - register an RPC service with the local portmapper
  * @serv: svc_serv struct for the service to register
+ * @family: protocol family of service's listener socket
  * @proto: transport protocol number to advertise
  * @port: port to advertise
  *
- * Service is registered for any address in serv's address family
+ * Service is registered for any address in the passed-in protocol family
  */
-int svc_register(const struct svc_serv *serv, const unsigned short proto,
-                const unsigned short port)
+int svc_register(const struct svc_serv *serv, const int family,
+                const unsigned short proto, const unsigned short port)
 {
        struct svc_program      *progp;
        unsigned int            i;
@@ -879,15 +872,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
                                        i,
                                        proto == IPPROTO_UDP?  "udp" : "tcp",
                                        port,
-                                       serv->sv_family,
+                                       family,
                                        progp->pg_vers[i]->vs_hidden?
                                                " (but not telling portmap)" : "");
 
                        if (progp->pg_vers[i]->vs_hidden)
                                continue;
 
-                       error = __svc_register(progp->pg_prog, i,
-                                               serv->sv_family, proto, port);
+                       error = __svc_register(progp->pg_name, progp->pg_prog,
+                                               i, family, proto, port);
                        if (error < 0)
                                break;
                }
@@ -896,38 +889,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
        return error;
 }
 
-#ifdef CONFIG_SUNRPC_REGISTER_V4
-
+/*
+ * If user space is running rpcbind, it should take the v4 UNSET
+ * and clear everything for this [program, version].  If user space
+ * is running portmap, it will reject the v4 UNSET, but won't have
+ * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
+ * in this case to clear all existing entries for [program, version].
+ */
 static void __svc_unregister(const u32 program, const u32 version,
                             const char *progname)
 {
-       struct sockaddr_in6 sin6 = {
-               .sin6_family            = AF_INET6,
-               .sin6_addr              = IN6ADDR_ANY_INIT,
-               .sin6_port              = 0,
-       };
        int error;
 
-       error = rpcb_v4_register(program, version,
-                               (struct sockaddr *)&sin6, "");
-       dprintk("svc: %s(%sv%u), error %d\n",
-                       __func__, progname, version, error);
-}
-
-#else  /* CONFIG_SUNRPC_REGISTER_V4 */
+       error = rpcb_v4_register(program, version, NULL, "");
 
-static void __svc_unregister(const u32 program, const u32 version,
-                            const char *progname)
-{
-       int error;
+       /*
+        * User space didn't support rpcbind v4, so retry this
+        * request with the legacy rpcbind v2 protocol.
+        */
+       if (error == -EPROTONOSUPPORT)
+               error = rpcb_register(program, version, 0, 0);
 
-       error = rpcb_register(program, version, 0, 0);
        dprintk("svc: %s(%sv%u), error %d\n",
                        __func__, progname, version, error);
 }
 
-#endif /* CONFIG_SUNRPC_REGISTER_V4 */
-
 /*
  * All netids, bind addresses and ports registered for [program, version]
  * are removed from the local rpcbind database (if the service is not
index e588df5..2819ee0 100644 (file)
@@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init);
 
 static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
                                         struct svc_serv *serv,
-                                        unsigned short port, int flags)
+                                        const int family,
+                                        const unsigned short port,
+                                        int flags)
 {
        struct sockaddr_in sin = {
                .sin_family             = AF_INET,
@@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
        struct sockaddr *sap;
        size_t len;
 
-       switch (serv->sv_family) {
-       case AF_INET:
+       switch (family) {
+       case PF_INET:
                sap = (struct sockaddr *)&sin;
                len = sizeof(sin);
                break;
-       case AF_INET6:
+       case PF_INET6:
                sap = (struct sockaddr *)&sin6;
                len = sizeof(sin6);
                break;
@@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
        return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
 }
 
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
+int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+                   const int family, const unsigned short port,
                    int flags)
 {
        struct svc_xprt_class *xcl;
@@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
                        goto err;
 
                spin_unlock(&svc_xprt_class_lock);
-               newxprt = __svc_xpo_create(xcl, serv, port, flags);
+               newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
                if (IS_ERR(newxprt)) {
                        module_put(xcl->xcl_owner);
                        return PTR_ERR(newxprt);
@@ -1033,7 +1036,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
        return dr;
 }
 
-/*
+/**
+ * svc_find_xprt - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @af: Address family of transport's local address
+ * @port: transport's IP port number
+ *
  * Return the transport instance pointer for the endpoint accepting
  * connections/peer traffic from the specified transport class,
  * address family and port.
@@ -1042,14 +1051,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
  * wild-card, and will result in matching the first transport in the
  * service's list that has a matching class name.
  */
-struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
-                              int af, int port)
+struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+                              const sa_family_t af, const unsigned short port)
 {
        struct svc_xprt *xprt;
        struct svc_xprt *found = NULL;
 
        /* Sanity check the args */
-       if (!serv || !xcl_name)
+       if (serv == NULL || xcl_name == NULL)
                return found;
 
        spin_lock_bh(&serv->sv_lock);
@@ -1058,7 +1067,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
                        continue;
                if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
                        continue;
-               if (port && port != svc_xprt_local_port(xprt))
+               if (port != 0 && port != svc_xprt_local_port(xprt))
                        continue;
                found = xprt;
                svc_xprt_get(xprt);
index 5763e64..9d50423 100644 (file)
@@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
        struct svc_sock *svsk;
        struct sock     *inet;
        int             pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
-       int             val;
 
        dprintk("svc: svc_setup_socket %p\n", sock);
        if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1122,7 +1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 
        /* Register socket with portmapper */
        if (*errp >= 0 && pmap_register)
-               *errp = svc_register(serv, inet->sk_protocol,
+               *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
                                     ntohs(inet_sk(inet)->sport));
 
        if (*errp < 0) {
@@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
        else
                svc_tcp_init(svsk, serv);
 
-       /*
-        * We start one listener per sv_serv.  We want AF_INET
-        * requests to be automatically shunted to our AF_INET6
-        * listener using a mapped IPv4 address.  Make sure
-        * no-one starts an equivalent IPv4 listener, which
-        * would steal our incoming connections.
-        */
-       val = 0;
-       if (serv->sv_family == AF_INET6)
-               kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
-                                       (char *)&val, sizeof(val));
-
        dprintk("svc: svc_setup_socket created %p (inet %p)\n",
                                svsk, svsk->sk_sk);
 
@@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
        struct sockaddr_storage addr;
        struct sockaddr *newsin = (struct sockaddr *)&addr;
        int             newlen;
+       int             family;
+       int             val;
        RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 
        dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
                                "sockets supported\n");
                return ERR_PTR(-EINVAL);
        }
+
        type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
+       switch (sin->sa_family) {
+       case AF_INET6:
+               family = PF_INET6;
+               break;
+       case AF_INET:
+               family = PF_INET;
+               break;
+       default:
+               return ERR_PTR(-EINVAL);
+       }
 
-       error = sock_create_kern(sin->sa_family, type, protocol, &sock);
+       error = sock_create_kern(family, type, protocol, &sock);
        if (error < 0)
                return ERR_PTR(error);
 
        svc_reclassify_socket(sock);
 
+       /*
+        * If this is an PF_INET6 listener, we want to avoid
+        * getting requests from IPv4 remotes.  Those should
+        * be shunted to a PF_INET listener via rpcbind.
+        */
+       val = 1;
+       if (family == PF_INET6)
+               kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
+                                       (char *)&val, sizeof(val));
+
        if (type == SOCK_STREAM)
                sock->sk->sk_reuse = 1;         /* allow address reuse */
        error = kernel_bind(sock, sin, len);
index 62098d1..a0bfe53 100644 (file)
@@ -151,6 +151,37 @@ out:
 }
 EXPORT_SYMBOL_GPL(xprt_unregister_transport);
 
+/**
+ * xprt_load_transport - load a transport implementation
+ * @transport_name: transport to load
+ *
+ * Returns:
+ * 0:          transport successfully loaded
+ * -ENOENT:    transport module not available
+ */
+int xprt_load_transport(const char *transport_name)
+{
+       struct xprt_class *t;
+       char module_name[sizeof t->name + 5];
+       int result;
+
+       result = 0;
+       spin_lock(&xprt_list_lock);
+       list_for_each_entry(t, &xprt_list, list) {
+               if (strcmp(t->name, transport_name) == 0) {
+                       spin_unlock(&xprt_list_lock);
+                       goto out;
+               }
+       }
+       spin_unlock(&xprt_list_lock);
+       strcpy(module_name, "xprt");
+       strncat(module_name, transport_name, sizeof t->name);
+       result = request_module(module_name);
+out:
+       return result;
+}
+EXPORT_SYMBOL_GPL(xprt_load_transport);
+
 /**
  * xprt_reserve_xprt - serialize write access to transports
  * @task: task that is requesting access to the transport
@@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
        dprintk("RPC:       disconnected transport %p\n", xprt);
        spin_lock_bh(&xprt->transport_lock);
        xprt_clear_connected(xprt);
-       xprt_wake_pending_tasks(xprt, -ENOTCONN);
+       xprt_wake_pending_tasks(xprt, -EAGAIN);
        spin_unlock_bh(&xprt->transport_lock);
 }
 EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
        /* Try to schedule an autoclose RPC call */
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
                queue_work(rpciod_workqueue, &xprt->task_cleanup);
-       xprt_wake_pending_tasks(xprt, -ENOTCONN);
+       xprt_wake_pending_tasks(xprt, -EAGAIN);
        spin_unlock_bh(&xprt->transport_lock);
 }
 
@@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
        /* Try to schedule an autoclose RPC call */
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
                queue_work(rpciod_workqueue, &xprt->task_cleanup);
-       xprt_wake_pending_tasks(xprt, -ENOTCONN);
+       xprt_wake_pending_tasks(xprt, -EAGAIN);
 out:
        spin_unlock_bh(&xprt->transport_lock);
 }
@@ -695,9 +726,8 @@ static void xprt_connect_status(struct rpc_task *task)
        }
 
        switch (task->tk_status) {
-       case -ENOTCONN:
-               dprintk("RPC: %5u xprt_connect_status: connection broken\n",
-                               task->tk_pid);
+       case -EAGAIN:
+               dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
                break;
        case -ETIMEDOUT:
                dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
@@ -818,15 +848,8 @@ int xprt_prepare_transmit(struct rpc_task *task)
                err = req->rq_received;
                goto out_unlock;
        }
-       if (!xprt->ops->reserve_xprt(task)) {
+       if (!xprt->ops->reserve_xprt(task))
                err = -EAGAIN;
-               goto out_unlock;
-       }
-
-       if (!xprt_connected(xprt)) {
-               err = -ENOTCONN;
-               goto out_unlock;
-       }
 out_unlock:
        spin_unlock_bh(&xprt->transport_lock);
        return err;
@@ -870,32 +893,26 @@ void xprt_transmit(struct rpc_task *task)
        req->rq_connect_cookie = xprt->connect_cookie;
        req->rq_xtime = jiffies;
        status = xprt->ops->send_request(task);
-       if (status == 0) {
-               dprintk("RPC: %5u xmit complete\n", task->tk_pid);
-               spin_lock_bh(&xprt->transport_lock);
+       if (status != 0) {
+               task->tk_status = status;
+               return;
+       }
 
-               xprt->ops->set_retrans_timeout(task);
+       dprintk("RPC: %5u xmit complete\n", task->tk_pid);
+       spin_lock_bh(&xprt->transport_lock);
 
-               xprt->stat.sends++;
-               xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
-               xprt->stat.bklog_u += xprt->backlog.qlen;
+       xprt->ops->set_retrans_timeout(task);
 
-               /* Don't race with disconnect */
-               if (!xprt_connected(xprt))
-                       task->tk_status = -ENOTCONN;
-               else if (!req->rq_received)
-                       rpc_sleep_on(&xprt->pending, task, xprt_timer);
-               spin_unlock_bh(&xprt->transport_lock);
-               return;
-       }
+       xprt->stat.sends++;
+       xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
+       xprt->stat.bklog_u += xprt->backlog.qlen;
 
-       /* Note: at this point, task->tk_sleeping has not yet been set,
-        *       hence there is no danger of the waking up task being put on
-        *       schedq, and being picked up by a parallel run of rpciod().
-        */
-       task->tk_status = status;
-       if (status == -ECONNREFUSED)
-               rpc_sleep_on(&xprt->sending, task, NULL);
+       /* Don't race with disconnect */
+       if (!xprt_connected(xprt))
+               task->tk_status = -ENOTCONN;
+       else if (!req->rq_received)
+               rpc_sleep_on(&xprt->pending, task, xprt_timer);
+       spin_unlock_bh(&xprt->transport_lock);
 }
 
 static inline void do_xprt_reserve(struct rpc_task *task)
index 14106d2..e5e28d1 100644 (file)
@@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
                __func__, pad, destp, rqst->rq_slen, curlen);
 
        copy_len = rqst->rq_snd_buf.page_len;
+
+       if (rqst->rq_snd_buf.tail[0].iov_len) {
+               curlen = rqst->rq_snd_buf.tail[0].iov_len;
+               if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
+                       memmove(destp + copy_len,
+                               rqst->rq_snd_buf.tail[0].iov_base, curlen);
+                       r_xprt->rx_stats.pullup_copy_count += curlen;
+               }
+               dprintk("RPC:       %s: tail destp 0x%p len %d\n",
+                       __func__, destp + copy_len, curlen);
+               rqst->rq_svec[0].iov_len += curlen;
+       }
+
        r_xprt->rx_stats.pullup_copy_count += copy_len;
        npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
        for (i = 0; copy_len && i < npages; i++) {
@@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
                destp += curlen;
                copy_len -= curlen;
        }
-       if (rqst->rq_snd_buf.tail[0].iov_len) {
-               curlen = rqst->rq_snd_buf.tail[0].iov_len;
-               if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
-                       memcpy(destp,
-                               rqst->rq_snd_buf.tail[0].iov_base, curlen);
-                       r_xprt->rx_stats.pullup_copy_count += curlen;
-               }
-               dprintk("RPC:       %s: tail destp 0x%p len %d curlen %d\n",
-                       __func__, destp, copy_len, curlen);
-               rqst->rq_svec[0].iov_len += curlen;
-       }
        /* header now contains entire send message */
        return pad;
 }
@@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
                if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
                        curlen = rqst->rq_rcv_buf.tail[0].iov_len;
                if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
-                       memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
+                       memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
                dprintk("RPC:       %s: tail srcp 0x%p len %d curlen %d\n",
                        __func__, srcp, copy_len, curlen);
                rqst->rq_rcv_buf.tail[0].iov_len = curlen;
index a3334e3..6c26a67 100644 (file)
@@ -191,7 +191,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
                   struct xdr_buf *xdr,
                   struct svc_rdma_req_map *vec)
 {
-       int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
        int sge_no;
        u32 sge_bytes;
        u32 page_bytes;
@@ -235,7 +234,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
                sge_no++;
        }
 
-       BUG_ON(sge_no > sge_max);
+       dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+               "page_base %u page_len %u head_len %zu tail_len %zu\n",
+               sge_no, page_no, xdr->page_base, xdr->page_len,
+               xdr->head[0].iov_len, xdr->tail[0].iov_len);
+
        vec->count = sge_no;
        return 0;
 }
@@ -579,7 +582,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
                        ctxt->sge[page_no+1].length = 0;
        }
        BUG_ON(sge_no > rdma->sc_max_sge);
-       BUG_ON(sge_no > ctxt->count);
        memset(&send_wr, 0, sizeof send_wr);
        ctxt->wr_op = IB_WR_SEND;
        send_wr.wr_id = (unsigned long)ctxt;
index 568330e..d40ff50 100644 (file)
@@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
 unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
 unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
 
+#define XS_TCP_LINGER_TO       (15U * HZ)
+static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
+
 /*
  * We can register our own files under /proc/sys/sunrpc by
  * calling register_sysctl_table() again.  The files in that
@@ -116,6 +119,14 @@ static ctl_table xs_tunables_table[] = {
                .extra1         = &xprt_min_resvport_limit,
                .extra2         = &xprt_max_resvport_limit
        },
+       {
+               .procname       = "tcp_fin_timeout",
+               .data           = &xs_tcp_fin_timeout,
+               .maxlen         = sizeof(xs_tcp_fin_timeout),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+               .strategy       = sysctl_jiffies
+       },
        {
                .ctl_name = 0,
        },
@@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task)
  * @task: task to put to sleep
  *
  */
-static void xs_nospace(struct rpc_task *task)
+static int xs_nospace(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       int ret = 0;
 
        dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
                        task->tk_pid, req->rq_slen - req->rq_bytes_sent,
@@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task)
        /* Don't race with disconnect */
        if (xprt_connected(xprt)) {
                if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+                       ret = -EAGAIN;
                        /*
                         * Notify TCP that we're limited by the application
                         * window size
@@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task)
                }
        } else {
                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               task->tk_status = -ENOTCONN;
+               ret = -ENOTCONN;
        }
 
        spin_unlock_bh(&xprt->transport_lock);
+       return ret;
 }
 
 /**
@@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task)
                /* Still some bytes left; set up for a retry later. */
                status = -EAGAIN;
        }
+       if (!transport->sock)
+               goto out;
 
        switch (status) {
        case -ENOTSOCK:
@@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task)
                /* Should we call xs_close() here? */
                break;
        case -EAGAIN:
-               xs_nospace(task);
+               status = xs_nospace(task);
                break;
+       default:
+               dprintk("RPC:       sendmsg returned unrecognized error %d\n",
+                       -status);
        case -ENETUNREACH:
        case -EPIPE:
        case -ECONNREFUSED:
                /* When the server has died, an ICMP port unreachable message
                 * prompts ECONNREFUSED. */
                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               break;
-       default:
-               clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               dprintk("RPC:       sendmsg returned unrecognized error %d\n",
-                       -status);
        }
-
+out:
        return status;
 }
 
@@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
                status = -EAGAIN;
                break;
        }
+       if (!transport->sock)
+               goto out;
 
        switch (status) {
        case -ENOTSOCK:
@@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task)
                /* Should we call xs_close() here? */
                break;
        case -EAGAIN:
-               xs_nospace(task);
+               status = xs_nospace(task);
                break;
+       default:
+               dprintk("RPC:       sendmsg returned unrecognized error %d\n",
+                       -status);
        case -ECONNRESET:
+       case -EPIPE:
                xs_tcp_shutdown(xprt);
        case -ECONNREFUSED:
        case -ENOTCONN:
-       case -EPIPE:
-               status = -ENOTCONN;
-               clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               break;
-       default:
-               dprintk("RPC:       sendmsg returned unrecognized error %d\n",
-                       -status);
                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               xs_tcp_shutdown(xprt);
        }
-
+out:
        return status;
 }
 
@@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
        sk->sk_error_report = transport->old_error_report;
 }
 
-/**
- * xs_close - close a socket
- * @xprt: transport
- *
- * This is used when all requests are complete; ie, no DRC state remains
- * on the server we want to save.
- */
-static void xs_close(struct rpc_xprt *xprt)
+static void xs_reset_transport(struct sock_xprt *transport)
 {
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct socket *sock = transport->sock;
        struct sock *sk = transport->inet;
 
-       if (!sk)
-               goto clear_close_wait;
-
-       dprintk("RPC:       xs_close xprt %p\n", xprt);
+       if (sk == NULL)
+               return;
 
        write_lock_bh(&sk->sk_callback_lock);
        transport->inet = NULL;
@@ -797,8 +799,25 @@ static void xs_close(struct rpc_xprt *xprt)
        sk->sk_no_check = 0;
 
        sock_release(sock);
-clear_close_wait:
+}
+
+/**
+ * xs_close - close a socket
+ * @xprt: transport
+ *
+ * This is used when all requests are complete; ie, no DRC state remains
+ * on the server we want to save.
+ */
+static void xs_close(struct rpc_xprt *xprt)
+{
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+       dprintk("RPC:       xs_close xprt %p\n", xprt);
+
+       xs_reset_transport(transport);
+
        smp_mb__before_clear_bit();
+       clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
        clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
        clear_bit(XPRT_CLOSING, &xprt->state);
        smp_mb__after_clear_bit();
@@ -1126,6 +1145,47 @@ out:
        read_unlock(&sk->sk_callback_lock);
 }
 
+/*
+ * Do the equivalent of linger/linger2 handling for dealing with
+ * broken servers that don't close the socket in a timely
+ * fashion
+ */
+static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
+               unsigned long timeout)
+{
+       struct sock_xprt *transport;
+
+       if (xprt_test_and_set_connecting(xprt))
+               return;
+       set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+       transport = container_of(xprt, struct sock_xprt, xprt);
+       queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
+                          timeout);
+}
+
+static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
+{
+       struct sock_xprt *transport;
+
+       transport = container_of(xprt, struct sock_xprt, xprt);
+
+       if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
+           !cancel_delayed_work(&transport->connect_worker))
+               return;
+       clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+       xprt_clear_connecting(xprt);
+}
+
+static void xs_sock_mark_closed(struct rpc_xprt *xprt)
+{
+       smp_mb__before_clear_bit();
+       clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+       clear_bit(XPRT_CLOSING, &xprt->state);
+       smp_mb__after_clear_bit();
+       /* Mark transport as closed and wake up all pending tasks */
+       xprt_disconnect_done(xprt);
+}
+
 /**
  * xs_tcp_state_change - callback to handle TCP socket state changes
  * @sk: socket whose state has changed
@@ -1158,7 +1218,7 @@ static void xs_tcp_state_change(struct sock *sk)
                        transport->tcp_flags =
                                TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
 
-                       xprt_wake_pending_tasks(xprt, 0);
+                       xprt_wake_pending_tasks(xprt, -EAGAIN);
                }
                spin_unlock_bh(&xprt->transport_lock);
                break;
@@ -1171,10 +1231,10 @@ static void xs_tcp_state_change(struct sock *sk)
                clear_bit(XPRT_CONNECTED, &xprt->state);
                clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
                smp_mb__after_clear_bit();
+               xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
                break;
        case TCP_CLOSE_WAIT:
                /* The server initiated a shutdown of the socket */
-               set_bit(XPRT_CLOSING, &xprt->state);
                xprt_force_disconnect(xprt);
        case TCP_SYN_SENT:
                xprt->connect_cookie++;
@@ -1187,40 +1247,35 @@ static void xs_tcp_state_change(struct sock *sk)
                        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
                break;
        case TCP_LAST_ACK:
+               set_bit(XPRT_CLOSING, &xprt->state);
+               xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
                smp_mb__before_clear_bit();
                clear_bit(XPRT_CONNECTED, &xprt->state);
                smp_mb__after_clear_bit();
                break;
        case TCP_CLOSE:
-               smp_mb__before_clear_bit();
-               clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
-               clear_bit(XPRT_CLOSING, &xprt->state);
-               smp_mb__after_clear_bit();
-               /* Mark transport as closed and wake up all pending tasks */
-               xprt_disconnect_done(xprt);
+               xs_tcp_cancel_linger_timeout(xprt);
+               xs_sock_mark_closed(xprt);
        }
  out:
        read_unlock(&sk->sk_callback_lock);
 }
 
 /**
- * xs_tcp_error_report - callback mainly for catching RST events
+ * xs_error_report - callback mainly for catching socket errors
  * @sk: socket
  */
-static void xs_tcp_error_report(struct sock *sk)
+static void xs_error_report(struct sock *sk)
 {
        struct rpc_xprt *xprt;
 
        read_lock(&sk->sk_callback_lock);
-       if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
-               goto out;
        if (!(xprt = xprt_from_sock(sk)))
                goto out;
        dprintk("RPC:       %s client %p...\n"
                        "RPC:       error %d\n",
                        __func__, xprt, sk->sk_err);
-
-       xprt_force_disconnect(xprt);
+       xprt_wake_pending_tasks(xprt, -EAGAIN);
 out:
        read_unlock(&sk->sk_callback_lock);
 }
@@ -1494,6 +1549,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                sk->sk_user_data = xprt;
                sk->sk_data_ready = xs_udp_data_ready;
                sk->sk_write_space = xs_udp_write_space;
+               sk->sk_error_report = xs_error_report;
                sk->sk_no_check = UDP_CSUM_NORCV;
                sk->sk_allocation = GFP_ATOMIC;
 
@@ -1526,9 +1582,10 @@ static void xs_udp_connect_worker4(struct work_struct *work)
                goto out;
 
        /* Start by resetting any existing state */
-       xs_close(xprt);
+       xs_reset_transport(transport);
 
-       if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+       err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+       if (err < 0) {
                dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
                goto out;
        }
@@ -1545,8 +1602,8 @@ static void xs_udp_connect_worker4(struct work_struct *work)
        xs_udp_finish_connecting(xprt, sock);
        status = 0;
 out:
-       xprt_wake_pending_tasks(xprt, status);
        xprt_clear_connecting(xprt);
+       xprt_wake_pending_tasks(xprt, status);
 }
 
 /**
@@ -1567,9 +1624,10 @@ static void xs_udp_connect_worker6(struct work_struct *work)
                goto out;
 
        /* Start by resetting any existing state */
-       xs_close(xprt);
+       xs_reset_transport(transport);
 
-       if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+       err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
+       if (err < 0) {
                dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
                goto out;
        }
@@ -1586,18 +1644,17 @@ static void xs_udp_connect_worker6(struct work_struct *work)
        xs_udp_finish_connecting(xprt, sock);
        status = 0;
 out:
-       xprt_wake_pending_tasks(xprt, status);
        xprt_clear_connecting(xprt);
+       xprt_wake_pending_tasks(xprt, status);
 }
 
 /*
  * We need to preserve the port number so the reply cache on the server can
  * find our cached RPC replies when we get around to reconnecting.
  */
-static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
+static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
 {
        int result;
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct sockaddr any;
 
        dprintk("RPC:       disconnecting xprt %p to reuse port\n", xprt);
@@ -1609,11 +1666,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
        memset(&any, 0, sizeof(any));
        any.sa_family = AF_UNSPEC;
        result = kernel_connect(transport->sock, &any, sizeof(any), 0);
-       if (result)
+       if (!result)
+               xs_sock_mark_closed(xprt);
+       else
                dprintk("RPC:       AF_UNSPEC connect return code %d\n",
                                result);
 }
 
+static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+{
+       unsigned int state = transport->inet->sk_state;
+
+       if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
+               return;
+       if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
+               return;
+       xs_abort_connection(xprt, transport);
+}
+
 static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1629,7 +1699,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                sk->sk_data_ready = xs_tcp_data_ready;
                sk->sk_state_change = xs_tcp_state_change;
                sk->sk_write_space = xs_tcp_write_space;
-               sk->sk_error_report = xs_tcp_error_report;
+               sk->sk_error_report = xs_error_report;
                sk->sk_allocation = GFP_ATOMIC;
 
                /* socket options */
@@ -1657,37 +1727,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 }
 
 /**
- * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
- * @work: RPC transport to connect
+ * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
+ * @xprt: RPC transport to connect
+ * @transport: socket transport to connect
+ * @create_sock: function to create a socket of the correct type
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_tcp_connect_worker4(struct work_struct *work)
+static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
+               struct sock_xprt *transport,
+               struct socket *(*create_sock)(struct rpc_xprt *,
+                       struct sock_xprt *))
 {
-       struct sock_xprt *transport =
-               container_of(work, struct sock_xprt, connect_worker.work);
-       struct rpc_xprt *xprt = &transport->xprt;
        struct socket *sock = transport->sock;
-       int err, status = -EIO;
+       int status = -EIO;
 
        if (xprt->shutdown)
                goto out;
 
        if (!sock) {
-               /* start from scratch */
-               if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
-                       dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
+               clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+               sock = create_sock(xprt, transport);
+               if (IS_ERR(sock)) {
+                       status = PTR_ERR(sock);
                        goto out;
                }
-               xs_reclassify_socket4(sock);
+       } else {
+               int abort_and_exit;
 
-               if (xs_bind4(transport, sock) < 0) {
-                       sock_release(sock);
-                       goto out;
-               }
-       } else
+               abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
+                               &xprt->state);
                /* "close" the socket, preserving the local port */
-               xs_tcp_reuse_connection(xprt);
+               xs_tcp_reuse_connection(xprt, transport);
+
+               if (abort_and_exit)
+                       goto out_eagain;
+       }
 
        dprintk("RPC:       worker connecting xprt %p to address: %s\n",
                        xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
@@ -1696,83 +1771,104 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
        dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
                        xprt, -status, xprt_connected(xprt),
                        sock->sk->sk_state);
-       if (status < 0) {
-               switch (status) {
-                       case -EINPROGRESS:
-                       case -EALREADY:
-                               goto out_clear;
-                       case -ECONNREFUSED:
-                       case -ECONNRESET:
-                               /* retry with existing socket, after a delay */
-                               break;
-                       default:
-                               /* get rid of existing socket, and retry */
-                               xs_tcp_shutdown(xprt);
-               }
+       switch (status) {
+       case -ECONNREFUSED:
+       case -ECONNRESET:
+       case -ENETUNREACH:
+               /* retry with existing socket, after a delay */
+       case 0:
+       case -EINPROGRESS:
+       case -EALREADY:
+               xprt_clear_connecting(xprt);
+               return;
        }
+       /* get rid of existing socket, and retry */
+       xs_tcp_shutdown(xprt);
+       printk("%s: connect returned unhandled error %d\n",
+                       __func__, status);
+out_eagain:
+       status = -EAGAIN;
 out:
-       xprt_wake_pending_tasks(xprt, status);
-out_clear:
        xprt_clear_connecting(xprt);
+       xprt_wake_pending_tasks(xprt, status);
+}
+
+static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
+               struct sock_xprt *transport)
+{
+       struct socket *sock;
+       int err;
+
+       /* start from scratch */
+       err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+       if (err < 0) {
+               dprintk("RPC:       can't create TCP transport socket (%d).\n",
+                               -err);
+               goto out_err;
+       }
+       xs_reclassify_socket4(sock);
+
+       if (xs_bind4(transport, sock) < 0) {
+               sock_release(sock);
+               goto out_err;
+       }
+       return sock;
+out_err:
+       return ERR_PTR(-EIO);
 }
 
 /**
- * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
  * @work: RPC transport to connect
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_tcp_connect_worker6(struct work_struct *work)
+static void xs_tcp_connect_worker4(struct work_struct *work)
 {
        struct sock_xprt *transport =
                container_of(work, struct sock_xprt, connect_worker.work);
        struct rpc_xprt *xprt = &transport->xprt;
-       struct socket *sock = transport->sock;
-       int err, status = -EIO;
 
-       if (xprt->shutdown)
-               goto out;
+       xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
+}
 
-       if (!sock) {
-               /* start from scratch */
-               if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
-                       dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
-                       goto out;
-               }
-               xs_reclassify_socket6(sock);
+static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
+               struct sock_xprt *transport)
+{
+       struct socket *sock;
+       int err;
+
+       /* start from scratch */
+       err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
+       if (err < 0) {
+               dprintk("RPC:       can't create TCP transport socket (%d).\n",
+                               -err);
+               goto out_err;
+       }
+       xs_reclassify_socket6(sock);
 
-               if (xs_bind6(transport, sock) < 0) {
-                       sock_release(sock);
-                       goto out;
-               }
-       } else
-               /* "close" the socket, preserving the local port */
-               xs_tcp_reuse_connection(xprt);
+       if (xs_bind6(transport, sock) < 0) {
+               sock_release(sock);
+               goto out_err;
+       }
+       return sock;
+out_err:
+       return ERR_PTR(-EIO);
+}
 
-       dprintk("RPC:       worker connecting xprt %p to address: %s\n",
-                       xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+/**
+ * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker6(struct work_struct *work)
+{
+       struct sock_xprt *transport =
+               container_of(work, struct sock_xprt, connect_worker.work);
+       struct rpc_xprt *xprt = &transport->xprt;
 
-       status = xs_tcp_finish_connecting(xprt, sock);
-       dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
-                       xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
-       if (status < 0) {
-               switch (status) {
-                       case -EINPROGRESS:
-                       case -EALREADY:
-                               goto out_clear;
-                       case -ECONNREFUSED:
-                       case -ECONNRESET:
-                               /* retry with existing socket, after a delay */
-                               break;
-                       default:
-                               /* get rid of existing socket, and retry */
-                               xs_tcp_shutdown(xprt);
-               }
-       }
-out:
-       xprt_wake_pending_tasks(xprt, status);
-out_clear:
-       xprt_clear_connecting(xprt);
+       xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
 }
 
 /**
@@ -1817,9 +1913,6 @@ static void xs_tcp_connect(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_xprt;
 
-       /* Initiate graceful shutdown of the socket if not already done */
-       if (test_bit(XPRT_CONNECTED, &xprt->state))
-               xs_tcp_shutdown(xprt);
        /* Exit if we need to wait for socket shutdown to complete */
        if (test_bit(XPRT_CLOSING, &xprt->state))
                return;