Merge tag 'fs.setgid.v6.0' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner...
[linux-2.6-microblaze.git] / fs / namei.c
index 6e86ccc..53b4bc0 100644 (file)
@@ -567,7 +567,7 @@ struct nameidata {
        struct path     root;
        struct inode    *inode; /* path.dentry.d_inode */
        unsigned int    flags, state;
-       unsigned        seq, m_seq, r_seq;
+       unsigned        seq, next_seq, m_seq, r_seq;
        int             last_type;
        unsigned        depth;
        int             total_link_count;
@@ -665,6 +665,13 @@ static void drop_links(struct nameidata *nd)
        }
 }
 
+static void leave_rcu(struct nameidata *nd)
+{
+       nd->flags &= ~LOOKUP_RCU;       /* the walk is no longer in RCU mode */
+       nd->seq = nd->next_seq = 0;     /* zero both sampled seq numbers on the way out */
+       rcu_read_unlock();              /* e.g. pairs with rcu_read_lock() in path_init() */
+}
+
 static void terminate_walk(struct nameidata *nd)
 {
        drop_links(nd);
@@ -678,8 +685,7 @@ static void terminate_walk(struct nameidata *nd)
                        nd->state &= ~ND_ROOT_GRABBED;
                }
        } else {
-               nd->flags &= ~LOOKUP_RCU;
-               rcu_read_unlock();
+               leave_rcu(nd);
        }
        nd->depth = 0;
        nd->path.mnt = NULL;
@@ -765,14 +771,13 @@ static bool try_to_unlazy(struct nameidata *nd)
 
        BUG_ON(!(nd->flags & LOOKUP_RCU));
 
-       nd->flags &= ~LOOKUP_RCU;
        if (unlikely(!legitimize_links(nd)))
                goto out1;
        if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
                goto out;
        if (unlikely(!legitimize_root(nd)))
                goto out;
-       rcu_read_unlock();
+       leave_rcu(nd);
        BUG_ON(nd->inode != parent->d_inode);
        return true;
 
@@ -780,7 +785,7 @@ out1:
        nd->path.mnt = NULL;
        nd->path.dentry = NULL;
 out:
-       rcu_read_unlock();
+       leave_rcu(nd);
        return false;
 }
 
@@ -788,7 +793,6 @@ out:
  * try_to_unlazy_next - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
  * @dentry: next dentry to step into
- * @seq: seq number to check @dentry against
  * Returns: true on success, false on failure
  *
  * Similar to try_to_unlazy(), but here we have the next dentry already
@@ -797,15 +801,19 @@ out:
  * Nothing should touch nameidata between try_to_unlazy_next() failure and
  * terminate_walk().
  */
-static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
+static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
 {
+       int res;
        BUG_ON(!(nd->flags & LOOKUP_RCU));
 
-       nd->flags &= ~LOOKUP_RCU;
        if (unlikely(!legitimize_links(nd)))
                goto out2;
-       if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
-               goto out2;
+       res = __legitimize_mnt(nd->path.mnt, nd->m_seq);
+       if (unlikely(res)) {
+               if (res > 0)
+                       goto out2;
+               goto out1;
+       }
        if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
                goto out1;
 
@@ -818,7 +826,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
         */
        if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
                goto out;
-       if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
+       if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
                goto out_dput;
        /*
         * Sequence counts matched. Now make sure that the root is
@@ -826,7 +834,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
         */
        if (unlikely(!legitimize_root(nd)))
                goto out_dput;
-       rcu_read_unlock();
+       leave_rcu(nd);
        return true;
 
 out2:
@@ -834,10 +842,10 @@ out2:
 out1:
        nd->path.dentry = NULL;
 out:
-       rcu_read_unlock();
+       leave_rcu(nd);
        return false;
 out_dput:
-       rcu_read_unlock();
+       leave_rcu(nd);
        dput(dentry);
        return false;
 }
@@ -962,7 +970,7 @@ static int nd_jump_root(struct nameidata *nd)
                d = nd->path.dentry;
                nd->inode = d->d_inode;
                nd->seq = nd->root_seq;
-               if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
+               if (read_seqcount_retry(&d->d_seq, nd->seq))
                        return -ECHILD;
        } else {
                path_put(&nd->path);
@@ -1466,8 +1474,7 @@ EXPORT_SYMBOL(follow_down);
  * Try to skip to top of mountpoint pile in rcuwalk mode.  Fail if
  * we meet a managed dentry that would need blocking.
  */
-static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
-                              struct inode **inode, unsigned *seqp)
+static bool __follow_mount_rcu(struct nameidata *nd, struct path *path)
 {
        struct dentry *dentry = path->dentry;
        unsigned int flags = dentry->d_flags;
@@ -1496,15 +1503,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
                                path->mnt = &mounted->mnt;
                                dentry = path->dentry = mounted->mnt.mnt_root;
                                nd->state |= ND_JUMPED;
-                               *seqp = read_seqcount_begin(&dentry->d_seq);
-                               *inode = dentry->d_inode;
-                               /*
-                                * We don't need to re-check ->d_seq after this
-                                * ->d_inode read - there will be an RCU delay
-                                * between mount hash removal and ->mnt_root
-                                * becoming unpinned.
-                                */
+                               nd->next_seq = read_seqcount_begin(&dentry->d_seq);
                                flags = dentry->d_flags;
+                               // makes sure that non-RCU pathwalk could reach
+                               // this state.
+                               if (read_seqretry(&mount_lock, nd->m_seq))
+                                       return false;
                                continue;
                        }
                        if (read_seqretry(&mount_lock, nd->m_seq))
@@ -1515,8 +1519,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 }
 
 static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
-                         struct path *path, struct inode **inode,
-                         unsigned int *seqp)
+                         struct path *path)
 {
        bool jumped;
        int ret;
@@ -1524,16 +1527,15 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
        path->mnt = nd->path.mnt;
        path->dentry = dentry;
        if (nd->flags & LOOKUP_RCU) {
-               unsigned int seq = *seqp;
-               if (unlikely(!*inode))
-                       return -ENOENT;
-               if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
+               unsigned int seq = nd->next_seq;
+               if (likely(__follow_mount_rcu(nd, path)))
                        return 0;
-               if (!try_to_unlazy_next(nd, dentry, seq))
-                       return -ECHILD;
-               // *path might've been clobbered by __follow_mount_rcu()
+               // *path and nd->next_seq might've been clobbered
                path->mnt = nd->path.mnt;
                path->dentry = dentry;
+               nd->next_seq = seq;
+               if (!try_to_unlazy_next(nd, dentry))
+                       return -ECHILD;
        }
        ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
        if (jumped) {
@@ -1546,9 +1548,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
                dput(path->dentry);
                if (path->mnt != nd->path.mnt)
                        mntput(path->mnt);
-       } else {
-               *inode = d_backing_inode(path->dentry);
-               *seqp = 0; /* out of RCU mode, so the value doesn't matter */
        }
        return ret;
 }
@@ -1607,9 +1606,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
        return dentry;
 }
 
-static struct dentry *lookup_fast(struct nameidata *nd,
-                                 struct inode **inode,
-                                 unsigned *seqp)
+static struct dentry *lookup_fast(struct nameidata *nd)
 {
        struct dentry *dentry, *parent = nd->path.dentry;
        int status = 1;
@@ -1620,37 +1617,24 @@ static struct dentry *lookup_fast(struct nameidata *nd,
         * going to fall back to non-racy lookup.
         */
        if (nd->flags & LOOKUP_RCU) {
-               unsigned seq;
-               dentry = __d_lookup_rcu(parent, &nd->last, &seq);
+               dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
                if (unlikely(!dentry)) {
                        if (!try_to_unlazy(nd))
                                return ERR_PTR(-ECHILD);
                        return NULL;
                }
 
-               /*
-                * This sequence count validates that the inode matches
-                * the dentry name information from lookup.
-                */
-               *inode = d_backing_inode(dentry);
-               if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
-                       return ERR_PTR(-ECHILD);
-
                /*
                 * This sequence count validates that the parent had no
                 * changes while we did the lookup of the dentry above.
-                *
-                * The memory barrier in read_seqcount_begin of child is
-                *  enough, we can use __read_seqcount_retry here.
                 */
-               if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
+               if (read_seqcount_retry(&parent->d_seq, nd->seq))
                        return ERR_PTR(-ECHILD);
 
-               *seqp = seq;
                status = d_revalidate(dentry, nd->flags);
                if (likely(status > 0))
                        return dentry;
-               if (!try_to_unlazy_next(nd, dentry, seq))
+               if (!try_to_unlazy_next(nd, dentry))
                        return ERR_PTR(-ECHILD);
                if (status == -ECHILD)
                        /* we'd been told to redo it in non-rcu mode */
@@ -1731,7 +1715,7 @@ static inline int may_lookup(struct user_namespace *mnt_userns,
        return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
 }
 
-static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
+static int reserve_stack(struct nameidata *nd, struct path *link)
 {
        if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
                return -ELOOP;
@@ -1746,7 +1730,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
        if (nd->flags & LOOKUP_RCU) {
                // we need to grab link before we do unlazy.  And we can't skip
                // unlazy even if we fail to grab the link - cleanup needs it
-               bool grabbed_link = legitimize_path(nd, link, seq);
+               bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
 
                if (!try_to_unlazy(nd) || !grabbed_link)
                        return -ECHILD;
@@ -1760,11 +1744,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
 enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
 
 static const char *pick_link(struct nameidata *nd, struct path *link,
-                    struct inode *inode, unsigned seq, int flags)
+                    struct inode *inode, int flags)
 {
        struct saved *last;
        const char *res;
-       int error = reserve_stack(nd, link, seq);
+       int error = reserve_stack(nd, link);
 
        if (unlikely(error)) {
                if (!(nd->flags & LOOKUP_RCU))
@@ -1774,7 +1758,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
        last = nd->stack + nd->depth++;
        last->link = *link;
        clear_delayed_call(&last->done);
-       last->seq = seq;
+       last->seq = nd->next_seq;
 
        if (flags & WALK_TRAILING) {
                error = may_follow_link(nd, inode);
@@ -1836,43 +1820,50 @@ all_done: // pure jump
  * to do this check without having to look at inode->i_op,
  * so we keep a cache of "no, this doesn't need follow_link"
  * for the common case.
+ *
+ * NOTE: dentry must be what nd->next_seq had been sampled from.
  */
 static const char *step_into(struct nameidata *nd, int flags,
-                    struct dentry *dentry, struct inode *inode, unsigned seq)
+                    struct dentry *dentry)
 {
        struct path path;
-       int err = handle_mounts(nd, dentry, &path, &inode, &seq);
+       struct inode *inode;
+       int err = handle_mounts(nd, dentry, &path);
 
        if (err < 0)
                return ERR_PTR(err);
+       inode = path.dentry->d_inode;   /* inode of wherever handle_mounts() left path */
        if (likely(!d_is_symlink(path.dentry)) ||
           ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
           (flags & WALK_NOFOLLOW)) {
                /* not a symlink or should not follow */
-               if (!(nd->flags & LOOKUP_RCU)) {
+               if (nd->flags & LOOKUP_RCU) {
+                       if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
+                               return ERR_PTR(-ECHILD);        /* d_seq moved since next_seq was sampled */
+                       if (unlikely(!inode))
+                               return ERR_PTR(-ENOENT);        /* no inode behind the dentry */
+               } else {
                        dput(nd->path.dentry);
                        if (nd->path.mnt != path.mnt)
                                mntput(nd->path.mnt);
                }
                nd->path = path;
                nd->inode = inode;
-               nd->seq = seq;
+               nd->seq = nd->next_seq; /* next_seq now describes the current dentry */
                return NULL;
        }
        if (nd->flags & LOOKUP_RCU) {
                /* make sure that d_is_symlink above matches inode */
-               if (read_seqcount_retry(&path.dentry->d_seq, seq))
+               if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
                        return ERR_PTR(-ECHILD);
        } else {
                if (path.mnt == nd->path.mnt)
                        mntget(path.mnt);
        }
-       return pick_link(nd, &path, inode, seq, flags);
+       return pick_link(nd, &path, inode, flags);      /* link seq is taken from nd->next_seq */
 }
 
-static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
-                                       struct inode **inodep,
-                                       unsigned *seqp)
+static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
 {
        struct dentry *parent, *old;
 
@@ -1889,30 +1880,30 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
                nd->path = path;
                nd->inode = path.dentry->d_inode;
                nd->seq = seq;
-               if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+               // makes sure that non-RCU pathwalk could reach this state
+               if (read_seqretry(&mount_lock, nd->m_seq))
                        return ERR_PTR(-ECHILD);
                /* we know that mountpoint was pinned */
        }
        old = nd->path.dentry;
        parent = old->d_parent;
-       *inodep = parent->d_inode;
-       *seqp = read_seqcount_begin(&parent->d_seq);
-       if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
+       nd->next_seq = read_seqcount_begin(&parent->d_seq);
+       // makes sure that non-RCU pathwalk could reach this state
+       if (read_seqcount_retry(&old->d_seq, nd->seq))
                return ERR_PTR(-ECHILD);
        if (unlikely(!path_connected(nd->path.mnt, parent)))
                return ERR_PTR(-ECHILD);
        return parent;
 in_root:
-       if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+       if (read_seqretry(&mount_lock, nd->m_seq))
                return ERR_PTR(-ECHILD);
        if (unlikely(nd->flags & LOOKUP_BENEATH))
                return ERR_PTR(-ECHILD);
-       return NULL;
+       nd->next_seq = nd->seq;
+       return nd->path.dentry;
 }
 
-static struct dentry *follow_dotdot(struct nameidata *nd,
-                                struct inode **inodep,
-                                unsigned *seqp)
+static struct dentry *follow_dotdot(struct nameidata *nd)
 {
        struct dentry *parent;
 
@@ -1936,15 +1927,12 @@ static struct dentry *follow_dotdot(struct nameidata *nd,
                dput(parent);
                return ERR_PTR(-ENOENT);
        }
-       *seqp = 0;
-       *inodep = parent->d_inode;
        return parent;
 
 in_root:
        if (unlikely(nd->flags & LOOKUP_BENEATH))
                return ERR_PTR(-EXDEV);
-       dget(nd->path.dentry);
-       return NULL;
+       return dget(nd->path.dentry);
 }
 
 static const char *handle_dots(struct nameidata *nd, int type)
@@ -1952,8 +1940,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
        if (type == LAST_DOTDOT) {
                const char *error = NULL;
                struct dentry *parent;
-               struct inode *inode;
-               unsigned seq;
 
                if (!nd->root.mnt) {
                        error = ERR_PTR(set_root(nd));
@@ -1961,17 +1947,12 @@ static const char *handle_dots(struct nameidata *nd, int type)
                                return error;
                }
                if (nd->flags & LOOKUP_RCU)
-                       parent = follow_dotdot_rcu(nd, &inode, &seq);
+                       parent = follow_dotdot_rcu(nd);
                else
-                       parent = follow_dotdot(nd, &inode, &seq);
+                       parent = follow_dotdot(nd);
                if (IS_ERR(parent))
                        return ERR_CAST(parent);
-               if (unlikely(!parent))
-                       error = step_into(nd, WALK_NOFOLLOW,
-                                        nd->path.dentry, nd->inode, nd->seq);
-               else
-                       error = step_into(nd, WALK_NOFOLLOW,
-                                        parent, inode, seq);
+               error = step_into(nd, WALK_NOFOLLOW, parent);
                if (unlikely(error))
                        return error;
 
@@ -1983,9 +1964,9 @@ static const char *handle_dots(struct nameidata *nd, int type)
                         * some fallback).
                         */
                        smp_rmb();
-                       if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
+                       if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq))
                                return ERR_PTR(-EAGAIN);
-                       if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
+                       if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq))
                                return ERR_PTR(-EAGAIN);
                }
        }
@@ -1995,8 +1976,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
 static const char *walk_component(struct nameidata *nd, int flags)
 {
        struct dentry *dentry;
-       struct inode *inode;
-       unsigned seq;
        /*
         * "." and ".." are special - ".." especially so because it has
         * to be able to know about the current root directory and
@@ -2007,7 +1986,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
                        put_link(nd);
                return handle_dots(nd, nd->last_type);
        }
-       dentry = lookup_fast(nd, &inode, &seq);
+       dentry = lookup_fast(nd);
        if (IS_ERR(dentry))
                return ERR_CAST(dentry);
        if (unlikely(!dentry)) {
@@ -2017,7 +1996,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
        }
        if (!(flags & WALK_MORE) && nd->depth)
                put_link(nd);
-       return step_into(nd, flags, dentry, inode, seq);
+       return step_into(nd, flags, dentry);
 }
 
 /*
@@ -2372,6 +2351,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
                flags &= ~LOOKUP_RCU;
        if (flags & LOOKUP_RCU)
                rcu_read_lock();
+       else
+               nd->seq = nd->next_seq = 0;
 
        nd->flags = flags;
        nd->state |= ND_JUMPED;
@@ -2473,8 +2454,8 @@ static int handle_lookup_down(struct nameidata *nd)
 {
        if (!(nd->flags & LOOKUP_RCU))
                dget(nd->path.dentry);
-       return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
-                       nd->path.dentry, nd->inode, nd->seq));
+       nd->next_seq = nd->seq;
+       return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
 }
 
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
@@ -3451,8 +3432,6 @@ static const char *open_last_lookups(struct nameidata *nd,
        struct dentry *dir = nd->path.dentry;
        int open_flag = op->open_flag;
        bool got_write = false;
-       unsigned seq;
-       struct inode *inode;
        struct dentry *dentry;
        const char *res;
 
@@ -3468,7 +3447,7 @@ static const char *open_last_lookups(struct nameidata *nd,
                if (nd->last.name[nd->last.len])
                        nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
                /* we _can_ be in RCU mode here */
-               dentry = lookup_fast(nd, &inode, &seq);
+               dentry = lookup_fast(nd);
                if (IS_ERR(dentry))
                        return ERR_CAST(dentry);
                if (likely(dentry))
@@ -3522,7 +3501,7 @@ static const char *open_last_lookups(struct nameidata *nd,
 finish_lookup:
        if (nd->depth)
                put_link(nd);
-       res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
+       res = step_into(nd, WALK_TRAILING, dentry);
        if (unlikely(res))
                nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
        return res;