namei: introduce struct renamedata

[linux-2.6-microblaze.git] / fs / namei.c
diff --git a/fs/namei.c b/fs/namei.c

index 03d0e11..38ab518 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -259,7 +259,24 @@ void putname(struct filename *name)
                 __putname(name);
  }
  
-static int check_acl(struct inode *inode, int mask)
+/**
+ * check_acl - perform ACL permission checking
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @inode:     inode to check permissions on
+ * @mask:      right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
+ * This function performs the ACL permission checking. Since this function
+ * retrieves POSIX acls it needs to know whether it is called from a blocking or
+ * non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
+ */
+static int check_acl(struct user_namespace *mnt_userns,
+                    struct inode *inode, int mask)
  {
  #ifdef CONFIG_FS_POSIX_ACL
         struct posix_acl *acl;
@@ -271,14 +288,14 @@ static int check_acl(struct inode *inode, int mask)
                 /* no ->get_acl() calls in RCU mode... */
                 if (is_uncached_acl(acl))
                         return -ECHILD;
-               return posix_acl_permission(inode, acl, mask);
+               return posix_acl_permission(mnt_userns, inode, acl, mask);
         }
  
         acl = get_acl(inode, ACL_TYPE_ACCESS);
         if (IS_ERR(acl))
                 return PTR_ERR(acl);
         if (acl) {
-               int error = posix_acl_permission(inode, acl, mask);
+               int error = posix_acl_permission(mnt_userns, inode, acl, mask);
                 posix_acl_release(acl);
                 return error;
         }
@@ -287,18 +304,31 @@ static int check_acl(struct inode *inode, int mask)
         return -EAGAIN;
  }
  
-/*
- * This does the basic UNIX permission checking.
+/**
+ * acl_permission_check - perform basic UNIX permission checking
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @inode:     inode to check permissions on
+ * @mask:      right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
+ * This function performs the basic UNIX permission checking. Since this
+ * function may retrieve POSIX acls it needs to know whether it is called from a
+ * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
   *
- * Note that the POSIX ACL check cares about the MAY_NOT_BLOCK bit,
- * for RCU walking.
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
   */
-static int acl_permission_check(struct inode *inode, int mask)
+static int acl_permission_check(struct user_namespace *mnt_userns,
+                               struct inode *inode, int mask)
  {
         unsigned int mode = inode->i_mode;
+       kuid_t i_uid;
  
         /* Are we the owner? If so, ACL's don't matter */
-       if (likely(uid_eq(current_fsuid(), inode->i_uid))) {
+       i_uid = i_uid_into_mnt(mnt_userns, inode);
+       if (likely(uid_eq(current_fsuid(), i_uid))) {
                 mask &= 7;
                 mode >>= 6;
                 return (mask & ~mode) ? -EACCES : 0;
@@ -306,7 +336,7 @@ static int acl_permission_check(struct inode *inode, int mask)
  
         /* Do we have ACL's? */
         if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
-               int error = check_acl(inode, mask);
+               int error = check_acl(mnt_userns, inode, mask);
                 if (error != -EAGAIN)
                         return error;
         }
@@ -320,7 +350,8 @@ static int acl_permission_check(struct inode *inode, int mask)
          * about? Need to check group ownership if so.
          */
         if (mask & (mode ^ (mode >> 3))) {
-               if (in_group_p(inode->i_gid))
+               kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+               if (in_group_p(kgid))
                         mode >>= 3;
         }
  
@@ -330,6 +361,7 @@ static int acl_permission_check(struct inode *inode, int mask)
  
  /**
   * generic_permission -  check for access rights on a Posix-like filesystem
+ * @mnt_userns:        user namespace of the mount the inode was found from
   * @inode:     inode to check access rights for
   * @mask:      right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
   *             %MAY_NOT_BLOCK ...)
@@ -342,25 +374,33 @@ static int acl_permission_check(struct inode *inode, int mask)
   * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
   * request cannot be satisfied (eg. requires blocking or too much complexity).
   * It would then be called again in ref-walk mode.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
   */
-int generic_permission(struct inode *inode, int mask)
+int generic_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                      int mask)
  {
         int ret;
  
         /*
          * Do the basic permission checks.
          */
-       ret = acl_permission_check(inode, mask);
+       ret = acl_permission_check(mnt_userns, inode, mask);
         if (ret != -EACCES)
                 return ret;
  
         if (S_ISDIR(inode->i_mode)) {
                 /* DACs are overridable for directories */
                 if (!(mask & MAY_WRITE))
-                       if (capable_wrt_inode_uidgid(inode,
+                       if (capable_wrt_inode_uidgid(mnt_userns, inode,
                                                      CAP_DAC_READ_SEARCH))
                                 return 0;
-               if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
+               if (capable_wrt_inode_uidgid(mnt_userns, inode,
+                                            CAP_DAC_OVERRIDE))
                         return 0;
                 return -EACCES;
         }
@@ -370,7 +410,8 @@ int generic_permission(struct inode *inode, int mask)
          */
         mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
         if (mask == MAY_READ)
-               if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
+               if (capable_wrt_inode_uidgid(mnt_userns, inode,
+                                            CAP_DAC_READ_SEARCH))
                         return 0;
         /*
          * Read/write DACs are always overridable.
@@ -378,20 +419,27 @@ int generic_permission(struct inode *inode, int mask)
          * at least one exec bit set.
          */
         if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
-               if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
+               if (capable_wrt_inode_uidgid(mnt_userns, inode,
+                                            CAP_DAC_OVERRIDE))
                         return 0;
  
         return -EACCES;
  }
  EXPORT_SYMBOL(generic_permission);
  
-/*
+/**
+ * do_inode_permission - UNIX permission checking
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @inode:     inode to check permissions on
+ * @mask:      right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
   * We _really_ want to just do "generic_permission()" without
   * even looking at the inode->i_op values. So we keep a cache
   * flag in inode->i_opflags, that says "this has not special
   * permission function, use the fast case".
   */
-static inline int do_inode_permission(struct inode *inode, int mask)
+static inline int do_inode_permission(struct user_namespace *mnt_userns,
+                                     struct inode *inode, int mask)
  {
         if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
                 if (likely(inode->i_op->permission))
@@ -402,7 +450,7 @@ static inline int do_inode_permission(struct inode *inode, int mask)
                 inode->i_opflags |= IOP_FASTPERM;
                 spin_unlock(&inode->i_lock);
         }
-       return generic_permission(inode, mask);
+       return generic_permission(mnt_userns, inode, mask);
  }
  
  /**
@@ -427,8 +475,9 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
  
  /**
   * inode_permission - Check for access rights to a given inode
- * @inode: Inode to check permission on
- * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ * @mnt_userns:        User namespace of the mount the inode was found from
+ * @inode:     Inode to check permission on
+ * @mask:      Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
   *
   * Check for read/write/execute permissions on an inode.  We use fs[ug]id for
   * this, letting us set arbitrary permissions for filesystem access without
@@ -436,7 +485,8 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
   *
   * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
   */
-int inode_permission(struct inode *inode, int mask)
+int inode_permission(struct user_namespace *mnt_userns,
+                    struct inode *inode, int mask)
  {
         int retval;
  
@@ -456,11 +506,11 @@ int inode_permission(struct inode *inode, int mask)
                  * written back improperly if their true value is unknown
                  * to the vfs.
                  */
-               if (HAS_UNMAPPED_ID(inode))
+               if (HAS_UNMAPPED_ID(mnt_userns, inode))
                         return -EACCES;
         }
  
-       retval = do_inode_permission(inode, mask);
+       retval = do_inode_permission(mnt_userns, inode, mask);
         if (retval)
                 return retval;
  
@@ -954,11 +1004,16 @@ int sysctl_protected_regular __read_mostly;
   */
  static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
  {
+       struct user_namespace *mnt_userns;
+       kuid_t i_uid;
+
         if (!sysctl_protected_symlinks)
                 return 0;
  
+       mnt_userns = mnt_user_ns(nd->path.mnt);
+       i_uid = i_uid_into_mnt(mnt_userns, inode);
         /* Allowed if owner and follower match. */
-       if (uid_eq(current_cred()->fsuid, inode->i_uid))
+       if (uid_eq(current_cred()->fsuid, i_uid))
                 return 0;
  
         /* Allowed if parent directory not sticky and world-writable. */
@@ -966,7 +1021,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
                 return 0;
  
         /* Allowed if parent directory and link owner match. */
-       if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
+       if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, i_uid))
                 return 0;
  
         if (nd->flags & LOOKUP_RCU)
@@ -979,6 +1034,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
  
  /**
   * safe_hardlink_source - Check for safe hardlink conditions
+ * @mnt_userns:        user namespace of the mount the inode was found from
   * @inode: the source inode to hardlink from
   *
   * Return false if at least one of the following conditions:
@@ -989,7 +1045,8 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
   *
   * Otherwise returns true.
   */
-static bool safe_hardlink_source(struct inode *inode)
+static bool safe_hardlink_source(struct user_namespace *mnt_userns,
+                                struct inode *inode)
  {
         umode_t mode = inode->i_mode;
  
@@ -1006,7 +1063,7 @@ static bool safe_hardlink_source(struct inode *inode)
                 return false;
  
         /* Hardlinking to unreadable or unwritable sources is dangerous. */
-       if (inode_permission(inode, MAY_READ | MAY_WRITE))
+       if (inode_permission(mnt_userns, inode, MAY_READ | MAY_WRITE))
                 return false;
  
         return true;
@@ -1014,6 +1071,7 @@ static bool safe_hardlink_source(struct inode *inode)
  
  /**
   * may_linkat - Check permissions for creating a hardlink
+ * @mnt_userns:        user namespace of the mount the inode was found from
   * @link: the source to hardlink from
   *
   * Block hardlink when all of:
@@ -1022,14 +1080,21 @@ static bool safe_hardlink_source(struct inode *inode)
   *  - hardlink source is unsafe (see safe_hardlink_source() above)
   *  - not CAP_FOWNER in a namespace with the inode owner uid mapped
   *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
+ *
   * Returns 0 if successful, -ve on error.
   */
-int may_linkat(struct path *link)
+int may_linkat(struct user_namespace *mnt_userns, struct path *link)
  {
         struct inode *inode = link->dentry->d_inode;
  
         /* Inode writeback is not safe when the uid or gid are invalid. */
-       if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+       if (!uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
+           !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
                 return -EOVERFLOW;
  
         if (!sysctl_protected_hardlinks)
@@ -1038,7 +1103,8 @@ int may_linkat(struct path *link)
         /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
          * otherwise, it must be a safe source.
          */
-       if (safe_hardlink_source(inode) || inode_owner_or_capable(inode))
+       if (safe_hardlink_source(mnt_userns, inode) ||
+           inode_owner_or_capable(mnt_userns, inode))
                 return 0;
  
         audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
@@ -1049,6 +1115,7 @@ int may_linkat(struct path *link)
   * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
   *                       should be allowed, or not, on files that already
   *                       exist.
+ * @mnt_userns:        user namespace of the mount the inode was found from
   * @dir_mode: mode bits of directory
   * @dir_uid: owner of directory
   * @inode: the inode of the file to open
@@ -1064,16 +1131,25 @@ int may_linkat(struct path *link)
   * the directory doesn't have to be world writable: being group writable will
   * be enough.
   *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
+ *
   * Returns 0 if the open is allowed, -ve on error.
   */
-static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
-                               struct inode * const inode)
+static int may_create_in_sticky(struct user_namespace *mnt_userns,
+                               struct nameidata *nd, struct inode *const inode)
  {
+       umode_t dir_mode = nd->dir_mode;
+       kuid_t dir_uid = nd->dir_uid;
+
         if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
             (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
             likely(!(dir_mode & S_ISVTX)) ||
-           uid_eq(inode->i_uid, dir_uid) ||
-           uid_eq(current_fsuid(), inode->i_uid))
+           uid_eq(i_uid_into_mnt(mnt_userns, inode), dir_uid) ||
+           uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)))
                 return 0;
  
         if (likely(dir_mode & 0002) ||
@@ -1563,16 +1639,18 @@ static struct dentry *lookup_slow(const struct qstr *name,
         return res;
  }
  
-static inline int may_lookup(struct nameidata *nd)
+static inline int may_lookup(struct user_namespace *mnt_userns,
+                            struct nameidata *nd)
  {
         if (nd->flags & LOOKUP_RCU) {
-               int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
+               int err = inode_permission(mnt_userns, nd->inode,
+                                          MAY_EXEC | MAY_NOT_BLOCK);
                 if (err != -ECHILD)
                         return err;
                 if (unlazy_walk(nd))
                         return -ECHILD;
         }
-       return inode_permission(nd->inode, MAY_EXEC);
+       return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
  }
  
  static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
@@ -2114,8 +2192,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
                 return PTR_ERR(name);
         while (*name=='/')
                 name++;
-       if (!*name)
+       if (!*name) {
+               nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
                 return 0;
+       }
  
         /* At this point we know we have a real path component. */
         for(;;) {
@@ -2123,7 +2203,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
                 u64 hash_len;
                 int type;
  
-               err = may_lookup(nd);
+               err = may_lookup(&init_user_ns, nd);
                 if (err)
                         return err;
  
@@ -2171,7 +2251,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
  OK:
                         /* pathname or trailing symlink, done */
                         if (!depth) {
-                               nd->dir_uid = nd->inode->i_uid;
+                               nd->dir_uid = i_uid_into_mnt(&init_user_ns, nd->inode);
                                 nd->dir_mode = nd->inode->i_mode;
                                 nd->flags &= ~LOOKUP_PARENT;
                                 return 0;
@@ -2504,7 +2584,7 @@ static int lookup_one_len_common(const char *name, struct dentry *base,
                         return err;
         }
  
-       return inode_permission(base->d_inode, MAY_EXEC);
+       return inode_permission(&init_user_ns, base->d_inode, MAY_EXEC);
  }
  
  /**
@@ -2649,15 +2729,16 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
  }
  EXPORT_SYMBOL(user_path_at_empty);
  
-int __check_sticky(struct inode *dir, struct inode *inode)
+int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir,
+                  struct inode *inode)
  {
         kuid_t fsuid = current_fsuid();
  
-       if (uid_eq(inode->i_uid, fsuid))
+       if (uid_eq(i_uid_into_mnt(mnt_userns, inode), fsuid))
                 return 0;
-       if (uid_eq(dir->i_uid, fsuid))
+       if (uid_eq(i_uid_into_mnt(mnt_userns, dir), fsuid))
                 return 0;
-       return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
+       return !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FOWNER);
  }
  EXPORT_SYMBOL(__check_sticky);
  
@@ -2681,7 +2762,8 @@ EXPORT_SYMBOL(__check_sticky);
   * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
   *     nfs_async_unlink().
   */
-static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
+static int may_delete(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *victim, bool isdir)
  {
         struct inode *inode = d_backing_inode(victim);
         int error;
@@ -2693,19 +2775,21 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
         BUG_ON(victim->d_parent->d_inode != dir);
  
         /* Inode writeback is not safe when the uid or gid are invalid. */
-       if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+       if (!uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
+           !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
                 return -EOVERFLOW;
  
         audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
  
-       error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
         if (error)
                 return error;
         if (IS_APPEND(dir))
                 return -EPERM;
  
-       if (check_sticky(dir, inode) || IS_APPEND(inode) ||
-           IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
+       if (check_sticky(mnt_userns, dir, inode) || IS_APPEND(inode) ||
+           IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) ||
+           HAS_UNMAPPED_ID(mnt_userns, inode))
                 return -EPERM;
         if (isdir) {
                 if (!d_is_dir(victim))
@@ -2730,7 +2814,8 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
   *  4. We should have write and exec permissions on dir
   *  5. We can't do it if dir is immutable (done in permission())
   */
-static inline int may_create(struct inode *dir, struct dentry *child)
+static inline int may_create(struct user_namespace *mnt_userns,
+                            struct inode *dir, struct dentry *child)
  {
         struct user_namespace *s_user_ns;
         audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
@@ -2739,10 +2824,10 @@ static inline int may_create(struct inode *dir, struct dentry *child)
         if (IS_DEADDIR(dir))
                 return -ENOENT;
         s_user_ns = dir->i_sb->s_user_ns;
-       if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-           !kgid_has_mapping(s_user_ns, current_fsgid()))
+       if (!kuid_has_mapping(s_user_ns, fsuid_into_mnt(mnt_userns)) ||
+           !kgid_has_mapping(s_user_ns, fsgid_into_mnt(mnt_userns)))
                 return -EOVERFLOW;
-       return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
  }
  
  /*
@@ -2792,7 +2877,7 @@ EXPORT_SYMBOL(unlock_rename);
  int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                 bool want_excl)
  {
-       int error = may_create(dir, dentry);
+       int error = may_create(&init_user_ns, dir, dentry);
         if (error)
                 return error;
  
@@ -2815,7 +2900,7 @@ int vfs_mkobj(struct dentry *dentry, umode_t mode,
                 void *arg)
  {
         struct inode *dir = dentry->d_parent->d_inode;
-       int error = may_create(dir, dentry);
+       int error = may_create(&init_user_ns, dir, dentry);
         if (error)
                 return error;
  
@@ -2837,7 +2922,8 @@ bool may_open_dev(const struct path *path)
                 !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
  }
  
-static int may_open(const struct path *path, int acc_mode, int flag)
+static int may_open(struct user_namespace *mnt_userns, const struct path *path,
+                   int acc_mode, int flag)
  {
         struct dentry *dentry = path->dentry;
         struct inode *inode = dentry->d_inode;
@@ -2872,7 +2958,7 @@ static int may_open(const struct path *path, int acc_mode, int flag)
                 break;
         }
  
-       error = inode_permission(inode, MAY_OPEN | acc_mode);
+       error = inode_permission(mnt_userns, inode, MAY_OPEN | acc_mode);
         if (error)
                 return error;
  
@@ -2887,7 +2973,7 @@ static int may_open(const struct path *path, int acc_mode, int flag)
         }
  
         /* O_NOATIME can only be set by the owner or superuser */
-       if (flag & O_NOATIME && !inode_owner_or_capable(inode))
+       if (flag & O_NOATIME && !inode_owner_or_capable(mnt_userns, inode))
                 return -EPERM;
  
         return 0;
@@ -2922,7 +3008,9 @@ static inline int open_to_namei_flags(int flag)
         return flag;
  }
  
-static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t mode)
+static int may_o_create(struct user_namespace *mnt_userns,
+                       const struct path *dir, struct dentry *dentry,
+                       umode_t mode)
  {
         struct user_namespace *s_user_ns;
         int error = security_path_mknod(dir, dentry, mode, 0);
@@ -2930,11 +3018,12 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
                 return error;
  
         s_user_ns = dir->dentry->d_sb->s_user_ns;
-       if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-           !kgid_has_mapping(s_user_ns, current_fsgid()))
+       if (!kuid_has_mapping(s_user_ns, fsuid_into_mnt(mnt_userns)) ||
+           !kgid_has_mapping(s_user_ns, fsgid_into_mnt(mnt_userns)))
                 return -EOVERFLOW;
  
-       error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+       error = inode_permission(mnt_userns, dir->dentry->d_inode,
+                                MAY_WRITE | MAY_EXEC);
         if (error)
                 return error;
  
@@ -3066,7 +3155,8 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
                 if (!IS_POSIXACL(dir->d_inode))
                         mode &= ~current_umask();
                 if (likely(got_write))
-                       create_error = may_o_create(&nd->path, dentry, mode);
+                       create_error = may_o_create(&init_user_ns, &nd->path,
+                                                   dentry, mode);
                 else
                         create_error = -EROFS;
         }
@@ -3227,7 +3317,7 @@ static int do_open(struct nameidata *nd,
                         return -EEXIST;
                 if (d_is_dir(nd->path.dentry))
                         return -EISDIR;
-               error = may_create_in_sticky(nd->dir_mode, nd->dir_uid,
+               error = may_create_in_sticky(&init_user_ns, nd,
                                              d_backing_inode(nd->path.dentry));
                 if (unlikely(error))
                         return error;
@@ -3247,7 +3337,7 @@ static int do_open(struct nameidata *nd,
                         return error;
                 do_truncate = true;
         }
-       error = may_open(&nd->path, acc_mode, open_flag);
+       error = may_open(&init_user_ns, &nd->path, acc_mode, open_flag);
         if (!error && !(file->f_mode & FMODE_OPENED))
                 error = vfs_open(&nd->path, file);
         if (!error)
@@ -3271,7 +3361,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
         int error;
  
         /* we want directory to be writable */
-       error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       error = inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
         if (error)
                 goto out_err;
         error = -EOPNOTSUPP;
@@ -3322,7 +3412,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
         path.dentry = child;
         audit_inode(nd->name, child, 0);
         /* Don't check for other permissions, the inode was just created */
-       error = may_open(&path, 0, op->open_flag);
+       error = may_open(&init_user_ns, &path, 0, op->open_flag);
         if (error)
                 goto out2;
         file->f_path.mnt = path.mnt;
@@ -3529,7 +3619,7 @@ EXPORT_SYMBOL(user_path_create);
  int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
  {
         bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
-       int error = may_create(dir, dentry);
+       int error = may_create(&init_user_ns, dir, dentry);
  
         if (error)
                 return error;
@@ -3630,7 +3720,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
  
  int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
-       int error = may_create(dir, dentry);
+       int error = may_create(&init_user_ns, dir, dentry);
         unsigned max_links = dir->i_sb->s_max_links;
  
         if (error)
@@ -3691,7 +3781,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
  
  int vfs_rmdir(struct inode *dir, struct dentry *dentry)
  {
-       int error = may_delete(dir, dentry, 1);
+       int error = may_delete(&init_user_ns, dir, dentry, 1);
  
         if (error)
                 return error;
@@ -3813,7 +3903,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
  int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
  {
         struct inode *target = dentry->d_inode;
-       int error = may_delete(dir, dentry, 0);
+       int error = may_delete(&init_user_ns, dir, dentry, 0);
  
         if (error)
                 return error;
@@ -3945,7 +4035,7 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
  
  int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
  {
-       int error = may_create(dir, dentry);
+       int error = may_create(&init_user_ns, dir, dentry);
  
         if (error)
                 return error;
@@ -4034,7 +4124,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
         if (!inode)
                 return -ENOENT;
  
-       error = may_create(dir, new_dentry);
+       error = may_create(&init_user_ns, dir, new_dentry);
         if (error)
                 return error;
  
@@ -4051,7 +4141,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
          * be writen back improperly if their true value is unknown to
          * the vfs.
          */
-       if (HAS_UNMAPPED_ID(inode))
+       if (HAS_UNMAPPED_ID(&init_user_ns, inode))
                 return -EPERM;
         if (!dir->i_op->link)
                 return -EPERM;
@@ -4133,7 +4223,7 @@ retry:
         error = -EXDEV;
         if (old_path.mnt != new_path.mnt)
                 goto out_dput;
-       error = may_linkat(&old_path);
+       error = may_linkat(&init_user_ns, &old_path);
         if (unlikely(error))
                 goto out_dput;
         error = security_path_link(old_path.dentry, &new_path, new_dentry);
@@ -4221,11 +4311,15 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
   *        ->i_mutex on parents, which works but leads to some truly excessive
   *        locking].
   */
-int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-              struct inode *new_dir, struct dentry *new_dentry,
-              struct inode **delegated_inode, unsigned int flags)
+int vfs_rename(struct renamedata *rd)
  {
         int error;
+       struct user_namespace *mnt_userns = &init_user_ns;
+       struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
+       struct dentry *old_dentry = rd->old_dentry;
+       struct dentry *new_dentry = rd->new_dentry;
+       struct inode **delegated_inode = rd->delegated_inode;
+       unsigned int flags = rd->flags;
         bool is_dir = d_is_dir(old_dentry);
         struct inode *source = old_dentry->d_inode;
         struct inode *target = new_dentry->d_inode;
@@ -4236,19 +4330,19 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         if (source == target)
                 return 0;
  
-       error = may_delete(old_dir, old_dentry, is_dir);
+       error = may_delete(mnt_userns, old_dir, old_dentry, is_dir);
         if (error)
                 return error;
  
         if (!target) {
-               error = may_create(new_dir, new_dentry);
+               error = may_create(mnt_userns, new_dir, new_dentry);
         } else {
                 new_is_dir = d_is_dir(new_dentry);
  
                 if (!(flags & RENAME_EXCHANGE))
-                       error = may_delete(new_dir, new_dentry, is_dir);
+                       error = may_delete(mnt_userns, new_dir, new_dentry, is_dir);
                 else
-                       error = may_delete(new_dir, new_dentry, new_is_dir);
+                       error = may_delete(mnt_userns, new_dir, new_dentry, new_is_dir);
         }
         if (error)
                 return error;
@@ -4262,12 +4356,14 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
          */
         if (new_dir != old_dir) {
                 if (is_dir) {
-                       error = inode_permission(source, MAY_WRITE);
+                       error = inode_permission(&init_user_ns, source,
+                                                MAY_WRITE);
                         if (error)
                                 return error;
                 }
                 if ((flags & RENAME_EXCHANGE) && new_is_dir) {
-                       error = inode_permission(target, MAY_WRITE);
+                       error = inode_permission(&init_user_ns, target,
+                                                MAY_WRITE);
                         if (error)
                                 return error;
                 }
@@ -4349,6 +4445,7 @@ EXPORT_SYMBOL(vfs_rename);
  int do_renameat2(int olddfd, struct filename *from, int newdfd,
                  struct filename *to, unsigned int flags)
  {
+       struct renamedata rd;
         struct dentry *old_dentry, *new_dentry;
         struct dentry *trap;
         struct path old_path, new_path;
@@ -4452,9 +4549,14 @@ retry_deleg:
                                      &new_path, new_dentry, flags);
         if (error)
                 goto exit5;
-       error = vfs_rename(old_path.dentry->d_inode, old_dentry,
-                          new_path.dentry->d_inode, new_dentry,
-                          &delegated_inode, flags);
+
+       rd.old_dir         = old_path.dentry->d_inode;
+       rd.old_dentry      = old_dentry;
+       rd.new_dir         = new_path.dentry->d_inode;
+       rd.new_dentry      = new_dentry;
+       rd.delegated_inode = &delegated_inode;
+       rd.flags           = flags;
+       error = vfs_rename(&rd);
  exit5:
         dput(new_dentry);
  exit4: